Re: [PATCH 1/4] libceph: add function that reset client's entity addr

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2019-06-04 at 20:35 +0800, Yan, Zheng wrote:
> On Tue, Jun 4, 2019 at 7:04 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
> > On Tue, 2019-06-04 at 17:39 +0800, Yan, Zheng wrote:
> > > Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx>
> > > ---
> > >  include/linux/ceph/libceph.h    |  1 +
> > >  include/linux/ceph/messenger.h  |  1 +
> > >  include/linux/ceph/mon_client.h |  1 +
> > >  include/linux/ceph/osd_client.h |  1 +
> > >  net/ceph/ceph_common.c          |  8 ++++++++
> > >  net/ceph/messenger.c            |  5 +++++
> > >  net/ceph/mon_client.c           |  7 +++++++
> > >  net/ceph/osd_client.c           | 16 ++++++++++++++++
> > >  8 files changed, 40 insertions(+)
> > > 
> > > diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
> > > index a3cddf5f0e60..f29959eed025 100644
> > > --- a/include/linux/ceph/libceph.h
> > > +++ b/include/linux/ceph/libceph.h
> > > @@ -291,6 +291,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
> > >  struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
> > >  u64 ceph_client_gid(struct ceph_client *client);
> > >  extern void ceph_destroy_client(struct ceph_client *client);
> > > +extern void ceph_reset_client_addr(struct ceph_client *client);
> > >  extern int __ceph_open_session(struct ceph_client *client,
> > >                              unsigned long started);
> > >  extern int ceph_open_session(struct ceph_client *client);
> > > diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
> > > index 23895d178149..c4458dc6a757 100644
> > > --- a/include/linux/ceph/messenger.h
> > > +++ b/include/linux/ceph/messenger.h
> > > @@ -337,6 +337,7 @@ extern void ceph_msgr_flush(void);
> > >  extern void ceph_messenger_init(struct ceph_messenger *msgr,
> > >                               struct ceph_entity_addr *myaddr);
> > >  extern void ceph_messenger_fini(struct ceph_messenger *msgr);
> > > +extern void ceph_messenger_reset_nonce(struct ceph_messenger *msgr);
> > > 
> > >  extern void ceph_con_init(struct ceph_connection *con, void *private,
> > >                       const struct ceph_connection_operations *ops,
> > > diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
> > > index 3a4688af7455..0d8d890c6759 100644
> > > --- a/include/linux/ceph/mon_client.h
> > > +++ b/include/linux/ceph/mon_client.h
> > > @@ -110,6 +110,7 @@ extern int ceph_monmap_contains(struct ceph_monmap *m,
> > > 
> > >  extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
> > >  extern void ceph_monc_stop(struct ceph_mon_client *monc);
> > > +extern void ceph_monc_reopen_session(struct ceph_mon_client *monc);
> > > 
> > >  enum {
> > >       CEPH_SUB_MONMAP = 0,
> > > diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> > > index 2294f963dab7..a12b7fc9cfd6 100644
> > > --- a/include/linux/ceph/osd_client.h
> > > +++ b/include/linux/ceph/osd_client.h
> > > @@ -381,6 +381,7 @@ extern void ceph_osdc_cleanup(void);
> > >  extern int ceph_osdc_init(struct ceph_osd_client *osdc,
> > >                         struct ceph_client *client);
> > >  extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
> > > +extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
> > > 
> > >  extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
> > >                                  struct ceph_msg *msg);
> > > diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> > > index 79eac465ec65..55210823d1cc 100644
> > > --- a/net/ceph/ceph_common.c
> > > +++ b/net/ceph/ceph_common.c
> > > @@ -693,6 +693,14 @@ void ceph_destroy_client(struct ceph_client *client)
> > >  }
> > >  EXPORT_SYMBOL(ceph_destroy_client);
> > > 
> > > +void ceph_reset_client_addr(struct ceph_client *client)
> > > +{
> > > +     ceph_messenger_reset_nonce(&client->msgr);
> > > +     ceph_monc_reopen_session(&client->monc);
> > > +     ceph_osdc_reopen_osds(&client->osdc);
> > > +}
> > > +EXPORT_SYMBOL(ceph_reset_client_addr);
> > > +
> > >  /*
> > >   * true if we have the mon map (and have thus joined the cluster)
> > >   */
> > > diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
> > > index 3ee380758ddd..cd03a1cba849 100644
> > > --- a/net/ceph/messenger.c
> > > +++ b/net/ceph/messenger.c
> > > @@ -3028,6 +3028,11 @@ static void con_fault(struct ceph_connection *con)
> > >  }
> > > 
> > > 
> > > +void ceph_messenger_reset_nonce(struct ceph_messenger *msgr)
> > > +{
> > > +     msgr->inst.addr.nonce += 1000000;
> > > +     encode_my_addr(msgr);
> > > +}
> > > 
> > >  /*
> > >   * initialize a new messenger instance
> > > diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
> > > index 895679d3529b..6dab6a94e9cc 100644
> > > --- a/net/ceph/mon_client.c
> > > +++ b/net/ceph/mon_client.c
> > > @@ -209,6 +209,13 @@ static void reopen_session(struct ceph_mon_client *monc)
> > >       __open_session(monc);
> > >  }
> > > 
> > > +void ceph_monc_reopen_session(struct ceph_mon_client *monc)
> > > +{
> > > +     mutex_lock(&monc->mutex);
> > > +     reopen_session(monc);
> > > +     mutex_unlock(&monc->mutex);
> > > +}
> > > +
> > >  static void un_backoff(struct ceph_mon_client *monc)
> > >  {
> > >       monc->hunt_mult /= 2; /* reduce by 50% */
> > > diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> > > index e6d31e0f0289..67e9466f27fd 100644
> > > --- a/net/ceph/osd_client.c
> > > +++ b/net/ceph/osd_client.c
> > > @@ -5089,6 +5089,22 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
> > >  }
> > >  EXPORT_SYMBOL(ceph_osdc_call);
> > > 
> > > +/*
> > > + * reset all osd connections
> > > + */
> > > +void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc)
> > > +{
> > > +     struct rb_node *n;
> > > +     down_write(&osdc->lock);
> > > +     for (n = rb_first(&osdc->osds); n; ) {
> > > +             struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
> > > +             n = rb_next(n);
> > > +             if (!reopen_osd(osd))
> > > +                     kick_osd_requests(osd);
> > > +     }
> > > +     up_write(&osdc->lock);
> > > +}
> > > +
> > >  /*
> > >   * init, shutdown
> > >   */
> > 
> > I'd like to better understand what happens to file descriptors that were
> > opened before the blacklisting occurred.
> > 
> > Suppose I hold a POSIX lock on a O_RDWR fd, and then get blacklisted.
> > The admin then remounts the mount. What happens to my fd? Can I carry on
> > using it, or will it be shut down in some fashion? What about the lock?
> > I've definitely lost it during the blacklisting -- how do we make the
> > application aware of that fact?
> 
> After 'mount -f',  cephfs return -EIO for any operation on open files.
> After remount, operations except fsync and close work as normal. fsync
> and close return error if any dirty data got dropped.
> 
> Current cephfs code already handle file lock. after session get
> evicted, cephfs return -EIO for any lock operation until all local
> locks are released. (see commit b3f8d68f38a87)
> 

I think that's not sufficient. After a blacklisting event, we need to
shut down all file descriptors that were open before the event.

Consider:

Suppose I have an application (maybe a distributed database) that opens
a file O_RDWR and takes out a lock on the file. The client gets
blacklisted and loses its lock, and then the admin remounts the thing
leaving the fd intact.

After the blacklisting, the application then performs some writes to the
file and fsyncs and that all succeeds. Those writes are now being done
without holding the file lock, even though the application thinks it has
the lock. That seems like potential silent data corruption.

Remounting is fine if we're just interested in getting the mount working
again, but I don't see how we can reasonably allow applications to
assume continuity of stateful objects across such an event.
-- 
Jeff Layton <jlayton@xxxxxxxxxx>




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux