On Tue, Jun 4, 2019 at 9:12 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
>
> On Tue, 2019-06-04 at 20:35 +0800, Yan, Zheng wrote:
> > On Tue, Jun 4, 2019 at 7:04 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
> > > On Tue, 2019-06-04 at 17:39 +0800, Yan, Zheng wrote:
> > > > Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx>
> > > > ---
> > > >  include/linux/ceph/libceph.h    |  1 +
> > > >  include/linux/ceph/messenger.h  |  1 +
> > > >  include/linux/ceph/mon_client.h |  1 +
> > > >  include/linux/ceph/osd_client.h |  1 +
> > > >  net/ceph/ceph_common.c          |  8 ++++++++
> > > >  net/ceph/messenger.c            |  5 +++++
> > > >  net/ceph/mon_client.c           |  7 +++++++
> > > >  net/ceph/osd_client.c           | 16 ++++++++++++++++
> > > >  8 files changed, 40 insertions(+)
> > > >
> > > > diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
> > > > index a3cddf5f0e60..f29959eed025 100644
> > > > --- a/include/linux/ceph/libceph.h
> > > > +++ b/include/linux/ceph/libceph.h
> > > > @@ -291,6 +291,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
> > > >  struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
> > > >  u64 ceph_client_gid(struct ceph_client *client);
> > > >  extern void ceph_destroy_client(struct ceph_client *client);
> > > > +extern void ceph_reset_client_addr(struct ceph_client *client);
> > > >  extern int __ceph_open_session(struct ceph_client *client,
> > > >  			       unsigned long started);
> > > >  extern int ceph_open_session(struct ceph_client *client);
> > > > diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
> > > > index 23895d178149..c4458dc6a757 100644
> > > > --- a/include/linux/ceph/messenger.h
> > > > +++ b/include/linux/ceph/messenger.h
> > > > @@ -337,6 +337,7 @@ extern void ceph_msgr_flush(void);
> > > >  extern void ceph_messenger_init(struct ceph_messenger *msgr,
> > > >  				struct ceph_entity_addr *myaddr);
> > > >  extern void ceph_messenger_fini(struct ceph_messenger *msgr);
> > > > +extern void ceph_messenger_reset_nonce(struct ceph_messenger *msgr);
> > > >
> > > >  extern void ceph_con_init(struct ceph_connection *con, void *private,
> > > >  			  const struct ceph_connection_operations *ops,
> > > > diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
> > > > index 3a4688af7455..0d8d890c6759 100644
> > > > --- a/include/linux/ceph/mon_client.h
> > > > +++ b/include/linux/ceph/mon_client.h
> > > > @@ -110,6 +110,7 @@ extern int ceph_monmap_contains(struct ceph_monmap *m,
> > > >
> > > >  extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
> > > >  extern void ceph_monc_stop(struct ceph_mon_client *monc);
> > > > +extern void ceph_monc_reopen_session(struct ceph_mon_client *monc);
> > > >
> > > >  enum {
> > > >  	CEPH_SUB_MONMAP = 0,
> > > > diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> > > > index 2294f963dab7..a12b7fc9cfd6 100644
> > > > --- a/include/linux/ceph/osd_client.h
> > > > +++ b/include/linux/ceph/osd_client.h
> > > > @@ -381,6 +381,7 @@ extern void ceph_osdc_cleanup(void);
> > > >  extern int ceph_osdc_init(struct ceph_osd_client *osdc,
> > > >  			  struct ceph_client *client);
> > > >  extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
> > > > +extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
> > > >
> > > >  extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
> > > >  				   struct ceph_msg *msg);
> > > > diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> > > > index 79eac465ec65..55210823d1cc 100644
> > > > --- a/net/ceph/ceph_common.c
> > > > +++ b/net/ceph/ceph_common.c
> > > > @@ -693,6 +693,14 @@ void ceph_destroy_client(struct ceph_client *client)
> > > >  }
> > > >  EXPORT_SYMBOL(ceph_destroy_client);
> > > >
> > > > +void ceph_reset_client_addr(struct ceph_client *client)
> > > > +{
> > > > +	ceph_messenger_reset_nonce(&client->msgr);
> > > > +	ceph_monc_reopen_session(&client->monc);
> > > > +	ceph_osdc_reopen_osds(&client->osdc);
> > > > +}
> > > > +EXPORT_SYMBOL(ceph_reset_client_addr);
> > > > +
> > > >  /*
> > > >   * true if we have the mon map (and have thus joined the cluster)
> > > >   */
> > > > diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
> > > > index 3ee380758ddd..cd03a1cba849 100644
> > > > --- a/net/ceph/messenger.c
> > > > +++ b/net/ceph/messenger.c
> > > > @@ -3028,6 +3028,11 @@ static void con_fault(struct ceph_connection *con)
> > > >  }
> > > >
> > > >
> > > > +void ceph_messenger_reset_nonce(struct ceph_messenger *msgr)
> > > > +{
> > > > +	msgr->inst.addr.nonce += 1000000;
> > > > +	encode_my_addr(msgr);
> > > > +}
> > > >
> > > >  /*
> > > >   * initialize a new messenger instance
> > > > diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
> > > > index 895679d3529b..6dab6a94e9cc 100644
> > > > --- a/net/ceph/mon_client.c
> > > > +++ b/net/ceph/mon_client.c
> > > > @@ -209,6 +209,13 @@ static void reopen_session(struct ceph_mon_client *monc)
> > > >  	__open_session(monc);
> > > >  }
> > > >
> > > > +void ceph_monc_reopen_session(struct ceph_mon_client *monc)
> > > > +{
> > > > +	mutex_lock(&monc->mutex);
> > > > +	reopen_session(monc);
> > > > +	mutex_unlock(&monc->mutex);
> > > > +}
> > > > +
> > > >  static void un_backoff(struct ceph_mon_client *monc)
> > > >  {
> > > >  	monc->hunt_mult /= 2;	/* reduce by 50% */
> > > > diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> > > > index e6d31e0f0289..67e9466f27fd 100644
> > > > --- a/net/ceph/osd_client.c
> > > > +++ b/net/ceph/osd_client.c
> > > > @@ -5089,6 +5089,22 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
> > > >  }
> > > >  EXPORT_SYMBOL(ceph_osdc_call);
> > > >
> > > > +/*
> > > > + * reset all osd connections
> > > > + */
> > > > +void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc)
> > > > +{
> > > > +	struct rb_node *n;
> > > > +	down_write(&osdc->lock);
> > > > +	for (n = rb_first(&osdc->osds); n; ) {
> > > > +		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
> > > > +		n = rb_next(n);
> > > > +		if (!reopen_osd(osd))
> > > > +			kick_osd_requests(osd);
> > > > +	}
> > > > +	up_write(&osdc->lock);
> > > > +}
> > > > +
> > > >  /*
> > > >   * init, shutdown
> > > >   */
> > >
> > > I'd like to better understand what happens to file descriptors that were
> > > opened before the blacklisting occurred.
> > >
> > > Suppose I hold a POSIX lock on an O_RDWR fd, and then get blacklisted.
> > > The admin then remounts the mount. What happens to my fd? Can I carry on
> > > using it, or will it be shut down in some fashion? What about the lock?
> > > I've definitely lost it during the blacklisting -- how do we make the
> > > application aware of that fact?
> >
> > After 'mount -f', cephfs returns -EIO for any operation on open files.
> > After remount, operations except fsync and close work as normal. fsync
> > and close return an error if any dirty data got dropped.
> >
> > The current cephfs code already handles file locks. After a session gets
> > evicted, cephfs returns -EIO for any lock operation until all local
> > locks are released
> > (see commit b3f8d68f38a87).
> >
>
> I think that's not sufficient. After a blacklisting event, we need to
> shut down all file descriptors that were open before the event.
>
> Consider:
>
> Suppose I have an application (maybe a distributed database) that opens
> a file O_RDWR and takes out a lock on the file. The client gets
> blacklisted and loses its lock, and then the admin remounts the thing,
> leaving the fd intact.
>
> After the blacklisting, the application then performs some writes to the
> file and fsyncs, and that all succeeds. Those writes are now being done
> without holding the file lock, even though the application thinks it has
> the lock. That seems like potential silent data corruption.
>

Is it possible that an application uses one file descriptor to do file
locking, and uses different file descriptors to do read/write? If so,
allowing read/write through a new file descriptor is not safe either.

Regards
Yan, Zheng

> Remounting is fine if we're just interested in getting the mount working
> again, but I don't see how we can reasonably allow applications to
> assume continuity of stateful objects across such an event.
> --
> Jeff Layton <jlayton@xxxxxxxxxx>
>
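
For concreteness, here is a minimal userspace sketch of the single-fd
scenario Jeff describes. The update_record() helper, its arguments, and
the locking policy are hypothetical, for illustration only; nothing
below comes from the patch or from any real application.

#include <fcntl.h>
#include <unistd.h>

/*
 * Take a whole-file POSIX write lock, update a record, then unlock.
 * If the client is blacklisted after F_SETLKW succeeds, the cluster
 * has already revoked the lock; if a forced remount then lets the
 * pwrite() and fsync() below succeed on the old fd, the update lands
 * with no lock held -- the silent-corruption window described above.
 */
static int update_record(int fd, const char *buf, size_t len, off_t off)
{
	struct flock fl = {
		.l_type   = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start  = 0,
		.l_len    = 0,		/* length 0 locks the whole file */
	};
	int ret = -1;

	if (fcntl(fd, F_SETLKW, &fl) == -1)	/* block until we own the lock */
		return -1;

	/* <-- blacklisting + remount here leaves us lockless but unaware */

	if (pwrite(fd, buf, len, off) == (ssize_t)len && fsync(fd) == 0)
		ret = 0;

	fl.l_type = F_UNLCK;
	fcntl(fd, F_SETLK, &fl);		/* unlock; may itself fail with EIO */
	return ret;
}

Zheng's counterpoint maps onto the same sketch: if the fcntl() lock were
taken on one descriptor while the pwrite() went through a second, freshly
opened descriptor, then invalidating only the descriptors that existed
before the blacklisting would still leave the unprotected write possible.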