On Tue, 12 Jun 2012, Yan, Zheng wrote: > On Thu, May 31, 2012 at 3:35 AM, Alex Elder <elder@xxxxxxxxxxx> wrote: > > Start explicitly keeping track of the state of a ceph connection's > > socket, separate from the state of the connection itself. Create > > placeholder functions to encapsulate the state transitions. > > > > -------- > > | NEW* | transient initial state > > -------- > > | con_sock_state_init() > > v > > ---------- > > | CLOSED | initialized, but no socket (and no > > ---------- TCP connection) > > ^ \ > > | \ con_sock_state_connecting() > > | ---------------------- > > | \ > > + con_sock_state_closed() \ > > |\ \ > > | \ \ > > | ----------- \ > > | | CLOSING | socket event; \ > > | ----------- await close \ > > | ^ | > > | | | > > | + con_sock_state_closing() | > > | / \ | > > | / --------------- | > > | / \ v > > | / -------------- > > | / -----------------| CONNECTING | socket created, TCP > > | | / -------------- connect initiated > > | | | con_sock_state_connected() > > | | v > > ------------- > > | CONNECTED | TCP connection established > > ------------- > > > > Make the socket state an atomic variable, reinforcing that it's a > > distinct transtion with no possible "intermediate/both" states. > > This is almost certainly overkill at this point, though the > > transitions into CONNECTED and CLOSING state do get called via > > socket callback (the rest of the transitions occur with the > > connection mutex held). We can back out the atomicity later. > > > > Signed-off-by: Alex Elder <elder@xxxxxxxxxxx> > > --- > > include/linux/ceph/messenger.h | 8 ++++- > > net/ceph/messenger.c | 63 > > ++++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 69 insertions(+), 2 deletions(-) > > > > diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h > > index 920235e..5e852f4 100644 > > --- a/include/linux/ceph/messenger.h > > +++ b/include/linux/ceph/messenger.h > > @@ -137,14 +137,18 @@ struct ceph_connection { > > const struct ceph_connection_operations *ops; > > > > struct ceph_messenger *msgr; > > + > > + atomic_t sock_state; > > struct socket *sock; > > + struct ceph_entity_addr peer_addr; /* peer address */ > > + struct ceph_entity_addr peer_addr_for_me; > > + > > unsigned long flags; > > unsigned long state; > > const char *error_msg; /* error message, if any */ > > > > - struct ceph_entity_addr peer_addr; /* peer address */ > > struct ceph_entity_name peer_name; /* peer name */ > > - struct ceph_entity_addr peer_addr_for_me; > > + > > unsigned peer_features; > > u32 connect_seq; /* identify the most recent connection > > attempt for this connection, client */ > > diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c > > index 29055df..7e11b07 100644 > > --- a/net/ceph/messenger.c > > +++ b/net/ceph/messenger.c > > @@ -29,6 +29,14 @@ > > * the sender. > > */ > > > > +/* State values for ceph_connection->sock_state; NEW is assumed to be 0 */ > > + > > +#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */ > > +#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */ > > +#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> > > CLOSING */ > > +#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED > > */ > > +#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */ > > + > > /* static tag bytes (protocol control messages) */ > > static char tag_msg = CEPH_MSGR_TAG_MSG; > > static char tag_ack = CEPH_MSGR_TAG_ACK; > > @@ -147,6 +155,54 @@ void ceph_msgr_flush(void) > > } > > EXPORT_SYMBOL(ceph_msgr_flush); > > > > +/* Connection socket state transition functions */ > > + > > +static void con_sock_state_init(struct ceph_connection *con) > > +{ > > + int old_state; > > + > > + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); > > + if (WARN_ON(old_state != CON_SOCK_STATE_NEW)) > > + printk("%s: unexpected old state %d\n", __func__, > > old_state); > > +} > > + > > +static void con_sock_state_connecting(struct ceph_connection *con) > > +{ > > + int old_state; > > + > > + old_state = atomic_xchg(&con->sock_state, > > CON_SOCK_STATE_CONNECTING); > > + if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED)) > > + printk("%s: unexpected old state %d\n", __func__, > > old_state); > > +} > > + > > +static void con_sock_state_connected(struct ceph_connection *con) > > +{ > > + int old_state; > > + > > + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED); > > + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING)) > > + printk("%s: unexpected old state %d\n", __func__, > > old_state); > > +} > > + > > +static void con_sock_state_closing(struct ceph_connection *con) > > +{ > > + int old_state; > > + > > + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING); > > + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING && > > + old_state != CON_SOCK_STATE_CONNECTED)) > > + printk("%s: unexpected old state %d\n", __func__, > > old_state); > > +} > > + > > +static void con_sock_state_closed(struct ceph_connection *con) > > +{ > > + int old_state; > > + > > + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); > > + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED && > > + old_state != CON_SOCK_STATE_CLOSING)) > > + printk("%s: unexpected old state %d\n", __func__, > > old_state); > > +} > > > Hi Alex, > > Single node setup can easily trigger above WARN_ON. I think this is because > local TCP connection setup and shutdown is very fast, they finish immediately > after sock->connect and sock->shutdown return. Yep. This was just fixed yesterday, in the testing-next branch, by 'libceph: transition socket state prior to actual connect'. Are you still hitting the bio null deref? Thanks- sage > > Yan, Zheng > > > /* > > * socket callback functions > > @@ -203,6 +259,7 @@ static void ceph_sock_state_change(struct sock *sk) > > dout("%s TCP_CLOSE\n", __func__); > > case TCP_CLOSE_WAIT: > > dout("%s TCP_CLOSE_WAIT\n", __func__); > > + con_sock_state_closing(con); > > if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) { > > if (test_bit(CONNECTING, &con->state)) > > con->error_msg = "connection failed"; > > @@ -213,6 +270,7 @@ static void ceph_sock_state_change(struct sock *sk) > > break; > > case TCP_ESTABLISHED: > > dout("%s TCP_ESTABLISHED\n", __func__); > > + con_sock_state_connected(con); > > queue_con(con); > > break; > > default: /* Everything else is uninteresting */ > > @@ -277,6 +335,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) > > return ret; > > } > > con->sock = sock; > > + con_sock_state_connecting(con); > > > > return 0; > > } > > @@ -341,6 +400,7 @@ static int con_close_socket(struct ceph_connection *con) > > rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); > > sock_release(con->sock); > > con->sock = NULL; > > + con_sock_state_closed(con); > > return rc; > > } > > > > @@ -460,6 +520,9 @@ void ceph_con_init(struct ceph_messenger *msgr, struct > > ceph_connection *con) > > memset(con, 0, sizeof(*con)); > > atomic_set(&con->nref, 1); > > con->msgr = msgr; > > + > > + con_sock_state_init(con); > > + > > mutex_init(&con->mutex); > > INIT_LIST_HEAD(&con->out_queue); > > INIT_LIST_HEAD(&con->out_sent); > > -- > > 1.7.5.4 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in > > the body of a message to majordomo@xxxxxxxxxxxxxxx > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > >