On Thu, May 31, 2012 at 3:35 AM, Alex Elder <elder@xxxxxxxxxxx> wrote: > Start explicitly keeping track of the state of a ceph connection's > socket, separate from the state of the connection itself. Create > placeholder functions to encapsulate the state transitions. > > -------- > | NEW* | transient initial state > -------- > | con_sock_state_init() > v > ---------- > | CLOSED | initialized, but no socket (and no > ---------- TCP connection) > ^ \ > | \ con_sock_state_connecting() > | ---------------------- > | \ > + con_sock_state_closed() \ > |\ \ > | \ \ > | ----------- \ > | | CLOSING | socket event; \ > | ----------- await close \ > | ^ | > | | | > | + con_sock_state_closing() | > | / \ | > | / --------------- | > | / \ v > | / -------------- > | / -----------------| CONNECTING | socket created, TCP > | | / -------------- connect initiated > | | | con_sock_state_connected() > | | v > ------------- > | CONNECTED | TCP connection established > ------------- > > Make the socket state an atomic variable, reinforcing that it's a > distinct transtion with no possible "intermediate/both" states. > This is almost certainly overkill at this point, though the > transitions into CONNECTED and CLOSING state do get called via > socket callback (the rest of the transitions occur with the > connection mutex held). We can back out the atomicity later. > > Signed-off-by: Alex Elder <elder@xxxxxxxxxxx> > --- > include/linux/ceph/messenger.h | 8 ++++- > net/ceph/messenger.c | 63 > ++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 69 insertions(+), 2 deletions(-) > > diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h > index 920235e..5e852f4 100644 > --- a/include/linux/ceph/messenger.h > +++ b/include/linux/ceph/messenger.h > @@ -137,14 +137,18 @@ struct ceph_connection { > const struct ceph_connection_operations *ops; > > struct ceph_messenger *msgr; > + > + atomic_t sock_state; > struct socket *sock; > + struct ceph_entity_addr peer_addr; /* peer address */ > + struct ceph_entity_addr peer_addr_for_me; > + > unsigned long flags; > unsigned long state; > const char *error_msg; /* error message, if any */ > > - struct ceph_entity_addr peer_addr; /* peer address */ > struct ceph_entity_name peer_name; /* peer name */ > - struct ceph_entity_addr peer_addr_for_me; > + > unsigned peer_features; > u32 connect_seq; /* identify the most recent connection > attempt for this connection, client */ > diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c > index 29055df..7e11b07 100644 > --- a/net/ceph/messenger.c > +++ b/net/ceph/messenger.c > @@ -29,6 +29,14 @@ > * the sender. > */ > > +/* State values for ceph_connection->sock_state; NEW is assumed to be 0 */ > + > +#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */ > +#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */ > +#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> > CLOSING */ > +#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED > */ > +#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */ > + > /* static tag bytes (protocol control messages) */ > static char tag_msg = CEPH_MSGR_TAG_MSG; > static char tag_ack = CEPH_MSGR_TAG_ACK; > @@ -147,6 +155,54 @@ void ceph_msgr_flush(void) > } > EXPORT_SYMBOL(ceph_msgr_flush); > > +/* Connection socket state transition functions */ > + > +static void con_sock_state_init(struct ceph_connection *con) > +{ > + int old_state; > + > + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); > + if (WARN_ON(old_state != CON_SOCK_STATE_NEW)) > + printk("%s: unexpected old state %d\n", __func__, > old_state); > +} > + > +static void con_sock_state_connecting(struct ceph_connection *con) > +{ > + int old_state; > + > + old_state = atomic_xchg(&con->sock_state, > CON_SOCK_STATE_CONNECTING); > + if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED)) > + printk("%s: unexpected old state %d\n", __func__, > old_state); > +} > + > +static void con_sock_state_connected(struct ceph_connection *con) > +{ > + int old_state; > + > + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED); > + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING)) > + printk("%s: unexpected old state %d\n", __func__, > old_state); > +} > + > +static void con_sock_state_closing(struct ceph_connection *con) > +{ > + int old_state; > + > + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING); > + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING && > + old_state != CON_SOCK_STATE_CONNECTED)) > + printk("%s: unexpected old state %d\n", __func__, > old_state); > +} > + > +static void con_sock_state_closed(struct ceph_connection *con) > +{ > + int old_state; > + > + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); > + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED && > + old_state != CON_SOCK_STATE_CLOSING)) > + printk("%s: unexpected old state %d\n", __func__, > old_state); > +} > Hi Alex, Single node setup can easily trigger above WARN_ON. I think this is because local TCP connection setup and shutdown is very fast, they finish immediately after sock->connect and sock->shutdown return. Yan, Zheng > /* > * socket callback functions > @@ -203,6 +259,7 @@ static void ceph_sock_state_change(struct sock *sk) > dout("%s TCP_CLOSE\n", __func__); > case TCP_CLOSE_WAIT: > dout("%s TCP_CLOSE_WAIT\n", __func__); > + con_sock_state_closing(con); > if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) { > if (test_bit(CONNECTING, &con->state)) > con->error_msg = "connection failed"; > @@ -213,6 +270,7 @@ static void ceph_sock_state_change(struct sock *sk) > break; > case TCP_ESTABLISHED: > dout("%s TCP_ESTABLISHED\n", __func__); > + con_sock_state_connected(con); > queue_con(con); > break; > default: /* Everything else is uninteresting */ > @@ -277,6 +335,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) > return ret; > } > con->sock = sock; > + con_sock_state_connecting(con); > > return 0; > } > @@ -341,6 +400,7 @@ static int con_close_socket(struct ceph_connection *con) > rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); > sock_release(con->sock); > con->sock = NULL; > + con_sock_state_closed(con); > return rc; > } > > @@ -460,6 +520,9 @@ void ceph_con_init(struct ceph_messenger *msgr, struct > ceph_connection *con) > memset(con, 0, sizeof(*con)); > atomic_set(&con->nref, 1); > con->msgr = msgr; > + > + con_sock_state_init(con); > + > mutex_init(&con->mutex); > INIT_LIST_HEAD(&con->out_queue); > INIT_LIST_HEAD(&con->out_sent); > -- > 1.7.5.4 > > -- > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html