Hi,

the implementation looks fine, but I am not sure I understand the
rationale behind this patch.

On 10/04/2012 02:03 PM, Jan Friesse wrote:
> Instead of rely on multicast loop functionality of kernel, we now use
> unix socket created by socketpair to deliver multicast messages to
> local node.

What's the performance hit of adding more sockets to the poll loop?

> This handles problems with improperly configured local
> firewall. So if output/input to/from ethernet interface is blocked, node
> is still able to create single node membership.
>
> Dark side of the patch is fact, that membership is always created, so
> "Totem is unable to form a cluster..." will never appear (same applies
> to continuous_gather key).

And this is the part that leaves me a bit puzzled. IIRC you spent a
decent amount of time adding the "Totem is unable to form a cluster..."
error message to detect improperly configured firewalls and all. This
commit appears to go against it completely, and I am not sure I
understand why.

If somebody really needs a localhost-only cluster node, it makes more
sense to have a specific option to bind to localhost, or to use the
socketpair as an alternative, but without introducing a regression.
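
For anyone following along on the list: the trick itself is easy to
show in isolation. Below is a minimal standalone sketch of mine (my
illustration, not code from the patch) of a SOCK_DGRAM socketpair used
as a self-delivery channel, with [1] as the write end and [0] as the
read end, mirroring the patch's local_mcast_loop[] layout. The point
is that this path never touches the NIC, so no firewall rule can
block it.

/* sketch.c -- illustration only, not part of the patch */
#include <stdio.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

int main (void)
{
        int loop[2];
        char buf[64];
        ssize_t n;
        const char msg[] = "hello, local node";

        /* same call the patch uses to create local_mcast_loop[] */
        if (socketpair (AF_UNIX, SOCK_DGRAM, 0, loop) == -1) {
                perror ("socketpair");
                return (1);
        }

        /* "multicast" to ourselves through the write end */
        if (send (loop[1], msg, sizeof (msg), MSG_NOSIGNAL) == -1) {
                perror ("send");
                return (1);
        }

        /* SOCK_DGRAM preserves message boundaries: one send(), one recv() */
        n = recv (loop[0], buf, sizeof (buf), 0);
        if (n > 0) {
                printf ("delivered locally: %s\n", buf);
        }

        close (loop[0]);
        close (loop[1]);
        return (0);
}

Which is also exactly why the "Totem is unable to form a cluster..."
diagnostics can never fire again: local delivery now always succeeds,
whatever the firewall does.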
Fabio

>
> Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx>
> ---
>  exec/totemudp.c |  175 ++++++++++++++++++++++++++++++++++++++++++++++---------
>  1 files changed, 148 insertions(+), 27 deletions(-)
>
> diff --git a/exec/totemudp.c b/exec/totemudp.c
> index 371746d..e702a32 100644
> --- a/exec/totemudp.c
> +++ b/exec/totemudp.c
> @@ -94,6 +94,13 @@ struct totemudp_socket {
>          int mcast_recv;
>          int mcast_send;
>          int token;
> +        /*
> +         * Socket used for local multicast delivery. We don't rely on multicast
> +         * loop and rather this UNIX DGRAM socket is used. Socket is created by
> +         * socketpair call and they are used in same way as pipe (so [0] is read
> +         * end and [1] is write end)
> +         */
> +        int local_mcast_loop[2];
>  };
>
>  struct totemudp_instance {
> @@ -381,6 +388,20 @@ static inline void mcast_sendmsg (
>                  LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
>                          "sendmsg(mcast) failed (non-critical)");
>          }
> +
> +        /*
> +         * Transmit multicast message to local unix mcast loop
> +         * An error here is recovered by totemsrp
> +         */
> +        msg_mcast.msg_name = NULL;
> +        msg_mcast.msg_namelen = 0;
> +
> +        res = sendmsg (instance->totemudp_sockets.local_mcast_loop[1], &msg_mcast,
> +                MSG_NOSIGNAL);
> +        if (res < 0) {
> +                LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
> +                        "sendmsg(local mcast loop) failed (non-critical)");
> +        }
>  }
>
>
> @@ -398,6 +419,12 @@ int totemudp_finalize (
>          if (instance->totemudp_sockets.mcast_send > 0) {
>                  close (instance->totemudp_sockets.mcast_send);
>          }
> +        if (instance->totemudp_sockets.local_mcast_loop[0] > 0) {
> +                qb_loop_poll_del (instance->totemudp_poll_handle,
> +                        instance->totemudp_sockets.local_mcast_loop[0]);
> +                close (instance->totemudp_sockets.local_mcast_loop[0]);
> +                close (instance->totemudp_sockets.local_mcast_loop[1]);
> +        }
>          if (instance->totemudp_sockets.token > 0) {
>                  qb_loop_poll_del (instance->totemudp_poll_handle,
>                          instance->totemudp_sockets.token);
> @@ -564,6 +591,12 @@ static void timer_function_netif_check_timeout (
>          if (instance->totemudp_sockets.mcast_send > 0) {
>                  close (instance->totemudp_sockets.mcast_send);
>          }
> +        if (instance->totemudp_sockets.local_mcast_loop[0] > 0) {
> +                qb_loop_poll_del (instance->totemudp_poll_handle,
> +                        instance->totemudp_sockets.local_mcast_loop[0]);
> +                close (instance->totemudp_sockets.local_mcast_loop[0]);
> +                close (instance->totemudp_sockets.local_mcast_loop[1]);
> +        }
>          if (instance->totemudp_sockets.token > 0) {
>                  qb_loop_poll_del (instance->totemudp_poll_handle,
>                          instance->totemudp_sockets.token);
> @@ -611,6 +644,12 @@ static void timer_function_netif_check_timeout (
>          qb_loop_poll_add (
>                  instance->totemudp_poll_handle,
>                  QB_LOOP_MED,
> +                instance->totemudp_sockets.local_mcast_loop[0],
> +                POLLIN, instance, net_deliver_fn);
> +
> +        qb_loop_poll_add (
> +                instance->totemudp_poll_handle,
> +                QB_LOOP_MED,
>                  instance->totemudp_sockets.token,
>                  POLLIN, instance, net_deliver_fn);
>
> @@ -685,6 +724,7 @@ static int totemudp_build_sockets_ip (
>          int res;
>          int flag;
>          uint8_t sflag;
> +        int i;
>
>          /*
>           * Create multicast recv socket
> @@ -727,6 +767,27 @@
>          }
>
>          /*
> +         * Create local multicast loop socket
> +         */
> +        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets->local_mcast_loop) == -1) {
> +                LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
> +                        "socket() failed");
> +                return (-1);
> +        }
> +
> +        for (i = 0; i < 2; i++) {
> +                totemip_nosigpipe (sockets->local_mcast_loop[i]);
> +                res = fcntl (sockets->local_mcast_loop[i], F_SETFL, O_NONBLOCK);
> +                if (res == -1) {
> +                        LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
> +                                "Could not set non-blocking operation on multicast socket");
> +                        return (-1);
> +                }
> +        }
> +
> +
> +
> +        /*
>           * Setup mcast send socket
>           */
>          sockets->mcast_send = socket (bindnet_address->family, SOCK_DGRAM, 0);
> @@ -808,12 +869,34 @@
>          /*
>           * Set buffer sizes to avoid overruns
>           */
> -        res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
> -        res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
> +        res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
> +        if (res == -1) {
> +                LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
> +                        "Unable to set SO_RCVBUF size on UDP mcast socket");
> +                return (-1);
> +        }
> +        res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
> +        if (res == -1) {
> +                LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
> +                        "Unable to set SO_SNDBUF size on UDP mcast socket");
> +                return (-1);
> +        }
> +        res = setsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
> +        if (res == -1) {
> +                LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
> +                        "Unable to set SO_RCVBUF size on UDP local mcast loop socket");
> +                return (-1);
> +        }
> +        res = setsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
> +        if (res == -1) {
> +                LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
> +                        "Unable to set SO_SNDBUF size on UDP local mcast loop socket");
> +                return (-1);
> +        }
>
>          res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen);
>          if (res == 0) {
> -                log_printf (instance->totemudp_log_level_debug,
> +                log_printf (instance->totemudp_log_level_debug,
>                          "Receive multicast socket recv buffer size (%d bytes).", recvbuf_size);
>          }
>
> @@ -823,6 +906,19 @@
>                          "Transmit multicast socket send buffer size (%d bytes).", sendbuf_size);
>          }
>
> +        res = getsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen);
> +        if (res == 0) {
> +                log_printf (instance->totemudp_log_level_debug,
> +                        "Local receive multicast loop socket recv buffer size (%d bytes).", recvbuf_size);
> +        }
> +
> +        res = getsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen);
> +        if (res == 0) {
> +                log_printf (instance->totemudp_log_level_debug,
> +                        "Local transmit multicast loop socket send buffer size (%d bytes).", sendbuf_size);
> +        }
> +
> +
>          /*
>           * Join group membership on socket
>           */
> @@ -875,13 +971,13 @@
>          }
>
>          /*
> -         * Turn on multicast loopback
> +         * Turn off multicast loopback
>           */
>
> -        flag = 1;
> +        flag = 0;
>          switch ( bindnet_address->family ) {
>                  case AF_INET:
> -                sflag = 1;
> +                sflag = 0;
>                  res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP,
>                          &sflag, sizeof (sflag));
>                  break;
> @@ -891,7 +987,7 @@
>          }
>          if (res == -1) {
>                  LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
> -                        "Unable to turn on multicast loopback");
> +                        "Unable to turn off multicast loopback");
>                  return (-1);
>          }
>
> @@ -1126,18 +1222,30 @@ int totemudp_recv_flush (void *udp_context)
>          struct pollfd ufd;
>          int nfds;
>          int res = 0;
> +        int i;
> +        int sock;
>
>          instance->flushing = 1;
>
> -        do {
> -                ufd.fd = instance->totemudp_sockets.mcast_recv;
> -                ufd.events = POLLIN;
> -                nfds = poll (&ufd, 1, 0);
> -                if (nfds == 1 && ufd.revents & POLLIN) {
> -                        net_deliver_fn (instance->totemudp_sockets.mcast_recv,
> -                                ufd.revents, instance);
> +        for (i = 0; i < 2; i++) {
> +                sock = -1;
> +                if (i == 0) {
> +                        sock = instance->totemudp_sockets.mcast_recv;
> +                }
> +                if (i == 1) {
> +                        sock = instance->totemudp_sockets.local_mcast_loop[0];
>                  }
> -        } while (nfds == 1);
> +                assert(sock != -1);
> +
> +                do {
> +                        ufd.fd = sock;
> +                        ufd.events = POLLIN;
> +                        nfds = poll (&ufd, 1, 0);
> +                        if (nfds == 1 && ufd.revents & POLLIN) {
> +                                net_deliver_fn (sock, ufd.revents, instance);
> +                        }
> +                } while (nfds == 1);
> +        }
>
>          instance->flushing = 0;
>
> @@ -1251,6 +1359,8 @@ extern int totemudp_recv_mcast_empty (
>          struct pollfd ufd;
>          int nfds;
>          int msg_processed = 0;
> +        int i;
> +        int sock;
>
>          /*
>           * Receive datagram
> @@ -1275,19 +1385,30 @@ extern int totemudp_recv_mcast_empty (
>          msg_recv.msg_accrightslen = 0;
>  #endif
>
> -        do {
> -                ufd.fd = instance->totemudp_sockets.mcast_recv;
> -                ufd.events = POLLIN;
> -                nfds = poll (&ufd, 1, 0);
> -                if (nfds == 1 && ufd.revents & POLLIN) {
> -                        res = recvmsg (instance->totemudp_sockets.mcast_recv, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
> -                        if (res != -1) {
> -                                msg_processed = 1;
> -                        } else {
> -                                msg_processed = -1;
> -                        }
> +        for (i = 0; i < 2; i++) {
> +                sock = -1;
> +                if (i == 0) {
> +                        sock = instance->totemudp_sockets.mcast_recv;
> +                }
> +                if (i == 1) {
> +                        sock = instance->totemudp_sockets.local_mcast_loop[0];
>                  }
> -        } while (nfds == 1);
> +                assert(sock != -1);
> +
> +                do {
> +                        ufd.fd = sock;
> +                        ufd.events = POLLIN;
> +                        nfds = poll (&ufd, 1, 0);
> +                        if (nfds == 1 && ufd.revents & POLLIN) {
> +                                res = recvmsg (sock, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
> +                                if (res != -1) {
> +                                        msg_processed = 1;
> +                                } else {
> +                                        msg_processed = -1;
> +                                }
> +                        }
> +                } while (nfds == 1);
> +        }
>
>          return (msg_processed);
>  }
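
P.S.: to make the behavioural flip concrete: the "Turn off multicast
loopback" hunks above disable the kernel's self-copy of outgoing
multicast, which the socketpair then replaces. Roughly like this,
again my own sketch (mirroring the patch's sflag/flag split) rather
than code from the patch:

#include <netinet/in.h>
#include <sys/socket.h>

/*
 * Clear multicast loopback so the kernel stops copying outgoing
 * multicast back to local receivers; self-delivery must then
 * happen elsewhere (in the patch, over the socketpair).
 */
static int disable_mcast_loopback (int fd, int family)
{
        unsigned char sflag = 0;        /* the IPv4 option takes a u_char */
        unsigned int flag = 0;          /* the IPv6 option takes an int */

        if (family == AF_INET) {
                return setsockopt (fd, IPPROTO_IP, IP_MULTICAST_LOOP,
                        &sflag, sizeof (sflag));
        }
        return setsockopt (fd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
                &flag, sizeof (flag));
}

Once loopback is off, a blocked interface can no longer be detected
through missing self-delivery, hence my question about keeping the
old behaviour available behind an option.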
_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss