From: John Thompson <john.thompson@xxxxxxxxxxxxxxxxxxx> If recovery is entered repeatedly (i.e. without entering the operational state in between), then some messages are still leaked. + memb_state_operational_enter - the recovery queue was left with hanging message pointers so when recovery was entered it wasn't safe to free these messages. Now the messages are cleared off the recovery queue first, and then the recovery queue is copied into the regular queue. + memb_state_recover_enter - free any messages that were already on the recovery message queue or retrans message queue before we reinit them. These recovery messages get freed when operational state is entered but if recovery were entered again before it gets to the operational state, some messages could be leaked. Signed-off-by: John Thompson <john.thompson@xxxxxxxxxxxxxxxxxxx> --- exec/totemsrp.c | 96 +++++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 75 insertions(+), 21 deletions(-) diff --git a/exec/totemsrp.c b/exec/totemsrp.c index c319553..5991023 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -1810,12 +1810,6 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) 0, 0, joined_list_totemip, joined_list_entries, &instance->my_ring_id); - /* - * The recovery sort queue now becomes the regular - * sort queue. It is necessary to copy the state - * into the regular sort queue. - */ - sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue); instance->my_last_aru = SEQNO_START_MSG; /* When making my_proc_list smaller, ensure that the @@ -1835,11 +1829,14 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) /* * TODO Not exactly to spec * - * At the entry to this function all messages without a gap are - * deliered. + * At the entry to this function all messages in the recovery sort queue + * without a gap are delivered. * - * This code throw away messages from the last gap in the sort queue - * to my_high_seq_received + * This code throws away messages from the last gap in the recovery sort + * queue to my_high_seq_received, by setting my_high_delivered. + * + * All mesages on the recovery queue are freed as they have an extra + * encapsulation that the regular sort queue cannot handle. * * What should really happen is we should deliver all messages up to * a gap, then delier the transitional configuration, then deliver @@ -1855,7 +1852,7 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) void *ptr; i -= 1; - res = sq_item_get (&instance->regular_sort_queue, i, &ptr); + res = sq_item_get (&instance->recovery_sort_queue, i, &ptr); if (i == 0) { break; } @@ -1863,18 +1860,27 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) instance->my_high_delivered = i; - for (i = 0; i <= instance->my_high_delivered; i++) { + /* + * Free all messages on the recovery queue. Note that recovery messages are + * encapsulated differently, so shouldn't be copied onto the regular queue. + */ + for (i = 0; i <= instance->my_high_seq_received; i++) { void *ptr; - res = sq_item_get (&instance->regular_sort_queue, i, &ptr); + res = sq_item_get (&instance->recovery_sort_queue, i, &ptr); if (res == 0) { - struct sort_queue_item *regular_message; - - regular_message = ptr; - free (regular_message->mcast); + struct sort_queue_item *recovery_message; + recovery_message = ptr; + free (recovery_message->mcast); } } - sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered); + sq_items_release (&instance->recovery_sort_queue, instance->my_high_seq_received); + + /* + * The recovery sort queue now becomes the regular sort queue, so copy the + * state info (i.e. seq numbers). Note recovery queue should be empty now + */ + sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue); instance->last_released = instance->my_high_delivered; log_printf (instance->totemsrp_log_level_debug, @@ -2026,6 +2032,55 @@ static void memb_state_commit_enter ( */ } +static void reinit_recovery_sort_queue (struct totemsrp_instance *instance) +{ + unsigned int i; + struct sort_queue_item *recovery_message_item; + unsigned int range; + unsigned int start; + unsigned int del_count = 0; + void *ptr; + + start = instance->recovery_sort_queue.head_seqid; + range = instance->recovery_sort_queue.pos_max; + + for (i = 0; i <= range; i++) { + if (sq_item_get(&instance->recovery_sort_queue, + i + start, &ptr) != 0) { + continue; + } + recovery_message_item = ptr; + free (recovery_message_item->mcast); + del_count++; + } + + if (del_count != 0) { + log_printf (instance->totemsrp_log_level_debug, + "Deleted %u messages from the recovery queue.\n", del_count); + } + + sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG); +} + +static void reinit_retransmit_message_queue (struct totemsrp_instance *instance) +{ + struct message_item *msg_item; + + if (!cs_queue_is_empty (&instance->retrans_message_queue)) { + log_printf (instance->totemsrp_log_level_debug, + "Deleting %u messages from the retransmit queue.\n", + cs_queue_used (&instance->retrans_message_queue)); + } + + while (!cs_queue_is_empty (&instance->retrans_message_queue)) { + msg_item = (struct message_item *)cs_queue_item_get (&instance->retrans_message_queue); + cs_queue_item_remove (&instance->retrans_message_queue); + free (msg_item->mcast); + } + + cs_queue_reinit (&instance->retrans_message_queue); +} + static void memb_state_recovery_enter ( struct totemsrp_instance *instance, struct memb_commit_token *commit_token) @@ -2049,8 +2104,8 @@ static void memb_state_recovery_enter ( instance->my_high_ring_delivered = 0; - sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG); - cs_queue_reinit (&instance->retrans_message_queue); + reinit_recovery_sort_queue (instance); + reinit_retransmit_message_queue (instance); low_ring_aru = instance->old_ring_state_high_seq_received; @@ -2174,7 +2229,6 @@ static void memb_state_recovery_enter ( sort_queue_item = ptr; messages_originated++; memset (&message_item, 0, sizeof (struct message_item)); - // TODO LEAK message_item.mcast = totemsrp_buffer_alloc (instance); assert (message_item.mcast); message_item.mcast->header.type = MESSAGE_TYPE_MCAST; -- 1.7.0.4 NOTICE: This message contains privileged and confidential information intended only for the use of the addressee named above. If you are not the intended recipient of this message you are hereby notified that you must not disseminate, copy or take any action in reliance on it. If you have received this message in error please notify Allied Telesis Labs Ltd immediately. Any views expressed in this message are those of the individual sender, except where the sender has the authority to issue and specifically states them to be the views of Allied Telesis Labs. _______________________________________________ discuss mailing list discuss@xxxxxxxxxxxx http://lists.corosync.org/mailman/listinfo/discuss