Re: graceful-stop closes established connections without response

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jan 30, 2024 at 4:37 AM Sherrard Burton <sburton@xxxxxxxxxxxxx> wrote:
>
> i was going to add some debugging lines, but when i took a quick look at
> the patch, i wasn't clear on which sections of the code i should be
> guaranteed to hit. can you be so kind as to send an updated patch with
> some gratuitous logging in the appropriate sections so that there will
> be positive affirmation that the patch has (or hasn't) been applied and
> is falling into the expected sections?

Sure, here is a v2 (which also includes a fix w.r.t. v1).

Regards,
Yann.
Index: server/mpm/event/event.c
===================================================================
--- server/mpm/event/event.c	(revision 1915442)
+++ server/mpm/event/event.c	(working copy)
@@ -174,7 +174,7 @@ static int had_healthy_child = 0;
 static volatile int dying = 0;
 static volatile int workers_may_exit = 0;
 static volatile int start_thread_may_exit = 0;
-static volatile int listener_may_exit = 0;
+static volatile apr_uint32_t listener_may_exit = 0;
 static int listener_is_wakeable = 0;        /* Pollset supports APR_POLLSET_WAKEABLE */
 static int num_listensocks = 0;
 static apr_int32_t conns_this_child;        /* MaxConnectionsPerChild, only access
@@ -481,8 +481,7 @@ static void disable_listensocks(void)
 static void enable_listensocks(void)
 {
     int i;
-    if (listener_may_exit
-            || apr_atomic_cas32(&listensocks_disabled, 0, 1) != 1) {
+    if (dying || apr_atomic_cas32(&listensocks_disabled, 0, 1) != 1) {
         return;
     }
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457)
@@ -575,8 +574,7 @@ static void wakeup_listener(void)
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
                  "wake up listener%s", listener_may_exit ? " again" : "");
 
-    listener_may_exit = 1;
-    disable_listensocks();
+    apr_atomic_cas32(&listener_may_exit, 1, 0);
 
     /* Unblock the listener if it's poll()ing */
     if (event_pollset && listener_is_wakeable) {
@@ -1184,12 +1182,9 @@ read_request:
             cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
             goto read_request;
         }
-        else if (!listener_may_exit) {
+        else {
             cs->pub.state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
         }
-        else {
-            cs->pub.state = CONN_STATE_LINGER;
-        }
     }
 
     if (cs->pub.state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
@@ -1256,18 +1251,21 @@ static void check_infinite_requests(void)
     }
 }
 
-static int close_listeners(int *closed)
+static int close_listeners(void)
 {
     ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
                  "clos%s listeners (connection_count=%u)",
-                 *closed ? "ed" : "ing", apr_atomic_read32(&connection_count));
-    if (!*closed) {
+                 dying ? "ed" : "ing", apr_atomic_read32(&connection_count));
+    if (!dying) {
         int i;
 
+        dying = 1; /* once */
+
+        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
+                     "XXX: closing");
+
         ap_close_listeners_ex(my_bucket->listeners);
-        *closed = 1; /* once */
 
-        dying = 1;
         ap_scoreboard_image->parent[ap_child_slot].quiescing = 1;
         for (i = 0; i < threads_per_child; ++i) {
             ap_update_child_status_from_indexes(ap_child_slot, i,
@@ -1654,8 +1652,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_
     proc_info *ti = dummy;
     int process_slot = ti->pslot;
     struct process_score *ps = ap_get_scoreboard_process(process_slot);
-    int closed = 0;
-    int have_idle_worker = 0;
+    int have_idle_worker = 0, exiting = 0;
     apr_time_t last_log;
 
     last_log = apr_time_now();
@@ -1678,8 +1675,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_
         if (conns_this_child <= 0)
             check_infinite_requests();
 
-        if (listener_may_exit) {
-            int first_close = close_listeners(&closed);
+        if (exiting) {
+            int first_close = close_listeners();
 
             if (terminate_mode == ST_UNGRACEFUL
                 || apr_atomic_read32(&connection_count) == 0)
@@ -1710,7 +1707,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_
                              apr_atomic_read32(keepalive_q->total),
                              apr_atomic_read32(&lingering_count),
                              apr_atomic_read32(&suspended_count));
-                if (dying) {
+                if (exiting) {
                     ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
                                  "%u/%u workers shutdown",
                                  apr_atomic_read32(&threads_shutdown),
@@ -1792,6 +1789,10 @@ static void * APR_THREAD_FUNC listener_thread(apr_
             }
             num = 0;
         }
+        if (!exiting && apr_atomic_read32(&listener_may_exit)) {
+            ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
+                         "XXX: may exit (%d, %d)", rc, num);
+        }
 
         if (APLOGtrace7(ap_server_conf)) {
             now = apr_time_now();
@@ -1799,7 +1800,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_
                          "polled with num=%u exit=%d/%d conns=%d"
                          " queues_timeout=%" APR_TIME_T_FMT
                          " timers_timeout=%" APR_TIME_T_FMT,
-                         num, listener_may_exit, dying,
+                         num, listener_may_exit, exiting,
                          apr_atomic_read32(&connection_count),
                          queues_next_expiry - now, timers_next_expiry - now);
         }
@@ -1899,7 +1900,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_
                                  "Idle workers: %u",
                                  ap_queue_info_num_idlers(worker_queue_info));
                 }
-                else if (!listener_may_exit) {
+                else if (!exiting) {
                     void *csd = NULL;
                     ap_listen_rec *lr = (ap_listen_rec *) pt->baton;
                     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
@@ -1933,6 +1934,11 @@ static void * APR_THREAD_FUNC listener_thread(apr_
                         }
                     }
 
+                    if (apr_atomic_read32(&listener_may_exit)) {
+                        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
+                                     "XXX: draining");
+                    }
+
                     get_worker(&have_idle_worker, 1, &workers_were_busy);
                     rc = lr->accept_func(&csd, lr, ptrans);
 
@@ -1960,6 +1966,16 @@ static void * APR_THREAD_FUNC listener_thread(apr_
             }               /* if:else on pt->type */
         } /* for processing poll */
 
+        /* On graceful stop/restart asked we can close the listening sockets
+         * since the backlog should be drained now.
+         */
+        if (apr_atomic_cas32(&listener_may_exit, 2, 1) == 1) {
+            ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
+                         "XXX: exiting");
+            exiting = 1;
+            continue;
+        }
+
         /* We process the timeout queues here only when the global
          * queues_next_expiry is passed. This happens accurately since
          * adding to the queues (in workers) can only decrease this expiry,
@@ -1979,7 +1995,7 @@ do_maintenance:
             queues_next_expiry = 0;
 
             /* Step 1: keepalive timeouts */
-            if (workers_were_busy || dying) {
+            if (workers_were_busy || exiting) {
                 process_keepalive_queue(0); /* kill'em all \m/ */
             }
             else {
@@ -1989,7 +2005,7 @@ do_maintenance:
             process_timeout_queue(write_completion_q, now,
                                   defer_lingering_close);
             /* Step 3: (normal) lingering close completion timeouts */
-            if (dying && linger_q->timeout > short_linger_q->timeout) {
+            if (exiting && linger_q->timeout > short_linger_q->timeout) {
                 /* Dying, force short timeout for normal lingering close */
                 linger_q->timeout = short_linger_q->timeout;
             }
@@ -2009,7 +2025,7 @@ do_maintenance:
             ps->suspended = apr_atomic_read32(&suspended_count);
             ps->lingering_close = apr_atomic_read32(&lingering_count);
         }
-        else if ((workers_were_busy || dying)
+        else if ((workers_were_busy || exiting)
                  && apr_atomic_read32(keepalive_q->total)) {
             apr_thread_mutex_lock(timeout_mutex);
             process_keepalive_queue(0); /* kill'em all \m/ */
@@ -2040,6 +2056,9 @@ do_maintenance:
         }
     } /* listener main loop */
 
+    ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
+                 "XXX: exited");
+
     ap_queue_term(worker_queue);
 
     apr_thread_exit(thd, APR_SUCCESS);
---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@xxxxxxxxxxxxxxxx
For additional commands, e-mail: users-help@xxxxxxxxxxxxxxxx

[Index of Archives]     [Open SSH Users]     [Linux ACPI]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Squid]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]

  Powered by Linux