Browse Source

Fix some cases where restarts bypassed the restart delay

Davin McCall 2 years ago
parent
commit
6ce305edef
7 changed files with 47 additions and 40 deletions
  1. 1 1
      src/Makefile
  2. 14 16
      src/baseproc-service.cc
  3. 0 7
      src/includes/proc-service.h
  4. 3 9
      src/includes/service.h
  5. 12 2
      src/proc-service.cc
  6. 2 5
      src/service.cc
  7. 15 0
      src/tests/proctests.cc

+ 1 - 1
src/Makefile

@@ -45,7 +45,7 @@ $(SHUTDOWNPREFIX)shutdown: shutdown.o
 $(objects): %.o: %.cc
 	$(CXX) $(CXXOPTS) -MMD -MP -Iincludes -I../dasynq/include -I../build/includes -c $< -o $@
 
-check: ../build/includes/mconfig.h
+check: ../build/includes/mconfig.h $(dinit_objects)
 	$(MAKE) -C tests check
 
 check-igr: dinit dinitctl dinitcheck

+ 14 - 16
src/baseproc-service.cc

@@ -34,6 +34,10 @@ bool base_process_service::bring_up() noexcept
         return false;
     }
 
+    if (in_auto_restart) {
+        return restart_ps_process();
+    }
+
     restart_interval_count = 0;
     if (start_ps_process(exec_arg_parts,
             onstart_flags.starts_on_console || onstart_flags.shares_console)) {
@@ -65,25 +69,20 @@ void base_process_service::handle_unexpected_termination() noexcept
     //    the usual start_ps_process (the usual bring-up).
     // But we need to issue a forced stop and process queues, to discover our eventual target
     // state (so we know whether we actually want to restart or not).
+    // Note we can't call forced_stop() directly here, because we need to set in_auto_restart in
+    // between do_stop() and processing queues (so that it is set correctly if restart occurs):
 
-    delay_start = true; // inhibit bring-up temporarily
-
-    forced_stop();
+    force_stop = true;
+    do_stop();
+    in_auto_restart = true;
     services->process_queues();
 
-    delay_start = false;
-
-    // It's possible we have no dependents or they stopped immediately, in which case we
-    // either are STOPPED or STARTING (i.e. restarting). Otherwise, let's stop immediately (and potentially
-    // restart immediately):
     if (get_state() == service_state_t::STOPPING) {
-        stopped();  // this might cause us to restart, i.e. state may be STARTING
-    }
-
-    // Finally, if we've returned to STARTING, that means we should restart
-    if (get_state() == service_state_t::STARTING) {
-        if (!restart_ps_process()) {
-            failed_to_start();
+        // We must be waiting for dependents;
+        // If we're going to restart, we can kick that off now:
+        if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
+            initiate_start();
+            services->process_queues();
         }
     }
 }
@@ -254,7 +253,6 @@ base_process_service::base_process_service(service_set *sset, string name,
 
     waiting_restart_timer = false;
     waiting_stopstart_timer = false;
-    delay_start = false;
     reserved_child_watch = false;
     tracking_child = false;
 }

+ 0 - 7
src/includes/proc-service.h

@@ -175,8 +175,6 @@ class base_process_service : public service_record
     bool waiting_restart_timer : 1;
     bool waiting_stopstart_timer : 1;
 
-    bool delay_start : 1; // delay bring-up in case of unexpected termination
-
     bool reserved_child_watch : 1;
     bool tracking_child : 1;  // whether we expect to see child process status
 
@@ -221,11 +219,6 @@ class base_process_service : public service_record
                 || service_record::can_interrupt_start();
     }
 
-    virtual bool can_proceed_to_start() noexcept override
-    {
-        return !waiting_restart_timer && !delay_start;
-    }
-
     virtual bool interrupt_start() noexcept override;
 
     void becoming_inactive() noexcept override;

+ 3 - 9
src/includes/service.h

@@ -252,6 +252,8 @@ class service_record
     bool start_failed : 1;      // failed to start (reset when begins starting)
     bool start_skipped : 1;     // start was skipped by interrupt
     
+    bool in_auto_restart : 1;
+
     int required_by = 0;        // number of dependents wanting this service to be started
 
     // list of dependencies
@@ -382,14 +384,6 @@ class service_record
 
     // Virtual functions, to be implemented by service implementations:
 
-    // Whether a STARTING service can transition to its STARTED state, once all
-    // dependencies have started. This should return false only if the start is interruptible
-    // at this point.
-    virtual bool can_proceed_to_start() noexcept
-    {
-        return true;
-    }
-
     // Do any post-dependency startup; return false on failure. Should return true if service
     // has started or is in the process of starting. Will only be called once can_proceed_to_start()
     // returns true.
@@ -423,7 +417,7 @@ class service_record
             waiting_for_console(false), have_console(false), waiting_for_execstat(false),
             start_explicit(false), prop_require(false), prop_release(false), prop_failure(false),
             prop_start(false), prop_stop(false), start_failed(false), start_skipped(false),
-            force_stop(false)
+            in_auto_restart(false), force_stop(false)
     {
         services = set;
         record_type = service_type_t::DUMMY;

+ 12 - 2
src/proc-service.cc

@@ -227,7 +227,12 @@ void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexce
         if (waiting_stopstart_timer) {
             process_timer.stop_timer(event_loop);
         }
-        stopped();
+        if (!waiting_for_deps) {
+            stopped();
+        }
+        else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
+            initiate_start();
+        }
     }
     else if (smooth_recovery && service_state == service_state_t::STARTED) {
         // unexpected termination, with smooth recovery
@@ -305,7 +310,12 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep
         if (service_state == service_state_t::STOPPING) {
             // Stop was issued during smooth recovery
             if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
-                stopped();
+                if (!waiting_for_deps) {
+                    stopped();
+                }
+                else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
+                    initiate_start();
+                }
             }
             else {
                 // We need to re-read the PID, since it has now changed.

+ 2 - 5
src/service.cc

@@ -362,11 +362,6 @@ void service_record::all_deps_started() noexcept
 
     waiting_for_deps = false;
 
-    if (! can_proceed_to_start()) {
-        waiting_for_deps = true;
-        return;
-    }
-
     if (!bring_up()) {
         failed_to_start();
     }
@@ -562,6 +557,8 @@ void service_record::do_stop(bool with_restart) noexcept
 
     if (pinned_started) return;
 
+    in_auto_restart = false;
+
     // Will we restart? desired state of STOPPED inhibits auto-restart
     bool for_restart = with_restart || (auto_restart && desired_state == service_state_t::STARTED);
 

+ 15 - 0
src/tests/proctests.cc

@@ -206,6 +206,8 @@ void test_proc_term_restart()
     p.start();
     sset.process_queues();
 
+    pid_t first_pid = bp_sys::last_forked_pid;
+
     base_process_service_test::exec_succeeded(&p);
     sset.process_queues();
 
@@ -218,11 +220,15 @@ void test_proc_term_restart()
     // Starting, restart timer should be armed:
     assert(p.get_state() == service_state_t::STARTING);
     assert(event_loop.active_timers.size() == 1);
+    assert(bp_sys::last_forked_pid == first_pid);
 
     event_loop.advance_time(time_val(0, 200000000));
     assert(event_loop.active_timers.size() == 0);
 
     sset.process_queues();
+
+    assert(bp_sys::last_forked_pid == (first_pid + 1));
+
     base_process_service_test::exec_succeeded(&p);
     sset.process_queues();
 
@@ -232,6 +238,7 @@ void test_proc_term_restart()
     sset.remove_service(&p);
 }
 
+// Unexpected termination with restart, with dependent
 void test_proc_term_restart2()
 {
     using namespace std;
@@ -253,16 +260,20 @@ void test_proc_term_restart2()
 
     b.add_dep(&p, WAITS);
 
+    pid_t first_pid = bp_sys::last_forked_pid;
+
     b.start();
     sset.process_queues();
 
     assert(p.get_state() == service_state_t::STARTING);
+    assert(bp_sys::last_forked_pid == first_pid + 1);
 
     base_process_service_test::exec_succeeded(&p);
     sset.process_queues();
 
     assert(p.get_state() == service_state_t::STARTED);
     assert(event_loop.active_timers.size() == 0);
+    assert(bp_sys::last_forked_pid == first_pid + 1);
 
     // simulate process terminating, should then be restarted:
     base_process_service_test::handle_exit(&p, 0);
@@ -271,11 +282,14 @@ void test_proc_term_restart2()
     // Starting, restart timer should be armed:
     assert(p.get_state() == service_state_t::STARTING);
     assert(event_loop.active_timers.size() == 1);
+    assert(bp_sys::last_forked_pid == first_pid + 1);
 
     event_loop.advance_time(time_val(0, 200000000));
     assert(event_loop.active_timers.size() == 0);
 
     sset.process_queues();
+    assert(bp_sys::last_forked_pid == first_pid + 2);
+
     base_process_service_test::exec_succeeded(&p);
     sset.process_queues();
 
@@ -293,6 +307,7 @@ void test_proc_term_restart2()
     assert(p.get_state() == service_state_t::STOPPED);
     assert(event_loop.active_timers.size() == 0);
     assert(sset.count_active_services() == 1);
+    assert(bp_sys::last_forked_pid == first_pid + 2);
 
     // simulate terminate dinit
     sset.stop_all_services();