123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638 |
- #include <cstring>
- #include <type_traits>
- #include <sys/un.h>
- #include <sys/socket.h>
- #include "dinit.h"
- #include "dinit-socket.h"
- #include "dinit-util.h"
- #include "dinit-log.h"
- #include "proc-service.h"
/*
 * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
 *
 * See the proc-service.h header for interface details.
 */
- // Strings describing the execution stages (failure points).
- const char * const exec_stage_descriptions[static_cast<int>(exec_stage::DO_EXEC) + 1] = {
- "arranging file descriptors", // ARRANGE_FDS
- "reading environment file", // READ_ENV_FILE
- "setting environment variable", // SET_NOTIFYFD_VAR
- "setting up activation socket", // SETUP_ACTIVATION_SOCKET
- "setting up control socket", // SETUP_CONTROL_SOCKET
- "changing directory", // CHDIR
- "setting up standard input/output descriptors", // SETUP_STDINOUTERR
- "setting resource limits", // SET_RLIMITS
- "setting user/group ID", // SET_UIDGID
- "executing command" // DO_EXEC
- };
// Split a string, in place, into individually nul-terminated arguments.
//
//   s           - the string holding all arguments; it is modified (nul bytes are written into it)
//   arg_indices - one (start,end) index pair per argument within `s`
//
// A nul terminator is stored at each argument's `end` index, except where that index is at or
// past the end of the string (c_str() already supplies a terminator there). The result holds a
// pointer to the start of each argument, in order, followed by a trailing nullptr. The pointers
// refer into `s`, so they are invalidated if `s` is subsequently modified.
std::vector<const char *> separate_args(std::string &s,
        const std::list<std::pair<unsigned,unsigned>> &arg_indices)
{
    // Terminate every argument before taking any pointer into the string:
    for (const auto &range : arg_indices) {
        const auto end_pos = range.second;
        if (end_pos < s.length()) {
            s[end_pos] = 0;
        }
    }

    // The string is no longer touched from here on, so pointers into it remain stable:
    const char * const base = s.c_str();

    std::vector<const char *> argv;
    argv.reserve(arg_indices.size() + 1);
    for (const auto &range : arg_indices) {
        argv.push_back(base + range.first);
    }
    argv.push_back(nullptr);

    return argv;
}
- void process_service::exec_succeeded() noexcept
- {
- // This could be a smooth recovery (state already STARTED). Even more, the process
- // might be stopped (and killed via a signal) during smooth recovery. We don't to
- // process startup again in either case, so we check for state STARTING:
- if (get_state() == service_state_t::STARTING) {
- if (force_notification_fd != -1 || !notification_var.empty()) {
- // Wait for readiness notification:
- readiness_watcher.set_enabled(event_loop, true);
- }
- else {
- started();
- }
- }
- else if (get_state() == service_state_t::STOPPING) {
- // stopping, but smooth recovery was in process. That's now over so we can
- // commence normal stop. Note that if pid == -1 the process already stopped(!),
- // that's handled below.
- if (pid != -1 && stop_check_dependents()) {
- bring_down();
- }
- }
- }
- void scripted_service::exec_succeeded() noexcept
- {
- // For a scripted service, this means nothing other than that the start/stop
- // script will now begin.
- }
- rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
- {
- base_process_service *sr = service;
- sr->waiting_for_execstat = false;
- run_proc_err exec_status;
- int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
- deregister(loop);
- close(get_watched_fd());
- if (r > 0) {
- // We read an errno code; exec() failed, and the service startup failed.
- if (sr->pid != -1) {
- sr->child_listener.deregister(event_loop, sr->pid);
- sr->reserved_child_watch = false;
- if (sr->stop_timer_armed) {
- sr->restart_timer.stop_timer(loop);
- sr->stop_timer_armed = false;
- }
- }
- sr->pid = -1;
- sr->exec_failed(exec_status);
- }
- else {
- sr->exec_succeeded();
- if (sr->pid == -1) {
- // Somehow the process managed to complete before we even saw the exec() status.
- sr->handle_exit_status(sr->exit_status);
- }
- }
- sr->services->process_queues();
- return rearm::REMOVED;
- }
- rearm ready_notify_watcher::fd_event(eventloop_t &, int fd, int flags) noexcept
- {
- char buf[128];
- if (service->get_state() == service_state_t::STARTING) {
- // can we actually read anything from the notification pipe?
- int r = bp_sys::read(fd, buf, sizeof(buf));
- if (r > 0) {
- service->started();
- }
- else if (r == 0 || errno != EAGAIN) {
- service->failed_to_start(false, false);
- service->set_state(service_state_t::STOPPING);
- service->bring_down();
- }
- }
- else {
- // Just keep consuming data from the pipe:
- int r = bp_sys::read(fd, buf, sizeof(buf));
- if (r == 0) {
- // Process closed write end or terminated
- close(fd);
- service->notification_fd = -1;
- return rearm::DISARM;
- }
- }
- service->services->process_queues();
- return rearm::REARM;
- }
- dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
- {
- base_process_service *sr = service;
- sr->pid = -1;
- sr->exit_status = bp_sys::exit_status(status);
- // Ok, for a process service, any process death which we didn't rig ourselves is a bit... unexpected.
- // Probably, the child died because we asked it to (sr->service_state == STOPPING). But even if we
- // didn't, there's not much we can do.
- if (sr->waiting_for_execstat) {
- // We still don't have an exec() status from the forked child, wait for that
- // before doing any further processing.
- return dasynq::rearm::NOOP; // hold watch reservation
- }
- // Must stop watch now since handle_exit_status might result in re-launch:
- // (stop_watch instead of deregister, so that we hold watch reservation).
- stop_watch(loop);
- if (sr->stop_timer_armed) {
- sr->restart_timer.stop_timer(loop);
- sr->stop_timer_armed = false;
- }
- sr->handle_exit_status(bp_sys::exit_status(status));
- return dasynq::rearm::NOOP;
- }
- void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
- {
- bool did_exit = exit_status.did_exit();
- bool was_signalled = exit_status.was_signalled();
- auto service_state = get_state();
- if (notification_fd != -1) {
- readiness_watcher.deregister(event_loop);
- bp_sys::close(notification_fd);
- notification_fd = -1;
- }
- if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
- if (did_exit) {
- log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
- exit_status.get_exit_status());
- }
- else if (was_signalled) {
- log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
- exit_status.get_term_sig());
- }
- }
- #if USE_UTMPX
- if (*inittab_id || *inittab_line) {
- clear_utmp_entry(inittab_id, inittab_line);
- }
- #endif
- if (service_state == service_state_t::STARTING) {
- // If state is STARTING, we must be waiting for readiness notification; the process has
- // terminated before becoming ready.
- stop_reason = stopped_reason_t::FAILED;
- failed_to_start();
- }
- else if (service_state == service_state_t::STOPPING) {
- // We won't log a non-zero exit status or termination due to signal here -
- // we assume that the process died because we signalled it.
- if (stop_timer_armed) {
- restart_timer.stop_timer(event_loop);
- }
- stopped();
- }
- else if (smooth_recovery && service_state == service_state_t::STARTED
- && get_target_state() == service_state_t::STARTED) {
- do_smooth_recovery();
- return;
- }
- else {
- stop_reason = stopped_reason_t::TERMINATED;
- emergency_stop();
- }
- services->process_queues();
- }
- void process_service::exec_failed(run_proc_err errcode) noexcept
- {
- log(loglevel_t::ERROR, get_name(), ": execution failed - ",
- exec_stage_descriptions[static_cast<int>(errcode.stage)], strerror(errcode.st_errno));
- if (notification_fd != -1) {
- readiness_watcher.deregister(event_loop);
- bp_sys::close(notification_fd);
- notification_fd = -1;
- }
- if (get_state() == service_state_t::STARTING) {
- stop_reason = stopped_reason_t::EXECFAILED;
- failed_to_start();
- }
- else {
- // Process service in smooth recovery:
- stop_reason = stopped_reason_t::TERMINATED;
- emergency_stop();
- }
- }
- void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
- {
- begin:
- bool did_exit = exit_status.did_exit();
- bool was_signalled = exit_status.was_signalled();
- auto service_state = get_state();
- if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
- if (did_exit) {
- log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
- exit_status.get_exit_status());
- }
- else if (was_signalled) {
- log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
- exit_status.get_term_sig());
- }
- }
- // This may be a "smooth recovery" where we are restarting the process while leaving the
- // service in the STARTED state.
- if (restarting && service_state == service_state_t::STARTED) {
- //restarting = false;
- bool need_stop = false;
- if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
- need_stop = true;
- }
- else {
- // We need to re-read the PID, since it has now changed.
- if (pid_file.length() != 0) {
- auto pid_result = read_pid_file(&exit_status);
- switch (pid_result) {
- case pid_result_t::FAILED:
- // Failed startup: no auto-restart.
- need_stop = true;
- break;
- case pid_result_t::TERMINATED:
- goto begin;
- case pid_result_t::OK:
- break;
- }
- }
- }
- if (need_stop) {
- // Failed startup: no auto-restart.
- stop_reason = stopped_reason_t::TERMINATED;
- emergency_stop();
- services->process_queues();
- }
- return;
- }
- //restarting = false;
- if (service_state == service_state_t::STARTING) {
- // POSIX requires that if the process exited clearly with a status code of 0,
- // the exit status value will be 0:
- if (exit_status.did_exit_clean()) {
- auto pid_result = read_pid_file(&exit_status);
- switch (pid_result) {
- case pid_result_t::FAILED:
- // Failed startup: no auto-restart.
- stop_reason = stopped_reason_t::FAILED;
- failed_to_start();
- break;
- case pid_result_t::TERMINATED:
- // started, but immediately terminated
- started();
- goto begin;
- case pid_result_t::OK:
- started();
- break;
- }
- }
- else {
- stop_reason = stopped_reason_t::FAILED;
- failed_to_start();
- }
- }
- else if (service_state == service_state_t::STOPPING) {
- // We won't log a non-zero exit status or termination due to signal here -
- // we assume that the process died because we signalled it.
- stopped();
- }
- else {
- // we must be STARTED
- if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
- do_smooth_recovery();
- return;
- }
- stop_reason = stopped_reason_t::TERMINATED;
- forced_stop();
- stop_dependents();
- stopped();
- }
- services->process_queues();
- }
- void bgproc_service::exec_failed(run_proc_err errcode) noexcept
- {
- log(loglevel_t::ERROR, get_name(), ": execution failed - ",
- exec_stage_descriptions[static_cast<int>(errcode.stage)], strerror(errcode.st_errno));
- // Only time we execute is for startup:
- stop_reason = stopped_reason_t::EXECFAILED;
- failed_to_start();
- }
- void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
- {
- bool did_exit = exit_status.did_exit();
- bool was_signalled = exit_status.was_signalled();
- auto service_state = get_state();
- // For a scripted service, a termination occurs in one of three main cases:
- // - the start script completed (or failed), when service was STARTING
- // - the start script was interrupted to cancel startup; state is STOPPING
- // - the stop script complete (or failed), state is STOPPING
- if (service_state == service_state_t::STOPPING) {
- // We might be running the stop script, or we might be running the start script and have issued
- // a cancel order via SIGINT:
- if (interrupting_start) {
- if (stop_timer_armed) {
- restart_timer.stop_timer(event_loop);
- stop_timer_armed = false;
- }
- // We issued a start interrupt, so we expected this failure:
- if (did_exit && exit_status.get_exit_status() != 0) {
- log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
- exit_status.get_exit_status());
- // Assume that a command terminating normally (with failure status) requires no cleanup:
- stopped();
- }
- else {
- if (was_signalled) {
- log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
- exit_status.get_term_sig());
- }
- // If the start script completed successfully, or was interrupted via our signal,
- // we want to run the stop script to clean up:
- bring_down();
- }
- interrupting_start = false;
- }
- else if (exit_status.did_exit_clean()) {
- // We were running the stop script and finished successfully
- stopped();
- }
- else {
- // ??? failed to stop! Let's log it as warning:
- if (did_exit) {
- log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
- exit_status.get_exit_status());
- }
- else if (was_signalled) {
- log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
- exit_status.get_term_sig());
- }
- // Even if the stop script failed, assume that service is now stopped, so that any dependencies
- // can be stopped. There's not really any other useful course of action here.
- stopped();
- }
- services->process_queues();
- }
- else { // STARTING
- if (exit_status.did_exit_clean()) {
- started();
- }
- else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
- // A skippable service can be skipped by interrupting (eg by ^C if the service
- // starts on the console).
- start_skipped = true;
- started();
- }
- else {
- // failed to start
- if (did_exit) {
- log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
- exit_status.get_exit_status());
- }
- else if (was_signalled) {
- log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
- exit_status.get_term_sig());
- }
- stop_reason = stopped_reason_t::FAILED;
- failed_to_start();
- }
- services->process_queues();
- }
- }
- void scripted_service::exec_failed(run_proc_err errcode) noexcept
- {
- log(loglevel_t::ERROR, get_name(), ": execution failed - ",
- exec_stage_descriptions[static_cast<int>(errcode.stage)], strerror(errcode.st_errno));
- auto service_state = get_state();
- if (service_state == service_state_t::STARTING) {
- stop_reason = stopped_reason_t::EXECFAILED;
- failed_to_start();
- }
- else if (service_state == service_state_t::STOPPING) {
- // We've logged the failure, but it's probably better not to leave the service in
- // STOPPING state:
- stopped();
- }
- }
// Convert a value of an integral type to the corresponding unsigned type (as determined by
// std::make_unsigned) and return the converted value.
template <typename T>
typename std::make_unsigned<T>::type make_unsigned_val(T val)
{
    using unsigned_t = typename std::make_unsigned<T>::type;
    return static_cast<unsigned_t>(val);
}
- bgproc_service::pid_result_t
- bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
- {
- const char *pid_file_c = pid_file.c_str();
- int fd = open(pid_file_c, O_CLOEXEC);
- if (fd == -1) {
- log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
- return pid_result_t::FAILED;
- }
- char pidbuf[21]; // just enough to hold any 64-bit integer
- int r = complete_read(fd, pidbuf, 20);
- if (r < 0) {
- // Could not read from PID file
- log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
- close(fd);
- return pid_result_t::FAILED;
- }
- close(fd);
- pidbuf[r] = 0; // store nul terminator
- bool valid_pid = false;
- try {
- unsigned long long v = std::stoull(pidbuf, nullptr, 0);
- if (v <= make_unsigned_val(std::numeric_limits<pid_t>::max())) {
- pid = (pid_t) v;
- valid_pid = true;
- }
- }
- catch (std::out_of_range &exc) {
- // Too large?
- }
- catch (std::invalid_argument &exc) {
- // Ok, so it doesn't look like a number: proceed...
- }
- if (valid_pid) {
- pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
- if (wait_r == -1 && errno == ECHILD) {
- // We can't track this child - check process exists:
- if (kill(pid, 0) == 0 || errno != ESRCH) {
- tracking_child = false;
- return pid_result_t::OK;
- }
- else {
- log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
- pid = -1;
- return pid_result_t::FAILED;
- }
- }
- else if (wait_r == pid) {
- pid = -1;
- return pid_result_t::TERMINATED;
- }
- else if (wait_r == 0) {
- // We can track the child
- child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
- tracking_child = true;
- reserved_child_watch = true;
- return pid_result_t::OK;
- }
- }
- log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
- pid = -1;
- return pid_result_t::FAILED;
- }
- void process_service::bring_down() noexcept
- {
- if (waiting_for_execstat) {
- // The process is still starting. This should be uncommon, but can occur during
- // smooth recovery. We can't do much now; we have to wait until we get the
- // status, and then act appropriately.
- return;
- }
- else if (pid != -1) {
- // The process is still kicking on - must actually kill it. We signal the process
- // group (-pid) rather than just the process as there's less risk then of creating
- // an orphaned process group:
- if (! onstart_flags.no_sigterm) {
- kill_pg(SIGTERM);
- }
- if (term_signal != -1) {
- kill_pg(term_signal);
- }
- // If there's a stop timeout, arm the timer now:
- if (stop_timeout != time_val(0,0)) {
- restart_timer.arm_timer_rel(event_loop, stop_timeout);
- stop_timer_armed = true;
- }
- // The rest is done in handle_exit_status.
- }
- else {
- // The process is already dead.
- stopped();
- }
- }
- void bgproc_service::bring_down() noexcept
- {
- if (pid != -1) {
- // The process is still kicking on - must actually kill it. We signal the process
- // group (-pid) rather than just the process as there's less risk then of creating
- // an orphaned process group:
- if (! onstart_flags.no_sigterm) {
- kill_pg(SIGTERM);
- }
- if (term_signal != -1) {
- kill_pg(term_signal);
- }
- // In most cases, the rest is done in handle_exit_status.
- // If we are a BGPROCESS and the process is not our immediate child, however, that
- // won't work - check for this now:
- if (! tracking_child) {
- stopped();
- }
- else if (stop_timeout != time_val(0,0)) {
- restart_timer.arm_timer_rel(event_loop, stop_timeout);
- stop_timer_armed = true;
- }
- }
- else {
- // The process is already dead.
- stopped();
- }
- }
- void scripted_service::bring_down() noexcept
- {
- if (pid != -1) {
- // We're already running the stop script; nothing to do.
- return;
- }
- if (stop_command.length() == 0) {
- stopped();
- }
- else if (! start_ps_process(stop_arg_parts, false)) {
- // Couldn't execute stop script, but there's not much we can do:
- stopped();
- }
- else {
- // successfully started stop script: start kill timer:
- if (stop_timeout != time_val(0,0)) {
- restart_timer.arm_timer_rel(event_loop, stop_timeout);
- stop_timer_armed = true;
- }
- }
- }
- dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
- {
- service->timer_expired();
- // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
- return dasynq::rearm::NOOP;
- }
|