123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576 |
- #include <cstring>
- #include <cstdlib>
- #include <sys/un.h>
- #include <sys/socket.h>
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <unistd.h>
- #include "dinit.h"
- #include "dinit-log.h"
- #include "dinit-socket.h"
- #include "proc-service.h"
- #include "baseproc-sys.h"
- /*
- * Base process implementation (base_process_service).
- *
- * See proc-service.h for interface documentation.
- */
- void base_process_service::do_smooth_recovery() noexcept
- {
- if (!restart_ps_process()) {
- unrecoverable_stop();
- services->process_queues();
- }
- }
- bool base_process_service::bring_up() noexcept
- {
- if (!open_socket()) {
- return false;
- }
- bool start_success;
- if (in_auto_restart) {
- start_success = restart_ps_process();
- }
- else {
- restart_interval_count = 0;
- start_success = start_ps_process(exec_arg_parts,
- onstart_flags.starts_on_console || onstart_flags.shares_console);
- // start_ps_process updates last_start_time, use it also for restart_interval_time:
- restart_interval_time = last_start_time;
- // Arm start timer. (For restarts, this is only done once the restart interval expires).
- if (start_success) {
- if (start_timeout != time_val(0,0)) {
- process_timer.arm_timer_rel(event_loop, start_timeout);
- waiting_stopstart_timer = true;
- }
- else if (waiting_stopstart_timer) {
- process_timer.stop_timer(event_loop);
- waiting_stopstart_timer = false;
- }
- }
- }
- return start_success;
- }
- void base_process_service::handle_unexpected_termination() noexcept
- {
- // unexpected termination, with possible restart
- stop_reason = stopped_reason_t::TERMINATED;
- // We want to circumvent the normal process of waiting for dependents to stop before we
- // attempt to restart, for two reasons:
- // 1) we can restart more quickly
- // 2) we can use the restart rate-limiting logic from restart_ps_process rather than
- // the usual start_ps_process (the usual bring-up).
- // But we need to issue a forced stop and process queues, to discover our eventual target
- // state (so we know whether we actually want to restart or not).
- // Note we can't call forced_stop() directly here, because we need to set in_auto_restart in
- // between do_stop() and processing queues (so that it is set correctly if restart occurs):
- force_stop = true;
- do_stop();
- services->process_queues();
- if (get_state() == service_state_t::STOPPING) {
- // We must be waiting for dependents;
- // If we're going to restart, we can kick that off now:
- if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
- initiate_start();
- services->process_queues();
- }
- }
- }
- bool base_process_service::start_ps_process(const std::vector<const char *> &cmd, bool on_console) noexcept
- {
- // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
- // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
- // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
- // is written to the pipe, and the parent can read it.
- event_loop.get_time(last_start_time, clock_type::MONOTONIC);
- int pipefd[2];
- if (bp_sys::pipe2(pipefd, O_CLOEXEC)) {
- log(loglevel_t::ERROR, get_name(), ": can't create status check pipe: ", strerror(errno));
- return false;
- }
- int input_fd;
- if (!get_input_fd(&input_fd)) {
- return false;
- }
- const char * logfile = this->logfile.c_str();
- if (this->log_type == log_type_id::LOGFILE || this->log_type == log_type_id::NONE) {
- // Note: if log_type == NONE, logfile should be empty.
- if (*logfile == 0) {
- logfile = "/dev/null";
- }
- }
- else /* log_type_id::BUFFER or ::PIPE */ {
- if (this->log_output_fd == -1) {
- int logfd[2];
- // Note: we set CLOEXEC on the file descriptors here; when the output file descriptor is dup'd
- // to stdout, this will be effectively removed for the output end
- if (bp_sys::pipe2(logfd, O_CLOEXEC)) {
- log(loglevel_t::ERROR, get_name(), ": can't create output pipe: ", strerror(errno));
- goto out_p;
- }
- this->log_input_fd = logfd[0];
- this->log_output_fd = logfd[1];
- if (this->log_type == log_type_id::BUFFER) {
- try {
- this->log_output_listener.add_watch(event_loop, logfd[0], dasynq::IN_EVENTS,
- false /* not enabled */);
- }
- catch (...) {
- log(loglevel_t::ERROR, get_name(), ": can't add output watch (insufficient resources)");
- bp_sys::close(this->log_input_fd);
- bp_sys::close(this->log_output_fd);
- this->log_input_fd = -1;
- this->log_output_fd = -1;
- goto out_p;
- }
- }
- }
- // (More is done below, after we have performed additional setup)
- }
- {
- bool child_status_registered = false;
- control_conn_t *control_conn = nullptr;
- int control_socket[2] = {-1, -1};
- int notify_pipe[2] = {-1, -1};
- bool have_notify = !notification_var.empty() || force_notification_fd != -1;
- ready_notify_watcher * rwatcher = have_notify ? get_ready_watcher() : nullptr;
- bool ready_watcher_registered = false;
- if (onstart_flags.pass_cs_fd) {
- if (dinit_socketpair(AF_UNIX, SOCK_STREAM, /* protocol */ 0, control_socket, SOCK_NONBLOCK)) {
- log(loglevel_t::ERROR, get_name(), ": can't create control socket: ", strerror(errno));
- goto out_lfd;
- }
- // Make the server side socket close-on-exec:
- int fdflags = bp_sys::fcntl(control_socket[0], F_GETFD);
- bp_sys::fcntl(control_socket[0], F_SETFD, fdflags | FD_CLOEXEC);
- try {
- control_conn = new control_conn_t(event_loop, services, control_socket[0]);
- }
- catch (std::exception &exc) {
- log(loglevel_t::ERROR, get_name(), ": can't launch process; out of memory");
- goto out_cs;
- }
- }
- if (have_notify) {
- // Create a notification pipe:
- if (bp_sys::pipe2(notify_pipe, 0) != 0) {
- log(loglevel_t::ERROR, get_name(), ": can't create notification pipe: ", strerror(errno));
- goto out_cs_h;
- }
- // Set the read side as close-on-exec:
- int fdflags = bp_sys::fcntl(notify_pipe[0], F_GETFD);
- bp_sys::fcntl(notify_pipe[0], F_SETFD, fdflags | FD_CLOEXEC);
- // add, but don't yet enable, readiness watcher:
- try {
- rwatcher->add_watch(event_loop, notify_pipe[0], dasynq::IN_EVENTS, false);
- ready_watcher_registered = true;
- }
- catch (std::exception &exc) {
- log(loglevel_t::ERROR, get_name(), ": can't add notification watch: ", exc.what());
- goto out_cs_h;
- }
- }
- if (log_type == log_type_id::BUFFER) {
- // Set watcher enabled if space in buffer
- if (log_buf_size > 0) {
- // Append a "restarted" message to buffer contents
- const char *restarting_msg = "\n(dinit: note: service restarted)\n";
- unsigned restarting_msg_len = strlen(restarting_msg);
- bool trailing_nl = log_buffer[log_buf_size - 1] == '\n';
- if (trailing_nl) {
- ++restarting_msg; // trim leading newline
- --restarting_msg_len;
- }
- if (log_buf_size + restarting_msg_len >= log_buf_max) {
- goto skip_enable_log_watch;
- }
- if (!ensure_log_buffer_backing(log_buf_size + restarting_msg_len)) {
- goto skip_enable_log_watch;
- }
- memcpy(log_buffer.data() + log_buf_size, restarting_msg, restarting_msg_len);
- log_buf_size += restarting_msg_len;
- }
- log_output_listener.set_enabled(event_loop, true);
- }
- skip_enable_log_watch: ;
- // Set up complete, now fork and exec:
- pid_t forkpid;
- try {
- child_status_listener.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
- child_status_registered = true;
- // We specify a high priority (i.e. low priority value) so that process termination is
- // handled early. This means we have always recorded that the process is terminated by the
- // time that we handle events that might otherwise cause us to signal the process, so we
- // avoid sending a signal to an invalid (and possibly recycled) process ID.
- forkpid = child_listener.fork(event_loop, reserved_child_watch, dasynq::DEFAULT_PRIORITY - 10);
- reserved_child_watch = true;
- }
- catch (std::exception &e) {
- log(loglevel_t::ERROR, get_name(), ": could not fork: ", e.what());
- goto out_cs_h;
- }
- if (forkpid == 0) {
- const char * working_dir_c = working_dir_def;
- if (!working_dir.empty()) working_dir_c = working_dir.c_str();
- after_fork(getpid());
- run_proc_params run_params{cmd.data(), working_dir_c, logfile, pipefd[1], run_as_uid, run_as_gid, rlimits};
- run_params.on_console = on_console;
- run_params.in_foreground = !onstart_flags.shares_console;
- run_params.unmask_sigint = onstart_flags.unmask_intr;
- run_params.csfd = control_socket[1];
- run_params.socket_fd = socket_fd;
- run_params.notify_fd = notify_pipe[1];
- run_params.force_notify_fd = force_notification_fd;
- run_params.notify_var = notification_var.c_str();
- run_params.env_file = env_file.c_str();
- run_params.output_fd = log_output_fd;
- run_params.input_fd = input_fd;
- #if SUPPORT_CGROUPS
- run_params.run_in_cgroup = run_in_cgroup.c_str();
- #endif
- run_child_proc(run_params);
- }
- else {
- // Parent process
- pid = forkpid;
- bp_sys::close(pipefd[1]); // close the 'other end' fd
- if (control_socket[1] != -1) bp_sys::close(control_socket[1]);
- if (notify_pipe[1] != -1) bp_sys::close(notify_pipe[1]);
- notification_fd = notify_pipe[0];
- waiting_for_execstat = true;
- return true;
- }
- // Failure exit:
- out_cs_h:
- if (child_status_registered) {
- child_status_listener.deregister(event_loop);
- }
- if (notify_pipe[0] != -1) bp_sys::close(notify_pipe[0]);
- if (notify_pipe[1] != -1) bp_sys::close(notify_pipe[1]);
- if (ready_watcher_registered) {
- rwatcher->deregister(event_loop);
- }
- if (onstart_flags.pass_cs_fd) {
- delete control_conn;
- out_cs:
- bp_sys::close(control_socket[0]);
- bp_sys::close(control_socket[1]);
- }
- }
- out_lfd:
- if (log_input_fd != -1) {
- log_output_listener.deregister(event_loop);
- bp_sys::close(log_input_fd);
- bp_sys::close(log_output_fd);
- log_input_fd = -1;
- log_output_fd = -1;
- }
- out_p:
- bp_sys::close(pipefd[0]);
- bp_sys::close(pipefd[1]);
- return false;
- }
- base_process_service::base_process_service(service_set *sset, string name,
- service_type_t service_type_p, ha_string &&command,
- const std::list<std::pair<unsigned,unsigned>> &command_offsets,
- const std::list<prelim_dep> &deplist_p)
- : service_record(sset, name, service_type_p, deplist_p), child_listener(this),
- child_status_listener(this), process_timer(this), log_output_listener(this)
- {
- program_name = std::move(command);
- exec_arg_parts = separate_args(program_name, command_offsets);
- restart_interval_count = 0;
- restart_interval_time = {0, 0};
- process_timer.service = this;
- process_timer.add_timer(event_loop);
- // By default, allow a maximum of 3 restarts within 10.0 seconds:
- restart_interval.seconds() = 10;
- restart_interval.nseconds() = 0;
- max_restart_interval_count = 3;
- waiting_restart_timer = false;
- waiting_stopstart_timer = false;
- reserved_child_watch = false;
- tracking_child = false;
- }
- void base_process_service::do_restart() noexcept
- {
- // Actually perform process restart. We may be in smooth recovery (state = STARTED) or this may
- // be a regular restart.
- waiting_restart_timer = false;
- auto service_state = get_state();
- if (!start_ps_process(exec_arg_parts, have_console || onstart_flags.shares_console)) {
- if (service_state == service_state_t::STARTING) {
- failed_to_start();
- }
- else {
- // smooth recovery failure
- unrecoverable_stop();
- }
- services->process_queues();
- }
- else {
- // started process successfully (at least as far as fork)
- if (start_timeout != time_val(0,0)) {
- process_timer.arm_timer_rel(event_loop, start_timeout);
- waiting_stopstart_timer = true;
- }
- }
- }
- bool base_process_service::restart_ps_process() noexcept
- {
- using time_val = dasynq::time_val;
- time_val current_time;
- event_loop.get_time(current_time, clock_type::MONOTONIC);
- // Check if enough time has lapsed since the previous restart. If not, start a timer:
- time_val tdiff = current_time - last_start_time;
- if (restart_delay <= tdiff) {
- // > restart delay (normally 200ms)
- do_restart();
- }
- else {
- time_val timeout = restart_delay - tdiff;
- process_timer.arm_timer_rel(event_loop, timeout);
- waiting_restart_timer = true;
- }
- return true;
- }
- bool base_process_service::interrupt_start() noexcept
- {
- if (waiting_restart_timer) {
- process_timer.stop_timer(event_loop);
- waiting_restart_timer = false;
- return service_record::interrupt_start();
- }
- else {
- log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid,
- " (with SIGINT).");
- kill_pg(SIGINT);
- if (stop_timeout != time_val(0,0)) {
- process_timer.arm_timer_rel(event_loop, stop_timeout);
- waiting_stopstart_timer = true;
- }
- else if (waiting_stopstart_timer) {
- process_timer.stop_timer(event_loop);
- waiting_stopstart_timer = false;
- }
- set_state(service_state_t::STOPPING);
- return false;
- }
- }
- void base_process_service::kill_with_fire() noexcept
- {
- if (pid != -1) {
- log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid,
- " exceeded allowed stop time; killing.");
- kill_pg(SIGKILL);
- }
- }
- void base_process_service::kill_pg(int signo) noexcept
- {
- if (onstart_flags.signal_process_only) {
- bp_sys::kill(pid, signo);
- }
- else {
- pid_t pgid = bp_sys::getpgid(pid);
- if (pgid == -1) {
- // On some OSes (eg OpenBSD) we aren't allowed to get the pgid of a process in a different
- // session. If the process is in a different session, however, it must be a process group
- // leader and the pgid must equal the process id.
- pgid = pid;
- }
- bp_sys::kill(-pgid, signo);
- }
- }
- void base_process_service::timer_expired() noexcept
- {
- waiting_stopstart_timer = false;
- // Timer expires if:
- // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
- // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
- // including smooth recovery (restart timeout, state is STARTING or STARTED).
- if (get_state() == service_state_t::STOPPING) {
- kill_with_fire();
- }
- else if (pid != -1) {
- // Starting, start timed out.
- log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid,
- " exceeded allowed start time; cancelling.");
- interrupt_start();
- stop_reason = stopped_reason_t::TIMEDOUT;
- failed_to_start(false, false);
- }
- else {
- // STARTING / STARTED, and we have no pid: must be restarting (smooth recovery if STARTED)
- do_restart();
- }
- }
- void base_process_service::becoming_inactive() noexcept
- {
- if (socket_fd != -1) {
- close(socket_fd);
- socket_fd = -1;
- }
- }
- bool base_process_service::open_socket() noexcept
- {
- if (socket_path.empty() || socket_fd != -1) {
- // No socket, or already open
- return true;
- }
- const char * saddrname = socket_path.c_str();
- // Check the specified socket path
- struct stat stat_buf;
- if (stat(saddrname, &stat_buf) == 0) {
- if ((stat_buf.st_mode & S_IFSOCK) == 0) {
- // Not a socket
- log(loglevel_t::ERROR, get_name(), ": activation socket file exists (and is not a socket)");
- return false;
- }
- }
- else if (errno != ENOENT) {
- // Other error
- log(loglevel_t::ERROR, get_name(), ": error checking activation socket: ", strerror(errno));
- return false;
- }
- // Remove stale socket file (if it exists).
- // We won't test the return from unlink - if it fails other than due to ENOENT, we should get an
- // error when we try to create the socket anyway.
- unlink(saddrname);
- uint sockaddr_size = offsetof(struct sockaddr_un, sun_path) + socket_path.length() + 1;
- struct sockaddr_un * name = static_cast<sockaddr_un *>(malloc(sockaddr_size));
- if (name == nullptr) {
- log(loglevel_t::ERROR, get_name(), ": opening activation socket: out of memory");
- return false;
- }
- name->sun_family = AF_UNIX;
- strcpy(name->sun_path, saddrname);
- int sockfd = dinit_socket(AF_UNIX, SOCK_STREAM, 0, SOCK_NONBLOCK | SOCK_CLOEXEC);
- if (sockfd == -1) {
- log(loglevel_t::ERROR, get_name(), ": error creating activation socket: ", strerror(errno));
- free(name);
- return false;
- }
- if (bind(sockfd, (struct sockaddr *) name, sockaddr_size) == -1) {
- log(loglevel_t::ERROR, get_name(), ": error binding activation socket: ", strerror(errno));
- close(sockfd);
- free(name);
- return false;
- }
- free(name);
- // POSIX (1003.1, 2013) says that fchown and fchmod don't necessarily work on sockets. We have to
- // use chown and chmod instead.
- if (chown(saddrname, socket_uid, socket_gid)) {
- log(loglevel_t::ERROR, get_name(), ": error setting activation socket owner/group: ",
- strerror(errno));
- close(sockfd);
- return false;
- }
- if (chmod(saddrname, socket_perms) == -1) {
- log(loglevel_t::ERROR, get_name(), ": Error setting activation socket permissions: ",
- strerror(errno));
- close(sockfd);
- return false;
- }
- if (listen(sockfd, 128) == -1) { // 128 "seems reasonable".
- log(loglevel_t::ERROR, ": error listening on activation socket: ", strerror(errno));
- close(sockfd);
- return false;
- }
- socket_fd = sockfd;
- return true;
- }
- bool base_process_service::ensure_log_buffer_backing(unsigned new_size) noexcept
- {
- // Note: we manage capacity manually to avoid it exceeding maximum
- if (log_buffer.size() < new_size) {
- if (log_buffer.capacity() < new_size) {
- try {
- unsigned new_capacity = std::max((unsigned)log_buffer.capacity() * 2, new_size);
- new_capacity = std::min(new_capacity, log_buf_max);
- log_buffer.reserve(new_capacity);
- log_buffer.resize(new_capacity);
- }
- catch (std::bad_alloc &badalloc) {
- log(loglevel_t::WARN, get_name(), ": cannot increase log buffer; out-of-memory");
- return false;
- }
- }
- else {
- log_buffer.resize(new_size);
- }
- }
- return true;
- }
|