proc-service.cc 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957
  1. #include <cstring>
  2. #include <type_traits>
  3. #include <sys/un.h>
  4. #include <sys/socket.h>
  5. #include "dinit.h"
  6. #include "dinit-socket.h"
  7. #include "dinit-util.h"
  8. #include "dinit-log.h"
  9. #include "proc-service.h"
  10. /*
  11. * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
  12. *
  13. * See proc-service.h header for interface details.
  14. */
  15. // Given a string and a list of pairs of (start,end) indices for each argument in that string,
  16. // store a null terminator for the argument. Return a `char *` vector containing the beginning
  17. // of each argument and a trailing nullptr. (The returned array is invalidated if the string is
  18. // later modified).
  19. std::vector<const char *> separate_args(ha_string &s,
  20. const std::list<std::pair<unsigned,unsigned>> &arg_indices)
  21. {
  22. std::vector<const char *> r;
  23. r.reserve(arg_indices.size() + 1);
  24. // First store nul terminator for each part:
  25. for (auto index_pair : arg_indices) {
  26. if (index_pair.second < s.length()) {
  27. s[index_pair.second] = 0;
  28. }
  29. }
  30. // Now we can get the C string (c_str) and store offsets into it:
  31. const char * cstr = s.c_str();
  32. for (auto index_pair : arg_indices) {
  33. r.push_back(cstr + index_pair.first);
  34. }
  35. r.push_back(nullptr);
  36. return r;
  37. }
  38. void process_service::exec_succeeded() noexcept
  39. {
  40. if (get_type() != service_type_t::PROCESS) {
  41. return;
  42. }
  43. tracking_child = true;
  44. // This could be a smooth recovery (state already STARTED). No need to do anything here in
  45. // that case. Otherwise, we are STARTING or STOPPING:
  46. if (get_state() == service_state_t::STARTING) {
  47. if (force_notification_fd != -1 || !notification_var.empty()) {
  48. // Wait for readiness notification:
  49. readiness_watcher.set_enabled(event_loop, true);
  50. }
  51. else {
  52. if (waiting_stopstart_timer) {
  53. process_timer.stop_timer(event_loop);
  54. waiting_stopstart_timer = false;
  55. }
  56. started();
  57. }
  58. }
  59. else if (get_state() == service_state_t::STARTED) {
  60. // Smooth recovery (is now complete, if we don't need readiness notification)
  61. if (waiting_stopstart_timer && notification_fd == -1) {
  62. process_timer.stop_timer(event_loop);
  63. waiting_stopstart_timer = false;
  64. }
  65. }
  66. else if (get_state() == service_state_t::STOPPING) {
  67. // stopping, but smooth recovery was in process. That's now over so we can
  68. // commence normal stop. Note that if pid == -1 the process already stopped,
  69. // that is correctly handled by bring_down().
  70. if (stop_check_dependents()) {
  71. bring_down();
  72. }
  73. }
  74. }
  75. void scripted_service::exec_succeeded() noexcept
  76. {
  77. // For a scripted service, this means nothing other than that the start/stop
  78. // script will now begin.
  79. }
  80. rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
  81. {
  82. base_process_service *sr = service;
  83. sr->waiting_for_execstat = false;
  84. run_proc_err exec_status;
  85. int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
  86. deregister(loop);
  87. close(get_watched_fd());
  88. if (r > 0) {
  89. // We read an errno code; exec() failed, and the service startup failed.
  90. if (sr->pid != -1) {
  91. sr->child_listener.deregister(event_loop, sr->pid);
  92. sr->reserved_child_watch = false;
  93. if (sr->waiting_stopstart_timer) {
  94. sr->process_timer.stop_timer(loop);
  95. sr->waiting_stopstart_timer = false;
  96. }
  97. }
  98. sr->pid = -1;
  99. sr->exec_err_info = exec_status;
  100. sr->exec_failed(exec_status);
  101. }
  102. else {
  103. sr->exec_succeeded();
  104. if (sr->pid == -1) {
  105. // Somehow the process managed to complete before we even saw the exec() status.
  106. sr->handle_exit_status(sr->exit_status);
  107. }
  108. }
  109. sr->services->process_queues();
  110. return rearm::REMOVED;
  111. }
  112. rearm stop_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
  113. {
  114. process_service *sr = service;
  115. sr->waiting_for_execstat = false;
  116. run_proc_err exec_status;
  117. int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
  118. deregister(loop);
  119. close(get_watched_fd());
  120. if (r > 0) {
  121. // We read an errno code; exec() failed, and the service startup failed.
  122. if (sr->stop_pid != -1) {
  123. log(loglevel_t::ERROR, "Service ", sr->get_name(), ": could not fork for stop command: ",
  124. exec_stage_descriptions[static_cast<int>(exec_status.stage)], ": ",
  125. strerror(exec_status.st_errno));
  126. sr->stop_watcher.deregister(event_loop, sr->stop_pid);
  127. sr->reserved_child_watch = false;
  128. sr->stop_pid = -1;
  129. if (sr->pid != -1) {
  130. if (sr->term_signal != 0) {
  131. sr->kill_pg(sr->term_signal);
  132. }
  133. if (!sr->tracking_child) {
  134. sr->stop_issued = false;
  135. sr->stopped();
  136. }
  137. }
  138. }
  139. }
  140. else {
  141. // Nothing to do really but wait for termination - unless it's already happened, so let's
  142. // check that now:
  143. if (sr->stop_pid == -1) {
  144. sr->handle_stop_exit();
  145. }
  146. }
  147. sr->services->process_queues();
  148. return rearm::REMOVED;
  149. }
  150. rearm ready_notify_watcher::fd_event(eventloop_t &, int fd, int flags) noexcept
  151. {
  152. char buf[128];
  153. if (service->get_state() == service_state_t::STARTING) {
  154. // can we actually read anything from the notification pipe?
  155. int r = bp_sys::read(fd, buf, sizeof(buf));
  156. if (r > 0) {
  157. if (service->waiting_stopstart_timer) {
  158. service->process_timer.stop_timer(event_loop);
  159. service->waiting_stopstart_timer = false;
  160. }
  161. service->started();
  162. }
  163. else if (r == 0 || errno != EAGAIN) {
  164. if (service->waiting_stopstart_timer) {
  165. service->process_timer.stop_timer(event_loop);
  166. service->waiting_stopstart_timer = false;
  167. }
  168. service->set_state(service_state_t::STOPPING);
  169. service->failed_to_start(false, false);
  170. service->bring_down();
  171. }
  172. service->services->process_queues();
  173. }
  174. else {
  175. // Just keep consuming data from the pipe:
  176. int r = bp_sys::read(fd, buf, sizeof(buf));
  177. if (r == 0) {
  178. // Process closed write end or terminated
  179. close(fd);
  180. service->notification_fd = -1;
  181. return rearm::DISARM;
  182. }
  183. }
  184. return rearm::REARM;
  185. }
  186. dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
  187. {
  188. base_process_service *sr = service;
  189. sr->pid = -1;
  190. sr->exit_status = bp_sys::exit_status(status);
  191. // Ok, for a process service, any process death which we didn't rig ourselves is a bit... unexpected.
  192. // Probably, the child died because we asked it to (sr->service_state == STOPPING). But even if we
  193. // didn't, there's not much we can do.
  194. // Must stop watch now since handle_exit_status might result in re-launch:
  195. // (stop_watch instead of deregister, so that we hold watch reservation).
  196. stop_watch(loop);
  197. if (sr->waiting_for_execstat) {
  198. // We still don't have an exec() status from the forked child, wait for that
  199. // before doing any further processing.
  200. return dasynq::rearm::NOOP; // hold watch reservation
  201. }
  202. if (sr->waiting_stopstart_timer) {
  203. sr->process_timer.stop_timer(loop);
  204. sr->waiting_stopstart_timer = false;
  205. }
  206. sr->handle_exit_status(bp_sys::exit_status(status));
  207. return dasynq::rearm::NOOP;
  208. }
  209. dasynq::rearm stop_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
  210. {
  211. process_service *sr = service;
  212. sr->stop_pid = -1;
  213. sr->stop_status = bp_sys::exit_status(status);
  214. stop_watch(loop);
  215. if (sr->waiting_for_execstat) {
  216. // no exec status yet, wait for that first
  217. return dasynq::rearm::NOOP;
  218. }
  219. sr->handle_stop_exit();
  220. sr->services->process_queues();
  221. return dasynq::rearm::NOOP;
  222. }
  223. rearm log_output_watcher::fd_event(eventloop_t &eloop, int fd, int flags) noexcept
  224. {
  225. // In case buffer size has been decreased, check if we are already at the limit:
  226. if (service->log_buf_size >= service->log_buf_max) {
  227. // If so, read and discard.
  228. char buf[1024];
  229. int r = bp_sys::read(fd, buf, 1024);
  230. if (r == -1) {
  231. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  232. return rearm::REARM;
  233. }
  234. goto bad_read;
  235. }
  236. if (r == 0) goto eof_read;
  237. return rearm::REARM;
  238. }
  239. {
  240. size_t max_read = std::max(service->log_buf_max / 8, 256u);
  241. max_read = std::min((unsigned)max_read, service->log_buf_max - service->log_buf_size);
  242. // ensure vector has size sufficient to read
  243. unsigned new_size = service->log_buf_size + max_read;
  244. if (!service->ensure_log_buffer_backing(new_size)) {
  245. return rearm::DISARM;
  246. }
  247. max_read = service->log_buffer.size() - service->log_buf_size;
  248. int r = bp_sys::read(fd, service->log_buffer.data() + service->log_buf_size, max_read);
  249. if (r == -1) {
  250. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  251. return rearm::REARM;
  252. }
  253. goto bad_read;
  254. }
  255. if (r == 0) goto eof_read;
  256. service->log_buf_size += r;
  257. return rearm::REARM;
  258. }
  259. // error/end-of-stream handling:
  260. bad_read:
  261. log(loglevel_t::WARN, "Service ", service->get_name(), " output not readable: ", strerror(errno));
  262. eof_read:
  263. deregister(eloop);
  264. close(fd);
  265. close(service->log_output_fd);
  266. service->log_input_fd = -1;
  267. service->log_output_fd = -1;
  268. return rearm::REMOVED;
  269. }
  270. void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  271. {
  272. bool did_exit = exit_status.did_exit();
  273. bool was_signalled = exit_status.was_signalled();
  274. auto current_state = get_state();
  275. if (notification_fd != -1) {
  276. readiness_watcher.deregister(event_loop);
  277. bp_sys::close(notification_fd);
  278. notification_fd = -1;
  279. }
  280. if (!exit_status.did_exit_clean() && current_state != service_state_t::STOPPING) {
  281. if (did_exit) {
  282. log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
  283. exit_status.get_exit_status());
  284. }
  285. else if (was_signalled) {
  286. log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
  287. exit_status.get_term_sig());
  288. }
  289. }
  290. if (waiting_stopstart_timer) {
  291. process_timer.stop_timer(event_loop);
  292. waiting_stopstart_timer = false;
  293. if (current_state == service_state_t::STARTED) {
  294. // Must have been in smooth recovery and waiting for readiness notification.
  295. // Treat this the same as if we were STARTING:
  296. current_state = service_state_t::STARTING;
  297. }
  298. }
  299. #if USE_UTMPX
  300. if (*inittab_id || *inittab_line) {
  301. clear_utmp_entry(inittab_id, inittab_line);
  302. }
  303. #endif
  304. if (current_state == service_state_t::STARTING) {
  305. // If state is STARTING, we must be waiting for readiness notification; the process has
  306. // terminated before becoming ready.
  307. stop_reason = stopped_reason_t::FAILED;
  308. failed_to_start();
  309. }
  310. else if (current_state == service_state_t::STOPPING) {
  311. // We won't log a non-zero exit status or termination due to signal here -
  312. // we assume that the process died because we signalled it.
  313. if (waiting_stopstart_timer) {
  314. process_timer.stop_timer(event_loop);
  315. }
  316. if (!waiting_for_deps) {
  317. if (stop_pid == -1 && !waiting_for_execstat) {
  318. stop_issued = false; // reset for next time
  319. stopped();
  320. }
  321. }
  322. else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
  323. initiate_start();
  324. }
  325. }
  326. else if (smooth_recovery && current_state == service_state_t::STARTED && check_restart()) {
  327. // unexpected termination, with smooth recovery
  328. doing_smooth_recovery = true;
  329. do_smooth_recovery();
  330. return;
  331. }
  332. else {
  333. handle_unexpected_termination();
  334. }
  335. services->process_queues();
  336. }
  337. void process_service::exec_failed(run_proc_err errcode) noexcept
  338. {
  339. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  340. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  341. if (waiting_stopstart_timer) {
  342. process_timer.stop_timer(event_loop);
  343. waiting_stopstart_timer = false;
  344. }
  345. if (notification_fd != -1) {
  346. readiness_watcher.deregister(event_loop);
  347. bp_sys::close(notification_fd);
  348. notification_fd = -1;
  349. }
  350. if (get_state() == service_state_t::STARTING) {
  351. stop_reason = stopped_reason_t::EXECFAILED;
  352. set_state(service_state_t::STOPPING);
  353. failed_to_start();
  354. }
  355. else {
  356. // Process service in smooth recovery:
  357. doing_smooth_recovery = false;
  358. stop_reason = stopped_reason_t::TERMINATED;
  359. unrecoverable_stop();
  360. }
  361. }
  362. void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  363. {
  364. // For bgproc services, receiving exit status can mean one of two things:
  365. // 1. We were launching the process, and it finished (possibly after forking). If it did fork
  366. // we want to obtain the process id of the process that we should now monitor, the actual
  367. // daemon. Or,
  368. // 2. The above has already happened, and we are monitoring the daemon process, which has now
  369. // terminated for some reason.
  370. begin:
  371. bool did_exit = exit_status.did_exit();
  372. bool was_signalled = exit_status.was_signalled();
  373. auto current_state = get_state();
  374. if (!exit_status.did_exit_clean() && current_state != service_state_t::STOPPING) {
  375. if (did_exit) {
  376. log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
  377. exit_status.get_exit_status());
  378. }
  379. else if (was_signalled) {
  380. log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
  381. exit_status.get_term_sig());
  382. }
  383. }
  384. if (waiting_stopstart_timer) {
  385. process_timer.stop_timer(event_loop);
  386. waiting_stopstart_timer = false;
  387. }
  388. if (doing_smooth_recovery) {
  389. doing_smooth_recovery = false;
  390. // We're either started, or stopping (i.e. we were requested to stop during smooth recovery).
  391. if (current_state == service_state_t::STOPPING) {
  392. // Stop was issued during smooth recovery
  393. if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
  394. if (!waiting_for_deps) {
  395. stopped();
  396. }
  397. else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
  398. initiate_start();
  399. }
  400. }
  401. else {
  402. // We need to re-read the PID, since it has now changed.
  403. if (pid_file.length() != 0) {
  404. auto pid_result = read_pid_file(&exit_status);
  405. if (waiting_for_deps) {
  406. // don't do anything else until dependents have stopped
  407. return;
  408. }
  409. switch (pid_result) {
  410. case pid_result_t::FAILED:
  411. case pid_result_t::TERMINATED:
  412. // Failed startup: no auto-restart.
  413. stopped();
  414. break;
  415. case pid_result_t::OK:
  416. // We now need to bring down the daemon process
  417. bring_down();
  418. break;
  419. }
  420. }
  421. }
  422. services->process_queues();
  423. return;
  424. }
  425. else /* if (service_state == service_state_t::STARTED) */ {
  426. bool need_stop = false;
  427. if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
  428. need_stop = true;
  429. }
  430. else {
  431. // We need to re-read the PID, since it has now changed.
  432. if (pid_file.length() != 0) {
  433. auto pid_result = read_pid_file(&exit_status);
  434. switch (pid_result) {
  435. case pid_result_t::FAILED:
  436. // Failed startup: no auto-restart.
  437. need_stop = true;
  438. break;
  439. case pid_result_t::TERMINATED:
  440. goto begin;
  441. case pid_result_t::OK:
  442. break;
  443. }
  444. }
  445. }
  446. if (need_stop) {
  447. // Failed startup: no auto-restart.
  448. stop_reason = stopped_reason_t::TERMINATED;
  449. unrecoverable_stop();
  450. services->process_queues();
  451. }
  452. return;
  453. }
  454. }
  455. if (current_state == service_state_t::STARTING) {
  456. if (exit_status.did_exit_clean()) {
  457. auto pid_result = read_pid_file(&exit_status);
  458. switch (pid_result) {
  459. case pid_result_t::FAILED:
  460. // Failed startup: no auto-restart.
  461. stop_reason = stopped_reason_t::FAILED;
  462. failed_to_start();
  463. break;
  464. case pid_result_t::TERMINATED:
  465. // started, but immediately terminated
  466. started();
  467. goto begin;
  468. case pid_result_t::OK:
  469. started();
  470. break;
  471. }
  472. }
  473. else {
  474. stop_reason = stopped_reason_t::FAILED;
  475. failed_to_start();
  476. }
  477. }
  478. else if (current_state == service_state_t::STOPPING) {
  479. // We won't log a non-zero exit status or termination due to signal here -
  480. // we assume that the process died because we signalled it.
  481. if (stop_pid == -1 && !waiting_for_execstat) {
  482. stopped();
  483. }
  484. }
  485. else {
  486. // we must be STARTED
  487. if (smooth_recovery && get_target_state() == service_state_t::STARTED && check_restart()) {
  488. doing_smooth_recovery = true;
  489. do_smooth_recovery();
  490. if (get_state() != service_state_t::STARTED) {
  491. doing_smooth_recovery = false;
  492. }
  493. return;
  494. }
  495. handle_unexpected_termination();
  496. }
  497. services->process_queues();
  498. }
  499. void bgproc_service::exec_failed(run_proc_err errcode) noexcept
  500. {
  501. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  502. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  503. if (waiting_stopstart_timer) {
  504. process_timer.stop_timer(event_loop);
  505. waiting_stopstart_timer = false;
  506. }
  507. if (doing_smooth_recovery) {
  508. doing_smooth_recovery = false;
  509. stop_reason = stopped_reason_t::TERMINATED;
  510. unrecoverable_stop();
  511. }
  512. else {
  513. // Only time we execute is for startup:
  514. stop_reason = stopped_reason_t::EXECFAILED;
  515. set_state(service_state_t::STOPPING);
  516. failed_to_start();
  517. }
  518. }
  519. void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  520. {
  521. bool did_exit = exit_status.did_exit();
  522. bool was_signalled = exit_status.was_signalled();
  523. auto current_state = get_state();
  524. // For a scripted service, a termination occurs in one of three main cases:
  525. // - the start script completed (or failed), when service was STARTING
  526. // - the start script was interrupted to cancel startup; state is STOPPING
  527. // - the stop script complete (or failed), state is STOPPING
  528. if (current_state == service_state_t::STOPPING) {
  529. // We might be running the stop script, or we might be running the start script and have issued
  530. // a cancel order via SIGINT:
  531. if (interrupting_start) {
  532. if (waiting_stopstart_timer) {
  533. process_timer.stop_timer(event_loop);
  534. waiting_stopstart_timer = false;
  535. }
  536. // We issued a start interrupt, so we expected this failure:
  537. if (did_exit && exit_status.get_exit_status() != 0) {
  538. log(loglevel_t::NOTICE, "Service ", get_name(), " start cancelled; exit code ",
  539. exit_status.get_exit_status());
  540. // Assume that a command terminating normally (with failure status) requires no cleanup:
  541. stopped();
  542. }
  543. else {
  544. if (was_signalled) {
  545. log(loglevel_t::NOTICE, "Service ", get_name(), " start cancelled from signal ",
  546. exit_status.get_term_sig());
  547. }
  548. // If the start script completed successfully, or was interrupted via our signal,
  549. // we want to run the stop script to clean up:
  550. bring_down();
  551. }
  552. interrupting_start = false;
  553. }
  554. else if (exit_status.did_exit_clean()) {
  555. // We were running the stop script and finished successfully
  556. stopped();
  557. }
  558. else {
  559. // ??? failed to stop! Let's log it as warning:
  560. if (did_exit) {
  561. log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
  562. exit_status.get_exit_status());
  563. }
  564. else if (was_signalled) {
  565. log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
  566. exit_status.get_term_sig());
  567. }
  568. // Even if the stop script failed, assume that service is now stopped, so that any dependencies
  569. // can be stopped. There's not really any other useful course of action here.
  570. stopped();
  571. }
  572. services->process_queues();
  573. }
  574. else { // STARTING
  575. if (exit_status.did_exit_clean()) {
  576. started();
  577. }
  578. else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
  579. // A skippable service can be skipped by interrupting (eg by ^C if the service
  580. // starts on the console).
  581. start_skipped = true;
  582. started();
  583. }
  584. else {
  585. // failed to start
  586. if (did_exit) {
  587. log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
  588. exit_status.get_exit_status());
  589. }
  590. else if (was_signalled) {
  591. log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
  592. exit_status.get_term_sig());
  593. }
  594. stop_reason = stopped_reason_t::FAILED;
  595. failed_to_start();
  596. }
  597. services->process_queues();
  598. }
  599. }
  600. void scripted_service::exec_failed(run_proc_err errcode) noexcept
  601. {
  602. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  603. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  604. auto current_state = get_state();
  605. if (current_state == service_state_t::STARTING) {
  606. stop_reason = stopped_reason_t::EXECFAILED;
  607. failed_to_start();
  608. }
  609. else if (current_state == service_state_t::STOPPING) {
  610. // We've logged the failure, but it's probably better not to leave the service in
  611. // STOPPING state:
  612. stopped();
  613. }
  614. }
  // Convert `val` to the unsigned counterpart of its (integral) type, e.g. int -> unsigned int.
  // The conversion is a plain static_cast, so negative values wrap around.
  template <typename T> typename std::make_unsigned<T>::type make_unsigned_val(T val)
  {
      using unsigned_t = typename std::make_unsigned<T>::type;
      return static_cast<unsigned_t>(val);
  }
  620. bgproc_service::pid_result_t
  621. bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
  622. {
  623. const char *pid_file_c = pid_file.c_str();
  624. int fd = bp_sys::open(pid_file_c, O_CLOEXEC);
  625. if (fd == -1) {
  626. log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
  627. return pid_result_t::FAILED;
  628. }
  629. char pidbuf[21]; // just enough to hold any 64-bit integer
  630. int r = complete_read(fd, pidbuf, 20);
  631. if (r < 0) {
  632. // Could not read from PID file
  633. log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
  634. bp_sys::close(fd);
  635. return pid_result_t::FAILED;
  636. }
  637. bp_sys::close(fd);
  638. pidbuf[r] = 0; // store nul terminator
  639. bool valid_pid = false;
  640. try {
  641. unsigned long long v = std::strtoull(pidbuf, nullptr, 0);
  642. if (v <= make_unsigned_val(std::numeric_limits<pid_t>::max())) {
  643. pid = (pid_t) v;
  644. valid_pid = true;
  645. }
  646. }
  647. catch (std::out_of_range &exc) {
  648. // Too large?
  649. }
  650. catch (std::invalid_argument &exc) {
  651. // Ok, so it doesn't look like a number: proceed...
  652. }
  653. if (valid_pid) {
  654. pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
  655. if (wait_r == -1 && errno == ECHILD) {
  656. // We can't track this child - check process exists:
  657. if (bp_sys::kill(pid, 0) == 0 || errno != ESRCH) {
  658. tracking_child = false;
  659. return pid_result_t::OK;
  660. }
  661. else {
  662. log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
  663. pid = -1;
  664. return pid_result_t::FAILED;
  665. }
  666. }
  667. else if (wait_r == pid) {
  668. pid = -1;
  669. return pid_result_t::TERMINATED;
  670. }
  671. else if (wait_r == 0) {
  672. // We can track the child
  673. child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
  674. tracking_child = true;
  675. reserved_child_watch = true;
  676. return pid_result_t::OK;
  677. }
  678. }
  679. log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
  680. pid = -1;
  681. return pid_result_t::FAILED;
  682. }
  683. void process_service::bring_down() noexcept
  684. {
  685. if (stop_pid != -1 || stop_issued) {
  686. // waiting for stop command to complete (or for process to die after it has complete);
  687. // can't do anything here.
  688. return;
  689. }
  690. if (waiting_for_execstat) {
  691. // The process is still starting. This should be uncommon, but can occur during
  692. // smooth recovery (or it may mean the stop command process is still starting). We can't
  693. // do much now; we have to wait until we get the status, and then act appropriately.
  694. return;
  695. }
  696. else if (pid != -1) {
  697. // The process is still kicking on - must actually kill it.
  698. if (!stop_command.empty() && !stop_issued) {
  699. if (start_stop_process(stop_arg_parts)) {
  700. goto arm_timer;
  701. }
  702. // stop-command failed, need to try something else:
  703. if (term_signal != 0) {
  704. kill_pg(term_signal);
  705. }
  706. else {
  707. kill_pg(SIGKILL);
  708. }
  709. }
  710. else if (term_signal != 0) {
  711. // We signal the process group (-pid) rather than just the process as there's less
  712. // risk then of creating an orphaned process group:
  713. kill_pg(term_signal);
  714. }
  715. if (stop_pid == -1 && !tracking_child) {
  716. // If we have no way of tracking when the child terminates, assume stopped now
  717. stopped();
  718. return;
  719. }
  720. arm_timer:
  721. stop_issued = true; // (don't try again)
  722. // If there's a stop timeout, arm the timer now:
  723. if (stop_timeout != time_val(0,0)) {
  724. process_timer.arm_timer_rel(event_loop, stop_timeout);
  725. waiting_stopstart_timer = true;
  726. }
  727. // The rest is done in handle_exit_status.
  728. }
  729. else {
  730. // The process is already dead (possibly, we are in smooth recovery waiting for timer)
  731. doing_smooth_recovery = false;
  732. if (waiting_restart_timer) {
  733. process_timer.stop_timer(event_loop);
  734. waiting_restart_timer = false;
  735. }
  736. stopped();
  737. }
  738. }
  739. void process_service::kill_with_fire() noexcept
  740. {
  741. base_process_service::kill_with_fire();
  742. if (stop_pid != -1) {
  743. log(loglevel_t::WARN, "Service ", get_name(), " stop command, with pid ", pid,
  744. ", exceeded allowed stop time; killing.");
  745. pid_t pgid = bp_sys::getpgid(stop_pid);
  746. if (pgid == -1) {
  747. // On OpenBSD, not allowed to query pgid of a process in another session, but in that
  748. // case we know the group anyway:
  749. pgid = stop_pid;
  750. }
  751. bp_sys::kill(-pgid, SIGKILL);
  752. }
  753. }
  754. void scripted_service::bring_down() noexcept
  755. {
  756. if (pid != -1) {
  757. // We're already running the stop script; nothing to do.
  758. return;
  759. }
  760. if (stop_command.length() == 0) {
  761. stopped();
  762. }
  763. else if (! start_ps_process(stop_arg_parts, false)) {
  764. // Couldn't execute stop script, but there's not much we can do:
  765. stopped();
  766. }
  767. else {
  768. // successfully started stop script: start kill timer:
  769. if (stop_timeout != time_val(0,0)) {
  770. process_timer.arm_timer_rel(event_loop, stop_timeout);
  771. waiting_stopstart_timer = true;
  772. }
  773. }
  774. }
  775. dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
  776. {
  777. service->timer_expired();
  778. // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
  779. return dasynq::rearm::NOOP;
  780. }
  781. bool process_service::start_stop_process(const std::vector<const char *> &cmd) noexcept
  782. {
  783. // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
  784. // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
  785. // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
  786. // is written to the pipe, and the parent can read it.
  787. int pipefd[2];
  788. if (bp_sys::pipe2(pipefd, O_CLOEXEC)) {
  789. log(loglevel_t::ERROR, get_name(), ": can't create status check pipe (for stop command): ",
  790. strerror(errno));
  791. return false;
  792. }
  793. const char * logfile = this->logfile.c_str();
  794. if (*logfile == 0) {
  795. logfile = "/dev/null";
  796. }
  797. bool child_status_registered = false;
  798. // Set up complete, now fork and exec:
  799. pid_t forkpid;
  800. try {
  801. stop_pipe_watcher.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
  802. child_status_registered = true;
  803. // We specify a high priority (i.e. low priority value) so that process termination is
  804. // handled early. This means we have always recorded that the process is terminated by the
  805. // time that we handle events that might otherwise cause us to signal the process, so we
  806. // avoid sending a signal to an invalid (and possibly recycled) process ID.
  807. forkpid = stop_watcher.fork(event_loop, reserved_stop_watch, dasynq::DEFAULT_PRIORITY - 10);
  808. reserved_stop_watch = true;
  809. }
  810. catch (std::exception &e) {
  811. log(loglevel_t::ERROR, get_name(), ": could not fork (for stop command): ", e.what());
  812. goto out_cs_h;
  813. }
  814. if (forkpid == 0) {
  815. close(pipefd[0]);
  816. const char * working_dir_c = service_dsc_dir;
  817. if (! working_dir.empty()) working_dir_c = working_dir.c_str();
  818. run_proc_params run_params{cmd.data(), working_dir_c, logfile, pipefd[1], run_as_uid, run_as_gid, rlimits};
  819. run_params.on_console = false;
  820. run_params.in_foreground = false;
  821. run_params.csfd = -1;
  822. run_params.socket_fd = socket_fd;
  823. run_params.notify_fd = -1;
  824. run_params.force_notify_fd = -1;
  825. run_params.notify_var = nullptr;
  826. run_params.env_file = env_file.c_str();
  827. #if SUPPORT_CGROUPS
  828. run_params.run_in_cgroup = run_in_cgroup.c_str();
  829. #endif
  830. run_child_proc(run_params);
  831. }
  832. else {
  833. // Parent process
  834. stop_pid = forkpid;
  835. bp_sys::close(pipefd[1]); // close the 'other end' fd
  836. waiting_for_execstat = true;
  837. return true;
  838. }
  839. // Failure exit:
  840. out_cs_h:
  841. if (child_status_registered) {
  842. stop_pipe_watcher.deregister(event_loop);
  843. }
  844. bp_sys::close(pipefd[0]);
  845. bp_sys::close(pipefd[1]);
  846. return false;
  847. }