proc-service.cc 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957
  1. #include <cstring>
  2. #include <type_traits>
  3. #include <sys/un.h>
  4. #include <sys/socket.h>
  5. #include "dinit.h"
  6. #include "dinit-socket.h"
  7. #include "dinit-util.h"
  8. #include "dinit-log.h"
  9. #include "proc-service.h"
  10. /*
  11. * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
  12. *
  13. * See proc-service.h header for interface details.
  14. */
  15. // Given a string and a list of pairs of (start,end) indices for each argument in that string,
  16. // store a null terminator for the argument. Return a `char *` vector containing the beginning
  17. // of each argument and a trailing nullptr. (The returned array is invalidated if the string is
  18. // later modified).
  19. std::vector<const char *> separate_args(ha_string &s,
  20. const std::list<std::pair<unsigned,unsigned>> &arg_indices)
  21. {
  22. std::vector<const char *> r;
  23. r.reserve(arg_indices.size() + 1);
  24. // First store nul terminator for each part:
  25. for (auto index_pair : arg_indices) {
  26. if (index_pair.second < s.length()) {
  27. s[index_pair.second] = 0;
  28. }
  29. }
  30. // Now we can get the C string (c_str) and store offsets into it:
  31. const char * cstr = s.c_str();
  32. for (auto index_pair : arg_indices) {
  33. r.push_back(cstr + index_pair.first);
  34. }
  35. r.push_back(nullptr);
  36. return r;
  37. }
  38. void process_service::exec_succeeded() noexcept
  39. {
  40. if (get_type() != service_type_t::PROCESS) {
  41. return;
  42. }
  43. tracking_child = true;
  44. // This could be a smooth recovery (state already STARTED). No need to do anything here in
  45. // that case. Otherwise, we are STARTING or STOPPING:
  46. if (get_state() == service_state_t::STARTING) {
  47. if (force_notification_fd != -1 || !notification_var.empty()) {
  48. // Wait for readiness notification:
  49. readiness_watcher.set_enabled(event_loop, true);
  50. }
  51. else {
  52. if (waiting_stopstart_timer) {
  53. process_timer.stop_timer(event_loop);
  54. waiting_stopstart_timer = false;
  55. }
  56. started();
  57. }
  58. }
  59. else if (get_state() == service_state_t::STARTED) {
  60. // Smooth recovery (is now complete)
  61. if (waiting_stopstart_timer) {
  62. process_timer.stop_timer(event_loop);
  63. waiting_stopstart_timer = false;
  64. }
  65. }
  66. else if (get_state() == service_state_t::STOPPING) {
  67. // stopping, but smooth recovery was in process. That's now over so we can
  68. // commence normal stop. Note that if pid == -1 the process already stopped,
  69. // that is correctly handled by bring_down().
  70. if (stop_check_dependents()) {
  71. bring_down();
  72. }
  73. }
  74. }
  75. void scripted_service::exec_succeeded() noexcept
  76. {
  77. // For a scripted service, this means nothing other than that the start/stop
  78. // script will now begin.
  79. }
  80. rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
  81. {
  82. base_process_service *sr = service;
  83. sr->waiting_for_execstat = false;
  84. run_proc_err exec_status;
  85. int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
  86. deregister(loop);
  87. close(get_watched_fd());
  88. if (r > 0) {
  89. // We read an errno code; exec() failed, and the service startup failed.
  90. if (sr->pid != -1) {
  91. sr->child_listener.deregister(event_loop, sr->pid);
  92. sr->reserved_child_watch = false;
  93. if (sr->waiting_stopstart_timer) {
  94. sr->process_timer.stop_timer(loop);
  95. sr->waiting_stopstart_timer = false;
  96. }
  97. }
  98. sr->pid = -1;
  99. sr->exec_err_info = exec_status;
  100. sr->exec_failed(exec_status);
  101. }
  102. else {
  103. sr->exec_succeeded();
  104. if (sr->pid == -1) {
  105. // Somehow the process managed to complete before we even saw the exec() status.
  106. sr->handle_exit_status(sr->exit_status);
  107. }
  108. }
  109. sr->services->process_queues();
  110. return rearm::REMOVED;
  111. }
  112. rearm stop_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
  113. {
  114. process_service *sr = service;
  115. sr->waiting_for_execstat = false;
  116. run_proc_err exec_status;
  117. int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
  118. deregister(loop);
  119. close(get_watched_fd());
  120. if (r > 0) {
  121. // We read an errno code; exec() failed, and the service startup failed.
  122. if (sr->stop_pid != -1) {
  123. log(loglevel_t::ERROR, "Service ", sr->get_name(), ": could not fork for stop command: ",
  124. exec_stage_descriptions[static_cast<int>(exec_status.stage)], ": ",
  125. strerror(exec_status.st_errno));
  126. sr->stop_watcher.deregister(event_loop, sr->stop_pid);
  127. sr->reserved_child_watch = false;
  128. sr->stop_pid = -1;
  129. if (sr->pid != -1) {
  130. if (sr->term_signal != 0) {
  131. sr->kill_pg(sr->term_signal);
  132. }
  133. if (!sr->tracking_child) {
  134. sr->stop_issued = false;
  135. sr->stopped();
  136. }
  137. }
  138. }
  139. }
  140. else {
  141. // Nothing to do really but wait for termination - unless it's already happened, so let's
  142. // check that now:
  143. if (sr->stop_pid == -1) {
  144. sr->handle_stop_exit();
  145. }
  146. }
  147. sr->services->process_queues();
  148. return rearm::REMOVED;
  149. }
  150. rearm ready_notify_watcher::fd_event(eventloop_t &, int fd, int flags) noexcept
  151. {
  152. char buf[128];
  153. if (service->get_state() == service_state_t::STARTING) {
  154. // can we actually read anything from the notification pipe?
  155. int r = bp_sys::read(fd, buf, sizeof(buf));
  156. if (r > 0) {
  157. if (service->waiting_stopstart_timer) {
  158. service->process_timer.stop_timer(event_loop);
  159. service->waiting_stopstart_timer = false;
  160. }
  161. service->started();
  162. }
  163. else if (r == 0 || errno != EAGAIN) {
  164. if (service->waiting_stopstart_timer) {
  165. service->process_timer.stop_timer(event_loop);
  166. service->waiting_stopstart_timer = false;
  167. }
  168. service->set_state(service_state_t::STOPPING);
  169. service->failed_to_start(false, false);
  170. service->bring_down();
  171. }
  172. service->services->process_queues();
  173. }
  174. else {
  175. // Just keep consuming data from the pipe:
  176. int r = bp_sys::read(fd, buf, sizeof(buf));
  177. if (r == 0) {
  178. // Process closed write end or terminated
  179. close(fd);
  180. service->notification_fd = -1;
  181. return rearm::DISARM;
  182. }
  183. }
  184. return rearm::REARM;
  185. }
  186. dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
  187. {
  188. base_process_service *sr = service;
  189. sr->pid = -1;
  190. sr->exit_status = bp_sys::exit_status(status);
  191. // Ok, for a process service, any process death which we didn't rig ourselves is a bit... unexpected.
  192. // Probably, the child died because we asked it to (sr->service_state == STOPPING). But even if we
  193. // didn't, there's not much we can do.
  194. // Must stop watch now since handle_exit_status might result in re-launch:
  195. // (stop_watch instead of deregister, so that we hold watch reservation).
  196. stop_watch(loop);
  197. if (sr->waiting_for_execstat) {
  198. // We still don't have an exec() status from the forked child, wait for that
  199. // before doing any further processing.
  200. return dasynq::rearm::NOOP; // hold watch reservation
  201. }
  202. if (sr->waiting_stopstart_timer) {
  203. sr->process_timer.stop_timer(loop);
  204. sr->waiting_stopstart_timer = false;
  205. }
  206. sr->handle_exit_status(bp_sys::exit_status(status));
  207. return dasynq::rearm::NOOP;
  208. }
  209. dasynq::rearm stop_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
  210. {
  211. process_service *sr = service;
  212. sr->stop_pid = -1;
  213. sr->stop_status = bp_sys::exit_status(status);
  214. stop_watch(loop);
  215. if (sr->waiting_for_execstat) {
  216. // no exec status yet, wait for that first
  217. return dasynq::rearm::NOOP;
  218. }
  219. sr->handle_stop_exit();
  220. sr->services->process_queues();
  221. return dasynq::rearm::NOOP;
  222. }
  223. rearm log_output_watcher::fd_event(eventloop_t &eloop, int fd, int flags) noexcept
  224. {
  225. // In case buffer size has been decreased, check if we are already at the limit:
  226. if (service->log_buf_size >= service->log_buf_max) {
  227. // If so, read and discard.
  228. char buf[1024];
  229. int r = bp_sys::read(fd, buf, 1024);
  230. if (r == -1) {
  231. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  232. return rearm::REARM;
  233. }
  234. goto bad_read;
  235. }
  236. if (r == 0) goto eof_read;
  237. return rearm::REARM;
  238. }
  239. {
  240. size_t max_read = std::max(service->log_buf_max / 8, 256u);
  241. max_read = std::min((unsigned)max_read, service->log_buf_max - service->log_buf_size);
  242. // ensure vector has size sufficient to read
  243. unsigned new_size = service->log_buf_size + max_read;
  244. if (!service->ensure_log_buffer_backing(new_size)) {
  245. return rearm::DISARM;
  246. }
  247. max_read = service->log_buffer.size() - service->log_buf_size;
  248. int r = bp_sys::read(fd, service->log_buffer.data() + service->log_buf_size, max_read);
  249. if (r == -1) {
  250. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  251. return rearm::REARM;
  252. }
  253. goto bad_read;
  254. }
  255. if (r == 0) goto eof_read;
  256. service->log_buf_size += r;
  257. return rearm::REARM;
  258. }
  259. // error/end-of-stream handling:
  260. bad_read:
  261. log(loglevel_t::WARN, "Service ", service->get_name(), " output not readable: ", strerror(errno));
  262. eof_read:
  263. deregister(eloop);
  264. close(fd);
  265. close(service->log_output_fd);
  266. service->log_input_fd = -1;
  267. service->log_output_fd = -1;
  268. return rearm::REMOVED;
  269. }
  270. void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  271. {
  272. bool did_exit = exit_status.did_exit();
  273. bool was_signalled = exit_status.was_signalled();
  274. auto service_state = get_state();
  275. if (notification_fd != -1) {
  276. readiness_watcher.deregister(event_loop);
  277. bp_sys::close(notification_fd);
  278. notification_fd = -1;
  279. }
  280. if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
  281. if (did_exit) {
  282. log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
  283. exit_status.get_exit_status());
  284. }
  285. else if (was_signalled) {
  286. log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
  287. exit_status.get_term_sig());
  288. }
  289. }
  290. if (waiting_stopstart_timer) {
  291. process_timer.stop_timer(event_loop);
  292. waiting_stopstart_timer = false;
  293. }
  294. #if USE_UTMPX
  295. if (*inittab_id || *inittab_line) {
  296. clear_utmp_entry(inittab_id, inittab_line);
  297. }
  298. #endif
  299. if (service_state == service_state_t::STARTING) {
  300. // If state is STARTING, we must be waiting for readiness notification; the process has
  301. // terminated before becoming ready.
  302. stop_reason = stopped_reason_t::FAILED;
  303. service_state = service_state_t::STOPPING;
  304. failed_to_start();
  305. }
  306. else if (service_state == service_state_t::STOPPING) {
  307. // We won't log a non-zero exit status or termination due to signal here -
  308. // we assume that the process died because we signalled it.
  309. if (waiting_stopstart_timer) {
  310. process_timer.stop_timer(event_loop);
  311. }
  312. if (!waiting_for_deps) {
  313. if (stop_pid == -1 && !waiting_for_execstat) {
  314. stop_issued = false; // reset for next time
  315. stopped();
  316. }
  317. }
  318. else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
  319. initiate_start();
  320. }
  321. }
  322. else if (smooth_recovery && service_state == service_state_t::STARTED && check_restart()) {
  323. // unexpected termination, with smooth recovery
  324. doing_smooth_recovery = true;
  325. do_smooth_recovery();
  326. return;
  327. }
  328. else {
  329. handle_unexpected_termination();
  330. }
  331. services->process_queues();
  332. }
  333. void process_service::exec_failed(run_proc_err errcode) noexcept
  334. {
  335. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  336. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  337. if (waiting_stopstart_timer) {
  338. process_timer.stop_timer(event_loop);
  339. waiting_stopstart_timer = false;
  340. }
  341. if (notification_fd != -1) {
  342. readiness_watcher.deregister(event_loop);
  343. bp_sys::close(notification_fd);
  344. notification_fd = -1;
  345. }
  346. if (get_state() == service_state_t::STARTING) {
  347. stop_reason = stopped_reason_t::EXECFAILED;
  348. set_state(service_state_t::STOPPING);
  349. failed_to_start();
  350. }
  351. else {
  352. // Process service in smooth recovery:
  353. doing_smooth_recovery = false;
  354. stop_reason = stopped_reason_t::TERMINATED;
  355. unrecoverable_stop();
  356. }
  357. }
  358. void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  359. {
  360. // For bgproc services, receiving exit status can mean one of two things:
  361. // 1. We were launching the process, and it finished (possibly after forking). If it did fork
  362. // we want to obtain the process id of the process that we should now monitor, the actual
  363. // daemon. Or,
  364. // 2. The above has already happened, and we are monitoring the daemon process, which has now
  365. // terminated for some reason.
  366. begin:
  367. bool did_exit = exit_status.did_exit();
  368. bool was_signalled = exit_status.was_signalled();
  369. auto service_state = get_state();
  370. if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
  371. if (did_exit) {
  372. log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
  373. exit_status.get_exit_status());
  374. }
  375. else if (was_signalled) {
  376. log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
  377. exit_status.get_term_sig());
  378. }
  379. }
  380. if (waiting_stopstart_timer) {
  381. process_timer.stop_timer(event_loop);
  382. waiting_stopstart_timer = false;
  383. }
  384. if (doing_smooth_recovery) {
  385. doing_smooth_recovery = false;
  386. // We're either started, or stopping (i.e. we were requested to stop during smooth recovery).
  387. if (service_state == service_state_t::STOPPING) {
  388. // Stop was issued during smooth recovery
  389. if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
  390. if (!waiting_for_deps) {
  391. stopped();
  392. }
  393. else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
  394. initiate_start();
  395. }
  396. }
  397. else {
  398. // We need to re-read the PID, since it has now changed.
  399. if (pid_file.length() != 0) {
  400. auto pid_result = read_pid_file(&exit_status);
  401. if (waiting_for_deps) {
  402. // don't do anything else until dependents have stopped
  403. return;
  404. }
  405. switch (pid_result) {
  406. case pid_result_t::FAILED:
  407. case pid_result_t::TERMINATED:
  408. // Failed startup: no auto-restart.
  409. stopped();
  410. break;
  411. case pid_result_t::OK:
  412. // We now need to bring down the daemon process
  413. bring_down();
  414. break;
  415. }
  416. }
  417. }
  418. services->process_queues();
  419. return;
  420. }
  421. else /* if (service_state == service_state_t::STARTED) */ {
  422. bool need_stop = false;
  423. if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
  424. need_stop = true;
  425. }
  426. else {
  427. // We need to re-read the PID, since it has now changed.
  428. if (pid_file.length() != 0) {
  429. auto pid_result = read_pid_file(&exit_status);
  430. switch (pid_result) {
  431. case pid_result_t::FAILED:
  432. // Failed startup: no auto-restart.
  433. need_stop = true;
  434. break;
  435. case pid_result_t::TERMINATED:
  436. goto begin;
  437. case pid_result_t::OK:
  438. break;
  439. }
  440. }
  441. }
  442. if (need_stop) {
  443. // Failed startup: no auto-restart.
  444. stop_reason = stopped_reason_t::TERMINATED;
  445. unrecoverable_stop();
  446. services->process_queues();
  447. }
  448. return;
  449. }
  450. }
  451. if (service_state == service_state_t::STARTING) {
  452. if (exit_status.did_exit_clean()) {
  453. auto pid_result = read_pid_file(&exit_status);
  454. switch (pid_result) {
  455. case pid_result_t::FAILED:
  456. // Failed startup: no auto-restart.
  457. stop_reason = stopped_reason_t::FAILED;
  458. service_state = service_state_t::STOPPING;
  459. failed_to_start();
  460. break;
  461. case pid_result_t::TERMINATED:
  462. // started, but immediately terminated
  463. started();
  464. goto begin;
  465. case pid_result_t::OK:
  466. started();
  467. break;
  468. }
  469. }
  470. else {
  471. stop_reason = stopped_reason_t::FAILED;
  472. service_state = service_state_t::STOPPING;
  473. failed_to_start();
  474. }
  475. }
  476. else if (service_state == service_state_t::STOPPING) {
  477. // We won't log a non-zero exit status or termination due to signal here -
  478. // we assume that the process died because we signalled it.
  479. if (stop_pid == -1 && !waiting_for_execstat) {
  480. stopped();
  481. }
  482. }
  483. else {
  484. // we must be STARTED
  485. if (smooth_recovery && get_target_state() == service_state_t::STARTED && check_restart()) {
  486. doing_smooth_recovery = true;
  487. do_smooth_recovery();
  488. if (get_state() != service_state_t::STARTED) {
  489. doing_smooth_recovery = false;
  490. }
  491. return;
  492. }
  493. handle_unexpected_termination();
  494. }
  495. services->process_queues();
  496. }
  497. void bgproc_service::exec_failed(run_proc_err errcode) noexcept
  498. {
  499. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  500. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  501. if (waiting_stopstart_timer) {
  502. process_timer.stop_timer(event_loop);
  503. waiting_stopstart_timer = false;
  504. }
  505. if (doing_smooth_recovery) {
  506. doing_smooth_recovery = false;
  507. stop_reason = stopped_reason_t::TERMINATED;
  508. unrecoverable_stop();
  509. }
  510. else {
  511. // Only time we execute is for startup:
  512. stop_reason = stopped_reason_t::EXECFAILED;
  513. set_state(service_state_t::STOPPING);
  514. failed_to_start();
  515. }
  516. }
  517. void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  518. {
  519. bool did_exit = exit_status.did_exit();
  520. bool was_signalled = exit_status.was_signalled();
  521. auto service_state = get_state();
  522. // For a scripted service, a termination occurs in one of three main cases:
  523. // - the start script completed (or failed), when service was STARTING
  524. // - the start script was interrupted to cancel startup; state is STOPPING
  525. // - the stop script complete (or failed), state is STOPPING
  526. if (service_state == service_state_t::STOPPING) {
  527. // We might be running the stop script, or we might be running the start script and have issued
  528. // a cancel order via SIGINT:
  529. if (interrupting_start) {
  530. if (waiting_stopstart_timer) {
  531. process_timer.stop_timer(event_loop);
  532. waiting_stopstart_timer = false;
  533. }
  534. // We issued a start interrupt, so we expected this failure:
  535. if (did_exit && exit_status.get_exit_status() != 0) {
  536. log(loglevel_t::NOTICE, "Service ", get_name(), " start cancelled; exit code ",
  537. exit_status.get_exit_status());
  538. // Assume that a command terminating normally (with failure status) requires no cleanup:
  539. stopped();
  540. }
  541. else {
  542. if (was_signalled) {
  543. log(loglevel_t::NOTICE, "Service ", get_name(), " start cancelled from signal ",
  544. exit_status.get_term_sig());
  545. }
  546. // If the start script completed successfully, or was interrupted via our signal,
  547. // we want to run the stop script to clean up:
  548. bring_down();
  549. }
  550. interrupting_start = false;
  551. }
  552. else if (exit_status.did_exit_clean()) {
  553. // We were running the stop script and finished successfully
  554. stopped();
  555. }
  556. else {
  557. // ??? failed to stop! Let's log it as warning:
  558. if (did_exit) {
  559. log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
  560. exit_status.get_exit_status());
  561. }
  562. else if (was_signalled) {
  563. log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
  564. exit_status.get_term_sig());
  565. }
  566. // Even if the stop script failed, assume that service is now stopped, so that any dependencies
  567. // can be stopped. There's not really any other useful course of action here.
  568. stopped();
  569. }
  570. services->process_queues();
  571. }
  572. else { // STARTING
  573. if (exit_status.did_exit_clean()) {
  574. started();
  575. }
  576. else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
  577. // A skippable service can be skipped by interrupting (eg by ^C if the service
  578. // starts on the console).
  579. start_skipped = true;
  580. started();
  581. }
  582. else {
  583. // failed to start
  584. if (did_exit) {
  585. log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
  586. exit_status.get_exit_status());
  587. }
  588. else if (was_signalled) {
  589. log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
  590. exit_status.get_term_sig());
  591. }
  592. service_state = service_state_t::STOPPED;
  593. stop_reason = stopped_reason_t::FAILED;
  594. failed_to_start();
  595. }
  596. services->process_queues();
  597. }
  598. }
  599. void scripted_service::exec_failed(run_proc_err errcode) noexcept
  600. {
  601. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  602. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  603. auto service_state = get_state();
  604. if (service_state == service_state_t::STARTING) {
  605. stop_reason = stopped_reason_t::EXECFAILED;
  606. service_state = service_state_t::STOPPING;
  607. failed_to_start();
  608. }
  609. else if (service_state == service_state_t::STOPPING) {
  610. // We've logged the failure, but it's probably better not to leave the service in
  611. // STOPPING state:
  612. stopped();
  613. }
  614. }
  615. // Return a value as an unsigned-type value.
  616. template <typename T> typename std::make_unsigned<T>::type make_unsigned_val(T val)
  617. {
  618. return static_cast<typename std::make_unsigned<T>::type>(val);
  619. }
  620. bgproc_service::pid_result_t
  621. bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
  622. {
  623. const char *pid_file_c = pid_file.c_str();
  624. int fd = bp_sys::open(pid_file_c, O_CLOEXEC);
  625. if (fd == -1) {
  626. log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
  627. return pid_result_t::FAILED;
  628. }
  629. char pidbuf[21]; // just enough to hold any 64-bit integer
  630. int r = complete_read(fd, pidbuf, 20);
  631. if (r < 0) {
  632. // Could not read from PID file
  633. log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
  634. bp_sys::close(fd);
  635. return pid_result_t::FAILED;
  636. }
  637. bp_sys::close(fd);
  638. pidbuf[r] = 0; // store nul terminator
  639. bool valid_pid = false;
  640. try {
  641. unsigned long long v = std::strtoull(pidbuf, nullptr, 0);
  642. if (v <= make_unsigned_val(std::numeric_limits<pid_t>::max())) {
  643. pid = (pid_t) v;
  644. valid_pid = true;
  645. }
  646. }
  647. catch (std::out_of_range &exc) {
  648. // Too large?
  649. }
  650. catch (std::invalid_argument &exc) {
  651. // Ok, so it doesn't look like a number: proceed...
  652. }
  653. if (valid_pid) {
  654. pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
  655. if (wait_r == -1 && errno == ECHILD) {
  656. // We can't track this child - check process exists:
  657. if (bp_sys::kill(pid, 0) == 0 || errno != ESRCH) {
  658. tracking_child = false;
  659. return pid_result_t::OK;
  660. }
  661. else {
  662. log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
  663. pid = -1;
  664. return pid_result_t::FAILED;
  665. }
  666. }
  667. else if (wait_r == pid) {
  668. pid = -1;
  669. return pid_result_t::TERMINATED;
  670. }
  671. else if (wait_r == 0) {
  672. // We can track the child
  673. child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
  674. tracking_child = true;
  675. reserved_child_watch = true;
  676. return pid_result_t::OK;
  677. }
  678. }
  679. log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
  680. pid = -1;
  681. return pid_result_t::FAILED;
  682. }
  683. void process_service::bring_down() noexcept
  684. {
  685. if (stop_pid != -1 || stop_issued) {
  686. // waiting for stop command to complete (or for process to die after it has complete);
  687. // can't do anything here.
  688. return;
  689. }
  690. if (waiting_for_execstat) {
  691. // The process is still starting. This should be uncommon, but can occur during
  692. // smooth recovery (or it may mean the stop command process is still starting). We can't
  693. // do much now; we have to wait until we get the status, and then act appropriately.
  694. return;
  695. }
  696. else if (pid != -1) {
  697. // The process is still kicking on - must actually kill it.
  698. if (!stop_command.empty() && !stop_issued) {
  699. if (start_stop_process(stop_arg_parts)) {
  700. goto arm_timer;
  701. }
  702. // stop-command failed, need to try something else:
  703. if (term_signal != 0) {
  704. kill_pg(term_signal);
  705. }
  706. else {
  707. kill_pg(SIGKILL);
  708. }
  709. }
  710. else if (term_signal != 0) {
  711. // We signal the process group (-pid) rather than just the process as there's less
  712. // risk then of creating an orphaned process group:
  713. kill_pg(term_signal);
  714. }
  715. if (stop_pid == -1 && !tracking_child) {
  716. // If we have no way of tracking when the child terminates, assume stopped now
  717. stopped();
  718. return;
  719. }
  720. arm_timer:
  721. stop_issued = true; // (don't try again)
  722. // If there's a stop timeout, arm the timer now:
  723. if (stop_timeout != time_val(0,0)) {
  724. process_timer.arm_timer_rel(event_loop, stop_timeout);
  725. waiting_stopstart_timer = true;
  726. }
  727. // The rest is done in handle_exit_status.
  728. }
  729. else {
  730. // The process is already dead (possibly, we are in smooth recovery waiting for timer)
  731. doing_smooth_recovery = false;
  732. if (waiting_restart_timer) {
  733. process_timer.stop_timer(event_loop);
  734. waiting_restart_timer = false;
  735. }
  736. stopped();
  737. }
  738. }
  739. void process_service::kill_with_fire() noexcept
  740. {
  741. base_process_service::kill_with_fire();
  742. if (stop_pid != -1) {
  743. log(loglevel_t::WARN, "Service ", get_name(), " stop command, with pid ", pid,
  744. ", exceeded allowed stop time; killing.");
  745. pid_t pgid = bp_sys::getpgid(stop_pid);
  746. if (pgid == -1) {
  747. // On OpenBSD, not allowed to query pgid of a process in another session, but in that
  748. // case we know the group anyway:
  749. pgid = stop_pid;
  750. }
  751. bp_sys::kill(-pgid, SIGKILL);
  752. }
  753. }
  754. void scripted_service::bring_down() noexcept
  755. {
  756. if (pid != -1) {
  757. // We're already running the stop script; nothing to do.
  758. return;
  759. }
  760. if (stop_command.length() == 0) {
  761. stopped();
  762. }
  763. else if (! start_ps_process(stop_arg_parts, false)) {
  764. // Couldn't execute stop script, but there's not much we can do:
  765. stopped();
  766. }
  767. else {
  768. // successfully started stop script: start kill timer:
  769. if (stop_timeout != time_val(0,0)) {
  770. process_timer.arm_timer_rel(event_loop, stop_timeout);
  771. waiting_stopstart_timer = true;
  772. }
  773. }
  774. }
  775. dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
  776. {
  777. service->timer_expired();
  778. // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
  779. return dasynq::rearm::NOOP;
  780. }
  781. bool process_service::start_stop_process(const std::vector<const char *> &cmd) noexcept
  782. {
  783. // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
  784. // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
  785. // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
  786. // is written to the pipe, and the parent can read it.
  787. int pipefd[2];
  788. if (bp_sys::pipe2(pipefd, O_CLOEXEC)) {
  789. log(loglevel_t::ERROR, get_name(), ": can't create status check pipe (for stop command): ",
  790. strerror(errno));
  791. return false;
  792. }
  793. const char * logfile = this->logfile.c_str();
  794. if (*logfile == 0) {
  795. logfile = "/dev/null";
  796. }
  797. bool child_status_registered = false;
  798. // Set up complete, now fork and exec:
  799. pid_t forkpid;
  800. try {
  801. stop_pipe_watcher.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
  802. child_status_registered = true;
  803. // We specify a high priority (i.e. low priority value) so that process termination is
  804. // handled early. This means we have always recorded that the process is terminated by the
  805. // time that we handle events that might otherwise cause us to signal the process, so we
  806. // avoid sending a signal to an invalid (and possibly recycled) process ID.
  807. forkpid = stop_watcher.fork(event_loop, reserved_stop_watch, dasynq::DEFAULT_PRIORITY - 10);
  808. reserved_stop_watch = true;
  809. }
  810. catch (std::exception &e) {
  811. log(loglevel_t::ERROR, get_name(), ": could not fork (for stop command): ", e.what());
  812. goto out_cs_h;
  813. }
  814. if (forkpid == 0) {
  815. close(pipefd[0]);
  816. const char * working_dir_c = nullptr;
  817. if (! working_dir.empty()) working_dir_c = working_dir.c_str();
  818. run_proc_params run_params{cmd.data(), working_dir_c, logfile, pipefd[1], run_as_uid, run_as_gid, rlimits};
  819. run_params.on_console = false;
  820. run_params.in_foreground = false;
  821. run_params.csfd = -1;
  822. run_params.socket_fd = socket_fd;
  823. run_params.notify_fd = -1;
  824. run_params.force_notify_fd = -1;
  825. run_params.notify_var = nullptr;
  826. run_params.env_file = env_file.c_str();
  827. #if SUPPORT_CGROUPS
  828. run_params.run_in_cgroup = run_in_cgroup.c_str();
  829. #endif
  830. run_child_proc(run_params);
  831. }
  832. else {
  833. // Parent process
  834. stop_pid = forkpid;
  835. bp_sys::close(pipefd[1]); // close the 'other end' fd
  836. waiting_for_execstat = true;
  837. return true;
  838. }
  839. // Failure exit:
  840. out_cs_h:
  841. if (child_status_registered) {
  842. stop_pipe_watcher.deregister(event_loop);
  843. }
  844. bp_sys::close(pipefd[0]);
  845. bp_sys::close(pipefd[1]);
  846. return false;
  847. }