proc-service.cc 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893
  1. #include <cstring>
  2. #include <type_traits>
  3. #include <sys/un.h>
  4. #include <sys/socket.h>
  5. #include "dinit.h"
  6. #include "dinit-socket.h"
  7. #include "dinit-util.h"
  8. #include "dinit-log.h"
  9. #include "proc-service.h"
  10. /*
  11. * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
  12. *
  13. * See proc-service.h header for interface details.
  14. */
  15. // Given a string and a list of pairs of (start,end) indices for each argument in that string,
  16. // store a null terminator for the argument. Return a `char *` vector containing the beginning
  17. // of each argument and a trailing nullptr. (The returned array is invalidated if the string is
  18. // later modified).
  19. std::vector<const char *> separate_args(std::string &s,
  20. const std::list<std::pair<unsigned,unsigned>> &arg_indices)
  21. {
  22. std::vector<const char *> r;
  23. r.reserve(arg_indices.size() + 1);
  24. // First store nul terminator for each part:
  25. for (auto index_pair : arg_indices) {
  26. if (index_pair.second < s.length()) {
  27. s[index_pair.second] = 0;
  28. }
  29. }
  30. // Now we can get the C string (c_str) and store offsets into it:
  31. const char * cstr = s.c_str();
  32. for (auto index_pair : arg_indices) {
  33. r.push_back(cstr + index_pair.first);
  34. }
  35. r.push_back(nullptr);
  36. return r;
  37. }
  38. void process_service::exec_succeeded() noexcept
  39. {
  40. if (get_type() != service_type_t::PROCESS) {
  41. return;
  42. }
  43. tracking_child = true;
  44. // This could be a smooth recovery (state already STARTED). No need to do anything here in
  45. // that case. Otherwise, we are STARTING or STOPPING:
  46. if (get_state() == service_state_t::STARTING) {
  47. if (force_notification_fd != -1 || !notification_var.empty()) {
  48. // Wait for readiness notification:
  49. readiness_watcher.set_enabled(event_loop, true);
  50. }
  51. else {
  52. if (waiting_stopstart_timer) {
  53. process_timer.stop_timer(event_loop);
  54. waiting_stopstart_timer = false;
  55. }
  56. started();
  57. }
  58. }
  59. else if (get_state() == service_state_t::STARTED) {
  60. // Smooth recovery (is now complete)
  61. if (waiting_stopstart_timer) {
  62. process_timer.stop_timer(event_loop);
  63. waiting_stopstart_timer = false;
  64. }
  65. }
  66. else if (get_state() == service_state_t::STOPPING) {
  67. // stopping, but smooth recovery was in process. That's now over so we can
  68. // commence normal stop. Note that if pid == -1 the process already stopped,
  69. // that is correctly handled by bring_down().
  70. if (stop_check_dependents()) {
  71. bring_down();
  72. }
  73. }
  74. }
  75. void scripted_service::exec_succeeded() noexcept
  76. {
  77. // For a scripted service, this means nothing other than that the start/stop
  78. // script will now begin.
  79. }
  80. rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
  81. {
  82. base_process_service *sr = service;
  83. sr->waiting_for_execstat = false;
  84. run_proc_err exec_status;
  85. int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
  86. deregister(loop);
  87. close(get_watched_fd());
  88. if (r > 0) {
  89. // We read an errno code; exec() failed, and the service startup failed.
  90. if (sr->pid != -1) {
  91. sr->child_listener.deregister(event_loop, sr->pid);
  92. sr->reserved_child_watch = false;
  93. if (sr->waiting_stopstart_timer) {
  94. sr->process_timer.stop_timer(loop);
  95. sr->waiting_stopstart_timer = false;
  96. }
  97. }
  98. sr->pid = -1;
  99. sr->exec_err_info = exec_status;
  100. sr->exec_failed(exec_status);
  101. }
  102. else {
  103. sr->exec_succeeded();
  104. if (sr->pid == -1) {
  105. // Somehow the process managed to complete before we even saw the exec() status.
  106. sr->handle_exit_status(sr->exit_status);
  107. }
  108. }
  109. sr->services->process_queues();
  110. return rearm::REMOVED;
  111. }
  112. rearm stop_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
  113. {
  114. process_service *sr = service;
  115. sr->waiting_for_execstat = false;
  116. run_proc_err exec_status;
  117. int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
  118. deregister(loop);
  119. close(get_watched_fd());
  120. if (r > 0) {
  121. // We read an errno code; exec() failed, and the service startup failed.
  122. if (sr->stop_pid != -1) {
  123. log(loglevel_t::ERROR, "Service ", sr->get_name(), ": could not fork for stop command: ",
  124. exec_stage_descriptions[static_cast<int>(exec_status.stage)], ": ",
  125. strerror(exec_status.st_errno));
  126. sr->stop_watcher.deregister(event_loop, sr->stop_pid);
  127. sr->reserved_child_watch = false;
  128. sr->stop_pid = -1;
  129. if (sr->pid != -1) {
  130. if (sr->term_signal != 0) {
  131. sr->kill_pg(sr->term_signal);
  132. }
  133. if (!sr->tracking_child) {
  134. sr->stop_issued = false;
  135. sr->stopped();
  136. }
  137. }
  138. }
  139. }
  140. else {
  141. // Nothing to do really but wait for termination - unless it's already happened, so let's
  142. // check that now:
  143. if (sr->stop_pid == -1) {
  144. sr->handle_stop_exit();
  145. }
  146. }
  147. sr->services->process_queues();
  148. return rearm::REMOVED;
  149. }
  150. rearm ready_notify_watcher::fd_event(eventloop_t &, int fd, int flags) noexcept
  151. {
  152. char buf[128];
  153. if (service->get_state() == service_state_t::STARTING) {
  154. // can we actually read anything from the notification pipe?
  155. int r = bp_sys::read(fd, buf, sizeof(buf));
  156. if (r > 0) {
  157. if (service->waiting_stopstart_timer) {
  158. service->process_timer.stop_timer(event_loop);
  159. service->waiting_stopstart_timer = false;
  160. }
  161. service->started();
  162. }
  163. else if (r == 0 || errno != EAGAIN) {
  164. if (service->waiting_stopstart_timer) {
  165. service->process_timer.stop_timer(event_loop);
  166. service->waiting_stopstart_timer = false;
  167. }
  168. service->failed_to_start(false, false);
  169. service->set_state(service_state_t::STOPPING);
  170. service->bring_down();
  171. }
  172. service->services->process_queues();
  173. }
  174. else {
  175. // Just keep consuming data from the pipe:
  176. int r = bp_sys::read(fd, buf, sizeof(buf));
  177. if (r == 0) {
  178. // Process closed write end or terminated
  179. close(fd);
  180. service->notification_fd = -1;
  181. return rearm::DISARM;
  182. }
  183. }
  184. return rearm::REARM;
  185. }
  186. dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
  187. {
  188. base_process_service *sr = service;
  189. sr->pid = -1;
  190. sr->exit_status = bp_sys::exit_status(status);
  191. // Ok, for a process service, any process death which we didn't rig ourselves is a bit... unexpected.
  192. // Probably, the child died because we asked it to (sr->service_state == STOPPING). But even if we
  193. // didn't, there's not much we can do.
  194. // Must stop watch now since handle_exit_status might result in re-launch:
  195. // (stop_watch instead of deregister, so that we hold watch reservation).
  196. stop_watch(loop);
  197. if (sr->waiting_for_execstat) {
  198. // We still don't have an exec() status from the forked child, wait for that
  199. // before doing any further processing.
  200. return dasynq::rearm::NOOP; // hold watch reservation
  201. }
  202. if (sr->waiting_stopstart_timer) {
  203. sr->process_timer.stop_timer(loop);
  204. sr->waiting_stopstart_timer = false;
  205. }
  206. sr->handle_exit_status(bp_sys::exit_status(status));
  207. return dasynq::rearm::NOOP;
  208. }
  209. dasynq::rearm stop_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
  210. {
  211. process_service *sr = service;
  212. sr->stop_pid = -1;
  213. sr->stop_status = bp_sys::exit_status(status);
  214. stop_watch(loop);
  215. if (sr->waiting_for_execstat) {
  216. // no exec status yet, wait for that first
  217. return dasynq::rearm::NOOP;
  218. }
  219. sr->handle_stop_exit();
  220. sr->services->process_queues();
  221. return dasynq::rearm::NOOP;
  222. }
  223. void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  224. {
  225. bool did_exit = exit_status.did_exit();
  226. bool was_signalled = exit_status.was_signalled();
  227. auto service_state = get_state();
  228. if (notification_fd != -1) {
  229. readiness_watcher.deregister(event_loop);
  230. bp_sys::close(notification_fd);
  231. notification_fd = -1;
  232. }
  233. if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
  234. if (did_exit) {
  235. log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
  236. exit_status.get_exit_status());
  237. }
  238. else if (was_signalled) {
  239. log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
  240. exit_status.get_term_sig());
  241. }
  242. }
  243. if (waiting_stopstart_timer) {
  244. process_timer.stop_timer(event_loop);
  245. waiting_stopstart_timer = false;
  246. }
  247. #if USE_UTMPX
  248. if (*inittab_id || *inittab_line) {
  249. clear_utmp_entry(inittab_id, inittab_line);
  250. }
  251. #endif
  252. if (service_state == service_state_t::STARTING) {
  253. // If state is STARTING, we must be waiting for readiness notification; the process has
  254. // terminated before becoming ready.
  255. stop_reason = stopped_reason_t::FAILED;
  256. failed_to_start();
  257. }
  258. else if (service_state == service_state_t::STOPPING) {
  259. // We won't log a non-zero exit status or termination due to signal here -
  260. // we assume that the process died because we signalled it.
  261. if (waiting_stopstart_timer) {
  262. process_timer.stop_timer(event_loop);
  263. }
  264. if (!waiting_for_deps) {
  265. if (stop_pid == -1 && !waiting_for_execstat) {
  266. stop_issued = false; // reset for next time
  267. stopped();
  268. }
  269. }
  270. else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
  271. initiate_start();
  272. }
  273. }
  274. else if (smooth_recovery && service_state == service_state_t::STARTED) {
  275. // unexpected termination, with smooth recovery
  276. doing_smooth_recovery = true;
  277. do_smooth_recovery();
  278. return;
  279. }
  280. else {
  281. handle_unexpected_termination();
  282. }
  283. services->process_queues();
  284. }
  285. void process_service::exec_failed(run_proc_err errcode) noexcept
  286. {
  287. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  288. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  289. if (waiting_stopstart_timer) {
  290. process_timer.stop_timer(event_loop);
  291. waiting_stopstart_timer = false;
  292. }
  293. if (notification_fd != -1) {
  294. readiness_watcher.deregister(event_loop);
  295. bp_sys::close(notification_fd);
  296. notification_fd = -1;
  297. }
  298. if (get_state() == service_state_t::STARTING) {
  299. stop_reason = stopped_reason_t::EXECFAILED;
  300. failed_to_start();
  301. }
  302. else {
  303. // Process service in smooth recovery:
  304. doing_smooth_recovery = false;
  305. stop_reason = stopped_reason_t::TERMINATED;
  306. unrecoverable_stop();
  307. }
  308. }
  309. void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  310. {
  311. // For bgproc services, receiving exit status can mean one of two things:
  312. // 1. We were launching the process, and it finished (possibly after forking). If it did fork
  313. // we want to obtain the process id of the process that we should now monitor, the actual
  314. // daemon. Or,
  315. // 2. The above has already happened, and we are monitoring the daemon process, which has now
  316. // terminated for some reason.
  317. begin:
  318. bool did_exit = exit_status.did_exit();
  319. bool was_signalled = exit_status.was_signalled();
  320. auto service_state = get_state();
  321. if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
  322. if (did_exit) {
  323. log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
  324. exit_status.get_exit_status());
  325. }
  326. else if (was_signalled) {
  327. log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
  328. exit_status.get_term_sig());
  329. }
  330. }
  331. if (waiting_stopstart_timer) {
  332. process_timer.stop_timer(event_loop);
  333. waiting_stopstart_timer = false;
  334. }
  335. if (doing_smooth_recovery) {
  336. doing_smooth_recovery = false;
  337. // We're either started, or stopping (i.e. we were requested to stop during smooth recovery).
  338. if (service_state == service_state_t::STOPPING) {
  339. // Stop was issued during smooth recovery
  340. if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
  341. if (!waiting_for_deps) {
  342. stopped();
  343. }
  344. else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
  345. initiate_start();
  346. }
  347. }
  348. else {
  349. // We need to re-read the PID, since it has now changed.
  350. if (pid_file.length() != 0) {
  351. auto pid_result = read_pid_file(&exit_status);
  352. if (waiting_for_deps) {
  353. // don't do anything else until dependents have stopped
  354. return;
  355. }
  356. switch (pid_result) {
  357. case pid_result_t::FAILED:
  358. case pid_result_t::TERMINATED:
  359. // Failed startup: no auto-restart.
  360. stopped();
  361. break;
  362. case pid_result_t::OK:
  363. // We now need to bring down the daemon process
  364. bring_down();
  365. break;
  366. }
  367. }
  368. }
  369. services->process_queues();
  370. return;
  371. }
  372. else /* if (service_state == service_state_t::STARTED) */ {
  373. bool need_stop = false;
  374. if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
  375. need_stop = true;
  376. }
  377. else {
  378. // We need to re-read the PID, since it has now changed.
  379. if (pid_file.length() != 0) {
  380. auto pid_result = read_pid_file(&exit_status);
  381. switch (pid_result) {
  382. case pid_result_t::FAILED:
  383. // Failed startup: no auto-restart.
  384. need_stop = true;
  385. break;
  386. case pid_result_t::TERMINATED:
  387. goto begin;
  388. case pid_result_t::OK:
  389. break;
  390. }
  391. }
  392. }
  393. if (need_stop) {
  394. // Failed startup: no auto-restart.
  395. stop_reason = stopped_reason_t::TERMINATED;
  396. unrecoverable_stop();
  397. services->process_queues();
  398. }
  399. return;
  400. }
  401. }
  402. if (service_state == service_state_t::STARTING) {
  403. if (exit_status.did_exit_clean()) {
  404. auto pid_result = read_pid_file(&exit_status);
  405. switch (pid_result) {
  406. case pid_result_t::FAILED:
  407. // Failed startup: no auto-restart.
  408. stop_reason = stopped_reason_t::FAILED;
  409. failed_to_start();
  410. break;
  411. case pid_result_t::TERMINATED:
  412. // started, but immediately terminated
  413. started();
  414. goto begin;
  415. case pid_result_t::OK:
  416. started();
  417. break;
  418. }
  419. }
  420. else {
  421. stop_reason = stopped_reason_t::FAILED;
  422. failed_to_start();
  423. }
  424. }
  425. else if (service_state == service_state_t::STOPPING) {
  426. // We won't log a non-zero exit status or termination due to signal here -
  427. // we assume that the process died because we signalled it.
  428. if (stop_pid == -1 && !waiting_for_execstat) {
  429. stopped();
  430. }
  431. }
  432. else {
  433. // we must be STARTED
  434. if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
  435. doing_smooth_recovery = true;
  436. do_smooth_recovery();
  437. if (get_state() != service_state_t::STARTED) {
  438. doing_smooth_recovery = false;
  439. }
  440. return;
  441. }
  442. handle_unexpected_termination();
  443. }
  444. services->process_queues();
  445. }
  446. void bgproc_service::exec_failed(run_proc_err errcode) noexcept
  447. {
  448. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  449. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  450. if (waiting_stopstart_timer) {
  451. process_timer.stop_timer(event_loop);
  452. waiting_stopstart_timer = false;
  453. }
  454. if (doing_smooth_recovery) {
  455. doing_smooth_recovery = false;
  456. stop_reason = stopped_reason_t::TERMINATED;
  457. unrecoverable_stop();
  458. }
  459. else {
  460. // Only time we execute is for startup:
  461. stop_reason = stopped_reason_t::EXECFAILED;
  462. failed_to_start();
  463. }
  464. }
  465. void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  466. {
  467. bool did_exit = exit_status.did_exit();
  468. bool was_signalled = exit_status.was_signalled();
  469. auto service_state = get_state();
  470. // For a scripted service, a termination occurs in one of three main cases:
  471. // - the start script completed (or failed), when service was STARTING
  472. // - the start script was interrupted to cancel startup; state is STOPPING
  473. // - the stop script complete (or failed), state is STOPPING
  474. if (service_state == service_state_t::STOPPING) {
  475. // We might be running the stop script, or we might be running the start script and have issued
  476. // a cancel order via SIGINT:
  477. if (interrupting_start) {
  478. if (waiting_stopstart_timer) {
  479. process_timer.stop_timer(event_loop);
  480. waiting_stopstart_timer = false;
  481. }
  482. // We issued a start interrupt, so we expected this failure:
  483. if (did_exit && exit_status.get_exit_status() != 0) {
  484. log(loglevel_t::NOTICE, "Service ", get_name(), " start cancelled; exit code ",
  485. exit_status.get_exit_status());
  486. // Assume that a command terminating normally (with failure status) requires no cleanup:
  487. stopped();
  488. }
  489. else {
  490. if (was_signalled) {
  491. log(loglevel_t::NOTICE, "Service ", get_name(), " start cancelled from signal ",
  492. exit_status.get_term_sig());
  493. }
  494. // If the start script completed successfully, or was interrupted via our signal,
  495. // we want to run the stop script to clean up:
  496. bring_down();
  497. }
  498. interrupting_start = false;
  499. }
  500. else if (exit_status.did_exit_clean()) {
  501. // We were running the stop script and finished successfully
  502. stopped();
  503. }
  504. else {
  505. // ??? failed to stop! Let's log it as warning:
  506. if (did_exit) {
  507. log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
  508. exit_status.get_exit_status());
  509. }
  510. else if (was_signalled) {
  511. log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
  512. exit_status.get_term_sig());
  513. }
  514. // Even if the stop script failed, assume that service is now stopped, so that any dependencies
  515. // can be stopped. There's not really any other useful course of action here.
  516. stopped();
  517. }
  518. services->process_queues();
  519. }
  520. else { // STARTING
  521. if (exit_status.did_exit_clean()) {
  522. started();
  523. }
  524. else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
  525. // A skippable service can be skipped by interrupting (eg by ^C if the service
  526. // starts on the console).
  527. start_skipped = true;
  528. started();
  529. }
  530. else {
  531. // failed to start
  532. if (did_exit) {
  533. log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
  534. exit_status.get_exit_status());
  535. }
  536. else if (was_signalled) {
  537. log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
  538. exit_status.get_term_sig());
  539. }
  540. stop_reason = stopped_reason_t::FAILED;
  541. failed_to_start();
  542. }
  543. services->process_queues();
  544. }
  545. }
  546. void scripted_service::exec_failed(run_proc_err errcode) noexcept
  547. {
  548. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  549. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  550. auto service_state = get_state();
  551. if (service_state == service_state_t::STARTING) {
  552. stop_reason = stopped_reason_t::EXECFAILED;
  553. failed_to_start();
  554. }
  555. else if (service_state == service_state_t::STOPPING) {
  556. // We've logged the failure, but it's probably better not to leave the service in
  557. // STOPPING state:
  558. stopped();
  559. }
  560. }
  561. // Return a value as an unsigned-type value.
  562. template <typename T> typename std::make_unsigned<T>::type make_unsigned_val(T val)
  563. {
  564. return static_cast<typename std::make_unsigned<T>::type>(val);
  565. }
  566. bgproc_service::pid_result_t
  567. bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
  568. {
  569. const char *pid_file_c = pid_file.c_str();
  570. int fd = bp_sys::open(pid_file_c, O_CLOEXEC);
  571. if (fd == -1) {
  572. log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
  573. return pid_result_t::FAILED;
  574. }
  575. char pidbuf[21]; // just enough to hold any 64-bit integer
  576. int r = complete_read(fd, pidbuf, 20);
  577. if (r < 0) {
  578. // Could not read from PID file
  579. log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
  580. bp_sys::close(fd);
  581. return pid_result_t::FAILED;
  582. }
  583. bp_sys::close(fd);
  584. pidbuf[r] = 0; // store nul terminator
  585. bool valid_pid = false;
  586. try {
  587. unsigned long long v = std::strtoull(pidbuf, nullptr, 0);
  588. if (v <= make_unsigned_val(std::numeric_limits<pid_t>::max())) {
  589. pid = (pid_t) v;
  590. valid_pid = true;
  591. }
  592. }
  593. catch (std::out_of_range &exc) {
  594. // Too large?
  595. }
  596. catch (std::invalid_argument &exc) {
  597. // Ok, so it doesn't look like a number: proceed...
  598. }
  599. if (valid_pid) {
  600. pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
  601. if (wait_r == -1 && errno == ECHILD) {
  602. // We can't track this child - check process exists:
  603. if (bp_sys::kill(pid, 0) == 0 || errno != ESRCH) {
  604. tracking_child = false;
  605. return pid_result_t::OK;
  606. }
  607. else {
  608. log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
  609. pid = -1;
  610. return pid_result_t::FAILED;
  611. }
  612. }
  613. else if (wait_r == pid) {
  614. pid = -1;
  615. return pid_result_t::TERMINATED;
  616. }
  617. else if (wait_r == 0) {
  618. // We can track the child
  619. child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
  620. tracking_child = true;
  621. reserved_child_watch = true;
  622. return pid_result_t::OK;
  623. }
  624. }
  625. log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
  626. pid = -1;
  627. return pid_result_t::FAILED;
  628. }
  629. void process_service::bring_down() noexcept
  630. {
  631. if (stop_pid != -1 || stop_issued) {
  632. // waiting for stop command to complete (or for process to die after it has complete);
  633. // can't do anything here.
  634. return;
  635. }
  636. if (waiting_for_execstat) {
  637. // The process is still starting. This should be uncommon, but can occur during
  638. // smooth recovery (or it may mean the stop command process is still starting). We can't
  639. // do much now; we have to wait until we get the status, and then act appropriately.
  640. return;
  641. }
  642. else if (pid != -1) {
  643. // The process is still kicking on - must actually kill it.
  644. if (!stop_command.empty() && !stop_issued) {
  645. if (start_stop_process(stop_arg_parts)) {
  646. goto arm_timer;
  647. }
  648. // stop-command failed, need to try something else:
  649. if (term_signal != 0) {
  650. kill_pg(term_signal);
  651. }
  652. else {
  653. kill_pg(SIGKILL);
  654. }
  655. }
  656. else if (term_signal != 0) {
  657. // We signal the process group (-pid) rather than just the process as there's less
  658. // risk then of creating an orphaned process group:
  659. kill_pg(term_signal);
  660. }
  661. if (stop_pid == -1 && !tracking_child) {
  662. // If we have no way of tracking when the child terminates, assume stopped now
  663. stopped();
  664. return;
  665. }
  666. arm_timer:
  667. stop_issued = true; // (don't try again)
  668. // If there's a stop timeout, arm the timer now:
  669. if (stop_timeout != time_val(0,0)) {
  670. process_timer.arm_timer_rel(event_loop, stop_timeout);
  671. waiting_stopstart_timer = true;
  672. }
  673. // The rest is done in handle_exit_status.
  674. }
  675. else {
  676. // The process is already dead (possibly, we are in smooth recovery waiting for timer)
  677. doing_smooth_recovery = false;
  678. if (waiting_restart_timer) {
  679. process_timer.stop_timer(event_loop);
  680. waiting_restart_timer = false;
  681. }
  682. stopped();
  683. }
  684. }
  685. void process_service::kill_with_fire() noexcept
  686. {
  687. base_process_service::kill_with_fire();
  688. if (stop_pid != -1) {
  689. log(loglevel_t::WARN, "Service ", get_name(), " stop command, with pid ", pid,
  690. ", exceeded allowed stop time; killing.");
  691. pid_t pgid = bp_sys::getpgid(stop_pid);
  692. if (pgid == -1) {
  693. // On OpenBSD, not allowed to query pgid of a process in another session, but in that
  694. // case we know the group anyway:
  695. pgid = stop_pid;
  696. }
  697. bp_sys::kill(-pgid, SIGKILL);
  698. }
  699. }
  700. void scripted_service::bring_down() noexcept
  701. {
  702. if (pid != -1) {
  703. // We're already running the stop script; nothing to do.
  704. return;
  705. }
  706. if (stop_command.length() == 0) {
  707. stopped();
  708. }
  709. else if (! start_ps_process(stop_arg_parts, false)) {
  710. // Couldn't execute stop script, but there's not much we can do:
  711. stopped();
  712. }
  713. else {
  714. // successfully started stop script: start kill timer:
  715. if (stop_timeout != time_val(0,0)) {
  716. process_timer.arm_timer_rel(event_loop, stop_timeout);
  717. waiting_stopstart_timer = true;
  718. }
  719. }
  720. }
  721. dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
  722. {
  723. service->timer_expired();
  724. // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
  725. return dasynq::rearm::NOOP;
  726. }
  727. bool process_service::start_stop_process(const std::vector<const char *> &cmd) noexcept
  728. {
  729. // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
  730. // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
  731. // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
  732. // is written to the pipe, and the parent can read it.
  733. int pipefd[2];
  734. if (bp_sys::pipe2(pipefd, O_CLOEXEC)) {
  735. log(loglevel_t::ERROR, get_name(), ": can't create status check pipe (for stop command): ",
  736. strerror(errno));
  737. return false;
  738. }
  739. const char * logfile = this->logfile.c_str();
  740. if (*logfile == 0) {
  741. logfile = "/dev/null";
  742. }
  743. bool child_status_registered = false;
  744. // Set up complete, now fork and exec:
  745. pid_t forkpid;
  746. try {
  747. stop_pipe_watcher.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
  748. child_status_registered = true;
  749. // We specify a high priority (i.e. low priority value) so that process termination is
  750. // handled early. This means we have always recorded that the process is terminated by the
  751. // time that we handle events that might otherwise cause us to signal the process, so we
  752. // avoid sending a signal to an invalid (and possibly recycled) process ID.
  753. forkpid = stop_watcher.fork(event_loop, reserved_stop_watch, dasynq::DEFAULT_PRIORITY - 10);
  754. reserved_stop_watch = true;
  755. }
  756. catch (std::exception &e) {
  757. log(loglevel_t::ERROR, get_name(), ": could not fork (for stop command): ", e.what());
  758. goto out_cs_h;
  759. }
  760. if (forkpid == 0) {
  761. close(pipefd[0]);
  762. const char * working_dir_c = nullptr;
  763. if (! working_dir.empty()) working_dir_c = working_dir.c_str();
  764. run_proc_params run_params{cmd.data(), working_dir_c, logfile, pipefd[1], run_as_uid, run_as_gid, rlimits};
  765. run_params.on_console = false;
  766. run_params.in_foreground = false;
  767. run_params.csfd = -1;
  768. run_params.socket_fd = socket_fd;
  769. run_params.notify_fd = -1;
  770. run_params.force_notify_fd = force_notification_fd;
  771. run_params.notify_var = nullptr;
  772. run_params.env_file = env_file.c_str();
  773. #if SUPPORT_CGROUPS
  774. run_params.run_in_cgroup = run_in_cgroup.c_str();
  775. #endif
  776. run_child_proc(run_params);
  777. }
  778. else {
  779. // Parent process
  780. stop_pid = forkpid;
  781. bp_sys::close(pipefd[1]); // close the 'other end' fd
  782. waiting_for_execstat = true;
  783. return true;
  784. }
  785. // Failure exit:
  786. out_cs_h:
  787. if (child_status_registered) {
  788. stop_pipe_watcher.deregister(event_loop);
  789. }
  790. bp_sys::close(pipefd[0]);
  791. bp_sys::close(pipefd[1]);
  792. return false;
  793. }