proc-service.cc 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900
  1. #include <cstring>
  2. #include <type_traits>
  3. #include <sys/un.h>
  4. #include <sys/socket.h>
  5. #include "dinit.h"
  6. #include "dinit-socket.h"
  7. #include "dinit-util.h"
  8. #include "dinit-log.h"
  9. #include "proc-service.h"
  10. /*
  11. * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
  12. *
  13. * See proc-service.h header for interface details.
  14. */
  15. // Given a string and a list of pairs of (start,end) indices for each argument in that string,
  16. // store a null terminator for the argument. Return a `char *` vector containing the beginning
  17. // of each argument and a trailing nullptr. (The returned array is invalidated if the string is
  18. // later modified).
  19. std::vector<const char *> separate_args(ha_string &s,
  20. const std::list<std::pair<unsigned,unsigned>> &arg_indices)
  21. {
  22. std::vector<const char *> r;
  23. r.reserve(arg_indices.size() + 1);
  24. // First store nul terminator for each part:
  25. for (auto index_pair : arg_indices) {
  26. if (index_pair.second < s.length()) {
  27. s[index_pair.second] = 0;
  28. }
  29. }
  30. // Now we can get the C string (c_str) and store offsets into it:
  31. const char * cstr = s.c_str();
  32. for (auto index_pair : arg_indices) {
  33. r.push_back(cstr + index_pair.first);
  34. }
  35. r.push_back(nullptr);
  36. return r;
  37. }
  38. void process_service::exec_succeeded() noexcept
  39. {
  40. if (get_type() != service_type_t::PROCESS) {
  41. return;
  42. }
  43. tracking_child = true;
  44. // This could be a smooth recovery (state already STARTED). No need to do anything here in
  45. // that case. Otherwise, we are STARTING or STOPPING:
  46. if (get_state() == service_state_t::STARTING) {
  47. if (force_notification_fd != -1 || !notification_var.empty()) {
  48. // Wait for readiness notification:
  49. readiness_watcher.set_enabled(event_loop, true);
  50. }
  51. else {
  52. if (waiting_stopstart_timer) {
  53. process_timer.stop_timer(event_loop);
  54. waiting_stopstart_timer = false;
  55. }
  56. started();
  57. }
  58. }
  59. else if (get_state() == service_state_t::STARTED) {
  60. // Smooth recovery (is now complete)
  61. if (waiting_stopstart_timer) {
  62. process_timer.stop_timer(event_loop);
  63. waiting_stopstart_timer = false;
  64. }
  65. }
  66. else if (get_state() == service_state_t::STOPPING) {
  67. // stopping, but smooth recovery was in process. That's now over so we can
  68. // commence normal stop. Note that if pid == -1 the process already stopped,
  69. // that is correctly handled by bring_down().
  70. if (stop_check_dependents()) {
  71. bring_down();
  72. }
  73. }
  74. }
  75. void scripted_service::exec_succeeded() noexcept
  76. {
  77. // For a scripted service, this means nothing other than that the start/stop
  78. // script will now begin.
  79. }
  80. rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
  81. {
  82. base_process_service *sr = service;
  83. sr->waiting_for_execstat = false;
  84. run_proc_err exec_status;
  85. int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
  86. deregister(loop);
  87. close(get_watched_fd());
  88. if (r > 0) {
  89. // We read an errno code; exec() failed, and the service startup failed.
  90. if (sr->pid != -1) {
  91. sr->child_listener.deregister(event_loop, sr->pid);
  92. sr->reserved_child_watch = false;
  93. if (sr->waiting_stopstart_timer) {
  94. sr->process_timer.stop_timer(loop);
  95. sr->waiting_stopstart_timer = false;
  96. }
  97. }
  98. sr->pid = -1;
  99. sr->exec_err_info = exec_status;
  100. sr->exec_failed(exec_status);
  101. }
  102. else {
  103. sr->exec_succeeded();
  104. if (sr->pid == -1) {
  105. // Somehow the process managed to complete before we even saw the exec() status.
  106. sr->handle_exit_status(sr->exit_status);
  107. }
  108. }
  109. sr->services->process_queues();
  110. return rearm::REMOVED;
  111. }
  112. rearm stop_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
  113. {
  114. process_service *sr = service;
  115. sr->waiting_for_execstat = false;
  116. run_proc_err exec_status;
  117. int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
  118. deregister(loop);
  119. close(get_watched_fd());
  120. if (r > 0) {
  121. // We read an errno code; exec() failed, and the service startup failed.
  122. if (sr->stop_pid != -1) {
  123. log(loglevel_t::ERROR, "Service ", sr->get_name(), ": could not fork for stop command: ",
  124. exec_stage_descriptions[static_cast<int>(exec_status.stage)], ": ",
  125. strerror(exec_status.st_errno));
  126. sr->stop_watcher.deregister(event_loop, sr->stop_pid);
  127. sr->reserved_child_watch = false;
  128. sr->stop_pid = -1;
  129. if (sr->pid != -1) {
  130. if (sr->term_signal != 0) {
  131. sr->kill_pg(sr->term_signal);
  132. }
  133. if (!sr->tracking_child) {
  134. sr->stop_issued = false;
  135. sr->stopped();
  136. }
  137. }
  138. }
  139. }
  140. else {
  141. // Nothing to do really but wait for termination - unless it's already happened, so let's
  142. // check that now:
  143. if (sr->stop_pid == -1) {
  144. sr->handle_stop_exit();
  145. }
  146. }
  147. sr->services->process_queues();
  148. return rearm::REMOVED;
  149. }
  150. rearm ready_notify_watcher::fd_event(eventloop_t &, int fd, int flags) noexcept
  151. {
  152. char buf[128];
  153. if (service->get_state() == service_state_t::STARTING) {
  154. // can we actually read anything from the notification pipe?
  155. int r = bp_sys::read(fd, buf, sizeof(buf));
  156. if (r > 0) {
  157. if (service->waiting_stopstart_timer) {
  158. service->process_timer.stop_timer(event_loop);
  159. service->waiting_stopstart_timer = false;
  160. }
  161. service->started();
  162. }
  163. else if (r == 0 || errno != EAGAIN) {
  164. if (service->waiting_stopstart_timer) {
  165. service->process_timer.stop_timer(event_loop);
  166. service->waiting_stopstart_timer = false;
  167. }
  168. service->set_state(service_state_t::STOPPING);
  169. service->failed_to_start(false, false);
  170. service->bring_down();
  171. }
  172. service->services->process_queues();
  173. }
  174. else {
  175. // Just keep consuming data from the pipe:
  176. int r = bp_sys::read(fd, buf, sizeof(buf));
  177. if (r == 0) {
  178. // Process closed write end or terminated
  179. close(fd);
  180. service->notification_fd = -1;
  181. return rearm::DISARM;
  182. }
  183. }
  184. return rearm::REARM;
  185. }
  186. dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
  187. {
  188. base_process_service *sr = service;
  189. sr->pid = -1;
  190. sr->exit_status = bp_sys::exit_status(status);
  191. // Ok, for a process service, any process death which we didn't rig ourselves is a bit... unexpected.
  192. // Probably, the child died because we asked it to (sr->service_state == STOPPING). But even if we
  193. // didn't, there's not much we can do.
  194. // Must stop watch now since handle_exit_status might result in re-launch:
  195. // (stop_watch instead of deregister, so that we hold watch reservation).
  196. stop_watch(loop);
  197. if (sr->waiting_for_execstat) {
  198. // We still don't have an exec() status from the forked child, wait for that
  199. // before doing any further processing.
  200. return dasynq::rearm::NOOP; // hold watch reservation
  201. }
  202. if (sr->waiting_stopstart_timer) {
  203. sr->process_timer.stop_timer(loop);
  204. sr->waiting_stopstart_timer = false;
  205. }
  206. sr->handle_exit_status(bp_sys::exit_status(status));
  207. return dasynq::rearm::NOOP;
  208. }
  209. dasynq::rearm stop_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
  210. {
  211. process_service *sr = service;
  212. sr->stop_pid = -1;
  213. sr->stop_status = bp_sys::exit_status(status);
  214. stop_watch(loop);
  215. if (sr->waiting_for_execstat) {
  216. // no exec status yet, wait for that first
  217. return dasynq::rearm::NOOP;
  218. }
  219. sr->handle_stop_exit();
  220. sr->services->process_queues();
  221. return dasynq::rearm::NOOP;
  222. }
  223. void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  224. {
  225. bool did_exit = exit_status.did_exit();
  226. bool was_signalled = exit_status.was_signalled();
  227. auto service_state = get_state();
  228. if (notification_fd != -1) {
  229. readiness_watcher.deregister(event_loop);
  230. bp_sys::close(notification_fd);
  231. notification_fd = -1;
  232. }
  233. if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
  234. if (did_exit) {
  235. log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
  236. exit_status.get_exit_status());
  237. }
  238. else if (was_signalled) {
  239. log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
  240. exit_status.get_term_sig());
  241. }
  242. }
  243. if (waiting_stopstart_timer) {
  244. process_timer.stop_timer(event_loop);
  245. waiting_stopstart_timer = false;
  246. }
  247. #if USE_UTMPX
  248. if (*inittab_id || *inittab_line) {
  249. clear_utmp_entry(inittab_id, inittab_line);
  250. }
  251. #endif
  252. if (service_state == service_state_t::STARTING) {
  253. // If state is STARTING, we must be waiting for readiness notification; the process has
  254. // terminated before becoming ready.
  255. stop_reason = stopped_reason_t::FAILED;
  256. service_state = service_state_t::STOPPING;
  257. failed_to_start();
  258. }
  259. else if (service_state == service_state_t::STOPPING) {
  260. // We won't log a non-zero exit status or termination due to signal here -
  261. // we assume that the process died because we signalled it.
  262. if (waiting_stopstart_timer) {
  263. process_timer.stop_timer(event_loop);
  264. }
  265. if (!waiting_for_deps) {
  266. if (stop_pid == -1 && !waiting_for_execstat) {
  267. stop_issued = false; // reset for next time
  268. stopped();
  269. }
  270. }
  271. else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
  272. initiate_start();
  273. }
  274. }
  275. else if (smooth_recovery && service_state == service_state_t::STARTED && check_restart()) {
  276. // unexpected termination, with smooth recovery
  277. doing_smooth_recovery = true;
  278. do_smooth_recovery();
  279. return;
  280. }
  281. else {
  282. handle_unexpected_termination();
  283. }
  284. services->process_queues();
  285. }
  286. void process_service::exec_failed(run_proc_err errcode) noexcept
  287. {
  288. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  289. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  290. if (waiting_stopstart_timer) {
  291. process_timer.stop_timer(event_loop);
  292. waiting_stopstart_timer = false;
  293. }
  294. if (notification_fd != -1) {
  295. readiness_watcher.deregister(event_loop);
  296. bp_sys::close(notification_fd);
  297. notification_fd = -1;
  298. }
  299. if (get_state() == service_state_t::STARTING) {
  300. stop_reason = stopped_reason_t::EXECFAILED;
  301. set_state(service_state_t::STOPPING);
  302. failed_to_start();
  303. }
  304. else {
  305. // Process service in smooth recovery:
  306. doing_smooth_recovery = false;
  307. stop_reason = stopped_reason_t::TERMINATED;
  308. unrecoverable_stop();
  309. }
  310. }
  311. void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  312. {
  313. // For bgproc services, receiving exit status can mean one of two things:
  314. // 1. We were launching the process, and it finished (possibly after forking). If it did fork
  315. // we want to obtain the process id of the process that we should now monitor, the actual
  316. // daemon. Or,
  317. // 2. The above has already happened, and we are monitoring the daemon process, which has now
  318. // terminated for some reason.
  319. begin:
  320. bool did_exit = exit_status.did_exit();
  321. bool was_signalled = exit_status.was_signalled();
  322. auto service_state = get_state();
  323. if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
  324. if (did_exit) {
  325. log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
  326. exit_status.get_exit_status());
  327. }
  328. else if (was_signalled) {
  329. log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
  330. exit_status.get_term_sig());
  331. }
  332. }
  333. if (waiting_stopstart_timer) {
  334. process_timer.stop_timer(event_loop);
  335. waiting_stopstart_timer = false;
  336. }
  337. if (doing_smooth_recovery) {
  338. doing_smooth_recovery = false;
  339. // We're either started, or stopping (i.e. we were requested to stop during smooth recovery).
  340. if (service_state == service_state_t::STOPPING) {
  341. // Stop was issued during smooth recovery
  342. if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
  343. if (!waiting_for_deps) {
  344. stopped();
  345. }
  346. else if (get_target_state() == service_state_t::STARTED && !pinned_stopped) {
  347. initiate_start();
  348. }
  349. }
  350. else {
  351. // We need to re-read the PID, since it has now changed.
  352. if (pid_file.length() != 0) {
  353. auto pid_result = read_pid_file(&exit_status);
  354. if (waiting_for_deps) {
  355. // don't do anything else until dependents have stopped
  356. return;
  357. }
  358. switch (pid_result) {
  359. case pid_result_t::FAILED:
  360. case pid_result_t::TERMINATED:
  361. // Failed startup: no auto-restart.
  362. stopped();
  363. break;
  364. case pid_result_t::OK:
  365. // We now need to bring down the daemon process
  366. bring_down();
  367. break;
  368. }
  369. }
  370. }
  371. services->process_queues();
  372. return;
  373. }
  374. else /* if (service_state == service_state_t::STARTED) */ {
  375. bool need_stop = false;
  376. if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
  377. need_stop = true;
  378. }
  379. else {
  380. // We need to re-read the PID, since it has now changed.
  381. if (pid_file.length() != 0) {
  382. auto pid_result = read_pid_file(&exit_status);
  383. switch (pid_result) {
  384. case pid_result_t::FAILED:
  385. // Failed startup: no auto-restart.
  386. need_stop = true;
  387. break;
  388. case pid_result_t::TERMINATED:
  389. goto begin;
  390. case pid_result_t::OK:
  391. break;
  392. }
  393. }
  394. }
  395. if (need_stop) {
  396. // Failed startup: no auto-restart.
  397. stop_reason = stopped_reason_t::TERMINATED;
  398. unrecoverable_stop();
  399. services->process_queues();
  400. }
  401. return;
  402. }
  403. }
  404. if (service_state == service_state_t::STARTING) {
  405. if (exit_status.did_exit_clean()) {
  406. auto pid_result = read_pid_file(&exit_status);
  407. switch (pid_result) {
  408. case pid_result_t::FAILED:
  409. // Failed startup: no auto-restart.
  410. stop_reason = stopped_reason_t::FAILED;
  411. service_state = service_state_t::STOPPING;
  412. failed_to_start();
  413. break;
  414. case pid_result_t::TERMINATED:
  415. // started, but immediately terminated
  416. started();
  417. goto begin;
  418. case pid_result_t::OK:
  419. started();
  420. break;
  421. }
  422. }
  423. else {
  424. stop_reason = stopped_reason_t::FAILED;
  425. service_state = service_state_t::STOPPING;
  426. failed_to_start();
  427. }
  428. }
  429. else if (service_state == service_state_t::STOPPING) {
  430. // We won't log a non-zero exit status or termination due to signal here -
  431. // we assume that the process died because we signalled it.
  432. if (stop_pid == -1 && !waiting_for_execstat) {
  433. stopped();
  434. }
  435. }
  436. else {
  437. // we must be STARTED
  438. if (smooth_recovery && get_target_state() == service_state_t::STARTED && check_restart()) {
  439. doing_smooth_recovery = true;
  440. do_smooth_recovery();
  441. if (get_state() != service_state_t::STARTED) {
  442. doing_smooth_recovery = false;
  443. }
  444. return;
  445. }
  446. handle_unexpected_termination();
  447. }
  448. services->process_queues();
  449. }
  450. void bgproc_service::exec_failed(run_proc_err errcode) noexcept
  451. {
  452. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  453. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  454. if (waiting_stopstart_timer) {
  455. process_timer.stop_timer(event_loop);
  456. waiting_stopstart_timer = false;
  457. }
  458. if (doing_smooth_recovery) {
  459. doing_smooth_recovery = false;
  460. stop_reason = stopped_reason_t::TERMINATED;
  461. unrecoverable_stop();
  462. }
  463. else {
  464. // Only time we execute is for startup:
  465. stop_reason = stopped_reason_t::EXECFAILED;
  466. set_state(service_state_t::STOPPING);
  467. failed_to_start();
  468. }
  469. }
  470. void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
  471. {
  472. bool did_exit = exit_status.did_exit();
  473. bool was_signalled = exit_status.was_signalled();
  474. auto service_state = get_state();
  475. // For a scripted service, a termination occurs in one of three main cases:
  476. // - the start script completed (or failed), when service was STARTING
  477. // - the start script was interrupted to cancel startup; state is STOPPING
  478. // - the stop script complete (or failed), state is STOPPING
  479. if (service_state == service_state_t::STOPPING) {
  480. // We might be running the stop script, or we might be running the start script and have issued
  481. // a cancel order via SIGINT:
  482. if (interrupting_start) {
  483. if (waiting_stopstart_timer) {
  484. process_timer.stop_timer(event_loop);
  485. waiting_stopstart_timer = false;
  486. }
  487. // We issued a start interrupt, so we expected this failure:
  488. if (did_exit && exit_status.get_exit_status() != 0) {
  489. log(loglevel_t::NOTICE, "Service ", get_name(), " start cancelled; exit code ",
  490. exit_status.get_exit_status());
  491. // Assume that a command terminating normally (with failure status) requires no cleanup:
  492. stopped();
  493. }
  494. else {
  495. if (was_signalled) {
  496. log(loglevel_t::NOTICE, "Service ", get_name(), " start cancelled from signal ",
  497. exit_status.get_term_sig());
  498. }
  499. // If the start script completed successfully, or was interrupted via our signal,
  500. // we want to run the stop script to clean up:
  501. bring_down();
  502. }
  503. interrupting_start = false;
  504. }
  505. else if (exit_status.did_exit_clean()) {
  506. // We were running the stop script and finished successfully
  507. stopped();
  508. }
  509. else {
  510. // ??? failed to stop! Let's log it as warning:
  511. if (did_exit) {
  512. log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
  513. exit_status.get_exit_status());
  514. }
  515. else if (was_signalled) {
  516. log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
  517. exit_status.get_term_sig());
  518. }
  519. // Even if the stop script failed, assume that service is now stopped, so that any dependencies
  520. // can be stopped. There's not really any other useful course of action here.
  521. stopped();
  522. }
  523. services->process_queues();
  524. }
  525. else { // STARTING
  526. if (exit_status.did_exit_clean()) {
  527. started();
  528. }
  529. else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
  530. // A skippable service can be skipped by interrupting (eg by ^C if the service
  531. // starts on the console).
  532. start_skipped = true;
  533. started();
  534. }
  535. else {
  536. // failed to start
  537. if (did_exit) {
  538. log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
  539. exit_status.get_exit_status());
  540. }
  541. else if (was_signalled) {
  542. log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
  543. exit_status.get_term_sig());
  544. }
  545. service_state = service_state_t::STOPPED;
  546. stop_reason = stopped_reason_t::FAILED;
  547. failed_to_start();
  548. }
  549. services->process_queues();
  550. }
  551. }
  552. void scripted_service::exec_failed(run_proc_err errcode) noexcept
  553. {
  554. log(loglevel_t::ERROR, get_name(), ": execution failed - ",
  555. exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
  556. auto service_state = get_state();
  557. if (service_state == service_state_t::STARTING) {
  558. stop_reason = stopped_reason_t::EXECFAILED;
  559. service_state = service_state_t::STOPPING;
  560. failed_to_start();
  561. }
  562. else if (service_state == service_state_t::STOPPING) {
  563. // We've logged the failure, but it's probably better not to leave the service in
  564. // STOPPING state:
  565. stopped();
  566. }
  567. }
  // Convert a value to the unsigned counterpart of its (integral) type.
  //
  // The result type is std::make_unsigned<T>::type and the conversion is a plain static_cast,
  // so a negative input maps to the corresponding modular (wrapped) unsigned value.
  template <typename T> typename std::make_unsigned<T>::type make_unsigned_val(T val)
  {
      using unsigned_t = typename std::make_unsigned<T>::type;
      return static_cast<unsigned_t>(val);
  }
  573. bgproc_service::pid_result_t
  574. bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
  575. {
  576. const char *pid_file_c = pid_file.c_str();
  577. int fd = bp_sys::open(pid_file_c, O_CLOEXEC);
  578. if (fd == -1) {
  579. log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
  580. return pid_result_t::FAILED;
  581. }
  582. char pidbuf[21]; // just enough to hold any 64-bit integer
  583. int r = complete_read(fd, pidbuf, 20);
  584. if (r < 0) {
  585. // Could not read from PID file
  586. log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
  587. bp_sys::close(fd);
  588. return pid_result_t::FAILED;
  589. }
  590. bp_sys::close(fd);
  591. pidbuf[r] = 0; // store nul terminator
  592. bool valid_pid = false;
  593. try {
  594. unsigned long long v = std::strtoull(pidbuf, nullptr, 0);
  595. if (v <= make_unsigned_val(std::numeric_limits<pid_t>::max())) {
  596. pid = (pid_t) v;
  597. valid_pid = true;
  598. }
  599. }
  600. catch (std::out_of_range &exc) {
  601. // Too large?
  602. }
  603. catch (std::invalid_argument &exc) {
  604. // Ok, so it doesn't look like a number: proceed...
  605. }
  606. if (valid_pid) {
  607. pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
  608. if (wait_r == -1 && errno == ECHILD) {
  609. // We can't track this child - check process exists:
  610. if (bp_sys::kill(pid, 0) == 0 || errno != ESRCH) {
  611. tracking_child = false;
  612. return pid_result_t::OK;
  613. }
  614. else {
  615. log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
  616. pid = -1;
  617. return pid_result_t::FAILED;
  618. }
  619. }
  620. else if (wait_r == pid) {
  621. pid = -1;
  622. return pid_result_t::TERMINATED;
  623. }
  624. else if (wait_r == 0) {
  625. // We can track the child
  626. child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
  627. tracking_child = true;
  628. reserved_child_watch = true;
  629. return pid_result_t::OK;
  630. }
  631. }
  632. log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
  633. pid = -1;
  634. return pid_result_t::FAILED;
  635. }
  636. void process_service::bring_down() noexcept
  637. {
  638. if (stop_pid != -1 || stop_issued) {
  639. // waiting for stop command to complete (or for process to die after it has complete);
  640. // can't do anything here.
  641. return;
  642. }
  643. if (waiting_for_execstat) {
  644. // The process is still starting. This should be uncommon, but can occur during
  645. // smooth recovery (or it may mean the stop command process is still starting). We can't
  646. // do much now; we have to wait until we get the status, and then act appropriately.
  647. return;
  648. }
  649. else if (pid != -1) {
  650. // The process is still kicking on - must actually kill it.
  651. if (!stop_command.empty() && !stop_issued) {
  652. if (start_stop_process(stop_arg_parts)) {
  653. goto arm_timer;
  654. }
  655. // stop-command failed, need to try something else:
  656. if (term_signal != 0) {
  657. kill_pg(term_signal);
  658. }
  659. else {
  660. kill_pg(SIGKILL);
  661. }
  662. }
  663. else if (term_signal != 0) {
  664. // We signal the process group (-pid) rather than just the process as there's less
  665. // risk then of creating an orphaned process group:
  666. kill_pg(term_signal);
  667. }
  668. if (stop_pid == -1 && !tracking_child) {
  669. // If we have no way of tracking when the child terminates, assume stopped now
  670. stopped();
  671. return;
  672. }
  673. arm_timer:
  674. stop_issued = true; // (don't try again)
  675. // If there's a stop timeout, arm the timer now:
  676. if (stop_timeout != time_val(0,0)) {
  677. process_timer.arm_timer_rel(event_loop, stop_timeout);
  678. waiting_stopstart_timer = true;
  679. }
  680. // The rest is done in handle_exit_status.
  681. }
  682. else {
  683. // The process is already dead (possibly, we are in smooth recovery waiting for timer)
  684. doing_smooth_recovery = false;
  685. if (waiting_restart_timer) {
  686. process_timer.stop_timer(event_loop);
  687. waiting_restart_timer = false;
  688. }
  689. stopped();
  690. }
  691. }
  692. void process_service::kill_with_fire() noexcept
  693. {
  694. base_process_service::kill_with_fire();
  695. if (stop_pid != -1) {
  696. log(loglevel_t::WARN, "Service ", get_name(), " stop command, with pid ", pid,
  697. ", exceeded allowed stop time; killing.");
  698. pid_t pgid = bp_sys::getpgid(stop_pid);
  699. if (pgid == -1) {
  700. // On OpenBSD, not allowed to query pgid of a process in another session, but in that
  701. // case we know the group anyway:
  702. pgid = stop_pid;
  703. }
  704. bp_sys::kill(-pgid, SIGKILL);
  705. }
  706. }
  707. void scripted_service::bring_down() noexcept
  708. {
  709. if (pid != -1) {
  710. // We're already running the stop script; nothing to do.
  711. return;
  712. }
  713. if (stop_command.length() == 0) {
  714. stopped();
  715. }
  716. else if (! start_ps_process(stop_arg_parts, false)) {
  717. // Couldn't execute stop script, but there's not much we can do:
  718. stopped();
  719. }
  720. else {
  721. // successfully started stop script: start kill timer:
  722. if (stop_timeout != time_val(0,0)) {
  723. process_timer.arm_timer_rel(event_loop, stop_timeout);
  724. waiting_stopstart_timer = true;
  725. }
  726. }
  727. }
  728. dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
  729. {
  730. service->timer_expired();
  731. // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
  732. return dasynq::rearm::NOOP;
  733. }
  734. bool process_service::start_stop_process(const std::vector<const char *> &cmd) noexcept
  735. {
  736. // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
  737. // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
  738. // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
  739. // is written to the pipe, and the parent can read it.
  740. int pipefd[2];
  741. if (bp_sys::pipe2(pipefd, O_CLOEXEC)) {
  742. log(loglevel_t::ERROR, get_name(), ": can't create status check pipe (for stop command): ",
  743. strerror(errno));
  744. return false;
  745. }
  746. const char * logfile = this->logfile.c_str();
  747. if (*logfile == 0) {
  748. logfile = "/dev/null";
  749. }
  750. bool child_status_registered = false;
  751. // Set up complete, now fork and exec:
  752. pid_t forkpid;
  753. try {
  754. stop_pipe_watcher.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
  755. child_status_registered = true;
  756. // We specify a high priority (i.e. low priority value) so that process termination is
  757. // handled early. This means we have always recorded that the process is terminated by the
  758. // time that we handle events that might otherwise cause us to signal the process, so we
  759. // avoid sending a signal to an invalid (and possibly recycled) process ID.
  760. forkpid = stop_watcher.fork(event_loop, reserved_stop_watch, dasynq::DEFAULT_PRIORITY - 10);
  761. reserved_stop_watch = true;
  762. }
  763. catch (std::exception &e) {
  764. log(loglevel_t::ERROR, get_name(), ": could not fork (for stop command): ", e.what());
  765. goto out_cs_h;
  766. }
  767. if (forkpid == 0) {
  768. close(pipefd[0]);
  769. const char * working_dir_c = nullptr;
  770. if (! working_dir.empty()) working_dir_c = working_dir.c_str();
  771. run_proc_params run_params{cmd.data(), working_dir_c, logfile, pipefd[1], run_as_uid, run_as_gid, rlimits};
  772. run_params.on_console = false;
  773. run_params.in_foreground = false;
  774. run_params.csfd = -1;
  775. run_params.socket_fd = socket_fd;
  776. run_params.notify_fd = -1;
  777. run_params.force_notify_fd = -1;
  778. run_params.notify_var = nullptr;
  779. run_params.env_file = env_file.c_str();
  780. #if SUPPORT_CGROUPS
  781. run_params.run_in_cgroup = run_in_cgroup.c_str();
  782. #endif
  783. run_child_proc(run_params);
  784. }
  785. else {
  786. // Parent process
  787. stop_pid = forkpid;
  788. bp_sys::close(pipefd[1]); // close the 'other end' fd
  789. waiting_for_execstat = true;
  790. return true;
  791. }
  792. // Failure exit:
  793. out_cs_h:
  794. if (child_status_registered) {
  795. stop_pipe_watcher.deregister(event_loop);
  796. }
  797. bp_sys::close(pipefd[0]);
  798. bp_sys::close(pipefd[1]);
  799. return false;
  800. }