Browse Source

Initial commit.

Davin McCall 8 years ago
commit
150d7e46a7
9 changed files with 1582 additions and 0 deletions
  1. 32 0
      Makefile
  2. 49 0
      README
  3. 44 0
      TODO
  4. 143 0
      control.h
  5. 88 0
      dinit-start.cc
  6. 329 0
      dinit.cc
  7. 200 0
      load_service.cc
  8. 493 0
      service.cc
  9. 204 0
      service.h

+ 32 - 0
Makefile

@@ -0,0 +1,32 @@
+-include mconfig
+
+# The compiler is taken from $(CXX); GNU Make's built-in default is g++, and
+# it can be overridden from mconfig or the command line.
+
+# Every object file produced by the compile rule below.
+objects = dinit.o load_service.o service.o dinit-start.o
+
+# Objects linked into the dinit daemon itself.
+dinit_objects = dinit.o load_service.o service.o
+
+# These targets are commands, not files.
+.PHONY: all install install.man clean
+
+all: dinit dinit-start
+
+dinit: $(dinit_objects)
+	$(CXX) -Wall -o $@ $(dinit_objects) -lev
+
+dinit-start: dinit-start.o
+	$(CXX) -Wall -o $@ dinit-start.o
+
+# Note we use the old ABI on GCC 5.2 to avoid GCC bug 66145.
+$(objects): %.o: %.cc service.h
+	$(CXX) -D_GLIBCXX_USE_CXX11_ABI=0 -std=gnu++11 -c -Os -Wall $< -o $@
+
+# dinit.cc also includes control.h, so dinit.o must be rebuilt when it changes.
+# (Kept after the compile rule so that $< there is still the .cc file.)
+dinit.o: control.h
+
+install: all
+	#install -d $(LOGINBINDIR) $(LOGINDOCDIR)
+	#install -s login $(LOGINBINDIR)
+	#install --mode=644 README $(LOGINDOCDIR)
+	#@echo
+	#@echo "You may also wish to \"make install.man\"."
+
+install.man:
+	#install -d $(MAN1DIR)
+	#install --mode=644 login.1 $(MAN1DIR)
+
+# Remove everything this Makefile builds; -f so a clean tree is not an error.
+clean:
+	rm -f *.o dinit dinit-start

+ 49 - 0
README

@@ -0,0 +1,49 @@
+dinit
+-----
+v0.1
+
+What is it?
+=-=-=-=-=-=
+
+"dinit" is destined to eventually become a replacement "init" program for
+Linux (and possibly other) systems. But it's not there yet.
+
+However, dinit can be used as a general service monitor. Specifically it
+can launch multiple services (generally, "daemon" processes, but see notes
+below) in parallel, with dependency management (i.e. if one service's
+operation depends on another, the latter service will be started first).
+
+For "process" services (the only kind of service that v0.1 supports) dinit
+can monitor the process corresponding to the service, and re-start it if it
+dies. It does this in an intelligent way, first "rolling back" all dependent
+services (which it will later re-start, if configured to do so).
+
+Notes on services
+=-=-=-=-=-=-=-=-=
+
+The only type of service supported in v0.1 is the "process" service. Process
+services are so named because they run as a process which dinit launches
+and monitors directly (this is opposed to "scripted" services, scheduled for
+v0.5, which will be started and stopped with the aid of a script or other
+program, and which cannot be directly monitored by dinit).
+
+Many programs that you might want to run under dinit's supervision can run
+either "in the foreground" or as a daemon ("in the background"), and the
+choice is dictated by a command line switch (for instance the -D and -F
+switches to Samba's "smbd"). Although it might seem counterintuitive,
+the "foreground" mode should be used for programs registered as process
+services in dinit; this allows dinit to monitor the process.
+
+Process services are attractive due to the ease of monitoring (and
+restarting) the service; however, they have one inherent problem, which is
+that dinit cannot tell when the service is truly started. Once the process
+has been launched, dinit assumes that the service has started, but in fact
+there will be a short delay before the process sets itself up, starts
+listening on sockets, etc; during this time any other process (including
+one from a service listed as dependent) which tries to contact it will not
+be able to do so.
+
+
+Things to document about current implementation
+-----------------------------------------------
+* logfiles won't work until filesystem mounted (/dev/null should, though)

+ 44 - 0
TODO

@@ -0,0 +1,44 @@
+* Documentation including sample service definitions
+* if PID = 1, don't interpret command line args as services to start (except for
+  "single").
+* if PID != 1, choose a more sensible service definition directory
+  (something like $HOME/dinit.d)
+* Support "virtual" services (so you don't have to have "scripted" services
+  where the script is /bin/true)
+* Better error handling, logging of errors
+* Implement a control utility to start/stop services after dinit has started
+* Allow command-line arguments to service processes
+* Allow logging tasks to memory (growing or circular buffer) and later
+  switching to disk logging (allows for filesystem mounted readonly on boot)
+* Rate control on process respawn
+* Maybe re-implement "shutdown" ("halt", "reboot") from util-linux to better work
+  with dinit.
+* Support recognising /etc/init.d services automatically (as script services, with
+  no dependency management - or upstart compatible dependency management)
+* Write wtmp entry on startup (see simpleinit)
+* Allow running services as a different UID, resource limits, chroot, cgroups,
+  namespaces (pid/fs/uid), etc
+* Should services be started in a new session (setsid()?) and/or process group
+  (setpgid()?)
+* Make default control socket location build-time configurable
+
+Later:
+* Place some reasonable, soft limit on the number of services to be started
+  simultaneously, to prevent thrashing. Services that are taking a long time
+  to start don't count to the limit. Maybe use CPU/IO usage as a controlling
+  factor.
+* Cron-like tasks (if started, they run a sub-task periodically. Stopping the
+  task will wait until the sub-task is complete). These don't need to be
+  special service type, just have dinit handle cron entries which specify when
+  a particular service needs to be started (and have it not auto-restart).
+* Allow binding AF_UNIX sockets and when receiving connection to them, start some
+  service which implements that service eg:
+   /dev/log -> syslog service
+   /var/run/mdnsd -> mDNSResponder (mdnsd)
+* Allow to run services attached to virtual tty, allow connection to that tty (ala "screen").
+* SystemD-like handling of filesystem mounts (see autofs documentation in kernel)
+
+
+Investigate:
+* What's the best TERM setting? gogetty gives me "linux" but I think other variants may be
+  better.

+ 143 - 0
control.h

@@ -0,0 +1,143 @@
+#include <ev++.h>
+
+// Control connection for dinit
+
+
+// forward-declaration of callback:
+static void control_conn_cb(struct ev_loop * loop, ev_io * w, int revents);
+
+
+// Packet types:
+constexpr static int DINIT_CP_STARTSERVICE = 0;
+constexpr static int DINIT_CP_STOPSERVICE  = 1;
+
+// "packet" format:
+// (1 byte) packet type
+// (N bytes) additional data (service name, etc)
+//   for STARTSERVICE/STOPSERVICE:
+//      (2 bytes) service name length
+//      (M bytes) service name (without nul terminator)
+
+
+// A single control connection. The object registers a libev read watcher on
+// the connection's file descriptor, accumulates incoming bytes in a 1024-byte
+// buffer, and acts on each complete start/stop packet. It deletes itself when
+// the peer closes the connection, on a read error, or on a protocol error.
+class ControlConn
+{
+    struct ev_io iob;
+    struct ev_loop *loop;
+    ServiceSet *service_set;
+    char * iobuf;
+    int bufidx;   // number of valid bytes currently held in iobuf
+    
+    // The packet length before we need to re-check if the packet is complete
+    int chklen;
+    
+    // Set by processPacket() when the peer sent something we cannot parse;
+    // dataReady() then closes the connection.
+    bool bad_conn;
+    
+    // The object owns iobuf and the watcher; copying would double-free.
+    ControlConn(const ControlConn &) = delete;
+    ControlConn & operator=(const ControlConn &) = delete;
+    
+    public:
+    // Take ownership of the (non-blocking) descriptor 'fd' and start watching
+    // it for readability on 'loop'.
+    ControlConn(struct ev_loop * loop, ServiceSet * service_set, int fd) : loop(loop), service_set(service_set), bufidx(0), chklen(0), bad_conn(false)
+    {
+        iobuf = new char[1024];
+    
+        ev_io_init(&iob, control_conn_cb, fd, EV_READ);
+        iob.data = this;
+        ev_io_start(loop, &iob);
+    }
+    
+    // Examine the packet at the start of the buffer. If it is complete, act
+    // on it and remove it from the buffer (bufidx decreases). If it is not
+    // yet complete, set chklen to the number of bytes required and leave the
+    // buffer untouched. If it is malformed, set bad_conn.
+    void processPacket()
+    {
+        using std::string;
+    
+        int pktType = iobuf[0];
+        if (pktType == DINIT_CP_STARTSERVICE || pktType == DINIT_CP_STOPSERVICE) {
+            if (bufidx < 4) {
+                // Need the 3-byte header plus at least one byte of name
+                chklen = 4;
+                return;
+            }
+            
+            uint16_t svcSize;
+            memcpy(&svcSize, iobuf + 1, 2);
+            if (svcSize == 0) {
+                // An empty service name is not valid
+                // TODO error response
+                bad_conn = true;
+                return;
+            }
+            
+            chklen = svcSize + 3;
+            if (chklen > 1024) {
+                // We can't have a service name this long
+                // TODO error response
+                bad_conn = true;
+                return;
+            }
+            
+            if (bufidx < chklen) {
+                // packet not complete yet; read more
+                return;
+            }
+            
+            string serviceName(iobuf + 3, (size_t) svcSize);
+            try {
+                if (pktType == DINIT_CP_STARTSERVICE) {
+                    service_set->startService(serviceName.c_str());
+                }
+                else {
+                    // TODO verify the named service exists?
+                    service_set->stopService(serviceName);
+                }
+            }
+            catch (...) {
+                // We are called from a libev (C) callback, so an exception
+                // must not be allowed to propagate any further.
+                // TODO error response
+            }
+            
+            // Clear the packet from the buffer, keeping any bytes that follow
+            memmove(iobuf, iobuf + chklen, bufidx - chklen);
+            bufidx -= chklen;
+            chklen = 0;
+            return;
+        }
+        
+        // Unknown packet type: there is no way to find the next packet
+        // boundary, so give up on this connection.
+        // TODO error response
+        bad_conn = true;
+    }
+    
+    // Called when the descriptor is readable: read what is available and
+    // process every complete packet now in the buffer. May delete this
+    // object, so the caller must not touch it afterwards.
+    void dataReady()
+    {
+        int fd = iob.fd;
+        int buffree = 1024 - bufidx;
+        
+        int r = read(fd, iobuf + bufidx, buffree);
+        
+        // Note file descriptor is non-blocking
+        if (r == -1) {
+            if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
+                return;
+            }
+            // TODO log error
+            delete this;
+            return;
+        }
+        
+        if (r == 0) {
+            // Peer closed the connection
+            delete this;
+            return;
+        }
+        
+        bufidx += r;
+        
+        // A single read may deliver several packets; handle each complete
+        // one. Stop as soon as processPacket() consumes nothing (incomplete
+        // packet) or flags a protocol error.
+        while (! bad_conn && bufidx > 0 && bufidx >= chklen) {
+            int before = bufidx;
+            processPacket();
+            if (bufidx == before) {
+                break;
+            }
+        }
+        
+        if (bad_conn || bufidx == 1024) {
+            // Malformed or too big packet
+            // TODO log error?
+            // TODO error response?
+            delete this;
+        }
+    }
+    
+    ~ControlConn()
+    {
+        // Stop the watcher before closing the descriptor it refers to
+        ev_io_stop(loop, &iob);
+        close(iob.fd);
+        delete [] iobuf;
+    }
+};
+
+
+// libev read callback for a control connection: the watcher's data pointer
+// holds the owning ControlConn, which is told that data is available.
+static void control_conn_cb(struct ev_loop * loop, ev_io * w, int revents)
+{
+    static_cast<ControlConn *>(w->data)->dataReady();
+}

+ 88 - 0
dinit-start.cc

@@ -0,0 +1,88 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+// #include <netinet/in.h>
+#include <cstdio>
+#include <unistd.h>
+#include <cstring>
+#include <string>
+#include <iostream>
+
+// dinit-start:  utility to start a dinit service
+
+// This utility communicates with the dinit daemon via a unix socket (/dev/dinitctl).
+
+// TODO move these into a common include file:
+constexpr static int DINIT_CP_STARTSERVICE = 0;
+constexpr static int DINIT_CP_STOPSERVICE  = 1;
+
+
+// Entry point for dinit-start: parse the command line, connect to the dinit
+// control socket and send a single "start service" packet.
+// Returns 0 if the request was written in full, 1 otherwise (including when
+// only the help text is shown).
+int main(int argc, char **argv)
+{
+    using namespace std;
+    
+    bool show_help = argc < 2;
+    char *service_name = nullptr;
+        
+    for (int i = 1; i < argc; i++) {
+        if (argv[i][0] == '-') {
+            if (strcmp(argv[i], "--help") == 0) {
+                show_help = true;
+                break;
+            }
+            else {
+                cerr << "Unrecognized command-line parameter: " << argv[i] << endl;
+                return 1;
+            }
+        }
+        else {
+            // service name
+            if (service_name != nullptr) {
+                // TODO support multiple services
+                cerr << "Only one service name may be specified." << endl;
+                return 1;
+            }
+            service_name = argv[i];
+        }
+    }
+
+    if (show_help) {
+        cout << "dinit-start:   start a dinit service" << endl;
+        cout << "  --help           : show this help" << endl;
+        cout << "  <service-name>   : start the named service" << endl;
+        return 1;
+    }
+    
+    // The packet carries the name length in 16 bits, and the daemon rejects
+    // any packet longer than its 1024-byte buffer (3 bytes of which are
+    // header), as well as empty names. Check here so the user gets an error.
+    const size_t max_name_len = 1024 - 3;
+    size_t name_len = strlen(service_name);
+    if (name_len == 0 || name_len > max_name_len) {
+        cerr << "Invalid service name (empty or too long)." << endl;
+        return 1;
+    }
+    
+    int socknum = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (socknum == -1) {
+        perror("socket");
+        return 1;
+    }
+
+    const char *naddr = "/dev/dinitctl";
+    
+    // Zero the whole address so that the path is nul-terminated and the full
+    // structure size can be passed as the address length.
+    struct sockaddr_un name;
+    memset(&name, 0, sizeof(name));
+    name.sun_family = AF_UNIX;
+    strcpy(name.sun_path, naddr);
+    
+    int connr = connect(socknum, (struct sockaddr *) &name, sizeof(name));
+    if (connr == -1) {
+        perror("connect");
+        close(socknum);
+        return 1;
+    }
+    
+    // Build buffer: (1 byte) packet type, (2 bytes) name length, name
+    uint16_t sname_len = (uint16_t) name_len;
+    int bufsize = 3 + sname_len;
+    char * buf = new char[bufsize];
+    
+    buf[0] = DINIT_CP_STARTSERVICE;
+    memcpy(buf + 1, &sname_len, 2);
+    memcpy(buf + 3, service_name, sname_len);
+    
+    // write() may accept fewer bytes than requested; keep going until the
+    // whole packet has been sent or an error occurs.
+    int exit_status = 0;
+    int written = 0;
+    while (written < bufsize) {
+        int r = write(socknum, buf + written, bufsize - written);
+        if (r == -1) {
+            perror("write");
+            exit_status = 1;
+            break;
+        }
+        written += r;
+    }
+    
+    delete [] buf;
+    close(socknum);
+    return exit_status;
+}

+ 329 - 0
dinit.cc

@@ -0,0 +1,329 @@
+#include <iostream>
+#include <cstring>
+#include <csignal>
+#include <list>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "service.h"
+#include "ev++.h"
+#include "control.h"
+
+
+/* TODO: prevent services from respawning too quickly */
+/* TODO: detect/guard against dependency cycles */
+/* TODO: optional automatic restart of services */
+
+/*
+ * "simpleinit" from util-linux package handles signals as follows:
+ * SIGTSTP - spawn no more gettys (in preparation for shutdown etc).
+ *          In dinit terms this should probably mean "no more auto restarts"
+ *          (for any service). (Actually the signal acts as a toggle, if
+ *          respawn is disabled it will be re-enabled and init will
+ *          act as if SIGHUP had also been sent)
+ * SIGTERM - kill spawned gettys (which are still alive)
+ *          Interestingly, simpleinit just sends a SIGTERM to the gettys.
+ *          "shutdown" however has already sent SIGTERM to every process...
+ * "/sbin/initctl -r" - rollback services (ran by "shutdown"/halt etc)
+ *           shouldn't return until all services have been stopped.
+ *           shutdown calls this *after* sending SIGTERM to all processes.
+ *           I guess this allows user processes, if any are still around,
+ *           to die before (or just as) the services fall out from underneath
+ *           them. On the other hand it largely subverts the ordered service
+ *           shutdown that init provides.
+ * SIGQUIT - init will exec() shutdown. shutdown will detect that it is
+ *           running as pid 1 and will just loop and reap child processes.
+ *           This is used by shutdown so that init will not hang on to its
+ *           inode, allowing clean filesystem unmounting.
+ *
+ * Not sent by shutdown:
+ * SIGHUP -  re-read inittab and spawn any new getty entries
+ * SIGINT - (ctrl+alt+del handler) - fork & exec "reboot"
+ * 
+ * On the contrary dinit currently uses:
+ * SIGTERM - roll back services and then exec /sbin/halt
+ * SIGINT - roll back services and then exec /sbin/reboot
+ *
+ * It's an open question about whether dinit should roll back services *before*
+ * running halt/reboot, since those commands should prompt rollback of services
+ * anyway. But it seems safe to do so.
+ */
+
+
+static bool got_sigterm = false;
+
+static ServiceSet *service_set;
+
+static bool am_system_init = false; // true if we are the system init process
+static bool reboot = false; // whether to reboot (instead of halting)
+
+static void sigint_reboot_cb(struct ev_loop *loop, ev_signal *w, int revents);
+static void sigquit_cb(struct ev_loop *loop, ev_signal *w, int revents);
+static void sigterm_cb(struct ev_loop *loop, ev_signal *w, int revents);
+
+static void open_control_socket(struct ev_loop *loop);
+
+struct ev_io control_socket_io;
+
+
+// Entry point for dinit. Parses the command line, installs signal watchers,
+// opens the control socket, starts the requested services (default: "boot")
+// and then runs the event loop until no services remain active. When running
+// as PID 1 it then reboots, halts or re-starts "boot", and never returns.
+int main(int argc, char **argv)
+{
+    using namespace std;
+    
+    am_system_init = (getpid() == 1);
+    
+    if (am_system_init) {
+        // setup STDIN, STDOUT, STDERR so that we can use them
+        int onefd = open("/dev/console", O_RDONLY, 0);
+        if (onefd != -1) {
+            dup2(onefd, 0);
+            if (onefd > 2) {
+                close(onefd);  // only the duplicate on fd 0 is needed
+            }
+        }
+        int twofd = open("/dev/console", O_RDWR, 0);
+        if (twofd != -1) {
+            dup2(twofd, 1);
+            dup2(twofd, 2);
+            if (twofd > 2) {
+                close(twofd);  // only the duplicates on fds 1 and 2 are needed
+            }
+        }
+    }
+    
+    /* Set up signal handlers etc */
+    /* SIGCHLD is ignored by default: good */
+    /* sigemptyset(&sigwait_set); */
+    /* sigaddset(&sigwait_set, SIGCHLD); */
+    /* sigaddset(&sigwait_set, SIGINT); */
+    /* sigaddset(&sigwait_set, SIGTERM); */
+    /* sigprocmask(SIG_BLOCK, &sigwait_set, NULL); */
+    
+    /* list of services to start */
+    list<const char *> services_to_start;
+    
+    /* service directory name */
+    const char * service_dir = "/etc/dinit.d";
+    
+    /* arguments, if given, specify a list of services to start. */
+    /* if none are given the "boot" service is started. */
+    if (argc > 1) {
+      for (int i = 1; i < argc; i++) {
+        if (argv[i][0] == '-') {
+            // An option...
+            if (strcmp(argv[i], "--services-dir") == 0 ||
+                    strcmp(argv[i], "-d") == 0) {
+                ++i;
+                if (i < argc) {
+                    service_dir = argv[i];
+                }
+                else {
+                    // Option given without its argument. As with unrecognized
+                    // options, this is only fatal when we are not PID 1.
+                    cerr << "dinit: '--services-dir' (-d) requires an argument" << endl;
+                    if (! am_system_init) {
+                        return 1;
+                    }
+                }
+            }
+            else if (strcmp(argv[i], "--help") == 0) {
+                cout << "dinit, an init with dependency management" << endl;
+                cout << " --help                         : display help" << endl;
+                cout << " --services-dir <dir>, -d <dir> : set base directory for service description files (-d <dir>)" << endl;
+                cout << " <service-name>                 : start service with name <service-name>" << endl;
+                return 0;
+            }
+            else {
+                // unrecognized
+                if (! am_system_init) {
+                    cerr << "Unrecognized option: " << argv[i] << endl;
+                    return 1;
+                }
+            }
+        }
+        else {
+            services_to_start.push_back(argv[i]);
+        }
+      }
+    }
+    
+    if (services_to_start.empty()) {
+        services_to_start.push_back("boot");
+    }
+
+    // Set up signal handlers
+    ev_signal sigint_ev_signal;
+    if (am_system_init) {
+      ev_signal_init(&sigint_ev_signal, sigint_reboot_cb, SIGINT);
+    }
+    else {
+      ev_signal_init(&sigint_ev_signal, sigterm_cb, SIGINT);
+    }
+    
+    ev_signal sigquit_ev_signal;
+    if (am_system_init) {
+        // PID 1: SIGQUIT exec's shutdown
+        ev_signal_init(&sigquit_ev_signal, sigquit_cb, SIGQUIT);
+    }
+    else {
+        // Otherwise: SIGQUIT terminates dinit
+        ev_signal_init(&sigquit_ev_signal, sigterm_cb, SIGQUIT);
+    }
+    
+    ev_signal sigterm_ev_signal;
+    ev_signal_init(&sigterm_ev_signal, sigterm_cb, SIGTERM);
+    
+    /* Set up libev */
+    struct ev_loop *loop = ev_default_loop(EVFLAG_AUTO /* | EVFLAG_SIGNALFD */);
+    ev_signal_start(loop, &sigint_ev_signal);
+    ev_signal_start(loop, &sigquit_ev_signal);
+    ev_signal_start(loop, &sigterm_ev_signal);
+
+    // Try to open control socket (may fail due to readonly filesystem)
+    open_control_socket(loop);
+    
+    /* start requested services */
+    service_set = new ServiceSet(service_dir);
+    for (list<const char *>::iterator i = services_to_start.begin();
+            i != services_to_start.end();
+            ++i) {
+        try {
+            service_set->startService(*i);
+        }
+        catch (ServiceNotFound &snf) {
+            // TODO log this better
+            std::cerr << "Could not find service: " << snf.serviceName << endl;
+        }
+        catch (std::string &err) {
+            std::cerr << err << std::endl;
+            throw;  // re-throw the original exception object
+        }
+    }
+    
+    event_loop:
+    
+    // Process events until all services have terminated.
+    while (service_set->count_active_services() != 0) {
+        ev_loop(loop, EVLOOP_ONESHOT);
+    }
+    
+    if (am_system_init) {
+        cout << "dinit: No more active services.";
+        if (reboot) {
+            cout << " Will reboot.";
+        }
+        else if (got_sigterm) {
+            cout << " Will halt.";
+        }
+        else {
+            cout << " Re-initiating boot sequence.";
+        }
+        cout << endl;
+    }
+    
+    
+    if (am_system_init) {
+        // In each fork below, the child must not fall through into the PID 1
+        // wait loop if exec fails, hence the _exit().
+        if (reboot) {
+            // TODO log error from fork
+            if (fork() == 0) {
+                execl("/sbin/reboot", "/sbin/reboot", (char *) 0);
+                _exit(1);
+            }
+        }
+        else if (got_sigterm) {
+            // TODO log error from fork
+            if (fork() == 0) {
+                execl("/sbin/halt", "/sbin/halt", (char *) 0);
+                _exit(1);
+            }
+        }
+        else {
+            // Hmmmmmm.
+            // It could be that we started in single user mode, and the
+            // user has now exited the shell. We'll try and re-start the
+            // boot process...
+            try {
+                service_set->startService("boot");
+                goto event_loop; // yes, the "evil" goto
+            }
+            catch (...) {
+                // TODO catch exceptions and log message as appropriate
+                // Now WTF do we do? try and reboot
+                if (fork() == 0) {
+                    execl("/sbin/reboot", "/sbin/reboot", (char *) 0);
+                    _exit(1);
+                }
+            }
+        }
+        
+        // PID 1 should never exit:
+        while (true) {
+            pause();
+        }
+    }
+    
+    return 0;
+}
+
+// Callback for control socket
+// libev callback for the listening control socket: accept one pending
+// connection and hand it to a new ControlConn.
+static void control_socket_cb(struct ev_loop *loop, ev_io *w, int revents)
+{
+    int connfd = accept4(w->fd, nullptr, nullptr, SOCK_NONBLOCK | SOCK_CLOEXEC);
+    if (connfd == -1) {
+        // Nothing was accepted; nothing to do.
+        return;
+    }
+    
+    // The connection object deletes itself when it is finished.
+    // TODO keep a set of control connections so that we can close them when
+    // terminating?
+    new ControlConn(loop, service_set, connfd);
+}
+
+// Create the unix-domain control socket, bind it to its filesystem path,
+// start listening and register a read watcher for it on 'loop'. Any failure
+// is reported on stderr and leaves dinit running without a control socket.
+static void open_control_socket(struct ev_loop *loop)
+{
+    // TODO make this use a per-user address if PID != 1, and make the address
+    // overridable from the command line
+    
+    const char * saddrname = "/dev/dinitctl";
+    struct sockaddr_un name;
+
+    // Make sure the path (plus nul terminator) fits before copying it
+    if (strlen(saddrname) >= sizeof(name.sun_path)) {
+        // TODO log error
+        std::cerr << "dinit: control socket path is too long" << std::endl;
+        return;
+    }
+
+    // Zero the whole address so that the path is nul-terminated and the full
+    // structure size can be passed as the address length.
+    memset(&name, 0, sizeof(name));
+    name.sun_family = AF_UNIX;
+    strcpy(name.sun_path, saddrname);
+    
+    int sockfd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
+    if (sockfd == -1) {
+        // TODO log error
+        perror("socket");
+        return;
+    }
+    
+    // Remove any stale socket file so that bind() can create it afresh
+    unlink(saddrname);
+    
+    if (bind(sockfd, (struct sockaddr *) &name, sizeof(name)) == -1) {
+        // TODO log error
+        perror("bind");
+        close(sockfd);
+        return;
+    }
+    
+    if (listen(sockfd, 10) == -1) {
+        // TODO log error
+        perror("listen");
+        close(sockfd);
+        return;
+    }
+    
+    ev_io_init(&control_socket_io, control_socket_cb, sockfd, EV_READ);
+    ev_io_start(loop, &control_socket_io);
+}
+
+/* handle SIGINT signal (generated by kernel when ctrl+alt+del pressed) */
+// SIGINT watcher callback used when dinit is the system init: record that a
+// reboot was requested, then ask the service set to stop every service.
+static void sigint_reboot_cb(struct ev_loop *loop, ev_signal *w, int revents)
+{
+    // main() checks this flag once no services remain active
+    reboot = true;
+    
+    service_set->stop_all_services();
+}
+
+/* handle SIGQUIT (if we are system init) */
+// SIGQUIT watcher callback used when dinit is the system init: replace this
+// process image with /sbin/shutdown.
+static void sigquit_cb(struct ev_loop *loop, ev_signal *w, int revents)
+{
+    // If the dinit binary has been unlinked, the kernel keeps it open for as
+    // long as it is running, so it cannot be properly removed. Exec'ing
+    // another program lets the filesystem be remounted read-only.
+    const char * shutdown_path = "/sbin/shutdown";
+    execl(shutdown_path, shutdown_path, (char *) 0);
+}
+
+/* handle SIGTERM - stop all services */
+// Termination watcher callback: record that termination was requested, then
+// ask the service set to stop every service.
+static void sigterm_cb(struct ev_loop *loop, ev_signal *w, int revents)
+{
+    // main() checks this flag once no services remain active
+    got_sigterm = true;
+    
+    service_set->stop_all_services();
+}

+ 200 - 0
load_service.cc

@@ -0,0 +1,200 @@
+#include "service.h"
+#include <string>
+#include <fstream>
+#include <locale>
+#include <iostream>
+
+typedef std::string string;
+typedef std::string::iterator string_iterator;
+
+// Utility function to skip white space. Returns an iterator at the
+// first non-white-space position (or at end).
+static string_iterator skipws(string_iterator i, string_iterator end)
+{
+    // White space is classified using the classic "C" locale.
+    const std::locale & c_locale = std::locale::classic();
+    
+    // Advance until the end, or until a character that is not white space.
+    for ( ; i != end && std::isspace(*i, c_locale); ++i) {
+    }
+    return i;
+}
+
+// Read a setting name.
+// Reads the longest run of alphabetic (classic locale) or dash characters
+// starting at *i, and returns it. On return *i is positioned at the first
+// character that is not part of the name (or at end).
+static string read_setting_name(string_iterator * const i, string_iterator end)
+{
+    const std::ctype<char> & char_type =
+            std::use_facet<std::ctype<char> >(std::locale::classic());
+    string_iterator & pos = *i;
+
+    string name;
+    for ( ; pos != end; ++pos) {
+        const char ch = *pos;
+        const bool allowed = (ch == '-') || char_type.is(std::ctype<char>::alpha, ch);
+        if (! allowed) {
+            break;
+        }
+        name += ch;
+    }
+    return name;
+}
+
+// Read a setting value
+// Try to allow quoted strings:
+// Leading white space is skipped; each interior run of white space becomes a
+// single space; trailing white space is dropped; a '#' ends the value.
+// On return *i is positioned where reading stopped.
+static string read_setting_value(string_iterator * const i, string_iterator end)
+{
+    // TODO handle quoting, error if multiple white-space separated strings
+    // occur without quoting (unless the second one is a '#' comment)
+    const std::locale & c_locale = std::locale::classic();
+    string_iterator & pos = *i;
+
+    pos = skipws(pos, end);
+    
+    string value;
+    while (pos != end) {
+        const char ch = *pos;
+        
+        if (std::isspace(ch, c_locale)) {
+            // Look past the white space to decide whether the value goes on
+            pos = skipws(pos, end);
+            if (pos == end || *pos == '#') {
+                break;  // end of line, or a comment
+            }
+            value += ' ';  // collapse ws to a single space
+            continue;
+        }
+        
+        if (ch == '#') {
+            // Probably a comment, though it should have been preceded by a
+            // space. TODO throw an exception, and document that '#' for
+            // comments must be preceded by space, and in values must be
+            // quoted.
+            break;
+        }
+        
+        // Any other character (including '"': quoting is still a TODO) is
+        // taken literally.
+        value += ch;
+        ++pos;
+    }
+    
+    return value;
+}
+
+
+// Find a service record, or load it from file. If the service has
+// dependencies, load those also.
+//
+// Might throw an exception if a dependency cycle is found or if another
+// problem occurs (I/O error, service description not found).
+// Returns the record for the named service. An already-loaded record is
+// returned as-is; otherwise the service description file "<service_dir>/<name>"
+// is parsed, any services named by "depends-on" are loaded (recursively), and
+// a new record is created and added to the set.
+//
+// Throws ServiceNotFound if the description file cannot be opened, and a
+// std::string describing the problem if the file is malformed.
+//
+// NOTE: a record is only registered once its file has been parsed completely,
+// so a dependency cycle currently leads to unbounded recursion (TODO).
+ServiceRecord * ServiceSet::loadServiceRecord(const char * name)
+{
+    using std::string;
+    using std::ifstream;
+    using std::ios;
+    
+    // First try and find an existing record...
+    ServiceRecord * rval = findService(string(name));
+    if (rval != 0) {
+        return rval;
+    }
+
+    // Couldn't find one. Have to load it.
+    // An empty service directory means "relative to the current directory";
+    // otherwise make sure the directory and the name are separated by '/'.
+    string service_filename = service_dir;
+    if (! service_filename.empty() && *(service_filename.rbegin()) != '/') {
+        service_filename += '/';
+    }
+    service_filename += name;
+    
+    string command;
+    int service_type = SVC_PROCESS;
+    std::list<ServiceRecord *> depends_on;
+    string logfile;
+    
+    // TODO catch I/O exceptions, wrap & re-throw?
+    string line;
+    bool auto_restart = false;
+    ifstream service_file;
+    service_file.exceptions(ios::badbit | ios::failbit);
+    
+    try {
+        service_file.open(service_filename.c_str(), ios::in);
+    }
+    catch (std::ios_base::failure &exc) {
+        ServiceNotFound snf;
+        snf.serviceName = name;
+        throw snf;
+    }
+    
+    // getline can set failbit if it reaches end-of-file, we don't want an exception in that case:
+    service_file.exceptions(ios::badbit);
+    
+    // Each non-blank, non-comment line has the form "setting = value"
+    while (getline(service_file, line)) {
+        string::iterator i = line.begin();
+        string::iterator end = line.end();
+      
+        i = skipws(i, end);
+        if (i == end || *i == '#') {
+            continue;  // blank line or comment line
+        }
+        
+        string setting = read_setting_name(&i, end);
+        i = skipws(i, end);
+        if (i == end || *i != '=') {
+            // TODO: throw a documented exception
+            throw std::string("Badly formed line.");
+        }
+        i = skipws(++i, end);
+        
+        if (setting == "command") {
+            command = read_setting_value(&i, end);
+        }
+        else if (setting == "depends-on") {
+            string dependency_name = read_setting_value(&i, end);
+            depends_on.push_back(loadServiceRecord(dependency_name.c_str()));
+        }
+        else if (setting == "logfile") {
+            logfile = read_setting_value(&i, end);
+        }
+        else if (setting == "restart") {
+            string restart = read_setting_value(&i, end);
+            auto_restart = (restart == "yes" || restart == "true");
+        }
+        else if (setting == "type") {
+            string type_str = read_setting_value(&i, end);
+            if (type_str == "scripted") {
+                service_type = SVC_SCRIPTED;
+            }
+            else if (type_str == "process") {
+                service_type = SVC_PROCESS;
+            }
+            else {
+                throw string("Service type must be \"scripted\""
+                    " or \"process\"");
+                // TODO throw a better exception
+            }
+        }
+        else {
+            // TODO throw a better exception
+            throw string("Unknown setting");
+        }
+    }
+    
+    // TODO check we actually have all the settings - type, command
+    
+    rval = new ServiceRecord(this, string(name), service_type, command,
+            &depends_on);
+    rval->setLogfile(logfile);
+    rval->setAutoRestart(auto_restart);
+            
+    records.push_back(rval);
+    return rval;
+}

+ 493 - 0
service.cc

@@ -0,0 +1,493 @@
+#include "service.h"
+#include <cstring>
+#include <cerrno>
+#include <sstream>
+#include <iterator>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+// Split a command string into whitespace-separated tokens.
+// TODO quoting is not supported yet: a quoted argument containing spaces is
+// still split at every run of whitespace.
+static std::vector<std::string> tokenize(std::string arg)
+{
+    // TODO rewrite to be more efficient.
+    std::istringstream input(arg);
+    std::vector<std::string> tokens;
+    std::string word;
+    while (input >> word) {
+        tokens.push_back(word);
+    }
+    return tokens;
+}
+
+// Look up a service record by name in the given list. Returns a null
+// pointer when no record in the list has that name.
+static ServiceRecord * findService(const std::list<ServiceRecord *> & records,
+                                    const char *name)
+{
+    for (ServiceRecord * candidate : records) {
+        if (strcmp(candidate->getServiceName(), name) == 0) {
+            return candidate;
+        }
+    }
+    return nullptr;
+}
+
+// Locate an existing record in this set by name; yields a null pointer when
+// there is none.
+ServiceRecord * ServiceSet::findService(std::string name)
+{
+    const char * wanted = name.c_str();
+    return ::findService(records, wanted);
+}
+
+// Obtain the record for the named service via loadServiceRecord() and ask
+// it to start.
+void ServiceSet::startService(const char *name)
+{
+    loadServiceRecord(name)->start();
+}
+
+// Ask the named service to stop. A name with no existing record is ignored.
+void ServiceSet::stopService(const std::string & name)
+{
+    ServiceRecord * target = findService(name);
+    if (target == nullptr) {
+        return;
+    }
+    target->stop();
+}
+
+// Called when a service has actually stopped.
+void ServiceRecord::stopped()
+{
+    force_stop = false;
+    service_state = SVC_STOPPED;
+
+    // Tell every dependency that one of its dependents has stopped; a
+    // dependency that is itself due to stop may now be able to proceed.
+    for (ServiceRecord * dependency : depends_on) {
+        dependency->dependentStopped();
+    }
+
+    service_set->service_inactive(this);
+
+    // TODO inform listeners.
+    if (desired_state != SVC_STARTED) {
+        return;
+    }
+
+    // Desired state is "started", so start again.
+    start();
+}
+
+// Child-exit callback registered with libev for a service's process or
+// script (see ev_child_init in start_ps_process).
+//
+// Parameters:
+//   loop    - event loop that delivered the event (not used here; the
+//             default loop is looked up explicitly instead)
+//   w       - the child watcher; w->data holds the owning ServiceRecord and
+//             w->rstatus the child's exit status
+//   revents - event mask (not used)
+void ServiceRecord::process_child_callback(struct ev_loop *loop, ev_child *w, int revents)
+{    
+    ServiceRecord *sr = (ServiceRecord *) w->data;
+
+    // The child is gone: forget its pid and stop watching for it.
+    sr->pid = -1;
+    ev_child_stop(ev_default_loop(EVFLAG_AUTO), &sr->child_listener);
+    
+    // Ok, for a process service, any process death which we didn't rig
+    // ourselves is a bit... unexpected. Probably, the child died because
+    // we asked it to (sr->service_state == SVC_STOPPING). But even if
+    // we didn't, there's not much we can do.
+    
+    if (sr->service_type == SVC_PROCESS) {
+        // TODO log non-zero rstatus?
+        if (sr->service_state == SVC_STOPPING) {
+            sr->stopped();
+        }
+        else {
+            // Unexpected death: mark this service and its dependents as
+            // force-stopped, then run the normal stop sequence.
+            sr->forceStop();
+            sr->stop();
+        }
+        
+        // Restart only if both the per-service flag and the set-wide flag
+        // allow it.
+        if (sr->auto_restart && sr->service_set->get_auto_restart()) {
+            sr->start();
+        }
+    }
+    else {  // SVC_SCRIPTED
+        if (sr->service_state == SVC_STOPPING) {
+            if (w->rstatus == 0) {
+                sr->stopped();
+            }
+            else {
+                // TODO
+                // ??? failed to stop!
+                // For now just pretend we stopped, so that any dependencies
+                // can be stopped:
+                sr->stopped();
+            }
+        }
+        else { // SVC_STARTING
+            // The "start" script finished; its exit status decides whether
+            // the service counts as started.
+            if (w->rstatus == 0) {
+                sr->started();
+            }
+            else {
+                // failed to start
+                sr->failed_to_start();
+            }
+        }
+    }
+}
+
+// Request that this service be started.
+//
+// Sets the desired state to SVC_STARTED. If the service is currently
+// stopped, any dependency that is not started (or is flagged force_stop) is
+// asked to start first; this service itself is launched only once all of its
+// dependencies are started. In any other state the function returns after
+// recording the desired state.
+void ServiceRecord::start()
+{
+    if ((service_state == SVC_STARTING || service_state == SVC_STARTED)
+            && desired_state == SVC_STOPPED) {
+        // This service was starting, or started, but was set to be stopped.
+        // Cancel the stop (and continue starting/running).
+        // TODO any listeners waiting for stop should be notified of
+        //      its cancellation
+    }
+    
+    desired_state = SVC_STARTED;
+
+    if (service_state != SVC_STOPPED) {
+        // Either we need do nothing (service is already started/starting)
+        // or the service is currently being stopped and we must wait for
+        // that to complete.
+        return;
+    }
+    
+    // Service state is SVC_STOPPED. Start the service.
+    
+    // First, start dependencies
+    bool all_deps_started = true;
+    for (sr_iter i = depends_on.begin(); i != depends_on.end(); ++i) {
+        // Note, we cannot treat a dependency as started if its force_stop
+        // flag is set.
+        if ((*i)->service_state != SVC_STARTED || (*i)->force_stop) {
+            all_deps_started = false;
+            (*i)->start();
+        }
+    }
+    
+    if (! all_deps_started) {
+        // The dependencies will notify this service once they've started.
+        return;
+    }
+    
+    // Actually start this service.
+    service_state = SVC_STARTING;
+    service_set->service_active(this);
+    
+    if (service_type == SVC_PROCESS) {
+        // A process service counts as started as soon as the process has
+        // been launched successfully.
+        bool start_success = start_ps_process();
+        if (start_success) {
+            started();
+        }
+        else {
+            failed_to_start();
+        }
+    }
+    else {
+        // Script-controlled service
+        // The command is run with an extra "start" argument; on a successful
+        // launch the outcome is reported later by process_child_callback.
+        bool start_success = start_ps_process(std::vector<std::string>(1, "start"));
+        if (! start_success) {
+            failed_to_start();
+        }
+    }
+}
+
+// Record that this service is now running. If it is still wanted, start the
+// dependents that want to run; otherwise begin stopping it again.
+void ServiceRecord::started()
+{
+    service_state = SVC_STARTED;
+    // TODO - inform listeners
+
+    if (desired_state != SVC_STARTED) {
+        stop();
+        return;
+    }
+
+    // Start any dependents whose desired state is SVC_STARTED:
+    for (ServiceRecord * dependent : dependents) {
+        if (dependent->desired_state != SVC_STARTED) {
+            continue;
+        }
+        dependent->start();
+    }
+}
+
+// Handle a failure to start: return to the stopped state, report the service
+// as inactive and propagate the failure to dependents that wanted to start.
+void ServiceRecord::failed_to_start()
+{
+    desired_state = SVC_STOPPED;
+    service_state = SVC_STOPPED;
+    service_set->service_inactive(this);
+    // TODO - inform listeners of failure
+
+    // Cancel start of dependents:
+    for (ServiceRecord * dependent : dependents) {
+        if (dependent->desired_state == SVC_STARTED) {
+            dependent->failed_dependency();
+        }
+    }
+}
+
+// Launch the service command with no additional arguments. Returns whatever
+// the overload taking an argument vector returns (true on success).
+//
+// Historical note: an earlier implementation, previously kept here as
+// commented-out code, used vfork() and relied on Linux semantics (parent
+// suspended and memory shared until the child execs or exits) to detect exec
+// failure. It is no longer used; the vector overload does the work.
+bool ServiceRecord::start_ps_process()
+{
+    const std::vector<std::string> no_extra_args;
+    return start_ps_process(no_extra_args);
+}
+
+
+// Fork and exec the service command, appending the given extra arguments.
+//
+// In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
+// success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
+// exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
+// is written to the pipe, and the parent can read it.
+//
+// Parameters:
+//   pargs - extra arguments appended after the tokenized command
+//           (e.g. "start" / "stop" for scripted services)
+//
+// Returns true if the child exec'd successfully; in that case pid is set and
+// child_listener is registered so process_child_callback runs when the child
+// exits. Returns false if the command is empty, or if the pipe, fork or exec
+// failed.
+bool ServiceRecord::start_ps_process(const std::vector<std::string> &pargs)
+{
+    using std::vector;
+    using std::string;
+
+    // Tokenize the command, and add additional arguments from pargs. This is
+    // done before forking so that an empty command is reported as a failure
+    // instead of indexing an empty vector in the child.
+    vector<string> progAndArgs = tokenize(program_name);
+    progAndArgs.insert(progAndArgs.end(), pargs.begin(), pargs.end());
+    if (progAndArgs.empty()) {
+        // TODO log error
+        return false;
+    }
+
+    // Build the null-terminated argv array; the pointers stay valid for as
+    // long as progAndArgs is alive.
+    vector<const char *> args;
+    args.reserve(progAndArgs.size() + 1);
+    for (vector<string>::size_type i = 0; i < progAndArgs.size(); i++) {
+        args.push_back(progAndArgs[i].c_str());
+    }
+    args.push_back(nullptr);
+
+    int pipefd[2];
+    if (pipe2(pipefd, O_CLOEXEC)) {
+        // TODO log error
+        return false;
+    }
+
+    // TODO make sure pipefd's are not 0/1/2 (STDIN/OUT/ERR) - if they are, dup them
+    // until they are not.
+
+    pid_t forkpid = fork();
+    if (forkpid == -1) {
+        // TODO log error
+        close(pipefd[0]);
+        close(pipefd[1]);
+        return false;
+    }
+
+    if (forkpid == 0) {
+        // Child process
+        ev_default_destroy(); // won't need that on this side, free up fds.
+
+        // Re-set stdin, stdout, stderr
+        close(0); close(1); close(2);
+        string logfile = this->logfile;
+        if (logfile.length() == 0) {
+            logfile = "/dev/null";
+        }
+
+        // TODO rethink this logic. If we open it at not-0, shouldn't we just dup it to 0?:
+        if (open("/dev/null", O_RDONLY) == 0) {
+          // stdin = 0. That's what we should have; proceed with opening
+          // stdout and stderr.
+          open(logfile.c_str(), O_WRONLY | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR);
+          dup2(1, 2);
+        }
+
+        execvp(args[0], const_cast<char **>(args.data()));
+
+        // If we got here, the exec failed. Report errno to the parent; there
+        // is nothing useful to do if the write itself fails.
+        int exec_status = errno;
+        ssize_t written = write(pipefd[1], &exec_status, sizeof(int));
+        (void) written;
+
+        // Use _exit(), not exit(): this is a forked copy of the parent, and
+        // exit() would run the parent's atexit handlers and flush its
+        // inherited stdio buffers a second time.
+        _exit(0);
+    }
+
+    // Parent process. The read below returns 0 (EOF) once the child's exec
+    // succeeds, because the CLOEXEC write end is closed by the exec; it
+    // returns data if the child wrote an errno after a failed exec.
+
+    close(pipefd[1]); // close the 'other end' fd
+
+    int exec_status;
+    ssize_t nread;
+    do {
+        // Retry if interrupted by a signal: an EINTR must not be mistaken
+        // for an exec failure.
+        nread = read(pipefd[0], &exec_status, sizeof(int));
+    } while (nread == -1 && errno == EINTR);
+    close(pipefd[0]);
+
+    if (nread != 0) {
+        // Either the child reported an exec failure or the read failed.
+        // TODO log error
+        return false;
+    }
+
+    // pipe closed; success
+    pid = forkpid;
+
+    // Add a process listener so we can detect when the
+    // service stops
+    ev_child_init(&child_listener, process_child_callback, pid, 0);
+    child_listener.data = this;
+    ev_child_start(ev_default_loop(EVFLAG_AUTO), &child_listener);
+
+    return true;
+}
+
+
+
+
+// Set the force_stop flag on this service and, recursively, on every service
+// that depends on it.
+void ServiceRecord::forceStop()
+{
+    force_stop = true;
+    for (ServiceRecord * dependent : dependents) {
+        dependent->forceStop();
+    }
+}
+
+// A dependency of this service failed to start: abandon our own start and
+// pass the failure on to dependents that were waiting to start.
+void ServiceRecord::failed_dependency()
+{
+    // TODO notify listeners
+
+    // Presumably, we were starting. So now we're not.
+    desired_state = SVC_STOPPED;
+    service_state = SVC_STOPPED;
+
+    // Notify dependents of this service also
+    for (ServiceRecord * dependent : dependents) {
+        if (dependent->desired_state != SVC_STARTED) {
+            continue;
+        }
+        dependent->failed_dependency();
+    }
+}
+
+// Called when a dependent has stopped. If this service is meant to stop (or
+// is force-stopped) and no dependent remains in a non-stopped state, begin
+// stopping this service.
+void ServiceRecord::dependentStopped()
+{
+    if (desired_state != SVC_STOPPED && ! force_stop) {
+        return;
+    }
+
+    for (ServiceRecord * dependent : dependents) {
+        if (dependent->service_state != SVC_STOPPED) {
+            // Still waiting on at least one dependent.
+            return;
+        }
+    }
+
+    stopping();
+}
+
+// Request that this service be stopped.
+//
+// Sets the desired state to SVC_STOPPED. If the service is currently
+// started, every dependent that is not yet stopped is asked to stop first;
+// this service moves to the stopping state only once all dependents are
+// stopped. In any other state the function returns after recording the
+// desired state.
+void ServiceRecord::stop()
+{
+    if ((service_state == SVC_STOPPING || service_state == SVC_STOPPED)
+            && desired_state == SVC_STARTED) {
+        // The service *was* stopped/stopping, but it was going to restart.
+        // Now, we'll cancel the restart.
+        // TODO inform listeners waiting for start of cancellation
+    }
+    
+    desired_state = SVC_STOPPED;
+
+    if (service_state != SVC_STARTED) {
+        // If we're starting we need to wait for that to complete.
+        // If we're already stopping/stopped there's nothing to do.
+        return;
+    }
+
+    // Make sure all dependents have stopped.
+    
+    bool all_deps_stopped = true;
+    for (sr_iter i = dependents.begin(); i != dependents.end(); ++i) {
+        if ((*i)->service_state != SVC_STOPPED) {
+            all_deps_stopped = false;
+            (*i)->stop();
+        }
+    }
+    
+    if (! all_deps_stopped) {
+        // The dependents will notify this service once they've stopped.
+        return;
+    }
+    
+    // Ok, dependents have stopped. We can stop ourselves.
+    stopping();
+}
+
+// Move this service to the SVC_STOPPING state and initiate the actual stop.
+//
+// For a process service the process is sent SIGTERM (or, if it is already
+// gone, the service is marked stopped immediately). For a scripted service
+// the command is run with a "stop" argument; completion is then handled in
+// process_child_callback.
+void ServiceRecord::stopping()
+{
+    service_state = SVC_STOPPING;
+
+    if (service_type == SVC_PROCESS) {
+        if (pid != -1) {
+            // The process is still kicking on - must actually kill it.
+            kill(pid, SIGTERM);
+            // Now we wait; the rest is done in process_child_callback
+        }
+        else {
+            // The process is already dead.
+            stopped();
+        }
+    }
+    else {
+        // Scripted service.
+        if (! start_ps_process(std::vector<string>(1, "stop"))) {
+            // The stop script could not be launched, so no child-exit
+            // callback will ever arrive. Treat the service as stopped (as
+            // process_child_callback does for a stop script that fails)
+            // rather than leaving it stuck in SVC_STOPPING.
+            // TODO log error
+            stopped();
+        }
+    }
+}
+
+// A service became active: bump the count of active services.
+void ServiceSet::service_active(ServiceRecord *sr)
+{
+    active_services += 1;
+}
+
+// A service became inactive: reduce the count of active services.
+void ServiceSet::service_inactive(ServiceRecord *sr)
+{
+    active_services -= 1;
+}

+ 204 - 0
service.h

@@ -0,0 +1,204 @@
+#include <string>
+#include <list>
+#include <vector>
+#include "ev.h"
+
+/* Possible service states */
+#define SVC_STOPPED   0  /* service is not running */
+#define SVC_STARTING  1  /* service script is running with "start" */
+#define SVC_STARTED   2  /* service is running; start script finished. */
+#define SVC_STOPPING  3  /* service script is running with "stop" */
+
+/* Service types */
+#define SVC_PROCESS  0  /* service runs as a process, and can be stopped
+                           by sending the process a signal */
+#define SVC_SCRIPTED 1  /* service requires a command to start, and another
+                           command to stop */
+
+
+
+// Exception type carrying the name of a service.
+// NOTE(review): presumably thrown when a service description cannot be
+// located - confirm against the code that throws it.
+class ServiceNotFound
+{
+    public:
+    std::string serviceName;  // name of the service concerned
+};
+
+
+class ServiceSet; // forward declaration
+
+// Record for a single service: its configuration, its current and desired
+// state, and its links to the services it depends on and the services that
+// depend on it.
+class ServiceRecord
+{
+    typedef std::string string;
+    
+    string service_name;
+    int service_type;  /* SVC_PROCESS or SVC_SCRIPTED */
+    int service_state; /* SVC_STOPPED, _STARTING, _STARTED, _STOPPING */
+    int desired_state; /* SVC_STOPPED / SVC_STARTED */
+    bool force_stop; // true if the service must actually stop. This is the
+                     // case if for example the process dies; the service,
+                     // and all its dependents, MUST be stopped.
+    string program_name;  /* executable program or script */
+    string logfile; /* log file name, empty string specifies /dev/null */
+    bool auto_restart; /* whether to restart this (process) if it dies */
+    
+    typedef std::list<ServiceRecord *> sr_list;
+    typedef sr_list::iterator sr_iter;
+    
+    sr_list depends_on; // services this one depends on
+    sr_list dependents; // services depending on this one
+    // unsigned wait_count;  /* if we are waiting for dependents/dependencies to
+    //                         start/stop, this is how many we're waiting for */
+    
+    ServiceSet *service_set; // the set this service belongs to
+    
+    // Implementation details
+    
+    pid_t pid;  /* PID of the process. If state is STARTING or STOPPING,
+                   this is PID of the service script; otherwise it is the
+                   PID of the process itself (process service).
+                   -1 when there is no process.
+                   */
+
+    ev_child child_listener;
+    
+    // Move service to STOPPING state. This can only be called once
+    // all dependents have stopped.
+    void stopping();
+    
+    // Service has actually stopped (includes having all dependents
+    // reaching STOPPED state).
+    void stopped();
+    
+    // Service has successfully started
+    void started();
+    
+    // Service failed to start
+    void failed_to_start();
+    
+    // A dependency of this service failed to start.
+    void failed_dependency();
+    
+    // For process services, start the process, return true on success
+    bool start_ps_process();
+    bool start_ps_process(const std::vector<std::string> &args);
+   
+    // Callback from libev when a child process dies
+    static void process_child_callback(struct ev_loop *loop, struct ev_child *w,
+            int revents);
+    
+    void dependentStopped(); // called when a dependent stopped
+    
+    void forceStop(); // force-stop this service and all dependents
+    
+    public:
+    // Create a record in the stopped state.
+    //
+    //   set          - the service set this record belongs to
+    //   name         - service name
+    //   service_type - SVC_PROCESS or SVC_SCRIPTED
+    //   command      - program or script to run
+    //   pdepends_on  - services this one depends on; the list is copied, and
+    //                  this record is added to each dependency's dependents
+    ServiceRecord(ServiceSet *set, string name, int service_type, string command,
+            std::list<ServiceRecord *> * pdepends_on)
+    {
+        service_state = SVC_STOPPED;
+        desired_state = SVC_STOPPED;
+        
+        // These two are read by start(), dependentStopped() and stopping()
+        // and so must not be left uninitialized: a fresh record is not
+        // force-stopped and has no process.
+        force_stop = false;
+        pid = -1;
+        
+        service_set = set;
+        service_name = name;
+        this->service_type = service_type;
+        program_name = command;
+        auto_restart = false;
+        // TODO splice the contents from the depends_on parameter
+        // rather than duplicating the list.
+        this->depends_on = *pdepends_on;
+        
+        // For each dependency, add us as a dependent.
+        for (sr_iter i = depends_on.begin(); i != depends_on.end(); ++i) {
+            (*i)->dependents.push_back(this);
+        }
+    }
+    
+    // Set logfile, should be done before service is started
+    void setLogfile(string logfile)
+    {
+        this->logfile = logfile;
+    }
+    
+    // Set whether this service should automatically restart when it dies
+    void setAutoRestart(bool auto_restart)
+    {
+        this->auto_restart = auto_restart;
+    }
+    
+    // Service name as a C string; the pointer refers to storage owned by
+    // this record.
+    const char *getServiceName() const { return service_name.c_str(); }
+    // Current state: SVC_STOPPED, SVC_STARTING, SVC_STARTED or SVC_STOPPING.
+    int getState() const { return service_state; }
+    
+    void start();  // start the service
+    void stop();   // stop the service
+};
+
+
+// A collection of service records, together with a count of how many are
+// currently active and a set-wide switch for automatic restarts.
+class ServiceSet
+{
+    int active_services;
+    std::list<ServiceRecord *> records;
+    const char *service_dir;  // directory containing service descriptions
+    bool restart_enabled; // whether automatic restart is enabled (allowed)
+
+    // Private methods
+
+    // Locate an existing service record.
+    ServiceRecord *findService(std::string name);
+
+    // Load a service description, and dependencies, if there is no existing
+    // record for the given name.
+    ServiceRecord *loadServiceRecord(const char *name);
+
+    // Public
+
+    public:
+    // Create an empty set reading service descriptions from service_dir.
+    // Only the pointer is stored, not a copy of the string.
+    ServiceSet(const char *service_dir)
+        : active_services(0), service_dir(service_dir), restart_enabled(true)
+    {
+    }
+
+    // Start the service with the given name. The named service will begin
+    // transition to the 'started' state.
+    //
+    // Throws an exception if the
+    // service description cannot be loaded.
+    void startService(const char *name);
+
+    // Stop the service with the given name. The named service will begin
+    // transition to the 'stopped' state.
+    void stopService(const std::string &name);
+
+    // Notification from service that it is active (state != SVC_STOPPED)
+    // Only to be called on the transition from inactive to active.
+    void service_active(ServiceRecord *);
+
+    // Notification from service that it is inactive (SVC_STOPPED)
+    // Only to be called on the transition from active to inactive.
+    void service_inactive(ServiceRecord *);
+
+    // Find out how many services are active (starting, running or stopping,
+    // but not stopped).
+    int count_active_services()
+    {
+        return active_services;
+    }
+
+    // Disable automatic restarts, then ask every known service to stop.
+    void stop_all_services()
+    {
+        restart_enabled = false;
+        for (ServiceRecord * record : records) {
+            record->stop();
+        }
+    }
+
+    // Enable or disable automatic restarts for the whole set.
+    void set_auto_restart(bool restart)
+    {
+        restart_enabled = restart;
+    }
+
+    // Whether automatic restarts are currently allowed for the set.
+    bool get_auto_restart()
+    {
+        return restart_enabled;
+    }
+};