Browse Source

Implement word-splitting variable expansion with "$/NAME"

As much as I don't want to support configuration via environment
variables people are clearly doing this and there doesn't yet exist a
better alternative, so with this we support word-splitting the expansion
result (and collapsing empty expansion results, i.e. don't produce a
zero-length argument).

Addresses #258
Davin McCall 5 months ago
parent
commit
d9b671e62e
3 changed files with 231 additions and 36 deletions
  1. 8 1
      doc/manpages/dinit-service.5.m4
  2. 138 35
      src/includes/load-service.h
  3. 85 0
      src/tests/loadtests.cc

+ 8 - 1
doc/manpages/dinit-service.5.m4

@@ -687,10 +687,14 @@ set and non\-empty), and `\fB${NAME+word}\fR' (substitute `\fBword\fR' if variab
 Unlike in shell expansion, the substituted \fBword\fR does not itself undergo expansion and
 cannot contain closing brace characters or whitespace, even if quoted.
 .P
-Note command-line variable substitution occurs after splitting the line into separate arguments and so
+Note that by default, command-line variable substitution occurs after splitting the line into
+separate arguments and so
 a single environment variable cannot be used to add multiple arguments to a command line.
 If a designated variable is not defined, it is replaced with an empty (zero-length) string, possibly producing a
 zero-length argument.
+To alter this behaviour use a slash after \fB$\fR, as in `\fB$/NAME\fR'; the expanded value will then
+be split into several arguments separate by whitespace or, if the value is empty or consists only
+of whitespace, will collapse (instead of producing an empty or whitespace argument).
 .P
 Variable substitution occurs when the service is loaded.
 Therefore, it is typically not useful for dynamically changing service parameters (including
@@ -710,6 +714,9 @@ amended via \fBdinitctl setenv\fR commands or equivalent).
 .P
 Note that since variable substitution is performed on service load, the values seen by a service process may differ from those
 used for substitution, if they have been changed in the meantime.
+Using environment variable values in service commands and parameters can be used as means to
+provide easily-adjustable service configuration, but is not ideal for this purpose and alternatives
+should be considered. 
 .\"
 .SH EXAMPLES
 .LP

+ 138 - 35
src/includes/load-service.h

@@ -143,10 +143,39 @@ inline string_iterator skipws(string_iterator i, string_iterator end) noexcept
     using std::isspace;
 
     while (i != end) {
-      if (!isspace(*i, locale::classic())) {
-        break;
-      }
-      ++i;
+        if (!isspace(*i, locale::classic())) {
+            break;
+        }
+        ++i;
+    }
+    return i;
+}
+
+// skipws using "char *" instead of iterator
+inline const char *skipws(const char *i, const char *end) noexcept
+{
+    using std::locale;
+    using std::isspace;
+
+    while (i != end) {
+        if (!isspace(*i, locale::classic())) {
+            break;
+        }
+        ++i;
+    }
+    return i;
+}
+
+inline const char *findws(const char *i, const char *end) noexcept
+{
+    using std::locale;
+    using std::isspace;
+
+    while (i != end) {
+        if (isspace(*i, locale::classic())) {
+            break;
+        }
+        ++i;
     }
     return i;
 }
@@ -703,11 +732,11 @@ void process_service_file(string name, std::istream &service_file, T func)
     }
 }
 
-// A dummy lint-reporting "function".
-static auto dummy_lint = [](const char *){};
+// A dummy lint-reporting function.
+inline void dummy_lint(const char *) {}
 
-// Resolve leading variables in paths using the environment
-static auto resolve_env_var = [](const string &name, environment::env_map const &envmap){
+// Resolve variables from an environment
+inline const char *resolve_env_var(const string &name, const environment::env_map &envmap){
     return envmap.lookup(name);
 };
 
@@ -734,7 +763,7 @@ static void value_var_subst(const char *setting_name, std::string &line,
     }
 
     auto i = offsets.begin();
-    unsigned xpos = 0; // position to copy from
+    unsigned xpos = 0; // position to copy from in original line
     std::string r_line;
     int offadj = 0;
 
@@ -742,6 +771,11 @@ static void value_var_subst(const char *setting_name, std::string &line,
 
         i->first += offadj; // don't adjust end yet
 
+        // inhibit_collapse is set if we process anything which may be empty but shouldn't collapse
+        // to "no argument"
+        bool inhibit_collapse = false;
+        bool do_collapse = false;
+
         while (i->second > dindx) {
             r_line.append(line, xpos, dindx - xpos); // copy unmatched part
             if (line[dindx + 1] == '$') {
@@ -753,54 +787,120 @@ static void value_var_subst(const char *setting_name, std::string &line,
             else {
                 // variable
                 auto token_end = std::next(line.begin(), i->second);
-                bool brace = line[dindx + 1] == '{';
-                auto i = std::next(line.begin(), dindx + 1 + int(brace));
+                auto spos = dindx + 1;
+                bool wsplit = line[spos] == '/';
+                if (wsplit) ++spos;
+                bool brace = line[spos] == '{';
+                if (brace) ++spos;
+                auto j = std::next(line.begin(), spos);
                 // read environment variable name
-                string name = read_config_name(i, token_end, true);
+                string name = read_config_name(j, token_end, true);
                 if (name.empty()) {
                     throw service_description_exc(setting_name, "invalid/missing variable name after '$'");
                 }
                 char altmode = '\0';
                 bool colon = false;
-                auto altbeg = i, altend = i;
+                auto altbeg = j, altend = j;
                 if (brace) {
                     /* ${foo+val}, ${foo-val}, ${foo:+val}, ${foo:-val} */
-                    if (*i == ':') {
+                    if (*j == ':') {
                         colon = true;
-                        ++i;
-                        if (*i != '+' && *i != '-') {
+                        ++j;
+                        if (*j != '+' && *j != '-') {
                             throw service_description_exc(setting_name, "invalid syntax in variable substitution");
                         }
                     }
-                    if (*i == '+' || *i == '-') {
-                        altmode = *i;
-                        altbeg = ++i;
-                        while (i != token_end && *i != '}') {
-                            ++i;
+                    if (*j == '+' || *j == '-') {
+                        altmode = *j;
+                        altbeg = ++j;
+                        while (j != token_end && *j != '}') {
+                            ++j;
                         }
-                        altend = i;
+                        altend = j;
                     }
-                    if (*i != '}') {
+                    if (*j != '}') {
                         throw service_description_exc(setting_name, "unmatched '{' in variable substitution");
                     }
-                    ++i;
+                    ++j;
                 }
                 size_t line_len_before = r_line.size();
+                string_view resolved_vw;
                 auto *resolved = var_resolve(name, envmap);
+                if (resolved) {
+                    resolved_vw = resolved;
+                }
                 /* apply shell-like substitutions */
                 if (altmode == '-') {
                     if (!resolved || (colon && !*resolved)) {
-                        r_line.append(altbeg, altend);
-                    } else if (resolved) {
-                        r_line.append(resolved);
+                        resolved_vw = {line.c_str() + (altbeg - line.begin()), (size_t)(altend - altbeg)};
                     }
                 } else if (altmode == '+') {
                     if (resolved && (!colon || *resolved)) {
-                        r_line.append(altbeg, altend);
+                        resolved_vw = {line.c_str() + (altbeg - line.begin()), (size_t)(altend - altbeg)};
                     }
-                } else if (resolved) {
-                    r_line.append(resolved);
                 }
+
+                xpos = j - line.begin();
+                int name_len = xpos - dindx;
+                offadj -= name_len;
+
+                if (!wsplit) {
+                    inhibit_collapse = true;
+                    do_collapse = false;
+                    if (!resolved_vw.empty()) {
+                        r_line.append(resolved_vw.data(), resolved_vw.length());
+                    }
+                }
+                else {
+                    // Must perform word splitting. Find first whitespace:
+                    auto r_vw_beg = resolved_vw.data();
+                    auto r_vw_end = r_vw_beg + resolved_vw.length();
+                    const char *wsp = findws(r_vw_beg, r_vw_end);
+
+                    // If we have whitespace, append up to that whitespace and then split:
+                    while (wsp != r_vw_end) {
+                        if (wsp != r_vw_beg) {
+                            r_line.append(r_vw_beg, wsp - r_vw_beg);
+                        }
+
+                        auto orig_i_second = i->second;
+
+                        size_t line_len_after = r_line.size();
+                        if (i->first == line_len_after && !inhibit_collapse) {
+                            // whitespace at the start of the word; just trim it
+                            goto next_section;
+                        }
+
+                        // Break here:
+                        i->second = r_line.length();
+
+                        r_line += ' ';
+                        ++line_len_after;
+
+                        if (line_len_after > (size_t)std::numeric_limits<int>::max()) {
+                            // (avoid potential overflow)
+                            throw service_description_exc(setting_name, "value too long (after substitution)");
+                        }
+
+                        // Create new argument from split:
+                        i = offsets.insert(std::next(i), {r_line.length(), orig_i_second});
+                        offadj += (int)line_len_after - (int)line_len_before;
+                        line_len_before = r_line.size();
+
+                        // Find the next break, if any:
+                        next_section:
+                        r_vw_beg = skipws(wsp, r_vw_end);
+                        wsp = findws(r_vw_beg, r_vw_end);
+                        inhibit_collapse = false;
+                    }
+
+                    if (r_vw_beg != r_vw_end) {
+                        r_line.append(r_vw_beg, r_vw_end - r_vw_beg);
+                    }
+
+                    do_collapse = !inhibit_collapse;
+                }
+
                 size_t line_len_after = r_line.size();
 
                 if (line_len_after > (size_t)std::numeric_limits<int>::max()) {
@@ -808,17 +908,20 @@ static void value_var_subst(const char *setting_name, std::string &line,
                     throw service_description_exc(setting_name, "value too long (after substitution)");
                 }
 
-                xpos = i - line.begin();
-                int name_len = xpos - dindx;
-
-                offadj += (int)line_len_after - (int)line_len_before - name_len;
+                offadj += (int)line_len_after - (int)line_len_before;
             }
 
             dindx = line.find('$', xpos);
         }
 
         i->second += offadj;
-        ++i;
+
+        if (do_collapse && i->first == i->second) {
+            i = offsets.erase(i);
+        }
+        else {
+            ++i;
+        }
 
         while (i != offsets.end() && i->second < dindx) {
             i->first += offadj;

+ 85 - 0
src/tests/loadtests.cc

@@ -83,6 +83,90 @@ void test_env_subst2()
     assert((*std::next(offsets.begin(), 2) == std::pair<unsigned,unsigned>{11, 39}));
 }
 
+void test_env_subst3()
+{
+    auto resolve_env_var = [](const std::string &name, environment::env_map const &) {
+        if (name == "EMPTY") return "";
+        if (name == "WS") return "    ";
+        if (name == "PADDED") return " p ";
+        return "";
+    };
+
+    std::string line = "test $/EMPTY foo";
+    std::list<std::pair<unsigned,unsigned>> offsets;
+    std::string::iterator li = line.begin();
+    std::string::iterator le = line.end();
+    dinit_load::read_setting_value(1 /* line_num */, li, le, &offsets);
+    dinit_load::value_var_subst("command", line, offsets, resolve_env_var, tenvmap);
+
+    auto check_arg = [&](unsigned idx, const char *val)
+    {
+        auto &offs = *std::next(offsets.begin(), idx);
+        assert(line.substr(offs.first, offs.second - offs.first) == val);
+    };
+
+    assert(line == "test  foo");
+    check_arg(1, "foo");
+
+    line = "test $EMPTY foo";
+    li = line.begin(); le = line.end(); offsets.clear();
+    dinit_load::read_setting_value(1 /* line_num */, li, le, &offsets);
+    dinit_load::value_var_subst("command", line, offsets, resolve_env_var, tenvmap);
+
+    assert(line == "test  foo");
+    check_arg(1, "");
+    check_arg(2, "foo");
+
+    // adjacent collapsing
+    line = "test $/EMPTY$/EMPTY$/EMPTY foo";
+    li = line.begin(); le = line.end(); offsets.clear();
+    dinit_load::read_setting_value(1 /* line_num */, li, le, &offsets);
+    dinit_load::value_var_subst("command", line, offsets, resolve_env_var, tenvmap);
+
+    assert(line == "test  foo");
+    check_arg(1, "foo");
+
+    // middle empty is non-collapsing:
+    line = "test $/EMPTY$EMPTY$/EMPTY foo";
+    li = line.begin(); le = line.end(); offsets.clear();
+    dinit_load::read_setting_value(1 /* line_num */, li, le, &offsets);
+    dinit_load::value_var_subst("command", line, offsets, resolve_env_var, tenvmap);
+
+    assert(line == "test  foo");
+    check_arg(1, "");
+    check_arg(2, "foo");
+
+    // empty doesn't wordsplit:
+    line = "test abc$/{EMPTY}def";
+    li = line.begin(); le = line.end(); offsets.clear();
+    dinit_load::read_setting_value(1 /* line_num */, li, le, &offsets);
+    dinit_load::value_var_subst("command", line, offsets, resolve_env_var, tenvmap);
+
+    assert(line == "test abcdef");
+    check_arg(1, "abcdef");
+
+    // whitespace does wordsplit:
+    line = "test abc$/{WS}def";
+    li = line.begin(); le = line.end(); offsets.clear();
+    dinit_load::read_setting_value(1 /* line_num */, li, le, &offsets);
+    dinit_load::value_var_subst("command", line, offsets, resolve_env_var, tenvmap);
+
+    assert(line == "test abc def");
+    check_arg(1, "abc");
+    check_arg(2, "def");
+
+    // internal words handled correctly:
+    line = "test abc$/{PADDED}def";
+    li = line.begin(); le = line.end(); offsets.clear();
+    dinit_load::read_setting_value(1 /* line_num */, li, le, &offsets);
+    dinit_load::value_var_subst("command", line, offsets, resolve_env_var, tenvmap);
+
+    assert(line == "test abc p def");
+    check_arg(1, "abc");
+    check_arg(2, "p");
+    check_arg(3, "def");
+}
+
 void test_nonexistent()
 {
     bool got_service_not_found = false;
@@ -243,6 +327,7 @@ int main(int argc, char **argv)
     RUN_TEST(test_basic, "                ");
     RUN_TEST(test_env_subst, "            ");
     RUN_TEST(test_env_subst2, "           ");
+    RUN_TEST(test_env_subst3, "           ");
     RUN_TEST(test_nonexistent, "          ");
     RUN_TEST(test_settings, "             ");
     RUN_TEST(test_path_env_subst, "       ");