jail.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962
  1. /*
  2. * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU Lesser General Public License version 2.1
  6. * as published by the Free Software Foundation
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. */
  13. #define _GNU_SOURCE
  14. #include <sys/mount.h>
  15. #include <sys/prctl.h>
  16. #include <sys/wait.h>
  17. #include <sys/types.h>
  18. #include <stdlib.h>
  19. #include <unistd.h>
  20. #include <errno.h>
  21. #include <pwd.h>
  22. #include <grp.h>
  23. #include <string.h>
  24. #include <sys/stat.h>
  25. #include <fcntl.h>
  26. #include <libgen.h>
  27. #include <sched.h>
  28. #include <linux/limits.h>
  29. #include <signal.h>
  30. #include "capabilities.h"
  31. #include "elf.h"
  32. #include "fs.h"
  33. #include "jail.h"
  34. #include "log.h"
  35. #include <libubox/uloop.h>
  36. #include <libubus.h>
  37. #define STACK_SIZE (1024 * 1024)
  38. #define OPT_ARGS "S:C:n:h:r:w:d:psulocU:G:NR:fFO:T:Ey"
  /*
   * Jail configuration. Filled in by the option parser in main() and read
   * by the helpers in this file. Being static, every field starts out zero.
   */
  static struct {
      /* strings taken straight from the command line */
      char *name;             /* -n: name of the jail */
      char *hostname;         /* -h: hostname set inside the jail */
      char **jail_argv;       /* argv of the jailed binary (everything after the options) */
      char *seccomp;          /* -S: seccomp filter config file */
      char *capabilities;     /* -C: capabilities drop config file */
      char *user;             /* -U: user name to run as */
      char *group;            /* -G: group name to run as */
      char *extroot;          /* -R: external root filesystem directory */
      char *overlaydir;       /* -O: directory backing a r/w overlay */
      char *tmpoverlaysize;   /* -T: size of a tmpfs-backed r/w overlay */

      /* flags */
      int no_new_privs;       /* -c: set PR_SET_NO_NEW_PRIVS */
      int namespace;          /* mask of CLONE_NEW* flags passed to clone() */
      int procfs;             /* -p: mount /proc in the jail */
      int ronly;              /* -o: remount the jail root read-only */
      int sysfs;              /* -s: mount /sys in the jail */
      int console;            /* -y: provide a jail console */

      /* ids resolved by get_jail_user(); -1 when not requested */
      int pw_uid;
      int pw_gid;
      int gr_gid;

      int require_jail;       /* -E: fail if the jail cannot be set up */
  } opts;
  61. extern int pivot_root(const char *new_root, const char *put_old);
  62. int debug = 0;
  63. static char child_stack[STACK_SIZE];
  64. int console_fd;
  /*
   * Create `dir` and every missing parent directory, like `mkdir -p`.
   *
   * The path is split in place at its last '/' while recursing, so `dir`
   * must be writable. The separator is always put back before returning,
   * including when creating a parent failed, so the caller's buffer is
   * never left truncated.
   *
   * A path that contains no '/' is treated as "nothing to do".
   *
   * Returns 0 on success or when the directory already exists, non-zero
   * otherwise (the failing mkdir() is logged).
   */
  static int mkdir_p(char *dir, mode_t mask)
  {
      char *sep = strrchr(dir, '/');
      int ret;

      if (!sep)
          return 0;

      /* make sure everything up to the last separator exists first */
      *sep = '\0';
      ret = mkdir_p(dir, mask);
      *sep = '/';
      if (ret)
          return -1;

      ret = mkdir(dir, mask);
      if (ret && errno == EEXIST)
          return 0;

      if (ret)
          ERROR("mkdir(%s, %d) failed: %m\n", dir, mask);

      return ret;
  }
  /*
   * Bind-mount host `path` into the jail rooted at `root`.
   *
   * root:     directory the jail is being assembled in
   * path:     existing host file or directory to expose
   * target:   location below `root` to mount at; NULL means "same as path"
   * readonly: remount the bind read-only
   * strict:   remount with MS_NOEXEC | MS_NOSUID | MS_NODEV
   * error:    value to return when `path` cannot be stat()ed
   *
   * Returns 0 on success, `error` if `path` is missing, -1 on any other
   * failure.
   */
  static int _mount_bind(const char *root, const char *path, const char *target, int readonly, int strict, int error)
  {
      struct stat s;
      char new[PATH_MAX];
      const char *dest = target ? target : path;
      int remount_flags = MS_BIND | MS_REMOUNT;
      int len;
      int fd;

      if (stat(path, &s)) {
          ERROR("stat(%s) failed: %m\n", path);
          return error;
      }

      len = snprintf(new, sizeof(new), "%s%s", root, dest);
      if (len < 0 || (size_t)len >= sizeof(new)) {
          ERROR("mount point %s%s is too long\n", root, dest);
          return -1;
      }

      if (S_ISDIR(s.st_mode)) {
          mkdir_p(new, 0755);
      } else {
          /* dirname() may modify its argument, so the path is rebuilt afterwards */
          mkdir_p(dirname(new), 0755);
          snprintf(new, sizeof(new), "%s%s", root, dest);

          /*
           * Only a placeholder to mount on is needed. Do not use creat():
           * it would truncate a file that already exists at the target,
           * e.g. inside an external root filesystem.
           */
          fd = open(new, O_WRONLY | O_CREAT, 0644);
          if (fd == -1) {
              ERROR("failed to create %s: %m\n", new);
              return -1;
          }
          close(fd);
      }

      if (mount(path, new, NULL, MS_BIND, NULL)) {
          ERROR("failed to mount -B %s %s: %m\n", path, new);
          return -1;
      }

      if (readonly)
          remount_flags |= MS_RDONLY;

      if (strict)
          remount_flags |= MS_NOEXEC | MS_NOSUID | MS_NODEV;

      /* extra flags only take effect on a bind mount through a remount */
      if ((strict || readonly) && mount(NULL, new, NULL, remount_flags, NULL)) {
          ERROR("failed to remount (%s%s%s) %s: %m\n", readonly?"ro":"rw",
                (readonly && strict)?", ":"", strict?"strict":"", new);
          return -1;
      }

      DEBUG("mount -B %s %s (%s%s%s)\n", path, new,
            readonly?"ro":"rw", (readonly && strict)?", ":"", strict?"strict":"");

      return 0;
  }
  122. int mount_bind(const char *root, const char *path, int readonly, int error) {
  123. return _mount_bind(root, path, NULL, readonly, 0, error);
  124. }
  /*
   * Mount an overlay filesystem on top of `jail_root`, using `jail_root`
   * itself as the lower layer and <overlaydir>/upper and <overlaydir>/work
   * as upper and work directories (both are created if missing).
   *
   * Returns 0 on success, -1 on any failure.
   */
  static int mount_overlay(char *jail_root, char *overlaydir)
  {
      char *upper = NULL;
      char *work = NULL;
      char *mount_opts = NULL;
      int rc = -1;

      /* asprintf() leaves the pointer undefined on failure, so reset it for free() */
      if (asprintf(&upper, "%s%s", overlaydir, "/upper") < 0) {
          upper = NULL;
          goto done;
      }

      if (asprintf(&work, "%s%s", overlaydir, "/work") < 0) {
          work = NULL;
          goto done;
      }

      if (asprintf(&mount_opts, "lowerdir=%s,upperdir=%s,workdir=%s", jail_root, upper, work) < 0) {
          mount_opts = NULL;
          goto done;
      }

      if (mkdir_p(upper, 0755))
          goto done;

      if (mkdir_p(work, 0755))
          goto done;

      DEBUG("mount -t overlay %s %s (%s)\n", jail_root, jail_root, mount_opts);

      if (mount(jail_root, jail_root, "overlay", MS_NOATIME, mount_opts) == 0)
          rc = 0;

  done:
      free(mount_opts);
      free(work);
      free(upper);
      return rc;
  }
  150. static void pass_console(int console_fd)
  151. {
  152. struct ubus_context *ctx = ubus_connect(NULL);
  153. static struct blob_buf req;
  154. uint32_t id;
  155. if (!ctx)
  156. return;
  157. blob_buf_init(&req, 0);
  158. blobmsg_add_string(&req, "name", opts.name);
  159. if (ubus_lookup_id(ctx, "service", &id) ||
  160. ubus_invoke_fd(ctx, id, "console_set", req.head, NULL, NULL, 3000, console_fd))
  161. INFO("ubus request failed\n");
  162. else
  163. close(console_fd);
  164. blob_buf_free(&req);
  165. ubus_free(ctx);
  166. }
  167. static int create_dev_console(const char *jail_root)
  168. {
  169. char *console_fname;
  170. char dev_console_path[PATH_MAX];
  171. int slave_console_fd;
  172. /* Open UNIX/98 virtual console */
  173. console_fd = posix_openpt(O_RDWR | O_NOCTTY);
  174. if (console_fd == -1)
  175. return -1;
  176. console_fname = ptsname(console_fd);
  177. DEBUG("got console fd %d and PTS client name %s\n", console_fd, console_fname);
  178. if (!console_fname)
  179. goto no_console;
  180. grantpt(console_fd);
  181. unlockpt(console_fd);
  182. /* pass PTY master to procd */
  183. pass_console(console_fd);
  184. /* mount-bind PTY slave to /dev/console in jail */
  185. snprintf(dev_console_path, sizeof(dev_console_path), "%s/dev/console", jail_root);
  186. close(creat(dev_console_path, 0620));
  187. if (mount(console_fname, dev_console_path, NULL, MS_BIND, NULL))
  188. goto no_console;
  189. /* use PTY slave for stdio */
  190. slave_console_fd = open(console_fname, O_RDWR); /* | O_NOCTTY */
  191. dup2(slave_console_fd, 0);
  192. dup2(slave_console_fd, 1);
  193. dup2(slave_console_fd, 2);
  194. close(slave_console_fd);
  195. INFO("using guest console %s\n", console_fname);
  196. return 0;
  197. no_console:
  198. close(console_fd);
  199. return 1;
  200. }
  201. static int build_jail_fs(void)
  202. {
  203. char jail_root[] = "/tmp/ujail-XXXXXX";
  204. char tmpovdir[] = "/tmp/ujail-overlay-XXXXXX";
  205. char tmpdevdir[] = "/tmp/ujail-XXXXXX/dev";
  206. char tmpdevptsdir[] = "/tmp/ujail-XXXXXX/dev/pts";
  207. char *overlaydir = NULL;
  208. if (mkdtemp(jail_root) == NULL) {
  209. ERROR("mkdtemp(%s) failed: %m\n", jail_root);
  210. return -1;
  211. }
  212. /* oldroot can't be MS_SHARED else pivot_root() fails */
  213. if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) {
  214. ERROR("private mount failed %m\n");
  215. return -1;
  216. }
  217. if (opts.extroot) {
  218. if (mount(opts.extroot, jail_root, NULL, MS_BIND, NULL)) {
  219. ERROR("extroot mount failed %m\n");
  220. return -1;
  221. }
  222. } else {
  223. if (mount("tmpfs", jail_root, "tmpfs", MS_NOATIME, "mode=0755")) {
  224. ERROR("tmpfs mount failed %m\n");
  225. return -1;
  226. }
  227. }
  228. if (opts.tmpoverlaysize) {
  229. char mountoptsstr[] = "mode=0755,size=XXXXXXXX";
  230. snprintf(mountoptsstr, sizeof(mountoptsstr),
  231. "mode=0755,size=%s", opts.tmpoverlaysize);
  232. if (mkdtemp(tmpovdir) == NULL) {
  233. ERROR("mkdtemp(%s) failed: %m\n", jail_root);
  234. return -1;
  235. }
  236. if (mount("tmpfs", tmpovdir, "tmpfs", MS_NOATIME,
  237. mountoptsstr)) {
  238. ERROR("failed to mount tmpfs for overlay (size=%s)\n", opts.tmpoverlaysize);
  239. return -1;
  240. }
  241. overlaydir = tmpovdir;
  242. }
  243. if (opts.overlaydir)
  244. overlaydir = opts.overlaydir;
  245. if (overlaydir)
  246. mount_overlay(jail_root, overlaydir);
  247. if (chdir(jail_root)) {
  248. ERROR("chdir(%s) (jail_root) failed: %m\n", jail_root);
  249. return -1;
  250. }
  251. snprintf(tmpdevdir, sizeof(tmpdevdir), "%s/dev", jail_root);
  252. mkdir_p(tmpdevdir, 0755);
  253. if (mount(NULL, tmpdevdir, "tmpfs", MS_NOATIME | MS_NOEXEC | MS_NOSUID, "size=1M"))
  254. return -1;
  255. snprintf(tmpdevptsdir, sizeof(tmpdevptsdir), "%s/dev/pts", jail_root);
  256. mkdir_p(tmpdevptsdir, 0755);
  257. if (mount(NULL, tmpdevptsdir, "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, NULL))
  258. return -1;
  259. if (opts.console)
  260. create_dev_console(jail_root);
  261. if (mount_all(jail_root)) {
  262. ERROR("mount_all() failed\n");
  263. return -1;
  264. }
  265. if (opts.namespace & CLONE_NEWNET) {
  266. char hostdir[PATH_MAX], jailetc[PATH_MAX], jaillink[PATH_MAX];
  267. snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name);
  268. mkdir_p(hostdir, 0755);
  269. _mount_bind(jail_root, hostdir, "/tmp/resolv.conf.d", 1, 1, -1);
  270. snprintf(jailetc, PATH_MAX, "%s/etc", jail_root);
  271. mkdir_p(jailetc, 0755);
  272. snprintf(jaillink, PATH_MAX, "%s/etc/resolv.conf", jail_root);
  273. if (overlaydir)
  274. unlink(jaillink);
  275. symlink("../tmp/resolv.conf.d/resolv.conf.auto", jaillink);
  276. }
  277. char dirbuf[sizeof(jail_root) + 4];
  278. snprintf(dirbuf, sizeof(dirbuf), "%s/old", jail_root);
  279. mkdir(dirbuf, 0755);
  280. if (pivot_root(jail_root, dirbuf) == -1) {
  281. ERROR("pivot_root(%s, %s) failed: %m\n", jail_root, dirbuf);
  282. return -1;
  283. }
  284. if (chdir("/")) {
  285. ERROR("chdir(/) (after pivot_root) failed: %m\n");
  286. return -1;
  287. }
  288. snprintf(dirbuf, sizeof(dirbuf), "/old%s", jail_root);
  289. umount2(dirbuf, MNT_DETACH);
  290. rmdir(dirbuf);
  291. if (opts.tmpoverlaysize) {
  292. char tmpdirbuf[sizeof(tmpovdir) + 4];
  293. snprintf(tmpdirbuf, sizeof(tmpdirbuf), "/old%s", tmpovdir);
  294. umount2(tmpdirbuf, MNT_DETACH);
  295. rmdir(tmpdirbuf);
  296. }
  297. umount2("/old", MNT_DETACH);
  298. rmdir("/old");
  299. if (opts.procfs) {
  300. mkdir("/proc", 0755);
  301. mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0);
  302. /*
  303. * make /proc/sys read-only while keeping read-write to
  304. * /proc/sys/net if CLONE_NEWNET is set.
  305. */
  306. if (opts.namespace & CLONE_NEWNET)
  307. mount("/proc/sys/net", "/proc/self/net", NULL, MS_BIND, 0);
  308. mount("/proc/sys", "/proc/sys", NULL, MS_BIND, 0);
  309. mount(NULL, "/proc/sys", NULL, MS_REMOUNT | MS_RDONLY, 0);
  310. mount(NULL, "/proc", NULL, MS_REMOUNT, 0);
  311. if (opts.namespace & CLONE_NEWNET)
  312. mount("/proc/self/net", "/proc/sys/net", NULL, MS_MOVE, 0);
  313. }
  314. if (opts.sysfs) {
  315. mkdir("/sys", 0755);
  316. mount("sysfs", "/sys", "sysfs", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, 0);
  317. }
  318. if (opts.ronly)
  319. mount(NULL, "/", NULL, MS_RDONLY | MS_REMOUNT, 0);
  320. return 0;
  321. }
  /*
   * Write a one-entry id mapping for `child_pid`: id 0 inside the namespace
   * maps to `id` outside, for a range of 1.
   *
   * gidmap selects /proc/<pid>/gid_map (true) or /proc/<pid>/uid_map (false).
   * Returns 0 on success, -1 on failure.
   */
  static int write_uid_gid_map(pid_t child_pid, bool gidmap, int id)
  {
      const char *which = gidmap ? "gid_map" : "uid_map";
      char map_path[64];
      int fd;
      int rc = 0;

      if (snprintf(map_path, sizeof(map_path), "/proc/%d/%s", child_pid, which) < 0)
          return -1;

      fd = open(map_path, O_WRONLY);
      if (fd == -1)
          return -1;

      if (dprintf(fd, "%d %d %d\n", 0, id, 1) == -1)
          rc = -1;

      close(fd);
      return rc;
  }
  /*
   * Write "allow" or "deny" to /proc/<child_pid>/setgroups.
   *
   * Returns 0 on success, -1 if the file could not be opened or written.
   */
  static int write_setgroups(pid_t child_pid, bool allow)
  {
      int setgroups_file;
      char setgroups_path[64];
      int ret = 0;

      if (snprintf(setgroups_path, sizeof(setgroups_path), "/proc/%d/setgroups",
                   child_pid) < 0)
          return -1;

      setgroups_file = open(setgroups_path, O_WRONLY);
      if (setgroups_file == -1)
          return -1;

      /* keep the format a literal; the keyword is passed as data */
      if (dprintf(setgroups_file, "%s", allow ? "allow" : "deny") == -1)
          ret = -1;

      close(setgroups_file);

      return ret;
  }
  357. static void get_jail_user(int *user, int *user_gid, int *gr_gid)
  358. {
  359. struct passwd *p = NULL;
  360. struct group *g = NULL;
  361. if (opts.user) {
  362. p = getpwnam(opts.user);
  363. if (!p) {
  364. ERROR("failed to get uid/gid for user %s: %d (%s)\n",
  365. opts.user, errno, strerror(errno));
  366. exit(EXIT_FAILURE);
  367. }
  368. *user = p->pw_uid;
  369. *user_gid = p->pw_gid;
  370. } else {
  371. *user = -1;
  372. *user_gid = -1;
  373. }
  374. if (opts.group) {
  375. g = getgrnam(opts.group);
  376. if (!g) {
  377. ERROR("failed to get gid for group %s: %m\n", opts.group);
  378. exit(EXIT_FAILURE);
  379. }
  380. *gr_gid = g->gr_gid;
  381. } else {
  382. *gr_gid = -1;
  383. }
  384. };
  385. static void set_jail_user(int pw_uid, int user_gid, int gr_gid)
  386. {
  387. if ((user_gid != -1) && initgroups(opts.user, user_gid)) {
  388. ERROR("failed to initgroups() for user %s: %m\n", opts.user);
  389. exit(EXIT_FAILURE);
  390. }
  391. if ((gr_gid != -1) && setregid(gr_gid, gr_gid)) {
  392. ERROR("failed to set group id %d: %m\n", gr_gid);
  393. exit(EXIT_FAILURE);
  394. }
  395. if ((pw_uid != -1) && setreuid(pw_uid, pw_uid)) {
  396. ERROR("failed to set user id %d: %m\n", pw_uid);
  397. exit(EXIT_FAILURE);
  398. }
  399. }
  400. #define MAX_ENVP 8
  401. static char** build_envp(const char *seccomp)
  402. {
  403. static char *envp[MAX_ENVP];
  404. static char preload_var[PATH_MAX];
  405. static char seccomp_var[PATH_MAX];
  406. static char debug_var[] = "LD_DEBUG=all";
  407. static char container_var[] = "container=ujail";
  408. const char *preload_lib = find_lib("libpreload-seccomp.so");
  409. int count = 0;
  410. if (seccomp && !preload_lib) {
  411. ERROR("failed to add preload-lib to env\n");
  412. return NULL;
  413. }
  414. if (seccomp) {
  415. snprintf(seccomp_var, sizeof(seccomp_var), "SECCOMP_FILE=%s", seccomp);
  416. envp[count++] = seccomp_var;
  417. snprintf(preload_var, sizeof(preload_var), "LD_PRELOAD=%s", preload_lib);
  418. envp[count++] = preload_var;
  419. }
  420. envp[count++] = container_var;
  421. if (debug > 1)
  422. envp[count++] = debug_var;
  423. return envp;
  424. }
  /* Print the command line help, followed by a security warning, to stderr. */
  static void usage(void)
  {
      fprintf(stderr, "ujail <options> -- <binary> <params ...>\n");
      fprintf(stderr, " -d <num>\tshow debug log (increase num to increase verbosity)\n");
      fprintf(stderr, " -S <file>\tseccomp filter config\n");
      fprintf(stderr, " -C <file>\tcapabilities drop config\n");
      fprintf(stderr, " -c\t\tset PR_SET_NO_NEW_PRIVS\n");
      fprintf(stderr, " -n <name>\tthe name of the jail\n");
      fprintf(stderr, "namespace jail options:\n");
      fprintf(stderr, " -h <hostname>\tchange the hostname of the jail\n");
      fprintf(stderr, " -N\t\tjail has network namespace\n");
      fprintf(stderr, " -f\t\tjail has user namespace\n");
      fprintf(stderr, " -F\t\tjail has cgroups namespace\n");
      fprintf(stderr, " -r <file>\treadonly files that should be staged\n");
      fprintf(stderr, " -w <file>\twriteable files that should be staged\n");
      fprintf(stderr, " -p\t\tjail has /proc\n");
      fprintf(stderr, " -s\t\tjail has /sys\n");
      fprintf(stderr, " -l\t\tjail has /dev/log\n");
      fprintf(stderr, " -u\t\tjail has a ubus socket\n");
      fprintf(stderr, " -U <name>\tuser to run jailed process\n");
      fprintf(stderr, " -G <name>\tgroup to run jailed process\n");
      fprintf(stderr, " -o\t\tremount jail root (/) read only\n");
      fprintf(stderr, " -R <dir>\texternal jail rootfs (system container)\n");
      fprintf(stderr, " -O <dir>\tdirectory for r/w overlayfs\n");
      fprintf(stderr, " -T <size>\tuse tmpfs r/w overlayfs with <size>\n");
      fprintf(stderr, " -E\t\tfail if jail cannot be setup\n");
      fprintf(stderr, " -y\t\tprovide jail console\n");
      fprintf(stderr,
              "\nWarning: by default root inside the jail is the same\n"
              "and he has the same powers as root outside the jail,\n"
              "thus he can escape the jail and/or break stuff.\n"
              "Please use seccomp/capabilities (-S/-C) to restrict his powers\n\n"
              "If you use none of the namespace jail options,\n"
              "ujail will not use namespace/build a jail,\n"
              "and will only drop capabilities/apply seccomp filter.\n\n");
  }
  460. static int exec_jail(void *pipes_ptr)
  461. {
  462. int *pipes = (int*)pipes_ptr;
  463. char buf[1];
  464. int pw_uid, pw_gid, gr_gid;
  465. close(pipes[0]);
  466. close(pipes[3]);
  467. buf[0] = 'i';
  468. if (write(pipes[1], buf, 1) < 1) {
  469. ERROR("can't write to parent\n");
  470. exit(EXIT_FAILURE);
  471. }
  472. if (read(pipes[2], buf, 1) < 1) {
  473. ERROR("can't read from parent\n");
  474. exit(EXIT_FAILURE);
  475. }
  476. if (buf[0] != 'O') {
  477. ERROR("parent had an error, child exiting\n");
  478. exit(EXIT_FAILURE);
  479. }
  480. close(pipes[1]);
  481. close(pipes[2]);
  482. if (opts.namespace & CLONE_NEWUSER) {
  483. if (setgid(0) < 0) {
  484. ERROR("setgid\n");
  485. exit(EXIT_FAILURE);
  486. }
  487. if (setuid(0) < 0) {
  488. ERROR("setuid\n");
  489. exit(EXIT_FAILURE);
  490. }
  491. // if (setgroups(0, NULL) < 0) {
  492. // ERROR("setgroups\n");
  493. // exit(EXIT_FAILURE);
  494. // }
  495. }
  496. if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0
  497. && sethostname(opts.hostname, strlen(opts.hostname))) {
  498. ERROR("sethostname(%s) failed: %m\n", opts.hostname);
  499. exit(EXIT_FAILURE);
  500. }
  501. if ((opts.namespace & CLONE_NEWNS) && build_jail_fs()) {
  502. ERROR("failed to build jail fs\n");
  503. exit(EXIT_FAILURE);
  504. }
  505. if (opts.capabilities && drop_capabilities(opts.capabilities))
  506. exit(EXIT_FAILURE);
  507. if (opts.no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  508. ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
  509. exit(EXIT_FAILURE);
  510. }
  511. if (!(opts.namespace & CLONE_NEWUSER)) {
  512. get_jail_user(&pw_uid, &pw_gid, &gr_gid);
  513. set_jail_user(pw_uid, pw_gid, gr_gid);
  514. }
  515. char **envp = build_envp(opts.seccomp);
  516. if (!envp)
  517. exit(EXIT_FAILURE);
  518. INFO("exec-ing %s\n", *opts.jail_argv);
  519. execve(*opts.jail_argv, opts.jail_argv, envp);
  520. /* we get there only if execve fails */
  521. ERROR("failed to execve %s: %m\n", *opts.jail_argv);
  522. exit(EXIT_FAILURE);
  523. }
  524. static int jail_running = 1;
  525. static int jail_return_code = 0;
  526. static void jail_process_timeout_cb(struct uloop_timeout *t);
  527. static struct uloop_timeout jail_process_timeout = {
  528. .cb = jail_process_timeout_cb,
  529. };
  530. static void jail_process_handler(struct uloop_process *c, int ret)
  531. {
  532. uloop_timeout_cancel(&jail_process_timeout);
  533. if (WIFEXITED(ret)) {
  534. jail_return_code = WEXITSTATUS(ret);
  535. INFO("jail (%d) exited with exit: %d\n", c->pid, jail_return_code);
  536. } else {
  537. jail_return_code = WTERMSIG(ret);
  538. INFO("jail (%d) exited with signal: %d\n", c->pid, jail_return_code);
  539. }
  540. jail_running = 0;
  541. uloop_end();
  542. }
  543. static struct uloop_process jail_process = {
  544. .cb = jail_process_handler,
  545. };
  546. static void jail_process_timeout_cb(struct uloop_timeout *t)
  547. {
  548. DEBUG("jail process failed to stop, sending SIGKILL\n");
  549. kill(jail_process.pid, SIGKILL);
  550. }
  551. static void jail_handle_signal(int signo)
  552. {
  553. DEBUG("forwarding signal %d to the jailed process\n", signo);
  554. kill(jail_process.pid, signo);
  555. }
  /*
   * Open the network namespace handle of process `target_ns`.
   *
   * Returns a read-only file descriptor for /proc/<pid>/ns/net, or -1 (with
   * errno set by open()) if it cannot be opened. The caller owns the
   * descriptor.
   */
  static int netns_open_pid(const pid_t target_ns)
  {
      char pid_net_path[PATH_MAX];

      /* pid_t is signed, so format it as such */
      snprintf(pid_net_path, sizeof(pid_net_path), "/proc/%d/ns/net", (int)target_ns);

      return open(pid_net_path, O_RDONLY);
  }
  562. static void netns_updown(pid_t pid, bool start)
  563. {
  564. struct ubus_context *ctx = ubus_connect(NULL);
  565. static struct blob_buf req;
  566. uint32_t id;
  567. if (!ctx)
  568. return;
  569. blob_buf_init(&req, 0);
  570. blobmsg_add_string(&req, "jail", opts.name);
  571. blobmsg_add_u32(&req, "pid", pid);
  572. blobmsg_add_u8(&req, "start", start);
  573. if (ubus_lookup_id(ctx, "network", &id) ||
  574. ubus_invoke(ctx, id, "netns_updown", req.head, NULL, NULL, 3000))
  575. INFO("ubus request failed\n");
  576. blob_buf_free(&req);
  577. ubus_free(ctx);
  578. }
  579. int main(int argc, char **argv)
  580. {
  581. sigset_t sigmask;
  582. uid_t uid = getuid();
  583. char log[] = "/dev/log";
  584. char ubus[] = "/var/run/ubus.sock";
  585. int ch, i;
  586. int pipes[4];
  587. char sig_buf[1];
  588. int netns_fd;
  589. if (uid) {
  590. ERROR("not root, aborting: %m\n");
  591. return EXIT_FAILURE;
  592. }
  593. umask(022);
  594. mount_list_init();
  595. init_library_search();
  596. while ((ch = getopt(argc, argv, OPT_ARGS)) != -1) {
  597. switch (ch) {
  598. case 'd':
  599. debug = atoi(optarg);
  600. break;
  601. case 'p':
  602. opts.namespace |= CLONE_NEWNS;
  603. opts.procfs = 1;
  604. break;
  605. case 'o':
  606. opts.namespace |= CLONE_NEWNS;
  607. opts.ronly = 1;
  608. break;
  609. case 'f':
  610. opts.namespace |= CLONE_NEWUSER;
  611. break;
  612. case 'F':
  613. opts.namespace |= CLONE_NEWCGROUP;
  614. break;
  615. case 'R':
  616. opts.extroot = optarg;
  617. break;
  618. case 's':
  619. opts.namespace |= CLONE_NEWNS;
  620. opts.sysfs = 1;
  621. break;
  622. case 'S':
  623. opts.seccomp = optarg;
  624. add_mount(optarg, 1, -1);
  625. break;
  626. case 'C':
  627. opts.capabilities = optarg;
  628. break;
  629. case 'c':
  630. opts.no_new_privs = 1;
  631. break;
  632. case 'n':
  633. opts.name = optarg;
  634. break;
  635. case 'N':
  636. opts.namespace |= CLONE_NEWNET;
  637. break;
  638. case 'h':
  639. opts.namespace |= CLONE_NEWUTS;
  640. opts.hostname = optarg;
  641. break;
  642. case 'r':
  643. opts.namespace |= CLONE_NEWNS;
  644. add_path_and_deps(optarg, 1, 0, 0);
  645. break;
  646. case 'w':
  647. opts.namespace |= CLONE_NEWNS;
  648. add_path_and_deps(optarg, 0, 0, 0);
  649. break;
  650. case 'u':
  651. opts.namespace |= CLONE_NEWNS;
  652. add_mount(ubus, 0, -1);
  653. break;
  654. case 'l':
  655. opts.namespace |= CLONE_NEWNS;
  656. add_mount(log, 0, -1);
  657. break;
  658. case 'U':
  659. opts.user = optarg;
  660. break;
  661. case 'G':
  662. opts.group = optarg;
  663. break;
  664. case 'O':
  665. opts.overlaydir = optarg;
  666. break;
  667. case 'T':
  668. opts.tmpoverlaysize = optarg;
  669. break;
  670. case 'E':
  671. opts.require_jail = 1;
  672. break;
  673. case 'y':
  674. opts.console = 1;
  675. break;
  676. }
  677. }
  678. if (opts.namespace)
  679. opts.namespace |= CLONE_NEWIPC | CLONE_NEWPID;
  680. if (opts.tmpoverlaysize && strlen(opts.tmpoverlaysize) > 8) {
  681. ERROR("size parameter too long: \"%s\"\n", opts.tmpoverlaysize);
  682. return -1;
  683. }
  684. /* no <binary> param found */
  685. if (argc - optind < 1) {
  686. usage();
  687. return EXIT_FAILURE;
  688. }
  689. if (!(opts.namespace||opts.capabilities||opts.seccomp)) {
  690. ERROR("Not using namespaces, capabilities or seccomp !!!\n\n");
  691. usage();
  692. return EXIT_FAILURE;
  693. }
  694. DEBUG("Using namespaces(0x%08x), capabilities(%d), seccomp(%d)\n",
  695. opts.namespace,
  696. opts.capabilities != 0,
  697. opts.seccomp != 0);
  698. opts.jail_argv = &argv[optind];
  699. get_jail_user(&opts.pw_uid, &opts.pw_gid, &opts.gr_gid);
  700. if (!opts.extroot) {
  701. if (opts.namespace && add_path_and_deps(*opts.jail_argv, 1, -1, 0)) {
  702. ERROR("failed to load dependencies\n");
  703. return -1;
  704. }
  705. }
  706. if (opts.namespace && opts.seccomp && add_path_and_deps("libpreload-seccomp.so", 1, -1, 1)) {
  707. ERROR("failed to load libpreload-seccomp.so\n");
  708. opts.seccomp = 0;
  709. if (opts.require_jail)
  710. return -1;
  711. }
  712. if (opts.name)
  713. prctl(PR_SET_NAME, opts.name, NULL, NULL, NULL);
  714. uloop_init();
  715. sigfillset(&sigmask);
  716. for (i = 0; i < _NSIG; i++) {
  717. struct sigaction s = { 0 };
  718. if (!sigismember(&sigmask, i))
  719. continue;
  720. if ((i == SIGCHLD) || (i == SIGPIPE) || (i == SIGSEGV))
  721. continue;
  722. s.sa_handler = jail_handle_signal;
  723. sigaction(i, &s, NULL);
  724. }
  725. if (opts.namespace) {
  726. if (opts.namespace & CLONE_NEWNS) {
  727. add_mount("/dev/full", 0, -1);
  728. add_mount("/dev/null", 0, -1);
  729. add_mount("/dev/random", 0, -1);
  730. add_mount("/dev/urandom", 0, -1);
  731. add_mount("/dev/zero", 0, -1);
  732. add_mount("/dev/ptmx", 0, -1);
  733. add_mount("/dev/tty", 0, -1);
  734. if (!opts.extroot && (opts.user || opts.group)) {
  735. add_mount("/etc/passwd", 0, -1);
  736. add_mount("/etc/group", 0, -1);
  737. }
  738. #if defined(__GLIBC__)
  739. if (!opts.extroot)
  740. add_mount("/etc/nsswitch.conf", 0, -1);
  741. #endif
  742. if (!(opts.namespace & CLONE_NEWNET)) {
  743. add_mount("/etc/resolv.conf", 0, -1);
  744. }
  745. }
  746. if (pipe(&pipes[0]) < 0 || pipe(&pipes[2]) < 0)
  747. return -1;
  748. jail_process.pid = clone(exec_jail, child_stack + STACK_SIZE, SIGCHLD | opts.namespace, &pipes);
  749. } else {
  750. jail_process.pid = fork();
  751. }
  752. if (jail_process.pid > 0) {
  753. seteuid(0);
  754. /* parent process */
  755. close(pipes[1]);
  756. close(pipes[2]);
  757. if (read(pipes[0], sig_buf, 1) < 1) {
  758. ERROR("can't read from child\n");
  759. return -1;
  760. }
  761. close(pipes[0]);
  762. if (opts.namespace & CLONE_NEWUSER) {
  763. bool has_gr = (opts.gr_gid != -1);
  764. if (write_setgroups(jail_process.pid, false)) {
  765. ERROR("can't write setgroups\n");
  766. return -1;
  767. }
  768. if (opts.pw_uid != -1) {
  769. write_uid_gid_map(jail_process.pid, 0, opts.pw_uid);
  770. write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:opts.pw_gid);
  771. } else {
  772. write_uid_gid_map(jail_process.pid, 0, 65534);
  773. write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:65534);
  774. }
  775. }
  776. if (opts.namespace & CLONE_NEWNET) {
  777. if (!opts.name) {
  778. ERROR("netns needs a named jail\n");
  779. return -1;
  780. }
  781. netns_fd = netns_open_pid(jail_process.pid);
  782. netns_updown(jail_process.pid, true);
  783. }
  784. sig_buf[0] = 'O';
  785. if (write(pipes[3], sig_buf, 1) < 0) {
  786. ERROR("can't write to child\n");
  787. return -1;
  788. }
  789. close(pipes[3]);
  790. uloop_process_add(&jail_process);
  791. uloop_run();
  792. if (jail_running) {
  793. DEBUG("uloop interrupted, killing jail process\n");
  794. kill(jail_process.pid, SIGTERM);
  795. uloop_timeout_set(&jail_process_timeout, 1000);
  796. uloop_run();
  797. }
  798. uloop_done();
  799. if (opts.namespace & CLONE_NEWNET) {
  800. setns(netns_fd, CLONE_NEWNET);
  801. netns_updown(getpid(), false);
  802. close(netns_fd);
  803. }
  804. return jail_return_code;
  805. } else if (jail_process.pid == 0) {
  806. /* fork child process */
  807. return exec_jail(NULL);
  808. } else {
  809. ERROR("failed to clone/fork: %m\n");
  810. return EXIT_FAILURE;
  811. }
  812. }