unshare.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * Mini unshare implementation for busybox.
  4. *
  5. * Copyright (C) 2016 by Bartosz Golaszewski <bartekgola@gmail.com>
  6. *
  7. * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  8. */
  9. //config:config UNSHARE
  10. //config: bool "unshare (9.2 kb)"
  11. //config: default y
  12. //config: depends on !NOMMU
  13. //config: select PLATFORM_LINUX
  14. //config: select LONG_OPTS
  15. //config: help
  16. //config: Run program with some namespaces unshared from parent.
  17. // needs LONG_OPTS: it is awkward to exclude code which handles --propagation
  18. // and --setgroups based on LONG_OPTS, so instead applet requires LONG_OPTS.
  19. // depends on !NOMMU: we need fork()
  20. //applet:IF_UNSHARE(APPLET(unshare, BB_DIR_USR_BIN, BB_SUID_DROP))
  21. //kbuild:lib-$(CONFIG_UNSHARE) += unshare.o
  22. //usage:#define unshare_trivial_usage
  23. //usage: "[OPTIONS] [PROG [ARGS]]"
  24. //usage:#define unshare_full_usage "\n"
  25. //usage: "\n -m,--mount[=FILE] Unshare mount namespace"
  26. //usage: "\n -u,--uts[=FILE] Unshare UTS namespace (hostname etc.)"
  27. //usage: "\n -i,--ipc[=FILE] Unshare System V IPC namespace"
  28. //usage: "\n -n,--net[=FILE] Unshare network namespace"
  29. //usage: "\n -p,--pid[=FILE] Unshare PID namespace"
  30. //usage: "\n -U,--user[=FILE] Unshare user namespace"
  31. //usage: "\n -f,--fork Fork before execing PROG"
  32. //usage: "\n -r,--map-root-user Map current user to root (implies -U)"
  33. //usage: "\n --mount-proc[=DIR] Mount /proc filesystem first (implies -m)"
  34. //usage: "\n --propagation slave|shared|private|unchanged"
  35. //usage: "\n Modify mount propagation in mount namespace"
  36. //usage: "\n --setgroups allow|deny Control the setgroups syscall in user namespaces"
  37. #include <sched.h>
  38. #ifndef CLONE_NEWUTS
  39. # define CLONE_NEWUTS 0x04000000
  40. #endif
  41. #ifndef CLONE_NEWIPC
  42. # define CLONE_NEWIPC 0x08000000
  43. #endif
  44. #ifndef CLONE_NEWUSER
  45. # define CLONE_NEWUSER 0x10000000
  46. #endif
  47. #ifndef CLONE_NEWPID
  48. # define CLONE_NEWPID 0x20000000
  49. #endif
  50. #ifndef CLONE_NEWNET
  51. # define CLONE_NEWNET 0x40000000
  52. #endif
  53. #include <sys/mount.h>
  54. #ifndef MS_REC
  55. # define MS_REC (1 << 14)
  56. #endif
  57. #ifndef MS_PRIVATE
  58. # define MS_PRIVATE (1 << 18)
  59. #endif
  60. #ifndef MS_SLAVE
  61. # define MS_SLAVE (1 << 19)
  62. #endif
  63. #ifndef MS_SHARED
  64. # define MS_SHARED (1 << 20)
  65. #endif
  66. #include "libbb.h"
  67. static void mount_or_die(const char *source, const char *target,
  68. const char *fstype, unsigned long mountflags)
  69. {
  70. if (mount(source, target, fstype, mountflags, NULL)) {
  71. bb_perror_msg_and_die("can't mount %s on %s (flags:0x%lx)",
  72. source, target, mountflags);
  73. /* fstype is always either NULL or "proc".
  74. * "proc" is only used to mount /proc.
  75. * No need to clutter up error message with fstype,
  76. * it is easily deductible.
  77. */
  78. }
  79. }
  /*
   * procfs files of the calling process that unshare_main() writes to
   * when handling --map-root-user and --setgroups.
   */
  #define PATH_PROC_SETGROUPS     "/proc/self/setgroups"
  #define PATH_PROC_UIDMAP        "/proc/self/uid_map"
  #define PATH_PROC_GIDMAP        "/proc/self/gid_map"
  /*
   * Static description of one namespace type.
   */
  struct namespace_descr {
      /* CLONE_NEW* bit that is OR-ed into the unshare() flags */
      int flag;
      /*
       * Name of the entry under /proc/PID/ns/. Exactly four bytes:
       * a four-letter name such as "user" fills the array completely
       * and is then NOT NUL-terminated, so print it with "%.4s".
       */
      const char nsfile4[4];
  };
  /*
   * Per-invocation state of one namespace type.
   */
  struct namespace_ctx {
      /*
       * FILE given to the matching long option (e.g. --net=FILE): the
       * namespace is bind-mounted onto it. NULL when no FILE was given.
       */
      char *path;
  };
  /*
   * Bits of the option mask returned by getopt32long(), one per option
   * in the order they are listed in OPT_STR.
   */
  enum {
      /* Namespace selectors: bit N is tested as (1 << N) for ns_list[N] */
      OPT_mount       = 0x001,
      OPT_uts         = 0x002,
      OPT_ipc         = 0x004,
      OPT_net         = 0x008,
      OPT_pid         = 0x010,
      OPT_user        = 0x020, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
      /* Behaviour modifiers */
      OPT_fork        = 0x040,
      OPT_map_root    = 0x080,
      OPT_mount_proc  = 0x100,
      OPT_propagation = 0x200,
      OPT_setgroups   = 0x400,
  };
  /*
   * Indices into ns_list[] and into the per-invocation namespace_ctx
   * array; NS_COUNT is the number of namespace types handled.
   */
  enum {
      NS_MNT_POS = 0,
      NS_UTS_POS = 1,
      NS_IPC_POS = 2,
      NS_NET_POS = 3,
      NS_PID_POS = 4,
      NS_USR_POS = 5, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
      NS_COUNT   = 6,
  };
  112. static const struct namespace_descr ns_list[] = {
  113. { CLONE_NEWNS, "mnt" },
  114. { CLONE_NEWUTS, "uts" },
  115. { CLONE_NEWIPC, "ipc" },
  116. { CLONE_NEWNET, "net" },
  117. { CLONE_NEWPID, "pid" },
  118. { CLONE_NEWUSER, "user" }, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
  119. };
  120. /*
  121. * Upstream unshare doesn't support short options for --mount-proc,
  122. * --propagation, --setgroups.
  123. * Optional arguments (namespace mountpoints) exist only for long opts,
  124. * we are forced to use "fake" letters for them.
  125. * '+': stop at first non-option.
  126. */
  127. #define OPT_STR "+muinpU""fr""\xfd::""\xfe:""\xff:"
  128. static const char unshare_longopts[] ALIGN1 =
  129. "mount\0" Optional_argument "\xf0"
  130. "uts\0" Optional_argument "\xf1"
  131. "ipc\0" Optional_argument "\xf2"
  132. "net\0" Optional_argument "\xf3"
  133. "pid\0" Optional_argument "\xf4"
  134. "user\0" Optional_argument "\xf5"
  135. "fork\0" No_argument "f"
  136. "map-root-user\0" No_argument "r"
  137. "mount-proc\0" Optional_argument "\xfd"
  138. "propagation\0" Required_argument "\xfe"
  139. "setgroups\0" Required_argument "\xff"
  140. ;
  /*
   * Ugly-looking string reuse trick: read as a plain C string,
   * PRIVATE_STR is just "private" (the default --propagation value);
   * read as a NUL-separated list, it enumerates every accepted value.
   */
  #define PRIVATE_STR "private\0""unchanged\0""shared\0""slave\0"
  #define PRIVATE_UNCHANGED_SHARED_SLAVE PRIVATE_STR

  /*
   * Translate a --propagation argument into mount(2) flags.
   *
   * Returns MS_REC combined with MS_PRIVATE, MS_SHARED or MS_SLAVE for
   * "private", "shared" and "slave", and 0 for "unchanged".
   * Any other string is reported as unrecognized and terminates.
   */
  static unsigned long parse_propagation(const char *prop_str)
  {
      int which = index_in_strings(PRIVATE_UNCHANGED_SHARED_SLAVE, prop_str);

      if (which < 0)
          bb_error_msg_and_die("unrecognized: --%s=%s", "propagation", prop_str);

      switch (which) {
      case 0: /* private */
          return MS_REC | MS_PRIVATE;
      case 1: /* unchanged */
          return 0;
      case 2: /* shared */
          return MS_REC | MS_SHARED;
      default: /* slave */
          return MS_REC | MS_SLAVE;
      }
  }
  157. static void mount_namespaces(pid_t pid, struct namespace_ctx *ns_ctx_list)
  158. {
  159. const struct namespace_descr *ns;
  160. struct namespace_ctx *ns_ctx;
  161. int i;
  162. for (i = 0; i < NS_COUNT; i++) {
  163. char nsf[sizeof("/proc/%u/ns/AAAA") + sizeof(int)*3];
  164. ns = &ns_list[i];
  165. ns_ctx = &ns_ctx_list[i];
  166. if (!ns_ctx->path)
  167. continue;
  168. sprintf(nsf, "/proc/%u/ns/%.4s", (unsigned)pid, ns->nsfile4);
  169. mount_or_die(nsf, ns_ctx->path, NULL, MS_BIND);
  170. }
  171. }
  172. int unshare_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  173. int unshare_main(int argc UNUSED_PARAM, char **argv)
  174. {
  175. int i;
  176. unsigned int opts;
  177. int unsflags;
  178. uintptr_t need_mount;
  179. const char *proc_mnt_target;
  180. const char *prop_str;
  181. const char *setgrp_str;
  182. unsigned long prop_flags;
  183. uid_t reuid = geteuid();
  184. gid_t regid = getegid();
  185. struct fd_pair fdp;
  186. pid_t child = child; /* for compiler */
  187. struct namespace_ctx ns_ctx_list[NS_COUNT];
  188. memset(ns_ctx_list, 0, sizeof(ns_ctx_list));
  189. proc_mnt_target = "/proc";
  190. prop_str = PRIVATE_STR;
  191. setgrp_str = NULL;
  192. opts = getopt32long(argv, "^" OPT_STR "\0"
  193. "\xf0""m" /* long opts (via their "fake chars") imply short opts */
  194. ":\xf1""u"
  195. ":\xf2""i"
  196. ":\xf3""n"
  197. ":\xf4""p"
  198. ":\xf5""U"
  199. ":rU" /* --map-root-user or -r implies -U */
  200. ":\xfd""m" /* --mount-proc implies -m */
  201. , unshare_longopts,
  202. &proc_mnt_target, &prop_str, &setgrp_str,
  203. &ns_ctx_list[NS_MNT_POS].path,
  204. &ns_ctx_list[NS_UTS_POS].path,
  205. &ns_ctx_list[NS_IPC_POS].path,
  206. &ns_ctx_list[NS_NET_POS].path,
  207. &ns_ctx_list[NS_PID_POS].path,
  208. &ns_ctx_list[NS_USR_POS].path
  209. );
  210. argv += optind;
  211. //bb_error_msg("opts:0x%x", opts);
  212. //bb_error_msg("mount:%s", ns_ctx_list[NS_MNT_POS].path);
  213. //bb_error_msg("proc_mnt_target:%s", proc_mnt_target);
  214. //bb_error_msg("prop_str:%s", prop_str);
  215. //bb_error_msg("setgrp_str:%s", setgrp_str);
  216. //exit(1);
  217. if (setgrp_str) {
  218. if (strcmp(setgrp_str, "allow") == 0) {
  219. if (opts & OPT_map_root) {
  220. bb_error_msg_and_die(
  221. "--setgroups=allow and --map-root-user "
  222. "are mutually exclusive"
  223. );
  224. }
  225. } else {
  226. /* It's not "allow", must be "deny" */
  227. if (strcmp(setgrp_str, "deny") != 0)
  228. bb_error_msg_and_die("unrecognized: --%s=%s",
  229. "setgroups", setgrp_str);
  230. }
  231. }
  232. unsflags = 0;
  233. need_mount = 0;
  234. for (i = 0; i < NS_COUNT; i++) {
  235. const struct namespace_descr *ns = &ns_list[i];
  236. struct namespace_ctx *ns_ctx = &ns_ctx_list[i];
  237. if (opts & (1 << i))
  238. unsflags |= ns->flag;
  239. need_mount |= (uintptr_t)(ns_ctx->path);
  240. }
  241. /* need_mount != 0 if at least one FILE was given */
  242. prop_flags = MS_REC | MS_PRIVATE;
  243. /* Silently ignore --propagation if --mount is not requested. */
  244. if (opts & OPT_mount)
  245. prop_flags = parse_propagation(prop_str);
  246. /*
  247. * Special case: if we were requested to unshare the mount namespace
  248. * AND to make any namespace persistent (by bind mounting it) we need
  249. * to spawn a child process which will wait for the parent to call
  250. * unshare(), then mount parent's namespaces while still in the
  251. * previous namespace.
  252. */
  253. fdp.wr = -1;
  254. if (need_mount && (opts & OPT_mount)) {
  255. /*
  256. * Can't use getppid() in child, as we can be unsharing the
  257. * pid namespace.
  258. */
  259. pid_t ppid = getpid();
  260. xpiped_pair(fdp);
  261. child = xfork();
  262. if (child == 0) {
  263. /* Child */
  264. close(fdp.wr);
  265. /* Wait until parent calls unshare() */
  266. read(fdp.rd, ns_ctx_list, 1); /* ...using bogus buffer */
  267. /*close(fdp.rd);*/
  268. /* Mount parent's unshared namespaces. */
  269. mount_namespaces(ppid, ns_ctx_list);
  270. return EXIT_SUCCESS;
  271. }
  272. /* Parent continues */
  273. }
  274. if (unshare(unsflags) != 0)
  275. bb_perror_msg_and_die("unshare(0x%x)", unsflags);
  276. if (fdp.wr >= 0) {
  277. close(fdp.wr); /* Release child */
  278. close(fdp.rd); /* should close fd, to not confuse exec'ed PROG */
  279. }
  280. if (need_mount) {
  281. /* Wait for the child to finish mounting the namespaces. */
  282. if (opts & OPT_mount) {
  283. int exit_status = wait_for_exitstatus(child);
  284. if (WIFEXITED(exit_status) &&
  285. WEXITSTATUS(exit_status) != EXIT_SUCCESS)
  286. return WEXITSTATUS(exit_status);
  287. } else {
  288. /*
  289. * Regular way - we were requested to mount some other
  290. * namespaces: mount them after the call to unshare().
  291. */
  292. mount_namespaces(getpid(), ns_ctx_list);
  293. }
  294. }
  295. /*
  296. * When we're unsharing the pid namespace, it's not the process that
  297. * calls unshare() that is put into the new namespace, but its first
  298. * child. The user may want to use this option to spawn a new process
  299. * that'll become PID 1 in this new namespace.
  300. */
  301. if (opts & OPT_fork) {
  302. xvfork_parent_waits_and_exits();
  303. /* Child continues */
  304. }
  305. if (opts & OPT_map_root) {
  306. char uidmap_buf[sizeof("0 %u 1") + sizeof(int)*3];
  307. /*
  308. * Since Linux 3.19 unprivileged writing of /proc/self/gid_map
  309. * has been disabled unless /proc/self/setgroups is written
  310. * first to permanently disable the ability to call setgroups
  311. * in that user namespace.
  312. */
  313. xopen_xwrite_close(PATH_PROC_SETGROUPS, "deny");
  314. sprintf(uidmap_buf, "0 %u 1", (unsigned)reuid);
  315. xopen_xwrite_close(PATH_PROC_UIDMAP, uidmap_buf);
  316. sprintf(uidmap_buf, "0 %u 1", (unsigned)regid);
  317. xopen_xwrite_close(PATH_PROC_GIDMAP, uidmap_buf);
  318. } else
  319. if (setgrp_str) {
  320. /* Write "allow" or "deny" */
  321. xopen_xwrite_close(PATH_PROC_SETGROUPS, setgrp_str);
  322. }
  323. if (opts & OPT_mount) {
  324. mount_or_die("none", "/", NULL, prop_flags);
  325. }
  326. if (opts & OPT_mount_proc) {
  327. /*
  328. * When creating a new pid namespace, we might want the pid
  329. * subdirectories in /proc to remain consistent with the new
  330. * process IDs. Without --mount-proc the pids in /proc would
  331. * still reflect the old pid namespace. This is why we make
  332. * /proc private here and then do a fresh mount.
  333. */
  334. mount_or_die("none", proc_mnt_target, NULL, MS_PRIVATE | MS_REC);
  335. mount_or_die("proc", proc_mnt_target, "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV);
  336. }
  337. exec_prog_or_SHELL(argv);
  338. }