seccomp-oci.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
/*
 * parse and setup OCI seccomp filter
 * Copyright (c) 2020 Daniel Golle <daniel@makrotopia.org>
 * seccomp example with syscall reporting
 * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
 * Authors:
 *  Kees Cook <keescook@chromium.org>
 *  Will Drewry <wad@chromium.org>
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 *
 * BPF control flow
 *
 * (check_arch)<t>---(check_syscall)<f>---+----[...]<f>---(return default_action)
 *       |<f>               |<t>          |
 *      KILL        (check_argument)<f>---+
 *                          |<t>
 *                        [...]
 *                          |<t>
 *                   (return action)
 */
  23. #define _GNU_SOURCE 1
  24. #include <assert.h>
  25. #include <stddef.h>
  26. #include <stdlib.h>
  27. #include <unistd.h>
  28. #include <libubox/utils.h>
  29. #include <libubox/blobmsg.h>
  30. #include <libubox/blobmsg_json.h>
  31. #include "log.h"
  32. #include "seccomp-bpf.h"
  33. #include "seccomp-oci.h"
  34. #include "../syscall-names.h"
  35. #include "seccomp-syscalls-helpers.h"
  36. static uint32_t resolve_action(char *actname)
  37. {
  38. if (!strcmp(actname, "SCMP_ACT_KILL"))
  39. return SECCOMP_RET_KILL;
  40. else if (!strcmp(actname, "SCMP_ACT_KILL_PROCESS"))
  41. return SECCOMP_RET_KILLPROCESS;
  42. else if (!strcmp(actname, "SCMP_ACT_TRAP"))
  43. return SECCOMP_RET_TRAP;
  44. else if (!strcmp(actname, "SCMP_ACT_ERRNO"))
  45. return SECCOMP_RET_ERRNO;
  46. else if (!strcmp(actname, "SCMP_ACT_ERROR"))
  47. return SECCOMP_RET_ERRNO;
  48. else if (!strcmp(actname, "SCMP_ACT_TRACE"))
  49. return SECCOMP_RET_TRACE;
  50. else if (!strcmp(actname, "SCMP_ACT_ALLOW"))
  51. return SECCOMP_RET_ALLOW;
  52. else if (!strcmp(actname, "SCMP_ACT_LOG"))
  53. return SECCOMP_RET_LOGALLOW;
  54. else {
  55. ERROR("unknown seccomp action %s\n", actname);
  56. return SECCOMP_RET_KILL;
  57. }
  58. }
  59. static uint8_t resolve_op_ins(const char *op)
  60. {
  61. if (!strcmp(op, "SCMP_CMP_NE")) /* invert EQ */
  62. return BPF_JEQ;
  63. else if (!strcmp(op, "SCMP_CMP_LT")) /* invert GE */
  64. return BPF_JGE;
  65. else if (!strcmp(op, "SCMP_CMP_LE")) /* invert GT */
  66. return BPF_JGT;
  67. else if (!strcmp(op, "SCMP_CMP_EQ"))
  68. return BPF_JEQ;
  69. else if (!strcmp(op, "SCMP_CMP_GE"))
  70. return BPF_JGE;
  71. else if (!strcmp(op, "SCMP_CMP_GT"))
  72. return BPF_JGT;
  73. else if (!strcmp(op, "SCMP_CMP_MASKED_EQ"))
  74. return BPF_JEQ;
  75. else {
  76. ERROR("unknown seccomp op %s\n", op);
  77. return 0;
  78. }
  79. }
  80. static bool resolve_op_is_masked(const char *op)
  81. {
  82. if (!strcmp(op, "SCMP_CMP_MASKED_EQ"))
  83. return true;
  84. return false;
  85. }
  86. static bool resolve_op_inv(const char *op)
  87. {
  88. if (!strcmp(op, "SCMP_CMP_NE") ||
  89. !strcmp(op, "SCMP_CMP_LT") ||
  90. !strcmp(op, "SCMP_CMP_LE"))
  91. return true;
  92. return false;
  93. }
  94. static uint32_t resolve_architecture(char *archname)
  95. {
  96. if (!archname)
  97. return 0;
  98. if (!strcmp(archname, "SCMP_ARCH_X86"))
  99. return AUDIT_ARCH_I386;
  100. else if (!strcmp(archname, "SCMP_ARCH_X86_64"))
  101. return AUDIT_ARCH_X86_64;
  102. else if (!strcmp(archname, "SCMP_ARCH_X32"))
  103. /*
  104. * return AUDIT_ARCH_X86_64;
  105. * 32-bit userland on 64-bit kernel is not supported yet
  106. */
  107. return 0;
  108. else if (!strcmp(archname, "SCMP_ARCH_ARM"))
  109. return AUDIT_ARCH_ARM;
  110. else if (!strcmp(archname, "SCMP_ARCH_AARCH64"))
  111. return AUDIT_ARCH_AARCH64;
  112. else if (!strcmp(archname, "SCMP_ARCH_MIPS"))
  113. return AUDIT_ARCH_MIPS;
  114. else if (!strcmp(archname, "SCMP_ARCH_MIPS64"))
  115. return AUDIT_ARCH_MIPS64;
  116. else if (!strcmp(archname, "SCMP_ARCH_MIPS64N32"))
  117. return AUDIT_ARCH_MIPS64N32;
  118. else if (!strcmp(archname, "SCMP_ARCH_MIPSEL"))
  119. return AUDIT_ARCH_MIPSEL;
  120. else if (!strcmp(archname, "SCMP_ARCH_MIPSEL64"))
  121. return AUDIT_ARCH_MIPSEL64;
  122. else if (!strcmp(archname, "SCMP_ARCH_MIPSEL64N32"))
  123. return AUDIT_ARCH_MIPSEL64N32;
  124. else if (!strcmp(archname, "SCMP_ARCH_PPC"))
  125. return AUDIT_ARCH_PPC;
  126. else if (!strcmp(archname, "SCMP_ARCH_PPC64"))
  127. return AUDIT_ARCH_PPC64;
  128. else if (!strcmp(archname, "SCMP_ARCH_PPC64LE"))
  129. return AUDIT_ARCH_PPC64LE;
  130. else if (!strcmp(archname, "SCMP_ARCH_S390"))
  131. return AUDIT_ARCH_S390;
  132. else if (!strcmp(archname, "SCMP_ARCH_S390X"))
  133. return AUDIT_ARCH_S390X;
  134. else if (!strcmp(archname, "SCMP_ARCH_PARISC"))
  135. return AUDIT_ARCH_PARISC;
  136. else if (!strcmp(archname, "SCMP_ARCH_PARISC64"))
  137. return AUDIT_ARCH_PARISC64;
  138. else {
  139. ERROR("unknown seccomp architecture %s\n", archname);
  140. return 0;
  141. }
  142. }
  143. enum {
  144. OCI_LINUX_SECCOMP_DEFAULTACTION,
  145. OCI_LINUX_SECCOMP_ARCHITECTURES,
  146. OCI_LINUX_SECCOMP_FLAGS,
  147. OCI_LINUX_SECCOMP_SYSCALLS,
  148. __OCI_LINUX_SECCOMP_MAX,
  149. };
  150. static const struct blobmsg_policy oci_linux_seccomp_policy[] = {
  151. [OCI_LINUX_SECCOMP_DEFAULTACTION] = { "defaultAction", BLOBMSG_TYPE_STRING },
  152. [OCI_LINUX_SECCOMP_ARCHITECTURES] = { "architectures", BLOBMSG_TYPE_ARRAY },
  153. [OCI_LINUX_SECCOMP_FLAGS] = { "flags", BLOBMSG_TYPE_ARRAY },
  154. [OCI_LINUX_SECCOMP_SYSCALLS] = { "syscalls", BLOBMSG_TYPE_ARRAY },
  155. };
  156. enum {
  157. OCI_LINUX_SECCOMP_SYSCALLS_NAMES,
  158. OCI_LINUX_SECCOMP_SYSCALLS_ACTION,
  159. OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET,
  160. OCI_LINUX_SECCOMP_SYSCALLS_ARGS,
  161. __OCI_LINUX_SECCOMP_SYSCALLS_MAX
  162. };
  163. static const struct blobmsg_policy oci_linux_seccomp_syscalls_policy[] = {
  164. [OCI_LINUX_SECCOMP_SYSCALLS_NAMES] = { "names", BLOBMSG_TYPE_ARRAY },
  165. [OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET] = { "errnoRet", BLOBMSG_TYPE_INT32 },
  166. [OCI_LINUX_SECCOMP_SYSCALLS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
  167. [OCI_LINUX_SECCOMP_SYSCALLS_ACTION] = { "action", BLOBMSG_TYPE_STRING },
  168. };
  169. enum {
  170. OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX,
  171. OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE,
  172. OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO,
  173. OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP,
  174. __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX
  175. };
  176. static const struct blobmsg_policy oci_linux_seccomp_syscalls_args_policy[] = {
  177. [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] = { "index", BLOBMSG_TYPE_INT32 },
  178. [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] = { "value", BLOBMSG_CAST_INT64 },
  179. [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO] = { "valueTwo", BLOBMSG_CAST_INT64 },
  180. [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP] = { "op", BLOBMSG_TYPE_STRING },
  181. };
  182. struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg)
  183. {
  184. struct blob_attr *tb[__OCI_LINUX_SECCOMP_MAX];
  185. struct blob_attr *tbn[__OCI_LINUX_SECCOMP_SYSCALLS_MAX];
  186. struct blob_attr *tba[__OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX];
  187. struct blob_attr *cur, *curn, *curarg;
  188. int rem, remn, remargs, sc;
  189. struct sock_filter *filter;
  190. struct sock_fprog *prog;
  191. int sz = 4, idx = 0;
  192. uint32_t default_policy = 0;
  193. uint32_t seccomp_arch;
  194. bool arch_matched;
  195. char *op_str;
  196. blobmsg_parse(oci_linux_seccomp_policy, __OCI_LINUX_SECCOMP_MAX,
  197. tb, blobmsg_data(msg), blobmsg_len(msg));
  198. if (!tb[OCI_LINUX_SECCOMP_DEFAULTACTION]) {
  199. ERROR("seccomp: no default action set\n");
  200. return NULL;
  201. }
  202. default_policy = resolve_action(blobmsg_get_string(tb[OCI_LINUX_SECCOMP_DEFAULTACTION]));
  203. /* verify architecture while ignoring the x86_64 anomaly for now */
  204. if (tb[OCI_LINUX_SECCOMP_ARCHITECTURES]) {
  205. arch_matched = false;
  206. blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_ARCHITECTURES], rem) {
  207. seccomp_arch = resolve_architecture(blobmsg_get_string(cur));
  208. if (ARCH_NR == seccomp_arch) {
  209. arch_matched = true;
  210. break;
  211. }
  212. }
  213. if (!arch_matched) {
  214. ERROR("seccomp architecture doesn't match system\n");
  215. return NULL;
  216. }
  217. }
  218. blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
  219. sz += 2; /* load and return */
  220. blobmsg_parse(oci_linux_seccomp_syscalls_policy,
  221. __OCI_LINUX_SECCOMP_SYSCALLS_MAX,
  222. tbn, blobmsg_data(cur), blobmsg_len(cur));
  223. blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
  224. sc = find_syscall(blobmsg_get_string(curn));
  225. if (sc == -1) {
  226. DEBUG("unknown syscall '%s'\n", blobmsg_get_string(curn));
  227. /* TODO: support run.oci.seccomp_fail_unknown_syscall=1 annotation */
  228. continue;
  229. }
  230. ++sz;
  231. }
  232. if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS]) {
  233. blobmsg_for_each_attr(curarg, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remargs) {
  234. sz += 2; /* load and compare */
  235. blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
  236. __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
  237. tba, blobmsg_data(curarg), blobmsg_len(curarg));
  238. if (!tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] ||
  239. !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] ||
  240. !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP])
  241. return NULL;
  242. if (blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]) > 5)
  243. return NULL;
  244. op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
  245. if (!resolve_op_ins(op_str))
  246. return NULL;
  247. if (resolve_op_is_masked(op_str))
  248. ++sz; /* SCMP_CMP_MASKED_EQ needs an extra BPF_AND op */
  249. }
  250. }
  251. }
  252. if (sz < 6)
  253. return NULL;
  254. prog = malloc(sizeof(struct sock_fprog));
  255. if (!prog)
  256. return NULL;
  257. filter = calloc(sz, sizeof(struct sock_filter));
  258. if (!filter) {
  259. ERROR("failed to allocate memory for seccomp filter\n");
  260. goto errout2;
  261. }
  262. /* validate arch */
  263. set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, arch_nr);
  264. set_filter(&filter[idx++], BPF_JMP + BPF_JEQ + BPF_K, 1, 0, ARCH_NR);
  265. set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_KILL);
  266. blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
  267. uint32_t action;
  268. uint32_t op_idx;
  269. uint8_t op_ins;
  270. bool op_inv, op_masked;
  271. uint64_t op_val, op_val2;
  272. int start_rule_idx;
  273. int next_rule_idx;
  274. blobmsg_parse(oci_linux_seccomp_syscalls_policy,
  275. __OCI_LINUX_SECCOMP_SYSCALLS_MAX,
  276. tbn, blobmsg_data(cur), blobmsg_len(cur));
  277. action = resolve_action(blobmsg_get_string(
  278. tbn[OCI_LINUX_SECCOMP_SYSCALLS_ACTION]));
  279. if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]) {
  280. if (action != SECCOMP_RET_ERRNO)
  281. goto errout1;
  282. action = SECCOMP_RET_ERROR(blobmsg_get_u32(
  283. tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]));
  284. } else if (action == SECCOMP_RET_ERRNO)
  285. action = SECCOMP_RET_ERROR(EPERM);
  286. /* load syscall */
  287. set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_nr);
  288. /* get number of syscall names */
  289. next_rule_idx = idx;
  290. blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
  291. if (find_syscall(blobmsg_get_string(curn)) == -1)
  292. continue;
  293. ++next_rule_idx;
  294. }
  295. start_rule_idx = next_rule_idx;
  296. /* calculate length of argument filter rules */
  297. blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remn) {
  298. blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
  299. __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
  300. tba, blobmsg_data(curn), blobmsg_len(curn));
  301. next_rule_idx += 2;
  302. op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
  303. if (resolve_op_is_masked(op_str))
  304. ++next_rule_idx;
  305. }
  306. ++next_rule_idx; /* account for return action */
  307. blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
  308. sc = find_syscall(blobmsg_get_string(curn));
  309. if (sc == -1)
  310. continue;
  311. /*
  312. * check syscall, skip other syscall checks if match is found.
  313. * if no match is found, jump to next section
  314. */
  315. set_filter(&filter[idx], BPF_JMP + BPF_JEQ + BPF_K,
  316. start_rule_idx - (idx + 1),
  317. ((idx + 1) == start_rule_idx)?(next_rule_idx - (idx + 1)):0,
  318. sc);
  319. ++idx;
  320. }
  321. assert(idx = start_rule_idx);
  322. /* generate argument filter rules */
  323. blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remn) {
  324. blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
  325. __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
  326. tba, blobmsg_data(curn), blobmsg_len(curn));
  327. op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
  328. op_ins = resolve_op_ins(op_str);
  329. op_inv = resolve_op_inv(op_str);
  330. op_masked = resolve_op_is_masked(op_str);
  331. op_idx = blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]);
  332. op_val = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE]);
  333. if (tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO])
  334. op_val2 = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO]);
  335. else
  336. op_val2 = 0;
  337. /* load argument */
  338. set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_arg(op_idx));
  339. /* apply mask */
  340. if (op_masked)
  341. set_filter(&filter[idx++], BPF_ALU + BPF_K + BPF_AND, 0, 0, op_val);
  342. set_filter(&filter[idx], BPF_JMP + op_ins + BPF_K,
  343. op_inv?(next_rule_idx - (idx + 1)):0,
  344. op_inv?0:(next_rule_idx - (idx + 1)),
  345. op_masked?op_val2:op_val);
  346. ++idx;
  347. }
  348. /* if we have reached until here, all conditions were met and we can return */
  349. set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, action);
  350. assert(idx == next_rule_idx);
  351. }
  352. set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, default_policy);
  353. assert(idx == sz);
  354. prog->len = (unsigned short) idx;
  355. prog->filter = filter;
  356. DEBUG("generated seccomp-bpf program:\n");
  357. if (debug) {
  358. fprintf(stderr, " [idx]\tcode\t jt\t jf\tk\n");
  359. for (idx=0; idx<sz; idx++)
  360. fprintf(stderr, " [%03d]\t%04hx\t%3hhu\t%3hhu\t%08x\n", idx,
  361. filter[idx].code,
  362. filter[idx].jt,
  363. filter[idx].jf,
  364. filter[idx].k);
  365. }
  366. return prog;
  367. errout1:
  368. free(prog->filter);
  369. errout2:
  370. free(prog);
  371. return NULL;
  372. }
  373. int applyOCIlinuxseccomp(struct sock_fprog *prog)
  374. {
  375. if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  376. ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
  377. goto errout;
  378. }
  379. if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog)) {
  380. ERROR("prctl(PR_SET_SECCOMP) failed: %m\n");
  381. goto errout;
  382. }
  383. free(prog);
  384. return 0;
  385. errout:
  386. free(prog->filter);
  387. free(prog);
  388. return errno;
  389. }