
Sandboxing in Linux with Zero Lines of Code Ignat Korchagin @ignatkn $ whoami ● Performance and security at Cloudflare ● Passionate about security and crypto ● Enjoy low level programming @ignatkn Linux system calls and seccomp Modern operating systems hardware @ignatkn Modern operating systems OS kernel (Linux, Windows, Mac OS) hardware @ignatkn Modern operating systems application application application user space kernel space OS kernel (Linux, Windows, Mac OS) hardware @ignatkn System calls application @ignatkn System calls application OS kernel @ignatkn System calls application open read write send recv accept OS kernel @ignatkn System calls application open read write send recv accept OS kernel @ignatkn System calls ● a well-defined interface between (user space) applications and the OS kernel @ignatkn System calls ● a well-defined interface between (user space) applications and the OS kernel ○ hardware-independent abstractions ○ basic OS services ■ file I/O ■ network access ■ time, sound etc @ignatkn System calls ● a well-defined interface between (user space) applications and the OS kernel ○ hardware-independent abstractions ○ basic OS services ■ file I/O ■ network access ■ time, sound etc ● OS kernel enforces ACLs and permissions @ignatkn System calls ● a well-defined interface between (user space) applications and the OS kernel ○ hardware-independent abstractions ○ basic OS services ■ file I/O ■ network access ■ time, sound etc ● OS kernel enforces ACLs and permissions ● resource management @ignatkn seccomp ● yet another system call @ignatkn seccomp ● yet another system call ● provides a way for application to notify the kernel which other system calls it intends to use ○ and which system calls it will definitely not use @ignatkn seccomp ● yet another system call ● provides a way for application to notify the kernel which other system calls it intends to use ○ and which system calls it will definitely not use ● the kernel enforces the system call policy 
provided by the application @ignatkn seccomp ● yet another system call ● provides a way for application to notify the kernel which other system calls it intends to use ○ and which system calls it will definitely not use ● the kernel enforces the system call policy provided by the application ● a tool an application can use to confine/sandbox itself @ignatkn seccomp application open read write send recv accept OS kernel @ignatkn seccomp application Contract ● open ● read ● write open read write send recv accept OS kernel @ignatkn seccomp application Contract ● open ● read ● write open read write send recv accept OS kernel @ignatkn seccomp application Contract ● open ● read ● write open read write send recv accept OS kernel @ignatkn seccomp example @ignatkn seccomp example @ignatkn seccomp example Hi! I’m a clock app. I will only use gettimeofday @ignatkn seccomp example Hi! I’m a clock app. I will only use gettimeofday gettimeofday @ignatkn seccomp example Hi! I’m a clock app. I will only use gettimeofday gettimeofday 1970-01-01T00:00:00Z @ignatkn seccomp example Hi! I’m a clock app. I will only use gettimeofday gettimeofday 1970-01-01T00:00:00Z @ignatkn seccomp example Hi! I’m a clock app. I will only use gettimeofday gettimeofday 1970-01-01T00:00:00Z send @ignatkn seccomp example Hi! I’m a clock app. 
I will only use gettimeofday gettimeofday 1970-01-01T00:00:00Z send @ignatkn Using seccomp A simple “uname-like” toy tool #include <stdio.h> #include <sys/utsname.h> int main(void) { struct utsname name; if (uname(&name)) { perror("uname failed"); return 1; } printf("My OS is %s!\n", name.sysname); return 0; } @ignatkn A simple “uname-like” toy tool #include <stdio.h> #include <sys/utsname.h> int main(void) { struct utsname name; if (uname(&name)) { perror("uname failed"); return 1; } printf("My OS is %s!\n", name.sysname); return 0; } @ignatkn A simple “uname-like” toy tool $ gcc -o myos myos.c @ignatkn A simple “uname-like” toy tool $ gcc -o myos myos.c $ ./myos My OS is Linux! @ignatkn Sandboxing “myos” #include <stdio.h> #include <sys/utsname.h> int main(void) { struct utsname name; if (uname(&name)) { perror("uname failed"); return 1; } printf("My OS is %s!\n", name.sysname); return 0; } @ignatkn Sandboxing “myos” #include <stdio.h> #include <sys/utsname.h> int main(void) { struct utsname name; sandbox(); if (uname(&name)) { perror("uname failed"); return 1; } printf("My OS is %s!\n", name.sysname); return 0; } @ignatkn Sandboxing “myos” static void sandbox(void) { struct sock_filter filter[] = { BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch))), /* if not x86_64, tell the kernel to kill the process */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4), /* get the actual syscall number */ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))), /* if "uname", tell the kernel to return ENETDOWN, otherwise just allow */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_uname, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (ENETDOWN & SECCOMP_RET_DATA)), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL), }; struct sock_fprog prog = { .len = (unsigned short) (sizeof(filter) / sizeof(filter[0])), .filter = filter, }; if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("PR_SET_NO_NEW_PRIVS failed"); exit(1); }; /* glibc does not have a wrapper for seccomp(2) */ /* invoke it via the generic syscall wrapper */ if (syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog)) { perror("seccomp failed"); exit(1); }; } 
@ignatkn Sandboxing “myos” static void sandbox(void) { struct sock_filter filter[] = { BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch))), /* if not x86_64, tell the kernel to kill the process */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4), /* get the actual syscall number */ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))), /* if "uname", tell the kernel to return ENETDOWN, otherwise just allow */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_uname, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (ENETDOWN & SECCOMP_RET_DATA)), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL), }; struct sock_fprog prog = { .len = (unsigned short) (sizeof(filter) / sizeof(filter[0])), .filter = filter, }; if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("PR_SET_NO_NEW_PRIVS failed"); exit(1); }; /* glibc does not have a wrapper for seccomp(2) */ /* invoke it via the generic syscall wrapper */ if (syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog)) { perror("seccomp failed"); exit(1); }; } @ignatkn Sandboxing “myos” static void sandbox(void) { struct sock_filter filter[] = { BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch))), /* if not x86_64, tell the kernel to kill the process */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4), /* get the actual syscall number */ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))), /* if "uname", tell the kernel to return ENETDOWN, otherwise just allow */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_uname, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (ENETDOWN & SECCOMP_RET_DATA)), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL), }; struct sock_fprog prog = { .len = (unsigned short) (sizeof(filter) / sizeof(filter[0])), .filter = filter, }; if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("PR_SET_NO_NEW_PRIVS failed"); exit(1); }; /* glibc does not have a wrapper for seccomp(2) */ /* invoke it via the generic syscall wrapper */ if (syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog)) { perror("seccomp failed"); exit(1); }; } 
@ignatkn Sandboxing “myos” static void sandbox(void) { struct sock_filter filter[] = { BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch))), /* if not x86_64, tell the kernel to kill the process */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4), /* get the actual syscall number */ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))), /* if "uname", tell the kernel to return ENETDOWN, otherwise just allow */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_uname, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (ENETDOWN & SECCOMP_RET_DATA)), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL), }; struct sock_fprog prog = { .len = (unsigned short) (sizeof(filter) / sizeof(filter[0])), .filter = filter, }; if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("PR_SET_NO_NEW_PRIVS failed"); exit(1); }; /* glibc does not have a wrapper for seccomp(2) */ /* invoke it via the generic syscall wrapper */ if (syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog)) { perror("seccomp failed"); exit(1); }; } @ignatkn Sandboxing “myos” $ gcc -o myos myos_raw_seccomp.c @ignatkn Sandboxing “myos” $ gcc -o myos myos_raw_seccomp.c $ ./myos uname failed: Network is down @ignatkn Sandboxing “myos” #include <stdio.h> #include <sys/utsname.h> int main(void) { struct utsname name; sandbox(); if (uname(&name)) { perror("uname failed"); return 1; } printf("My OS is %s!\n", name.sysname); return 0; } @ignatkn Sandboxing “myos” #include <stdio.h> #include <sys/utsname.h> int 
main(void) { struct utsname name; sandbox(); if (uname(&name)) { perror("uname failed"); return 1; } printf("My OS is %s!\n", name.sysname); return 0; } @ignatkn Writing seccomp by hand ● low level assembly language ○ hard to write ○ hard to review ○ hard to debug ○ hard to update @ignatkn Writing seccomp
Details
-
File Type: pdf
-
Upload Time-
-
Content Languages: English
-
Upload User: Anonymous/Not logged-in
-
File Pages: 125 pages
-
File Size-