Files
mattermost/plugin/rpcplugin/sandbox/sandbox_linux.go
Chris f5c8a71698 ABC-22: Plugin sandboxing for linux/amd64 (#8068)
* plugin sandboxing

* remove unused type

* better symlink handling, better remounting, better test, whitespace
fixes, and comment on the remounting

* fix test compile error

* big simplification for getting mount flags

* mask statfs flags to the ones we're interested in
2018-01-15 09:21:06 -08:00

469 lines
12 KiB
Go

// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See License.txt for license information.
package sandbox
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"syscall"
"unsafe"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
"github.com/mattermost/mattermost-server/plugin/rpcplugin"
)
func init() {
if len(os.Args) < 3 || os.Args[0] != "sandbox.runProcess" {
return
}
var config Configuration
if err := json.Unmarshal([]byte(os.Args[1]), &config); err != nil {
fmt.Println(err.Error())
os.Exit(1)
}
if err := runProcess(&config, os.Args[2]); err != nil {
if eerr, ok := err.(*exec.ExitError); ok {
if status, ok := eerr.Sys().(syscall.WaitStatus); ok {
os.Exit(status.ExitStatus())
}
}
fmt.Println(err.Error())
os.Exit(1)
}
os.Exit(0)
}
func systemMountPoints() (points []*MountPoint) {
points = append(points, &MountPoint{
Source: "proc",
Destination: "/proc",
Type: "proc",
}, &MountPoint{
Source: "/dev/null",
Destination: "/dev/null",
}, &MountPoint{
Source: "/dev/zero",
Destination: "/dev/zero",
}, &MountPoint{
Source: "/dev/full",
Destination: "/dev/full",
})
readOnly := []string{
"/dev/random",
"/dev/urandom",
"/etc/resolv.conf",
"/lib",
"/lib32",
"/lib64",
"/etc/ssl/certs",
"/system/etc/security/cacerts",
"/usr/local/share/certs",
"/etc/pki/tls/certs",
"/etc/openssl/certs",
"/etc/ssl/ca-bundle.pem",
"/etc/pki/tls/cacert.pem",
"/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem",
}
for _, v := range []string{"SSL_CERT_FILE", "SSL_CERT_DIR"} {
if path := os.Getenv(v); path != "" {
readOnly = append(readOnly, path)
}
}
for _, point := range readOnly {
points = append(points, &MountPoint{
Source: point,
Destination: point,
ReadOnly: true,
})
}
return
}
func runProcess(config *Configuration, path string) error {
root, err := ioutil.TempDir("", "")
if err != nil {
return err
}
defer os.RemoveAll(root)
if err := syscall.Mount("", "/", "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
return errors.Wrapf(err, "unable to make root private")
}
if err := mountMountPoints(root, systemMountPoints()); err != nil {
return errors.Wrapf(err, "unable to mount sandbox system mount points")
}
if err := mountMountPoints(root, config.MountPoints); err != nil {
return errors.Wrapf(err, "unable to mount sandbox config mount points")
}
if err := pivotRoot(root); err != nil {
return errors.Wrapf(err, "unable to pivot sandbox root")
}
if err := os.Mkdir("/tmp", 0755); err != nil {
return errors.Wrapf(err, "unable to create /tmp")
}
if config.WorkingDirectory != "" {
if err := os.Chdir(config.WorkingDirectory); err != nil {
return errors.Wrapf(err, "unable to set working directory")
}
}
if err := dropInheritableCapabilities(); err != nil {
return errors.Wrapf(err, "unable to drop inheritable capabilities")
}
if err := enableSeccompFilter(); err != nil {
return errors.Wrapf(err, "unable to enable seccomp filter")
}
return runExecutable(path)
}
func mountMountPoint(root string, mountPoint *MountPoint) error {
isDir := true
if mountPoint.Type == "" {
stat, err := os.Lstat(mountPoint.Source)
if err != nil {
return nil
}
if (stat.Mode() & os.ModeSymlink) != 0 {
if path, err := filepath.EvalSymlinks(mountPoint.Source); err == nil {
newMountPoint := *mountPoint
newMountPoint.Source = path
if err := mountMountPoint(root, &newMountPoint); err != nil {
return errors.Wrapf(err, "unable to mount symbolic link target: "+mountPoint.Source)
}
return nil
}
}
isDir = stat.IsDir()
}
target := filepath.Join(root, mountPoint.Destination)
if isDir {
if err := os.MkdirAll(target, 0755); err != nil {
return errors.Wrapf(err, "unable to create directory: "+target)
}
} else {
if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
return errors.Wrapf(err, "unable to create directory: "+target)
}
f, err := os.Create(target)
if err != nil {
return errors.Wrapf(err, "unable to create file: "+target)
}
f.Close()
}
flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV)
if mountPoint.Type == "" {
flags |= syscall.MS_BIND
}
if mountPoint.ReadOnly {
flags |= syscall.MS_RDONLY
}
if err := syscall.Mount(mountPoint.Source, target, mountPoint.Type, flags, ""); err != nil {
return errors.Wrapf(err, "unable to mount "+mountPoint.Source)
}
if (flags & syscall.MS_BIND) != 0 {
// If this was a bind mount, our other flags actually got silently ignored during the above syscall:
//
// If mountflags includes MS_BIND [...] The remaining bits in the mountflags argument are
// also ignored, with the exception of MS_REC.
//
// Furthermore, remounting will fail if we attempt to unset a bit that was inherited from
// the mount's parent:
//
// The mount(2) flags MS_RDONLY, MS_NOSUID, MS_NOEXEC, and the "atime" flags
// (MS_NOATIME, MS_NODIRATIME, MS_RELATIME) settings become locked when propagated from
// a more privileged to a less privileged mount namespace, and may not be changed in the
// less privileged mount namespace.
//
// So we need to get the actual flags, add our new ones, then do a remount if needed.
var stats syscall.Statfs_t
if err := syscall.Statfs(target, &stats); err != nil {
return errors.Wrap(err, "unable to get mount flags for target: "+target)
}
const lockedFlagsMask = unix.MS_RDONLY | unix.MS_NOSUID | unix.MS_NOEXEC | unix.MS_NOATIME | unix.MS_NODIRATIME | unix.MS_RELATIME
lockedFlags := uintptr(stats.Flags & lockedFlagsMask)
if lockedFlags != ((flags | lockedFlags) & lockedFlagsMask) {
if err := syscall.Mount("", target, "", flags|lockedFlags|syscall.MS_REMOUNT, ""); err != nil {
return errors.Wrapf(err, "unable to remount "+mountPoint.Source)
}
}
}
return nil
}
func mountMountPoints(root string, mountPoints []*MountPoint) error {
for _, mountPoint := range mountPoints {
if err := mountMountPoint(root, mountPoint); err != nil {
return err
}
}
return nil
}
func pivotRoot(newRoot string) error {
if err := syscall.Mount(newRoot, newRoot, "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
return errors.Wrapf(err, "unable to mount new root")
}
prevRoot := filepath.Join(newRoot, ".prev_root")
if err := os.MkdirAll(prevRoot, 0700); err != nil {
return errors.Wrapf(err, "unable to create directory for previous root")
}
if err := syscall.PivotRoot(newRoot, prevRoot); err != nil {
return errors.Wrapf(err, "syscall error")
}
if err := os.Chdir("/"); err != nil {
return errors.Wrapf(err, "unable to change directory")
}
prevRoot = "/.prev_root"
if err := syscall.Unmount(prevRoot, syscall.MNT_DETACH); err != nil {
return errors.Wrapf(err, "unable to unmount previous root")
}
if err := os.RemoveAll(prevRoot); err != nil {
return errors.Wrapf(err, "unable to remove previous root directory")
}
return nil
}
func dropInheritableCapabilities() error {
type capHeader struct {
version uint32
pid int
}
type capData struct {
effective uint32
permitted uint32
inheritable uint32
}
var hdr capHeader
var data [2]capData
if _, _, errno := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(&hdr)), 0, 0); errno != 0 {
return errors.Wrapf(syscall.Errno(errno), "unable to get capabilities version")
}
if _, _, errno := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(&hdr)), uintptr(unsafe.Pointer(&data[0])), 0); errno != 0 {
return errors.Wrapf(syscall.Errno(errno), "unable to get capabilities")
}
data[0].inheritable = 0
data[1].inheritable = 0
if _, _, errno := syscall.Syscall(syscall.SYS_CAPSET, uintptr(unsafe.Pointer(&hdr)), uintptr(unsafe.Pointer(&data[0])), 0); errno != 0 {
return errors.Wrapf(syscall.Errno(errno), "unable to set inheritable capabilities")
}
for i := 0; i < 64; i++ {
if _, _, errno := syscall.Syscall(syscall.SYS_PRCTL, syscall.PR_CAPBSET_DROP, uintptr(i), 0); errno != 0 && errno != syscall.EINVAL {
return errors.Wrapf(syscall.Errno(errno), "unable to drop bounding set capability")
}
}
return nil
}
func enableSeccompFilter() error {
return EnableSeccompFilter(SeccompFilter(NATIVE_AUDIT_ARCH, AllowedSyscalls))
}
func runExecutable(path string) error {
childFiles := []*os.File{
os.NewFile(3, ""), os.NewFile(4, ""),
}
defer childFiles[0].Close()
defer childFiles[1].Close()
cmd := exec.Command(path)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.ExtraFiles = childFiles
cmd.SysProcAttr = &syscall.SysProcAttr{
Pdeathsig: syscall.SIGTERM,
}
if err := cmd.Run(); err != nil {
return err
}
return nil
}
type process struct {
command *exec.Cmd
}
func newProcess(ctx context.Context, config *Configuration, path string) (rpcplugin.Process, io.ReadWriteCloser, error) {
configJSON, err := json.Marshal(config)
if err != nil {
return nil, nil, err
}
ipc, childFiles, err := rpcplugin.NewIPC()
if err != nil {
return nil, nil, err
}
defer childFiles[0].Close()
defer childFiles[1].Close()
cmd := exec.CommandContext(ctx, "/proc/self/exe")
cmd.Args = []string{"sandbox.runProcess", string(configJSON), path}
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.ExtraFiles = childFiles
cmd.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWPID | syscall.CLONE_NEWUSER,
Pdeathsig: syscall.SIGTERM,
GidMappings: []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Getgid(),
Size: 1,
},
},
UidMappings: []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Getuid(),
Size: 1,
},
},
}
err = cmd.Start()
if err != nil {
ipc.Close()
return nil, nil, err
}
return &process{
command: cmd,
}, ipc, nil
}
func (p *process) Wait() error {
return p.command.Wait()
}
func init() {
if len(os.Args) < 1 || os.Args[0] != "sandbox.checkSupportInNamespace" {
return
}
if err := checkSupportInNamespace(); err != nil {
fmt.Fprintf(os.Stderr, "%v", err.Error())
os.Exit(1)
}
os.Exit(0)
}
func checkSupportInNamespace() error {
root, err := ioutil.TempDir("", "")
if err != nil {
return err
}
defer os.RemoveAll(root)
if err := syscall.Mount("", "/", "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
return errors.Wrapf(err, "unable to make root private")
}
if err := mountMountPoints(root, systemMountPoints()); err != nil {
return errors.Wrapf(err, "unable to mount sandbox system mount points")
}
if err := pivotRoot(root); err != nil {
return errors.Wrapf(err, "unable to pivot sandbox root")
}
if err := dropInheritableCapabilities(); err != nil {
return errors.Wrapf(err, "unable to drop inheritable capabilities")
}
if err := enableSeccompFilter(); err != nil {
return errors.Wrapf(err, "unable to enable seccomp filter")
}
return nil
}
func checkSupport() error {
if AllowedSyscalls == nil {
return fmt.Errorf("unsupported architecture")
}
stderr := &bytes.Buffer{}
cmd := exec.Command("/proc/self/exe")
cmd.Args = []string{"sandbox.checkSupportInNamespace"}
cmd.Stderr = stderr
cmd.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWPID | syscall.CLONE_NEWUSER,
Pdeathsig: syscall.SIGTERM,
GidMappings: []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Getgid(),
Size: 1,
},
},
UidMappings: []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Getuid(),
Size: 1,
},
},
}
if err := cmd.Start(); err != nil {
return errors.Wrapf(err, "unable to create user namespace")
}
if err := cmd.Wait(); err != nil {
if _, ok := err.(*exec.ExitError); ok {
return errors.Wrapf(fmt.Errorf("%v", stderr.String()), "unable to prepare namespace")
}
return errors.Wrapf(err, "unable to prepare namespace")
}
return nil
}