From 048130d1d0c75644b8ccd1d306b563908d165819 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Tue, 16 Oct 2018 13:30:02 +0900 Subject: [PATCH] simplify rootless Signed-off-by: Akihiro Suda --- docs/rootless.md | 5 +- examples/buildkit0/buildkit.go | 2 +- examples/buildkit1/buildkit.go | 2 +- examples/buildkit2/buildkit.go | 2 +- examples/buildkit3/buildkit.go | 2 +- frontend/dockerfile/dockerfile_test.go | 4 - hack/dockerfiles/test.Dockerfile | 2 +- hack/dockerfiles/test.buildkit.Dockerfile | 2 +- util/rootless/specconv/specconv_linux.go | 113 +-- util/rootless/specconv/specconv_linux_test.go | 42 - vendor.conf | 5 +- .../runc/libcontainer/configs/blkio_device.go | 61 -- .../runc/libcontainer/configs/cgroup_linux.go | 122 --- .../libcontainer/configs/cgroup_windows.go | 6 - .../runc/libcontainer/configs/config.go | 349 ------- .../runc/libcontainer/configs/config_linux.go | 61 -- .../runc/libcontainer/configs/device.go | 57 -- .../libcontainer/configs/device_defaults.go | 111 --- .../libcontainer/configs/hugepage_limit.go | 9 - .../runc/libcontainer/configs/intelrdt.go | 7 - .../configs/interface_priority_map.go | 14 - .../runc/libcontainer/configs/mount.go | 39 - .../runc/libcontainer/configs/namespaces.go | 5 - .../libcontainer/configs/namespaces_linux.go | 122 --- .../configs/namespaces_syscall.go | 31 - .../configs/namespaces_syscall_unsupported.go | 13 - .../configs/namespaces_unsupported.go | 8 - .../runc/libcontainer/configs/network.go | 72 -- .../runc/libcontainer/nsenter/nsexec.c | 12 +- .../runc/libcontainer/seccomp/config.go | 76 -- .../libcontainer/seccomp/seccomp_linux.go | 258 ------ .../seccomp/seccomp_unsupported.go | 24 - .../runc/libcontainer/specconv/example.go | 221 ----- .../runc/libcontainer/specconv/spec_linux.go | 836 ----------------- .../runc/libcontainer/utils/cmsg.go | 93 -- .../runc/libcontainer/utils/utils.go | 112 --- .../runc/libcontainer/utils/utils_unix.go | 44 - .../seccomp/libseccomp-golang/LICENSE | 22 - .../seccomp/libseccomp-golang/README | 26 - .../seccomp/libseccomp-golang/seccomp.go | 857 ------------------ .../libseccomp-golang/seccomp_internal.go | 506 ----------- 41 files changed, 36 insertions(+), 4319 deletions(-) delete mode 100644 util/rootless/specconv/specconv_linux_test.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/config.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/device.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/network.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go delete mode 100644 vendor/github.com/seccomp/libseccomp-golang/LICENSE delete mode 100644 vendor/github.com/seccomp/libseccomp-golang/README delete mode 100644 vendor/github.com/seccomp/libseccomp-golang/seccomp.go delete mode 100644 vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go diff --git a/docs/rootless.md b/docs/rootless.md index e8b3369f95e5..8c4cff1cf429 100644 --- a/docs/rootless.md +++ b/docs/rootless.md @@ -1,7 +1,7 @@ # Rootless mode (Experimental) Requirements: -- runc `ecd55a4135e0a26de884ce436442914f945b1e76` (May 30, 2018) or later +- runc `a00bf0190895aa465a5fbed0268888e2c8ddfe85` (Oct 15, 2018) or later - Some distros such as Debian (excluding Ubuntu) and Arch Linux require `echo 1 > /proc/sys/kernel/unprivileged_userns_clone` - `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package. - `/etc/subuid` and `/etc/subgid` should contain >= 65536 sub-IDs. e.g. `penguin:231072:65536`. @@ -45,6 +45,7 @@ unshared# buildkitd * `overlayfs` snapshotter is not supported except Ubuntu-flavored kernel: http://kernel.ubuntu.com/git/ubuntu/ubuntu-artful.git/commit/fs/overlayfs?h=Ubuntu-4.13.0-25.29&id=0a414bdc3d01f3b61ed86cfe3ce8b63a9240eba7 * containerd worker is not supported ( pending PR: https://github.com/containerd/containerd/pull/2006 ) * Network namespace is not used at the moment. +* Cgroups is disabled. ### Terminal 2: @@ -61,7 +62,7 @@ $ docker run --name buildkitd -d --privileged -p 1234:1234 buildkit-rootless -- ``` `docker run` requires `--privileged` but the BuildKit daemon is executed as a normal user. -See [`moby/moby#36597`](https://github.com/moby/moby/issues/36597, [`kubernetes/community#1934`](https://github.com/kubernetes/community/pull/1934) and [Jess's blog](https://blog.jessfraz.com/post/building-container-images-securely-on-kubernetes/) for the ongoing work to remove this requirement +See [`docker/cli#1347`](https://github.com/docker/cli/pull/1347) for the ongoing work to remove this requirement ``` $ docker exec buildkitd id diff --git a/examples/buildkit0/buildkit.go b/examples/buildkit0/buildkit.go index 6a07c3cc6c24..b7cfbba2fa8d 100644 --- a/examples/buildkit0/buildkit.go +++ b/examples/buildkit0/buildkit.go @@ -18,7 +18,7 @@ func main() { var opt buildOpt flag.BoolVar(&opt.withContainerd, "with-containerd", true, "enable containerd worker") flag.StringVar(&opt.containerd, "containerd", "v1.2.0-rc.1", "containerd version") - flag.StringVar(&opt.runc, "runc", "dd56ece8236d6d9e5bed4ea0c31fe53c7b873ff4", "runc version") + flag.StringVar(&opt.runc, "runc", "a00bf0190895aa465a5fbed0268888e2c8ddfe85", "runc version") flag.Parse() bk := buildkit(opt) diff --git a/examples/buildkit1/buildkit.go b/examples/buildkit1/buildkit.go index 650ce81166e2..6abbce1a3060 100644 --- a/examples/buildkit1/buildkit.go +++ b/examples/buildkit1/buildkit.go @@ -18,7 +18,7 @@ func main() { var opt buildOpt flag.BoolVar(&opt.withContainerd, "with-containerd", true, "enable containerd worker") flag.StringVar(&opt.containerd, "containerd", "v1.2.0-rc.1", "containerd version") - flag.StringVar(&opt.runc, "runc", "dd56ece8236d6d9e5bed4ea0c31fe53c7b873ff4", "runc version") + flag.StringVar(&opt.runc, "runc", "a00bf0190895aa465a5fbed0268888e2c8ddfe85", "runc version") flag.Parse() bk := buildkit(opt) diff --git a/examples/buildkit2/buildkit.go b/examples/buildkit2/buildkit.go index 91ff2d70cb80..662d1209349b 100644 --- a/examples/buildkit2/buildkit.go +++ b/examples/buildkit2/buildkit.go @@ -18,7 +18,7 @@ func main() { var opt buildOpt flag.BoolVar(&opt.withContainerd, "with-containerd", true, "enable containerd worker") flag.StringVar(&opt.containerd, "containerd", "v1.2.0-rc.1", "containerd version") - flag.StringVar(&opt.runc, "runc", "dd56ece8236d6d9e5bed4ea0c31fe53c7b873ff4", "runc version") + flag.StringVar(&opt.runc, "runc", "a00bf0190895aa465a5fbed0268888e2c8ddfe85", "runc version") flag.Parse() bk := buildkit(opt) diff --git a/examples/buildkit3/buildkit.go b/examples/buildkit3/buildkit.go index 58e6a43feb17..f255a4b4392d 100644 --- a/examples/buildkit3/buildkit.go +++ b/examples/buildkit3/buildkit.go @@ -19,7 +19,7 @@ func main() { var opt buildOpt flag.BoolVar(&opt.withContainerd, "with-containerd", true, "enable containerd worker") flag.StringVar(&opt.containerd, "containerd", "v1.2.0-rc.1", "containerd version") - flag.StringVar(&opt.runc, "runc", "dd56ece8236d6d9e5bed4ea0c31fe53c7b873ff4", "runc version") + flag.StringVar(&opt.runc, "runc", "a00bf0190895aa465a5fbed0268888e2c8ddfe85", "runc version") flag.StringVar(&opt.buildkit, "buildkit", "master", "buildkit version") flag.Parse() diff --git a/frontend/dockerfile/dockerfile_test.go b/frontend/dockerfile/dockerfile_test.go index a2ed9cb6d6c8..886cec105112 100644 --- a/frontend/dockerfile/dockerfile_test.go +++ b/frontend/dockerfile/dockerfile_test.go @@ -1604,10 +1604,6 @@ RUN ["ls"] } func testUser(t *testing.T, sb integration.Sandbox) { - if sb.Rootless() { - t.Skip("only for rootful worker, due to lack of support for additional gids (https://github.com/opencontainers/runc/issues/1835)") - } - f := getFrontend(t, sb) dockerfile := []byte(` diff --git a/hack/dockerfiles/test.Dockerfile b/hack/dockerfiles/test.Dockerfile index afb63f5a5212..7956dcb98c14 100644 --- a/hack/dockerfiles/test.Dockerfile +++ b/hack/dockerfiles/test.Dockerfile @@ -1,4 +1,4 @@ -ARG RUNC_VERSION=00dc70017d222b178a002ed30e9321b12647af2d +ARG RUNC_VERSION=a00bf0190895aa465a5fbed0268888e2c8ddfe85 ARG CONTAINERD_VERSION=v1.2.0-rc.1 # containerd v1.0 for integration tests ARG CONTAINERD10_VERSION=v1.0.3 diff --git a/hack/dockerfiles/test.buildkit.Dockerfile b/hack/dockerfiles/test.buildkit.Dockerfile index a438b0ed40eb..236e30685cf8 100644 --- a/hack/dockerfiles/test.buildkit.Dockerfile +++ b/hack/dockerfiles/test.buildkit.Dockerfile @@ -1,6 +1,6 @@ # syntax = tonistiigi/dockerfile:runmount20180925 -ARG RUNC_VERSION=00dc70017d222b178a002ed30e9321b12647af2d +ARG RUNC_VERSION=a00bf0190895aa465a5fbed0268888e2c8ddfe85 ARG CONTAINERD_VERSION=v1.2.0-rc.1 # containerd v1.0 for integration tests ARG CONTAINERD10_VERSION=v1.0.3 diff --git a/util/rootless/specconv/specconv_linux.go b/util/rootless/specconv/specconv_linux.go index 1b90219f1dec..12646e430acd 100644 --- a/util/rootless/specconv/specconv_linux.go +++ b/util/rootless/specconv/specconv_linux.go @@ -1,113 +1,40 @@ package specconv import ( - "os" - "sort" "strings" - "github.com/opencontainers/runc/libcontainer/system" - "github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" ) // ToRootless converts spec to be compatible with "rootless" runc. -// * Adds userns (Note: since we are already in userns, ideally we should not need to do this. runc-side issue is tracked at https://github.com/opencontainers/runc/issues/1837) -// * Fix up mount flags (same as above) -// * Replace /sys with bind-mount (FIXME: we don't need to do this if netns is unshared) +// * Remove /sys mount +// * Remove cgroups +// +// See docs/rootless.md for the supported runc revision. func ToRootless(spec *specs.Spec) error { - if !system.RunningInUserNS() { - return errors.New("needs to be in user namespace") - } - uidMap, err := user.CurrentProcessUIDMap() - if err != nil && !os.IsNotExist(err) { - return err - } - gidMap, err := user.CurrentProcessUIDMap() - if err != nil && !os.IsNotExist(err) { - return err - } - return toRootless(spec, uidMap, gidMap) -} - -// toRootless was forked from github.com/opencontainers/runc/libcontainer/specconv -func toRootless(spec *specs.Spec, uidMap, gidMap []user.IDMap) error { - if err := configureUserNS(spec, uidMap, gidMap); err != nil { - return err - } - if err := configureMounts(spec); err != nil { - return err - } - - // Remove cgroup settings. - spec.Linux.Resources = nil - spec.Linux.CgroupsPath = "" - return nil -} - -// configureUserNS add suserns and the current ID map to the spec. -// Since we are already in userns, ideally we should not need to add userns. -// However, currently rootless runc always requires userns to be added. -// https://github.com/opencontainers/runc/issues/1837 -func configureUserNS(spec *specs.Spec, uidMap, gidMap []user.IDMap) error { - spec.Linux.Namespaces = append(spec.Linux.Namespaces, specs.LinuxNamespace{ - Type: specs.UserNamespace, - }) - - sort.Slice(uidMap, func(i, j int) bool { return uidMap[i].ID < uidMap[j].ID }) - uNextContainerID := int64(0) - for _, u := range uidMap { - spec.Linux.UIDMappings = append(spec.Linux.UIDMappings, - specs.LinuxIDMapping{ - HostID: uint32(u.ID), - ContainerID: uint32(uNextContainerID), - Size: uint32(u.Count), - }) - uNextContainerID += int64(u.Count) - } - sort.Slice(gidMap, func(i, j int) bool { return gidMap[i].ID < gidMap[j].ID }) - gNextContainerID := int64(0) - for _, g := range gidMap { - spec.Linux.GIDMappings = append(spec.Linux.GIDMappings, - specs.LinuxIDMapping{ - HostID: uint32(g.ID), - ContainerID: uint32(gNextContainerID), - Size: uint32(g.Count), - }) - gNextContainerID += int64(g.Count) - } - return nil -} - -func configureMounts(spec *specs.Spec) error { + // Remove /sys mount because we can't mount /sys when the daemon netns + // is not unshared from the host. + // + // Instead, we could bind-mount /sys from the host, however, `rbind, ro` + // does not make /sys/fs/cgroup read-only (and we can't bind-mount /sys + // without rbind) + // + // PR for making /sys/fs/cgroup read-only is proposed, but it is very + // complicated: https://github.com/opencontainers/runc/pull/1869 + // + // For buildkit usecase, we suppose we don't need to provide /sys to + // containers and remove /sys mount as a workaround. var mounts []specs.Mount for _, mount := range spec.Mounts { - // Ignore all mounts that are under /sys, because we add /sys later. if strings.HasPrefix(mount.Destination, "/sys") { continue } - - // Remove all gid= and uid= mappings. - // Since we are already in userns, ideally we should not need to do this. - // https://github.com/opencontainers/runc/issues/1837 - var options []string - for _, option := range mount.Options { - if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") { - options = append(options, option) - } - } - mount.Options = options mounts = append(mounts, mount) } - - // Add the sysfs mount as an rbind, because we can't mount /sys unless we have netns. - // TODO: keep original /sys mount when we have netns. - mounts = append(mounts, specs.Mount{ - Source: "/sys", - Destination: "/sys", - Type: "none", - Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, - }) spec.Mounts = mounts + + // Remove cgroups so as to avoid `container_linux.go:337: starting container process caused "process_linux.go:280: applying cgroup configuration for process caused \"mkdir /sys/fs/cgroup/cpuset/buildkit: permission denied\""` + spec.Linux.Resources = nil + spec.Linux.CgroupsPath = "" return nil } diff --git a/util/rootless/specconv/specconv_linux_test.go b/util/rootless/specconv/specconv_linux_test.go deleted file mode 100644 index 03b2041d553b..000000000000 --- a/util/rootless/specconv/specconv_linux_test.go +++ /dev/null @@ -1,42 +0,0 @@ -package specconv - -import ( - "testing" - - "github.com/opencontainers/runc/libcontainer/specconv" - "github.com/opencontainers/runc/libcontainer/user" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/stretchr/testify/require" -) - -func TestToRootless(t *testing.T) { - spec := specconv.Example() - uidMap := []user.IDMap{ - { - ID: 0, - ParentID: 4242, - Count: 1, - }, - { - ID: 1, - ParentID: 231072, - Count: 65536, - }, - } - gidMap := uidMap - expectedUIDMappings := []specs.LinuxIDMapping{ - { - HostID: 0, - ContainerID: 0, - Size: 1, - }, - { - HostID: 1, - ContainerID: 1, - Size: 65536, - }, - } - err := toRootless(spec, uidMap, gidMap) - require.NoError(t, err) - require.EqualValues(t, expectedUIDMappings, spec.Linux.UIDMappings) -} diff --git a/vendor.conf b/vendor.conf index f58360c6223e..070bff650925 100644 --- a/vendor.conf +++ b/vendor.conf @@ -18,7 +18,7 @@ github.com/gogo/googleapis b23578765ee54ff6bceff57f397d833bf4ca6869 github.com/golang/protobuf v1.1.0 github.com/containerd/continuity bd77b46c8352f74eb12c85bdc01f4b90f69d66b4 github.com/opencontainers/image-spec v1.0.1 -github.com/opencontainers/runc 00dc70017d222b178a002ed30e9321b12647af2d +github.com/opencontainers/runc a00bf0190895aa465a5fbed0268888e2c8ddfe85 github.com/Microsoft/go-winio v0.4.11 github.com/containerd/fifo 3d5202aec260678c48179c56f40e6f38a095738c github.com/opencontainers/runtime-spec eba862dc2470385a233c7507392675cbeadf7353 # v1.0.1-45-geba862d @@ -67,6 +67,3 @@ github.com/opentracing-contrib/go-stdlib b1a47cfbdd7543e70e9ef3e73d0802ad306cc1c # used by dockerfile tests gotest.tools v2.1.0 github.com/google/go-cmp v0.2.0 - -# used by rootless spec conv test -github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0 diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go deleted file mode 100644 index e0f3ca16533e..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go +++ /dev/null @@ -1,61 +0,0 @@ -package configs - -import "fmt" - -// blockIODevice holds major:minor format supported in blkio cgroup -type blockIODevice struct { - // Major is the device's major number - Major int64 `json:"major"` - // Minor is the device's minor number - Minor int64 `json:"minor"` -} - -// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair -type WeightDevice struct { - blockIODevice - // Weight is the bandwidth rate for the device, range is from 10 to 1000 - Weight uint16 `json:"weight"` - // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only - LeafWeight uint16 `json:"leafWeight"` -} - -// NewWeightDevice returns a configured WeightDevice pointer -func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice { - wd := &WeightDevice{} - wd.Major = major - wd.Minor = minor - wd.Weight = weight - wd.LeafWeight = leafWeight - return wd -} - -// WeightString formats the struct to be writable to the cgroup specific file -func (wd *WeightDevice) WeightString() string { - return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight) -} - -// LeafWeightString formats the struct to be writable to the cgroup specific file -func (wd *WeightDevice) LeafWeightString() string { - return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight) -} - -// ThrottleDevice struct holds a `major:minor rate_per_second` pair -type ThrottleDevice struct { - blockIODevice - // Rate is the IO rate limit per cgroup per device - Rate uint64 `json:"rate"` -} - -// NewThrottleDevice returns a configured ThrottleDevice pointer -func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { - td := &ThrottleDevice{} - td.Major = major - td.Minor = minor - td.Rate = rate - return td -} - -// String formats the struct to be writable to the cgroup specific file -func (td *ThrottleDevice) String() string { - return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go deleted file mode 100644 index e15a662f5228..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go +++ /dev/null @@ -1,122 +0,0 @@ -package configs - -type FreezerState string - -const ( - Undefined FreezerState = "" - Frozen FreezerState = "FROZEN" - Thawed FreezerState = "THAWED" -) - -type Cgroup struct { - // Deprecated, use Path instead - Name string `json:"name,omitempty"` - - // name of parent of cgroup or slice - // Deprecated, use Path instead - Parent string `json:"parent,omitempty"` - - // Path specifies the path to cgroups that are created and/or joined by the container. - // The path is assumed to be relative to the host system cgroup mountpoint. - Path string `json:"path"` - - // ScopePrefix describes prefix for the scope name - ScopePrefix string `json:"scope_prefix"` - - // Paths represent the absolute cgroups paths to join. - // This takes precedence over Path. - Paths map[string]string - - // Resources contains various cgroups settings to apply - *Resources -} - -type Resources struct { - // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. - // Deprecated - AllowAllDevices *bool `json:"allow_all_devices,omitempty"` - // Deprecated - AllowedDevices []*Device `json:"allowed_devices,omitempty"` - // Deprecated - DeniedDevices []*Device `json:"denied_devices,omitempty"` - - Devices []*Device `json:"devices"` - - // Memory limit (in bytes) - Memory int64 `json:"memory"` - - // Memory reservation or soft_limit (in bytes) - MemoryReservation int64 `json:"memory_reservation"` - - // Total memory usage (memory + swap); set `-1` to enable unlimited swap - MemorySwap int64 `json:"memory_swap"` - - // Kernel memory limit (in bytes) - KernelMemory int64 `json:"kernel_memory"` - - // Kernel memory limit for TCP use (in bytes) - KernelMemoryTCP int64 `json:"kernel_memory_tcp"` - - // CPU shares (relative weight vs. other containers) - CpuShares uint64 `json:"cpu_shares"` - - // CPU hardcap limit (in usecs). Allowed cpu time in a given period. - CpuQuota int64 `json:"cpu_quota"` - - // CPU period to be used for hardcapping (in usecs). 0 to use system default. - CpuPeriod uint64 `json:"cpu_period"` - - // How many time CPU will use in realtime scheduling (in usecs). - CpuRtRuntime int64 `json:"cpu_rt_quota"` - - // CPU period to be used for realtime scheduling (in usecs). - CpuRtPeriod uint64 `json:"cpu_rt_period"` - - // CPU to use - CpusetCpus string `json:"cpuset_cpus"` - - // MEM to use - CpusetMems string `json:"cpuset_mems"` - - // Process limit; set <= `0' to disable limit. - PidsLimit int64 `json:"pids_limit"` - - // Specifies per cgroup weight, range is from 10 to 1000. - BlkioWeight uint16 `json:"blkio_weight"` - - // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only - BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` - - // Weight per cgroup per device, can override BlkioWeight. - BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` - - // IO read rate limit per cgroup per device, bytes per second. - BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` - - // IO write rate limit per cgroup per device, bytes per second. - BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` - - // IO read rate limit per cgroup per device, IO per second. - BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` - - // IO write rate limit per cgroup per device, IO per second. - BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` - - // set the freeze value for the process - Freezer FreezerState `json:"freezer"` - - // Hugetlb limit (in bytes) - HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` - - // Whether to disable OOM Killer - OomKillDisable bool `json:"oom_kill_disable"` - - // Tuning swappiness behaviour per cgroup - MemorySwappiness *uint64 `json:"memory_swappiness"` - - // Set priority of network traffic for container - NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` - - // Set class identifier for container's network packets - NetClsClassid uint32 `json:"net_cls_classid_u"` -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go deleted file mode 100644 index d74847b0db83..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go +++ /dev/null @@ -1,6 +0,0 @@ -package configs - -// TODO Windows: This can ultimately be entirely factored out on Windows as -// cgroups are a Unix-specific construct. -type Cgroup struct { -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go deleted file mode 100644 index b1c4762fe20c..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ /dev/null @@ -1,349 +0,0 @@ -package configs - -import ( - "bytes" - "encoding/json" - "fmt" - "os/exec" - "time" - - "github.com/opencontainers/runtime-spec/specs-go" - - "github.com/sirupsen/logrus" -) - -type Rlimit struct { - Type int `json:"type"` - Hard uint64 `json:"hard"` - Soft uint64 `json:"soft"` -} - -// IDMap represents UID/GID Mappings for User Namespaces. -type IDMap struct { - ContainerID int `json:"container_id"` - HostID int `json:"host_id"` - Size int `json:"size"` -} - -// Seccomp represents syscall restrictions -// By default, only the native architecture of the kernel is allowed to be used -// for syscalls. Additional architectures can be added by specifying them in -// Architectures. -type Seccomp struct { - DefaultAction Action `json:"default_action"` - Architectures []string `json:"architectures"` - Syscalls []*Syscall `json:"syscalls"` -} - -// Action is taken upon rule match in Seccomp -type Action int - -const ( - Kill Action = iota + 1 - Errno - Trap - Allow - Trace -) - -// Operator is a comparison operator to be used when matching syscall arguments in Seccomp -type Operator int - -const ( - EqualTo Operator = iota + 1 - NotEqualTo - GreaterThan - GreaterThanOrEqualTo - LessThan - LessThanOrEqualTo - MaskEqualTo -) - -// Arg is a rule to match a specific syscall argument in Seccomp -type Arg struct { - Index uint `json:"index"` - Value uint64 `json:"value"` - ValueTwo uint64 `json:"value_two"` - Op Operator `json:"op"` -} - -// Syscall is a rule to match a syscall in Seccomp -type Syscall struct { - Name string `json:"name"` - Action Action `json:"action"` - Args []*Arg `json:"args"` -} - -// TODO Windows. Many of these fields should be factored out into those parts -// which are common across platforms, and those which are platform specific. - -// Config defines configuration options for executing a process inside a contained environment. -type Config struct { - // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs - // This is a common option when the container is running in ramdisk - NoPivotRoot bool `json:"no_pivot_root"` - - // ParentDeathSignal specifies the signal that is sent to the container's process in the case - // that the parent process dies. - ParentDeathSignal int `json:"parent_death_signal"` - - // Path to a directory containing the container's root filesystem. - Rootfs string `json:"rootfs"` - - // Readonlyfs will remount the container's rootfs as readonly where only externally mounted - // bind mounts are writtable. - Readonlyfs bool `json:"readonlyfs"` - - // Specifies the mount propagation flags to be applied to /. - RootPropagation int `json:"rootPropagation"` - - // Mounts specify additional source and destination paths that will be mounted inside the container's - // rootfs and mount namespace if specified - Mounts []*Mount `json:"mounts"` - - // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! - Devices []*Device `json:"devices"` - - MountLabel string `json:"mount_label"` - - // Hostname optionally sets the container's hostname if provided - Hostname string `json:"hostname"` - - // Namespaces specifies the container's namespaces that it should setup when cloning the init process - // If a namespace is not provided that namespace is shared from the container's parent process - Namespaces Namespaces `json:"namespaces"` - - // Capabilities specify the capabilities to keep when executing the process inside the container - // All capabilities not specified will be dropped from the processes capability mask - Capabilities *Capabilities `json:"capabilities"` - - // Networks specifies the container's network setup to be created - Networks []*Network `json:"networks"` - - // Routes can be specified to create entries in the route table as the container is started - Routes []*Route `json:"routes"` - - // Cgroups specifies specific cgroup settings for the various subsystems that the container is - // placed into to limit the resources the container has available - Cgroups *Cgroup `json:"cgroups"` - - // AppArmorProfile specifies the profile to apply to the process running in the container and is - // change at the time the process is execed - AppArmorProfile string `json:"apparmor_profile,omitempty"` - - // ProcessLabel specifies the label to apply to the process running in the container. It is - // commonly used by selinux - ProcessLabel string `json:"process_label,omitempty"` - - // Rlimits specifies the resource limits, such as max open files, to set in the container - // If Rlimits are not set, the container will inherit rlimits from the parent process - Rlimits []Rlimit `json:"rlimits,omitempty"` - - // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores - // for a process. Valid values are between the range [-1000, '1000'], where processes with - // higher scores are preferred for being killed. If it is unset then we don't touch the current - // value. - // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ - OomScoreAdj *int `json:"oom_score_adj,omitempty"` - - // UidMappings is an array of User ID mappings for User Namespaces - UidMappings []IDMap `json:"uid_mappings"` - - // GidMappings is an array of Group ID mappings for User Namespaces - GidMappings []IDMap `json:"gid_mappings"` - - // MaskPaths specifies paths within the container's rootfs to mask over with a bind - // mount pointing to /dev/null as to prevent reads of the file. - MaskPaths []string `json:"mask_paths"` - - // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only - // so that these files prevent any writes. - ReadonlyPaths []string `json:"readonly_paths"` - - // Sysctl is a map of properties and their values. It is the equivalent of using - // sysctl -w my.property.name value in Linux. - Sysctl map[string]string `json:"sysctl"` - - // Seccomp allows actions to be taken whenever a syscall is made within the container. - // A number of rules are given, each having an action to be taken if a syscall matches it. - // A default action to be taken if no rules match is also given. - Seccomp *Seccomp `json:"seccomp"` - - // NoNewPrivileges controls whether processes in the container can gain additional privileges. - NoNewPrivileges bool `json:"no_new_privileges,omitempty"` - - // Hooks are a collection of actions to perform at various container lifecycle events. - // CommandHooks are serialized to JSON, but other hooks are not. - Hooks *Hooks - - // Version is the version of opencontainer specification that is supported. - Version string `json:"version"` - - // Labels are user defined metadata that is stored in the config and populated on the state - Labels []string `json:"labels"` - - // NoNewKeyring will not allocated a new session keyring for the container. It will use the - // callers keyring in this case. - NoNewKeyring bool `json:"no_new_keyring"` - - // Rootless specifies whether the container is a rootless container. - Rootless bool `json:"rootless"` - - // IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into - // to limit the resources (e.g., L3 cache) the container has available - IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` -} - -type Hooks struct { - // Prestart commands are executed after the container namespaces are created, - // but before the user supplied command is executed from init. - Prestart []Hook - - // Poststart commands are executed after the container init process starts. - Poststart []Hook - - // Poststop commands are executed after the container init process exits. - Poststop []Hook -} - -type Capabilities struct { - // Bounding is the set of capabilities checked by the kernel. - Bounding []string - // Effective is the set of capabilities checked by the kernel. - Effective []string - // Inheritable is the capabilities preserved across execve. - Inheritable []string - // Permitted is the limiting superset for effective capabilities. - Permitted []string - // Ambient is the ambient set of capabilities that are kept. - Ambient []string -} - -func (hooks *Hooks) UnmarshalJSON(b []byte) error { - var state struct { - Prestart []CommandHook - Poststart []CommandHook - Poststop []CommandHook - } - - if err := json.Unmarshal(b, &state); err != nil { - return err - } - - deserialize := func(shooks []CommandHook) (hooks []Hook) { - for _, shook := range shooks { - hooks = append(hooks, shook) - } - - return hooks - } - - hooks.Prestart = deserialize(state.Prestart) - hooks.Poststart = deserialize(state.Poststart) - hooks.Poststop = deserialize(state.Poststop) - return nil -} - -func (hooks Hooks) MarshalJSON() ([]byte, error) { - serialize := func(hooks []Hook) (serializableHooks []CommandHook) { - for _, hook := range hooks { - switch chook := hook.(type) { - case CommandHook: - serializableHooks = append(serializableHooks, chook) - default: - logrus.Warnf("cannot serialize hook of type %T, skipping", hook) - } - } - - return serializableHooks - } - - return json.Marshal(map[string]interface{}{ - "prestart": serialize(hooks.Prestart), - "poststart": serialize(hooks.Poststart), - "poststop": serialize(hooks.Poststop), - }) -} - -// HookState is the payload provided to a hook on execution. -type HookState specs.State - -type Hook interface { - // Run executes the hook with the provided state. - Run(HookState) error -} - -// NewFunctionHook will call the provided function when the hook is run. -func NewFunctionHook(f func(HookState) error) FuncHook { - return FuncHook{ - run: f, - } -} - -type FuncHook struct { - run func(HookState) error -} - -func (f FuncHook) Run(s HookState) error { - return f.run(s) -} - -type Command struct { - Path string `json:"path"` - Args []string `json:"args"` - Env []string `json:"env"` - Dir string `json:"dir"` - Timeout *time.Duration `json:"timeout"` -} - -// NewCommandHook will execute the provided command when the hook is run. -func NewCommandHook(cmd Command) CommandHook { - return CommandHook{ - Command: cmd, - } -} - -type CommandHook struct { - Command -} - -func (c Command) Run(s HookState) error { - b, err := json.Marshal(s) - if err != nil { - return err - } - var stdout, stderr bytes.Buffer - cmd := exec.Cmd{ - Path: c.Path, - Args: c.Args, - Env: c.Env, - Stdin: bytes.NewReader(b), - Stdout: &stdout, - Stderr: &stderr, - } - if err := cmd.Start(); err != nil { - return err - } - errC := make(chan error, 1) - go func() { - err := cmd.Wait() - if err != nil { - err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) - } - errC <- err - }() - var timerCh <-chan time.Time - if c.Timeout != nil { - timer := time.NewTimer(*c.Timeout) - defer timer.Stop() - timerCh = timer.C - } - select { - case err := <-errC: - return err - case <-timerCh: - cmd.Process.Kill() - cmd.Wait() - return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds()) - } -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go deleted file mode 100644 index 07da10804540..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go +++ /dev/null @@ -1,61 +0,0 @@ -package configs - -import "fmt" - -// HostUID gets the translated uid for the process on host which could be -// different when user namespaces are enabled. -func (c Config) HostUID(containerId int) (int, error) { - if c.Namespaces.Contains(NEWUSER) { - if c.UidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.") - } - id, found := c.hostIDFromMapping(containerId, c.UidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.") - } - return id, nil - } - // Return unchanged id. - return containerId, nil -} - -// HostRootUID gets the root uid for the process on host which could be non-zero -// when user namespaces are enabled. -func (c Config) HostRootUID() (int, error) { - return c.HostUID(0) -} - -// HostGID gets the translated gid for the process on host which could be -// different when user namespaces are enabled. -func (c Config) HostGID(containerId int) (int, error) { - if c.Namespaces.Contains(NEWUSER) { - if c.GidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") - } - id, found := c.hostIDFromMapping(containerId, c.GidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.") - } - return id, nil - } - // Return unchanged id. - return containerId, nil -} - -// HostRootGID gets the root gid for the process on host which could be non-zero -// when user namespaces are enabled. -func (c Config) HostRootGID() (int, error) { - return c.HostGID(0) -} - -// Utility function that gets a host ID for a container ID from user namespace map -// if that ID is present in the map. -func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { - for _, m := range uMap { - if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { - hostID := m.HostID + (containerID - m.ContainerID) - return hostID, true - } - } - return -1, false -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go deleted file mode 100644 index 8701bb212d7b..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go +++ /dev/null @@ -1,57 +0,0 @@ -package configs - -import ( - "fmt" - "os" -) - -const ( - Wildcard = -1 -) - -// TODO Windows: This can be factored out in the future - -type Device struct { - // Device type, block, char, etc. - Type rune `json:"type"` - - // Path to the device. - Path string `json:"path"` - - // Major is the device's major number. - Major int64 `json:"major"` - - // Minor is the device's minor number. - Minor int64 `json:"minor"` - - // Cgroup permissions format, rwm. - Permissions string `json:"permissions"` - - // FileMode permission bits for the device. - FileMode os.FileMode `json:"file_mode"` - - // Uid of the device. - Uid uint32 `json:"uid"` - - // Gid of the device. - Gid uint32 `json:"gid"` - - // Write the file to the allowed list - Allow bool `json:"allow"` -} - -func (d *Device) CgroupString() string { - return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) -} - -func (d *Device) Mkdev() int { - return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) -} - -// deviceNumberString converts the device number to a string return result. -func deviceNumberString(number int64) string { - if number == Wildcard { - return "*" - } - return fmt.Sprint(number) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go deleted file mode 100644 index e4f423c523ff..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go +++ /dev/null @@ -1,111 +0,0 @@ -// +build linux - -package configs - -var ( - // DefaultSimpleDevices are devices that are to be both allowed and created. - DefaultSimpleDevices = []*Device{ - // /dev/null and zero - { - Path: "/dev/null", - Type: 'c', - Major: 1, - Minor: 3, - Permissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/zero", - Type: 'c', - Major: 1, - Minor: 5, - Permissions: "rwm", - FileMode: 0666, - }, - - { - Path: "/dev/full", - Type: 'c', - Major: 1, - Minor: 7, - Permissions: "rwm", - FileMode: 0666, - }, - - // consoles and ttys - { - Path: "/dev/tty", - Type: 'c', - Major: 5, - Minor: 0, - Permissions: "rwm", - FileMode: 0666, - }, - - // /dev/urandom,/dev/random - { - Path: "/dev/urandom", - Type: 'c', - Major: 1, - Minor: 9, - Permissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/random", - Type: 'c', - Major: 1, - Minor: 8, - Permissions: "rwm", - FileMode: 0666, - }, - } - DefaultAllowedDevices = append([]*Device{ - // allow mknod for any device - { - Type: 'c', - Major: Wildcard, - Minor: Wildcard, - Permissions: "m", - }, - { - Type: 'b', - Major: Wildcard, - Minor: Wildcard, - Permissions: "m", - }, - - { - Path: "/dev/console", - Type: 'c', - Major: 5, - Minor: 1, - Permissions: "rwm", - }, - // /dev/pts/ - pts namespaces are "coming soon" - { - Path: "", - Type: 'c', - Major: 136, - Minor: Wildcard, - Permissions: "rwm", - }, - { - Path: "", - Type: 'c', - Major: 5, - Minor: 2, - Permissions: "rwm", - }, - - // tuntap - { - Path: "", - Type: 'c', - Major: 10, - Minor: 200, - Permissions: "rwm", - }, - }, DefaultSimpleDevices...) - DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...) -) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go deleted file mode 100644 index d30216380b50..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go +++ /dev/null @@ -1,9 +0,0 @@ -package configs - -type HugepageLimit struct { - // which type of hugepage to limit. - Pagesize string `json:"page_size"` - - // usage limit for hugepage. - Limit uint64 `json:"limit"` -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go deleted file mode 100644 index 36bd5f96a112..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go +++ /dev/null @@ -1,7 +0,0 @@ -package configs - -type IntelRdt struct { - // The schema for L3 cache id and capacity bitmask (CBM) - // Format: "L3:=;=;..." - L3CacheSchema string `json:"l3_cache_schema,omitempty"` -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go deleted file mode 100644 index 9a0395eaf5ff..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go +++ /dev/null @@ -1,14 +0,0 @@ -package configs - -import ( - "fmt" -) - -type IfPrioMap struct { - Interface string `json:"interface"` - Priority int64 `json:"priority"` -} - -func (i *IfPrioMap) CgroupString() string { - return fmt.Sprintf("%s %d", i.Interface, i.Priority) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go deleted file mode 100644 index 670757ddb5f4..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go +++ /dev/null @@ -1,39 +0,0 @@ -package configs - -const ( - // EXT_COPYUP is a directive to copy up the contents of a directory when - // a tmpfs is mounted over it. - EXT_COPYUP = 1 << iota -) - -type Mount struct { - // Source path for the mount. - Source string `json:"source"` - - // Destination path for the mount inside the container. - Destination string `json:"destination"` - - // Device the mount is for. - Device string `json:"device"` - - // Mount flags. - Flags int `json:"flags"` - - // Propagation Flags - PropagationFlags []int `json:"propagation_flags"` - - // Mount data applied to the mount. - Data string `json:"data"` - - // Relabel source if set, "z" indicates shared, "Z" indicates unshared. - Relabel string `json:"relabel"` - - // Extensions are additional flags that are specific to runc. - Extensions int `json:"extensions"` - - // Optional Command to be run before Source is mounted. - PremountCmds []Command `json:"premount_cmds"` - - // Optional Command to be run after Source is mounted. - PostmountCmds []Command `json:"postmount_cmds"` -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go deleted file mode 100644 index a3329a31a9cb..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go +++ /dev/null @@ -1,5 +0,0 @@ -package configs - -type NamespaceType string - -type Namespaces []Namespace diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go deleted file mode 100644 index 5fc171a57b36..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go +++ /dev/null @@ -1,122 +0,0 @@ -package configs - -import ( - "fmt" - "os" - "sync" -) - -const ( - NEWNET NamespaceType = "NEWNET" - NEWPID NamespaceType = "NEWPID" - NEWNS NamespaceType = "NEWNS" - NEWUTS NamespaceType = "NEWUTS" - NEWIPC NamespaceType = "NEWIPC" - NEWUSER NamespaceType = "NEWUSER" -) - -var ( - nsLock sync.Mutex - supportedNamespaces = make(map[NamespaceType]bool) -) - -// NsName converts the namespace type to its filename -func NsName(ns NamespaceType) string { - switch ns { - case NEWNET: - return "net" - case NEWNS: - return "mnt" - case NEWPID: - return "pid" - case NEWIPC: - return "ipc" - case NEWUSER: - return "user" - case NEWUTS: - return "uts" - } - return "" -} - -// IsNamespaceSupported returns whether a namespace is available or -// not -func IsNamespaceSupported(ns NamespaceType) bool { - nsLock.Lock() - defer nsLock.Unlock() - supported, ok := supportedNamespaces[ns] - if ok { - return supported - } - nsFile := NsName(ns) - // if the namespace type is unknown, just return false - if nsFile == "" { - return false - } - _, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile)) - // a namespace is supported if it exists and we have permissions to read it - supported = err == nil - supportedNamespaces[ns] = supported - return supported -} - -func NamespaceTypes() []NamespaceType { - return []NamespaceType{ - NEWUSER, // Keep user NS always first, don't move it. - NEWIPC, - NEWUTS, - NEWNET, - NEWPID, - NEWNS, - } -} - -// Namespace defines configuration for each namespace. It specifies an -// alternate path that is able to be joined via setns. -type Namespace struct { - Type NamespaceType `json:"type"` - Path string `json:"path"` -} - -func (n *Namespace) GetPath(pid int) string { - return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type)) -} - -func (n *Namespaces) Remove(t NamespaceType) bool { - i := n.index(t) - if i == -1 { - return false - } - *n = append((*n)[:i], (*n)[i+1:]...) - return true -} - -func (n *Namespaces) Add(t NamespaceType, path string) { - i := n.index(t) - if i == -1 { - *n = append(*n, Namespace{Type: t, Path: path}) - return - } - (*n)[i].Path = path -} - -func (n *Namespaces) index(t NamespaceType) int { - for i, ns := range *n { - if ns.Type == t { - return i - } - } - return -1 -} - -func (n *Namespaces) Contains(t NamespaceType) bool { - return n.index(t) != -1 -} - -func (n *Namespaces) PathOf(t NamespaceType) string { - i := n.index(t) - if i == -1 { - return "" - } - return (*n)[i].Path -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go deleted file mode 100644 index 4ce6813d2330..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go +++ /dev/null @@ -1,31 +0,0 @@ -// +build linux - -package configs - -import "golang.org/x/sys/unix" - -func (n *Namespace) Syscall() int { - return namespaceInfo[n.Type] -} - -var namespaceInfo = map[NamespaceType]int{ - NEWNET: unix.CLONE_NEWNET, - NEWNS: unix.CLONE_NEWNS, - NEWUSER: unix.CLONE_NEWUSER, - NEWIPC: unix.CLONE_NEWIPC, - NEWUTS: unix.CLONE_NEWUTS, - NEWPID: unix.CLONE_NEWPID, -} - -// CloneFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare. This function returns flags only for new namespaces. -func (n *Namespaces) CloneFlags() uintptr { - var flag int - for _, v := range *n { - if v.Path != "" { - continue - } - flag |= namespaceInfo[v.Type] - } - return uintptr(flag) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go deleted file mode 100644 index 5d9a5c81f3fd..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go +++ /dev/null @@ -1,13 +0,0 @@ -// +build !linux,!windows - -package configs - -func (n *Namespace) Syscall() int { - panic("No namespace syscall support") -} - -// CloneFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare. This function returns flags only for new namespaces. -func (n *Namespaces) CloneFlags() uintptr { - panic("No namespace syscall support") -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go deleted file mode 100644 index 19bf713de3a3..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go +++ /dev/null @@ -1,8 +0,0 @@ -// +build !linux - -package configs - -// Namespace defines configuration for each namespace. It specifies an -// alternate path that is able to be joined via setns. -type Namespace struct { -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go deleted file mode 100644 index ccdb228e14c8..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go +++ /dev/null @@ -1,72 +0,0 @@ -package configs - -// Network defines configuration for a container's networking stack -// -// The network configuration can be omitted from a container causing the -// container to be setup with the host's networking stack -type Network struct { - // Type sets the networks type, commonly veth and loopback - Type string `json:"type"` - - // Name of the network interface - Name string `json:"name"` - - // The bridge to use. - Bridge string `json:"bridge"` - - // MacAddress contains the MAC address to set on the network interface - MacAddress string `json:"mac_address"` - - // Address contains the IPv4 and mask to set on the network interface - Address string `json:"address"` - - // Gateway sets the gateway address that is used as the default for the interface - Gateway string `json:"gateway"` - - // IPv6Address contains the IPv6 and mask to set on the network interface - IPv6Address string `json:"ipv6_address"` - - // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface - IPv6Gateway string `json:"ipv6_gateway"` - - // Mtu sets the mtu value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - Mtu int `json:"mtu"` - - // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - TxQueueLen int `json:"txqueuelen"` - - // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the - // container. - HostInterfaceName string `json:"host_interface_name"` - - // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface - // bridge port in the case of type veth - // Note: This is unsupported on some systems. - // Note: This does not apply to loopback interfaces. - HairpinMode bool `json:"hairpin_mode"` -} - -// Routes can be specified to create entries in the route table as the container is started -// -// All of destination, source, and gateway should be either IPv4 or IPv6. -// One of the three options must be present, and omitted entries will use their -// IP family default for the route table. For IPv4 for example, setting the -// gateway to 1.2.3.4 and the interface to eth0 will set up a standard -// destination of 0.0.0.0(or *) when viewed in the route table. -type Route struct { - // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 - Destination string `json:"destination"` - - // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 - Source string `json:"source"` - - // Sets the gateway. Accepts IPv4 and IPv6 - Gateway string `json:"gateway"` - - // The device to set this route up for, for example: eth0 - InterfaceName string `json:"interface_name"` -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index cb2243147351..d7cb0af030eb 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -82,7 +82,7 @@ struct nlconfig_t { uint8_t is_setgroup; /* Rootless container settings. */ - uint8_t is_rootless; + uint8_t is_rootless_euid; /* boolean */ char *uidmappath; size_t uidmappath_len; char *gidmappath; @@ -100,7 +100,7 @@ struct nlconfig_t { #define GIDMAP_ATTR 27284 #define SETGROUP_ATTR 27285 #define OOM_SCORE_ADJ_ATTR 27286 -#define ROOTLESS_ATTR 27287 +#define ROOTLESS_EUID_ATTR 27287 #define UIDMAPPATH_ATTR 27288 #define GIDMAPPATH_ATTR 27289 @@ -419,8 +419,8 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; - case ROOTLESS_ATTR: - config->is_rootless = readint8(current); + case ROOTLESS_EUID_ATTR: + config->is_rootless_euid = readint8(current); /* boolean */ break; case OOM_SCORE_ADJ_ATTR: config->oom_score_adj = current; @@ -687,7 +687,7 @@ void nsexec(void) * newuidmap/newgidmap shall be used. */ - if (config.is_rootless && !config.is_setgroup) + if (config.is_rootless_euid && !config.is_setgroup) update_setgroups(child, SETGROUPS_DENY); /* Set up mappings. */ @@ -953,7 +953,7 @@ void nsexec(void) if (setgid(0) < 0) bail("setgid failed"); - if (!config.is_rootless && config.is_setgroup) { + if (!config.is_rootless_euid && config.is_setgroup) { if (setgroups(0, NULL) < 0) bail("setgroups failed"); } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go deleted file mode 100644 index ded5a6bbc8bb..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go +++ /dev/null @@ -1,76 +0,0 @@ -package seccomp - -import ( - "fmt" - - "github.com/opencontainers/runc/libcontainer/configs" -) - -var operators = map[string]configs.Operator{ - "SCMP_CMP_NE": configs.NotEqualTo, - "SCMP_CMP_LT": configs.LessThan, - "SCMP_CMP_LE": configs.LessThanOrEqualTo, - "SCMP_CMP_EQ": configs.EqualTo, - "SCMP_CMP_GE": configs.GreaterThanOrEqualTo, - "SCMP_CMP_GT": configs.GreaterThan, - "SCMP_CMP_MASKED_EQ": configs.MaskEqualTo, -} - -var actions = map[string]configs.Action{ - "SCMP_ACT_KILL": configs.Kill, - "SCMP_ACT_ERRNO": configs.Errno, - "SCMP_ACT_TRAP": configs.Trap, - "SCMP_ACT_ALLOW": configs.Allow, - "SCMP_ACT_TRACE": configs.Trace, -} - -var archs = map[string]string{ - "SCMP_ARCH_X86": "x86", - "SCMP_ARCH_X86_64": "amd64", - "SCMP_ARCH_X32": "x32", - "SCMP_ARCH_ARM": "arm", - "SCMP_ARCH_AARCH64": "arm64", - "SCMP_ARCH_MIPS": "mips", - "SCMP_ARCH_MIPS64": "mips64", - "SCMP_ARCH_MIPS64N32": "mips64n32", - "SCMP_ARCH_MIPSEL": "mipsel", - "SCMP_ARCH_MIPSEL64": "mipsel64", - "SCMP_ARCH_MIPSEL64N32": "mipsel64n32", - "SCMP_ARCH_PPC": "ppc", - "SCMP_ARCH_PPC64": "ppc64", - "SCMP_ARCH_PPC64LE": "ppc64le", - "SCMP_ARCH_S390": "s390", - "SCMP_ARCH_S390X": "s390x", -} - -// ConvertStringToOperator converts a string into a Seccomp comparison operator. -// Comparison operators use the names they are assigned by Libseccomp's header. -// Attempting to convert a string that is not a valid operator results in an -// error. -func ConvertStringToOperator(in string) (configs.Operator, error) { - if op, ok := operators[in]; ok == true { - return op, nil - } - return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in) -} - -// ConvertStringToAction converts a string into a Seccomp rule match action. -// Actions use the names they are assigned in Libseccomp's header, though some -// (notable, SCMP_ACT_TRACE) are not available in this implementation and will -// return errors. -// Attempting to convert a string that is not a valid action results in an -// error. -func ConvertStringToAction(in string) (configs.Action, error) { - if act, ok := actions[in]; ok == true { - return act, nil - } - return 0, fmt.Errorf("string %s is not a valid action for seccomp", in) -} - -// ConvertStringToArch converts a string into a Seccomp comparison arch. -func ConvertStringToArch(in string) (string, error) { - if arch, ok := archs[in]; ok == true { - return arch, nil - } - return "", fmt.Errorf("string %s is not a valid arch for seccomp", in) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go deleted file mode 100644 index d99f3fe640c6..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go +++ /dev/null @@ -1,258 +0,0 @@ -// +build linux,cgo,seccomp - -package seccomp - -import ( - "bufio" - "fmt" - "os" - "strings" - - "github.com/opencontainers/runc/libcontainer/configs" - libseccomp "github.com/seccomp/libseccomp-golang" - - "golang.org/x/sys/unix" -) - -var ( - actAllow = libseccomp.ActAllow - actTrap = libseccomp.ActTrap - actKill = libseccomp.ActKill - actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM)) - actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM)) -) - -const ( - // Linux system calls can have at most 6 arguments - syscallMaxArguments int = 6 -) - -// Filters given syscalls in a container, preventing them from being used -// Started in the container init process, and carried over to all child processes -// Setns calls, however, require a separate invocation, as they are not children -// of the init until they join the namespace -func InitSeccomp(config *configs.Seccomp) error { - if config == nil { - return fmt.Errorf("cannot initialize Seccomp - nil config passed") - } - - defaultAction, err := getAction(config.DefaultAction) - if err != nil { - return fmt.Errorf("error initializing seccomp - invalid default action") - } - - filter, err := libseccomp.NewFilter(defaultAction) - if err != nil { - return fmt.Errorf("error creating filter: %s", err) - } - - // Add extra architectures - for _, arch := range config.Architectures { - scmpArch, err := libseccomp.GetArchFromString(arch) - if err != nil { - return fmt.Errorf("error validating Seccomp architecture: %s", err) - } - - if err := filter.AddArch(scmpArch); err != nil { - return fmt.Errorf("error adding architecture to seccomp filter: %s", err) - } - } - - // Unset no new privs bit - if err := filter.SetNoNewPrivsBit(false); err != nil { - return fmt.Errorf("error setting no new privileges: %s", err) - } - - // Add a rule for each syscall - for _, call := range config.Syscalls { - if call == nil { - return fmt.Errorf("encountered nil syscall while initializing Seccomp") - } - - if err = matchCall(filter, call); err != nil { - return err - } - } - - if err = filter.Load(); err != nil { - return fmt.Errorf("error loading seccomp filter into kernel: %s", err) - } - - return nil -} - -// IsEnabled returns if the kernel has been configured to support seccomp. -func IsEnabled() bool { - // Try to read from /proc/self/status for kernels > 3.8 - s, err := parseStatusFile("/proc/self/status") - if err != nil { - // Check if Seccomp is supported, via CONFIG_SECCOMP. - if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL { - // Make sure the kernel has CONFIG_SECCOMP_FILTER. - if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL { - return true - } - } - return false - } - _, ok := s["Seccomp"] - return ok -} - -// Convert Libcontainer Action to Libseccomp ScmpAction -func getAction(act configs.Action) (libseccomp.ScmpAction, error) { - switch act { - case configs.Kill: - return actKill, nil - case configs.Errno: - return actErrno, nil - case configs.Trap: - return actTrap, nil - case configs.Allow: - return actAllow, nil - case configs.Trace: - return actTrace, nil - default: - return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule") - } -} - -// Convert Libcontainer Operator to Libseccomp ScmpCompareOp -func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) { - switch op { - case configs.EqualTo: - return libseccomp.CompareEqual, nil - case configs.NotEqualTo: - return libseccomp.CompareNotEqual, nil - case configs.GreaterThan: - return libseccomp.CompareGreater, nil - case configs.GreaterThanOrEqualTo: - return libseccomp.CompareGreaterEqual, nil - case configs.LessThan: - return libseccomp.CompareLess, nil - case configs.LessThanOrEqualTo: - return libseccomp.CompareLessOrEqual, nil - case configs.MaskEqualTo: - return libseccomp.CompareMaskedEqual, nil - default: - return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule") - } -} - -// Convert Libcontainer Arg to Libseccomp ScmpCondition -func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { - cond := libseccomp.ScmpCondition{} - - if arg == nil { - return cond, fmt.Errorf("cannot convert nil to syscall condition") - } - - op, err := getOperator(arg.Op) - if err != nil { - return cond, err - } - - return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo) -} - -// Add a rule to match a single syscall -func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { - if call == nil || filter == nil { - return fmt.Errorf("cannot use nil as syscall to block") - } - - if len(call.Name) == 0 { - return fmt.Errorf("empty string is not a valid syscall") - } - - // If we can't resolve the syscall, assume it's not supported on this kernel - // Ignore it, don't error out - callNum, err := libseccomp.GetSyscallFromName(call.Name) - if err != nil { - return nil - } - - // Convert the call's action to the libseccomp equivalent - callAct, err := getAction(call.Action) - if err != nil { - return fmt.Errorf("action in seccomp profile is invalid: %s", err) - } - - // Unconditional match - just add the rule - if len(call.Args) == 0 { - if err = filter.AddRule(callNum, callAct); err != nil { - return fmt.Errorf("error adding seccomp filter rule for syscall %s: %s", call.Name, err) - } - } else { - // If two or more arguments have the same condition, - // Revert to old behavior, adding each condition as a separate rule - argCounts := make([]uint, syscallMaxArguments) - conditions := []libseccomp.ScmpCondition{} - - for _, cond := range call.Args { - newCond, err := getCondition(cond) - if err != nil { - return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err) - } - - argCounts[cond.Index] += 1 - - conditions = append(conditions, newCond) - } - - hasMultipleArgs := false - for _, count := range argCounts { - if count > 1 { - hasMultipleArgs = true - break - } - } - - if hasMultipleArgs { - // Revert to old behavior - // Add each condition attached to a separate rule - for _, cond := range conditions { - condArr := []libseccomp.ScmpCondition{cond} - - if err = filter.AddRuleConditional(callNum, callAct, condArr); err != nil { - return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err) - } - } - } else { - // No conditions share same argument - // Use new, proper behavior - if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil { - return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err) - } - } - } - - return nil -} - -func parseStatusFile(path string) (map[string]string, error) { - f, err := os.Open(path) - if err != nil { - return nil, err - } - defer f.Close() - - s := bufio.NewScanner(f) - status := make(map[string]string) - - for s.Scan() { - text := s.Text() - parts := strings.Split(text, ":") - - if len(parts) <= 1 { - continue - } - - status[parts[0]] = parts[1] - } - if err := s.Err(); err != nil { - return nil, err - } - - return status, nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go deleted file mode 100644 index 44df1ad4c269..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go +++ /dev/null @@ -1,24 +0,0 @@ -// +build !linux !cgo !seccomp - -package seccomp - -import ( - "errors" - - "github.com/opencontainers/runc/libcontainer/configs" -) - -var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported") - -// InitSeccomp does nothing because seccomp is not supported. -func InitSeccomp(config *configs.Seccomp) error { - if config != nil { - return ErrSeccompNotEnabled - } - return nil -} - -// IsEnabled returns false, because it is not supported. -func IsEnabled() bool { - return false -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go b/vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go deleted file mode 100644 index c113b337f34a..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go +++ /dev/null @@ -1,221 +0,0 @@ -package specconv - -import ( - "os" - "strings" - - "github.com/opencontainers/runtime-spec/specs-go" -) - -// Example returns an example spec file, with many options set so a user can -// see what a standard spec file looks like. -func Example() *specs.Spec { - return &specs.Spec{ - Version: specs.Version, - Root: &specs.Root{ - Path: "rootfs", - Readonly: true, - }, - Process: &specs.Process{ - Terminal: true, - User: specs.User{}, - Args: []string{ - "sh", - }, - Env: []string{ - "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - "TERM=xterm", - }, - Cwd: "/", - NoNewPrivileges: true, - Capabilities: &specs.LinuxCapabilities{ - Bounding: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Permitted: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Inheritable: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Ambient: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Effective: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - }, - Rlimits: []specs.POSIXRlimit{ - { - Type: "RLIMIT_NOFILE", - Hard: uint64(1024), - Soft: uint64(1024), - }, - }, - }, - Hostname: "runc", - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "proc", - Source: "proc", - Options: nil, - }, - { - Destination: "/dev", - Type: "tmpfs", - Source: "tmpfs", - Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"}, - }, - { - Destination: "/dev/pts", - Type: "devpts", - Source: "devpts", - Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"}, - }, - { - Destination: "/dev/shm", - Type: "tmpfs", - Source: "shm", - Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"}, - }, - { - Destination: "/dev/mqueue", - Type: "mqueue", - Source: "mqueue", - Options: []string{"nosuid", "noexec", "nodev"}, - }, - { - Destination: "/sys", - Type: "sysfs", - Source: "sysfs", - Options: []string{"nosuid", "noexec", "nodev", "ro"}, - }, - { - Destination: "/sys/fs/cgroup", - Type: "cgroup", - Source: "cgroup", - Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, - }, - }, - Linux: &specs.Linux{ - MaskedPaths: []string{ - "/proc/kcore", - "/proc/latency_stats", - "/proc/timer_list", - "/proc/timer_stats", - "/proc/sched_debug", - "/sys/firmware", - "/proc/scsi", - }, - ReadonlyPaths: []string{ - "/proc/asound", - "/proc/bus", - "/proc/fs", - "/proc/irq", - "/proc/sys", - "/proc/sysrq-trigger", - }, - Resources: &specs.LinuxResources{ - Devices: []specs.LinuxDeviceCgroup{ - { - Allow: false, - Access: "rwm", - }, - }, - }, - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - }, - { - Type: "network", - }, - { - Type: "ipc", - }, - { - Type: "uts", - }, - { - Type: "mount", - }, - }, - }, - } -} - -// ToRootless converts the given spec file into one that should work with -// rootless containers, by removing incompatible options and adding others that -// are needed. -func ToRootless(spec *specs.Spec) { - var namespaces []specs.LinuxNamespace - - // Remove networkns from the spec. - for _, ns := range spec.Linux.Namespaces { - switch ns.Type { - case specs.NetworkNamespace, specs.UserNamespace: - // Do nothing. - default: - namespaces = append(namespaces, ns) - } - } - // Add userns to the spec. - namespaces = append(namespaces, specs.LinuxNamespace{ - Type: specs.UserNamespace, - }) - spec.Linux.Namespaces = namespaces - - // Add mappings for the current user. - spec.Linux.UIDMappings = []specs.LinuxIDMapping{{ - HostID: uint32(os.Geteuid()), - ContainerID: 0, - Size: 1, - }} - spec.Linux.GIDMappings = []specs.LinuxIDMapping{{ - HostID: uint32(os.Getegid()), - ContainerID: 0, - Size: 1, - }} - - // Fix up mounts. - var mounts []specs.Mount - for _, mount := range spec.Mounts { - // Ignore all mounts that are under /sys. - if strings.HasPrefix(mount.Destination, "/sys") { - continue - } - - // Remove all gid= and uid= mappings. - var options []string - for _, option := range mount.Options { - if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") { - options = append(options, option) - } - } - - mount.Options = options - mounts = append(mounts, mount) - } - // Add the sysfs mount as an rbind. - mounts = append(mounts, specs.Mount{ - Source: "/sys", - Destination: "/sys", - Type: "none", - Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, - }) - spec.Mounts = mounts - - // Remove cgroup settings. - spec.Linux.Resources = nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go deleted file mode 100644 index 7951497efa3e..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go +++ /dev/null @@ -1,836 +0,0 @@ -// +build linux - -// Package specconv implements conversion of specifications to libcontainer -// configurations -package specconv - -import ( - "fmt" - "os" - "path/filepath" - "strings" - "time" - - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/seccomp" - libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" - "github.com/opencontainers/runtime-spec/specs-go" - - "golang.org/x/sys/unix" -) - -const wildcard = -1 - -var namespaceMapping = map[specs.LinuxNamespaceType]configs.NamespaceType{ - specs.PIDNamespace: configs.NEWPID, - specs.NetworkNamespace: configs.NEWNET, - specs.MountNamespace: configs.NEWNS, - specs.UserNamespace: configs.NEWUSER, - specs.IPCNamespace: configs.NEWIPC, - specs.UTSNamespace: configs.NEWUTS, -} - -var mountPropagationMapping = map[string]int{ - "rprivate": unix.MS_PRIVATE | unix.MS_REC, - "private": unix.MS_PRIVATE, - "rslave": unix.MS_SLAVE | unix.MS_REC, - "slave": unix.MS_SLAVE, - "rshared": unix.MS_SHARED | unix.MS_REC, - "shared": unix.MS_SHARED, - "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, - "unbindable": unix.MS_UNBINDABLE, - "": 0, -} - -var allowedDevices = []*configs.Device{ - // allow mknod for any device - { - Type: 'c', - Major: wildcard, - Minor: wildcard, - Permissions: "m", - Allow: true, - }, - { - Type: 'b', - Major: wildcard, - Minor: wildcard, - Permissions: "m", - Allow: true, - }, - { - Type: 'c', - Path: "/dev/null", - Major: 1, - Minor: 3, - Permissions: "rwm", - Allow: true, - }, - { - Type: 'c', - Path: "/dev/random", - Major: 1, - Minor: 8, - Permissions: "rwm", - Allow: true, - }, - { - Type: 'c', - Path: "/dev/full", - Major: 1, - Minor: 7, - Permissions: "rwm", - Allow: true, - }, - { - Type: 'c', - Path: "/dev/tty", - Major: 5, - Minor: 0, - Permissions: "rwm", - Allow: true, - }, - { - Type: 'c', - Path: "/dev/zero", - Major: 1, - Minor: 5, - Permissions: "rwm", - Allow: true, - }, - { - Type: 'c', - Path: "/dev/urandom", - Major: 1, - Minor: 9, - Permissions: "rwm", - Allow: true, - }, - { - Path: "/dev/console", - Type: 'c', - Major: 5, - Minor: 1, - Permissions: "rwm", - Allow: true, - }, - // /dev/pts/ - pts namespaces are "coming soon" - { - Path: "", - Type: 'c', - Major: 136, - Minor: wildcard, - Permissions: "rwm", - Allow: true, - }, - { - Path: "", - Type: 'c', - Major: 5, - Minor: 2, - Permissions: "rwm", - Allow: true, - }, - // tuntap - { - Path: "", - Type: 'c', - Major: 10, - Minor: 200, - Permissions: "rwm", - Allow: true, - }, -} - -type CreateOpts struct { - CgroupName string - UseSystemdCgroup bool - NoPivotRoot bool - NoNewKeyring bool - Spec *specs.Spec - Rootless bool -} - -// CreateLibcontainerConfig creates a new libcontainer configuration from a -// given specification and a cgroup name -func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { - // runc's cwd will always be the bundle path - rcwd, err := os.Getwd() - if err != nil { - return nil, err - } - cwd, err := filepath.Abs(rcwd) - if err != nil { - return nil, err - } - spec := opts.Spec - if spec.Root == nil { - return nil, fmt.Errorf("Root must be specified") - } - rootfsPath := spec.Root.Path - if !filepath.IsAbs(rootfsPath) { - rootfsPath = filepath.Join(cwd, rootfsPath) - } - labels := []string{} - for k, v := range spec.Annotations { - labels = append(labels, fmt.Sprintf("%s=%s", k, v)) - } - config := &configs.Config{ - Rootfs: rootfsPath, - NoPivotRoot: opts.NoPivotRoot, - Readonlyfs: spec.Root.Readonly, - Hostname: spec.Hostname, - Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)), - NoNewKeyring: opts.NoNewKeyring, - Rootless: opts.Rootless, - } - - exists := false - for _, m := range spec.Mounts { - config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m)) - } - if err := createDevices(spec, config); err != nil { - return nil, err - } - c, err := createCgroupConfig(opts) - if err != nil { - return nil, err - } - config.Cgroups = c - // set linux-specific config - if spec.Linux != nil { - if config.RootPropagation, exists = mountPropagationMapping[spec.Linux.RootfsPropagation]; !exists { - return nil, fmt.Errorf("rootfsPropagation=%v is not supported", spec.Linux.RootfsPropagation) - } - if config.NoPivotRoot && (config.RootPropagation&unix.MS_PRIVATE != 0) { - return nil, fmt.Errorf("rootfsPropagation of [r]private is not safe without pivot_root") - } - - for _, ns := range spec.Linux.Namespaces { - t, exists := namespaceMapping[ns.Type] - if !exists { - return nil, fmt.Errorf("namespace %q does not exist", ns) - } - if config.Namespaces.Contains(t) { - return nil, fmt.Errorf("malformed spec file: duplicated ns %q", ns) - } - config.Namespaces.Add(t, ns.Path) - } - if config.Namespaces.Contains(configs.NEWNET) && config.Namespaces.PathOf(configs.NEWNET) == "" { - config.Networks = []*configs.Network{ - { - Type: "loopback", - }, - } - } - if config.Namespaces.Contains(configs.NEWUSER) { - if err := setupUserNamespace(spec, config); err != nil { - return nil, err - } - } - config.MaskPaths = spec.Linux.MaskedPaths - config.ReadonlyPaths = spec.Linux.ReadonlyPaths - config.MountLabel = spec.Linux.MountLabel - config.Sysctl = spec.Linux.Sysctl - if spec.Linux.Seccomp != nil { - seccomp, err := SetupSeccomp(spec.Linux.Seccomp) - if err != nil { - return nil, err - } - config.Seccomp = seccomp - } - } - if spec.Process.SelinuxLabel != "" { - config.ProcessLabel = spec.Process.SelinuxLabel - } - if spec.Process != nil { - config.OomScoreAdj = spec.Process.OOMScoreAdj - } - if spec.Process.Capabilities != nil { - config.Capabilities = &configs.Capabilities{ - Bounding: spec.Process.Capabilities.Bounding, - Effective: spec.Process.Capabilities.Effective, - Permitted: spec.Process.Capabilities.Permitted, - Inheritable: spec.Process.Capabilities.Inheritable, - Ambient: spec.Process.Capabilities.Ambient, - } - } - createHooks(spec, config) - config.Version = specs.Version - if spec.Linux.IntelRdt != nil { - config.IntelRdt = &configs.IntelRdt{} - if spec.Linux.IntelRdt.L3CacheSchema != "" { - config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema - } - } - return config, nil -} - -func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount { - flags, pgflags, data, ext := parseMountOptions(m.Options) - source := m.Source - device := m.Type - if flags&unix.MS_BIND != 0 { - if device == "" { - device = "bind" - } - if !filepath.IsAbs(source) { - source = filepath.Join(cwd, m.Source) - } - } - return &configs.Mount{ - Device: device, - Source: source, - Destination: m.Destination, - Data: data, - Flags: flags, - PropagationFlags: pgflags, - Extensions: ext, - } -} - -func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { - var ( - myCgroupPath string - - spec = opts.Spec - useSystemdCgroup = opts.UseSystemdCgroup - name = opts.CgroupName - ) - - c := &configs.Cgroup{ - Resources: &configs.Resources{}, - } - - if spec.Linux != nil && spec.Linux.CgroupsPath != "" { - myCgroupPath = libcontainerUtils.CleanPath(spec.Linux.CgroupsPath) - if useSystemdCgroup { - myCgroupPath = spec.Linux.CgroupsPath - } - } - - if useSystemdCgroup { - if myCgroupPath == "" { - c.Parent = "system.slice" - c.ScopePrefix = "runc" - c.Name = name - } else { - // Parse the path from expected "slice:prefix:name" - // for e.g. "system.slice:docker:1234" - parts := strings.Split(myCgroupPath, ":") - if len(parts) != 3 { - return nil, fmt.Errorf("expected cgroupsPath to be of format \"slice:prefix:name\" for systemd cgroups") - } - c.Parent = parts[0] - c.ScopePrefix = parts[1] - c.Name = parts[2] - } - } else { - if myCgroupPath == "" { - c.Name = name - } - c.Path = myCgroupPath - } - - // In rootless containers, any attempt to make cgroup changes will fail. - // libcontainer will validate this and we shouldn't add any cgroup options - // the user didn't specify. - if !opts.Rootless { - c.Resources.AllowedDevices = allowedDevices - } - if spec.Linux != nil { - r := spec.Linux.Resources - if r == nil { - return c, nil - } - for i, d := range spec.Linux.Resources.Devices { - var ( - t = "a" - major = int64(-1) - minor = int64(-1) - ) - if d.Type != "" { - t = d.Type - } - if d.Major != nil { - major = *d.Major - } - if d.Minor != nil { - minor = *d.Minor - } - if d.Access == "" { - return nil, fmt.Errorf("device access at %d field cannot be empty", i) - } - dt, err := stringToCgroupDeviceRune(t) - if err != nil { - return nil, err - } - dd := &configs.Device{ - Type: dt, - Major: major, - Minor: minor, - Permissions: d.Access, - Allow: d.Allow, - } - c.Resources.Devices = append(c.Resources.Devices, dd) - } - if r.Memory != nil { - if r.Memory.Limit != nil { - c.Resources.Memory = *r.Memory.Limit - } - if r.Memory.Reservation != nil { - c.Resources.MemoryReservation = *r.Memory.Reservation - } - if r.Memory.Swap != nil { - c.Resources.MemorySwap = *r.Memory.Swap - } - if r.Memory.Kernel != nil { - c.Resources.KernelMemory = *r.Memory.Kernel - } - if r.Memory.KernelTCP != nil { - c.Resources.KernelMemoryTCP = *r.Memory.KernelTCP - } - if r.Memory.Swappiness != nil { - c.Resources.MemorySwappiness = r.Memory.Swappiness - } - if r.Memory.DisableOOMKiller != nil { - c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller - } - } - if r.CPU != nil { - if r.CPU.Shares != nil { - c.Resources.CpuShares = *r.CPU.Shares - } - if r.CPU.Quota != nil { - c.Resources.CpuQuota = *r.CPU.Quota - } - if r.CPU.Period != nil { - c.Resources.CpuPeriod = *r.CPU.Period - } - if r.CPU.RealtimeRuntime != nil { - c.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime - } - if r.CPU.RealtimePeriod != nil { - c.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod - } - if r.CPU.Cpus != "" { - c.Resources.CpusetCpus = r.CPU.Cpus - } - if r.CPU.Mems != "" { - c.Resources.CpusetMems = r.CPU.Mems - } - } - if r.Pids != nil { - c.Resources.PidsLimit = r.Pids.Limit - } - if r.BlockIO != nil { - if r.BlockIO.Weight != nil { - c.Resources.BlkioWeight = *r.BlockIO.Weight - } - if r.BlockIO.LeafWeight != nil { - c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight - } - if r.BlockIO.WeightDevice != nil { - for _, wd := range r.BlockIO.WeightDevice { - var weight, leafWeight uint16 - if wd.Weight != nil { - weight = *wd.Weight - } - if wd.LeafWeight != nil { - leafWeight = *wd.LeafWeight - } - weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight) - c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice) - } - } - if r.BlockIO.ThrottleReadBpsDevice != nil { - for _, td := range r.BlockIO.ThrottleReadBpsDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice) - } - } - if r.BlockIO.ThrottleWriteBpsDevice != nil { - for _, td := range r.BlockIO.ThrottleWriteBpsDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice) - } - } - if r.BlockIO.ThrottleReadIOPSDevice != nil { - for _, td := range r.BlockIO.ThrottleReadIOPSDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice) - } - } - if r.BlockIO.ThrottleWriteIOPSDevice != nil { - for _, td := range r.BlockIO.ThrottleWriteIOPSDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice) - } - } - } - for _, l := range r.HugepageLimits { - c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{ - Pagesize: l.Pagesize, - Limit: l.Limit, - }) - } - if r.Network != nil { - if r.Network.ClassID != nil { - c.Resources.NetClsClassid = *r.Network.ClassID - } - for _, m := range r.Network.Priorities { - c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{ - Interface: m.Name, - Priority: int64(m.Priority), - }) - } - } - } - if !opts.Rootless { - // append the default allowed devices to the end of the list - c.Resources.Devices = append(c.Resources.Devices, allowedDevices...) - } - return c, nil -} - -func stringToCgroupDeviceRune(s string) (rune, error) { - switch s { - case "a": - return 'a', nil - case "b": - return 'b', nil - case "c": - return 'c', nil - default: - return 0, fmt.Errorf("invalid cgroup device type %q", s) - } -} - -func stringToDeviceRune(s string) (rune, error) { - switch s { - case "p": - return 'p', nil - case "u": - return 'u', nil - case "b": - return 'b', nil - case "c": - return 'c', nil - default: - return 0, fmt.Errorf("invalid device type %q", s) - } -} - -func createDevices(spec *specs.Spec, config *configs.Config) error { - // add whitelisted devices - config.Devices = []*configs.Device{ - { - Type: 'c', - Path: "/dev/null", - Major: 1, - Minor: 3, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/random", - Major: 1, - Minor: 8, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/full", - Major: 1, - Minor: 7, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/tty", - Major: 5, - Minor: 0, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/zero", - Major: 1, - Minor: 5, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/urandom", - Major: 1, - Minor: 9, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - } - // merge in additional devices from the spec - if spec.Linux != nil { - for _, d := range spec.Linux.Devices { - var uid, gid uint32 - var filemode os.FileMode = 0666 - - if d.UID != nil { - uid = *d.UID - } - if d.GID != nil { - gid = *d.GID - } - dt, err := stringToDeviceRune(d.Type) - if err != nil { - return err - } - if d.FileMode != nil { - filemode = *d.FileMode - } - device := &configs.Device{ - Type: dt, - Path: d.Path, - Major: d.Major, - Minor: d.Minor, - FileMode: filemode, - Uid: uid, - Gid: gid, - } - config.Devices = append(config.Devices, device) - } - } - return nil -} - -func setupUserNamespace(spec *specs.Spec, config *configs.Config) error { - create := func(m specs.LinuxIDMapping) configs.IDMap { - return configs.IDMap{ - HostID: int(m.HostID), - ContainerID: int(m.ContainerID), - Size: int(m.Size), - } - } - if spec.Linux != nil { - for _, m := range spec.Linux.UIDMappings { - config.UidMappings = append(config.UidMappings, create(m)) - } - for _, m := range spec.Linux.GIDMappings { - config.GidMappings = append(config.GidMappings, create(m)) - } - } - rootUID, err := config.HostRootUID() - if err != nil { - return err - } - rootGID, err := config.HostRootGID() - if err != nil { - return err - } - for _, node := range config.Devices { - node.Uid = uint32(rootUID) - node.Gid = uint32(rootGID) - } - return nil -} - -// parseMountOptions parses the string and returns the flags, propagation -// flags and any mount data that it contains. -func parseMountOptions(options []string) (int, []int, string, int) { - var ( - flag int - pgflag []int - data []string - extFlags int - ) - flags := map[string]struct { - clear bool - flag int - }{ - "acl": {false, unix.MS_POSIXACL}, - "async": {true, unix.MS_SYNCHRONOUS}, - "atime": {true, unix.MS_NOATIME}, - "bind": {false, unix.MS_BIND}, - "defaults": {false, 0}, - "dev": {true, unix.MS_NODEV}, - "diratime": {true, unix.MS_NODIRATIME}, - "dirsync": {false, unix.MS_DIRSYNC}, - "exec": {true, unix.MS_NOEXEC}, - "iversion": {false, unix.MS_I_VERSION}, - "lazytime": {false, unix.MS_LAZYTIME}, - "loud": {true, unix.MS_SILENT}, - "mand": {false, unix.MS_MANDLOCK}, - "noacl": {true, unix.MS_POSIXACL}, - "noatime": {false, unix.MS_NOATIME}, - "nodev": {false, unix.MS_NODEV}, - "nodiratime": {false, unix.MS_NODIRATIME}, - "noexec": {false, unix.MS_NOEXEC}, - "noiversion": {true, unix.MS_I_VERSION}, - "nolazytime": {true, unix.MS_LAZYTIME}, - "nomand": {true, unix.MS_MANDLOCK}, - "norelatime": {true, unix.MS_RELATIME}, - "nostrictatime": {true, unix.MS_STRICTATIME}, - "nosuid": {false, unix.MS_NOSUID}, - "rbind": {false, unix.MS_BIND | unix.MS_REC}, - "relatime": {false, unix.MS_RELATIME}, - "remount": {false, unix.MS_REMOUNT}, - "ro": {false, unix.MS_RDONLY}, - "rw": {true, unix.MS_RDONLY}, - "silent": {false, unix.MS_SILENT}, - "strictatime": {false, unix.MS_STRICTATIME}, - "suid": {true, unix.MS_NOSUID}, - "sync": {false, unix.MS_SYNCHRONOUS}, - } - propagationFlags := map[string]int{ - "private": unix.MS_PRIVATE, - "shared": unix.MS_SHARED, - "slave": unix.MS_SLAVE, - "unbindable": unix.MS_UNBINDABLE, - "rprivate": unix.MS_PRIVATE | unix.MS_REC, - "rshared": unix.MS_SHARED | unix.MS_REC, - "rslave": unix.MS_SLAVE | unix.MS_REC, - "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, - } - extensionFlags := map[string]struct { - clear bool - flag int - }{ - "tmpcopyup": {false, configs.EXT_COPYUP}, - } - for _, o := range options { - // If the option does not exist in the flags table or the flag - // is not supported on the platform, - // then it is a data value for a specific fs type - if f, exists := flags[o]; exists && f.flag != 0 { - if f.clear { - flag &= ^f.flag - } else { - flag |= f.flag - } - } else if f, exists := propagationFlags[o]; exists && f != 0 { - pgflag = append(pgflag, f) - } else if f, exists := extensionFlags[o]; exists && f.flag != 0 { - if f.clear { - extFlags &= ^f.flag - } else { - extFlags |= f.flag - } - } else { - data = append(data, o) - } - } - return flag, pgflag, strings.Join(data, ","), extFlags -} - -func SetupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) { - if config == nil { - return nil, nil - } - - // No default action specified, no syscalls listed, assume seccomp disabled - if config.DefaultAction == "" && len(config.Syscalls) == 0 { - return nil, nil - } - - newConfig := new(configs.Seccomp) - newConfig.Syscalls = []*configs.Syscall{} - - if len(config.Architectures) > 0 { - newConfig.Architectures = []string{} - for _, arch := range config.Architectures { - newArch, err := seccomp.ConvertStringToArch(string(arch)) - if err != nil { - return nil, err - } - newConfig.Architectures = append(newConfig.Architectures, newArch) - } - } - - // Convert default action from string representation - newDefaultAction, err := seccomp.ConvertStringToAction(string(config.DefaultAction)) - if err != nil { - return nil, err - } - newConfig.DefaultAction = newDefaultAction - - // Loop through all syscall blocks and convert them to libcontainer format - for _, call := range config.Syscalls { - newAction, err := seccomp.ConvertStringToAction(string(call.Action)) - if err != nil { - return nil, err - } - - for _, name := range call.Names { - newCall := configs.Syscall{ - Name: name, - Action: newAction, - Args: []*configs.Arg{}, - } - // Loop through all the arguments of the syscall and convert them - for _, arg := range call.Args { - newOp, err := seccomp.ConvertStringToOperator(string(arg.Op)) - if err != nil { - return nil, err - } - - newArg := configs.Arg{ - Index: arg.Index, - Value: arg.Value, - ValueTwo: arg.ValueTwo, - Op: newOp, - } - - newCall.Args = append(newCall.Args, &newArg) - } - newConfig.Syscalls = append(newConfig.Syscalls, &newCall) - } - } - - return newConfig, nil -} - -func createHooks(rspec *specs.Spec, config *configs.Config) { - config.Hooks = &configs.Hooks{} - if rspec.Hooks != nil { - - for _, h := range rspec.Hooks.Prestart { - cmd := createCommandHook(h) - config.Hooks.Prestart = append(config.Hooks.Prestart, configs.NewCommandHook(cmd)) - } - for _, h := range rspec.Hooks.Poststart { - cmd := createCommandHook(h) - config.Hooks.Poststart = append(config.Hooks.Poststart, configs.NewCommandHook(cmd)) - } - for _, h := range rspec.Hooks.Poststop { - cmd := createCommandHook(h) - config.Hooks.Poststop = append(config.Hooks.Poststop, configs.NewCommandHook(cmd)) - } - } -} - -func createCommandHook(h specs.Hook) configs.Command { - cmd := configs.Command{ - Path: h.Path, - Args: h.Args, - Env: h.Env, - } - if h.Timeout != nil { - d := time.Duration(*h.Timeout) * time.Second - cmd.Timeout = &d - } - return cmd -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go deleted file mode 100644 index c8a9364d54d7..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go +++ /dev/null @@ -1,93 +0,0 @@ -// +build linux - -package utils - -/* - * Copyright 2016, 2017 SUSE LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import ( - "fmt" - "os" - - "golang.org/x/sys/unix" -) - -// MaxSendfdLen is the maximum length of the name of a file descriptor being -// sent using SendFd. The name of the file handle returned by RecvFd will never -// be larger than this value. -const MaxNameLen = 4096 - -// oobSpace is the size of the oob slice required to store a single FD. Note -// that unix.UnixRights appears to make the assumption that fd is always int32, -// so sizeof(fd) = 4. -var oobSpace = unix.CmsgSpace(4) - -// RecvFd waits for a file descriptor to be sent over the given AF_UNIX -// socket. The file name of the remote file descriptor will be recreated -// locally (it is sent as non-auxiliary data in the same payload). -func RecvFd(socket *os.File) (*os.File, error) { - // For some reason, unix.Recvmsg uses the length rather than the capacity - // when passing the msg_controllen and other attributes to recvmsg. So we - // have to actually set the length. - name := make([]byte, MaxNameLen) - oob := make([]byte, oobSpace) - - sockfd := socket.Fd() - n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0) - if err != nil { - return nil, err - } - - if n >= MaxNameLen || oobn != oobSpace { - return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) - } - - // Truncate. - name = name[:n] - oob = oob[:oobn] - - scms, err := unix.ParseSocketControlMessage(oob) - if err != nil { - return nil, err - } - if len(scms) != 1 { - return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms)) - } - scm := scms[0] - - fds, err := unix.ParseUnixRights(&scm) - if err != nil { - return nil, err - } - if len(fds) != 1 { - return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds)) - } - fd := uintptr(fds[0]) - - return os.NewFile(fd, string(name)), nil -} - -// SendFd sends a file descriptor over the given AF_UNIX socket. In -// addition, the file.Name() of the given file will also be sent as -// non-auxiliary data in the same payload (allowing to send contextual -// information for a file descriptor). -func SendFd(socket *os.File, name string, fd uintptr) error { - if len(name) >= MaxNameLen { - return fmt.Errorf("sendfd: filename too long: %s", name) - } - oob := unix.UnixRights(int(fd)) - return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go deleted file mode 100644 index 40ccfaa1a01a..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go +++ /dev/null @@ -1,112 +0,0 @@ -package utils - -import ( - "encoding/json" - "io" - "os" - "path/filepath" - "strings" - "unsafe" - - "golang.org/x/sys/unix" -) - -const ( - exitSignalOffset = 128 -) - -// ResolveRootfs ensures that the current working directory is -// not a symlink and returns the absolute path to the rootfs -func ResolveRootfs(uncleanRootfs string) (string, error) { - rootfs, err := filepath.Abs(uncleanRootfs) - if err != nil { - return "", err - } - return filepath.EvalSymlinks(rootfs) -} - -// ExitStatus returns the correct exit status for a process based on if it -// was signaled or exited cleanly -func ExitStatus(status unix.WaitStatus) int { - if status.Signaled() { - return exitSignalOffset + int(status.Signal()) - } - return status.ExitStatus() -} - -// WriteJSON writes the provided struct v to w using standard json marshaling -func WriteJSON(w io.Writer, v interface{}) error { - data, err := json.Marshal(v) - if err != nil { - return err - } - _, err = w.Write(data) - return err -} - -// CleanPath makes a path safe for use with filepath.Join. This is done by not -// only cleaning the path, but also (if the path is relative) adding a leading -// '/' and cleaning it (then removing the leading '/'). This ensures that a -// path resulting from prepending another path will always resolve to lexically -// be a subdirectory of the prefixed path. This is all done lexically, so paths -// that include symlinks won't be safe as a result of using CleanPath. -func CleanPath(path string) string { - // Deal with empty strings nicely. - if path == "" { - return "" - } - - // Ensure that all paths are cleaned (especially problematic ones like - // "/../../../../../" which can cause lots of issues). - path = filepath.Clean(path) - - // If the path isn't absolute, we need to do more processing to fix paths - // such as "../../../..//some/path". We also shouldn't convert absolute - // paths to relative ones. - if !filepath.IsAbs(path) { - path = filepath.Clean(string(os.PathSeparator) + path) - // This can't fail, as (by definition) all paths are relative to root. - path, _ = filepath.Rel(string(os.PathSeparator), path) - } - - // Clean the path again for good measure. - return filepath.Clean(path) -} - -// SearchLabels searches a list of key-value pairs for the provided key and -// returns the corresponding value. The pairs must be separated with '='. -func SearchLabels(labels []string, query string) string { - for _, l := range labels { - parts := strings.SplitN(l, "=", 2) - if len(parts) < 2 { - continue - } - if parts[0] == query { - return parts[1] - } - } - return "" -} - -// Annotations returns the bundle path and user defined annotations from the -// libcontainer state. We need to remove the bundle because that is a label -// added by libcontainer. -func Annotations(labels []string) (bundle string, userAnnotations map[string]string) { - userAnnotations = make(map[string]string) - for _, l := range labels { - parts := strings.SplitN(l, "=", 2) - if len(parts) < 2 { - continue - } - if parts[0] == "bundle" { - bundle = parts[1] - } else { - userAnnotations[parts[0]] = parts[1] - } - } - return -} - -func GetIntSize() int { - return int(unsafe.Sizeof(1)) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go deleted file mode 100644 index c96088988a6d..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go +++ /dev/null @@ -1,44 +0,0 @@ -// +build !windows - -package utils - -import ( - "io/ioutil" - "os" - "strconv" - - "golang.org/x/sys/unix" -) - -func CloseExecFrom(minFd int) error { - fdList, err := ioutil.ReadDir("/proc/self/fd") - if err != nil { - return err - } - for _, fi := range fdList { - fd, err := strconv.Atoi(fi.Name()) - if err != nil { - // ignore non-numeric file names - continue - } - - if fd < minFd { - // ignore descriptors lower than our specified minimum - continue - } - - // intentionally ignore errors from unix.CloseOnExec - unix.CloseOnExec(fd) - // the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall) - } - return nil -} - -// NewSockPair returns a new unix socket pair -func NewSockPair(name string) (parent *os.File, child *os.File, err error) { - fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) - if err != nil { - return nil, nil, err - } - return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil -} diff --git a/vendor/github.com/seccomp/libseccomp-golang/LICENSE b/vendor/github.com/seccomp/libseccomp-golang/LICENSE deleted file mode 100644 index 81cf60de29ef..000000000000 --- a/vendor/github.com/seccomp/libseccomp-golang/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2015 Matthew Heon -Copyright (c) 2015 Paul Moore -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -- Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/seccomp/libseccomp-golang/README b/vendor/github.com/seccomp/libseccomp-golang/README deleted file mode 100644 index 64cab6911d5a..000000000000 --- a/vendor/github.com/seccomp/libseccomp-golang/README +++ /dev/null @@ -1,26 +0,0 @@ -libseccomp-golang: Go Language Bindings for the libseccomp Project -=============================================================================== -https://github.com/seccomp/libseccomp-golang -https://github.com/seccomp/libseccomp - -The libseccomp library provides an easy to use, platform independent, interface -to the Linux Kernel's syscall filtering mechanism. The libseccomp API is -designed to abstract away the underlying BPF based syscall filter language and -present a more conventional function-call based filtering interface that should -be familiar to, and easily adopted by, application developers. - -The libseccomp-golang library provides a Go based interface to the libseccomp -library. - -* Online Resources - -The library source repository currently lives on GitHub at the following URLs: - - -> https://github.com/seccomp/libseccomp-golang - -> https://github.com/seccomp/libseccomp - -The project mailing list is currently hosted on Google Groups at the URL below, -please note that a Google account is not required to subscribe to the mailing -list. - - -> https://groups.google.com/d/forum/libseccomp diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go deleted file mode 100644 index b2c010fc3873..000000000000 --- a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go +++ /dev/null @@ -1,857 +0,0 @@ -// +build linux - -// Public API specification for libseccomp Go bindings -// Contains public API for the bindings - -// Package seccomp provides bindings for libseccomp, a library wrapping the Linux -// seccomp syscall. Seccomp enables an application to restrict system call use -// for itself and its children. -package seccomp - -import ( - "fmt" - "os" - "runtime" - "strings" - "sync" - "syscall" - "unsafe" -) - -// C wrapping code - -// #cgo pkg-config: libseccomp -// #include -// #include -import "C" - -// Exported types - -// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a -// per-architecture basis. -type ScmpArch uint - -// ScmpAction represents an action to be taken on a filter rule match in -// libseccomp -type ScmpAction uint - -// ScmpCompareOp represents a comparison operator which can be used in a filter -// rule -type ScmpCompareOp uint - -// ScmpCondition represents a rule in a libseccomp filter context -type ScmpCondition struct { - Argument uint `json:"argument,omitempty"` - Op ScmpCompareOp `json:"operator,omitempty"` - Operand1 uint64 `json:"operand_one,omitempty"` - Operand2 uint64 `json:"operand_two,omitempty"` -} - -// ScmpSyscall represents a Linux System Call -type ScmpSyscall int32 - -// Exported Constants - -const ( - // Valid architectures recognized by libseccomp - // ARM64 and all MIPS architectures are unsupported by versions of the - // library before v2.2 and will return errors if used - - // ArchInvalid is a placeholder to ensure uninitialized ScmpArch - // variables are invalid - ArchInvalid ScmpArch = iota - // ArchNative is the native architecture of the kernel - ArchNative ScmpArch = iota - // ArchX86 represents 32-bit x86 syscalls - ArchX86 ScmpArch = iota - // ArchAMD64 represents 64-bit x86-64 syscalls - ArchAMD64 ScmpArch = iota - // ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers) - ArchX32 ScmpArch = iota - // ArchARM represents 32-bit ARM syscalls - ArchARM ScmpArch = iota - // ArchARM64 represents 64-bit ARM syscalls - ArchARM64 ScmpArch = iota - // ArchMIPS represents 32-bit MIPS syscalls - ArchMIPS ScmpArch = iota - // ArchMIPS64 represents 64-bit MIPS syscalls - ArchMIPS64 ScmpArch = iota - // ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers) - ArchMIPS64N32 ScmpArch = iota - // ArchMIPSEL represents 32-bit MIPS syscalls (little endian) - ArchMIPSEL ScmpArch = iota - // ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian) - ArchMIPSEL64 ScmpArch = iota - // ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian, - // 32-bit pointers) - ArchMIPSEL64N32 ScmpArch = iota - // ArchPPC represents 32-bit POWERPC syscalls - ArchPPC ScmpArch = iota - // ArchPPC64 represents 64-bit POWER syscalls (big endian) - ArchPPC64 ScmpArch = iota - // ArchPPC64LE represents 64-bit POWER syscalls (little endian) - ArchPPC64LE ScmpArch = iota - // ArchS390 represents 31-bit System z/390 syscalls - ArchS390 ScmpArch = iota - // ArchS390X represents 64-bit System z/390 syscalls - ArchS390X ScmpArch = iota -) - -const ( - // Supported actions on filter match - - // ActInvalid is a placeholder to ensure uninitialized ScmpAction - // variables are invalid - ActInvalid ScmpAction = iota - // ActKill kills the process - ActKill ScmpAction = iota - // ActTrap throws SIGSYS - ActTrap ScmpAction = iota - // ActErrno causes the syscall to return a negative error code. This - // code can be set with the SetReturnCode method - ActErrno ScmpAction = iota - // ActTrace causes the syscall to notify tracing processes with the - // given error code. This code can be set with the SetReturnCode method - ActTrace ScmpAction = iota - // ActAllow permits the syscall to continue execution - ActAllow ScmpAction = iota -) - -const ( - // These are comparison operators used in conditional seccomp rules - // They are used to compare the value of a single argument of a syscall - // against a user-defined constant - - // CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp - // variables are invalid - CompareInvalid ScmpCompareOp = iota - // CompareNotEqual returns true if the argument is not equal to the - // given value - CompareNotEqual ScmpCompareOp = iota - // CompareLess returns true if the argument is less than the given value - CompareLess ScmpCompareOp = iota - // CompareLessOrEqual returns true if the argument is less than or equal - // to the given value - CompareLessOrEqual ScmpCompareOp = iota - // CompareEqual returns true if the argument is equal to the given value - CompareEqual ScmpCompareOp = iota - // CompareGreaterEqual returns true if the argument is greater than or - // equal to the given value - CompareGreaterEqual ScmpCompareOp = iota - // CompareGreater returns true if the argument is greater than the given - // value - CompareGreater ScmpCompareOp = iota - // CompareMaskedEqual returns true if the argument is equal to the given - // value, when masked (bitwise &) against the second given value - CompareMaskedEqual ScmpCompareOp = iota -) - -// Helpers for types - -// GetArchFromString returns an ScmpArch constant from a string representing an -// architecture -func GetArchFromString(arch string) (ScmpArch, error) { - switch strings.ToLower(arch) { - case "x86": - return ArchX86, nil - case "amd64", "x86-64", "x86_64", "x64": - return ArchAMD64, nil - case "x32": - return ArchX32, nil - case "arm": - return ArchARM, nil - case "arm64", "aarch64": - return ArchARM64, nil - case "mips": - return ArchMIPS, nil - case "mips64": - return ArchMIPS64, nil - case "mips64n32": - return ArchMIPS64N32, nil - case "mipsel": - return ArchMIPSEL, nil - case "mipsel64": - return ArchMIPSEL64, nil - case "mipsel64n32": - return ArchMIPSEL64N32, nil - case "ppc": - return ArchPPC, nil - case "ppc64": - return ArchPPC64, nil - case "ppc64le": - return ArchPPC64LE, nil - case "s390": - return ArchS390, nil - case "s390x": - return ArchS390X, nil - default: - return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %s", arch) - } -} - -// String returns a string representation of an architecture constant -func (a ScmpArch) String() string { - switch a { - case ArchX86: - return "x86" - case ArchAMD64: - return "amd64" - case ArchX32: - return "x32" - case ArchARM: - return "arm" - case ArchARM64: - return "arm64" - case ArchMIPS: - return "mips" - case ArchMIPS64: - return "mips64" - case ArchMIPS64N32: - return "mips64n32" - case ArchMIPSEL: - return "mipsel" - case ArchMIPSEL64: - return "mipsel64" - case ArchMIPSEL64N32: - return "mipsel64n32" - case ArchPPC: - return "ppc" - case ArchPPC64: - return "ppc64" - case ArchPPC64LE: - return "ppc64le" - case ArchS390: - return "s390" - case ArchS390X: - return "s390x" - case ArchNative: - return "native" - case ArchInvalid: - return "Invalid architecture" - default: - return "Unknown architecture" - } -} - -// String returns a string representation of a comparison operator constant -func (a ScmpCompareOp) String() string { - switch a { - case CompareNotEqual: - return "Not equal" - case CompareLess: - return "Less than" - case CompareLessOrEqual: - return "Less than or equal to" - case CompareEqual: - return "Equal" - case CompareGreaterEqual: - return "Greater than or equal to" - case CompareGreater: - return "Greater than" - case CompareMaskedEqual: - return "Masked equality" - case CompareInvalid: - return "Invalid comparison operator" - default: - return "Unrecognized comparison operator" - } -} - -// String returns a string representation of a seccomp match action -func (a ScmpAction) String() string { - switch a & 0xFFFF { - case ActKill: - return "Action: Kill Process" - case ActTrap: - return "Action: Send SIGSYS" - case ActErrno: - return fmt.Sprintf("Action: Return error code %d", (a >> 16)) - case ActTrace: - return fmt.Sprintf("Action: Notify tracing processes with code %d", - (a >> 16)) - case ActAllow: - return "Action: Allow system call" - default: - return "Unrecognized Action" - } -} - -// SetReturnCode adds a return code to a supporting ScmpAction, clearing any -// existing code Only valid on ActErrno and ActTrace. Takes no action otherwise. -// Accepts 16-bit return code as argument. -// Returns a valid ScmpAction of the original type with the new error code set. -func (a ScmpAction) SetReturnCode(code int16) ScmpAction { - aTmp := a & 0x0000FFFF - if aTmp == ActErrno || aTmp == ActTrace { - return (aTmp | (ScmpAction(code)&0xFFFF)<<16) - } - return a -} - -// GetReturnCode returns the return code of an ScmpAction -func (a ScmpAction) GetReturnCode() int16 { - return int16(a >> 16) -} - -// General utility functions - -// GetLibraryVersion returns the version of the library the bindings are built -// against. -// The version is formatted as follows: Major.Minor.Micro -func GetLibraryVersion() (major, minor, micro int) { - return verMajor, verMinor, verMicro -} - -// Syscall functions - -// GetName retrieves the name of a syscall from its number. -// Acts on any syscall number. -// Returns either a string containing the name of the syscall, or an error. -func (s ScmpSyscall) GetName() (string, error) { - return s.GetNameByArch(ArchNative) -} - -// GetNameByArch retrieves the name of a syscall from its number for a given -// architecture. -// Acts on any syscall number. -// Accepts a valid architecture constant. -// Returns either a string containing the name of the syscall, or an error. -// if the syscall is unrecognized or an issue occurred. -func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) { - if err := sanitizeArch(arch); err != nil { - return "", err - } - - cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s)) - if cString == nil { - return "", fmt.Errorf("could not resolve syscall name") - } - defer C.free(unsafe.Pointer(cString)) - - finalStr := C.GoString(cString) - return finalStr, nil -} - -// GetSyscallFromName returns the number of a syscall by name on the kernel's -// native architecture. -// Accepts a string containing the name of a syscall. -// Returns the number of the syscall, or an error if no syscall with that name -// was found. -func GetSyscallFromName(name string) (ScmpSyscall, error) { - cString := C.CString(name) - defer C.free(unsafe.Pointer(cString)) - - result := C.seccomp_syscall_resolve_name(cString) - if result == scmpError { - return 0, fmt.Errorf("could not resolve name to syscall") - } - - return ScmpSyscall(result), nil -} - -// GetSyscallFromNameByArch returns the number of a syscall by name for a given -// architecture's ABI. -// Accepts the name of a syscall and an architecture constant. -// Returns the number of the syscall, or an error if an invalid architecture is -// passed or a syscall with that name was not found. -func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) { - if err := sanitizeArch(arch); err != nil { - return 0, err - } - - cString := C.CString(name) - defer C.free(unsafe.Pointer(cString)) - - result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString) - if result == scmpError { - return 0, fmt.Errorf("could not resolve name to syscall") - } - - return ScmpSyscall(result), nil -} - -// MakeCondition creates and returns a new condition to attach to a filter rule. -// Associated rules will only match if this condition is true. -// Accepts the number the argument we are checking, and a comparison operator -// and value to compare to. -// The rule will match if argument $arg (zero-indexed) of the syscall is -// $COMPARE_OP the provided comparison value. -// Some comparison operators accept two values. Masked equals, for example, -// will mask $arg of the syscall with the second value provided (via bitwise -// AND) and then compare against the first value provided. -// For example, in the less than or equal case, if the syscall argument was -// 0 and the value provided was 1, the condition would match, as 0 is less -// than or equal to 1. -// Return either an error on bad argument or a valid ScmpCondition struct. -func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) { - var condStruct ScmpCondition - - if comparison == CompareInvalid { - return condStruct, fmt.Errorf("invalid comparison operator") - } else if arg > 5 { - return condStruct, fmt.Errorf("syscalls only have up to 6 arguments") - } else if len(values) > 2 { - return condStruct, fmt.Errorf("conditions can have at most 2 arguments") - } else if len(values) == 0 { - return condStruct, fmt.Errorf("must provide at least one value to compare against") - } - - condStruct.Argument = arg - condStruct.Op = comparison - condStruct.Operand1 = values[0] - if len(values) == 2 { - condStruct.Operand2 = values[1] - } else { - condStruct.Operand2 = 0 // Unused - } - - return condStruct, nil -} - -// Utility Functions - -// GetNativeArch returns architecture token representing the native kernel -// architecture -func GetNativeArch() (ScmpArch, error) { - arch := C.seccomp_arch_native() - - return archFromNative(arch) -} - -// Public Filter API - -// ScmpFilter represents a filter context in libseccomp. -// A filter context is initially empty. Rules can be added to it, and it can -// then be loaded into the kernel. -type ScmpFilter struct { - filterCtx C.scmp_filter_ctx - valid bool - lock sync.Mutex -} - -// NewFilter creates and returns a new filter context. -// Accepts a default action to be taken for syscalls which match no rules in -// the filter. -// Returns a reference to a valid filter context, or nil and an error if the -// filter context could not be created or an invalid default action was given. -func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) { - if err := sanitizeAction(defaultAction); err != nil { - return nil, err - } - - fPtr := C.seccomp_init(defaultAction.toNative()) - if fPtr == nil { - return nil, fmt.Errorf("could not create filter") - } - - filter := new(ScmpFilter) - filter.filterCtx = fPtr - filter.valid = true - runtime.SetFinalizer(filter, filterFinalizer) - - return filter, nil -} - -// IsValid determines whether a filter context is valid to use. -// Some operations (Release and Merge) render filter contexts invalid and -// consequently prevent further use. -func (f *ScmpFilter) IsValid() bool { - f.lock.Lock() - defer f.lock.Unlock() - - return f.valid -} - -// Reset resets a filter context, removing all its existing state. -// Accepts a new default action to be taken for syscalls which do not match. -// Returns an error if the filter or action provided are invalid. -func (f *ScmpFilter) Reset(defaultAction ScmpAction) error { - f.lock.Lock() - defer f.lock.Unlock() - - if err := sanitizeAction(defaultAction); err != nil { - return err - } else if !f.valid { - return errBadFilter - } - - retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative()) - if retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// Release releases a filter context, freeing its memory. Should be called after -// loading into the kernel, when the filter is no longer needed. -// After calling this function, the given filter is no longer valid and cannot -// be used. -// Release() will be invoked automatically when a filter context is garbage -// collected, but can also be called manually to free memory. -func (f *ScmpFilter) Release() { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return - } - - f.valid = false - C.seccomp_release(f.filterCtx) -} - -// Merge merges two filter contexts. -// The source filter src will be released as part of the process, and will no -// longer be usable or valid after this call. -// To be merged, filters must NOT share any architectures, and all their -// attributes (Default Action, Bad Arch Action, No New Privs and TSync bools) -// must match. -// The filter src will be merged into the filter this is called on. -// The architectures of the src filter not present in the destination, and all -// associated rules, will be added to the destination. -// Returns an error if merging the filters failed. -func (f *ScmpFilter) Merge(src *ScmpFilter) error { - f.lock.Lock() - defer f.lock.Unlock() - - src.lock.Lock() - defer src.lock.Unlock() - - if !src.valid || !f.valid { - return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized") - } - - // Merge the filters - retCode := C.seccomp_merge(f.filterCtx, src.filterCtx) - if syscall.Errno(-1*retCode) == syscall.EINVAL { - return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter") - } else if retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - src.valid = false - - return nil -} - -// IsArchPresent checks if an architecture is present in a filter. -// If a filter contains an architecture, it uses its default action for -// syscalls which do not match rules in it, and its rules can match syscalls -// for that ABI. -// If a filter does not contain an architecture, all syscalls made to that -// kernel ABI will fail with the filter's default Bad Architecture Action -// (by default, killing the process). -// Accepts an architecture constant. -// Returns true if the architecture is present in the filter, false otherwise, -// and an error on an invalid filter context, architecture constant, or an -// issue with the call to libseccomp. -func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) { - f.lock.Lock() - defer f.lock.Unlock() - - if err := sanitizeArch(arch); err != nil { - return false, err - } else if !f.valid { - return false, errBadFilter - } - - retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative()) - if syscall.Errno(-1*retCode) == syscall.EEXIST { - // -EEXIST is "arch not present" - return false, nil - } else if retCode != 0 { - return false, syscall.Errno(-1 * retCode) - } - - return true, nil -} - -// AddArch adds an architecture to the filter. -// Accepts an architecture constant. -// Returns an error on invalid filter context or architecture token, or an -// issue with the call to libseccomp. -func (f *ScmpFilter) AddArch(arch ScmpArch) error { - f.lock.Lock() - defer f.lock.Unlock() - - if err := sanitizeArch(arch); err != nil { - return err - } else if !f.valid { - return errBadFilter - } - - // Libseccomp returns -EEXIST if the specified architecture is already - // present. Succeed silently in this case, as it's not fatal, and the - // architecture is present already. - retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative()) - if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// RemoveArch removes an architecture from the filter. -// Accepts an architecture constant. -// Returns an error on invalid filter context or architecture token, or an -// issue with the call to libseccomp. -func (f *ScmpFilter) RemoveArch(arch ScmpArch) error { - f.lock.Lock() - defer f.lock.Unlock() - - if err := sanitizeArch(arch); err != nil { - return err - } else if !f.valid { - return errBadFilter - } - - // Similar to AddArch, -EEXIST is returned if the arch is not present - // Succeed silently in that case, this is not fatal and the architecture - // is not present in the filter after RemoveArch - retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative()) - if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// Load loads a filter context into the kernel. -// Returns an error if the filter context is invalid or the syscall failed. -func (f *ScmpFilter) Load() error { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return errBadFilter - } - - if retCode := C.seccomp_load(f.filterCtx); retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// GetDefaultAction returns the default action taken on a syscall which does not -// match a rule in the filter, or an error if an issue was encountered -// retrieving the value. -func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) { - action, err := f.getFilterAttr(filterAttrActDefault) - if err != nil { - return 0x0, err - } - - return actionFromNative(action) -} - -// GetBadArchAction returns the default action taken on a syscall for an -// architecture not in the filter, or an error if an issue was encountered -// retrieving the value. -func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) { - action, err := f.getFilterAttr(filterAttrActBadArch) - if err != nil { - return 0x0, err - } - - return actionFromNative(action) -} - -// GetNoNewPrivsBit returns the current state the No New Privileges bit will be set -// to on the filter being loaded, or an error if an issue was encountered -// retrieving the value. -// The No New Privileges bit tells the kernel that new processes run with exec() -// cannot gain more privileges than the process that ran exec(). -// For example, a process with No New Privileges set would be unable to exec -// setuid/setgid executables. -func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) { - noNewPrivs, err := f.getFilterAttr(filterAttrNNP) - if err != nil { - return false, err - } - - if noNewPrivs == 0 { - return false, nil - } - - return true, nil -} - -// GetTsyncBit returns whether Thread Synchronization will be enabled on the -// filter being loaded, or an error if an issue was encountered retrieving the -// value. -// Thread Sync ensures that all members of the thread group of the calling -// process will share the same Seccomp filter set. -// Tsync is a fairly recent addition to the Linux kernel and older kernels -// lack support. If the running kernel does not support Tsync and it is -// requested in a filter, Libseccomp will not enable TSync support and will -// proceed as normal. -// This function is unavailable before v2.2 of libseccomp and will return an -// error. -func (f *ScmpFilter) GetTsyncBit() (bool, error) { - tSync, err := f.getFilterAttr(filterAttrTsync) - if err != nil { - return false, err - } - - if tSync == 0 { - return false, nil - } - - return true, nil -} - -// SetBadArchAction sets the default action taken on a syscall for an -// architecture not in the filter, or an error if an issue was encountered -// setting the value. -func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error { - if err := sanitizeAction(action); err != nil { - return err - } - - return f.setFilterAttr(filterAttrActBadArch, action.toNative()) -} - -// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be -// applied on filter load, or an error if an issue was encountered setting the -// value. -// Filters with No New Privileges set to 0 can only be loaded if the process -// has the CAP_SYS_ADMIN capability. -func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error { - var toSet C.uint32_t = 0x0 - - if state { - toSet = 0x1 - } - - return f.setFilterAttr(filterAttrNNP, toSet) -} - -// SetTsync sets whether Thread Synchronization will be enabled on the filter -// being loaded. Returns an error if setting Tsync failed, or the filter is -// invalid. -// Thread Sync ensures that all members of the thread group of the calling -// process will share the same Seccomp filter set. -// Tsync is a fairly recent addition to the Linux kernel and older kernels -// lack support. If the running kernel does not support Tsync and it is -// requested in a filter, Libseccomp will not enable TSync support and will -// proceed as normal. -// This function is unavailable before v2.2 of libseccomp and will return an -// error. -func (f *ScmpFilter) SetTsync(enable bool) error { - var toSet C.uint32_t = 0x0 - - if enable { - toSet = 0x1 - } - - return f.setFilterAttr(filterAttrTsync, toSet) -} - -// SetSyscallPriority sets a syscall's priority. -// This provides a hint to the filter generator in libseccomp about the -// importance of this syscall. High-priority syscalls are placed -// first in the filter code, and incur less overhead (at the expense of -// lower-priority syscalls). -func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return errBadFilter - } - - if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call), - C.uint8_t(priority)); retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// AddRule adds a single rule for an unconditional action on a syscall. -// Accepts the number of the syscall and the action to be taken on the call -// being made. -// Returns an error if an issue was encountered adding the rule. -func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error { - return f.addRuleGeneric(call, action, false, nil) -} - -// AddRuleExact adds a single rule for an unconditional action on a syscall. -// Accepts the number of the syscall and the action to be taken on the call -// being made. -// No modifications will be made to the rule, and it will fail to add if it -// cannot be applied to the current architecture without modification. -// The rule will function exactly as described, but it may not function identically -// (or be able to be applied to) all architectures. -// Returns an error if an issue was encountered adding the rule. -func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error { - return f.addRuleGeneric(call, action, true, nil) -} - -// AddRuleConditional adds a single rule for a conditional action on a syscall. -// Returns an error if an issue was encountered adding the rule. -// All conditions must match for the rule to match. -// There is a bug in library versions below v2.2.1 which can, in some cases, -// cause conditions to be lost when more than one are used. Consequently, -// AddRuleConditional is disabled on library versions lower than v2.2.1 -func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { - return f.addRuleGeneric(call, action, false, conds) -} - -// AddRuleConditionalExact adds a single rule for a conditional action on a -// syscall. -// No modifications will be made to the rule, and it will fail to add if it -// cannot be applied to the current architecture without modification. -// The rule will function exactly as described, but it may not function identically -// (or be able to be applied to) all architectures. -// Returns an error if an issue was encountered adding the rule. -// There is a bug in library versions below v2.2.1 which can, in some cases, -// cause conditions to be lost when more than one are used. Consequently, -// AddRuleConditionalExact is disabled on library versions lower than v2.2.1 -func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { - return f.addRuleGeneric(call, action, true, conds) -} - -// ExportPFC output PFC-formatted, human-readable dump of a filter context's -// rules to a file. -// Accepts file to write to (must be open for writing). -// Returns an error if writing to the file fails. -func (f *ScmpFilter) ExportPFC(file *os.File) error { - f.lock.Lock() - defer f.lock.Unlock() - - fd := file.Fd() - - if !f.valid { - return errBadFilter - } - - if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// ExportBPF outputs Berkeley Packet Filter-formatted, kernel-readable dump of a -// filter context's rules to a file. -// Accepts file to write to (must be open for writing). -// Returns an error if writing to the file fails. -func (f *ScmpFilter) ExportBPF(file *os.File) error { - f.lock.Lock() - defer f.lock.Unlock() - - fd := file.Fd() - - if !f.valid { - return errBadFilter - } - - if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go deleted file mode 100644 index ab67a3dedc0c..000000000000 --- a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go +++ /dev/null @@ -1,506 +0,0 @@ -// +build linux - -// Internal functions for libseccomp Go bindings -// No exported functions - -package seccomp - -import ( - "fmt" - "os" - "syscall" -) - -// Unexported C wrapping code - provides the C-Golang interface -// Get the seccomp header in scope -// Need stdlib.h for free() on cstrings - -// #cgo pkg-config: libseccomp -/* -#include -#include - -#if SCMP_VER_MAJOR < 2 -#error Minimum supported version of Libseccomp is v2.1.0 -#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 1 -#error Minimum supported version of Libseccomp is v2.1.0 -#endif - -#define ARCH_BAD ~0 - -const uint32_t C_ARCH_BAD = ARCH_BAD; - -#ifndef SCMP_ARCH_AARCH64 -#define SCMP_ARCH_AARCH64 ARCH_BAD -#endif - -#ifndef SCMP_ARCH_MIPS -#define SCMP_ARCH_MIPS ARCH_BAD -#endif - -#ifndef SCMP_ARCH_MIPS64 -#define SCMP_ARCH_MIPS64 ARCH_BAD -#endif - -#ifndef SCMP_ARCH_MIPS64N32 -#define SCMP_ARCH_MIPS64N32 ARCH_BAD -#endif - -#ifndef SCMP_ARCH_MIPSEL -#define SCMP_ARCH_MIPSEL ARCH_BAD -#endif - -#ifndef SCMP_ARCH_MIPSEL64 -#define SCMP_ARCH_MIPSEL64 ARCH_BAD -#endif - -#ifndef SCMP_ARCH_MIPSEL64N32 -#define SCMP_ARCH_MIPSEL64N32 ARCH_BAD -#endif - -#ifndef SCMP_ARCH_PPC -#define SCMP_ARCH_PPC ARCH_BAD -#endif - -#ifndef SCMP_ARCH_PPC64 -#define SCMP_ARCH_PPC64 ARCH_BAD -#endif - -#ifndef SCMP_ARCH_PPC64LE -#define SCMP_ARCH_PPC64LE ARCH_BAD -#endif - -#ifndef SCMP_ARCH_S390 -#define SCMP_ARCH_S390 ARCH_BAD -#endif - -#ifndef SCMP_ARCH_S390X -#define SCMP_ARCH_S390X ARCH_BAD -#endif - -const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE; -const uint32_t C_ARCH_X86 = SCMP_ARCH_X86; -const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64; -const uint32_t C_ARCH_X32 = SCMP_ARCH_X32; -const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM; -const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64; -const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS; -const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64; -const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32; -const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL; -const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64; -const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32; -const uint32_t C_ARCH_PPC = SCMP_ARCH_PPC; -const uint32_t C_ARCH_PPC64 = SCMP_ARCH_PPC64; -const uint32_t C_ARCH_PPC64LE = SCMP_ARCH_PPC64LE; -const uint32_t C_ARCH_S390 = SCMP_ARCH_S390; -const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X; - -const uint32_t C_ACT_KILL = SCMP_ACT_KILL; -const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP; -const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0); -const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0); -const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW; - -// If TSync is not supported, make sure it doesn't map to a supported filter attribute -// Don't worry about major version < 2, the minimum version checks should catch that case -#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2 -#define SCMP_FLTATR_CTL_TSYNC _SCMP_CMP_MIN -#endif - -const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT; -const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH; -const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP; -const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC; - -const int C_CMP_NE = (int)SCMP_CMP_NE; -const int C_CMP_LT = (int)SCMP_CMP_LT; -const int C_CMP_LE = (int)SCMP_CMP_LE; -const int C_CMP_EQ = (int)SCMP_CMP_EQ; -const int C_CMP_GE = (int)SCMP_CMP_GE; -const int C_CMP_GT = (int)SCMP_CMP_GT; -const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ; - -const int C_VERSION_MAJOR = SCMP_VER_MAJOR; -const int C_VERSION_MINOR = SCMP_VER_MINOR; -const int C_VERSION_MICRO = SCMP_VER_MICRO; - -typedef struct scmp_arg_cmp* scmp_cast_t; - -// Wrapper to create an scmp_arg_cmp struct -void* -make_struct_arg_cmp( - unsigned int arg, - int compare, - uint64_t a, - uint64_t b - ) -{ - struct scmp_arg_cmp *s = malloc(sizeof(struct scmp_arg_cmp)); - - s->arg = arg; - s->op = compare; - s->datum_a = a; - s->datum_b = b; - - return s; -} -*/ -import "C" - -// Nonexported types -type scmpFilterAttr uint32 - -// Nonexported constants - -const ( - filterAttrActDefault scmpFilterAttr = iota - filterAttrActBadArch scmpFilterAttr = iota - filterAttrNNP scmpFilterAttr = iota - filterAttrTsync scmpFilterAttr = iota -) - -const ( - // An error return from certain libseccomp functions - scmpError C.int = -1 - // Comparison boundaries to check for architecture validity - archStart ScmpArch = ArchNative - archEnd ScmpArch = ArchS390X - // Comparison boundaries to check for action validity - actionStart ScmpAction = ActKill - actionEnd ScmpAction = ActAllow - // Comparison boundaries to check for comparison operator validity - compareOpStart ScmpCompareOp = CompareNotEqual - compareOpEnd ScmpCompareOp = CompareMaskedEqual -) - -var ( - // Error thrown on bad filter context - errBadFilter = fmt.Errorf("filter is invalid or uninitialized") - // Constants representing library major, minor, and micro versions - verMajor = int(C.C_VERSION_MAJOR) - verMinor = int(C.C_VERSION_MINOR) - verMicro = int(C.C_VERSION_MICRO) -) - -// Nonexported functions - -// Check if library version is greater than or equal to the given one -func checkVersionAbove(major, minor, micro int) bool { - return (verMajor > major) || - (verMajor == major && verMinor > minor) || - (verMajor == major && verMinor == minor && verMicro >= micro) -} - -// Init function: Verify library version is appropriate -func init() { - if !checkVersionAbove(2, 1, 0) { - fmt.Fprintf(os.Stderr, "Libseccomp version too low: minimum supported is 2.1.0, detected %d.%d.%d", C.C_VERSION_MAJOR, C.C_VERSION_MINOR, C.C_VERSION_MICRO) - os.Exit(-1) - } -} - -// Filter helpers - -// Filter finalizer - ensure that kernel context for filters is freed -func filterFinalizer(f *ScmpFilter) { - f.Release() -} - -// Get a raw filter attribute -func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return 0x0, errBadFilter - } - - if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync { - return 0x0, fmt.Errorf("the thread synchronization attribute is not supported in this version of the library") - } - - var attribute C.uint32_t - - retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute) - if retCode != 0 { - return 0x0, syscall.Errno(-1 * retCode) - } - - return attribute, nil -} - -// Set a raw filter attribute -func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return errBadFilter - } - - if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync { - return fmt.Errorf("the thread synchronization attribute is not supported in this version of the library") - } - - retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value) - if retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// DOES NOT LOCK OR CHECK VALIDITY -// Assumes caller has already done this -// Wrapper for seccomp_rule_add_... functions -func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, cond C.scmp_cast_t) error { - var length C.uint - if cond != nil { - length = 1 - } else { - length = 0 - } - - var retCode C.int - if exact { - retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, cond) - } else { - retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, cond) - } - - if syscall.Errno(-1*retCode) == syscall.EFAULT { - return fmt.Errorf("unrecognized syscall") - } else if syscall.Errno(-1*retCode) == syscall.EPERM { - return fmt.Errorf("requested action matches default action of filter") - } else if retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// Generic add function for filter rules -func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return errBadFilter - } - - if len(conds) == 0 { - if err := f.addRuleWrapper(call, action, exact, nil); err != nil { - return err - } - } else { - // We don't support conditional filtering in library version v2.1 - if !checkVersionAbove(2, 2, 1) { - return fmt.Errorf("conditional filtering requires libseccomp version >= 2.2.1") - } - - for _, cond := range conds { - cmpStruct := C.make_struct_arg_cmp(C.uint(cond.Argument), cond.Op.toNative(), C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2)) - defer C.free(cmpStruct) - - if err := f.addRuleWrapper(call, action, exact, C.scmp_cast_t(cmpStruct)); err != nil { - return err - } - } - } - - return nil -} - -// Generic Helpers - -// Helper - Sanitize Arch token input -func sanitizeArch(in ScmpArch) error { - if in < archStart || in > archEnd { - return fmt.Errorf("unrecognized architecture") - } - - if in.toNative() == C.C_ARCH_BAD { - return fmt.Errorf("architecture is not supported on this version of the library") - } - - return nil -} - -func sanitizeAction(in ScmpAction) error { - inTmp := in & 0x0000FFFF - if inTmp < actionStart || inTmp > actionEnd { - return fmt.Errorf("unrecognized action") - } - - if inTmp != ActTrace && inTmp != ActErrno && (in&0xFFFF0000) != 0 { - return fmt.Errorf("highest 16 bits must be zeroed except for Trace and Errno") - } - - return nil -} - -func sanitizeCompareOp(in ScmpCompareOp) error { - if in < compareOpStart || in > compareOpEnd { - return fmt.Errorf("unrecognized comparison operator") - } - - return nil -} - -func archFromNative(a C.uint32_t) (ScmpArch, error) { - switch a { - case C.C_ARCH_X86: - return ArchX86, nil - case C.C_ARCH_X86_64: - return ArchAMD64, nil - case C.C_ARCH_X32: - return ArchX32, nil - case C.C_ARCH_ARM: - return ArchARM, nil - case C.C_ARCH_NATIVE: - return ArchNative, nil - case C.C_ARCH_AARCH64: - return ArchARM64, nil - case C.C_ARCH_MIPS: - return ArchMIPS, nil - case C.C_ARCH_MIPS64: - return ArchMIPS64, nil - case C.C_ARCH_MIPS64N32: - return ArchMIPS64N32, nil - case C.C_ARCH_MIPSEL: - return ArchMIPSEL, nil - case C.C_ARCH_MIPSEL64: - return ArchMIPSEL64, nil - case C.C_ARCH_MIPSEL64N32: - return ArchMIPSEL64N32, nil - case C.C_ARCH_PPC: - return ArchPPC, nil - case C.C_ARCH_PPC64: - return ArchPPC64, nil - case C.C_ARCH_PPC64LE: - return ArchPPC64LE, nil - case C.C_ARCH_S390: - return ArchS390, nil - case C.C_ARCH_S390X: - return ArchS390X, nil - default: - return 0x0, fmt.Errorf("unrecognized architecture") - } -} - -// Only use with sanitized arches, no error handling -func (a ScmpArch) toNative() C.uint32_t { - switch a { - case ArchX86: - return C.C_ARCH_X86 - case ArchAMD64: - return C.C_ARCH_X86_64 - case ArchX32: - return C.C_ARCH_X32 - case ArchARM: - return C.C_ARCH_ARM - case ArchARM64: - return C.C_ARCH_AARCH64 - case ArchMIPS: - return C.C_ARCH_MIPS - case ArchMIPS64: - return C.C_ARCH_MIPS64 - case ArchMIPS64N32: - return C.C_ARCH_MIPS64N32 - case ArchMIPSEL: - return C.C_ARCH_MIPSEL - case ArchMIPSEL64: - return C.C_ARCH_MIPSEL64 - case ArchMIPSEL64N32: - return C.C_ARCH_MIPSEL64N32 - case ArchPPC: - return C.C_ARCH_PPC - case ArchPPC64: - return C.C_ARCH_PPC64 - case ArchPPC64LE: - return C.C_ARCH_PPC64LE - case ArchS390: - return C.C_ARCH_S390 - case ArchS390X: - return C.C_ARCH_S390X - case ArchNative: - return C.C_ARCH_NATIVE - default: - return 0x0 - } -} - -// Only use with sanitized ops, no error handling -func (a ScmpCompareOp) toNative() C.int { - switch a { - case CompareNotEqual: - return C.C_CMP_NE - case CompareLess: - return C.C_CMP_LT - case CompareLessOrEqual: - return C.C_CMP_LE - case CompareEqual: - return C.C_CMP_EQ - case CompareGreaterEqual: - return C.C_CMP_GE - case CompareGreater: - return C.C_CMP_GT - case CompareMaskedEqual: - return C.C_CMP_MASKED_EQ - default: - return 0x0 - } -} - -func actionFromNative(a C.uint32_t) (ScmpAction, error) { - aTmp := a & 0xFFFF - switch a & 0xFFFF0000 { - case C.C_ACT_KILL: - return ActKill, nil - case C.C_ACT_TRAP: - return ActTrap, nil - case C.C_ACT_ERRNO: - return ActErrno.SetReturnCode(int16(aTmp)), nil - case C.C_ACT_TRACE: - return ActTrace.SetReturnCode(int16(aTmp)), nil - case C.C_ACT_ALLOW: - return ActAllow, nil - default: - return 0x0, fmt.Errorf("unrecognized action") - } -} - -// Only use with sanitized actions, no error handling -func (a ScmpAction) toNative() C.uint32_t { - switch a & 0xFFFF { - case ActKill: - return C.C_ACT_KILL - case ActTrap: - return C.C_ACT_TRAP - case ActErrno: - return C.C_ACT_ERRNO | (C.uint32_t(a) >> 16) - case ActTrace: - return C.C_ACT_TRACE | (C.uint32_t(a) >> 16) - case ActAllow: - return C.C_ACT_ALLOW - default: - return 0x0 - } -} - -// Internal only, assumes safe attribute -func (a scmpFilterAttr) toNative() uint32 { - switch a { - case filterAttrActDefault: - return uint32(C.C_ATTRIBUTE_DEFAULT) - case filterAttrActBadArch: - return uint32(C.C_ATTRIBUTE_BADARCH) - case filterAttrNNP: - return uint32(C.C_ATTRIBUTE_NNP) - case filterAttrTsync: - return uint32(C.C_ATTRIBUTE_TSYNC) - default: - return 0x0 - } -}