From 72327724c01055e88fec5c3337b12041e97aed3a Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Wed, 4 Jul 2018 16:13:30 +0900 Subject: [PATCH] rootless: refactor libcontainer_specconv Signed-off-by: Akihiro Suda --- executor/runcexecutor/executor.go | 14 +- util/libcontainer_specconv/README.md | 1 - util/libcontainer_specconv/example.go | 190 ---- util/libcontainer_specconv/spec_linux_test.go | 190 ---- util/rootless/specconv/specconv_linux.go | 113 +++ util/rootless/specconv/specconv_linux_test.go | 42 + vendor.conf | 4 +- .../libcontainer/configs/validate/rootless.go | 116 --- .../configs/validate/validator.go | 212 ----- .../runc/libcontainer/intelrdt/intelrdt.go | 553 ----------- .../runc/libcontainer/intelrdt/stats.go | 24 - .../github.com/opencontainers/selinux/LICENSE | 201 ---- .../opencontainers/selinux/README.md | 7 - .../selinux/go-selinux/selinux.go | 688 -------------- .../selinux/go-selinux/xattrs.go | 78 -- .../seccomp/libseccomp-golang/LICENSE | 22 + .../seccomp/libseccomp-golang/README | 26 + .../seccomp/libseccomp-golang/seccomp.go | 857 ++++++++++++++++++ .../libseccomp-golang/seccomp_internal.go | 506 +++++++++++ 19 files changed, 1576 insertions(+), 2268 deletions(-) delete mode 100644 util/libcontainer_specconv/README.md delete mode 100644 util/libcontainer_specconv/example.go delete mode 100644 util/libcontainer_specconv/spec_linux_test.go create mode 100644 util/rootless/specconv/specconv_linux.go create mode 100644 util/rootless/specconv/specconv_linux_test.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go delete mode 100644 vendor/github.com/opencontainers/selinux/LICENSE delete mode 100644 
vendor/github.com/opencontainers/selinux/README.md delete mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/selinux.go delete mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go create mode 100644 vendor/github.com/seccomp/libseccomp-golang/LICENSE create mode 100644 vendor/github.com/seccomp/libseccomp-golang/README create mode 100644 vendor/github.com/seccomp/libseccomp-golang/seccomp.go create mode 100644 vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go diff --git a/executor/runcexecutor/executor.go b/executor/runcexecutor/executor.go index 97eb3430a02f..5225d5fc564e 100644 --- a/executor/runcexecutor/executor.go +++ b/executor/runcexecutor/executor.go @@ -21,7 +21,7 @@ import ( "github.com/moby/buildkit/executor" "github.com/moby/buildkit/executor/oci" "github.com/moby/buildkit/identity" - "github.com/moby/buildkit/util/libcontainer_specconv" + rootlessspecconv "github.com/moby/buildkit/util/rootless/specconv" "github.com/moby/buildkit/util/system" "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" @@ -84,6 +84,8 @@ func New(opt Opt) (executor.Executor, error) { LogFormat: runc.JSON, PdeathSignal: syscall.SIGKILL, Setpgid: true, + // we don't execute runc with --rootless=(true|false) explicitly, + // so as to support non-runc runtimes } w := &runcExecutor{ @@ -169,13 +171,11 @@ func (w *runcExecutor) Exec(ctx context.Context, meta executor.Meta, root cache. return errors.Wrapf(err, "failed to create working directory %s", newp) } + if err := setOOMScoreAdj(spec); err != nil { + return err + } if w.rootless { - specconv.ToRootless(spec, nil) - // TODO(AkihiroSuda): keep Cgroups enabled if /sys/fs/cgroup/cpuset/buildkit exists and writable - spec.Linux.CgroupsPath = "" - // TODO(AkihiroSuda): ToRootless removes netns, but we should readd netns here - // if either SUID or userspace NAT is configured on the host. 
- if err := setOOMScoreAdj(spec); err != nil { + if err := rootlessspecconv.ToRootless(spec); err != nil { return err } } diff --git a/util/libcontainer_specconv/README.md b/util/libcontainer_specconv/README.md deleted file mode 100644 index 7b985bafbf2f..000000000000 --- a/util/libcontainer_specconv/README.md +++ /dev/null @@ -1 +0,0 @@ -Temporary forked from https://github.com/opencontainers/runc/pull/1692 diff --git a/util/libcontainer_specconv/example.go b/util/libcontainer_specconv/example.go deleted file mode 100644 index 352ef45537c1..000000000000 --- a/util/libcontainer_specconv/example.go +++ /dev/null @@ -1,190 +0,0 @@ -package specconv - -import ( - "os" - "sort" - "strings" - - "github.com/opencontainers/runc/libcontainer/system" - "github.com/opencontainers/runc/libcontainer/user" - "github.com/opencontainers/runtime-spec/specs-go" -) - -// RootlessOpts is an optional spec for ToRootless -type RootlessOpts struct { - // Add sub{u,g}id to spec.Linux.{U,G}IDMappings. - // Requires newuidmap(1) and newgidmap(1) with suid bit. - // Ignored when running in userns. - MapSubUIDGID bool -} - -// Run-time context for ToRootless. -type RootlessContext struct { - EUID uint32 - EGID uint32 - SubUIDs []user.SubID - SubGIDs []user.SubID - UIDMap []user.IDMap - GIDMap []user.IDMap - InUserNS bool -} - -// ToRootless converts the given spec file into one that should work with -// rootless containers, by removing incompatible options and adding others that -// are needed. 
-func ToRootless(spec *specs.Spec, opts *RootlessOpts) error { - var err error - ctx := RootlessContext{} - ctx.EUID = uint32(os.Geteuid()) - ctx.EGID = uint32(os.Getegid()) - ctx.SubUIDs, err = user.CurrentUserSubUIDs() - if err != nil && !os.IsNotExist(err) { - return err - } - ctx.SubGIDs, err = user.CurrentGroupSubGIDs() - if err != nil && !os.IsNotExist(err) { - return err - } - ctx.UIDMap, err = user.CurrentProcessUIDMap() - if err != nil && !os.IsNotExist(err) { - return err - } - uidMapExists := !os.IsNotExist(err) - ctx.GIDMap, err = user.CurrentProcessUIDMap() - if err != nil && !os.IsNotExist(err) { - return err - } - ctx.InUserNS = uidMapExists && system.UIDMapInUserNS(ctx.UIDMap) - return ToRootlessWithContext(ctx, spec, opts) -} - -// ToRootlessWithContext converts the spec with the run-time context. -// ctx can be internally modified for sorting. -func ToRootlessWithContext(ctx RootlessContext, spec *specs.Spec, opts *RootlessOpts) error { - if opts == nil { - opts = &RootlessOpts{} - } - var namespaces []specs.LinuxNamespace - - // Remove networkns from the spec. - for _, ns := range spec.Linux.Namespaces { - switch ns.Type { - case specs.NetworkNamespace, specs.UserNamespace: - // Do nothing. - default: - namespaces = append(namespaces, ns) - } - } - // Add userns to the spec. - namespaces = append(namespaces, specs.LinuxNamespace{ - Type: specs.UserNamespace, - }) - spec.Linux.Namespaces = namespaces - - // Add mappings for the current user. 
- if ctx.InUserNS { - uNextContainerID := int64(0) - sort.Sort(idmapSorter(ctx.UIDMap)) - for _, uidmap := range ctx.UIDMap { - spec.Linux.UIDMappings = append(spec.Linux.UIDMappings, - specs.LinuxIDMapping{ - HostID: uint32(uidmap.ID), - ContainerID: uint32(uNextContainerID), - Size: uint32(uidmap.Count), - }) - uNextContainerID += uidmap.Count - } - gNextContainerID := int64(0) - sort.Sort(idmapSorter(ctx.GIDMap)) - for _, gidmap := range ctx.GIDMap { - spec.Linux.GIDMappings = append(spec.Linux.GIDMappings, - specs.LinuxIDMapping{ - HostID: uint32(gidmap.ID), - ContainerID: uint32(gNextContainerID), - Size: uint32(gidmap.Count), - }) - gNextContainerID += gidmap.Count - } - // opts.MapSubUIDGID is ignored in userns - } else { - spec.Linux.UIDMappings = []specs.LinuxIDMapping{{ - HostID: ctx.EUID, - ContainerID: 0, - Size: 1, - }} - spec.Linux.GIDMappings = []specs.LinuxIDMapping{{ - HostID: ctx.EGID, - ContainerID: 0, - Size: 1, - }} - if opts.MapSubUIDGID { - uNextContainerID := int64(1) - sort.Sort(subIDSorter(ctx.SubUIDs)) - for _, subuid := range ctx.SubUIDs { - spec.Linux.UIDMappings = append(spec.Linux.UIDMappings, - specs.LinuxIDMapping{ - HostID: uint32(subuid.SubID), - ContainerID: uint32(uNextContainerID), - Size: uint32(subuid.Count), - }) - uNextContainerID += subuid.Count - } - gNextContainerID := int64(1) - sort.Sort(subIDSorter(ctx.SubGIDs)) - for _, subgid := range ctx.SubGIDs { - spec.Linux.GIDMappings = append(spec.Linux.GIDMappings, - specs.LinuxIDMapping{ - HostID: uint32(subgid.SubID), - ContainerID: uint32(gNextContainerID), - Size: uint32(subgid.Count), - }) - gNextContainerID += subgid.Count - } - } - } - - // Fix up mounts. - var mounts []specs.Mount - for _, mount := range spec.Mounts { - // Ignore all mounts that are under /sys. - if strings.HasPrefix(mount.Destination, "/sys") { - continue - } - - // Remove all gid= and uid= mappings. 
- var options []string - for _, option := range mount.Options { - if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") { - options = append(options, option) - } - } - - mount.Options = options - mounts = append(mounts, mount) - } - // Add the sysfs mount as an rbind. - mounts = append(mounts, specs.Mount{ - Source: "/sys", - Destination: "/sys", - Type: "none", - Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, - }) - spec.Mounts = mounts - - // Remove cgroup settings. - spec.Linux.Resources = nil - return nil -} - -// subIDSorter is required for Go <= 1.7 -type subIDSorter []user.SubID - -func (x subIDSorter) Len() int { return len(x) } -func (x subIDSorter) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x subIDSorter) Less(i, j int) bool { return x[i].SubID < x[j].SubID } - -type idmapSorter []user.IDMap - -func (x idmapSorter) Len() int { return len(x) } -func (x idmapSorter) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x idmapSorter) Less(i, j int) bool { return x[i].ID < x[j].ID } diff --git a/util/libcontainer_specconv/spec_linux_test.go b/util/libcontainer_specconv/spec_linux_test.go deleted file mode 100644 index 116112b0df65..000000000000 --- a/util/libcontainer_specconv/spec_linux_test.go +++ /dev/null @@ -1,190 +0,0 @@ -// +build linux - -package specconv - -import ( - "reflect" - "testing" - - "github.com/opencontainers/runc/libcontainer/configs/validate" - "github.com/opencontainers/runc/libcontainer/specconv" - "github.com/opencontainers/runc/libcontainer/user" - "github.com/opencontainers/runtime-spec/specs-go" -) - -func TestRootlessSpecconvValidate(t *testing.T) { - specGen := func() *specs.Spec { - spec := specconv.Example() - spec.Root.Path = "/" - return spec - } - cases := []struct { - ctx RootlessContext - opts *RootlessOpts - additionalValidator func(t *testing.T, s *specs.Spec) - }{ - { - ctx: RootlessContext{ - EUID: 0, - EGID: 0, - }, - }, - { - ctx: RootlessContext{ - EUID: 4242, - EGID: 4242, 
- }, - }, - { - ctx: RootlessContext{ - EUID: 4242, - EGID: 4242, - // empty subuid / subgid - }, - opts: &RootlessOpts{ - MapSubUIDGID: true, - }, - }, - { - ctx: RootlessContext{ - EUID: 4242, - EGID: 4242, - SubUIDs: []user.SubID{ - { - Name: "dummy", - SubID: 14242, - Count: 65536, - }, - { - Name: "dummy", - SubID: 114242, - Count: 65536, - }, - }, - SubGIDs: []user.SubID{ - { - Name: "dummy", - SubID: 14242, - Count: 65536, - }, - { - Name: "dummy", - SubID: 114242, - Count: 65536, - }, - }, - }, - opts: &RootlessOpts{ - MapSubUIDGID: true, - }, - additionalValidator: func(t *testing.T, s *specs.Spec) { - expectedUIDMappings := []specs.LinuxIDMapping{ - { - HostID: 4242, - ContainerID: 0, - Size: 1, - }, - { - HostID: 14242, - ContainerID: 1, - Size: 65536, - }, - { - HostID: 114242, - ContainerID: 65537, - Size: 65536, - }, - } - if !reflect.DeepEqual(expectedUIDMappings, s.Linux.UIDMappings) { - t.Errorf("expected %#v, got %#v", expectedUIDMappings, s.Linux.UIDMappings) - } - expectedGIDMappings := expectedUIDMappings - if !reflect.DeepEqual(expectedGIDMappings, s.Linux.GIDMappings) { - t.Errorf("expected %#v, got %#v", expectedGIDMappings, s.Linux.GIDMappings) - } - }, - }, - { - ctx: RootlessContext{ - EUID: 0, - EGID: 0, - UIDMap: []user.IDMap{ - { - ID: 0, - ParentID: 4242, - Count: 1, - }, - { - ID: 1, - ParentID: 231072, - Count: 65536, - }, - }, - GIDMap: []user.IDMap{ - { - ID: 0, - ParentID: 4242, - Count: 1, - }, - { - ID: 1, - ParentID: 231072, - Count: 65536, - }, - }, - InUserNS: true, - }, - additionalValidator: func(t *testing.T, s *specs.Spec) { - expectedUIDMappings := []specs.LinuxIDMapping{ - { - HostID: 0, - ContainerID: 0, - Size: 1, - }, - { - HostID: 1, - ContainerID: 1, - Size: 65536, - }, - } - if !reflect.DeepEqual(expectedUIDMappings, s.Linux.UIDMappings) { - t.Errorf("expected %#v, got %#v", expectedUIDMappings, s.Linux.UIDMappings) - } - expectedGIDMappings := expectedUIDMappings - if !reflect.DeepEqual(expectedGIDMappings, 
s.Linux.GIDMappings) { - t.Errorf("expected %#v, got %#v", expectedGIDMappings, s.Linux.GIDMappings) - } - }, - }, - } - - for _, c := range cases { - spec := specGen() - err := ToRootlessWithContext(c.ctx, spec, c.opts) - if err != nil { - t.Errorf("Couldn't convert a rootful spec to rootless: %v", err) - } - - // t.Logf("%#v", spec) - if c.additionalValidator != nil { - c.additionalValidator(t, spec) - } - - opts := &specconv.CreateOpts{ - CgroupName: "ContainerID", - UseSystemdCgroup: false, - Spec: spec, - Rootless: true, - } - - config, err := specconv.CreateLibcontainerConfig(opts) - if err != nil { - t.Errorf("Couldn't create libcontainer config: %v", err) - } - - validator := validate.New() - if err := validator.Validate(config); err != nil { - t.Errorf("Expected specconv to produce valid rootless container config: %v", err) - } - } -} diff --git a/util/rootless/specconv/specconv_linux.go b/util/rootless/specconv/specconv_linux.go new file mode 100644 index 000000000000..1aa8d2717be3 --- /dev/null +++ b/util/rootless/specconv/specconv_linux.go @@ -0,0 +1,113 @@ +package specconv + +import ( + "os" + "sort" + "strings" + + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/user" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +// ToRootless converts spec to be compatible with "rootless" runc. +// * Adds userns (Note: since we are already in userns, ideally we should not need to do this. 
runc-side issue is tracked at https://github.com/opencontainers/runc/issues/1837) +// * Fix up mount flags (same as above) +// * Replace /sys with bind-mount (FIXME: we don't need to do this if netns is unshared) +func ToRootless(spec *specs.Spec) error { + if !system.RunningInUserNS() { + return errors.New("needs to be in user namespace") + } + uidMap, err := user.CurrentProcessUIDMap() + if err != nil && !os.IsNotExist(err) { + return err + } + gidMap, err := user.CurrentProcessGIDMap() + if err != nil && !os.IsNotExist(err) { + return err + } + return toRootless(spec, uidMap, gidMap) +} + +// toRootless was forked from github.com/opencontainers/runc/libcontainer/specconv +func toRootless(spec *specs.Spec, uidMap, gidMap []user.IDMap) error { + if err := configureUserNS(spec, uidMap, gidMap); err != nil { + return err + } + if err := configureMounts(spec); err != nil { + return err + } + + // Remove cgroup settings. + spec.Linux.Resources = nil + spec.Linux.CgroupsPath = "" + return nil +} + +// configureUserNS adds userns and the current ID map to the spec. +// Since we are already in userns, ideally we should not need to add userns. +// However, currently rootless runc always requires userns to be added.
+// https://github.com/opencontainers/runc/issues/1837 +func configureUserNS(spec *specs.Spec, uidMap, gidMap []user.IDMap) error { + spec.Linux.Namespaces = append(spec.Linux.Namespaces, specs.LinuxNamespace{ + Type: specs.UserNamespace, + }) + + sort.Slice(uidMap, func(i, j int) bool { return uidMap[i].ID < uidMap[j].ID }) + uNextContainerID := int64(0) + for _, u := range uidMap { + spec.Linux.UIDMappings = append(spec.Linux.UIDMappings, + specs.LinuxIDMapping{ + HostID: uint32(u.ID), + ContainerID: uint32(uNextContainerID), + Size: uint32(u.Count), + }) + uNextContainerID += u.Count + } + sort.Slice(gidMap, func(i, j int) bool { return gidMap[i].ID < gidMap[j].ID }) + gNextContainerID := int64(0) + for _, g := range gidMap { + spec.Linux.GIDMappings = append(spec.Linux.GIDMappings, + specs.LinuxIDMapping{ + HostID: uint32(g.ID), + ContainerID: uint32(gNextContainerID), + Size: uint32(g.Count), + }) + gNextContainerID += g.Count + } + return nil +} + +func configureMounts(spec *specs.Spec) error { + var mounts []specs.Mount + for _, mount := range spec.Mounts { + // Ignore /sys itself and all mounts under /sys/ (but not siblings such as /sysroot), because we add /sys later. + if mount.Destination == "/sys" || strings.HasPrefix(mount.Destination, "/sys/") { + continue + } + + // Remove all gid= and uid= mappings. + // Since we are already in userns, ideally we should not need to do this. + // https://github.com/opencontainers/runc/issues/1837 + var options []string + for _, option := range mount.Options { + if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") { + options = append(options, option) + } + } + mount.Options = options + mounts = append(mounts, mount) + } + + // Add the sysfs mount as an rbind, because we can't mount /sys unless we have netns. + // TODO: keep original /sys mount when we have netns.
+ mounts = append(mounts, specs.Mount{ + Source: "/sys", + Destination: "/sys", + Type: "none", + Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, + }) + spec.Mounts = mounts + return nil +} diff --git a/util/rootless/specconv/specconv_linux_test.go b/util/rootless/specconv/specconv_linux_test.go new file mode 100644 index 000000000000..03b2041d553b --- /dev/null +++ b/util/rootless/specconv/specconv_linux_test.go @@ -0,0 +1,42 @@ +package specconv + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/specconv" + "github.com/opencontainers/runc/libcontainer/user" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/require" +) + +func TestToRootless(t *testing.T) { + spec := specconv.Example() + uidMap := []user.IDMap{ + { + ID: 0, + ParentID: 4242, + Count: 1, + }, + { + ID: 1, + ParentID: 231072, + Count: 65536, + }, + } + gidMap := uidMap + expectedUIDMappings := []specs.LinuxIDMapping{ + { + HostID: 0, + ContainerID: 0, + Size: 1, + }, + { + HostID: 1, + ContainerID: 1, + Size: 65536, + }, + } + err := toRootless(spec, uidMap, gidMap) + require.NoError(t, err) + require.EqualValues(t, expectedUIDMappings, spec.Linux.UIDMappings) +} diff --git a/vendor.conf b/vendor.conf index 26d6a3b6d5a2..ea11bb041fc6 100644 --- a/vendor.conf +++ b/vendor.conf @@ -60,8 +60,10 @@ github.com/uber/jaeger-lib c48167d9cae5887393dd5e61efd06a4a48b7fbb3 github.com/codahale/hdrhistogram f8ad88b59a584afeee9d334eff879b104439117b github.com/opentracing-contrib/go-stdlib b1a47cfbdd7543e70e9ef3e73d0802ad306cc1cc -github.com/opencontainers/selinux 74a747aeaf2d66097b6908f572794f49f07dda2c # used by dockerfile tests gotest.tools v2.1.0 github.com/google/go-cmp v0.2.0 + +# used by rootless spec conv test +github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0 diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go 
b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go deleted file mode 100644 index 8c3954ce2511..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go +++ /dev/null @@ -1,116 +0,0 @@ -package validate - -import ( - "fmt" - "os" - "reflect" - "strings" - - "github.com/opencontainers/runc/libcontainer/configs" -) - -var ( - geteuid = os.Geteuid - getegid = os.Getegid -) - -func (v *ConfigValidator) rootless(config *configs.Config) error { - if err := rootlessMappings(config); err != nil { - return err - } - if err := rootlessMount(config); err != nil { - return err - } - - // XXX: We currently can't verify the user config at all, because - // configs.Config doesn't store the user-related configs. So this - // has to be verified by setupUser() in init_linux.go. - - return nil -} - -func hasIDMapping(id int, mappings []configs.IDMap) bool { - for _, m := range mappings { - if id >= m.ContainerID && id < m.ContainerID+m.Size { - return true - } - } - return false -} - -func rootlessMappings(config *configs.Config) error { - if euid := geteuid(); euid != 0 { - if !config.Namespaces.Contains(configs.NEWUSER) { - return fmt.Errorf("rootless containers require user namespaces") - } - if len(config.UidMappings) == 0 { - return fmt.Errorf("rootless containers requires at least one UID mapping") - } - if len(config.GidMappings) == 0 { - return fmt.Errorf("rootless containers requires at least one GID mapping") - } - } - - return nil -} - -// cgroup verifies that the user isn't trying to set any cgroup limits or paths. -func rootlessCgroup(config *configs.Config) error { - // Nothing set at all. - if config.Cgroups == nil || config.Cgroups.Resources == nil { - return nil - } - - // Used for comparing to the zero value. - left := reflect.ValueOf(*config.Cgroups.Resources) - right := reflect.Zero(left.Type()) - - // This is all we need to do, since specconv won't add cgroup options in - // rootless mode. 
- if !reflect.DeepEqual(left.Interface(), right.Interface()) { - return fmt.Errorf("cannot specify resource limits in rootless container") - } - - return nil -} - -// mount verifies that the user isn't trying to set up any mounts they don't have -// the rights to do. In addition, it makes sure that no mount has a `uid=` or -// `gid=` option that doesn't resolve to root. -func rootlessMount(config *configs.Config) error { - // XXX: We could whitelist allowed devices at this point, but I'm not - // convinced that's a good idea. The kernel is the best arbiter of - // access control. - - for _, mount := range config.Mounts { - // Check that the options list doesn't contain any uid= or gid= entries - // that don't resolve to root. - for _, opt := range strings.Split(mount.Data, ",") { - if strings.HasPrefix(opt, "uid=") { - var uid int - n, err := fmt.Sscanf(opt, "uid=%d", &uid) - if n != 1 || err != nil { - // Ignore unknown mount options. - continue - } - if !hasIDMapping(uid, config.UidMappings) { - return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers") - } - } - - if strings.HasPrefix(opt, "gid=") { - var gid int - n, err := fmt.Sscanf(opt, "gid=%d", &gid) - if n != 1 || err != nil { - // Ignore unknown mount options. 
- continue - } - if !hasIDMapping(gid, config.GidMappings) { - return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers") - } - } - } - } - - return nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go deleted file mode 100644 index cbbba9a03a20..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go +++ /dev/null @@ -1,212 +0,0 @@ -package validate - -import ( - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/intelrdt" - selinux "github.com/opencontainers/selinux/go-selinux" -) - -type Validator interface { - Validate(*configs.Config) error -} - -func New() Validator { - return &ConfigValidator{} -} - -type ConfigValidator struct { -} - -func (v *ConfigValidator) Validate(config *configs.Config) error { - if err := v.rootfs(config); err != nil { - return err - } - if err := v.network(config); err != nil { - return err - } - if err := v.hostname(config); err != nil { - return err - } - if err := v.security(config); err != nil { - return err - } - if err := v.usernamespace(config); err != nil { - return err - } - if err := v.sysctl(config); err != nil { - return err - } - if err := v.intelrdt(config); err != nil { - return err - } - if config.Rootless { - if err := v.rootless(config); err != nil { - return err - } - } - return nil -} - -// rootfs validates if the rootfs is an absolute path and is not a symlink -// to the container's root filesystem. 
-func (v *ConfigValidator) rootfs(config *configs.Config) error { - if _, err := os.Stat(config.Rootfs); err != nil { - if os.IsNotExist(err) { - return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs) - } - return err - } - cleaned, err := filepath.Abs(config.Rootfs) - if err != nil { - return err - } - if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { - return err - } - if filepath.Clean(config.Rootfs) != cleaned { - return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs) - } - return nil -} - -func (v *ConfigValidator) network(config *configs.Config) error { - if !config.Namespaces.Contains(configs.NEWNET) { - if len(config.Networks) > 0 || len(config.Routes) > 0 { - return fmt.Errorf("unable to apply network settings without a private NET namespace") - } - } - return nil -} - -func (v *ConfigValidator) hostname(config *configs.Config) error { - if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { - return fmt.Errorf("unable to set hostname without a private UTS namespace") - } - return nil -} - -func (v *ConfigValidator) security(config *configs.Config) error { - // restrict sys without mount namespace - if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && - !config.Namespaces.Contains(configs.NEWNS) { - return fmt.Errorf("unable to restrict sys entries without a private MNT namespace") - } - if config.ProcessLabel != "" && !selinux.GetEnabled() { - return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported") - } - - return nil -} - -func (v *ConfigValidator) usernamespace(config *configs.Config) error { - if config.Namespaces.Contains(configs.NEWUSER) { - if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { - return fmt.Errorf("USER namespaces aren't enabled in the kernel") - } - } else { - if config.UidMappings != nil || config.GidMappings != nil { - return fmt.Errorf("User namespace mappings specified, but USER namespace isn't 
enabled in the config") - } - } - return nil -} - -// sysctl validates that the specified sysctl keys are valid or not. -// /proc/sys isn't completely namespaced and depending on which namespaces -// are specified, a subset of sysctls are permitted. -func (v *ConfigValidator) sysctl(config *configs.Config) error { - validSysctlMap := map[string]bool{ - "kernel.msgmax": true, - "kernel.msgmnb": true, - "kernel.msgmni": true, - "kernel.sem": true, - "kernel.shmall": true, - "kernel.shmmax": true, - "kernel.shmmni": true, - "kernel.shm_rmid_forced": true, - } - - for s := range config.Sysctl { - if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") { - if config.Namespaces.Contains(configs.NEWIPC) { - continue - } else { - return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s) - } - } - if strings.HasPrefix(s, "net.") { - if config.Namespaces.Contains(configs.NEWNET) { - if path := config.Namespaces.PathOf(configs.NEWNET); path != "" { - if err := checkHostNs(s, path); err != nil { - return err - } - } - continue - } else { - return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s) - } - } - return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s) - } - - return nil -} - -func (v *ConfigValidator) intelrdt(config *configs.Config) error { - if config.IntelRdt != nil { - if !intelrdt.IsEnabled() { - return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled") - } - if config.IntelRdt.L3CacheSchema == "" { - return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty") - } - } - - return nil -} - -func isSymbolicLink(path string) (bool, error) { - fi, err := os.Lstat(path) - if err != nil { - return false, err - } - - return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil -} - -// checkHostNs checks whether network sysctl is used in host namespace. 
-func checkHostNs(sysctlConfig string, path string) error { - var currentProcessNetns = "/proc/self/ns/net" - // readlink on the current processes network namespace - destOfCurrentProcess, err := os.Readlink(currentProcessNetns) - if err != nil { - return fmt.Errorf("read soft link %q error", currentProcessNetns) - } - - // First check if the provided path is a symbolic link - symLink, err := isSymbolicLink(path) - if err != nil { - return fmt.Errorf("could not check that %q is a symlink: %v", path, err) - } - - if symLink == false { - // The provided namespace is not a symbolic link, - // it is not the host namespace. - return nil - } - - // readlink on the path provided in the struct - destOfContainer, err := os.Readlink(path) - if err != nil { - return fmt.Errorf("read soft link %q error", path) - } - if destOfContainer == destOfCurrentProcess { - return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig) - } - return nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go deleted file mode 100644 index 487c630af61c..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go +++ /dev/null @@ -1,553 +0,0 @@ -// +build linux - -package intelrdt - -import ( - "bufio" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "strconv" - "strings" - "sync" - - "github.com/opencontainers/runc/libcontainer/configs" -) - -/* - * About Intel RDT/CAT feature: - * Intel platforms with new Xeon CPU support Resource Director Technology (RDT). - * Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3 - * Cache is the only resource that is supported in RDT. - * - * This feature provides a way for the software to restrict cache allocation to a - * defined 'subset' of L3 cache which may be overlapping with other 'subsets'. 
- * The different subsets are identified by class of service (CLOS) and each CLOS - * has a capacity bitmask (CBM). - * - * For more information about Intel RDT/CAT can be found in the section 17.17 - * of Intel Software Developer Manual. - * - * About Intel RDT/CAT kernel interface: - * In Linux 4.10 kernel or newer, the interface is defined and exposed via - * "resource control" filesystem, which is a "cgroup-like" interface. - * - * Comparing with cgroups, it has similar process management lifecycle and - * interfaces in a container. But unlike cgroups' hierarchy, it has single level - * filesystem layout. - * - * Intel RDT "resource control" filesystem hierarchy: - * mount -t resctrl resctrl /sys/fs/resctrl - * tree /sys/fs/resctrl - * /sys/fs/resctrl/ - * |-- info - * | |-- L3 - * | |-- cbm_mask - * | |-- min_cbm_bits - * | |-- num_closids - * |-- cpus - * |-- schemata - * |-- tasks - * |-- - * |-- cpus - * |-- schemata - * |-- tasks - * - * For runc, we can make use of `tasks` and `schemata` configuration for L3 cache - * resource constraints. - * - * The file `tasks` has a list of tasks that belongs to this group (e.g., - * " group). Tasks can be added to a group by writing the task ID - * to the "tasks" file (which will automatically remove them from the previous - * group to which they belonged). New tasks created by fork(2) and clone(2) are - * added to the same group as their parent. If a pid is not in any sub group, it is - * in root group. - * - * The file `schemata` has allocation bitmasks/values for L3 cache on each socket, - * which contains L3 cache id and capacity bitmask (CBM). - * Format: "L3:=;=;..." - * For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0` - * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. - * - * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can - * be set is less than the max bit. 
The max bits in the CBM is varied among - * supported Intel Xeon platforms. In Intel RDT "resource control" filesystem - * layout, the CBM in a group should be a subset of the CBM in root. Kernel will - * check if it is valid when writing. e.g., 0xfffff in root indicates the max bits - * of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM - * values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. - * - * For more information about Intel RDT/CAT kernel interface: - * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt - * - * An example for runc: - * Consider a two-socket machine with two L3 caches where the default CBM is - * 0xfffff and the max CBM length is 20 bits. With this configuration, tasks - * inside the container only have access to the "upper" 80% of L3 cache id 0 and - * the "lower" 50% L3 cache id 1: - * - * "linux": { - * "intelRdt": { - * "l3CacheSchema": "L3:0=ffff0;1=3ff" - * } - * } - */ - -type Manager interface { - // Applies Intel RDT configuration to the process with the specified pid - Apply(pid int) error - - // Returns statistics for Intel RDT - GetStats() (*Stats, error) - - // Destroys the Intel RDT 'container_id' group - Destroy() error - - // Returns Intel RDT path to save in a state file and to be able to - // restore the object later - GetPath() string - - // Set Intel RDT "resource control" filesystem as configured. - Set(container *configs.Config) error -} - -// This implements interface Manager -type IntelRdtManager struct { - mu sync.Mutex - Config *configs.Config - Id string - Path string -} - -const ( - IntelRdtTasks = "tasks" -) - -var ( - // The absolute root path of the Intel RDT "resource control" filesystem - intelRdtRoot string - intelRdtRootLock sync.Mutex - - // The flag to indicate if Intel RDT is supported - isEnabled bool -) - -type intelRdtData struct { - root string - config *configs.Config - pid int -} - -// Check if Intel RDT is enabled in init() -func init() { - // 1. 
Check if hardware and kernel support Intel RDT/CAT feature - // "cat_l3" flag is set if supported - isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo") - if !isFlagSet || err != nil { - isEnabled = false - return - } - - // 2. Check if Intel RDT "resource control" filesystem is mounted - // The user guarantees to mount the filesystem - isEnabled = isIntelRdtMounted() -} - -// Return the mount point path of Intel RDT "resource control" filesysem -func findIntelRdtMountpointDir() (string, error) { - f, err := os.Open("/proc/self/mountinfo") - if err != nil { - return "", err - } - defer f.Close() - - s := bufio.NewScanner(f) - for s.Scan() { - text := s.Text() - fields := strings.Split(text, " ") - // Safe as mountinfo encodes mountpoints with spaces as \040. - index := strings.Index(text, " - ") - postSeparatorFields := strings.Fields(text[index+3:]) - numPostFields := len(postSeparatorFields) - - // This is an error as we can't detect if the mount is for "Intel RDT" - if numPostFields == 0 { - return "", fmt.Errorf("Found no fields post '-' in %q", text) - } - - if postSeparatorFields[0] == "resctrl" { - // Check that the mount is properly formated. 
- if numPostFields < 3 { - return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) - } - - return fields[4], nil - } - } - if err := s.Err(); err != nil { - return "", err - } - - return "", NewNotFoundError("Intel RDT") -} - -// Gets the root path of Intel RDT "resource control" filesystem -func getIntelRdtRoot() (string, error) { - intelRdtRootLock.Lock() - defer intelRdtRootLock.Unlock() - - if intelRdtRoot != "" { - return intelRdtRoot, nil - } - - root, err := findIntelRdtMountpointDir() - if err != nil { - return "", err - } - - if _, err := os.Stat(root); err != nil { - return "", err - } - - intelRdtRoot = root - return intelRdtRoot, nil -} - -func isIntelRdtMounted() bool { - _, err := getIntelRdtRoot() - if err != nil { - return false - } - - return true -} - -func parseCpuInfoFile(path string) (bool, error) { - f, err := os.Open(path) - if err != nil { - return false, err - } - defer f.Close() - - s := bufio.NewScanner(f) - for s.Scan() { - if err := s.Err(); err != nil { - return false, err - } - - text := s.Text() - flags := strings.Split(text, " ") - - // "cat_l3" flag is set if Intel RDT/CAT is supported - for _, flag := range flags { - if flag == "cat_l3" { - return true, nil - } - } - } - return false, nil -} - -func parseUint(s string, base, bitSize int) (uint64, error) { - value, err := strconv.ParseUint(s, base, bitSize) - if err != nil { - intValue, intErr := strconv.ParseInt(s, base, bitSize) - // 1. Handle negative values greater than MinInt64 (and) - // 2. Handle negative values lesser than MinInt64 - if intErr == nil && intValue < 0 { - return 0, nil - } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { - return 0, nil - } - - return value, err - } - - return value, nil -} - -// Gets a single uint64 value from the specified file. 
-func getIntelRdtParamUint(path, file string) (uint64, error) { - fileName := filepath.Join(path, file) - contents, err := ioutil.ReadFile(fileName) - if err != nil { - return 0, err - } - - res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64) - if err != nil { - return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName) - } - return res, nil -} - -// Gets a string value from the specified file -func getIntelRdtParamString(path, file string) (string, error) { - contents, err := ioutil.ReadFile(filepath.Join(path, file)) - if err != nil { - return "", err - } - - return strings.TrimSpace(string(contents)), nil -} - -func readTasksFile(dir string) ([]int, error) { - f, err := os.Open(filepath.Join(dir, IntelRdtTasks)) - if err != nil { - return nil, err - } - defer f.Close() - - var ( - s = bufio.NewScanner(f) - out = []int{} - ) - - for s.Scan() { - if t := s.Text(); t != "" { - pid, err := strconv.Atoi(t) - if err != nil { - return nil, err - } - out = append(out, pid) - } - } - return out, nil -} - -func writeFile(dir, file, data string) error { - if dir == "" { - return fmt.Errorf("no such directory for %s", file) - } - if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil { - return fmt.Errorf("failed to write %v to %v: %v", data, file, err) - } - return nil -} - -func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) { - rootPath, err := getIntelRdtRoot() - if err != nil { - return nil, err - } - return &intelRdtData{ - root: rootPath, - config: c, - pid: pid, - }, nil -} - -// Get the read-only L3 cache information -func getL3CacheInfo() (*L3CacheInfo, error) { - l3CacheInfo := &L3CacheInfo{} - - rootPath, err := getIntelRdtRoot() - if err != nil { - return l3CacheInfo, err - } - - path := filepath.Join(rootPath, "info", "L3") - cbmMask, err := getIntelRdtParamString(path, "cbm_mask") - if err != nil { - return l3CacheInfo, err - } - minCbmBits, err := 
getIntelRdtParamUint(path, "min_cbm_bits") - if err != nil { - return l3CacheInfo, err - } - numClosids, err := getIntelRdtParamUint(path, "num_closids") - if err != nil { - return l3CacheInfo, err - } - - l3CacheInfo.CbmMask = cbmMask - l3CacheInfo.MinCbmBits = minCbmBits - l3CacheInfo.NumClosids = numClosids - - return l3CacheInfo, nil -} - -// WriteIntelRdtTasks writes the specified pid into the "tasks" file -func WriteIntelRdtTasks(dir string, pid int) error { - if dir == "" { - return fmt.Errorf("no such directory for %s", IntelRdtTasks) - } - - // Dont attach any pid if -1 is specified as a pid - if pid != -1 { - if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil { - return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err) - } - } - return nil -} - -// Check if Intel RDT is enabled -func IsEnabled() bool { - return isEnabled -} - -// Get the 'container_id' path in Intel RDT "resource control" filesystem -func GetIntelRdtPath(id string) (string, error) { - rootPath, err := getIntelRdtRoot() - if err != nil { - return "", err - } - - path := filepath.Join(rootPath, id) - return path, nil -} - -// Applies Intel RDT configuration to the process with the specified pid -func (m *IntelRdtManager) Apply(pid int) (err error) { - // If intelRdt is not specified in config, we do nothing - if m.Config.IntelRdt == nil { - return nil - } - d, err := getIntelRdtData(m.Config, pid) - if err != nil && !IsNotFound(err) { - return err - } - - m.mu.Lock() - defer m.mu.Unlock() - path, err := d.join(m.Id) - if err != nil { - return err - } - - m.Path = path - return nil -} - -// Destroys the Intel RDT 'container_id' group -func (m *IntelRdtManager) Destroy() error { - m.mu.Lock() - defer m.mu.Unlock() - if err := os.RemoveAll(m.Path); err != nil { - return err - } - m.Path = "" - return nil -} - -// Returns Intel RDT path to save in a state file and to be able to -// restore the object later -func (m 
*IntelRdtManager) GetPath() string { - if m.Path == "" { - m.Path, _ = GetIntelRdtPath(m.Id) - } - return m.Path -} - -// Returns statistics for Intel RDT -func (m *IntelRdtManager) GetStats() (*Stats, error) { - // If intelRdt is not specified in config - if m.Config.IntelRdt == nil { - return nil, nil - } - - m.mu.Lock() - defer m.mu.Unlock() - stats := NewStats() - - // The read-only L3 cache information - l3CacheInfo, err := getL3CacheInfo() - if err != nil { - return nil, err - } - stats.L3CacheInfo = l3CacheInfo - - // The read-only L3 cache schema in root - rootPath, err := getIntelRdtRoot() - if err != nil { - return nil, err - } - tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata") - if err != nil { - return nil, err - } - // L3 cache schema is in the first line - schemaRootStrings := strings.Split(tmpRootStrings, "\n") - stats.L3CacheSchemaRoot = schemaRootStrings[0] - - // The L3 cache schema in 'container_id' group - tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata") - if err != nil { - return nil, err - } - // L3 cache schema is in the first line - schemaStrings := strings.Split(tmpStrings, "\n") - stats.L3CacheSchema = schemaStrings[0] - - return stats, nil -} - -// Set Intel RDT "resource control" filesystem as configured. -func (m *IntelRdtManager) Set(container *configs.Config) error { - path := m.GetPath() - - // About L3 cache schema file: - // The schema has allocation masks/values for L3 cache on each socket, - // which contains L3 cache id and capacity bitmask (CBM). - // Format: "L3:=;=;..." - // For example, on a two-socket machine, L3's schema line could be: - // L3:0=ff;1=c0 - // Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. - // - // About L3 cache CBM validity: - // The valid L3 cache CBM is a *contiguous bits set* and number of - // bits that can be set is less than the max bit. The max bits in the - // CBM is varied among supported Intel Xeon platforms. 
In Intel RDT - // "resource control" filesystem layout, the CBM in a group should - // be a subset of the CBM in root. Kernel will check if it is valid - // when writing. - // e.g., 0xfffff in root indicates the max bits of CBM is 20 bits, - // which mapping to entire L3 cache capacity. Some valid CBM values - // to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. - if container.IntelRdt != nil { - l3CacheSchema := container.IntelRdt.L3CacheSchema - if l3CacheSchema != "" { - if err := writeFile(path, "schemata", l3CacheSchema); err != nil { - return err - } - } - } - - return nil -} - -func (raw *intelRdtData) join(id string) (string, error) { - path := filepath.Join(raw.root, id) - if err := os.MkdirAll(path, 0755); err != nil { - return "", err - } - - if err := WriteIntelRdtTasks(path, raw.pid); err != nil { - return "", err - } - return path, nil -} - -type NotFoundError struct { - ResourceControl string -} - -func (e *NotFoundError) Error() string { - return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl) -} - -func NewNotFoundError(res string) error { - return &NotFoundError{ - ResourceControl: res, - } -} - -func IsNotFound(err error) bool { - if err == nil { - return false - } - _, ok := err.(*NotFoundError) - return ok -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go deleted file mode 100644 index 095c0a380cd0..000000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go +++ /dev/null @@ -1,24 +0,0 @@ -// +build linux - -package intelrdt - -type L3CacheInfo struct { - CbmMask string `json:"cbm_mask,omitempty"` - MinCbmBits uint64 `json:"min_cbm_bits,omitempty"` - NumClosids uint64 `json:"num_closids,omitempty"` -} - -type Stats struct { - // The read-only L3 cache information - L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"` - - // The read-only L3 cache schema in root - L3CacheSchemaRoot string 
`json:"l3_cache_schema_root,omitempty"` - - // The L3 cache schema in 'container_id' group - L3CacheSchema string `json:"l3_cache_schema,omitempty"` -} - -func NewStats() *Stats { - return &Stats{} -} diff --git a/vendor/github.com/opencontainers/selinux/LICENSE b/vendor/github.com/opencontainers/selinux/LICENSE deleted file mode 100644 index 8dada3edaf50..000000000000 --- a/vendor/github.com/opencontainers/selinux/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/vendor/github.com/opencontainers/selinux/README.md b/vendor/github.com/opencontainers/selinux/README.md deleted file mode 100644 index 043a9293718f..000000000000 --- a/vendor/github.com/opencontainers/selinux/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# selinux - -[![GoDoc](https://godoc.org/github.com/opencontainers/selinux?status.svg)](https://godoc.org/github.com/opencontainers/selinux) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/selinux)](https://goreportcard.com/report/github.com/opencontainers/selinux) [![Build Status](https://travis-ci.org/opencontainers/selinux.svg?branch=master)](https://travis-ci.org/opencontainers/selinux) - -Common SELinux package used across the container ecosystem. - -Please see the [godoc](https://godoc.org/github.com/opencontainers/selinux) for more information. 
diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go deleted file mode 100644 index 17ba2c556144..000000000000 --- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go +++ /dev/null @@ -1,688 +0,0 @@ -// +build linux - -package selinux - -import ( - "bufio" - "bytes" - "crypto/rand" - "encoding/binary" - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "regexp" - "strconv" - "strings" - "sync" - "syscall" -) - -const ( - // Enforcing constant indicate SELinux is in enforcing mode - Enforcing = 1 - // Permissive constant to indicate SELinux is in permissive mode - Permissive = 0 - // Disabled constant to indicate SELinux is disabled - Disabled = -1 - selinuxDir = "/etc/selinux/" - selinuxConfig = selinuxDir + "config" - selinuxfsMount = "/sys/fs/selinux" - selinuxTypeTag = "SELINUXTYPE" - selinuxTag = "SELINUX" - xattrNameSelinux = "security.selinux" - stRdOnly = 0x01 - selinuxfsMagic = 0xf97cff8c -) - -type selinuxState struct { - enabledSet bool - enabled bool - selinuxfsSet bool - selinuxfs string - mcsList map[string]bool - sync.Mutex -} - -var ( - assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`) - state = selinuxState{ - mcsList: make(map[string]bool), - } -) - -// Context is a representation of the SELinux label broken into 4 parts -type Context map[string]string - -func (s *selinuxState) setEnable(enabled bool) bool { - s.Lock() - defer s.Unlock() - s.enabledSet = true - s.enabled = enabled - return s.enabled -} - -func (s *selinuxState) getEnabled() bool { - s.Lock() - enabled := s.enabled - enabledSet := s.enabledSet - s.Unlock() - if enabledSet { - return enabled - } - - enabled = false - if fs := getSelinuxMountPoint(); fs != "" { - if con, _ := CurrentLabel(); con != "kernel" { - enabled = true - } - } - return s.setEnable(enabled) -} - -// SetDisabled disables selinux support for the package -func SetDisabled() { - state.setEnable(false) -} - -func 
(s *selinuxState) setSELinuxfs(selinuxfs string) string { - s.Lock() - defer s.Unlock() - s.selinuxfsSet = true - s.selinuxfs = selinuxfs - return s.selinuxfs -} - -func verifySELinuxfsMount(mnt string) bool { - var buf syscall.Statfs_t - for { - err := syscall.Statfs(mnt, &buf) - if err == nil { - break - } - if err == syscall.EAGAIN { - continue - } - return false - } - if uint32(buf.Type) != uint32(selinuxfsMagic) { - return false - } - if (buf.Flags & stRdOnly) != 0 { - return false - } - - return true -} - -func findSELinuxfs() string { - // fast path: check the default mount first - if verifySELinuxfsMount(selinuxfsMount) { - return selinuxfsMount - } - - // check if selinuxfs is available before going the slow path - fs, err := ioutil.ReadFile("/proc/filesystems") - if err != nil { - return "" - } - if !bytes.Contains(fs, []byte("\tselinuxfs\n")) { - return "" - } - - // slow path: try to find among the mounts - f, err := os.Open("/proc/self/mountinfo") - if err != nil { - return "" - } - defer f.Close() - - scanner := bufio.NewScanner(f) - for { - mnt := findSELinuxfsMount(scanner) - if mnt == "" { // error or not found - return "" - } - if verifySELinuxfsMount(mnt) { - return mnt - } - } -} - -// findSELinuxfsMount returns a next selinuxfs mount point found, -// if there is one, or an empty string in case of EOF or error. -func findSELinuxfsMount(s *bufio.Scanner) string { - for s.Scan() { - txt := s.Text() - // The first field after - is fs type. 
- // Safe as spaces in mountpoints are encoded as \040 - if !strings.Contains(txt, " - selinuxfs ") { - continue - } - const mPos = 5 // mount point is 5th field - fields := strings.SplitN(txt, " ", mPos+1) - if len(fields) < mPos+1 { - continue - } - return fields[mPos-1] - } - - return "" -} - -func (s *selinuxState) getSELinuxfs() string { - s.Lock() - selinuxfs := s.selinuxfs - selinuxfsSet := s.selinuxfsSet - s.Unlock() - if selinuxfsSet { - return selinuxfs - } - - return s.setSELinuxfs(findSELinuxfs()) -} - -// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs -// filesystem or an empty string if no mountpoint is found. Selinuxfs is -// a proc-like pseudo-filesystem that exposes the selinux policy API to -// processes. The existence of an selinuxfs mount is used to determine -// whether selinux is currently enabled or not. -func getSelinuxMountPoint() string { - return state.getSELinuxfs() -} - -// GetEnabled returns whether selinux is currently enabled. -func GetEnabled() bool { - return state.getEnabled() -} - -func readConfig(target string) (value string) { - var ( - val, key string - bufin *bufio.Reader - ) - - in, err := os.Open(selinuxConfig) - if err != nil { - return "" - } - defer in.Close() - - bufin = bufio.NewReader(in) - - for done := false; !done; { - var line string - if line, err = bufin.ReadString('\n'); err != nil { - if err != io.EOF { - return "" - } - done = true - } - line = strings.TrimSpace(line) - if len(line) == 0 { - // Skip blank lines - continue - } - if line[0] == ';' || line[0] == '#' { - // Skip comments - continue - } - if groups := assignRegex.FindStringSubmatch(line); groups != nil { - key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) - if key == target { - return strings.Trim(val, "\"") - } - } - } - return "" -} - -func getSELinuxPolicyRoot() string { - return selinuxDir + readConfig(selinuxTypeTag) -} - -func readCon(name string) (string, error) { - var val string - - in, err := 
os.Open(name) - if err != nil { - return "", err - } - defer in.Close() - - _, err = fmt.Fscanf(in, "%s", &val) - return strings.Trim(val, "\x00"), err -} - -// SetFileLabel sets the SELinux label for this path or returns an error. -func SetFileLabel(path string, label string) error { - return lsetxattr(path, xattrNameSelinux, []byte(label), 0) -} - -// FileLabel returns the SELinux label for this path or returns an error. -func FileLabel(path string) (string, error) { - label, err := lgetxattr(path, xattrNameSelinux) - if err != nil { - return "", err - } - // Trim the NUL byte at the end of the byte buffer, if present. - if len(label) > 0 && label[len(label)-1] == '\x00' { - label = label[:len(label)-1] - } - return string(label), nil -} - -/* -SetFSCreateLabel tells kernel the label to create all file system objects -created by this task. Setting label="" to return to default. -*/ -func SetFSCreateLabel(label string) error { - return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()), label) -} - -/* -FSCreateLabel returns the default label the kernel which the kernel is using -for file system objects created by this task. "" indicates default. -*/ -func FSCreateLabel() (string, error) { - return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid())) -} - -// CurrentLabel returns the SELinux label of the current process thread, or an error. -func CurrentLabel() (string, error) { - return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid())) -} - -// PidLabel returns the SELinux label of the given pid, or an error. -func PidLabel(pid int) (string, error) { - return readCon(fmt.Sprintf("/proc/%d/attr/current", pid)) -} - -/* -ExecLabel returns the SELinux label that the kernel will use for any programs -that are executed by the current process thread, or an error. 
-*/ -func ExecLabel() (string, error) { - return readCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid())) -} - -func writeCon(name string, val string) error { - out, err := os.OpenFile(name, os.O_WRONLY, 0) - if err != nil { - return err - } - defer out.Close() - - if val != "" { - _, err = out.Write([]byte(val)) - } else { - _, err = out.Write(nil) - } - return err -} - -/* -CanonicalizeContext takes a context string and writes it to the kernel -the function then returns the context that the kernel will use. This function -can be used to see if two contexts are equivalent -*/ -func CanonicalizeContext(val string) (string, error) { - return readWriteCon(filepath.Join(getSelinuxMountPoint(), "context"), val) -} - -func readWriteCon(name string, val string) (string, error) { - var retval string - f, err := os.OpenFile(name, os.O_RDWR, 0) - if err != nil { - return "", err - } - defer f.Close() - - _, err = f.Write([]byte(val)) - if err != nil { - return "", err - } - - _, err = fmt.Fscanf(f, "%s", &retval) - return strings.Trim(retval, "\x00"), err -} - -/* -SetExecLabel sets the SELinux label that the kernel will use for any programs -that are executed by the current process thread, or an error. 
-*/ -func SetExecLabel(label string) error { - return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), label) -} - -// Get returns the Context as a string -func (c Context) Get() string { - if c["level"] != "" { - return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"]) - } - return fmt.Sprintf("%s:%s:%s", c["user"], c["role"], c["type"]) -} - -// NewContext creates a new Context struct from the specified label -func NewContext(label string) Context { - c := make(Context) - - if len(label) != 0 { - con := strings.SplitN(label, ":", 4) - c["user"] = con[0] - c["role"] = con[1] - c["type"] = con[2] - if len(con) > 3 { - c["level"] = con[3] - } - } - return c -} - -// ReserveLabel reserves the MLS/MCS level component of the specified label -func ReserveLabel(label string) { - if len(label) != 0 { - con := strings.SplitN(label, ":", 4) - if len(con) > 3 { - mcsAdd(con[3]) - } - } -} - -func selinuxEnforcePath() string { - return fmt.Sprintf("%s/enforce", getSelinuxMountPoint()) -} - -// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled -func EnforceMode() int { - var enforce int - - enforceS, err := readCon(selinuxEnforcePath()) - if err != nil { - return -1 - } - - enforce, err = strconv.Atoi(string(enforceS)) - if err != nil { - return -1 - } - return enforce -} - -/* -SetEnforceMode sets the current SELinux mode Enforcing, Permissive. -Disabled is not valid, since this needs to be set at boot time. -*/ -func SetEnforceMode(mode int) error { - return writeCon(selinuxEnforcePath(), fmt.Sprintf("%d", mode)) -} - -/* -DefaultEnforceMode returns the systems default SELinux mode Enforcing, -Permissive or Disabled. Note this is is just the default at boot time. -EnforceMode tells you the systems current mode. 
-*/ -func DefaultEnforceMode() int { - switch readConfig(selinuxTag) { - case "enforcing": - return Enforcing - case "permissive": - return Permissive - } - return Disabled -} - -func mcsAdd(mcs string) error { - if mcs == "" { - return nil - } - state.Lock() - defer state.Unlock() - if state.mcsList[mcs] { - return fmt.Errorf("MCS Label already exists") - } - state.mcsList[mcs] = true - return nil -} - -func mcsDelete(mcs string) { - if mcs == "" { - return - } - state.Lock() - defer state.Unlock() - state.mcsList[mcs] = false -} - -func intToMcs(id int, catRange uint32) string { - var ( - SETSIZE = int(catRange) - TIER = SETSIZE - ORD = id - ) - - if id < 1 || id > 523776 { - return "" - } - - for ORD > TIER { - ORD = ORD - TIER - TIER-- - } - TIER = SETSIZE - TIER - ORD = ORD + TIER - return fmt.Sprintf("s0:c%d,c%d", TIER, ORD) -} - -func uniqMcs(catRange uint32) string { - var ( - n uint32 - c1, c2 uint32 - mcs string - ) - - for { - binary.Read(rand.Reader, binary.LittleEndian, &n) - c1 = n % catRange - binary.Read(rand.Reader, binary.LittleEndian, &n) - c2 = n % catRange - if c1 == c2 { - continue - } else { - if c1 > c2 { - c1, c2 = c2, c1 - } - } - mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2) - if err := mcsAdd(mcs); err != nil { - continue - } - break - } - return mcs -} - -/* -ReleaseLabel will unreserve the MLS/MCS Level field of the specified label. -Allowing it to be used by another process. -*/ -func ReleaseLabel(label string) { - if len(label) != 0 { - con := strings.SplitN(label, ":", 4) - if len(con) > 3 { - mcsDelete(con[3]) - } - } -} - -var roFileLabel string - -// ROFileLabel returns the specified SELinux readonly file label -func ROFileLabel() (fileLabel string) { - return roFileLabel -} - -/* -ContainerLabels returns an allocated processLabel and fileLabel to be used for -container labeling by the calling process. 
-*/ -func ContainerLabels() (processLabel string, fileLabel string) { - var ( - val, key string - bufin *bufio.Reader - ) - - if !GetEnabled() { - return "", "" - } - lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot()) - in, err := os.Open(lxcPath) - if err != nil { - return "", "" - } - defer in.Close() - - bufin = bufio.NewReader(in) - - for done := false; !done; { - var line string - if line, err = bufin.ReadString('\n'); err != nil { - if err == io.EOF { - done = true - } else { - goto exit - } - } - line = strings.TrimSpace(line) - if len(line) == 0 { - // Skip blank lines - continue - } - if line[0] == ';' || line[0] == '#' { - // Skip comments - continue - } - if groups := assignRegex.FindStringSubmatch(line); groups != nil { - key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) - if key == "process" { - processLabel = strings.Trim(val, "\"") - } - if key == "file" { - fileLabel = strings.Trim(val, "\"") - } - if key == "ro_file" { - roFileLabel = strings.Trim(val, "\"") - } - } - } - - if processLabel == "" || fileLabel == "" { - return "", "" - } - - if roFileLabel == "" { - roFileLabel = fileLabel - } -exit: - scon := NewContext(processLabel) - if scon["level"] != "" { - mcs := uniqMcs(1024) - scon["level"] = mcs - processLabel = scon.Get() - scon = NewContext(fileLabel) - scon["level"] = mcs - fileLabel = scon.Get() - } - return processLabel, fileLabel -} - -// SecurityCheckContext validates that the SELinux label is understood by the kernel -func SecurityCheckContext(val string) error { - return writeCon(fmt.Sprintf("%s/context", getSelinuxMountPoint()), val) -} - -/* -CopyLevel returns a label with the MLS/MCS level from src label replaces on -the dest label. 
-*/ -func CopyLevel(src, dest string) (string, error) { - if src == "" { - return "", nil - } - if err := SecurityCheckContext(src); err != nil { - return "", err - } - if err := SecurityCheckContext(dest); err != nil { - return "", err - } - scon := NewContext(src) - tcon := NewContext(dest) - mcsDelete(tcon["level"]) - mcsAdd(scon["level"]) - tcon["level"] = scon["level"] - return tcon.Get(), nil -} - -// Prevent users from relabing system files -func badPrefix(fpath string) error { - var badprefixes = []string{"/usr"} - - for _, prefix := range badprefixes { - if fpath == prefix || strings.HasPrefix(fpath, fmt.Sprintf("%s/", prefix)) { - return fmt.Errorf("relabeling content in %s is not allowed", prefix) - } - } - return nil -} - -// Chcon changes the fpath file object to the SELinux label label. -// If the fpath is a directory and recurse is true Chcon will walk the -// directory tree setting the label -func Chcon(fpath string, label string, recurse bool) error { - if label == "" { - return nil - } - if err := badPrefix(fpath); err != nil { - return err - } - callback := func(p string, info os.FileInfo, err error) error { - return SetFileLabel(p, label) - } - - if recurse { - return filepath.Walk(fpath, callback) - } - - return SetFileLabel(fpath, label) -} - -// DupSecOpt takes an SELinux process label and returns security options that -// can will set the SELinux Type and Level for future container processes -func DupSecOpt(src string) []string { - if src == "" { - return nil - } - con := NewContext(src) - if con["user"] == "" || - con["role"] == "" || - con["type"] == "" { - return nil - } - dup := []string{"user:" + con["user"], - "role:" + con["role"], - "type:" + con["type"], - } - - if con["level"] != "" { - dup = append(dup, "level:"+con["level"]) - } - - return dup -} - -// DisableSecOpt returns a security opt that can be used to disabling SELinux -// labeling support for future container processes -func DisableSecOpt() []string { - return 
[]string{"disable"} -} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go deleted file mode 100644 index 7f2ef8504906..000000000000 --- a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go +++ /dev/null @@ -1,78 +0,0 @@ -// +build linux - -package selinux - -import ( - "syscall" - "unsafe" -) - -var _zero uintptr - -// Returns a []byte slice if the xattr is set and nil otherwise -// Requires path and its attribute as arguments -func lgetxattr(path string, attr string) ([]byte, error) { - var sz int - pathBytes, err := syscall.BytePtrFromString(path) - if err != nil { - return nil, err - } - attrBytes, err := syscall.BytePtrFromString(attr) - if err != nil { - return nil, err - } - - // Start with a 128 length byte array - sz = 128 - dest := make([]byte, sz) - destBytes := unsafe.Pointer(&dest[0]) - _sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) - - switch { - case errno == syscall.ENODATA: - return nil, errno - case errno == syscall.ENOTSUP: - return nil, errno - case errno == syscall.ERANGE: - // 128 byte array might just not be good enough, - // A dummy buffer is used ``uintptr(0)`` to get real size - // of the xattrs on disk - _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0) - sz = int(_sz) - if sz < 0 { - return nil, errno - } - dest = make([]byte, sz) - destBytes := unsafe.Pointer(&dest[0]) - _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) - if errno != 0 { - return nil, errno - } - case errno != 0: - return nil, errno - } - sz = int(_sz) - return dest[:sz], nil -} - -func lsetxattr(path 
string, attr string, data []byte, flags int) error { - pathBytes, err := syscall.BytePtrFromString(path) - if err != nil { - return err - } - attrBytes, err := syscall.BytePtrFromString(attr) - if err != nil { - return err - } - var dataBytes unsafe.Pointer - if len(data) > 0 { - dataBytes = unsafe.Pointer(&data[0]) - } else { - dataBytes = unsafe.Pointer(&_zero) - } - _, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0) - if errno != 0 { - return errno - } - return nil -} diff --git a/vendor/github.com/seccomp/libseccomp-golang/LICENSE b/vendor/github.com/seccomp/libseccomp-golang/LICENSE new file mode 100644 index 000000000000..81cf60de29ef --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2015 Matthew Heon +Copyright (c) 2015 Paul Moore +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/seccomp/libseccomp-golang/README b/vendor/github.com/seccomp/libseccomp-golang/README new file mode 100644 index 000000000000..64cab6911d5a --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/README @@ -0,0 +1,26 @@ +libseccomp-golang: Go Language Bindings for the libseccomp Project +=============================================================================== +https://github.com/seccomp/libseccomp-golang +https://github.com/seccomp/libseccomp + +The libseccomp library provides an easy to use, platform independent, interface +to the Linux Kernel's syscall filtering mechanism. The libseccomp API is +designed to abstract away the underlying BPF based syscall filter language and +present a more conventional function-call based filtering interface that should +be familiar to, and easily adopted by, application developers. + +The libseccomp-golang library provides a Go based interface to the libseccomp +library. + +* Online Resources + +The library source repository currently lives on GitHub at the following URLs: + + -> https://github.com/seccomp/libseccomp-golang + -> https://github.com/seccomp/libseccomp + +The project mailing list is currently hosted on Google Groups at the URL below, +please note that a Google account is not required to subscribe to the mailing +list. 
+ + -> https://groups.google.com/d/forum/libseccomp diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go new file mode 100644 index 000000000000..b2c010fc3873 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go @@ -0,0 +1,857 @@ +// +build linux + +// Public API specification for libseccomp Go bindings +// Contains public API for the bindings + +// Package seccomp provides bindings for libseccomp, a library wrapping the Linux +// seccomp syscall. Seccomp enables an application to restrict system call use +// for itself and its children. +package seccomp + +import ( + "fmt" + "os" + "runtime" + "strings" + "sync" + "syscall" + "unsafe" +) + +// C wrapping code + +// #cgo pkg-config: libseccomp +// #include +// #include +import "C" + +// Exported types + +// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a +// per-architecture basis. +type ScmpArch uint + +// ScmpAction represents an action to be taken on a filter rule match in +// libseccomp +type ScmpAction uint + +// ScmpCompareOp represents a comparison operator which can be used in a filter +// rule +type ScmpCompareOp uint + +// ScmpCondition represents a rule in a libseccomp filter context +type ScmpCondition struct { + Argument uint `json:"argument,omitempty"` + Op ScmpCompareOp `json:"operator,omitempty"` + Operand1 uint64 `json:"operand_one,omitempty"` + Operand2 uint64 `json:"operand_two,omitempty"` +} + +// ScmpSyscall represents a Linux System Call +type ScmpSyscall int32 + +// Exported Constants + +const ( + // Valid architectures recognized by libseccomp + // ARM64 and all MIPS architectures are unsupported by versions of the + // library before v2.2 and will return errors if used + + // ArchInvalid is a placeholder to ensure uninitialized ScmpArch + // variables are invalid + ArchInvalid ScmpArch = iota + // ArchNative is the native architecture of the kernel + ArchNative ScmpArch = 
iota + // ArchX86 represents 32-bit x86 syscalls + ArchX86 ScmpArch = iota + // ArchAMD64 represents 64-bit x86-64 syscalls + ArchAMD64 ScmpArch = iota + // ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers) + ArchX32 ScmpArch = iota + // ArchARM represents 32-bit ARM syscalls + ArchARM ScmpArch = iota + // ArchARM64 represents 64-bit ARM syscalls + ArchARM64 ScmpArch = iota + // ArchMIPS represents 32-bit MIPS syscalls + ArchMIPS ScmpArch = iota + // ArchMIPS64 represents 64-bit MIPS syscalls + ArchMIPS64 ScmpArch = iota + // ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers) + ArchMIPS64N32 ScmpArch = iota + // ArchMIPSEL represents 32-bit MIPS syscalls (little endian) + ArchMIPSEL ScmpArch = iota + // ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian) + ArchMIPSEL64 ScmpArch = iota + // ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian, + // 32-bit pointers) + ArchMIPSEL64N32 ScmpArch = iota + // ArchPPC represents 32-bit POWERPC syscalls + ArchPPC ScmpArch = iota + // ArchPPC64 represents 64-bit POWER syscalls (big endian) + ArchPPC64 ScmpArch = iota + // ArchPPC64LE represents 64-bit POWER syscalls (little endian) + ArchPPC64LE ScmpArch = iota + // ArchS390 represents 31-bit System z/390 syscalls + ArchS390 ScmpArch = iota + // ArchS390X represents 64-bit System z/390 syscalls + ArchS390X ScmpArch = iota +) + +const ( + // Supported actions on filter match + + // ActInvalid is a placeholder to ensure uninitialized ScmpAction + // variables are invalid + ActInvalid ScmpAction = iota + // ActKill kills the process + ActKill ScmpAction = iota + // ActTrap throws SIGSYS + ActTrap ScmpAction = iota + // ActErrno causes the syscall to return a negative error code. This + // code can be set with the SetReturnCode method + ActErrno ScmpAction = iota + // ActTrace causes the syscall to notify tracing processes with the + // given error code. 
This code can be set with the SetReturnCode method + ActTrace ScmpAction = iota + // ActAllow permits the syscall to continue execution + ActAllow ScmpAction = iota +) + +const ( + // These are comparison operators used in conditional seccomp rules + // They are used to compare the value of a single argument of a syscall + // against a user-defined constant + + // CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp + // variables are invalid + CompareInvalid ScmpCompareOp = iota + // CompareNotEqual returns true if the argument is not equal to the + // given value + CompareNotEqual ScmpCompareOp = iota + // CompareLess returns true if the argument is less than the given value + CompareLess ScmpCompareOp = iota + // CompareLessOrEqual returns true if the argument is less than or equal + // to the given value + CompareLessOrEqual ScmpCompareOp = iota + // CompareEqual returns true if the argument is equal to the given value + CompareEqual ScmpCompareOp = iota + // CompareGreaterEqual returns true if the argument is greater than or + // equal to the given value + CompareGreaterEqual ScmpCompareOp = iota + // CompareGreater returns true if the argument is greater than the given + // value + CompareGreater ScmpCompareOp = iota + // CompareMaskedEqual returns true if the argument is equal to the given + // value, when masked (bitwise &) against the second given value + CompareMaskedEqual ScmpCompareOp = iota +) + +// Helpers for types + +// GetArchFromString returns an ScmpArch constant from a string representing an +// architecture +func GetArchFromString(arch string) (ScmpArch, error) { + switch strings.ToLower(arch) { + case "x86": + return ArchX86, nil + case "amd64", "x86-64", "x86_64", "x64": + return ArchAMD64, nil + case "x32": + return ArchX32, nil + case "arm": + return ArchARM, nil + case "arm64", "aarch64": + return ArchARM64, nil + case "mips": + return ArchMIPS, nil + case "mips64": + return ArchMIPS64, nil + case "mips64n32": + return 
ArchMIPS64N32, nil + case "mipsel": + return ArchMIPSEL, nil + case "mipsel64": + return ArchMIPSEL64, nil + case "mipsel64n32": + return ArchMIPSEL64N32, nil + case "ppc": + return ArchPPC, nil + case "ppc64": + return ArchPPC64, nil + case "ppc64le": + return ArchPPC64LE, nil + case "s390": + return ArchS390, nil + case "s390x": + return ArchS390X, nil + default: + return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %s", arch) + } +} + +// String returns a string representation of an architecture constant +func (a ScmpArch) String() string { + switch a { + case ArchX86: + return "x86" + case ArchAMD64: + return "amd64" + case ArchX32: + return "x32" + case ArchARM: + return "arm" + case ArchARM64: + return "arm64" + case ArchMIPS: + return "mips" + case ArchMIPS64: + return "mips64" + case ArchMIPS64N32: + return "mips64n32" + case ArchMIPSEL: + return "mipsel" + case ArchMIPSEL64: + return "mipsel64" + case ArchMIPSEL64N32: + return "mipsel64n32" + case ArchPPC: + return "ppc" + case ArchPPC64: + return "ppc64" + case ArchPPC64LE: + return "ppc64le" + case ArchS390: + return "s390" + case ArchS390X: + return "s390x" + case ArchNative: + return "native" + case ArchInvalid: + return "Invalid architecture" + default: + return "Unknown architecture" + } +} + +// String returns a string representation of a comparison operator constant +func (a ScmpCompareOp) String() string { + switch a { + case CompareNotEqual: + return "Not equal" + case CompareLess: + return "Less than" + case CompareLessOrEqual: + return "Less than or equal to" + case CompareEqual: + return "Equal" + case CompareGreaterEqual: + return "Greater than or equal to" + case CompareGreater: + return "Greater than" + case CompareMaskedEqual: + return "Masked equality" + case CompareInvalid: + return "Invalid comparison operator" + default: + return "Unrecognized comparison operator" + } +} + +// String returns a string representation of a seccomp match action +func (a ScmpAction) String() 
string { + switch a & 0xFFFF { + case ActKill: + return "Action: Kill Process" + case ActTrap: + return "Action: Send SIGSYS" + case ActErrno: + return fmt.Sprintf("Action: Return error code %d", (a >> 16)) + case ActTrace: + return fmt.Sprintf("Action: Notify tracing processes with code %d", + (a >> 16)) + case ActAllow: + return "Action: Allow system call" + default: + return "Unrecognized Action" + } +} + +// SetReturnCode adds a return code to a supporting ScmpAction, clearing any +// existing code Only valid on ActErrno and ActTrace. Takes no action otherwise. +// Accepts 16-bit return code as argument. +// Returns a valid ScmpAction of the original type with the new error code set. +func (a ScmpAction) SetReturnCode(code int16) ScmpAction { + aTmp := a & 0x0000FFFF + if aTmp == ActErrno || aTmp == ActTrace { + return (aTmp | (ScmpAction(code)&0xFFFF)<<16) + } + return a +} + +// GetReturnCode returns the return code of an ScmpAction +func (a ScmpAction) GetReturnCode() int16 { + return int16(a >> 16) +} + +// General utility functions + +// GetLibraryVersion returns the version of the library the bindings are built +// against. +// The version is formatted as follows: Major.Minor.Micro +func GetLibraryVersion() (major, minor, micro int) { + return verMajor, verMinor, verMicro +} + +// Syscall functions + +// GetName retrieves the name of a syscall from its number. +// Acts on any syscall number. +// Returns either a string containing the name of the syscall, or an error. +func (s ScmpSyscall) GetName() (string, error) { + return s.GetNameByArch(ArchNative) +} + +// GetNameByArch retrieves the name of a syscall from its number for a given +// architecture. +// Acts on any syscall number. +// Accepts a valid architecture constant. +// Returns either a string containing the name of the syscall, or an error. +// if the syscall is unrecognized or an issue occurred. 
+func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) { + if err := sanitizeArch(arch); err != nil { + return "", err + } + + cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s)) + if cString == nil { + return "", fmt.Errorf("could not resolve syscall name") + } + defer C.free(unsafe.Pointer(cString)) + + finalStr := C.GoString(cString) + return finalStr, nil +} + +// GetSyscallFromName returns the number of a syscall by name on the kernel's +// native architecture. +// Accepts a string containing the name of a syscall. +// Returns the number of the syscall, or an error if no syscall with that name +// was found. +func GetSyscallFromName(name string) (ScmpSyscall, error) { + cString := C.CString(name) + defer C.free(unsafe.Pointer(cString)) + + result := C.seccomp_syscall_resolve_name(cString) + if result == scmpError { + return 0, fmt.Errorf("could not resolve name to syscall") + } + + return ScmpSyscall(result), nil +} + +// GetSyscallFromNameByArch returns the number of a syscall by name for a given +// architecture's ABI. +// Accepts the name of a syscall and an architecture constant. +// Returns the number of the syscall, or an error if an invalid architecture is +// passed or a syscall with that name was not found. +func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) { + if err := sanitizeArch(arch); err != nil { + return 0, err + } + + cString := C.CString(name) + defer C.free(unsafe.Pointer(cString)) + + result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString) + if result == scmpError { + return 0, fmt.Errorf("could not resolve name to syscall") + } + + return ScmpSyscall(result), nil +} + +// MakeCondition creates and returns a new condition to attach to a filter rule. +// Associated rules will only match if this condition is true. +// Accepts the number the argument we are checking, and a comparison operator +// and value to compare to. 
+// The rule will match if argument $arg (zero-indexed) of the syscall is +// $COMPARE_OP the provided comparison value. +// Some comparison operators accept two values. Masked equals, for example, +// will mask $arg of the syscall with the second value provided (via bitwise +// AND) and then compare against the first value provided. +// For example, in the less than or equal case, if the syscall argument was +// 0 and the value provided was 1, the condition would match, as 0 is less +// than or equal to 1. +// Return either an error on bad argument or a valid ScmpCondition struct. +func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) { + var condStruct ScmpCondition + + if comparison == CompareInvalid { + return condStruct, fmt.Errorf("invalid comparison operator") + } else if arg > 5 { + return condStruct, fmt.Errorf("syscalls only have up to 6 arguments") + } else if len(values) > 2 { + return condStruct, fmt.Errorf("conditions can have at most 2 arguments") + } else if len(values) == 0 { + return condStruct, fmt.Errorf("must provide at least one value to compare against") + } + + condStruct.Argument = arg + condStruct.Op = comparison + condStruct.Operand1 = values[0] + if len(values) == 2 { + condStruct.Operand2 = values[1] + } else { + condStruct.Operand2 = 0 // Unused + } + + return condStruct, nil +} + +// Utility Functions + +// GetNativeArch returns architecture token representing the native kernel +// architecture +func GetNativeArch() (ScmpArch, error) { + arch := C.seccomp_arch_native() + + return archFromNative(arch) +} + +// Public Filter API + +// ScmpFilter represents a filter context in libseccomp. +// A filter context is initially empty. Rules can be added to it, and it can +// then be loaded into the kernel. +type ScmpFilter struct { + filterCtx C.scmp_filter_ctx + valid bool + lock sync.Mutex +} + +// NewFilter creates and returns a new filter context. 
+// Accepts a default action to be taken for syscalls which match no rules in +// the filter. +// Returns a reference to a valid filter context, or nil and an error if the +// filter context could not be created or an invalid default action was given. +func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) { + if err := sanitizeAction(defaultAction); err != nil { + return nil, err + } + + fPtr := C.seccomp_init(defaultAction.toNative()) + if fPtr == nil { + return nil, fmt.Errorf("could not create filter") + } + + filter := new(ScmpFilter) + filter.filterCtx = fPtr + filter.valid = true + runtime.SetFinalizer(filter, filterFinalizer) + + return filter, nil +} + +// IsValid determines whether a filter context is valid to use. +// Some operations (Release and Merge) render filter contexts invalid and +// consequently prevent further use. +func (f *ScmpFilter) IsValid() bool { + f.lock.Lock() + defer f.lock.Unlock() + + return f.valid +} + +// Reset resets a filter context, removing all its existing state. +// Accepts a new default action to be taken for syscalls which do not match. +// Returns an error if the filter or action provided are invalid. +func (f *ScmpFilter) Reset(defaultAction ScmpAction) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeAction(defaultAction); err != nil { + return err + } else if !f.valid { + return errBadFilter + } + + retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative()) + if retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Release releases a filter context, freeing its memory. Should be called after +// loading into the kernel, when the filter is no longer needed. +// After calling this function, the given filter is no longer valid and cannot +// be used. +// Release() will be invoked automatically when a filter context is garbage +// collected, but can also be called manually to free memory. 
+func (f *ScmpFilter) Release() { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return + } + + f.valid = false + C.seccomp_release(f.filterCtx) +} + +// Merge merges two filter contexts. +// The source filter src will be released as part of the process, and will no +// longer be usable or valid after this call. +// To be merged, filters must NOT share any architectures, and all their +// attributes (Default Action, Bad Arch Action, No New Privs and TSync bools) +// must match. +// The filter src will be merged into the filter this is called on. +// The architectures of the src filter not present in the destination, and all +// associated rules, will be added to the destination. +// Returns an error if merging the filters failed. +func (f *ScmpFilter) Merge(src *ScmpFilter) error { + f.lock.Lock() + defer f.lock.Unlock() + + src.lock.Lock() + defer src.lock.Unlock() + + if !src.valid || !f.valid { + return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized") + } + + // Merge the filters + retCode := C.seccomp_merge(f.filterCtx, src.filterCtx) + if syscall.Errno(-1*retCode) == syscall.EINVAL { + return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter") + } else if retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + src.valid = false + + return nil +} + +// IsArchPresent checks if an architecture is present in a filter. +// If a filter contains an architecture, it uses its default action for +// syscalls which do not match rules in it, and its rules can match syscalls +// for that ABI. +// If a filter does not contain an architecture, all syscalls made to that +// kernel ABI will fail with the filter's default Bad Architecture Action +// (by default, killing the process). +// Accepts an architecture constant. 
+// Returns true if the architecture is present in the filter, false otherwise, +// and an error on an invalid filter context, architecture constant, or an +// issue with the call to libseccomp. +func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return false, err + } else if !f.valid { + return false, errBadFilter + } + + retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative()) + if syscall.Errno(-1*retCode) == syscall.EEXIST { + // -EEXIST is "arch not present" + return false, nil + } else if retCode != 0 { + return false, syscall.Errno(-1 * retCode) + } + + return true, nil +} + +// AddArch adds an architecture to the filter. +// Accepts an architecture constant. +// Returns an error on invalid filter context or architecture token, or an +// issue with the call to libseccomp. +func (f *ScmpFilter) AddArch(arch ScmpArch) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return err + } else if !f.valid { + return errBadFilter + } + + // Libseccomp returns -EEXIST if the specified architecture is already + // present. Succeed silently in this case, as it's not fatal, and the + // architecture is present already. + retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative()) + if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// RemoveArch removes an architecture from the filter. +// Accepts an architecture constant. +// Returns an error on invalid filter context or architecture token, or an +// issue with the call to libseccomp. 
+func (f *ScmpFilter) RemoveArch(arch ScmpArch) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return err + } else if !f.valid { + return errBadFilter + } + + // Similar to AddArch, -EEXIST is returned if the arch is not present + // Succeed silently in that case, this is not fatal and the architecture + // is not present in the filter after RemoveArch + retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative()) + if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Load loads a filter context into the kernel. +// Returns an error if the filter context is invalid or the syscall failed. +func (f *ScmpFilter) Load() error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_load(f.filterCtx); retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// GetDefaultAction returns the default action taken on a syscall which does not +// match a rule in the filter, or an error if an issue was encountered +// retrieving the value. +func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) { + action, err := f.getFilterAttr(filterAttrActDefault) + if err != nil { + return 0x0, err + } + + return actionFromNative(action) +} + +// GetBadArchAction returns the default action taken on a syscall for an +// architecture not in the filter, or an error if an issue was encountered +// retrieving the value. +func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) { + action, err := f.getFilterAttr(filterAttrActBadArch) + if err != nil { + return 0x0, err + } + + return actionFromNative(action) +} + +// GetNoNewPrivsBit returns the current state the No New Privileges bit will be set +// to on the filter being loaded, or an error if an issue was encountered +// retrieving the value. 
+// The No New Privileges bit tells the kernel that processes started through
+// exec() cannot gain more privileges than the process that called exec().
+// For example, a process with No New Privileges set would be unable to exec
+// setuid/setgid executables.
+func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) {
+	nnp, err := f.getFilterAttr(filterAttrNNP)
+	if err != nil {
+		return false, err
+	}
+
+	// The attribute is a plain integer: zero means the bit is clear and
+	// any other value means it is set.
+	return nnp != 0, nil
+}
+
+// GetTsyncBit reports whether Thread Synchronization will be enabled on the
+// filter being loaded, or returns an error if an issue was encountered
+// retrieving the value.
+// Thread Sync ensures that all members of the thread group of the calling
+// process will share the same Seccomp filter set.
+// Tsync is a fairly recent addition to the Linux kernel and older kernels
+// lack support. If the running kernel does not support Tsync and it is
+// requested in a filter, Libseccomp will not enable TSync support and will
+// proceed as normal.
+// This function is unavailable before v2.2 of libseccomp and will return an
+// error.
+func (f *ScmpFilter) GetTsyncBit() (bool, error) {
+	tsync, err := f.getFilterAttr(filterAttrTsync)
+	if err != nil {
+		return false, err
+	}
+
+	// The attribute is a plain integer: zero means Tsync is off and any
+	// other value means it is on.
+	return tsync != 0, nil
+}
+
+// SetBadArchAction sets the action taken on a syscall made for an
+// architecture that is not in the filter.
+// The action is validated before it is passed on to libseccomp.
+// Returns an error if the action is invalid or an issue was encountered
+// setting the value.
+func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error {
+	// Refuse unknown actions up front; toNative does no error handling
+	if err := sanitizeAction(action); err != nil {
+		return err
+	}
+	native := action.toNative()
+
+	return f.setFilterAttr(filterAttrActBadArch, native)
+}
+
+// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be
+// applied on filter load.
+// Returns an error if an issue was encountered setting the value.
+// Filters with No New Privileges set to 0 can only be loaded if the process
+// has the CAP_SYS_ADMIN capability.
+func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error {
+	// 0x1 requests the bit; the zero value leaves it cleared
+	var nnp C.uint32_t
+	if state {
+		nnp = 0x1
+	}
+
+	return f.setFilterAttr(filterAttrNNP, nnp)
+}
+
+// SetTsync sets whether Thread Synchronization will be enabled on the filter
+// being loaded. Returns an error if setting Tsync failed, or the filter is
+// invalid.
+// Thread Sync ensures that all members of the thread group of the calling
+// process will share the same Seccomp filter set.
+// Tsync is a fairly recent addition to the Linux kernel and older kernels
+// lack support. If the running kernel does not support Tsync and it is
+// requested in a filter, Libseccomp will not enable TSync support and will
+// proceed as normal.
+// This function is unavailable before v2.2 of libseccomp and will return an
+// error.
+func (f *ScmpFilter) SetTsync(enable bool) error {
+	// 0x1 requests Tsync; the zero value leaves it disabled
+	var tsync C.uint32_t
+	if enable {
+		tsync = 0x1
+	}
+
+	return f.setFilterAttr(filterAttrTsync, tsync)
+}
+
+// SetSyscallPriority sets a syscall's priority.
+// The priority is a hint to the filter generator in libseccomp about how
+// important this syscall is. High-priority syscalls are placed first in the
+// filter code and incur less overhead (at the expense of lower-priority
+// syscalls).
+func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	rc := C.seccomp_syscall_priority(f.filterCtx, C.int(call), C.uint8_t(priority))
+	if rc != 0 {
+		return syscall.Errno(-1 * rc)
+	}
+
+	return nil
+}
+
+// AddRule adds a single rule for an unconditional action on a syscall.
+// Accepts the number of the syscall and the action to be taken on the call
+// being made.
+// Returns an error if an issue was encountered adding the rule.
+func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error {
+	return f.addRuleGeneric(call, action, false, nil)
+}
+
+// AddRuleExact adds a single rule for an unconditional action on a syscall.
+// Accepts the number of the syscall and the action to be taken on the call
+// being made.
+// No modifications will be made to the rule, and it will fail to add if it
+// cannot be applied to the current architecture without modification.
+// The rule will function exactly as described, but it may not function identically
+// on (or be able to be applied to) all architectures.
+// Returns an error if an issue was encountered adding the rule.
+func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error {
+	return f.addRuleGeneric(call, action, true, nil)
+}
+
+// AddRuleConditional adds a single rule for a conditional action on a syscall.
+// Returns an error if an issue was encountered adding the rule.
+// All conditions must match for the rule to match.
+// There is a bug in library versions below v2.2.1 which can, in some cases,
+// cause conditions to be lost when more than one are used. Consequently,
+// AddRuleConditional is disabled on library versions lower than v2.2.1
+func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
+	return f.addRuleGeneric(call, action, false, conds)
+}
+
+// AddRuleConditionalExact adds a single rule for a conditional action on a
+// syscall. All conditions must match for the rule to match.
+// No modifications will be made to the rule, and it will fail to add if it
+// cannot be applied to the current architecture without modification.
+// The rule will function exactly as described, but it may not function identically
+// on (or be able to be applied to) all architectures.
+// Returns an error if an issue was encountered adding the rule.
+// There is a bug in library versions below v2.2.1 which can, in some cases,
+// cause conditions to be lost when more than one are used. Consequently,
+// AddRuleConditionalExact is disabled on library versions lower than v2.2.1
+func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
+	return f.addRuleGeneric(call, action, true, conds)
+}
+
+// ExportPFC outputs a PFC-formatted, human-readable dump of a filter context's
+// rules to a file.
+// Accepts file to write to (must be open for writing).
+// Returns an error if the filter is invalid or writing to the file fails.
+func (f *ScmpFilter) ExportPFC(file *os.File) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	fd := file.Fd()
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 {
+		return syscall.Errno(-1 * retCode)
+	}
+
+	return nil
+}
+
+// ExportBPF outputs a Berkeley Packet Filter-formatted, kernel-readable dump of
+// a filter context's rules to a file.
+// Accepts file to write to (must be open for writing).
+// Returns an error if the filter is invalid or writing to the file fails.
+func (f *ScmpFilter) ExportBPF(file *os.File) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	fd := file.Fd()
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 {
+		return syscall.Errno(-1 * retCode)
+	}
+
+	return nil
+}
diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go
new file mode 100644
index 000000000000..ab67a3dedc0c
--- /dev/null
+++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go
@@ -0,0 +1,539 @@
+// +build linux
+
+// Internal functions for libseccomp Go bindings
+// No exported functions
+
+package seccomp
+
+import (
+	"fmt"
+	"os"
+	"syscall"
+)
+
+// Unexported C wrapping code - provides the C-Golang interface
+// Get the seccomp header in scope
+// Need stdlib.h for calloc() and free()
+
+// #cgo pkg-config: libseccomp
+/*
+#include <stdlib.h>
+#include <seccomp.h>
+
+#if SCMP_VER_MAJOR < 2
+#error Minimum supported version of Libseccomp is v2.1.0
+#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 1
+#error Minimum supported version of Libseccomp is v2.1.0
+#endif
+
+#define ARCH_BAD ~0
+
+const uint32_t C_ARCH_BAD = ARCH_BAD;
+
+#ifndef SCMP_ARCH_AARCH64
+#define SCMP_ARCH_AARCH64 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPS
+#define SCMP_ARCH_MIPS ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPS64
+#define SCMP_ARCH_MIPS64 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPS64N32
+#define SCMP_ARCH_MIPS64N32 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPSEL
+#define SCMP_ARCH_MIPSEL ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPSEL64
+#define SCMP_ARCH_MIPSEL64 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPSEL64N32
+#define SCMP_ARCH_MIPSEL64N32 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_PPC
+#define SCMP_ARCH_PPC ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_PPC64
+#define SCMP_ARCH_PPC64 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_PPC64LE
+#define SCMP_ARCH_PPC64LE ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_S390
+#define SCMP_ARCH_S390 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_S390X
+#define SCMP_ARCH_S390X ARCH_BAD
+#endif
+
+const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE;
+const uint32_t C_ARCH_X86 = SCMP_ARCH_X86;
+const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64;
+const uint32_t C_ARCH_X32 = SCMP_ARCH_X32;
+const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM;
+const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64;
+const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS;
+const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64;
+const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32;
+const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL;
+const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64;
+const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32;
+const uint32_t C_ARCH_PPC = SCMP_ARCH_PPC;
+const uint32_t C_ARCH_PPC64 = SCMP_ARCH_PPC64;
+const uint32_t C_ARCH_PPC64LE = SCMP_ARCH_PPC64LE;
+const uint32_t C_ARCH_S390 = SCMP_ARCH_S390;
+const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X;
+
+const uint32_t C_ACT_KILL = SCMP_ACT_KILL;
+const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP;
+const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0);
+const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0);
+const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW;
+
+// If TSync is not supported, make sure it doesn't map to a supported filter attribute
+// Don't worry about major version < 2, the minimum version checks should catch that case
+#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2
+#define SCMP_FLTATR_CTL_TSYNC _SCMP_CMP_MIN
+#endif
+
+const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT;
+const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH;
+const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP;
+const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC;
+
+const int C_CMP_NE = (int)SCMP_CMP_NE;
+const int C_CMP_LT = (int)SCMP_CMP_LT;
+const int C_CMP_LE = (int)SCMP_CMP_LE;
+const int C_CMP_EQ = (int)SCMP_CMP_EQ;
+const int C_CMP_GE = (int)SCMP_CMP_GE;
+const int C_CMP_GT = (int)SCMP_CMP_GT;
+const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ;
+
+const int C_VERSION_MAJOR = SCMP_VER_MAJOR;
+const int C_VERSION_MINOR = SCMP_VER_MINOR;
+const int C_VERSION_MICRO = SCMP_VER_MICRO;
+
+typedef struct scmp_arg_cmp* scmp_cast_t;
+
+// Allocate a zeroed array of scmp_arg_cmp structs (NULL if out of memory)
+void*
+make_arg_cmp_array(unsigned int length)
+{
+	return calloc(length, sizeof(struct scmp_arg_cmp));
+}
+
+// Fill in one element of an array made by make_arg_cmp_array
+void
+add_struct_arg_cmp(
+	struct scmp_arg_cmp* arr,
+	unsigned int pos,
+	unsigned int arg,
+	int compare,
+	uint64_t a,
+	uint64_t b
+	)
+{
+	arr[pos].arg = arg;
+	arr[pos].op = compare;
+	arr[pos].datum_a = a;
+	arr[pos].datum_b = b;
+}
+*/
+import "C"
+
+// Nonexported types
+type scmpFilterAttr uint32
+
+// Nonexported constants
+
+const (
+	filterAttrActDefault scmpFilterAttr = iota
+	filterAttrActBadArch scmpFilterAttr = iota
+	filterAttrNNP        scmpFilterAttr = iota
+	filterAttrTsync      scmpFilterAttr = iota
+)
+
+const (
+	// An error return from certain libseccomp functions
+	scmpError C.int = -1
+	// Comparison boundaries to check for architecture validity
+	archStart ScmpArch = ArchNative
+	archEnd   ScmpArch = ArchS390X
+	// Comparison boundaries to check for action validity
+	actionStart ScmpAction = ActKill
+	actionEnd   ScmpAction = ActAllow
+	// Comparison boundaries to check for comparison operator validity
+	compareOpStart ScmpCompareOp = CompareNotEqual
+	compareOpEnd   ScmpCompareOp = CompareMaskedEqual
+)
+
+var (
+	// Error thrown on bad filter context
+	errBadFilter = fmt.Errorf("filter is invalid or uninitialized")
+	// Constants representing library major, minor, and micro versions
+	verMajor = int(C.C_VERSION_MAJOR)
+	verMinor = int(C.C_VERSION_MINOR)
+	verMicro = int(C.C_VERSION_MICRO)
+)
+
+// Nonexported functions
+
+// Check if library version is greater than or equal to the given one
+func checkVersionAbove(major, minor, micro int) bool {
+	return (verMajor > major) ||
+		(verMajor == major && verMinor > minor) ||
+		(verMajor == major && verMinor == minor && verMicro >= micro)
+}
+
+// Init function: Verify library version is appropriate
+func init() {
+	if !checkVersionAbove(2, 1, 0) {
+		fmt.Fprintf(os.Stderr, "Libseccomp version too low: minimum supported is 2.1.0, detected %d.%d.%d", C.C_VERSION_MAJOR, C.C_VERSION_MINOR, C.C_VERSION_MICRO)
+		os.Exit(-1)
+	}
+}
+
+// Filter helpers
+
+// Filter finalizer - ensure that kernel context for filters is freed
+func filterFinalizer(f *ScmpFilter) {
+	f.Release()
+}
+
+// Get a raw filter attribute
+func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return 0x0, errBadFilter
+	}
+
+	if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
+		return 0x0, fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
+	}
+
+	var attribute C.uint32_t
+
+	retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute)
+	if retCode != 0 {
+		return 0x0, syscall.Errno(-1 * retCode)
+	}
+
+	return attribute, nil
+}
+
+// Set a raw filter attribute
+func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
+		return fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
+	}
+
+	retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value)
+	if retCode != 0 {
+		return syscall.Errno(-1 * retCode)
+	}
+
+	return nil
+}
+
+// DOES NOT LOCK OR CHECK VALIDITY
+// Assumes caller has already done this
+// Wrapper for seccomp_rule_add_... functions
+// conds points to an array of length conditions, all of which are attached
+// to the single rule that is added; length 0 with a nil conds adds an
+// unconditional rule
+func (f *ScmpFilter) addRuleArray(call ScmpSyscall, action ScmpAction, exact bool, length C.uint, conds C.scmp_cast_t) error {
+	if length != 0 && conds == nil {
+		return fmt.Errorf("null conditions list, but length is nonzero")
+	}
+
+	var retCode C.int
+	if exact {
+		retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, conds)
+	} else {
+		retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, conds)
+	}
+
+	switch errno := syscall.Errno(-1 * retCode); {
+	case errno == syscall.EFAULT:
+		return fmt.Errorf("unrecognized syscall")
+	case errno == syscall.EPERM:
+		return fmt.Errorf("requested action matches default action of filter")
+	case retCode != 0:
+		return errno
+	}
+
+	return nil
+}
+
+// DOES NOT LOCK OR CHECK VALIDITY
+// Assumes caller has already done this
+// Adds a rule with at most one condition: a nil cond adds an unconditional
+// rule, a non-nil cond is treated as a single scmp_arg_cmp
+func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, cond C.scmp_cast_t) error {
+	var length C.uint
+	if cond != nil {
+		length = 1
+	}
+
+	return f.addRuleArray(call, action, exact, length, cond)
+}
+
+// Generic add function for filter rules
+// All conditions in conds are attached to one rule, so the rule only matches
+// when every condition matches; an empty conds adds an unconditional rule
+func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	if len(conds) == 0 {
+		return f.addRuleWrapper(call, action, exact, nil)
+	}
+
+	// We don't support conditional filtering in library version v2.1
+	if !checkVersionAbove(2, 2, 1) {
+		return fmt.Errorf("conditional filtering requires libseccomp version >= 2.2.1")
+	}
+
+	// Build a single C array holding every condition. Adding the conditions
+	// one rule at a time would make the action fire when ANY of them
+	// matches, rather than only when ALL of them do.
+	argsArr := C.make_arg_cmp_array(C.uint(len(conds)))
+	if argsArr == nil {
+		return fmt.Errorf("error allocating memory for conditions")
+	}
+	defer C.free(argsArr)
+
+	for i, cond := range conds {
+		C.add_struct_arg_cmp(C.scmp_cast_t(argsArr), C.uint(i),
+			C.uint(cond.Argument), cond.Op.toNative(),
+			C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2))
+	}
+
+	return f.addRuleArray(call, action, exact, C.uint(len(conds)), C.scmp_cast_t(argsArr))
+}
+
+// Generic Helpers
+
+// Helper - Sanitize Arch token input
+func sanitizeArch(in ScmpArch) error {
+	if in < archStart || in > archEnd {
+		return fmt.Errorf("unrecognized architecture")
+	}
+
+	if in.toNative() == C.C_ARCH_BAD {
+		return fmt.Errorf("architecture is not supported on this version of the library")
+	}
+
+	return nil
+}
+
+// Helper - Sanitize action token input
+// The low 16 bits select the action; the high 16 bits may only be set for
+// Trace and Errno
+func sanitizeAction(in ScmpAction) error {
+	inTmp := in & 0x0000FFFF
+	if inTmp < actionStart || inTmp > actionEnd {
+		return fmt.Errorf("unrecognized action")
+	}
+
+	if inTmp != ActTrace && inTmp != ActErrno && (in&0xFFFF0000) != 0 {
+		return fmt.Errorf("highest 16 bits must be zeroed except for Trace and Errno")
+	}
+
+	return nil
+}
+
+// Helper - Sanitize comparison operator token input
+func sanitizeCompareOp(in ScmpCompareOp) error {
+	if in < compareOpStart || in > compareOpEnd {
+		return fmt.Errorf("unrecognized comparison operator")
+	}
+
+	return nil
+}
+
+// Convert a native libseccomp architecture token to its Go equivalent
+// Returns an error if the token matches no known architecture
+func archFromNative(a C.uint32_t) (ScmpArch, error) {
+	switch a {
+	case C.C_ARCH_X86:
+		return ArchX86, nil
+	case C.C_ARCH_X86_64:
+		return ArchAMD64, nil
+	case C.C_ARCH_X32:
+		return ArchX32, nil
+	case C.C_ARCH_ARM:
+		return ArchARM, nil
+	case C.C_ARCH_NATIVE:
+		return ArchNative, nil
+	case C.C_ARCH_AARCH64:
+		return ArchARM64, nil
+	case C.C_ARCH_MIPS:
+		return ArchMIPS, nil
+	case C.C_ARCH_MIPS64:
+		return ArchMIPS64, nil
+	case C.C_ARCH_MIPS64N32:
+		return ArchMIPS64N32, nil
+	case C.C_ARCH_MIPSEL:
+		return ArchMIPSEL, nil
+	case C.C_ARCH_MIPSEL64:
+		return ArchMIPSEL64, nil
+	case C.C_ARCH_MIPSEL64N32:
+		return ArchMIPSEL64N32, nil
+	case C.C_ARCH_PPC:
+		return ArchPPC, nil
+	case C.C_ARCH_PPC64:
+		return ArchPPC64, nil
+	case C.C_ARCH_PPC64LE:
+		return ArchPPC64LE, nil
+	case C.C_ARCH_S390:
+		return ArchS390, nil
+	case C.C_ARCH_S390X:
+		return ArchS390X, nil
+	default:
+		return 0x0, fmt.Errorf("unrecognized architecture")
+	}
+}
+
+// Only use with sanitized arches, no error handling
+func (a ScmpArch) toNative() C.uint32_t {
+	switch a {
+	case ArchX86:
+		return C.C_ARCH_X86
+	case ArchAMD64:
+		return C.C_ARCH_X86_64
+	case ArchX32:
+		return C.C_ARCH_X32
+	case ArchARM:
+		return C.C_ARCH_ARM
+	case ArchARM64:
+		return C.C_ARCH_AARCH64
+	case ArchMIPS:
+		return C.C_ARCH_MIPS
+	case ArchMIPS64:
+		return C.C_ARCH_MIPS64
+	case ArchMIPS64N32:
+		return C.C_ARCH_MIPS64N32
+	case ArchMIPSEL:
+		return C.C_ARCH_MIPSEL
+	case ArchMIPSEL64:
+		return C.C_ARCH_MIPSEL64
+	case ArchMIPSEL64N32:
+		return C.C_ARCH_MIPSEL64N32
+	case ArchPPC:
+		return C.C_ARCH_PPC
+	case ArchPPC64:
+		return C.C_ARCH_PPC64
+	case ArchPPC64LE:
+		return C.C_ARCH_PPC64LE
+	case ArchS390:
+		return C.C_ARCH_S390
+	case ArchS390X:
+		return C.C_ARCH_S390X
+	case ArchNative:
+		return C.C_ARCH_NATIVE
+	default:
+		return 0x0
+	}
+}
+
+// Only use with sanitized ops, no error handling
+func (a ScmpCompareOp) toNative() C.int {
+	switch a {
+	case CompareNotEqual:
+		return C.C_CMP_NE
+	case CompareLess:
+		return C.C_CMP_LT
+	case CompareLessOrEqual:
+		return C.C_CMP_LE
+	case CompareEqual:
+		return C.C_CMP_EQ
+	case CompareGreaterEqual:
+		return C.C_CMP_GE
+	case CompareGreater:
+		return C.C_CMP_GT
+	case CompareMaskedEqual:
+		return C.C_CMP_MASKED_EQ
+	default:
+		return 0x0
+	}
+}
+
+// Convert a native libseccomp action to its Go equivalent
+// The low 16 bits of Errno and Trace actions are carried over as the return code
+func actionFromNative(a C.uint32_t) (ScmpAction, error) {
+	aTmp := a & 0xFFFF
+	switch a & 0xFFFF0000 {
+	case C.C_ACT_KILL:
+		return ActKill, nil
+	case C.C_ACT_TRAP:
+		return ActTrap, nil
+	case C.C_ACT_ERRNO:
+		return ActErrno.SetReturnCode(int16(aTmp)), nil
+	case C.C_ACT_TRACE:
+		return ActTrace.SetReturnCode(int16(aTmp)), nil
+	case C.C_ACT_ALLOW:
+		return ActAllow, nil
+	default:
+		return 0x0, fmt.Errorf("unrecognized action")
+	}
+}
+
+// Only use with sanitized actions, no error handling
+func (a ScmpAction) toNative() C.uint32_t {
+	switch a & 0xFFFF {
+	case ActKill:
+		return C.C_ACT_KILL
+	case ActTrap:
+		return C.C_ACT_TRAP
+	case ActErrno:
+		return C.C_ACT_ERRNO | (C.uint32_t(a) >> 16)
+	case ActTrace:
+		return C.C_ACT_TRACE | (C.uint32_t(a) >> 16)
+	case ActAllow:
+		return C.C_ACT_ALLOW
+	default:
+		return 0x0
+	}
+}
+
+// Internal only, assumes safe attribute
+func (a scmpFilterAttr) toNative() uint32 {
+	switch a {
+	case filterAttrActDefault:
+		return uint32(C.C_ATTRIBUTE_DEFAULT)
+	case filterAttrActBadArch:
+		return uint32(C.C_ATTRIBUTE_BADARCH)
+	case filterAttrNNP:
+		return uint32(C.C_ATTRIBUTE_NNP)
+	case filterAttrTsync:
+		return uint32(C.C_ATTRIBUTE_TSYNC)
+	default:
+		return 0x0
+	}
+}