diff --git a/cmd/buildkitd/config/config.go b/cmd/buildkitd/config/config.go index 758fa62b5c62..a76fb0b10741 100644 --- a/cmd/buildkitd/config/config.go +++ b/cmd/buildkitd/config/config.go @@ -49,12 +49,13 @@ type TLSConfig struct { } type OCIConfig struct { - Enabled *bool `toml:"enabled"` - Labels map[string]string `toml:"labels"` - Platforms []string `toml:"platforms"` - Snapshotter string `toml:"snapshotter"` - Rootless bool `toml:"rootless"` - GCPolicy []GCPolicy `toml:"gcpolicy"` + Enabled *bool `toml:"enabled"` + Labels map[string]string `toml:"labels"` + Platforms []string `toml:"platforms"` + Snapshotter string `toml:"snapshotter"` + Rootless bool `toml:"rootless"` + NoProcessSandbox bool `toml:"noProcessSandbox"` + GCPolicy []GCPolicy `toml:"gcpolicy"` } type ContainerdConfig struct { diff --git a/cmd/buildkitd/main_oci_worker.go b/cmd/buildkitd/main_oci_worker.go index 68cd1c2e8bac..d7ccc8d73a20 100644 --- a/cmd/buildkitd/main_oci_worker.go +++ b/cmd/buildkitd/main_oci_worker.go @@ -10,6 +10,7 @@ import ( "github.com/containerd/containerd/snapshots/native" "github.com/containerd/containerd/snapshots/overlay" "github.com/moby/buildkit/cmd/buildkitd/config" + "github.com/moby/buildkit/executor/oci" "github.com/moby/buildkit/worker" "github.com/moby/buildkit/worker/base" "github.com/moby/buildkit/worker/runc" @@ -66,6 +67,11 @@ func init() { Usage: u, }) } + flags = append(flags, cli.BoolFlag{ + Name: "oci-worker-no-process-sandbox", + Usage: "use the host PID namespace and procfs (WARNING: allows build containers to kill (and potentially ptrace) an arbitrary process in the host namespace)", + }) + registerWorkerInitializer( workerInitializer{ fn: ociWorkerInitializer, @@ -109,6 +115,9 @@ func applyOCIFlags(c *cli.Context, cfg *config.Config) error { if c.GlobalIsSet("oci-worker-rootless") { cfg.Workers.OCI.Rootless = c.GlobalBool("oci-worker-rootless") } + if c.GlobalIsSet("oci-worker-no-process-sandbox") { + cfg.Workers.OCI.NoProcessSandbox = c.GlobalBool("oci-worker-no-process-sandbox") + } if platforms := c.GlobalStringSlice("oci-worker-platform"); len(platforms) != 0 { cfg.Workers.OCI.Platforms = platforms @@ -136,7 +145,17 @@ func ociWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([]worker if cfg.Rootless { logrus.Debugf("running in rootless mode") } - opt, err := runc.NewWorkerOpt(common.config.Root, snFactory, cfg.Rootless, cfg.Labels) + + processMode := oci.ProcessSandbox + if cfg.NoProcessSandbox { + logrus.Warn("NoProcessSandbox is enabled. Note that NoProcessSandbox allows build containers to kill (and potentially ptrace) an arbitrary process in the BuildKit host namespace. NoProcessSandbox should be enabled only when the BuildKit is running in a container as an unprivileged user.") + if !cfg.Rootless { + return nil, errors.New("can't enable NoProcessSandbox without Rootless") + } + processMode = oci.NoProcessSandbox + } + + opt, err := runc.NewWorkerOpt(common.config.Root, snFactory, cfg.Rootless, processMode, cfg.Labels) if err != nil { return nil, err } diff --git a/docs/rootless.md b/docs/rootless.md index ef42886a5c21..5d4f1669edea 100644 --- a/docs/rootless.md +++ b/docs/rootless.md @@ -6,7 +6,6 @@ Requirements: - RHEL/CentOS 7 requires `sudo sh -c "echo 28633 > /proc/sys/user/max_user_namespaces"`. You may also need `sudo grubby --args="namespace.unpriv_enable=1 user_namespace.enable=1" --update-kernel="$(grubby --default-kernel)"`. - `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package. For RHEL/CentOS 7, RPM is not officially provided but available at https://copr.fedorainfracloud.org/coprs/vbatts/shadow-utils-newxidmap/ . - `/etc/subuid` and `/etc/subgid` should contain >= 65536 sub-IDs. e.g. `penguin:231072:65536`. -- To run in a Docker container with non-root `USER`, `docker run --privileged` is still required. See also Jessie's blog: https://blog.jessfraz.com/post/building-container-images-securely-on-kubernetes/ ## Set up @@ -63,8 +62,29 @@ Docker image is available as [`moby/buildkit:rootless`](https://hub.docker.com/r $ docker run --name buildkitd -d --privileged -p 1234:1234 moby/buildkit:rootless --addr tcp://0.0.0.0:1234 ``` -`docker run` requires `--privileged` but the BuildKit daemon is executed as a normal user. -See [`docker/cli#1347`](https://github.com/docker/cli/pull/1347) for the ongoing work to remove this requirement +``` +$ go get ./examples/build-using-dockerfile +$ build-using-dockerfile --buildkit-addr tcp://127.0.0.1:1234 -t foo /path/to/somewhere +``` + +### Security consideration + +Although `moby/buildkit:rootless` executes the BuildKit daemon as a normal user, `docker run` still requires `--privileged`. +This is to allow build executor containers to mount `/proc`, by providing "unmasked" `/proc` to the BuildKit daemon container. + +See [`docker/cli#1347`](https://github.com/docker/cli/pull/1347) for the ongoing work to remove this requirement. +See also [Disabling process sandbox](#disabling-process-sandbox). + +#### UID/GID + +The `moby/buildkit:rootless` image has the following UID/GID configuration: + +Actual ID (shown in the host and the BuildKit daemon container)| Mapped ID (shown in build executor containers) +----------|---------- +1000 | 0 +100000 | 1 +... | ... +165535 | 65536 ``` $ docker exec buildkitd id @@ -75,9 +95,91 @@ PID USER TIME COMMAND 13 user 0:00 /proc/self/exe buildkitd --addr tcp://0.0.0.0:1234 21 user 0:00 buildkitd --addr tcp://0.0.0.0:1234 29 user 0:00 ps aux +$ docker exec cat /etc/subuid +user:100000:65536 ``` +To change the UID/GID configuration, you need to modify and build the BuildKit image manually. ``` -$ go get ./examples/build-using-dockerfile -$ build-using-dockerfile --buildkit-addr tcp://127.0.0.1:1234 -t foo /path/to/somewhere +$ vi hack/dockerfiles/test.Dockerfile +$ docker build -t buildkit-rootless-custom --target rootless -f hack/dockerfiles/test.Dockerfile . +``` + +#### Disabling process sandbox + +By passing `--oci-worker-no-process-sandbox` to the `buildkitd` arguments, BuildKit can be executed in a container without `--privileged`. +However, you still need to pass `--security-opt seccomp=unconfined --security-opt apparmor=unconfined` to `docker run`. + +``` +$ docker run --name buildkitd -d --security-opt seccomp=unconfined --security-opt apparmor=unconfined -p 1234:1234 moby/buildkit:rootless --addr tcp://0.0.0.0:1234 --oci-worker-no-process-sandbox ``` + +Note that `--oci-worker-no-process-sandbox` allows build executor containers to `kill` (and potentially `ptrace` depending on the seccomp configuration) an arbitrary process in the BuildKit daemon container. + + +## Set up (using Kubernetes) + +### With `securityContext` + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: buildkitd + name: buildkitd +spec: + selector: + matchLabels: + app: buildkitd + template: + metadata: + labels: + app: buildkitd + spec: + containers: + - image: moby/buildkit:rootless + args: + - --addr + - tcp://0.0.0.0:1234 + name: buildkitd + ports: + - containerPort: 1234 + securityContext: + privileged: true +``` + +This configuration requires privileged containers to be enabled. + +If you are using Kubernetes v1.12+ with either Docker v18.06+, containerd v1.2+, or CRI-O v1.12+ as the CRI runtime, you can replace `privileged: true` with `procMount: Unmasked`. + +### Without `securityContext` but with `--oci-worker-no-process-sandbox` + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: buildkitd + name: buildkitd +spec: + selector: + matchLabels: + app: buildkitd + template: + metadata: + labels: + app: buildkitd + spec: + containers: + - image: moby/buildkit:rootless + args: + - --addr + - tcp://0.0.0.0:1234 + - --oci-worker-no-process-sandbox + name: buildkitd + ports: + - containerPort: 1234 +``` + +See [Disabling process sandbox](#disabling-process-sandbox) for security notice. diff --git a/executor/containerdexecutor/executor.go b/executor/containerdexecutor/executor.go index ed6553aa604f..e0e70d527d1c 100644 --- a/executor/containerdexecutor/executor.go +++ b/executor/containerdexecutor/executor.go @@ -120,7 +120,8 @@ func (w containerdExecutor) Exec(ctx context.Context, meta executor.Meta, root c } opts = append(opts, containerdoci.WithCgroup(cgroupsPath)) } - spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, resolvConf, hostsFile, namespace, opts...) + processMode := oci.ProcessSandbox // FIXME(AkihiroSuda) + spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, resolvConf, hostsFile, namespace, processMode, opts...) if err != nil { return err } diff --git a/executor/oci/mounts.go b/executor/oci/mounts.go index a0fe8a9f925e..8d32a95f87e2 100644 --- a/executor/oci/mounts.go +++ b/executor/oci/mounts.go @@ -2,16 +2,19 @@ package oci import ( "context" + "path/filepath" + "strings" specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" ) // MountOpts sets oci spec specific info for mount points -type MountOpts func([]specs.Mount) []specs.Mount +type MountOpts func([]specs.Mount) ([]specs.Mount, error) //GetMounts returns default required for buildkit // https://github.com/moby/buildkit/issues/429 -func GetMounts(ctx context.Context, mountOpts ...MountOpts) []specs.Mount { +func GetMounts(ctx context.Context, mountOpts ...MountOpts) ([]specs.Mount, error) { mounts := []specs.Mount{ { Destination: "/proc", @@ -49,20 +52,66 @@ func GetMounts(ctx context.Context, mountOpts ...MountOpts) []specs.Mount { Options: []string{"nosuid", "noexec", "nodev", "ro"}, }, } + var err error for _, o := range mountOpts { - mounts = o(mounts) + mounts, err = o(mounts) + if err != nil { + return nil, err + } } - return mounts + return mounts, nil } -func withROBind(src, dest string) func(m []specs.Mount) []specs.Mount { - return func(m []specs.Mount) []specs.Mount { +func withROBind(src, dest string) func(m []specs.Mount) ([]specs.Mount, error) { + return func(m []specs.Mount) ([]specs.Mount, error) { m = append(m, specs.Mount{ Destination: dest, Type: "bind", Source: src, Options: []string{"rbind", "ro"}, }) - return m + return m, nil + } +} + +func hasPrefix(p, prefixDir string) bool { + prefixDir = filepath.Clean(prefixDir) + if prefixDir == "/" { + return true + } + p = filepath.Clean(p) + return p == prefixDir || strings.HasPrefix(p, prefixDir+"/") +} + +func removeMountsWithPrefix(mounts []specs.Mount, prefixDir string) []specs.Mount { + var ret []specs.Mount + for _, m := range mounts { + if !hasPrefix(m.Destination, prefixDir) { + ret = append(ret, m) + } + } + return ret +} + +func withProcessMode(processMode ProcessMode) func([]specs.Mount) ([]specs.Mount, error) { + return func(m []specs.Mount) ([]specs.Mount, error) { + switch processMode { + case ProcessSandbox: + // keep the default + case NoProcessSandbox: + m = removeMountsWithPrefix(m, "/proc") + procMount := specs.Mount{ + Destination: "/proc", + Type: "bind", + Source: "/proc", + // NOTE: "rbind"+"ro" does not make /proc read-only recursively. + // So we keep maskedPath and readonlyPaths (although not mandatory for rootless mode) + Options: []string{"rbind"}, + } + m = append([]specs.Mount{procMount}, m...) + default: + return nil, errors.Errorf("unknown process mode: %v", processMode) + } + return m, nil } } diff --git a/executor/oci/mounts_test.go b/executor/oci/mounts_test.go new file mode 100644 index 000000000000..15b60f9fdfc0 --- /dev/null +++ b/executor/oci/mounts_test.go @@ -0,0 +1,56 @@ +package oci + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestHasPrefix(t *testing.T) { + type testCase struct { + path string + prefix string + expected bool + } + testCases := []testCase{ + { + path: "/foo/bar", + prefix: "/foo", + expected: true, + }, + { + path: "/foo/bar", + prefix: "/foo/", + expected: true, + }, + { + path: "/foo/bar", + prefix: "/", + expected: true, + }, + { + path: "/foo", + prefix: "/foo", + expected: true, + }, + { + path: "/foo/bar", + prefix: "/bar", + expected: false, + }, + { + path: "/foo/bar", + prefix: "foo", + expected: false, + }, + { + path: "/foobar", + prefix: "/foo", + expected: false, + }, + } + for i, tc := range testCases { + actual := hasPrefix(tc.path, tc.prefix) + require.Equal(t, tc.expected, actual, "#%d: under(%q,%q)", i, tc.path, tc.prefix) + } +} diff --git a/executor/oci/spec_unix.go b/executor/oci/spec_unix.go index b7f087ed095c..aa6b18803700 100644 --- a/executor/oci/spec_unix.go +++ b/executor/oci/spec_unix.go @@ -22,8 +22,21 @@ import ( // Ideally we don't have to import whole containerd just for the default spec +// ProcMode configures PID namespaces +type ProcessMode int + +const ( + // ProcessSandbox unshares pidns and mount procfs. + ProcessSandbox ProcessMode = iota + // NoProcessSandbox uses host pidns and bind-mount procfs. + // Note that NoProcessSandbox allows build containers to kill (and potentially ptrace) an arbitrary process in the BuildKit host namespace. + // NoProcessSandbox should be enabled only when the BuildKit is running in a container as an unprivileged user. + NoProcessSandbox +) + // GenerateSpec generates spec using containerd functionality. -func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mount, id, resolvConf, hostsFile string, namespace network.Namespace, opts ...oci.SpecOpts) (*specs.Spec, func(), error) { +// opts are ignored for s.Process, s.Hostname, and s.Mounts . +func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mount, id, resolvConf, hostsFile string, namespace network.Namespace, processMode ProcessMode, opts ...oci.SpecOpts) (*specs.Spec, func(), error) { c := &containers.Container{ ID: id, } @@ -32,6 +45,14 @@ func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mou ctx = namespaces.WithNamespace(ctx, "buildkit") } + switch processMode { + case NoProcessSandbox: + // Mount for /proc is replaced in GetMounts() + opts = append(opts, + oci.WithHostNamespace(specs.PIDNamespace)) + // TODO(AkihiroSuda): Configure seccomp to disable ptrace (and prctl?) explicitly + } + // Note that containerd.GenerateSpec is namespaced so as to make // specs.Linux.CgroupsPath namespaced s, err := oci.GenerateSpec(ctx, nil, c, opts...) @@ -48,10 +69,14 @@ func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mou s.Process.NoNewPrivileges = false // reset nonewprivileges s.Hostname = "buildkitsandbox" - s.Mounts = GetMounts(ctx, + s.Mounts, err = GetMounts(ctx, + withProcessMode(processMode), withROBind(resolvConf, "/etc/resolv.conf"), withROBind(hostsFile, "/etc/hosts"), ) + if err != nil { + return nil, nil, err + } s.Mounts = append(s.Mounts, specs.Mount{ Destination: "/sys/fs/cgroup", diff --git a/executor/runcexecutor/executor.go b/executor/runcexecutor/executor.go index 8080f5714cdc..c1abccb0830f 100644 --- a/executor/runcexecutor/executor.go +++ b/executor/runcexecutor/executor.go @@ -39,6 +39,8 @@ type Opt struct { Rootless bool // DefaultCgroupParent is the cgroup-parent name for executor DefaultCgroupParent string + // ProcessMode + ProcessMode oci.ProcessMode } var defaultCommandCandidates = []string{"buildkit-runc", "runc"} @@ -50,6 +52,7 @@ type runcExecutor struct { cgroupParent string rootless bool networkProviders map[pb.NetMode]network.Provider + processMode oci.ProcessMode } func New(opt Opt, networkProviders map[pb.NetMode]network.Provider) (executor.Executor, error) { @@ -105,6 +108,7 @@ func New(opt Opt, networkProviders map[pb.NetMode]network.Provider) (executor.Ex cgroupParent: opt.DefaultCgroupParent, rootless: opt.Rootless, networkProviders: networkProviders, + processMode: opt.ProcessMode, } return w, nil } @@ -193,7 +197,7 @@ func (w *runcExecutor) Exec(ctx context.Context, meta executor.Meta, root cache. } opts = append(opts, containerdoci.WithCgroup(cgroupsPath)) } - spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, resolvConf, hostsFile, namespace, opts...) + spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, resolvConf, hostsFile, namespace, w.processMode, opts...) if err != nil { return err } diff --git a/worker/runc/runc.go b/worker/runc/runc.go index 404c44df2830..a46e7c0c6bf6 100644 --- a/worker/runc/runc.go +++ b/worker/runc/runc.go @@ -13,6 +13,7 @@ import ( "github.com/containerd/containerd/platforms" ctdsnapshot "github.com/containerd/containerd/snapshots" "github.com/moby/buildkit/cache/metadata" + "github.com/moby/buildkit/executor/oci" "github.com/moby/buildkit/executor/runcexecutor" containerdsnapshot "github.com/moby/buildkit/snapshot/containerd" "github.com/moby/buildkit/util/network" @@ -33,7 +34,7 @@ type SnapshotterFactory struct { // NewWorkerOpt creates a WorkerOpt. // But it does not set the following fields: // - SessionManager -func NewWorkerOpt(root string, snFactory SnapshotterFactory, rootless bool, labels map[string]string) (base.WorkerOpt, error) { +func NewWorkerOpt(root string, snFactory SnapshotterFactory, rootless bool, processMode oci.ProcessMode, labels map[string]string) (base.WorkerOpt, error) { var opt base.WorkerOpt name := "runc-" + snFactory.Name root = filepath.Join(root, name) @@ -48,7 +49,8 @@ func NewWorkerOpt(root string, snFactory SnapshotterFactory, rootless bool, labe // Root directory Root: filepath.Join(root, "executor"), // without root privileges - Rootless: rootless, + Rootless: rootless, + ProcessMode: processMode, }, network.Default()) if err != nil { return opt, err diff --git a/worker/runc/runc_test.go b/worker/runc/runc_test.go index fd6e643b93d7..33b988579dba 100644 --- a/worker/runc/runc_test.go +++ b/worker/runc/runc_test.go @@ -5,6 +5,7 @@ package runc import ( "bytes" "context" + "fmt" "io" "io/ioutil" "os" @@ -15,8 +16,10 @@ import ( "github.com/containerd/containerd/namespaces" ctdsnapshot "github.com/containerd/containerd/snapshots" "github.com/containerd/containerd/snapshots/overlay" + "github.com/moby/buildkit/cache" "github.com/moby/buildkit/client" "github.com/moby/buildkit/executor" + "github.com/moby/buildkit/executor/oci" "github.com/moby/buildkit/session" "github.com/moby/buildkit/snapshot" "github.com/moby/buildkit/source" @@ -24,24 +27,10 @@ import ( "github.com/stretchr/testify/require" ) -func TestRuncWorker(t *testing.T) { - t.Parallel() - if os.Getuid() != 0 { - t.Skip("requires root") - } - - if _, err := exec.LookPath("runc"); err != nil { - if _, err := exec.LookPath("buildkit-runc"); err != nil { - t.Skipf("no runc found: %s", err.Error()) - } - } - - ctx := namespaces.WithNamespace(context.Background(), "buildkit-test") - - // this should be an example or e2e test +func newWorkerOpt(t *testing.T, processMode oci.ProcessMode) (base.WorkerOpt, func()) { tmpdir, err := ioutil.TempDir("", "workertest") require.NoError(t, err) - defer os.RemoveAll(tmpdir) + cleanup := func() { os.RemoveAll(tmpdir) } snFactory := SnapshotterFactory{ Name: "overlayfs", @@ -50,23 +39,51 @@ func TestRuncWorker(t *testing.T) { }, } rootless := false - workerOpt, err := NewWorkerOpt(tmpdir, snFactory, rootless, nil) + workerOpt, err := NewWorkerOpt(tmpdir, snFactory, rootless, processMode, nil) require.NoError(t, err) workerOpt.SessionManager, err = session.NewManager() require.NoError(t, err) + return workerOpt, cleanup +} - w, err := base.NewWorker(workerOpt) - require.NoError(t, err) +func checkRequirement(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("requires root") + } + if _, err := exec.LookPath("runc"); err != nil { + if _, err := exec.LookPath("buildkit-runc"); err != nil { + t.Skipf("no runc found: %s", err.Error()) + } + } +} + +func newCtx(s string) context.Context { + return namespaces.WithNamespace(context.Background(), s) +} + +func newBusyboxSourceSnapshot(ctx context.Context, t *testing.T, w *base.Worker) cache.ImmutableRef { img, err := source.NewImageIdentifier("docker.io/library/busybox:latest") require.NoError(t, err) - src, err := w.SourceManager.Resolve(ctx, img) require.NoError(t, err) - snap, err := src.Snapshot(ctx) require.NoError(t, err) + return snap +} + +func TestRuncWorker(t *testing.T) { + t.Parallel() + checkRequirement(t) + + workerOpt, cleanupWorkerOpt := newWorkerOpt(t, oci.ProcessSandbox) + defer cleanupWorkerOpt() + w, err := base.NewWorker(workerOpt) + require.NoError(t, err) + + ctx := newCtx("buildkit-test") + snap := newBusyboxSourceSnapshot(ctx, t, w) mounts, err := snap.Mount(ctx, false) require.NoError(t, err) @@ -158,6 +175,35 @@ func TestRuncWorker(t *testing.T) { } +func TestRuncWorkerNoProcessSandbox(t *testing.T) { + t.Parallel() + checkRequirement(t) + + workerOpt, cleanupWorkerOpt := newWorkerOpt(t, oci.NoProcessSandbox) + defer cleanupWorkerOpt() + w, err := base.NewWorker(workerOpt) + require.NoError(t, err) + + ctx := newCtx("buildkit-test") + snap := newBusyboxSourceSnapshot(ctx, t, w) + root, err := w.CacheManager.New(ctx, snap) + require.NoError(t, err) + + // ensure the procfs is shared + selfPID := os.Getpid() + selfCmdline, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cmdline", selfPID)) + require.NoError(t, err) + meta := executor.Meta{ + Args: []string{"/bin/cat", fmt.Sprintf("/proc/%d/cmdline", selfPID)}, + Cwd: "/", + } + stdout := bytes.NewBuffer(nil) + stderr := bytes.NewBuffer(nil) + err = w.Executor.Exec(ctx, meta, root, nil, nil, &nopCloser{stdout}, &nopCloser{stderr}) + require.NoError(t, err, fmt.Sprintf("stdout=%q, stderr=%q", stdout.String(), stderr.String())) + require.Equal(t, string(selfCmdline), stdout.String()) +} + type nopCloser struct { io.Writer }