fix(xunix): also mount symlinked shared object files (#123)
* fix(xunix): also mount shared object files with .so.N
* chore(Makefile): add test and test-integration targets
* chore(README.md): add hacking and troubleshooting sections
* chore(integration): fix tests under cgroupv2

Signed-off-by: Cian Johnston <[email protected]>
Co-authored-by: Dean Sheather <[email protected]>
johnstcn and deansheather authored Jan 27, 2025
1 parent 2b091cf commit 361631d
Showing 5 changed files with 92 additions and 24 deletions.
8 changes: 8 additions & 0 deletions Makefile
@@ -32,3 +32,11 @@ fmt/go:
 .PHONY: fmt/md
 fmt/md:
 	go run github.com/Kunde21/markdownfmt/v3/cmd/[email protected] -w ./README.md
+
+.PHONY: test
+test:
+	go test -v -count=1 ./...
+
+.PHONY: test-integration
+test-integration:
+	go test -v -count=1 -tags=integration ./integration/
34 changes: 34 additions & 0 deletions README.md
@@ -86,3 +86,37 @@ env {
> }
> }
> ```

## GPUs

When passing through GPUs to the inner container, you may end up using associated tooling such as the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html) or the [NVIDIA GPU Operator](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/index.html). These inject the required utilities and libraries into the containers they provision. You can verify this by directly running (without Envbox) a barebones image such as `debian:bookworm` and executing `mount` or `nvidia-smi` inside the container.

Envbox detects these mounts and passes them through to the inner container it creates, so that GPU-aware tools running inside the inner container can still use these libraries.
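As a rough illustration, assuming the NVIDIA Container Toolkit is configured as a Docker runtime and GPUs are exposed via `--gpus all`, that check could look like this:

```
# Run a plain image without Envbox; the toolkit should inject the NVIDIA
# libraries and the nvidia-smi utility into the container.
docker run --rm --gpus all debian:bookworm mount | grep -i nvidia
docker run --rm --gpus all debian:bookworm nvidia-smi
```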

## Hacking

Here's a simple one-liner to run the `codercom/enterprise-minimal:ubuntu` image in Envbox using Docker:
```
docker run -it --rm \
-v /tmp/envbox/docker:/var/lib/coder/docker \
-v /tmp/envbox/containers:/var/lib/coder/containers \
-v /tmp/envbox/sysbox:/var/lib/sysbox \
-v /tmp/envbox/docker:/var/lib/docker \
-v /usr/src:/usr/src:ro \
-v /lib/modules:/lib/modules:ro \
--privileged \
-e CODER_INNER_IMAGE=codercom/enterprise-minimal:ubuntu \
-e CODER_INNER_USERNAME=coder \
envbox:latest /envbox docker
```

This will store persistent data under `/tmp/envbox`.
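For example, after a run you can inspect or reset this state. The paths simply mirror the bind mounts in the one-liner above; removing them usually requires root because the Docker daemon writes as root:

```
ls /tmp/envbox
# expect directories such as: containers  docker  sysbox
sudo rm -rf /tmp/envbox
```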

## Troubleshooting

### `failed to write <number> to cgroup.procs: write /sys/fs/cgroup/docker/<id>/init.scope/cgroup.procs: operation not supported: unknown`

This error occurs in Docker when `cgroupns-mode` is set to `private` (the default on cgroup v2 hosts). To confirm, add `--cgroupns=host` to your `docker run` invocation and re-run it.
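For example, applied to the one-liner from the Hacking section above:

```
docker run -it --rm \
  --cgroupns=host \
  -v /tmp/envbox/docker:/var/lib/coder/docker \
  -v /tmp/envbox/containers:/var/lib/coder/containers \
  -v /tmp/envbox/sysbox:/var/lib/sysbox \
  -v /tmp/envbox/docker:/var/lib/docker \
  -v /usr/src:/usr/src:ro \
  -v /lib/modules:/lib/modules:ro \
  --privileged \
  -e CODER_INNER_IMAGE=codercom/enterprise-minimal:ubuntu \
  -e CODER_INNER_USERNAME=coder \
  envbox:latest /envbox docker
```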

To permanently set this as the default in your Docker daemon, add `"default-cgroupns-mode": "host"` to your `/etc/docker/daemon.json` and restart Docker.
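A minimal sketch of that change, assuming `daemon.json` contains no other settings (otherwise merge the key into your existing file rather than overwriting it) and that the host uses systemd:

```
echo '{ "default-cgroupns-mode": "host" }' | sudo tee /etc/docker/daemon.json
sudo systemctl restart docker
```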
61 changes: 43 additions & 18 deletions integration/docker_test.go
@@ -240,28 +240,53 @@ func TestDocker(t *testing.T) {
 		require.Equal(t, "1000", strings.TrimSpace(string(out)))
 
 		// Validate that memory limit is being applied to the inner container.
-		out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+		// First check under cgroupv2 path.
+		if out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
 			ContainerID: resource.Container.ID,
-			Cmd:         []string{"cat", "/sys/fs/cgroup/memory/memory.limit_in_bytes"},
-		})
-		require.NoError(t, err)
-		require.Equal(t, expectedMemoryLimit, strings.TrimSpace(string(out)))
+			Cmd:         []string{"cat", "/sys/fs/cgroup/memory.max"},
+		}); err == nil {
+			require.Equal(t, expectedMemoryLimit, strings.TrimSpace(string(out)))
+		} else { // fall back to cgroupv1 path.
+			out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+				ContainerID: resource.Container.ID,
+				Cmd:         []string{"cat", "/sys/fs/cgroup/memory/memory.limit_in_bytes"},
+			})
+			require.NoError(t, err)
+			require.Equal(t, expectedMemoryLimit, strings.TrimSpace(string(out)))
+		}
 
-		periodStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+		// Validate the cpu limits are being applied to the inner container.
+		// First check under cgroupv2 path.
+		var quota, period int64
+		if out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
 			ContainerID: resource.Container.ID,
-			Cmd:         []string{"cat", "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us"},
-		})
-		require.NoError(t, err)
-		period, err := strconv.ParseInt(strings.TrimSpace(string(periodStr)), 10, 64)
-		require.NoError(t, err)
+			Cmd:         []string{"cat", "/sys/fs/cgroup/cpu.max"},
+		}); err == nil {
+			// out is in the format "period quota"
+			// e.g. "100000 100000"
+			fields := strings.Fields(string(out))
+			require.Len(t, fields, 2)
+			period, err = strconv.ParseInt(fields[0], 10, 64)
+			require.NoError(t, err)
+			quota, err = strconv.ParseInt(fields[1], 10, 64)
+			require.NoError(t, err)
+		} else { // fall back to cgroupv1 path.
+			periodStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+				ContainerID: resource.Container.ID,
+				Cmd:         []string{"cat", "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us"},
+			})
+			require.NoError(t, err)
+			period, err = strconv.ParseInt(strings.TrimSpace(string(periodStr)), 10, 64)
+			require.NoError(t, err)
 
-		quotaStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
-			ContainerID: resource.Container.ID,
-			Cmd:         []string{"cat", "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"},
-		})
-		require.NoError(t, err)
-		quota, err := strconv.ParseInt(strings.TrimSpace(string(quotaStr)), 10, 64)
-		require.NoError(t, err)
+			quotaStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{
+				ContainerID: resource.Container.ID,
+				Cmd:         []string{"cat", "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"},
+			})
+			require.NoError(t, err)
+			quota, err = strconv.ParseInt(strings.TrimSpace(string(quotaStr)), 10, 64)
+			require.NoError(t, err)
+		}
 
 		// Validate that the CPU limit is being applied to the inner container.
 		actualLimit := float64(quota) / float64(period)
9 changes: 5 additions & 4 deletions xunix/gpu.go
@@ -17,9 +17,10 @@ import (
 )
 
 var (
-	gpuMountRegex = regexp.MustCompile("(?i)(nvidia|vulkan|cuda)")
-	gpuExtraRegex = regexp.MustCompile("(?i)(libgl|nvidia|vulkan|cuda)")
-	gpuEnvRegex   = regexp.MustCompile("(?i)nvidia")
+	gpuMountRegex     = regexp.MustCompile("(?i)(nvidia|vulkan|cuda)")
+	gpuExtraRegex     = regexp.MustCompile("(?i)(libgl|nvidia|vulkan|cuda)")
+	gpuEnvRegex       = regexp.MustCompile("(?i)nvidia")
+	sharedObjectRegex = regexp.MustCompile(`\.so(\.[0-9\.]+)?$`)
 )
 
 func GPUEnvs(ctx context.Context) []string {
@@ -103,7 +104,7 @@ func usrLibGPUs(ctx context.Context, log slog.Logger, usrLibDir string) ([]mount
 			return nil
 		}
 
-		if filepath.Ext(path) != ".so" || !gpuExtraRegex.MatchString(path) {
+		if !sharedObjectRegex.MatchString(path) || !gpuExtraRegex.MatchString(path) {
 			return nil
 		}
 
4 changes: 2 additions & 2 deletions xunix/gpu_test.go
@@ -56,13 +56,13 @@ func TestGPUs(t *testing.T) {
 		expectedUsrLibFiles = []string{
 			filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so"),
 			filepath.Join(usrLibMountpoint, "libnvidia-ml.so"),
+			filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so.1"),
 		}
 
 		// fakeUsrLibFiles are files that should be written to the "mounted"
 		// /usr/lib directory. It includes files that shouldn't be returned.
 		fakeUsrLibFiles = append([]string{
 			filepath.Join(usrLibMountpoint, "libcurl-gnutls.so"),
-			filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so.1"),
 		}, expectedUsrLibFiles...)
 	)
 
@@ -98,7 +98,7 @@
 	devices, binds, err := xunix.GPUs(ctx, log, usrLibMountpoint)
 	require.NoError(t, err)
 	require.Len(t, devices, 2, "unexpected 2 nvidia devices")
-	require.Len(t, binds, 3, "expected 4 nvidia binds")
+	require.Len(t, binds, 4, "expected 4 nvidia binds")
 	require.Contains(t, binds, mount.MountPoint{
 		Device: "/dev/sda1",
 		Path:   "/usr/local/nvidia",
