From 3161ab11ec8e10a5063be3df6305e25cfd045338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 14 Jan 2025 22:06:42 +0100 Subject: [PATCH 01/15] Fix layer reuse in schema1 images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reused.Digest is not always blobDigest, it might be uncompressedDigest; but we must have a blobDiffIDs entry for reused.Digest. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index be906810f..bd06ff486 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -537,7 +537,7 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, uncompressedDigest, err) } if found, reused := reusedBlobFromLayerLookup(layers, blobDigest, size, options); found { - s.lockProtected.blobDiffIDs[blobDigest] = uncompressedDigest + s.lockProtected.blobDiffIDs[reused.Digest] = uncompressedDigest return true, reused, nil } } From e018cff34db8a7d0ab04288a47504b9d4e701ce2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 30 Oct 2024 15:57:51 +0100 Subject: [PATCH 02/15] Update c/storage after https://github.com/containers/storage/pull/2155 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- go.mod | 12 ++++++------ go.sum | 26 ++++++++++++++------------ 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/go.mod b/go.mod index a19024a17..c9f289ddc 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/BurntSushi/toml v1.4.0 github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01 github.com/containers/ocicrypt v1.2.1 - github.com/containers/storage v1.56.1 + github.com/containers/storage v1.56.2-0.20250121150636-c2cdd500e4ef github.com/cyberphone/json-canonicalization v0.0.0-20231217050601-ba74d44ecf5f github.com/distribution/reference v0.6.0 github.com/docker/cli v27.5.0+incompatible @@ -42,7 +42,7 @@ require ( github.com/xeipuuv/gojsonschema v1.2.0 go.etcd.io/bbolt v1.3.11 golang.org/x/crypto v0.32.0 - golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f + golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 golang.org/x/oauth2 v0.25.0 golang.org/x/sync v0.10.0 golang.org/x/sys v0.29.0 @@ -64,10 +64,10 @@ require ( github.com/containerd/errdefs v0.3.0 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/log v0.1.0 // indirect - github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect + github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect github.com/containerd/typeurl/v2 v2.2.3 // indirect github.com/coreos/go-oidc/v3 v3.12.0 // indirect - github.com/cyphar/filepath-securejoin v0.3.4 // indirect + github.com/cyphar/filepath-securejoin v0.3.6 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/docker/go-metrics v0.0.1 // indirect github.com/docker/go-units v0.5.0 // indirect @@ -105,7 +105,7 @@ require ( github.com/mistifyio/go-zfs/v3 v3.0.1 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect - github.com/moby/sys/capability v0.3.0 // indirect + github.com/moby/sys/capability v0.4.0 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect github.com/moby/sys/user v0.3.0 // indirect github.com/moby/term v0.5.0 // indirect @@ -128,7 +128,7 @@ require ( github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 // indirect github.com/smallstep/pkcs7 v0.1.1 // indirect github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 // indirect - github.com/tchap/go-patricia/v2 v2.3.1 // indirect + github.com/tchap/go-patricia/v2 v2.3.2 // indirect github.com/titanous/rocacheck v0.0.0-20171023193734-afe73141d399 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect diff --git a/go.sum b/go.sum index ffca01145..b1df07f15 100644 --- a/go.sum +++ b/go.sum @@ -48,22 +48,24 @@ github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151X github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/containerd/stargz-snapshotter/estargz v0.15.1 h1:eXJjw9RbkLFgioVaTG+G/ZW/0kEe2oEKCdS/ZxIyoCU= -github.com/containerd/stargz-snapshotter/estargz v0.15.1/go.mod h1:gr2RNwukQ/S9Nv33Lt6UC7xEx58C+LHRdoqbEKjz1Kk= +github.com/containerd/stargz-snapshotter/estargz v0.16.3 h1:7evrXtoh1mSbGj/pfRccTampEyKpjpOnS3CyiV1Ebr8= +github.com/containerd/stargz-snapshotter/estargz v0.16.3/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU= github.com/containerd/typeurl/v2 v2.2.3 h1:yNA/94zxWdvYACdYO8zofhrTVuQY73fFU1y++dYSw40= github.com/containerd/typeurl/v2 v2.2.3/go.mod h1:95ljDnPfD3bAbDJRugOiShd/DlAAsxGtUBhJxIn7SCk= github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01 h1:Qzk5C6cYglewc+UyGf6lc8Mj2UaPTHy/iF2De0/77CA= github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01/go.mod h1:9rfv8iPl1ZP7aqh9YA68wnZv2NUDbXdcdPHVz0pFbPY= github.com/containers/ocicrypt v1.2.1 h1:0qIOTT9DoYwcKmxSt8QJt+VzMY18onl9jUXsxpVhSmM= github.com/containers/ocicrypt v1.2.1/go.mod h1:aD0AAqfMp0MtwqWgHM1bUwe1anx0VazI108CRrSKINQ= -github.com/containers/storage v1.56.1 h1:gDZj/S6Zxus4Xx42X6iNB3ODXuh0qoOdH/BABfrvcKo= -github.com/containers/storage v1.56.1/go.mod h1:c6WKowcAlED/DkWGNuL9bvGYqIWCVy7isRMdCSKWNjk= +github.com/containers/storage v1.56.2-0.20250121150636-c2cdd500e4ef h1:mzC7dl6SRdqyMd22kLuljhJTdoqqd4gW8m4LTNetBCo= +github.com/containers/storage v1.56.2-0.20250121150636-c2cdd500e4ef/go.mod h1:KbGwnyB0b3cwwiPuAiB9XqSYfsEhRb/ALIPgfqpmLLA= github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc/Jo= github.com/coreos/go-oidc/v3 v3.12.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0= github.com/cyberphone/json-canonicalization v0.0.0-20231217050601-ba74d44ecf5f h1:eHnXnuK47UlSTOQexbzxAZfekVz6i+LKRdj1CU5DPaM= github.com/cyberphone/json-canonicalization v0.0.0-20231217050601-ba74d44ecf5f/go.mod h1:uzvlm1mxhHkdfqitSA92i7Se+S9ksOn3a3qmv/kyOCw= -github.com/cyphar/filepath-securejoin v0.3.4 h1:VBWugsJh2ZxJmLFSM06/0qzQyiQX2Qs0ViKrUAcqdZ8= -github.com/cyphar/filepath-securejoin v0.3.4/go.mod h1:8s/MCNJREmFK0H02MF6Ihv1nakJe4L/w3WZLHNkvlYM= +github.com/cyphar/filepath-securejoin v0.3.6 h1:4d9N5ykBnSp5Xn2JkhocYDkOpURL/18CYMpo6xB9uWM= +github.com/cyphar/filepath-securejoin v0.3.6/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= +github.com/cyphar/filepath-securejoin v0.4.0 h1:PioTG9TBRSApBpYGnDU8HC+miIsX8vitBH9LGNNMoLQ= +github.com/cyphar/filepath-securejoin v0.4.0/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -233,8 +235,8 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= -github.com/moby/sys/capability v0.3.0 h1:kEP+y6te0gEXIaeQhIi0s7vKs/w0RPoH1qPa6jROcVg= -github.com/moby/sys/capability v0.3.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I= +github.com/moby/sys/capability v0.4.0 h1:4D4mI6KlNtWMCM1Z/K0i7RV1FkX+DBDHKVJpCndZoHk= +github.com/moby/sys/capability v0.4.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I= github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/moby/sys/user v0.3.0 h1:9ni5DlcW5an3SvRSx4MouotOygvzaXbaSrc/wGDFWPo= @@ -337,8 +339,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/sylabs/sif/v2 v2.20.2 h1:HGEPzauCHhIosw5o6xmT3jczuKEuaFzSfdjAsH33vYw= github.com/sylabs/sif/v2 v2.20.2/go.mod h1:WyYryGRaR4Wp21SAymm5pK0p45qzZCSRiZMFvUZiuhc= -github.com/tchap/go-patricia/v2 v2.3.1 h1:6rQp39lgIYZ+MHmdEq4xzuk1t7OdC35z/xm0BGhTkes= -github.com/tchap/go-patricia/v2 v2.3.1/go.mod h1:VZRHKAb53DLaG+nA9EaYYiaEx6YztwDlLElMsnSHD4k= +github.com/tchap/go-patricia/v2 v2.3.2 h1:xTHFutuitO2zqKAQ5rCROYgUb7Or/+IC3fts9/Yc7nM= +github.com/tchap/go-patricia/v2 v2.3.2/go.mod h1:VZRHKAb53DLaG+nA9EaYYiaEx6YztwDlLElMsnSHD4k= github.com/titanous/rocacheck v0.0.0-20171023193734-afe73141d399 h1:e/5i7d4oYZ+C1wj2THlRK+oAhjeS/TRQwMfkIuet3w0= github.com/titanous/rocacheck v0.0.0-20171023193734-afe73141d399/go.mod h1:LdwHTNJT99C5fTAzDz0ud328OgXz+gierycbcIx2fRs= github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= @@ -405,8 +407,8 @@ golang.org/x/crypto v0.30.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ss golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc= golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= +golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 h1:1UoZQm6f0P/ZO0w1Ri+f+ifG/gXhegadRdwBIXEFWDo= +golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= From f1b1c7e79f5731d1022dcbe389c42193042df227 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 28 Nov 2024 20:05:59 +0100 Subject: [PATCH 03/15] Handle a failure when closing compressor. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- storage/storage_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/storage/storage_test.go b/storage/storage_test.go index 6f90495fd..fc7e63e99 100644 --- a/storage/storage_test.go +++ b/storage/storage_test.go @@ -189,14 +189,18 @@ func TestParseWithGraphDriverOptions(t *testing.T) { // makeLayerGoroutine writes to pwriter, and on success, updates uncompressedCount // before it terminates. -func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression archive.Compression) error { +func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression archive.Compression) (retErr error) { var uncompressed *ioutils.WriteCounter if compression != archive.Uncompressed { compressor, err := archive.CompressStream(pwriter, compression) if err != nil { return fmt.Errorf("compressing layer: %w", err) } - defer compressor.Close() + defer func() { + if err := compressor.Close(); err != nil && retErr == nil { + retErr = err + } + }() uncompressed = ioutils.NewWriteCounter(compressor) } else { uncompressed = ioutils.NewWriteCounter(pwriter) From 65b101e92d8f97ed9c23bdd0ab1d4db594505537 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Thu, 28 Nov 2024 20:26:10 +0100 Subject: [PATCH 04/15] Use more representative configs in storage tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... because we will start enforcing that the DiffID values match. Signed-off-by: Miloslav Trmač --- storage/storage_test.go | 125 ++++++++++++++++++++++------------------ 1 file changed, 68 insertions(+), 57 deletions(-) diff --git a/storage/storage_test.go b/storage/storage_test.go index fc7e63e99..870688933 100644 --- a/storage/storage_test.go +++ b/storage/storage_test.go @@ -9,6 +9,7 @@ import ( "context" "crypto/rand" "crypto/sha256" + "encoding/json" "errors" "flag" "fmt" @@ -187,12 +188,11 @@ func TestParseWithGraphDriverOptions(t *testing.T) { } } -// makeLayerGoroutine writes to pwriter, and on success, updates uncompressedCount +// makeLayerGoroutine writes to dest, and on success, updates uncompressedCount and uncompressedDigest // before it terminates. -func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression archive.Compression) (retErr error) { - var uncompressed *ioutils.WriteCounter +func makeLayerGoroutine(dest io.Writer, uncompressedCount *int64, uncompressedDigest *digest.Digest, compression archive.Compression) (retErr error) { if compression != archive.Uncompressed { - compressor, err := archive.CompressStream(pwriter, compression) + compressor, err := archive.CompressStream(dest, compression) if err != nil { return fmt.Errorf("compressing layer: %w", err) } @@ -201,11 +201,12 @@ func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression retErr = err } }() - uncompressed = ioutils.NewWriteCounter(compressor) - } else { - uncompressed = ioutils.NewWriteCounter(pwriter) + dest = compressor } - twriter := tar.NewWriter(uncompressed) + + uncompressedCounter := ioutils.NewWriteCounter(dest) + uncompressedDigester := digest.Canonical.Digester() + twriter := tar.NewWriter(io.MultiWriter(uncompressedCounter, uncompressedDigester.Hash())) // defer twriter.Close() // should be called here to correctly terminate the archive. // We do not do that, to workaround https://github.com/containers/storage/issues/1729 : @@ -248,36 +249,40 @@ func makeLayerGoroutine(pwriter io.Writer, uncompressedCount *int64, compression if err := twriter.Flush(); err != nil { return fmt.Errorf("Error flushing output to tar archive: %w", err) } - *uncompressedCount = uncompressed.Count + *uncompressedCount = uncompressedCounter.Count + *uncompressedDigest = uncompressedDigester.Digest() return nil } type testBlob struct { - compressedDigest digest.Digest - uncompressedSize int64 - compressedSize int64 - data []byte + uncompressedDigest digest.Digest + compressedDigest digest.Digest + uncompressedSize int64 + compressedSize int64 + data []byte } func makeLayer(t *testing.T, compression archive.Compression) testBlob { preader, pwriter := io.Pipe() var uncompressedCount int64 + var uncompressedDigest digest.Digest go func() { err := errors.New("Internal error: unexpected panic in makeLayer") defer func() { // Note that this is not the same as {defer pipeWriter.CloseWithError(err)}; we need err to be evaluated lazily. _ = pwriter.CloseWithError(err) }() - err = makeLayerGoroutine(pwriter, &uncompressedCount, compression) + err = makeLayerGoroutine(pwriter, &uncompressedCount, &uncompressedDigest, compression) }() tbuffer := bytes.Buffer{} _, err := io.Copy(&tbuffer, preader) require.NoError(t, err) return testBlob{ - compressedDigest: digest.SHA256.FromBytes(tbuffer.Bytes()), - uncompressedSize: uncompressedCount, - compressedSize: int64(tbuffer.Len()), - data: tbuffer.Bytes(), + uncompressedDigest: uncompressedDigest, + compressedDigest: digest.SHA256.FromBytes(tbuffer.Bytes()), + uncompressedSize: uncompressedCount, + compressedSize: int64(tbuffer.Len()), + data: tbuffer.Bytes(), } } @@ -309,6 +314,40 @@ func ensureTestCanCreateImages(t *testing.T) { } } +// configForLayers returns a minimally-plausible config for layers +func configForLayers(t *testing.T, layers []testBlob) testBlob { + rootFS := manifest.Schema2RootFS{ + Type: "layers", + DiffIDs: []digest.Digest{}, + } + for _, l := range layers { + rootFS.DiffIDs = append(rootFS.DiffIDs, l.uncompressedDigest) + } + // Add a unique label so that different calls to configForLayers don’t try to use the same image ID. + randomBytes := make([]byte, digest.Canonical.Size()) + _, err := rand.Read(randomBytes) + require.NoError(t, err) + config := manifest.Schema2Image{ + Schema2V1Image: manifest.Schema2V1Image{ + Config: &manifest.Schema2Config{ + Labels: map[string]string{"unique": fmt.Sprintf("%x", randomBytes)}, + }, + Created: time.Now(), + }, + RootFS: &rootFS, + } + configBytes, err := json.Marshal(config) + require.NoError(t, err) + configDigest := digest.Canonical.FromBytes(configBytes) + return testBlob{ + uncompressedDigest: configDigest, + compressedDigest: configDigest, + uncompressedSize: int64(len(configBytes)), + compressedSize: int64(len(configBytes)), + data: configBytes, + } +} + func createUncommittedImageDest(t *testing.T, ref types.ImageReference, cache types.BlobInfoCache, layers []testBlob, config *testBlob) (types.ImageDestination, types.UnparsedImage) { dest, err := ref.NewImageDestination(context.Background(), nil) @@ -320,21 +359,11 @@ func createUncommittedImageDest(t *testing.T, ref types.ImageReference, cache ty layerDescriptors = append(layerDescriptors, desc) } - var configDescriptor manifest.Schema2Descriptor - if config != nil { - configDescriptor = config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType, true) - } else { - // Use a random digest so that different calls to createUncommittedImageDest with config == nil don’t try to - // use the same image ID. - digestBytes := make([]byte, digest.Canonical.Size()) - _, err := rand.Read(digestBytes) - require.NoError(t, err) - configDescriptor = manifest.Schema2Descriptor{ - MediaType: manifest.DockerV2Schema2ConfigMediaType, - Size: 1, - Digest: digest.NewDigestFromBytes(digest.Canonical, digestBytes), - } + if config == nil { + cd := configForLayers(t, layers) + config = &cd } + configDescriptor := config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType, true) manifest := manifest.Schema2FromComponents(configDescriptor, layerDescriptors) manifestBytes, err := manifest.Serialize() @@ -362,14 +391,6 @@ func createImage(t *testing.T, ref types.ImageReference, cache types.BlobInfoCac func TestWriteRead(t *testing.T) { ensureTestCanCreateImages(t) - configBytes := []byte(`{"config":{"labels":{}},"created":"2006-01-02T15:04:05Z"}`) - config := testBlob{ - compressedDigest: digest.SHA256.FromBytes(configBytes), - uncompressedSize: int64(len(configBytes)), - compressedSize: int64(len(configBytes)), - data: configBytes, - } - manifests := []string{ //`{ // "schemaVersion": 2, @@ -447,6 +468,7 @@ func TestWriteRead(t *testing.T) { layer := makeLayer(t, compression) _ = layer.storeBlob(t, dest, cache, manifest.DockerV2Schema2LayerMediaType, false) t.Logf("Wrote randomly-generated layer %q (%d/%d bytes) to destination", layer.compressedDigest, layer.compressedSize, layer.uncompressedSize) + config := configForLayers(t, []testBlob{layer}) _ = config.storeBlob(t, dest, cache, manifest.DockerV2Schema2ConfigMediaType, true) manifest := strings.ReplaceAll(manifestFmt, "%lh", layer.compressedDigest.String()) @@ -639,18 +661,13 @@ func TestSize(t *testing.T) { layer1 := makeLayer(t, archive.Gzip) layer2 := makeLayer(t, archive.Gzip) - configBytes := []byte(`{"config":{"labels":{}},"created":"2006-01-02T15:04:05Z"}`) - config := testBlob{ - compressedDigest: digest.SHA256.FromBytes(configBytes), - uncompressedSize: int64(len(configBytes)), - compressedSize: int64(len(configBytes)), - data: configBytes, - } + layerBlobs := []testBlob{layer1, layer2} + config := configForLayers(t, layerBlobs) ref, err := Transport.ParseReference("test") require.NoError(t, err) - createImage(t, ref, cache, []testBlob{layer1, layer2}, &config) + createImage(t, ref, cache, layerBlobs, &config) img, err := ref.NewImage(context.Background(), nil) require.NoError(t, err) @@ -677,15 +694,9 @@ func TestDuplicateBlob(t *testing.T) { layer1 := makeLayer(t, archive.Gzip) layer2 := makeLayer(t, archive.Gzip) - configBytes := []byte(`{"config":{"labels":{}},"created":"2006-01-02T15:04:05Z"}`) - config := testBlob{ - compressedDigest: digest.SHA256.FromBytes(configBytes), - uncompressedSize: int64(len(configBytes)), - compressedSize: int64(len(configBytes)), - data: configBytes, - } - - createImage(t, ref, cache, []testBlob{layer1, layer2, layer1, layer2}, &config) + layerBlobs := []testBlob{layer1, layer2, layer1, layer2} + config := configForLayers(t, layerBlobs) + createImage(t, ref, cache, layerBlobs, &config) img, err := ref.NewImage(context.Background(), nil) require.NoError(t, err) From db46b2c5dc8b34a62e6cd5b0ead6c50306c6ace7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 22:22:03 +0100 Subject: [PATCH 05/15] Move computing trustedLayerIdentityData out of singleLayerIDComponent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will use the trustedLayerIdentityData for other purposes in the caller as well. Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index bd06ff486..51bba77f1 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -824,15 +824,7 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo) // singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, // and an indication whether the input already has the shape of a layer ID. -// It returns ("", false) if the layer is not found at all (which should never happen) -func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDigest digest.Digest) (string, bool) { - s.lock.Lock() - defer s.lock.Unlock() - - trusted, ok := s.trustedLayerIdentityDataLocked(layerIndex, blobDigest) - if !ok { - return "", false - } +func (trusted trustedLayerIdentityData) singleLayerIDComponent() (string, bool) { if trusted.layerIdentifiedByTOC { return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. } @@ -875,9 +867,10 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si // Check if there's already a layer with the ID that we'd give to the result of applying // this layer blob to its parent, if it has one, or the blob's hex value otherwise. - // The layerID refers either to the DiffID or the digest of the TOC. - layerIDComponent, layerIDComponentStandalone := s.singleLayerIDComponent(index, info.digest) - if layerIDComponent == "" { + s.lock.Lock() + trusted, ok := s.trustedLayerIdentityDataLocked(index, info.digest) + s.lock.Unlock() + if !ok { // Check if it's elsewhere and the caller just forgot to pass it to us in a PutBlob() / TryReusingBlob() / … // // Use none.NoCache to avoid a repeated DiffID lookup in the BlobInfoCache: a caller @@ -902,12 +895,15 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, fmt.Errorf("error determining uncompressed digest for blob %q", info.digest.String()) } - layerIDComponent, layerIDComponentStandalone = s.singleLayerIDComponent(index, info.digest) - if layerIDComponent == "" { + s.lock.Lock() + trusted, ok = s.trustedLayerIdentityDataLocked(index, info.digest) + s.lock.Unlock() + if !ok { return false, fmt.Errorf("we have blob %q, but don't know its layer ID", info.digest.String()) } } - + // The layerID refers either to the DiffID or the digest of the TOC. + layerIDComponent, layerIDComponentStandalone := trusted.singleLayerIDComponent() id := layerIDComponent if !layerIDComponentStandalone || parentLayer != "" { id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() From 9924351cfbd72a06c0ec63af2a162090673b0a4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 29 Nov 2024 18:37:07 +0100 Subject: [PATCH 06/15] Move singleLayerIDComponent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the commit queuing logic together, this is more of an implementation detail of commitLayer. Only moves unchanged code, should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 51bba77f1..c28becf84 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -822,15 +822,6 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo) return nil } -// singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, -// and an indication whether the input already has the shape of a layer ID. -func (trusted trustedLayerIdentityData) singleLayerIDComponent() (string, bool) { - if trusted.layerIdentifiedByTOC { - return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. - } - return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value. -} - // commitLayer commits the specified layer with the given index to the storage. // size can usually be -1; it can be provided if the layer is not known to be already present in blobDiffIDs. // @@ -925,6 +916,15 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } +// singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, +// and an indication whether the input already has the shape of a layer ID. +func (trusted trustedLayerIdentityData) singleLayerIDComponent() (string, bool) { + if trusted.layerIdentifiedByTOC { + return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. + } + return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value. +} + // createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). // If the layer cannot be committed yet, the function returns (nil, nil). func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string) (*storage.Layer, error) { From c32db7d22cef28131aa6a1b7a61c6c6a92a00ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 22:29:04 +0100 Subject: [PATCH 07/15] Split layerID from commitLayer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's fairly isolated from the rest of the function, and if split, it can have unit tests. Those tests are valuable to ensure that layer IDs continue to behave the expected way and maximize layer reuse (although we are not making an API commitment to layer ID values). Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 27 +++++---- storage/storage_dest_test.go | 105 +++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 11 deletions(-) create mode 100644 storage/storage_dest_test.go diff --git a/storage/storage_dest.go b/storage/storage_dest.go index c28becf84..4f58b9e94 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -893,12 +893,9 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, fmt.Errorf("we have blob %q, but don't know its layer ID", info.digest.String()) } } - // The layerID refers either to the DiffID or the digest of the TOC. - layerIDComponent, layerIDComponentStandalone := trusted.singleLayerIDComponent() - id := layerIDComponent - if !layerIDComponentStandalone || parentLayer != "" { - id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() - } + + id := layerID(parentLayer, trusted) + if layer, err2 := s.imageRef.transport.store.Layer(id); layer != nil && err2 == nil { // There's already a layer that should have the right contents, just reuse it. s.indexToStorageID[index] = layer.ID @@ -916,13 +913,21 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } -// singleLayerIDComponent returns a single layer’s the input to computing a layer (chain) ID, -// and an indication whether the input already has the shape of a layer ID. -func (trusted trustedLayerIdentityData) singleLayerIDComponent() (string, bool) { +// layerID computes a layer (“chain”) ID for (a possibly-empty parentLayer, trusted) +func layerID(parentLayer string, trusted trustedLayerIdentityData) string { + var layerIDComponent string if trusted.layerIdentifiedByTOC { - return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous. + // "@" is not a valid start of a digest.Digest.Encoded(), so this is unambiguous with the !layerIdentifiedByTOC case. + // But we _must_ hash this below to get a Digest.Encoded()-formatted value. + layerIDComponent = "@TOC=" + trusted.tocDigest.Encoded() + } else { + layerIDComponent = trusted.diffID.Encoded() // This looks like chain IDs, and it uses the traditional value. + } + id := layerIDComponent + if trusted.layerIdentifiedByTOC || parentLayer != "" { + id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() } - return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value. + return id } // createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). diff --git a/storage/storage_dest_test.go b/storage/storage_dest_test.go new file mode 100644 index 000000000..93b60e690 --- /dev/null +++ b/storage/storage_dest_test.go @@ -0,0 +1,105 @@ +package storage + +import ( + "testing" + + "github.com/opencontainers/go-digest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLayerID(t *testing.T) { + blobDigest, err := digest.Parse("sha256:0000000000000000000000000000000000000000000000000000000000000000") + require.NoError(t, err) + + for _, c := range []struct { + parentID string + identifiedByTOC bool + diffID string + tocDigest string + expected string + }{ + { + parentID: "", + identifiedByTOC: false, + diffID: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + tocDigest: "", + expected: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + }, + { + parentID: "", + identifiedByTOC: false, + diffID: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + expected: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + tocDigest: "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + }, + { + parentID: "", + identifiedByTOC: true, + diffID: "", + tocDigest: "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + expected: "07f60ddaf18a3d1fa18a71bf40f0b9889b473e26555d6fffdfbd72ba6a59469e", + }, + { + parentID: "", + identifiedByTOC: true, + diffID: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + tocDigest: "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + expected: "07f60ddaf18a3d1fa18a71bf40f0b9889b473e26555d6fffdfbd72ba6a59469e", + }, + { + parentID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + identifiedByTOC: false, + diffID: "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + tocDigest: "", + expected: "76f79efda453922cda1cecb6ec9e7cf9d86ea968c6dd199d4030dd00078a1686", + }, + { + parentID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + identifiedByTOC: false, + diffID: "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + tocDigest: "sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc", + expected: "76f79efda453922cda1cecb6ec9e7cf9d86ea968c6dd199d4030dd00078a1686", + }, + { + parentID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + identifiedByTOC: true, + diffID: "", + tocDigest: "sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc", + expected: "468becc3d25ee862f81fd728d229a2b2487cfc9b3e6cf3a4d0af8c3fdde0e7a9", + }, + { + parentID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + identifiedByTOC: true, + diffID: "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + tocDigest: "sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc", + expected: "468becc3d25ee862f81fd728d229a2b2487cfc9b3e6cf3a4d0af8c3fdde0e7a9", + }, + } { + var diffID, tocDigest digest.Digest + if c.diffID != "" { + diffID, err = digest.Parse(c.diffID) + require.NoError(t, err) + } + if c.tocDigest != "" { + tocDigest, err = digest.Parse(c.tocDigest) + require.NoError(t, err) + } + + res := layerID(c.parentID, trustedLayerIdentityData{ + layerIdentifiedByTOC: c.identifiedByTOC, + diffID: diffID, + tocDigest: tocDigest, + blobDigest: "", + }) + assert.Equal(t, c.expected, res) + // blobDigest does not affect the layer ID + res = layerID(c.parentID, trustedLayerIdentityData{ + layerIdentifiedByTOC: c.identifiedByTOC, + diffID: diffID, + tocDigest: tocDigest, + blobDigest: blobDigest, + }) + assert.Equal(t, c.expected, res) + } +} From 9b2e568f18456e9edbe0ccdf59ef447f710622b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 29 Nov 2024 01:27:49 +0100 Subject: [PATCH 08/15] Beautify the implementation of layerID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 4f58b9e94..3fd7856e8 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -913,21 +913,23 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } -// layerID computes a layer (“chain”) ID for (a possibly-empty parentLayer, trusted) -func layerID(parentLayer string, trusted trustedLayerIdentityData) string { - var layerIDComponent string +// layerID computes a layer (“chain”) ID for (a possibly-empty parentID, trusted) +func layerID(parentID string, trusted trustedLayerIdentityData) string { + var component string + mustHash := false if trusted.layerIdentifiedByTOC { // "@" is not a valid start of a digest.Digest.Encoded(), so this is unambiguous with the !layerIdentifiedByTOC case. // But we _must_ hash this below to get a Digest.Encoded()-formatted value. - layerIDComponent = "@TOC=" + trusted.tocDigest.Encoded() + component = "@TOC=" + trusted.tocDigest.Encoded() + mustHash = true } else { - layerIDComponent = trusted.diffID.Encoded() // This looks like chain IDs, and it uses the traditional value. + component = trusted.diffID.Encoded() // This looks like chain IDs, and it uses the traditional value. } - id := layerIDComponent - if trusted.layerIdentifiedByTOC || parentLayer != "" { - id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded() + + if parentID == "" && !mustHash { + return component } - return id + return digest.Canonical.FromString(parentID + "+" + component).Encoded() } // createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). From a2498625559637fa3dfee75c894c527f95916ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 23:49:35 +0100 Subject: [PATCH 09/15] Introduce trustedLayerIdentityData.logString() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... to simplify some of the repetitive logging code. Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 3fd7856e8..d99da453e 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -612,6 +612,13 @@ type trustedLayerIdentityData struct { blobDigest digest.Digest // A digest of the (possibly-compressed) layer as presented, or "" if unknown/untrusted. } +// logString() prints a representation of trusted suitable identifying a layer in logs and errors. +// The string is already quoted to expose malicious input and does not need to be quoted again. +// Note that it does not include _all_ of the contents. +func (trusted trustedLayerIdentityData) logString() string { + return fmt.Sprintf("%q/%q/%q", trusted.blobDigest, trusted.tocDigest, trusted.diffID) +} + // trustedLayerIdentityDataLocked returns a _consistent_ set of information for a layer with (layerIndex, blobDigest). // blobDigest is the (possibly-compressed) layer digest referenced in the manifest. // It returns (trusted, true) if the layer was found, or (_, false) if insufficient data is available. @@ -1039,7 +1046,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } } if layer == nil { - return nil, fmt.Errorf("layer for blob %q/%q/%q not found", trusted.blobDigest, trusted.tocDigest, trusted.diffID) + return nil, fmt.Errorf("layer for blob %s not found", trusted.logString()) } // Read the layer's contents. @@ -1049,7 +1056,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D } diff, err2 := s.imageRef.transport.store.Diff("", layer.ID, diffOptions) if err2 != nil { - return nil, fmt.Errorf("reading layer %q for blob %q/%q/%q: %w", layer.ID, trusted.blobDigest, trusted.tocDigest, trusted.diffID, err2) + return nil, fmt.Errorf("reading layer %q for blob %s: %w", layer.ID, trusted.logString(), err2) } // Copy the layer diff to a file. Diff() takes a lock that it holds // until the ReadCloser that it returns is closed, and PutLayer() wants @@ -1128,7 +1135,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D UncompressedDigest: trusted.diffID, }, file) if err != nil && !errors.Is(err, storage.ErrDuplicateID) { - return nil, fmt.Errorf("adding layer with blob %q/%q/%q: %w", trusted.blobDigest, trusted.tocDigest, trusted.diffID, err) + return nil, fmt.Errorf("adding layer with blob %s: %w", trusted.logString(), err) } return layer, nil } From 8ad82b882be504db58c3fc70dcf77b1b686a6a43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 26 Nov 2024 20:43:20 +0100 Subject: [PATCH 10/15] Return sentinel errors from untrustedLayerDiffID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit untrustedLayerDiffID currently specializes the "not available yet" case; also specialize the "image does not provide this at all" case, which we will need to handle. Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 70 ++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index d99da453e..c3d1931c5 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -960,12 +960,12 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D if diffOutput.UncompressedDigest == "" { d, err := s.untrustedLayerDiffID(index) if err != nil { + if errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable) { + logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) + return nil, nil + } return nil, err } - if d == "" { - logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) - return nil, nil - } untrustedUncompressedDigest = d // While the contents of the digest are untrusted, make sure at least the _format_ is valid, @@ -1185,8 +1185,27 @@ func (u *uncommittedImageSource) GetBlob(ctx context.Context, info types.BlobInf return io.NopCloser(bytes.NewReader(blob)), int64(len(blob)), nil } +// errUntrustedLayerDiffIDNotYetAvailable is returned by untrustedLayerDiffID +// if the value is not yet available (but it can be available after s.manifests is set). +// This should only happen for external callers of the transport, not for c/image/copy. +// +// Callers of untrustedLayerDiffID before PutManifest must handle this error specially; +// callers after PutManifest can use the default, reporting an internal error. +var errUntrustedLayerDiffIDNotYetAvailable = errors.New("internal error: untrustedLayerDiffID has no value available and fallback was not implemented") + +// untrustedLayerDiffIDUnknownError is returned by untrustedLayerDiffID +// if the image’s format does not provide DiffIDs. +type untrustedLayerDiffIDUnknownError struct { + layerIndex int +} + +func (e untrustedLayerDiffIDUnknownError) Error() string { + return fmt.Sprintf("DiffID value for layer %d is unknown or explicitly empty", e.layerIndex) +} + // untrustedLayerDiffID returns a DiffID value for layerIndex from the image’s config. -// If the value is not yet available (but it can be available after s.manifets is set), it returns ("", nil). +// It may return two special errors, errUntrustedLayerDiffIDNotYetAvailable or untrustedLayerDiffIDUnknownError. +// // WARNING: We don’t validate the DiffID value against the layer contents; it must not be used for any deduplication. func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.Digest, error) { // At this point, we are either inside the multi-threaded scope of HasThreadSafePutBlob, @@ -1200,7 +1219,7 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D // via NoteOriginalOCIConfig; this is a compatibility fallback for external callers // of the public types.ImageDestination. if s.manifest == nil { - return "", nil + return "", errUntrustedLayerDiffIDNotYetAvailable } ctx := context.Background() // This is all happening in memory, no need to worry about cancellation. @@ -1217,24 +1236,33 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D s.setUntrustedDiffIDValuesFromOCIConfig(configOCI) } + // Let entirely empty / missing diffIDs through; but if the array does exist, expect it to contain an entry for every layer, + // and fail hard on missing entries. This tries to account for completely naive image producers who just don’t fill DiffID, + // while still detecting incorrectly-built / confused images. + // + // In practice, this should, right now, only matter for pulls of OCI images + // (this code path implies that we did a partial pull because a layer has an annotation), + // So, DiffIDs should always be set. + // + // It is, though, reachable by pulling an OCI image while converting to schema1, + // using a manual (skopeo copy) or something similar, not (podman pull). + // + // Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values, so treat arrays of all-empty + // values as “DiffID unknown”. + // For schema 1, it is important to exit here, before the layerIndex >= len(s.untrustedDiffIDValues) + // check, because the format conversion from schema1 to OCI used to compute untrustedDiffIDValues + // changes the number of layres (drops items with Schema1V1Compatibility.ThrowAway). + if !slices.ContainsFunc(s.untrustedDiffIDValues, func(d digest.Digest) bool { + return d != "" + }) { + return "", untrustedLayerDiffIDUnknownError{ + layerIndex: layerIndex, + } + } if layerIndex >= len(s.untrustedDiffIDValues) { return "", fmt.Errorf("image config has only %d DiffID values, but a layer with index %d exists", len(s.untrustedDiffIDValues), layerIndex) } - res := s.untrustedDiffIDValues[layerIndex] - if res == "" { - // In practice, this should, right now, only matter for pulls of OCI images - // (this code path implies that we did a partial pull because a layer has an annotation), - // So, DiffIDs should always be set. - // - // It is, though, reachable by pulling an OCI image while converting to schema1, - // using a manual (skopeo copy) or something similar, not (podman pull). - // - // Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values. - // The current semantics of this function are that ("", nil) means "try again later", - // which is not what we want to happen; for now, turn that into an explicit error. - return "", fmt.Errorf("DiffID value for layer %d is unknown or explicitly empty", layerIndex) - } - return res, nil + return s.untrustedDiffIDValues[layerIndex], nil } // setUntrustedDiffIDValuesFromOCIConfig updates s.untrustedDiffIDvalues from config. From 6d3f8396749de840b1762be2526cd9d1d462a2ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 23:58:05 +0100 Subject: [PATCH 11/15] Pass trustedLayerIdentityData into createNewLayer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two different locations in the function need the data, and the caller must have it available; so always passing it in simplifies the implementation and removes an impossible error path. This might hypothetically make layer reuse a bit worse, if we happened to learn something for trustedLayerIdentityData from processing other layers of the same image, but reusing the same layer twice within an image should be rare. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index c3d1931c5..7ae1332fe 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -909,7 +909,7 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si return false, nil } - layer, err := s.createNewLayer(index, info.digest, parentLayer, id) + layer, err := s.createNewLayer(index, trusted, parentLayer, id) if err != nil { return false, err } @@ -939,9 +939,9 @@ func layerID(parentID string, trusted trustedLayerIdentityData) string { return digest.Canonical.FromString(parentID + "+" + component).Encoded() } -// createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be ""). +// createNewLayer creates a new layer newLayerID for (index, trusted) on top of parentLayer (which may be ""). // If the layer cannot be committed yet, the function returns (nil, nil). -func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string) (*storage.Layer, error) { +func (s *storageImageDestination) createNewLayer(index int, trusted trustedLayerIdentityData, parentLayer, newLayerID string) (*storage.Layer, error) { s.lock.Lock() diffOutput, ok := s.lockProtected.diffOutputs[index] s.lock.Unlock() @@ -950,11 +950,9 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // That way it will be persisted in storage even if the cache is deleted; also // we can use the value below to avoid the untrustedUncompressedDigest logic (and notably // the costly commit delay until a manifest is available). - s.lock.Lock() - if d, ok := s.lockProtected.indexToDiffID[index]; ok { - diffOutput.UncompressedDigest = d + if diffOutput.UncompressedDigest == "" && trusted.diffID != "" { + diffOutput.UncompressedDigest = trusted.diffID } - s.lock.Unlock() var untrustedUncompressedDigest digest.Digest if diffOutput.UncompressedDigest == "" { @@ -1012,14 +1010,10 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // then we need to read the desired contents from a layer. var filename string var gotFilename bool - s.lock.Lock() - trusted, ok := s.trustedLayerIdentityDataLocked(index, layerDigest) - if ok && trusted.blobDigest != "" { + if trusted.blobDigest != "" { + s.lock.Lock() filename, gotFilename = s.lockProtected.filenames[trusted.blobDigest] - } - s.lock.Unlock() - if !ok { // We have already determined newLayerID, so the data must have been available. - return nil, fmt.Errorf("internal inconsistency: layer (%d, %q) not found", index, layerDigest) + s.lock.Unlock() } var trustedOriginalDigest digest.Digest // For storage.LayerOptions var trustedOriginalSize *int64 From f50eda833ee5d52e35784ede24de9b77d798077d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 10 Dec 2024 21:39:22 +0100 Subject: [PATCH 12/15] Document, and ensure, that TOC-identified layers can't happen for schema1 images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should not change behavior; we call GetTOCDigest in copy.imageCopier.copyLayer before reaching PutBlobPartial, so the new error path should not be reachable. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 7ae1332fe..6a1847964 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -33,6 +33,7 @@ import ( graphdriver "github.com/containers/storage/drivers" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/chunked" + "github.com/containers/storage/pkg/chunked/toc" "github.com/containers/storage/pkg/ioutils" digest "github.com/opencontainers/go-digest" imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1" @@ -111,8 +112,10 @@ type storageImageDestinationLockProtected struct { // // Ideally we wouldn’t have blobDiffIDs, and we would just keep records by index, but the public API does not require the caller // to provide layer indices; and configs don’t have layer indices. blobDiffIDs needs to exist for those cases. - indexToDiffID map[int]digest.Digest // Mapping from layer index to DiffID - indexToTOCDigest map[int]digest.Digest // Mapping from layer index to a TOC Digest + indexToDiffID map[int]digest.Digest // Mapping from layer index to DiffID + // Mapping from layer index to a TOC Digest. + // If this is set, then either c/storage/pkg/chunked/toc.GetTOCDigest must have returned a value, or indexToDiffID must be set as well. + indexToTOCDigest map[int]digest.Digest blobDiffIDs map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs. CAREFUL: See the WARNING above. // Layer data: Before commitLayer is called, either at least one of (diffOutputs, indexToAdditionalLayer, filenames) @@ -401,6 +404,15 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces options.Cache.RecordDigestUncompressedPair(out.CompressedDigest, out.UncompressedDigest) } } else { + // Sanity-check the defined rules for indexToTOCDigest. + toc, err := toc.GetTOCDigest(srcInfo.Annotations) + if err != nil { + return err + } + if toc == nil { + return fmt.Errorf("internal error: PrepareStagedLayer returned a TOC-only identity for layer %q with no TOC digest", srcInfo.Digest.String()) + } + // Use diffID for layer identity if it is known. if uncompressedDigest := options.Cache.UncompressedDigestForTOC(out.TOCDigest); uncompressedDigest != "" { s.lockProtected.indexToDiffID[options.LayerIndex] = uncompressedDigest @@ -605,7 +617,9 @@ func reusedBlobFromLayerLookup(layers []storage.Layer, blobDigest digest.Digest, // trustedLayerIdentityData is a _consistent_ set of information known about a single layer. type trustedLayerIdentityData struct { - layerIdentifiedByTOC bool // true if we decided the layer should be identified by tocDigest, false if by diffID + // true if we decided the layer should be identified by tocDigest, false if by diffID + // This can only be true if c/storage/pkg/chunked/toc.GetTOCDigest returns a value. + layerIdentifiedByTOC bool diffID digest.Digest // A digest of the uncompressed full contents of the layer, or "" if unknown; must be set if !layerIdentifiedByTOC tocDigest digest.Digest // A digest of the TOC digest, or "" if unknown; must be set if layerIdentifiedByTOC From ecd79c154c0132ba0c7d75cfca54cf3838c4507f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 26 Nov 2024 21:17:52 +0100 Subject: [PATCH 13/15] Don't do possibly-ambiguous partial pulls of images with unknown DiffID values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a layer has a TOC, require that it must have a DiffID commitment, or refuse to pull it partially. Layers without a TOC continue to be allowed to use the partial pull code path, and we don't even require config's RootFS.DiffID to be present. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 98 ++++++++++++++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 15 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 6a1847964..3aab28ec8 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -346,6 +346,56 @@ func (f *zstdFetcher) GetBlobAt(chunks []chunked.ImageSourceChunk) (chan io.Read // If the call fails with ErrFallbackToOrdinaryLayerDownload, the caller can fall back to PutBlobWithOptions. // The fallback _must not_ be done otherwise. func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, options private.PutBlobPartialOptions) (_ private.UploadedBlob, retErr error) { + inputTOCDigest, err := toc.GetTOCDigest(srcInfo.Annotations) + if err != nil { + return private.UploadedBlob{}, err + } + + // The identity of partially-pulled layers is, as long as we keep compatibility with tar-like consumers, + // unfixably ambiguous: there are two possible “views” of the same file (same compressed digest), + // the traditional “view” that decompresses the primary stream and consumes a tar file, + // and the partial-pull “view” that starts with the TOC. + // The two “views” have two separate metadata sets and may refer to different parts of the blob for file contents; + // the direct way to ensure they are consistent would be to read the full primary stream (and authenticate it against + // the compressed digest), and ensure the metadata and layer contents exactly match the partially-pulled contents - + // making the partial pull completely pointless. + // + // Instead, for partial-pull-capable layers (with inputTOCDigest set), we require the image to “commit” + // to uncompressed layer digest values via the config's RootFS.DiffIDs array: + // they are already naturally computed for traditionally-pulled layers, and for partially-pulled layers we + // do the optimal partial pull, and then reconstruct the uncompressed tar stream just to (expensively) compute this digest. + // + // Layers which don’t support partial pulls (inputTOCDigest == "", incl. all schema1 layers) can be let through: + // the partial pull code will either not engage, or consume the full layer; and the rules of indexToTOCDigest / layerIdentifiedByTOC + // ensure the layer is identified by DiffID, i.e. using the traditional “view”. + // + // But if inputTOCDigest is set and the input image doesn't have RootFS.DiffIDs (the config is invalid for schema2/OCI), + // don't allow a partial pull, and fall back to PutBlobWithOptions. + // + // (The user can opt out of the DiffID commitment checking by a c/storage option, giving up security for performance, + // but we will still trigger the fall back here, and we will still enforce a DiffID match, so that the set of accepted images + // is the same in both cases, and so that users are not tempted to set the c/storage option to allow accepting some invalid images.) + var untrustedDiffID digest.Digest // "" if unknown + udid, err := s.untrustedLayerDiffID(options.LayerIndex) + if err != nil { + var diffIDUnknownErr untrustedLayerDiffIDUnknownError + switch { + case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable): + // PutBlobPartial is a private API, so all callers are within c/image, and should have called + // NoteOriginalOCIConfig first. + return private.UploadedBlob{}, fmt.Errorf("internal error: in PutBlobPartial, untrustedLayerDiffID returned errUntrustedLayerDiffIDNotYetAvailable") + case errors.As(err, &diffIDUnknownErr): + if inputTOCDigest != nil { + return private.UploadedBlob{}, private.NewErrFallbackToOrdinaryLayerDownload(err) + } + untrustedDiffID = "" // A schema1 image or a non-TOC layer with no ambiguity, let it through + default: + return private.UploadedBlob{}, err + } + } else { + untrustedDiffID = udid + } + fetcher := zstdFetcher{ chunkAccessor: chunkAccessor, ctx: ctx, @@ -385,7 +435,16 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces if err := func() error { // A scope for defer defer s.lock.Unlock() + // For true partial pulls, c/storage decides whether to compute the uncompressed digest based on an option in storage.conf + // (defaults to true, to avoid ambiguity.) + // c/storage can also be configured, to consume a layer not prepared for partial pulls (primarily to allow composefs conversion), + // and in that case it always consumes the full blob and always computes the uncompressed digest. if out.UncompressedDigest != "" { + if untrustedDiffID != "" && out.UncompressedDigest != untrustedDiffID { + return fmt.Errorf("uncompressed digest of layer %q is %q, config claims %q", srcInfo.Digest.String(), + out.UncompressedDigest.String(), untrustedDiffID.String()) + } + s.lockProtected.indexToDiffID[options.LayerIndex] = out.UncompressedDigest if out.TOCDigest != "" { s.lockProtected.indexToTOCDigest[options.LayerIndex] = out.TOCDigest @@ -405,11 +464,7 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces } } else { // Sanity-check the defined rules for indexToTOCDigest. - toc, err := toc.GetTOCDigest(srcInfo.Annotations) - if err != nil { - return err - } - if toc == nil { + if inputTOCDigest == nil { return fmt.Errorf("internal error: PrepareStagedLayer returned a TOC-only identity for layer %q with no TOC digest", srcInfo.Digest.String()) } @@ -461,17 +516,36 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige if err := blobDigest.Validate(); err != nil { return false, private.ReusedBlob{}, fmt.Errorf("Can not check for a blob with invalid digest: %w", err) } - if options.TOCDigest != "" { + useTOCDigest := false // If set, (options.TOCDigest != "" && options.LayerIndex != nil) AND we can use options.TOCDigest safely. + if options.TOCDigest != "" && options.LayerIndex != nil { if err := options.TOCDigest.Validate(); err != nil { return false, private.ReusedBlob{}, fmt.Errorf("Can not check for a blob with invalid digest: %w", err) } + // Only consider using TOCDigest if we can avoid ambiguous image “views”, see the detailed comment in PutBlobPartial. + _, err := s.untrustedLayerDiffID(*options.LayerIndex) + if err != nil { + var diffIDUnknownErr untrustedLayerDiffIDUnknownError + switch { + case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable): + // options.TOCDigest is a private API, so all callers are within c/image, and should have called + // NoteOriginalOCIConfig first. + return false, private.ReusedBlob{}, fmt.Errorf("internal error: in TryReusingBlobWithOptions, untrustedLayerDiffID returned errUntrustedLayerDiffIDNotYetAvailable") + case errors.As(err, &diffIDUnknownErr): + logrus.Debugf("Not using TOC %q to look for layer reuse: %v", options.TOCDigest, err) + // But don’t abort entirely, keep useTOCDigest = false, try a blobDigest match. + default: + return false, private.ReusedBlob{}, err + } + } else { + useTOCDigest = true + } } // lock the entire method as it executes fairly quickly s.lock.Lock() defer s.lock.Unlock() - if options.SrcRef != nil && options.TOCDigest != "" && options.LayerIndex != nil { + if options.SrcRef != nil && useTOCDigest { // Check if we have the layer in the underlying additional layer store. aLayer, err := s.imageRef.transport.store.LookupAdditionalLayer(options.TOCDigest, options.SrcRef.String()) if err != nil && !errors.Is(err, storage.ErrLayerUnknown) { @@ -555,7 +629,7 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige } } - if options.TOCDigest != "" && options.LayerIndex != nil { + if useTOCDigest { // Check if we know which which UncompressedDigest the TOC digest resolves to, and we have a match for that. // Prefer this over LayersByTOCDigest because we can identify the layer using UncompressedDigest, maximizing reuse. uncompressedDigest := options.Cache.UncompressedDigestForTOC(options.TOCDigest) @@ -1248,13 +1322,7 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D // and fail hard on missing entries. This tries to account for completely naive image producers who just don’t fill DiffID, // while still detecting incorrectly-built / confused images. // - // In practice, this should, right now, only matter for pulls of OCI images - // (this code path implies that we did a partial pull because a layer has an annotation), - // So, DiffIDs should always be set. - // - // It is, though, reachable by pulling an OCI image while converting to schema1, - // using a manual (skopeo copy) or something similar, not (podman pull). - // + // schema1 images don’t have DiffID values in the config. // Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values, so treat arrays of all-empty // values as “DiffID unknown”. // For schema 1, it is important to exit here, before the layerIndex >= len(s.untrustedDiffIDValues) From 9f093ee613ff1d97bdf1aee8049e383eae4188fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Nov 2024 23:02:58 +0100 Subject: [PATCH 14/15] Improve comments throughout commitLayer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove some completely redundant comments to shorten the code, clarify where appropriate. Should not change behavior. Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 3aab28ec8..392643769 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -928,16 +928,15 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo) // must guarantee that, at any given time, at most one goroutine may execute // `commitLayer()`. func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, size int64) (bool, error) { - // Already committed? Return early. if _, alreadyCommitted := s.indexToStorageID[index]; alreadyCommitted { return false, nil } - // Start with an empty string or the previous layer ID. Note that - // `s.indexToStorageID` can only be accessed by *one* goroutine at any - // given time. Hence, we don't need to lock accesses. - var parentLayer string + var parentLayer string // "" if no parent if index != 0 { + // s.indexToStorageID can only be written by this function, and our caller + // is responsible for ensuring it can be only be called by *one* goroutine at any + // given time. Hence, we don't need to lock accesses. prev, ok := s.indexToStorageID[index-1] if !ok { return false, fmt.Errorf("Internal error: commitLayer called with previous layer %d not committed yet", index-1) @@ -945,19 +944,17 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si parentLayer = prev } - // Carry over the previous ID for empty non-base layers. if info.emptyLayer { s.indexToStorageID[index] = parentLayer return false, nil } - // Check if there's already a layer with the ID that we'd give to the result of applying - // this layer blob to its parent, if it has one, or the blob's hex value otherwise. + // Collect trusted parameters of the layer. s.lock.Lock() trusted, ok := s.trustedLayerIdentityDataLocked(index, info.digest) s.lock.Unlock() if !ok { - // Check if it's elsewhere and the caller just forgot to pass it to us in a PutBlob() / TryReusingBlob() / … + // Check if the layer exists already and the caller just (incorrectly) forgot to pass it to us in a PutBlob() / TryReusingBlob() / … // // Use none.NoCache to avoid a repeated DiffID lookup in the BlobInfoCache: a caller // that relies on using a blob digest that has never been seen by the store had better call From 2e79eabb4383e7734d62cd25e85ea07f13ecad94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 26 Nov 2024 21:17:52 +0100 Subject: [PATCH 15/15] Enforce that DiffID of a layer matches the config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - If a layer has a TOC digest (i.e. could possibly be pulled partially), and c/storage has computed the uncompressed digest, require that the config's RootFS.DiffIDs exists and matches. This fixes the "view ambiguity" of partially-pulled layers. - For _all_ layers, if RootFS.DiffIDs exists and we know the layer's uncompressed digest, also require the RootFS.DiffIDs value to match. This might be a compatibility break, but Docker requires these values anyway. - We happen to allow setting DiffIDs to empty values, if the layer does not have a TOC digest (so there is no risk of "view ambiguity"). Signed-off-by: Miloslav Trmač --- storage/storage_dest.go | 61 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 392643769..0af4523ff 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -440,6 +440,8 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces // c/storage can also be configured, to consume a layer not prepared for partial pulls (primarily to allow composefs conversion), // and in that case it always consumes the full blob and always computes the uncompressed digest. if out.UncompressedDigest != "" { + // This is centrally enforced later, in commitLayer, but because we have the value available, + // we might just as well check immediately. if untrustedDiffID != "" && out.UncompressedDigest != untrustedDiffID { return fmt.Errorf("uncompressed digest of layer %q is %q, config claims %q", srcInfo.Digest.String(), out.UncompressedDigest.String(), untrustedDiffID.String()) @@ -551,6 +553,8 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige if err != nil && !errors.Is(err, storage.ErrLayerUnknown) { return false, private.ReusedBlob{}, fmt.Errorf(`looking for compressed layers with digest %q and labels: %w`, blobDigest, err) } else if err == nil { + // Compare the long comment in PutBlobPartial. We assume that the Additional Layer Store will, somehow, + // avoid layer “view” ambiguity. alsTOCDigest := aLayer.TOCDigest() if alsTOCDigest != options.TOCDigest { // FIXME: If alsTOCDigest is "", the Additional Layer Store FUSE server is probably just too old, and we could @@ -986,6 +990,37 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si } } + // Ensure that we always see the same “view” of a layer, as identified by the layer’s uncompressed digest, + // unless the user has explicitly opted out of this in storage.conf: see the more detailed explanation in PutBlobPartial. + if trusted.diffID != "" { + untrustedDiffID, err := s.untrustedLayerDiffID(index) + if err != nil { + var diffIDUnknownErr untrustedLayerDiffIDUnknownError + switch { + case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable): + logrus.Debugf("Skipping commit for layer %q, manifest not yet available for DiffID check", index) + return true, nil + case errors.As(err, &diffIDUnknownErr): + // If untrustedLayerDiffIDUnknownError, the input image is schema1, has no TOC annotations, + // so we could not have reused a TOC-identified layer nor have done a TOC-identified partial pull, + // i.e. there is no other “view” to worry about. Sanity-check that we really see the only expected view. + // + // Or, maybe, the input image is OCI, and has invalid/missing DiffID values in config. In that case + // we _must_ fail if we used a TOC-identified layer - but PutBlobPartial should have already + // refused to do a partial pull, so we are in an inconsistent state. + if trusted.layerIdentifiedByTOC { + return false, fmt.Errorf("internal error: layer %d for blob %s was identified by TOC, but we don't have a DiffID in config", + index, trusted.logString()) + } + // else a schema1 image or a non-TOC layer with no ambiguity, let it through + default: + return false, err + } + } else if trusted.diffID != untrustedDiffID { + return false, fmt.Errorf("layer %d (blob %s) does not match config's DiffID %q", index, trusted.logString(), untrustedDiffID) + } + } + id := layerID(parentLayer, trusted) if layer, err2 := s.imageRef.transport.store.Layer(id); layer != nil && err2 == nil { @@ -1031,10 +1066,11 @@ func (s *storageImageDestination) createNewLayer(index int, trusted trustedLayer diffOutput, ok := s.lockProtected.diffOutputs[index] s.lock.Unlock() if ok { - // If we know a trusted DiffID value (e.g. from a BlobInfoCache), set it in diffOutput. + // Typically, we compute a trusted DiffID value to authenticate the layer contents, see the detailed explanation + // in PutBlobPartial. If the user has opted out of that, but we know a trusted DiffID value + // (e.g. from a BlobInfoCache), set it in diffOutput. // That way it will be persisted in storage even if the cache is deleted; also - // we can use the value below to avoid the untrustedUncompressedDigest logic (and notably - // the costly commit delay until a manifest is available). + // we can use the value below to avoid the untrustedUncompressedDigest logic. if diffOutput.UncompressedDigest == "" && trusted.diffID != "" { diffOutput.UncompressedDigest = trusted.diffID } @@ -1043,11 +1079,23 @@ func (s *storageImageDestination) createNewLayer(index int, trusted trustedLayer if diffOutput.UncompressedDigest == "" { d, err := s.untrustedLayerDiffID(index) if err != nil { - if errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable) { + var diffIDUnknownErr untrustedLayerDiffIDUnknownError + switch { + case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable): logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID) return nil, nil + case errors.As(err, &diffIDUnknownErr): + // If untrustedLayerDiffIDUnknownError, the input image is schema1, has no TOC annotations, + // so we should have !trusted.layerIdentifiedByTOC, i.e. we should have set + // diffOutput.UncompressedDigest above in this function, at the very latest. + // + // Or, maybe, the input image is OCI, and has invalid/missing DiffID values in config. In that case + // commitLayer should have already refused this image when dealing with the “view” ambiguity. + return nil, fmt.Errorf("internal error: layer %d for blob %s was partially-pulled with unknown UncompressedDigest, but we don't have a DiffID in config", + index, trusted.logString()) + default: + return nil, err } - return nil, err } untrustedUncompressedDigest = d @@ -1285,7 +1333,8 @@ func (e untrustedLayerDiffIDUnknownError) Error() string { // untrustedLayerDiffID returns a DiffID value for layerIndex from the image’s config. // It may return two special errors, errUntrustedLayerDiffIDNotYetAvailable or untrustedLayerDiffIDUnknownError. // -// WARNING: We don’t validate the DiffID value against the layer contents; it must not be used for any deduplication. +// WARNING: This function does not even validate that the returned digest has a valid format. +// WARNING: We don’t _always_ validate this DiffID value against the layer contents; it must not be used for any deduplication. func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.Digest, error) { // At this point, we are either inside the multi-threaded scope of HasThreadSafePutBlob, // nothing is writing to s.manifest yet, and s.untrustedDiffIDValues might have been set