diff --git a/storage/storage_dest.go b/storage/storage_dest.go index 69f5936ea1..32c1645ed3 100644 --- a/storage/storage_dest.go +++ b/storage/storage_dest.go @@ -812,13 +812,26 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D if !ok { return nil, fmt.Errorf("failed to find diffID for layer: %q", layerDigest) } + optionalDiffID := diffID // FIXME // Check if we previously cached a file with that blob's contents. If we didn't, // then we need to read the desired contents from a layer. + var trustedUncompressedDigest, trustedOriginalDigest digest.Digest // For storage.LayerOptions s.lock.Lock() - filename, ok := s.lockProtected.filenames[layerDigest] + filename, gotFilename := s.lockProtected.filenames[layerDigest] + _, layerIdentifiedByTOC := s.lockProtected.indexToTOCDigest[index] s.lock.Unlock() - if !ok { + if gotFilename && !layerIdentifiedByTOC { + // If layerIdentifiedByTOC, if we now happen to find a layerDigest match, the newLayerID has already been computed as TOC-based, + // and we don't know the relationship of the layerDigest and TOC digest. + // We could recompute newLayerID to be DiffID-based and use the file, but such a within-image layer + // reuse is expected to be pretty rare; instead, ignore the unexpected file match and proceed to the + // originally-planned TOC match. + + // Because !layerIdentifiedByTOC, optionaldiffID must have been set; and even if it weren’t, PutLayer will recompute the digest from the stream. + trustedUncompressedDigest = optionalDiffID + trustedOriginalDigest = layerDigest // The code setting .filenames[layerDigest] is responsible for the contents matching. + } else { // Try to find the layer with contents matching that blobsum. var layer *storage.Layer // = nil layers, err2 := s.imageRef.transport.store.LayersByUncompressedDigest(diffID) @@ -861,17 +874,49 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D if err != nil { return nil, fmt.Errorf("storing blob to file %q: %w", filename, err) } + + // FIXME CLEAN THIS UP + if !layerIdentifiedByTOC { + // If we found the layer using LayersByUncompressedDigest(diffID): + // - trustedUncompressedDigest = diffID by definition, and diffID is set + // - trustedOriginalDigest = layerDigest is trusted by construction of blobDiffIDs (probably from BlobInfoCache, which is trusted) + // If we found the layer using LayersByCompressedDigest + // - trustedOriginalDigest = layerDigest is trusted from the previous layer + // - trustedUncompressedDigest is set because tocDigest == "" + // or we have found it using Cache.UncompressedDigest + LayersByUncompressedDigest; the cache is trusted to provide + // a matching data pair + // + trustedUncompressedDigest = optionalDiffID // !layerIdentifiedByTOC, so this is always set + // FIXME: .CompressedSize is set from the stream, so it is incorrect to have an uncompressed stream with a compressed digest value. + // We could, in the future, here, set (CompressedDigest, CompressedSize) from the existing layer. + trustedOriginalDigest = optionalDiffID // The stream we have is uncompressed, this matches contents of the stream. + } else { + // Don’t use layerDigest; if we pulled by TOC, the relationship between TOC digest and layerDigest is unverified. + if optionalDiffID == "" && layer.UncompressedDigest != "" { + optionalDiffID = layer.UncompressedDigest + } + // These values might still be ""; in that case PutLayer will compute them from the stream. + // FIXME? We could compute them ourselves above; that would mean only one digest computation instead of two. + // FIXME? Alternatively, c/storage could be smarter about digesting uncompressed streams. + trustedUncompressedDigest = optionalDiffID + // We don’t have a trusted compressed digest value here. We can legitimately set storage.LayerOptions.OriginalDigest to "", + // but that would just result in PutLayer computing the digest of the input stream == optionalDiffID. + // So, instead, set .OriginalDigest to the value we know already, to avoid that digest computation. + trustedOriginalDigest = optionalDiffID // The stream we have is uncompressed, this matches contents of the stream. + } // Allow using the already-collected layer contents without extracting the layer again. // // This only matches against the uncompressed digest. // We don’t have the original compressed data here to trivially set filenames[layerDigest]. // In particular we can’t achieve the correct Layer.CompressedSize value with the current c/storage API. // Within-image layer reuse is probably very rare, for now we prefer to avoid that complexity. - s.lock.Lock() - s.lockProtected.blobDiffIDs[diffID] = diffID - s.lockProtected.filenames[diffID] = filename - s.lockProtected.fileSizes[diffID] = fileSize - s.lock.Unlock() + if trustedUncompressedDigest != "" { + s.lock.Lock() + s.lockProtected.blobDiffIDs[trustedUncompressedDigest] = trustedUncompressedDigest + s.lockProtected.filenames[trustedUncompressedDigest] = filename + s.lockProtected.fileSizes[trustedUncompressedDigest] = fileSize + s.lock.Unlock() + } } // Read the cached blob and use it as a diff. file, err := os.Open(filename) @@ -882,8 +927,8 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D // Build the new layer using the diff, regardless of where it came from. // TODO: This can take quite some time, and should ideally be cancellable using ctx.Done(). layer, _, err := s.imageRef.transport.store.PutLayer(newLayerID, parentLayer, nil, "", false, &storage.LayerOptions{ - OriginalDigest: layerDigest, - UncompressedDigest: diffID, + OriginalDigest: trustedOriginalDigest, + UncompressedDigest: trustedUncompressedDigest, }, file) if err != nil && !errors.Is(err, storage.ErrDuplicateID) { return nil, fmt.Errorf("adding layer with blob %q: %w", layerDigest, err)