Skip to content
This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

Commit

Permalink
[feature] Fix png crc signature of each chunk while writing (#6)
Browse files Browse the repository at this point in the history
* a fish, a fish, a fishy-oh!

* fix png crc while writing

* prove that the fish is clean

* update to latest go-png-image-structure version

* set scanner error via pipewriter, removing need for logging

* update go version, instead of allocating new zeroed data just clear() existing

---------

Co-authored-by: kim <[email protected]>
  • Loading branch information
tsmethurst and NyaaaWhatsUpDoc authored Nov 29, 2023
1 parent 3914082 commit c6030ed
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 125 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
module github.com/superseriousbusiness/exif-terminator

go 1.17
go 1.21

require (
github.com/dsoprea/go-exif/v3 v3.0.0-20210625224831-a6301f85c82b
github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d
github.com/stretchr/testify v1.7.0
github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe
github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB
golang.org/x/image v0.1.0
)

Expand Down
9 changes: 2 additions & 7 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd h1:l+vLbuxptsC6
github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd/go.mod h1:7I+3Pe2o/YSU88W0hWlm9S22W7XI1JFNJ86U0zPKMf8=
github.com/dsoprea/go-photoshop-info-format v0.0.0-20200609050348-3db9b63b202c h1:7j5aWACOzROpr+dvMtu8GnI97g9ShLWD72XIELMgn+c=
github.com/dsoprea/go-photoshop-info-format v0.0.0-20200609050348-3db9b63b202c/go.mod h1:pqKB+ijp27cEcrHxhXVgUUMlSDRuGJJp1E+20Lj5H0E=
github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d h1:2zNIgrJTspLxUKoJGl0Ln24+hufPKSjP3cu4++5MeSE=
github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d/go.mod h1:scnx0wQSM7UiCMK66dSdiPZvL2hl6iF5DvpZ7uT59MY=
github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf h1:/w4QxepU4AHh3AuO6/g8y/YIIHH5+aKP3Bj8sg5cqhU=
github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU=
Expand All @@ -36,28 +33,26 @@ github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5Cc
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe h1:ksl2oCx/Qo8sNDc3Grb8WGKBM9nkvhCm25uvlT86azE=
github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe/go.mod h1:gH4P6gN1V+wmIw5o97KGaa1RgXB/tVpC2UNzijhg3E4=
github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB h1:8psprYSK1KdOSH7yQ4PbJq0YYaGQY+gzdW/B0ExDb/8=
github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB/go.mod h1:ymKGfy9kg4dIdraeZRAdobMS/flzLk3VcRPLpEWOAXg=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/image v0.1.0 h1:r8Oj8ZA2Xy12/b5KZYj3tuv7NG/fBz3TwQVvpJ9l8Rk=
golang.org/x/image v0.1.0/go.mod h1:iyPr49SD/G/TBxYVB/9RRtGUT5eNbo2u4NamWeQcD5c=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
Binary file added images/fish-clean.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/fish.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
47 changes: 0 additions & 47 deletions logger.go

This file was deleted.

68 changes: 34 additions & 34 deletions png.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@
package terminator

import (
"encoding/binary"
"io"

pngstructure "github.com/dsoprea/go-png-image-structure/v2"
pngstructure "github.com/superseriousbusiness/go-png-image-structure/v2"
)

type pngVisitor struct {
Expand All @@ -45,49 +44,50 @@ func (v *pngVisitor) split(data []byte, atEOF bool) (int, []byte, error) {
}
}

// check if the splitter has any new chunks in it that we haven't written yet
chunkSlice := v.ps.Chunks()
// Check if the splitter now has
// any new chunks in it for us.
chunkSlice, err := v.ps.Chunks()
if err != nil {
return advance, token, err
}

// Write each chunk by passing it
// through our custom write func,
// which strips out exif and fixes
// the CRC of each chunk.
chunks := chunkSlice.Chunks()
for i, chunk := range chunks {
// look through all the chunks in the splitter
if i > v.lastWrittenChunk {
// we've got a chunk we haven't written yet! write it...
if err := v.writeChunk(chunk); err != nil {
return advance, token, err
}
// then remove the data
chunk.Data = chunk.Data[:0]
// and update
v.lastWrittenChunk = i
if i <= v.lastWrittenChunk {
// Skip already
// written chunks.
continue
}

// Write this new chunk.
if err := v.writeChunk(chunk); err != nil {
return advance, token, err
}
v.lastWrittenChunk = i

// Zero data; here you
// go garbage collector.
chunk.Data = nil
}

return advance, token, err
}

func (v *pngVisitor) writeChunk(chunk *pngstructure.Chunk) error {
if err := binary.Write(v.writer, binary.BigEndian, chunk.Length); err != nil {
return err
}

if _, err := v.writer.Write([]byte(chunk.Type)); err != nil {
return err
}

if chunk.Type == pngstructure.EXifChunkType {
blank := make([]byte, len(chunk.Data))
if _, err := v.writer.Write(blank); err != nil {
return err
}
} else {
if _, err := v.writer.Write(chunk.Data); err != nil {
return err
}
// Replace exif data
// with zero bytes.
clear(chunk.Data)
}

if err := binary.Write(v.writer, binary.BigEndian, chunk.Crc); err != nil {
return err
}
// Fix CRC of each chunk.
chunk.UpdateCrc32()

return nil
// finally, write chunk to writer.
_, err := chunk.WriteTo(v.writer)
return err
}
94 changes: 59 additions & 35 deletions terminator.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,34 @@ import (
"fmt"
"io"

pngstructure "github.com/dsoprea/go-png-image-structure/v2"
jpegstructure "github.com/superseriousbusiness/go-jpeg-image-structure/v2"
pngstructure "github.com/superseriousbusiness/go-png-image-structure/v2"
)

func Terminate(in io.Reader, fileSize int, mediaType string) (io.Reader, error) {
// to avoid keeping too much stuff in memory we want to pipe data directly
// To avoid keeping too much stuff
// in memory we want to pipe data
// directly to the reader.
pipeReader, pipeWriter := io.Pipe()

// we don't know ahead of time how long segments might be: they could be as large as
// the file itself, so unfortunately we need to allocate a buffer here that'scanner as large
// as the file
// We don't know ahead of time how long
// segments might be: they could be as
// large as the file itself, so we need
// a buffer that may grow up to fileSize.
scanner := bufio.NewScanner(in)
scanner.Buffer([]byte{}, fileSize)
var err error

var err error
switch mediaType {
case "image/jpeg", "jpeg", "jpg":
err = terminateJpeg(scanner, pipeWriter, fileSize)

case "image/webp", "webp":
err = terminateWebp(scanner, pipeWriter)

case "image/png", "png":
// for pngs we need to skip the header bytes, so read them in
// and check we're really dealing with a png here
// For pngs we need to skip the header bytes, so read
// them in and check we're really dealing with a png.
header := make([]byte, len(pngstructure.PngSignature))
if _, headerError := in.Read(header); headerError != nil {
err = headerError
Expand All @@ -67,68 +72,87 @@ func Terminate(in io.Reader, fileSize int, mediaType string) (io.Reader, error)
return pipeReader, err
}

func terminateJpeg(scanner *bufio.Scanner, writer io.WriteCloser, expectedFileSize int) error {
// jpeg visitor is where the spicy hack of streaming the de-exifed data is contained
func terminateJpeg(scanner *bufio.Scanner, writer *io.PipeWriter, expectedFileSize int) error {
v := &jpegVisitor{
writer: writer,
expectedFileSize: expectedFileSize,
}

// provide the visitor to the splitter so that it triggers on every section scan
// Provide the visitor to the splitter so
// that it triggers on every section scan.
js := jpegstructure.NewJpegSplitter(v)

// the visitor also needs to read back the list of segments: for this it needs
// to know what jpeg splitter it's attached to, so give it a pointer to the splitter
// The visitor also needs to read back the
// list of segments: for this it needs to
// know what jpeg splitter it's attached to,
// so give it a pointer to the splitter.
v.js = js

// use the jpeg splitters 'split' function, which satisfies the bufio.SplitFunc interface
// Jpeg splitter's 'Split' function
// satisfies bufio.SplitFunc{}.
scanner.Split(js.Split)

scanAndClose(scanner, writer)
go scanAndClose(scanner, writer)
return nil
}

func terminateWebp(scanner *bufio.Scanner, writer io.WriteCloser) error {
func terminateWebp(scanner *bufio.Scanner, writer *io.PipeWriter) error {
v := &webpVisitor{
writer: writer,
}

// use the webp visitor's 'split' function, which satisfies the bufio.SplitFunc interface
// Webp visitor's 'split' function
// satisfies bufio.SplitFunc{}.
scanner.Split(v.split)

scanAndClose(scanner, writer)
go scanAndClose(scanner, writer)
return nil
}

func terminatePng(scanner *bufio.Scanner, writer io.WriteCloser) error {
func terminatePng(scanner *bufio.Scanner, writer *io.PipeWriter) error {
ps := pngstructure.NewPngSplitter()

// Don't bother checking CRC;
// we're overwriting it anyway.
ps.DoCheckCrc(false)

v := &pngVisitor{
ps: ps,
writer: writer,
lastWrittenChunk: -1,
}

// use the png visitor's 'split' function, which satisfies the bufio.SplitFunc interface
// Png visitor's 'split' function
// satisfies bufio.SplitFunc{}.
scanner.Split(v.split)

scanAndClose(scanner, writer)
go scanAndClose(scanner, writer)
return nil
}

func scanAndClose(scanner *bufio.Scanner, writer io.WriteCloser) {
// scan asynchronously until there's nothing left to scan, and then close the writer
// so that the reader on the other side knows that we're done
//
// due to the nature of io.Pipe, writing won't actually work
// until the pipeReader starts being read by the caller, which
// is why we do this asynchronously
go func() {
defer writer.Close()
for scanner.Scan() {
}
if scanner.Err() != nil {
logger.Error(scanner.Err())
}
// scanAndClose scans through the given scanner until there's
// nothing left to scan, and then closes the writer so that the
// reader on the other side of the pipe knows that we're done.
//
// Any error encountered when scanning is passed to the reader
// side of the pipe by closing the writer with that error.
//
// Due to the nature of io.Pipe, writing won't actually work
// until the pipeReader starts being read by the caller, which
// is why this function should always be called asynchronously.
func scanAndClose(scanner *bufio.Scanner, writer *io.PipeWriter) {
var err error

defer func() {
// Always close writer, using returned
// scanner error (if any). If err is nil
// then the standard io.EOF will be used.
// (this will not overwrite existing).
writer.CloseWithError(err)
}()

for scanner.Scan() {
}

// Set error on return.
err = scanner.Err()
}
36 changes: 36 additions & 0 deletions terminator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,42 @@ func (suite *TerminatorTestSuite) TestTerminateRecipe() {
suite.EqualValues(recipeClean, b)
}

// TestTerminateFish runs a png fixture that the standard library decoder
// rejects with an invalid checksum through Terminate, and checks that the
// result is the same size, decodes cleanly, and matches the clean fixture.
func (suite *TerminatorTestSuite) TestTerminateFish() {
	fishDirty, err := os.ReadFile("./images/fish.png")
	if err != nil {
		panic(err)
	}
	originalSize := len(fishDirty)

	// The dirty fixture must be rejected by the standard
	// decoder, otherwise this test proves nothing.
	_, err = png.Decode(bytes.NewBuffer(fishDirty))
	suite.EqualError(err, "png: invalid format: invalid checksum")

	out, err := terminator.Terminate(bytes.NewBuffer(fishDirty), originalSize, "png")
	suite.NoError(err)

	// We should be able to get some bytes back from the returned reader.
	b, err := io.ReadAll(out)
	suite.NoError(err)
	suite.NotEmpty(b)

	// The processed image should have the same size as the initial image.
	suite.EqualValues(originalSize, len(b))

	// The processed image should now be decodable as a png.
	_, err = png.Decode(bytes.NewBuffer(b))
	suite.NoError(err)

	// Bytes should be the same as the clean reference image.
	fishClean, err := os.ReadFile("./images/fish-clean.png")
	suite.NoError(err)
	suite.EqualValues(fishClean, b)

	// The processed bytes should differ from the original input,
	// since we fixed some things. Compare the actual output here
	// rather than the two fixture files against each other.
	suite.NotEqual(fishDirty, b)
}

func TestTerminatorTestSuite(t *testing.T) {
suite.Run(t, &TerminatorTestSuite{})
}

0 comments on commit c6030ed

Please sign in to comment.