diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b346cf..c88ed8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake) # Project version number set(PRJ_VERSION_MAJOR 9) -set(PRJ_VERSION_MINOR 46) +set(PRJ_VERSION_MINOR 47) if(WIN32 AND NOT DEFINED ENV{MSYSTEM}) message(FATAL_ERROR "Currently unsupported environment. Use MINGW for Windows builds. CMake will exit now.") diff --git a/docker/Dockerfile.build b/docker/Dockerfile.build index 420372d..6e0f3aa 100644 --- a/docker/Dockerfile.build +++ b/docker/Dockerfile.build @@ -66,10 +66,9 @@ COPY get-maria.sh . RUN ./get-maria.sh # ---------- download golang - -RUN curl -LO https://go.dev/dl/go1.20.2.linux-amd64.tar.gz \ - && tar -xvf go1.20.2.linux-amd64.tar.gz \ - && rm go1.20.2.linux-amd64.tar.gz \ +RUN curl -LO https://go.dev/dl/go1.20.4.linux-amd64.tar.gz \ + && tar -xvf go1.20.4.linux-amd64.tar.gz \ + && rm go1.20.4.linux-amd64.tar.gz \ && sed -i -e '$a export PATH=$PATH:/root/go/bin' .bashrc \ && sed -i -e '$a export GOROOT=/root/go' .bashrc diff --git a/lib2inpx/lib2inpx.cpp b/lib2inpx/lib2inpx.cpp index e09b764..c9c04e4 100644 --- a/lib2inpx/lib2inpx.cpp +++ b/lib2inpx/lib2inpx.cpp @@ -627,6 +627,10 @@ bool read_fb2(const unzip& uz, const string& book_id, fb2_parser& fb, unz_file_i uz.current(fi); + if (fi.uncompressed_size == 0) { + throw runtime_error("Empty fb2 file"); + } + int len = 0; bool continue_processing = true; diff --git a/src/inpxcreator/cmd/libmerge/main.go b/src/inpxcreator/cmd/libmerge/main.go index 5a3c87b..307b4fc 100644 --- a/src/inpxcreator/cmd/libmerge/main.go +++ b/src/inpxcreator/cmd/libmerge/main.go @@ -259,7 +259,7 @@ func main() { tmpOut = f.Name() w = zip.NewWriter(f) - if (sizeBytes - last.info.Size()) > 0 { + if last.info != nil && (sizeBytes-last.info.Size()) > 0 { fmt.Printf("Merging last archive, possibly first time processing: %s\n", filepath.Join(last.dir, last.info.Name())) skipFirst = true tmp := make([]archive, len(updates)+1, len(updates)+1) @@ -301,60 +301,66 @@ func main() { } fmt.Printf("\tProcessing update: %s\n", filepath.Join(u.dir, u.info.Name())) for _, file := range rc.File { - if id := name2id(file.Name); id > 0 { - if firstBook == 0 { - firstBook = id - } - lastBook = id - - // I know this is wrong, leftBytes could already be negative, but to repeat what libsplit did - // always copy first file...
- - if err := w.Copy(file); err != nil { - log.Printf("Error copying from %s (%s): %v", name, file.Name, err) - } else { - - leftBytes -= int64(file.CompressedSize64) - - if leftBytes <= 0 { - if err := w.Close(); err != nil { - log.Fatalf("Finishing zip file: %v", err) - } - if err := f.Close(); err != nil { - log.Fatalf("Finishing zip file: %v", err) - } - newName := fmt.Sprintf("fb2-%06d-%06d.zip", firstBook, lastBook) - fmt.Printf("\t--> Finalizing archive: %s\n", newName) - - newName = filepath.Join(last.dir, newName) - if err := os.Rename(tmpOut, newName); err != nil { - log.Fatalf("Renaming archive: %v", err) - } - - last.info, err = os.Stat(newName) - if err != nil { - log.Fatalf("Stat failed: %v", err) - } - last.begin = firstBook - last.end = lastBook - fmt.Printf("\t--> New last archive: %s\n", newName) - - // We may want to rebuild inpx - have new "last" archive ready - code = 2 - - f, err = ioutil.TempFile(last.dir, "merge-") - if err != nil { - log.Fatalf("Unable to create temp file: %v", err) - } - tmpOut = f.Name() - w = zip.NewWriter(f) - leftBytes = sizeBytes - firstBook = 0 + if file.FileInfo().Size() == 0 { + log.Printf("\t\tWrong book size - %d, skipping: \"%s\"\n", file.FileInfo().Size(), file.FileInfo().Name()) + continue + } + id := int(0) + if id = name2id(file.FileInfo().Name()); id <= 0 { + log.Printf("\t\tWrong book name, skipping: \"%s\"\n", file.FileInfo().Name()) + continue + } + + if firstBook == 0 { + firstBook = id + } + lastBook = id + + // I know this is wrong, leftBytes could already be negative, but to repeat what libsplit did + // always copy first file... + + if err := w.Copy(file); err != nil { + log.Printf("Error copying from %s (%s): %v", name, file.FileInfo().Name(), err) + } else { + + leftBytes -= int64(file.CompressedSize64) + + if leftBytes <= 0 { + if err := w.Close(); err != nil { + log.Fatalf("Finishing zip file: %v", err) + } + if err := f.Close(); err != nil { + log.Fatalf("Finishing zip file: %v", err) + } + newName := fmt.Sprintf("fb2-%06d-%06d.zip", firstBook, lastBook) + fmt.Printf("\t--> Finalizing archive: %s\n", newName) + + newName = filepath.Join(last.dir, newName) + if err := os.Rename(tmpOut, newName); err != nil { + log.Fatalf("Renaming archive: %v", err) + } + + last.info, err = os.Stat(newName) + if err != nil { + log.Fatalf("Stat failed: %v", err) } + last.begin = firstBook + last.end = lastBook + fmt.Printf("\t--> New last archive: %s\n", newName) + + // We may want to rebuild inpx - have new "last" archive ready + code = 2 + + f, err = ioutil.TempFile(last.dir, "merge-") + if err != nil { + log.Fatalf("Unable to create temp file: %v", err) + } + tmpOut = f.Name() + w = zip.NewWriter(f) + leftBytes = sizeBytes + firstBook = 0 } - } else { - log.Printf("\t\tWrong book name, skipping: \"%s\"\n", file.Name) } } if err := rc.Close(); err != nil { diff --git a/src/inpxcreator/internal/zip/example_test.go b/src/inpxcreator/internal/zip/example_test.go deleted file mode 100644 index 6004465..0000000 --- a/src/inpxcreator/internal/zip/example_test.go +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package zip_test - -import ( - "archive/zip" - "bytes" - "compress/flate" - "fmt" - "io" - "log" - "os" - "runtime" -) - -func ExampleWriter() { - // Create a buffer to write our archive to. - buf := new(bytes.Buffer) - - // Create a new zip archive. 
- w := zip.NewWriter(buf) - - // Add some files to the archive. - var files = []struct { - Name, Body string - }{ - {"readme.txt", "This archive contains some text files."}, - {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, - {"todo.txt", "Get animal handling licence.\nWrite more examples."}, - } - for _, file := range files { - f, err := w.Create(file.Name) - if err != nil { - log.Fatal(err) - } - _, err = f.Write([]byte(file.Body)) - if err != nil { - log.Fatal(err) - } - } - - // Make sure to check the error on Close. - err := w.Close() - if err != nil { - log.Fatal(err) - } -} - -func ExampleReader() { - // Open a zip archive for reading. - r, err := zip.OpenReader(runtime.GOROOT() + "/src/archive/zip/testdata/readme.zip") - if err != nil { - log.Fatal(err) - } - defer r.Close() - - // Iterate through the files in the archive, - // printing some of their contents. - for _, f := range r.File { - fmt.Printf("Contents of %s:\n", f.Name) - rc, err := f.Open() - if err != nil { - log.Fatal(err) - } - _, err = io.CopyN(os.Stdout, rc, 68) - if err != nil { - log.Fatal(err) - } - rc.Close() - fmt.Println() - } - // Output: - // Contents of README: - // This is the source code repository for the Go programming language. -} - -func ExampleWriter_RegisterCompressor() { - // Override the default Deflate compressor with a higher compression level. - - // Create a buffer to write our archive to. - buf := new(bytes.Buffer) - - // Create a new zip archive. - w := zip.NewWriter(buf) - - // Register a custom Deflate compressor. - w.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) { - return flate.NewWriter(out, flate.BestCompression) - }) - - // Proceed to add files to w. -} diff --git a/src/inpxcreator/internal/zip/reader.go b/src/inpxcreator/internal/zip/reader.go index 2ecc034..c9722b2 100644 --- a/src/inpxcreator/internal/zip/reader.go +++ b/src/inpxcreator/internal/zip/reader.go @@ -8,43 +8,63 @@ import ( "bufio" "encoding/binary" "errors" - "fmt" "hash" "hash/crc32" "io" + "io/fs" "os" + "path" + "path/filepath" + "sort" + "strings" + "sync" + "time" ) +// var zipinsecurepath = godebug.New("zipinsecurepath") + var ( - ErrFormat = errors.New("zip: not a valid zip file") - ErrAlgorithm = errors.New("zip: unsupported compression algorithm") - ErrChecksum = errors.New("zip: checksum error") + ErrFormat = errors.New("zip: not a valid zip file") + ErrAlgorithm = errors.New("zip: unsupported compression algorithm") + ErrChecksum = errors.New("zip: checksum error") + ErrInsecurePath = errors.New("zip: insecure file path") ) +// A Reader serves content from a ZIP archive. type Reader struct { r io.ReaderAt File []*File Comment string decompressors map[uint16]Decompressor - size int64 - dirOffset int64 + + // Some JAR files are zip files with a prefix that is a bash script. + // The baseOffset field is the start of the zip file proper. + baseOffset int64 + + // To support appending writer (Append, AppendOffset). + dirOffset int64 + + // fileList is a list of files sorted by ename, + // for use by the Open method. + fileListOnce sync.Once + fileList []fileListEntry } +// A ReadCloser is a Reader that must be closed when no longer needed. type ReadCloser struct { f *os.File Reader } +// A File is a single file in a ZIP archive. +// The file information is in the embedded FileHeader. +// The file content can be accessed by calling Open. 
type File struct { FileHeader zip *Reader zipr io.ReaderAt - zipsize int64 - headerOffset int64 -} - -func (f *File) hasDataDescriptor() bool { - return f.Flags&0x8 != 0 + headerOffset int64 // includes overall ZIP archive baseOffset + zip64 bool // zip64 extended information extra field presence } // OpenReader will open the Zip file specified by name and return a ReadCloser. @@ -69,46 +89,93 @@ func OpenReader(name string) (*ReadCloser, error) { // NewReader returns a new Reader reading from r, which is assumed to // have the given size in bytes. +// +// If any file inside the archive uses a non-local name +// (as defined by [filepath.IsLocal]) or a name containing backslashes +// and the GODEBUG environment variable contains `zipinsecurepath=0`, +// NewReader returns the reader with an ErrInsecurePath error. +// A future version of Go may introduce this behavior by default. +// Programs that want to accept non-local names can ignore +// the ErrInsecurePath error and use the returned reader. func NewReader(r io.ReaderAt, size int64) (*Reader, error) { + if size < 0 { + return nil, errors.New("zip: size cannot be negative") + } zr := new(Reader) if err := zr.init(r, size); err != nil { return nil, err } + for _, f := range zr.File { + if f.Name == "" { + // Zip permits an empty file name field. + continue + } + + // if zipinsecurepath.Value() != "0" { + // continue + // } + + // The zip specification states that names must use forward slashes, + // so consider any backslashes in the name insecure. + if !filepath.IsLocal(f.Name) || strings.Contains(f.Name, `\`) { + return zr, ErrInsecurePath + } + } return zr, nil } func (z *Reader) init(r io.ReaderAt, size int64) error { - end, err := readDirectoryEnd(r, size) + end, baseOffset, err := readDirectoryEnd(r, size) if err != nil { return err } - if end.directoryRecords > uint64(size)/fileHeaderLen { - return fmt.Errorf("archive/zip: TOC declares impossible %d files in %d byte zip", end.directoryRecords, size) - } z.r = r - z.size = size + z.baseOffset = baseOffset z.dirOffset = int64(end.directoryOffset) - z.File = make([]*File, 0, end.directoryRecords) + // Since the number of directory records is not validated, it is not + // safe to preallocate z.File without first checking that the specified + // number of files is reasonable, since a malformed archive may + // indicate it contains up to 1 << 128 - 1 files. Since each file has a + // header which will be _at least_ 30 bytes we can safely preallocate + // if (data size / 30) >= end.directoryRecords. + if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords { + z.File = make([]*File, 0, end.directoryRecords) + } z.Comment = end.comment rs := io.NewSectionReader(r, 0, size) - if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil { + if _, err = rs.Seek(z.baseOffset+int64(end.directoryOffset), io.SeekStart); err != nil { return err } buf := bufio.NewReader(rs) // The count of files inside a zip is truncated to fit in a uint16. // Gloss over this by reading headers until we encounter - // a bad one, and then only report a ErrFormat or UnexpectedEOF if + // a bad one, and then only report an ErrFormat or UnexpectedEOF if // the file count modulo 65536 is incorrect. 
for { - f := &File{zip: z, zipr: r, zipsize: size} + f := &File{zip: z, zipr: r} err = readDirectoryHeader(f, buf) + + // For compatibility with other zip programs, + // if we have a non-zero base offset and can't read + // the first directory header, try again with a zero + // base offset. + if err == ErrFormat && z.baseOffset != 0 && len(z.File) == 0 { + z.baseOffset = 0 + if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil { + return err + } + buf.Reset(rs) + continue + } + if err == ErrFormat || err == io.ErrUnexpectedEOF { break } if err != nil { return err } + f.headerOffset += z.baseOffset z.File = append(z.File, f) } if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here @@ -157,19 +224,34 @@ func (f *File) DataOffset() (offset int64, err error) { // Open returns a ReadCloser that provides access to the File's contents. // Multiple files may be read concurrently. -func (f *File) Open() (rc io.ReadCloser, err error) { +func (f *File) Open() (io.ReadCloser, error) { bodyOffset, err := f.findBodyOffset() if err != nil { - return + return nil, err + } + if strings.HasSuffix(f.Name, "/") { + // The ZIP specification (APPNOTE.TXT) specifies that directories, which + // are technically zero-byte files, must not have any associated file + // data. We previously tried failing here if f.CompressedSize64 != 0, + // but it turns out that a number of implementations (namely, the Java + // jar tool) don't properly set the storage method on directories + // resulting in a file with compressed size > 0 but uncompressed size == + // 0. We still want to fail when a directory has associated uncompressed + // data, but we are tolerant of cases where the uncompressed size is + // zero but compressed size is not. + if f.UncompressedSize64 != 0 { + return &dirReader{ErrFormat}, nil + } else { + return &dirReader{io.EOF}, nil + } } size := int64(f.CompressedSize64) r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) dcomp := f.zip.decompressor(f.Method) if dcomp == nil { - err = ErrAlgorithm - return + return nil, ErrAlgorithm } - rc = dcomp(r) + var rc io.ReadCloser = dcomp(r) var desr io.Reader if f.hasDataDescriptor() { desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) @@ -180,10 +262,33 @@ func (f *File) Open() (rc io.ReadCloser, err error) { f: f, desr: desr, } - return + return rc, nil } -// TODO: Document. +// OpenRaw returns a Reader that provides access to the File's contents without +// decompression. +func (f *File) OpenRaw() (io.Reader, error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return nil, err + } + r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64)) + return r, nil +} + +type dirReader struct { + err error +} + +func (r *dirReader) Read([]byte) (int, error) { + return 0, r.err +} + +func (r *dirReader) Close() error { + return nil +} + +// AppendOffset provides external access to the internal info necessary to set up a proper Append writer.
func (z *Reader) AppendOffset() int64 { return z.dirOffset } @@ -206,6 +311,10 @@ type checksumReader struct { err error // sticky error } +func (r *checksumReader) Stat() (fs.FileInfo, error) { + return headerFileInfo{&r.f.FileHeader}, nil +} + func (r *checksumReader) Read(b []byte) (n int, err error) { if r.err != nil { return 0, r.err @@ -213,6 +322,9 @@ func (r *checksumReader) Read(b []byte) (n int, err error) { n, err = r.rc.Read(b) r.hash.Write(b[:n]) r.nread += uint64(n) + if r.nread > r.f.UncompressedSize64 { + return 0, ErrFormat + } if err == nil { return } @@ -299,52 +411,124 @@ func readDirectoryHeader(f *File, r io.Reader) error { f.Extra = d[filenameLen : filenameLen+extraLen] f.Comment = string(d[filenameLen+extraLen:]) + // Determine the character encoding. + utf8Valid1, utf8Require1 := detectUTF8(f.Name) + utf8Valid2, utf8Require2 := detectUTF8(f.Comment) + switch { + case !utf8Valid1 || !utf8Valid2: + // Name and Comment definitely not UTF-8. + f.NonUTF8 = true + case !utf8Require1 && !utf8Require2: + // Name and Comment use only single-byte runes that overlap with UTF-8. + f.NonUTF8 = false + default: + // Might be UTF-8, might be some other encoding; preserve existing flag. + // Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag. + // Since it is impossible to always distinguish valid UTF-8 from some + // other encoding (e.g., GBK or Shift-JIS), we trust the flag. + f.NonUTF8 = f.Flags&0x800 == 0 + } + needUSize := f.UncompressedSize == ^uint32(0) needCSize := f.CompressedSize == ^uint32(0) needHeaderOffset := f.headerOffset == int64(^uint32(0)) - if len(f.Extra) > 0 { - // Best effort to find what we need. - // Other zip authors might not even follow the basic format, - // and we'll just ignore the Extra content in that case. - b := readBuf(f.Extra) - for len(b) >= 4 { // need at least tag and size - tag := b.uint16() - size := b.uint16() - if int(size) > len(b) { - break + // Best effort to find what we need. + // Other zip authors might not even follow the basic format, + // and we'll just ignore the Extra content in that case. + var modified time.Time +parseExtras: + for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size + fieldTag := extra.uint16() + fieldSize := int(extra.uint16()) + if len(extra) < fieldSize { + break + } + fieldBuf := extra.sub(fieldSize) + + switch fieldTag { + case zip64ExtraID: + f.zip64 = true + + // update directory values from the zip64 extra block. + // They should only be consulted if the sizes read earlier + // are maxed out. + // See golang.org/issue/13367. + if needUSize { + needUSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.UncompressedSize64 = fieldBuf.uint64() } - if tag == zip64ExtraId { - // update directory values from the zip64 extra block. - // They should only be consulted if the sizes read earlier - // are maxed out. - // See golang.org/issue/13367. 
- eb := readBuf(b[:size]) - - if needUSize { - needUSize = false - if len(eb) < 8 { - return ErrFormat - } - f.UncompressedSize64 = eb.uint64() + if needCSize { + needCSize = false + if len(fieldBuf) < 8 { + return ErrFormat } - if needCSize { - needCSize = false - if len(eb) < 8 { - return ErrFormat - } - f.CompressedSize64 = eb.uint64() + f.CompressedSize64 = fieldBuf.uint64() + } + if needHeaderOffset { + needHeaderOffset = false + if len(fieldBuf) < 8 { + return ErrFormat } - if needHeaderOffset { - needHeaderOffset = false - if len(eb) < 8 { - return ErrFormat - } - f.headerOffset = int64(eb.uint64()) + f.headerOffset = int64(fieldBuf.uint64()) + } + case ntfsExtraID: + if len(fieldBuf) < 4 { + continue parseExtras + } + fieldBuf.uint32() // reserved (ignored) + for len(fieldBuf) >= 4 { // need at least tag and size + attrTag := fieldBuf.uint16() + attrSize := int(fieldBuf.uint16()) + if len(fieldBuf) < attrSize { + continue parseExtras } - break + attrBuf := fieldBuf.sub(attrSize) + if attrTag != 1 || attrSize != 24 { + continue // Ignore irrelevant attributes + } + + const ticksPerSecond = 1e7 // Windows timestamp resolution + ts := int64(attrBuf.uint64()) // ModTime since Windows epoch + secs := int64(ts / ticksPerSecond) + nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) + epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) + modified = time.Unix(epoch.Unix()+secs, nsecs) } - b = b[size:] + case unixExtraID, infoZipUnixExtraID: + if len(fieldBuf) < 8 { + continue parseExtras + } + fieldBuf.uint32() // AcTime (ignored) + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + case extTimeExtraID: + if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { + continue parseExtras + } + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + } + } + + msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) + f.Modified = msdosModified + if !modified.IsZero() { + f.Modified = modified.UTC() + + // If legacy MS-DOS timestamps are set, we can use the delta between + // the legacy and extended versions to estimate timezone offset. + // + // A non-UTC timezone is always used (even if offset is zero). + // Thus, FileHeader.Modified.Location() == time.UTC is useful for + // determining whether extended timestamps are present. + // This is necessary for users that need to do additional time + // calculations when dealing with legacy ZIP formats. + if f.ModifiedTime != 0 || f.ModifiedDate != 0 { + f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) } } @@ -367,7 +551,6 @@ func readDirectoryHeader(f *File, r io.Reader) error { func readDataDescriptor(r io.Reader, f *File) error { var buf [dataDescriptorLen]byte - // The spec says: "Although not originally assigned a // signature, the value 0x08074b50 has commonly been adopted // as a signature value for the data descriptor record. 
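Note: the extra-field timestamp handling above tracks upstream archive/zip. As a minimal usage sketch of the behavior it documents (illustrative only, not part of the patch; the archive name is hypothetical): when an entry also carries the legacy MS-DOS fields, an extended timestamp is decoded into a fixed, non-UTC zone, so Modified.Location() == time.UTC suggests only the legacy MS-DOS date/time was present.

package main

import (
	"archive/zip"
	"fmt"
	"log"
	"time"
)

func main() {
	r, err := zip.OpenReader("example.zip") // hypothetical archive name
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()

	for _, f := range r.File {
		// Per the comment in readDirectoryHeader: extended timestamps are
		// decoded into a non-UTC fixed zone (even at zero offset) when the
		// legacy MS-DOS fields are also set, so a UTC location hints that
		// the entry carried only the legacy timestamp.
		extended := f.Modified.Location() != time.UTC
		fmt.Printf("%s\t%v\textended timestamp: %v\n", f.Name, f.Modified, extended)
	}
}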
@@ -405,7 +588,7 @@ func readDataDescriptor(r io.Reader, f *File) error { return nil } -func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { +func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, baseOffset int64, err error) { // look for directoryEndSignature in the last 1k, then in the last 65k var buf []byte var directoryEndOffset int64 @@ -415,7 +598,7 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) } buf = make([]byte, int(bLen)) if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { - return nil, err + return nil, 0, err } if p := findSignatureInBlock(buf); p >= 0 { buf = buf[p:] @@ -423,7 +606,7 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) break } if i == 1 || bLen == size { - return nil, ErrFormat + return nil, 0, ErrFormat } } @@ -440,7 +623,7 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) } l := int(d.commentLen) if l > len(b) { - return nil, errors.New("zip: invalid comment length") + return nil, 0, errors.New("zip: invalid comment length") } d.comment = string(b[:l]) @@ -448,17 +631,21 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { p, err := findDirectory64End(r, directoryEndOffset) if err == nil && p >= 0 { + directoryEndOffset = p err = readDirectory64End(r, p, d) } if err != nil { - return nil, err + return nil, 0, err } } + + baseOffset = directoryEndOffset - int64(d.directorySize) - int64(d.directoryOffset) + // Make sure directoryOffset points to somewhere in our file. - if o := int64(d.directoryOffset); o < 0 || o >= size { - return nil, ErrFormat + if o := baseOffset + int64(d.directoryOffset); o < 0 || o >= size { + return nil, 0, ErrFormat } - return d, nil + return d, baseOffset, nil } // findDirectory64End tries to read the zip64 locator just before the @@ -527,6 +714,12 @@ func findSignatureInBlock(b []byte) int { type readBuf []byte +func (b *readBuf) uint8() uint8 { + v := (*b)[0] + *b = (*b)[1:] + return v +} + func (b *readBuf) uint16() uint16 { v := binary.LittleEndian.Uint16(*b) *b = (*b)[2:] @@ -544,3 +737,244 @@ func (b *readBuf) uint64() uint64 { *b = (*b)[8:] return v } + +func (b *readBuf) sub(n int) readBuf { + b2 := (*b)[:n] + *b = (*b)[n:] + return b2 +} + +// A fileListEntry is a File and its ename. +// If file == nil, the fileListEntry describes a directory without metadata. +type fileListEntry struct { + name string + file *File + isDir bool + isDup bool +} + +type fileInfoDirEntry interface { + fs.FileInfo + fs.DirEntry +} + +func (e *fileListEntry) stat() (fileInfoDirEntry, error) { + if e.isDup { + return nil, errors.New(e.name + ": duplicate entries in zip file") + } + if !e.isDir { + return headerFileInfo{&e.file.FileHeader}, nil + } + return e, nil +} + +// Only used for directories. 
+func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem } +func (f *fileListEntry) Size() int64 { return 0 } +func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 } +func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir } +func (f *fileListEntry) IsDir() bool { return true } +func (f *fileListEntry) Sys() any { return nil } + +func (f *fileListEntry) ModTime() time.Time { + if f.file == nil { + return time.Time{} + } + return f.file.FileHeader.Modified.UTC() +} + +func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil } + +// toValidName coerces name to be a valid name for fs.FS.Open. +func toValidName(name string) string { + name = strings.ReplaceAll(name, `\`, `/`) + p := path.Clean(name) + + p = strings.TrimPrefix(p, "/") + + for strings.HasPrefix(p, "../") { + p = p[len("../"):] + } + + return p +} + +func (r *Reader) initFileList() { + r.fileListOnce.Do(func() { + // files and knownDirs map from a file/directory name + // to an index into the r.fileList entry that we are + // building. They are used to mark duplicate entries. + files := make(map[string]int) + knownDirs := make(map[string]int) + + // dirs[name] is true if name is known to be a directory, + // because it appears as a prefix in a path. + dirs := make(map[string]bool) + + for _, file := range r.File { + isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/' + name := toValidName(file.Name) + if name == "" { + continue + } + + if idx, ok := files[name]; ok { + r.fileList[idx].isDup = true + continue + } + if idx, ok := knownDirs[name]; ok { + r.fileList[idx].isDup = true + continue + } + + for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) { + dirs[dir] = true + } + + idx := len(r.fileList) + entry := fileListEntry{ + name: name, + file: file, + isDir: isDir, + } + r.fileList = append(r.fileList, entry) + if isDir { + knownDirs[name] = idx + } else { + files[name] = idx + } + } + for dir := range dirs { + if _, ok := knownDirs[dir]; !ok { + if idx, ok := files[dir]; ok { + r.fileList[idx].isDup = true + } else { + entry := fileListEntry{ + name: dir, + file: nil, + isDir: true, + } + r.fileList = append(r.fileList, entry) + } + } + } + + sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) }) + }) +} + +func fileEntryLess(x, y string) bool { + xdir, xelem, _ := split(x) + ydir, yelem, _ := split(y) + return xdir < ydir || xdir == ydir && xelem < yelem +} + +// Open opens the named file in the ZIP archive, +// using the semantics of fs.FS.Open: +// paths are always slash separated, with no +// leading / or ../ elements. 
+func (r *Reader) Open(name string) (fs.File, error) { + r.initFileList() + + if !fs.ValidPath(name) { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} + } + e := r.openLookup(name) + if e == nil { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist} + } + if e.isDir { + return &openDir{e, r.openReadDir(name), 0}, nil + } + rc, err := e.file.Open() + if err != nil { + return nil, err + } + return rc.(fs.File), nil +} + +func split(name string) (dir, elem string, isDir bool) { + if len(name) > 0 && name[len(name)-1] == '/' { + isDir = true + name = name[:len(name)-1] + } + i := len(name) - 1 + for i >= 0 && name[i] != '/' { + i-- + } + if i < 0 { + return ".", name, isDir + } + return name[:i], name[i+1:], isDir +} + +var dotFile = &fileListEntry{name: "./", isDir: true} + +func (r *Reader) openLookup(name string) *fileListEntry { + if name == "." { + return dotFile + } + + dir, elem, _ := split(name) + files := r.fileList + i := sort.Search(len(files), func(i int) bool { + idir, ielem, _ := split(files[i].name) + return idir > dir || idir == dir && ielem >= elem + }) + if i < len(files) { + fname := files[i].name + if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { + return &files[i] + } + } + return nil +} + +func (r *Reader) openReadDir(dir string) []fileListEntry { + files := r.fileList + i := sort.Search(len(files), func(i int) bool { + idir, _, _ := split(files[i].name) + return idir >= dir + }) + j := sort.Search(len(files), func(j int) bool { + jdir, _, _ := split(files[j].name) + return jdir > dir + }) + return files[i:j] +} + +type openDir struct { + e *fileListEntry + files []fileListEntry + offset int +} + +func (d *openDir) Close() error { return nil } +func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat() } + +func (d *openDir) Read([]byte) (int, error) { + return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")} +} + +func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) { + n := len(d.files) - d.offset + if count > 0 && n > count { + n = count + } + if n == 0 { + if count <= 0 { + return nil, nil + } + return nil, io.EOF + } + list := make([]fs.DirEntry, n) + for i := range list { + s, err := d.files[d.offset+i].stat() + if err != nil { + return nil, err + } + list[i] = s + } + d.offset += n + return list, nil +} diff --git a/src/inpxcreator/internal/zip/reader_test.go b/src/inpxcreator/internal/zip/reader_test.go deleted file mode 100644 index 20793e7..0000000 --- a/src/inpxcreator/internal/zip/reader_test.go +++ /dev/null @@ -1,860 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package zip - -import ( - "bytes" - "encoding/binary" - "encoding/hex" - "io" - "io/ioutil" - "os" - "path/filepath" - "regexp" - "runtime" - "strings" - "testing" - "time" -) - -type ZipTest struct { - Name string - Source func() (r io.ReaderAt, size int64) // if non-nil, used instead of testdata/ file - Comment string - File []ZipTestFile - Error error // the error that Opening this file should return -} - -type ZipTestFile struct { - Name string - Mode os.FileMode - Mtime string // optional, modified time in format "mm-dd-yy hh:mm:ss" - - // Information describing expected zip file content. - // First, reading the entire content should produce the error ContentErr. - // Second, if ContentErr==nil, the content should match Content. 
- // If content is large, an alternative to setting Content is to set File, - // which names a file in the testdata/ directory containing the - // uncompressed expected content. - // If content is very large, an alternative to setting Content or File - // is to set Size, which will then be checked against the header-reported size - // but will bypass the decompressing of the actual data. - // This last option is used for testing very large (multi-GB) compressed files. - ContentErr error - Content []byte - File string - Size uint64 -} - -// Caution: The Mtime values found for the test files should correspond to -// the values listed with unzip -l . However, the values -// listed by unzip appear to be off by some hours. When creating -// fresh test files and testing them, this issue is not present. -// The test files were created in Sydney, so there might be a time -// zone issue. The time zone information does have to be encoded -// somewhere, because otherwise unzip -l could not provide a different -// time from what the archive/zip package provides, but there appears -// to be no documentation about this. - -var tests = []ZipTest{ - { - Name: "test.zip", - Comment: "This is a zipfile comment.", - File: []ZipTestFile{ - { - Name: "test.txt", - Content: []byte("This is a test text file.\n"), - Mtime: "09-05-10 12:12:02", - Mode: 0644, - }, - { - Name: "gophercolor16x16.png", - File: "gophercolor16x16.png", - Mtime: "09-05-10 15:52:58", - Mode: 0644, - }, - }, - }, - { - Name: "test-trailing-junk.zip", - Comment: "This is a zipfile comment.", - File: []ZipTestFile{ - { - Name: "test.txt", - Content: []byte("This is a test text file.\n"), - Mtime: "09-05-10 12:12:02", - Mode: 0644, - }, - { - Name: "gophercolor16x16.png", - File: "gophercolor16x16.png", - Mtime: "09-05-10 15:52:58", - Mode: 0644, - }, - }, - }, - { - Name: "r.zip", - Source: returnRecursiveZip, - File: []ZipTestFile{ - { - Name: "r/r.zip", - Content: rZipBytes(), - Mtime: "03-04-10 00:24:16", - Mode: 0666, - }, - }, - }, - { - Name: "symlink.zip", - File: []ZipTestFile{ - { - Name: "symlink", - Content: []byte("../target"), - Mode: 0777 | os.ModeSymlink, - }, - }, - }, - { - Name: "readme.zip", - }, - { - Name: "readme.notzip", - Error: ErrFormat, - }, - { - Name: "dd.zip", - File: []ZipTestFile{ - { - Name: "filename", - Content: []byte("This is a test textfile.\n"), - Mtime: "02-02-11 13:06:20", - Mode: 0666, - }, - }, - }, - { - // created in windows XP file manager. 
- Name: "winxp.zip", - File: crossPlatform, - }, - { - // created by Zip 3.0 under Linux - Name: "unix.zip", - File: crossPlatform, - }, - { - // created by Go, before we wrote the "optional" data - // descriptor signatures (which are required by OS X) - Name: "go-no-datadesc-sig.zip", - File: []ZipTestFile{ - { - Name: "foo.txt", - Content: []byte("foo\n"), - Mtime: "03-08-12 16:59:10", - Mode: 0644, - }, - { - Name: "bar.txt", - Content: []byte("bar\n"), - Mtime: "03-08-12 16:59:12", - Mode: 0644, - }, - }, - }, - { - // created by Go, after we wrote the "optional" data - // descriptor signatures (which are required by OS X) - Name: "go-with-datadesc-sig.zip", - File: []ZipTestFile{ - { - Name: "foo.txt", - Content: []byte("foo\n"), - Mode: 0666, - }, - { - Name: "bar.txt", - Content: []byte("bar\n"), - Mode: 0666, - }, - }, - }, - { - Name: "Bad-CRC32-in-data-descriptor", - Source: returnCorruptCRC32Zip, - File: []ZipTestFile{ - { - Name: "foo.txt", - Content: []byte("foo\n"), - Mode: 0666, - ContentErr: ErrChecksum, - }, - { - Name: "bar.txt", - Content: []byte("bar\n"), - Mode: 0666, - }, - }, - }, - // Tests that we verify (and accept valid) crc32s on files - // with crc32s in their file header (not in data descriptors) - { - Name: "crc32-not-streamed.zip", - File: []ZipTestFile{ - { - Name: "foo.txt", - Content: []byte("foo\n"), - Mtime: "03-08-12 16:59:10", - Mode: 0644, - }, - { - Name: "bar.txt", - Content: []byte("bar\n"), - Mtime: "03-08-12 16:59:12", - Mode: 0644, - }, - }, - }, - // Tests that we verify (and reject invalid) crc32s on files - // with crc32s in their file header (not in data descriptors) - { - Name: "crc32-not-streamed.zip", - Source: returnCorruptNotStreamedZip, - File: []ZipTestFile{ - { - Name: "foo.txt", - Content: []byte("foo\n"), - Mtime: "03-08-12 16:59:10", - Mode: 0644, - ContentErr: ErrChecksum, - }, - { - Name: "bar.txt", - Content: []byte("bar\n"), - Mtime: "03-08-12 16:59:12", - Mode: 0644, - }, - }, - }, - { - Name: "zip64.zip", - File: []ZipTestFile{ - { - Name: "README", - Content: []byte("This small file is in ZIP64 format.\n"), - Mtime: "08-10-12 14:33:32", - Mode: 0644, - }, - }, - }, - // Another zip64 file with different Extras fields. (golang.org/issue/7069) - { - Name: "zip64-2.zip", - File: []ZipTestFile{ - { - Name: "README", - Content: []byte("This small file is in ZIP64 format.\n"), - Mtime: "08-10-12 14:33:32", - Mode: 0644, - }, - }, - }, - // Largest possible non-zip64 file, with no zip64 header. 
- { - Name: "big.zip", - Source: returnBigZipBytes, - File: []ZipTestFile{ - { - Name: "big.file", - Content: nil, - Size: 1<<32 - 1, - Mode: 0666, - }, - }, - }, -} - -var crossPlatform = []ZipTestFile{ - { - Name: "hello", - Content: []byte("world \r\n"), - Mode: 0666, - }, - { - Name: "dir/bar", - Content: []byte("foo \r\n"), - Mode: 0666, - }, - { - Name: "dir/empty/", - Content: []byte{}, - Mode: os.ModeDir | 0777, - }, - { - Name: "readonly", - Content: []byte("important \r\n"), - Mode: 0444, - }, -} - -func TestReader(t *testing.T) { - for _, zt := range tests { - readTestZip(t, zt) - } -} - -func readTestZip(t *testing.T, zt ZipTest) { - var z *Reader - var err error - if zt.Source != nil { - rat, size := zt.Source() - z, err = NewReader(rat, size) - } else { - var rc *ReadCloser - rc, err = OpenReader(filepath.Join(runtime.GOROOT(), "src/archive/zip/testdata", zt.Name)) - if err == nil { - defer rc.Close() - z = &rc.Reader - } - } - if err != zt.Error { - t.Errorf("%s: error=%v, want %v", zt.Name, err, zt.Error) - return - } - - // bail if file is not zip - if err == ErrFormat { - return - } - - // bail here if no Files expected to be tested - // (there may actually be files in the zip, but we don't care) - if zt.File == nil { - return - } - - if z.Comment != zt.Comment { - t.Errorf("%s: comment=%q, want %q", zt.Name, z.Comment, zt.Comment) - } - if len(z.File) != len(zt.File) { - t.Fatalf("%s: file count=%d, want %d", zt.Name, len(z.File), len(zt.File)) - } - - // test read of each file - for i, ft := range zt.File { - readTestFile(t, zt, ft, z.File[i]) - } - - // test simultaneous reads - n := 0 - done := make(chan bool) - for i := 0; i < 5; i++ { - for j, ft := range zt.File { - go func(j int, ft ZipTestFile) { - readTestFile(t, zt, ft, z.File[j]) - done <- true - }(j, ft) - n++ - } - } - for ; n > 0; n-- { - <-done - } -} - -func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File) { - if f.Name != ft.Name { - t.Errorf("%s: name=%q, want %q", zt.Name, f.Name, ft.Name) - } - - if ft.Mtime != "" { - mtime, err := time.Parse("01-02-06 15:04:05", ft.Mtime) - if err != nil { - t.Error(err) - return - } - if ft := f.ModTime(); !ft.Equal(mtime) { - t.Errorf("%s: %s: mtime=%s, want %s", zt.Name, f.Name, ft, mtime) - } - } - - testFileMode(t, zt.Name, f, ft.Mode) - - size := uint64(f.UncompressedSize) - if size == uint32max { - size = f.UncompressedSize64 - } else if size != f.UncompressedSize64 { - t.Errorf("%v: UncompressedSize=%#x does not match UncompressedSize64=%#x", f.Name, size, f.UncompressedSize64) - } - - r, err := f.Open() - if err != nil { - t.Errorf("%s: %v", zt.Name, err) - return - } - - // For very large files, just check that the size is correct. - // The content is expected to be all zeros. - // Don't bother uncompressing: too big. 
- if ft.Content == nil && ft.File == "" && ft.Size > 0 { - if size != ft.Size { - t.Errorf("%v: uncompressed size %#x, want %#x", size, ft.Size) - } - r.Close() - return - } - - var b bytes.Buffer - _, err = io.Copy(&b, r) - if err != ft.ContentErr { - t.Errorf("%s: copying contents: %v (want %v)", zt.Name, err, ft.ContentErr) - } - if err != nil { - return - } - r.Close() - - if g := uint64(b.Len()); g != size { - t.Errorf("%v: read %v bytes but f.UncompressedSize == %v", f.Name, g, size) - } - - var c []byte - if ft.Content != nil { - c = ft.Content - } else if c, err = ioutil.ReadFile("testdata/" + ft.File); err != nil { - t.Error(err) - return - } - - if b.Len() != len(c) { - t.Errorf("%s: len=%d, want %d", f.Name, b.Len(), len(c)) - return - } - - for i, b := range b.Bytes() { - if b != c[i] { - t.Errorf("%s: content[%d]=%q want %q", f.Name, i, b, c[i]) - return - } - } -} - -func testFileMode(t *testing.T, zipName string, f *File, want os.FileMode) { - mode := f.Mode() - if want == 0 { - t.Errorf("%s: %s mode: got %v, want none", zipName, f.Name, mode) - } else if mode != want { - t.Errorf("%s: %s mode: want %v, got %v", zipName, f.Name, want, mode) - } -} - -func TestInvalidFiles(t *testing.T) { - const size = 1024 * 70 // 70kb - b := make([]byte, size) - - // zeroes - _, err := NewReader(bytes.NewReader(b), size) - if err != ErrFormat { - t.Errorf("zeroes: error=%v, want %v", err, ErrFormat) - } - - // repeated directoryEndSignatures - sig := make([]byte, 4) - binary.LittleEndian.PutUint32(sig, directoryEndSignature) - for i := 0; i < size-4; i += 4 { - copy(b[i:i+4], sig) - } - _, err = NewReader(bytes.NewReader(b), size) - if err != ErrFormat { - t.Errorf("sigs: error=%v, want %v", err, ErrFormat) - } -} - -func messWith(fileName string, corrupter func(b []byte)) (r io.ReaderAt, size int64) { - data, err := ioutil.ReadFile(filepath.Join("testdata", fileName)) - if err != nil { - panic("Error reading " + fileName + ": " + err.Error()) - } - corrupter(data) - return bytes.NewReader(data), int64(len(data)) -} - -func returnCorruptCRC32Zip() (r io.ReaderAt, size int64) { - return messWith("go-with-datadesc-sig.zip", func(b []byte) { - // Corrupt one of the CRC32s in the data descriptor: - b[0x2d]++ - }) -} - -func returnCorruptNotStreamedZip() (r io.ReaderAt, size int64) { - return messWith("crc32-not-streamed.zip", func(b []byte) { - // Corrupt foo.txt's final crc32 byte, in both - // the file header and TOC. (0x7e -> 0x7f) - b[0x11]++ - b[0x9d]++ - - // TODO(bradfitz): add a new test that only corrupts - // one of these values, and verify that that's also an - // error. Currently, the reader code doesn't verify the - // fileheader and TOC's crc32 match if they're both - // non-zero and only the second line above, the TOC, - // is what matters. - }) -} - -// rZipBytes returns the bytes of a recursive zip file, without -// putting it on disk and triggering certain virus scanners. 
-func rZipBytes() []byte { - s := ` -0000000 50 4b 03 04 14 00 00 00 08 00 08 03 64 3c f9 f4 -0000010 89 64 48 01 00 00 b8 01 00 00 07 00 00 00 72 2f -0000020 72 2e 7a 69 70 00 25 00 da ff 50 4b 03 04 14 00 -0000030 00 00 08 00 08 03 64 3c f9 f4 89 64 48 01 00 00 -0000040 b8 01 00 00 07 00 00 00 72 2f 72 2e 7a 69 70 00 -0000050 2f 00 d0 ff 00 25 00 da ff 50 4b 03 04 14 00 00 -0000060 00 08 00 08 03 64 3c f9 f4 89 64 48 01 00 00 b8 -0000070 01 00 00 07 00 00 00 72 2f 72 2e 7a 69 70 00 2f -0000080 00 d0 ff c2 54 8e 57 39 00 05 00 fa ff c2 54 8e -0000090 57 39 00 05 00 fa ff 00 05 00 fa ff 00 14 00 eb -00000a0 ff c2 54 8e 57 39 00 05 00 fa ff 00 05 00 fa ff -00000b0 00 14 00 eb ff 42 88 21 c4 00 00 14 00 eb ff 42 -00000c0 88 21 c4 00 00 14 00 eb ff 42 88 21 c4 00 00 14 -00000d0 00 eb ff 42 88 21 c4 00 00 14 00 eb ff 42 88 21 -00000e0 c4 00 00 00 00 ff ff 00 00 00 ff ff 00 34 00 cb -00000f0 ff 42 88 21 c4 00 00 00 00 ff ff 00 00 00 ff ff -0000100 00 34 00 cb ff 42 e8 21 5e 0f 00 00 00 ff ff 0a -0000110 f0 66 64 12 61 c0 15 dc e8 a0 48 bf 48 af 2a b3 -0000120 20 c0 9b 95 0d c4 67 04 42 53 06 06 06 40 00 06 -0000130 00 f9 ff 6d 01 00 00 00 00 42 e8 21 5e 0f 00 00 -0000140 00 ff ff 0a f0 66 64 12 61 c0 15 dc e8 a0 48 bf -0000150 48 af 2a b3 20 c0 9b 95 0d c4 67 04 42 53 06 06 -0000160 06 40 00 06 00 f9 ff 6d 01 00 00 00 00 50 4b 01 -0000170 02 14 00 14 00 00 00 08 00 08 03 64 3c f9 f4 89 -0000180 64 48 01 00 00 b8 01 00 00 07 00 00 00 00 00 00 -0000190 00 00 00 00 00 00 00 00 00 00 00 72 2f 72 2e 7a -00001a0 69 70 50 4b 05 06 00 00 00 00 01 00 01 00 35 00 -00001b0 00 00 6d 01 00 00 00 00` - s = regexp.MustCompile(`[0-9a-f]{7}`).ReplaceAllString(s, "") - s = regexp.MustCompile(`\s+`).ReplaceAllString(s, "") - b, err := hex.DecodeString(s) - if err != nil { - panic(err) - } - return b -} - -func returnRecursiveZip() (r io.ReaderAt, size int64) { - b := rZipBytes() - return bytes.NewReader(b), int64(len(b)) -} - -// biggestZipBytes returns the bytes of a zip file biggest.zip -// that contains a zip file bigger.zip that contains a zip file -// big.zip that contains big.file, which contains 2³²-1 zeros. -// The big.zip file is interesting because it has no zip64 header, -// much like the innermost zip files in the well-known 42.zip. 
-// -// biggest.zip was generated by changing isZip64 to use > uint32max -// instead of >= uint32max and then running this program: -// -// package main -// -// import ( -// "archive/zip" -// "bytes" -// "io" -// "io/ioutil" -// "log" -// ) -// -// type zeros struct{} -// -// func (zeros) Read(b []byte) (int, error) { -// for i := range b { -// b[i] = 0 -// } -// return len(b), nil -// } -// -// func main() { -// bigZip := makeZip("big.file", io.LimitReader(zeros{}, 1<<32-1)) -// if err := ioutil.WriteFile("/tmp/big.zip", bigZip, 0666); err != nil { -// log.Fatal(err) -// } -// -// biggerZip := makeZip("big.zip", bytes.NewReader(bigZip)) -// if err := ioutil.WriteFile("/tmp/bigger.zip", biggerZip, 0666); err != nil { -// log.Fatal(err) -// } -// -// biggestZip := makeZip("bigger.zip", bytes.NewReader(biggerZip)) -// if err := ioutil.WriteFile("/tmp/biggest.zip", biggestZip, 0666); err != nil { -// log.Fatal(err) -// } -// } -// -// func makeZip(name string, r io.Reader) []byte { -// var buf bytes.Buffer -// w := zip.NewWriter(&buf) -// wf, err := w.Create(name) -// if err != nil { -// log.Fatal(err) -// } -// if _, err = io.Copy(wf, r); err != nil { -// log.Fatal(err) -// } -// if err := w.Close(); err != nil { -// log.Fatal(err) -// } -// return buf.Bytes() -// } -// -// The 4 GB of zeros compresses to 4 MB, which compresses to 20 kB, -// which compresses to 1252 bytes (in the hex dump below). -// -// It's here in hex for the same reason as rZipBytes above: to avoid -// problems with on-disk virus scanners or other zip processors. -// -func biggestZipBytes() []byte { - s := ` -0000000 50 4b 03 04 14 00 08 00 08 00 00 00 00 00 00 00 -0000010 00 00 00 00 00 00 00 00 00 00 0a 00 00 00 62 69 -0000020 67 67 65 72 2e 7a 69 70 ec dc 6b 4c 53 67 18 07 -0000030 f0 16 c5 ca 65 2e cb b8 94 20 61 1f 44 33 c7 cd -0000040 c0 86 4a b5 c0 62 8a 61 05 c6 cd 91 b2 54 8c 1b -0000050 63 8b 03 9c 1b 95 52 5a e3 a0 19 6c b2 05 59 44 -0000060 64 9d 73 83 71 11 46 61 14 b9 1d 14 09 4a c3 60 -0000070 2e 4c 6e a5 60 45 02 62 81 95 b6 94 9e 9e 77 e7 -0000080 d0 43 b6 f8 71 df 96 3c e7 a4 69 ce bf cf e9 79 -0000090 ce ef 79 3f bf f1 31 db b6 bb 31 76 92 e7 f3 07 -00000a0 8b fc 9c ca cc 08 cc cb cc 5e d2 1c 88 d9 7e bb -00000b0 4f bb 3a 3f 75 f1 5d 7f 8f c2 68 67 77 8f 25 ff -00000c0 84 e2 93 2d ef a4 95 3d 71 4e 2c b9 b0 87 c3 be -00000d0 3d f8 a7 60 24 61 c5 ef ae 9e c8 6c 6d 4e 69 c8 -00000e0 67 65 34 f8 37 76 2d 76 5c 54 f3 95 65 49 c7 0f -00000f0 18 71 4b 7e 5b 6a d1 79 47 61 41 b0 4e 2a 74 45 -0000100 43 58 12 b2 5a a5 c6 7d 68 55 88 d4 98 75 18 6d -0000110 08 d1 1f 8f 5a 9e 96 ee 45 cf a4 84 4e 4b e8 50 -0000120 a7 13 d9 06 de 52 81 97 36 b2 d7 b8 fc 2b 5f 55 -0000130 23 1f 32 59 cf 30 27 fb e2 8a b9 de 45 dd 63 9c -0000140 4b b5 8b 96 4c 7a 62 62 cc a1 a7 cf fa f1 fe dd -0000150 54 62 11 bf 36 78 b3 c7 b1 b5 f2 61 4d 4e dd 66 -0000160 32 2e e6 70 34 5f f4 c9 e6 6c 43 6f da 6b c6 c3 -0000170 09 2c ce 09 57 7f d2 7e b4 23 ba 7c 1b 99 bc 22 -0000180 3e f1 de 91 2f e3 9c 1b 82 cc c2 84 39 aa e6 de -0000190 b4 69 fc cc cb 72 a6 61 45 f0 d3 1d 26 19 7c 8d -00001a0 29 c8 66 02 be 77 6a f9 3d 34 79 17 19 c8 96 24 -00001b0 a3 ac e4 dd 3b 1a 8e c6 fe 96 38 6b bf 67 5a 23 -00001c0 f4 16 f4 e6 8a b4 fc c2 cd bf 95 66 1d bb 35 aa -00001d0 92 7d 66 d8 08 8d a5 1f 54 2a af 09 cf 61 ff d2 -00001e0 85 9d 8f b6 d7 88 07 4a 86 03 db 64 f3 d9 92 73 -00001f0 df ec a7 fc 23 4c 8d 83 79 63 2a d9 fd 8d b3 c8 -0000200 8f 7e d4 19 85 e6 8d 1c 76 f0 8b 58 32 fd 9a d6 -0000210 85 e2 48 ad c3 d5 60 6f 7e 22 dd ef 09 49 7c 7f -0000220 3a 
45 c3 71 b7 df f3 4c 63 fb b5 d9 31 5f 6e d6 -0000230 24 1d a4 4a fe 32 a7 5c 16 48 5c 3e 08 6b 8a d3 -0000240 25 1d a2 12 a5 59 24 ea 20 5f 52 6d ad 94 db 6b -0000250 94 b9 5d eb 4b a7 5c 44 bb 1e f2 3c 6b cf 52 c9 -0000260 e9 e5 ba 06 b9 c4 e5 0a d0 00 0d d0 00 0d d0 00 -0000270 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d -0000280 d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 -0000290 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 -00002a0 0d d0 00 cd ff 9e 46 86 fa a7 7d 3a 43 d7 8e 10 -00002b0 52 e9 be e6 6e cf eb 9e 85 4d 65 ce cc 30 c1 44 -00002c0 c0 4e af bc 9c 6c 4b a0 d7 54 ff 1d d5 5c 89 fb -00002d0 b5 34 7e c4 c2 9e f5 a0 f6 5b 7e 6e ca 73 c7 ef -00002e0 5d be de f9 e8 81 eb a5 0a a5 63 54 2c d7 1c d1 -00002f0 89 17 85 f8 16 94 f2 8a b2 a3 f5 b6 6d df 75 cd -0000300 90 dd 64 bd 5d 55 4e f2 55 19 1b b7 cc ef 1b ea -0000310 2e 05 9c f4 aa 1e a8 cd a6 82 c7 59 0f 5e 9d e0 -0000320 bb fc 6c d6 99 23 eb 36 ad c6 c5 e1 d8 e1 e2 3e -0000330 d9 90 5a f7 91 5d 6f bc 33 6d 98 47 d2 7c 2e 2f -0000340 99 a4 25 72 85 49 2c be 0b 5b af 8f e5 6e 81 a6 -0000350 a3 5a 6f 39 53 3a ab 7a 8b 1e 26 f7 46 6c 7d 26 -0000360 53 b3 22 31 94 d3 83 f2 18 4d f5 92 33 27 53 97 -0000370 0f d3 e6 55 9c a6 c5 31 87 6f d3 f3 ae 39 6f 56 -0000380 10 7b ab 7e d0 b4 ca f2 b8 05 be 3f 0e 6e 5a 75 -0000390 ab 0c f5 37 0e ba 8e 75 71 7a aa ed 7a dd 6a 63 -00003a0 be 9b a0 97 27 6a 6f e7 d3 8b c4 7c ec d3 91 56 -00003b0 d9 ac 5e bf 16 42 2f 00 1f 93 a2 23 87 bd e2 59 -00003c0 a0 de 1a 66 c8 62 eb 55 8f 91 17 b4 61 42 7a 50 -00003d0 40 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40 -00003e0 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40 03 -00003f0 34 40 03 34 40 03 34 ff 85 86 90 8b ea 67 90 0d -0000400 e1 42 1b d2 61 d6 79 ec fd 3e 44 28 a4 51 6c 5c -0000410 fc d2 72 ca ba 82 18 46 16 61 cd 93 a9 0f d1 24 -0000420 17 99 e2 2c 71 16 84 0c c8 7a 13 0f 9a 5e c5 f0 -0000430 79 64 e2 12 4d c8 82 a1 81 19 2d aa 44 6d 87 54 -0000440 84 71 c1 f6 d4 ca 25 8c 77 b9 08 c7 c8 5e 10 8a -0000450 8f 61 ed 8c ba 30 1f 79 9a c7 60 34 2b b9 8c f8 -0000460 18 a6 83 1b e3 9f ad 79 fe fd 1b 8b f1 fc 41 6f -0000470 d4 13 1f e3 b8 83 ba 64 92 e7 eb e4 77 05 8f ba -0000480 fa 3b 00 00 ff ff 50 4b 07 08 a6 18 b1 91 5e 04 -0000490 00 00 e4 47 00 00 50 4b 01 02 14 00 14 00 08 00 -00004a0 08 00 00 00 00 00 a6 18 b1 91 5e 04 00 00 e4 47 -00004b0 00 00 0a 00 00 00 00 00 00 00 00 00 00 00 00 00 -00004c0 00 00 00 00 62 69 67 67 65 72 2e 7a 69 70 50 4b -00004d0 05 06 00 00 00 00 01 00 01 00 38 00 00 00 96 04 -00004e0 00 00 00 00` - s = regexp.MustCompile(`[0-9a-f]{7}`).ReplaceAllString(s, "") - s = regexp.MustCompile(`\s+`).ReplaceAllString(s, "") - b, err := hex.DecodeString(s) - if err != nil { - panic(err) - } - return b -} - -func returnBigZipBytes() (r io.ReaderAt, size int64) { - b := biggestZipBytes() - for i := 0; i < 2; i++ { - r, err := NewReader(bytes.NewReader(b), int64(len(b))) - if err != nil { - panic(err) - } - f, err := r.File[0].Open() - if err != nil { - panic(err) - } - b, err = ioutil.ReadAll(f) - if err != nil { - panic(err) - } - } - return bytes.NewReader(b), int64(len(b)) -} - -func TestIssue8186(t *testing.T) { - // Directory headers & data found in the TOC of a JAR file. 
- dirEnts := []string{ - "PK\x01\x02\n\x00\n\x00\x00\b\x00\x004\x9d3?\xaa\x1b\x06\xf0\x81\x02\x00\x00\x81\x02\x00\x00-\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00res/drawable-xhdpi-v4/ic_actionbar_accept.png\xfe\xca\x00\x00\x00", - "PK\x01\x02\n\x00\n\x00\x00\b\x00\x004\x9d3?\x90K\x89\xc7t\n\x00\x00t\n\x00\x00\x0e\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd1\x02\x00\x00resources.arsc\x00\x00\x00", - "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xff$\x18\xed3\x03\x00\x00\xb4\b\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00t\r\x00\x00AndroidManifest.xml", - "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\x14\xc5K\xab\x192\x02\x00\xc8\xcd\x04\x00\v\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe8\x10\x00\x00classes.dex", - "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?E\x96\nD\xac\x01\x00\x00P\x03\x00\x00&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00:C\x02\x00res/layout/actionbar_set_wallpaper.xml", - "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?Ļ\x14\xe3\xd8\x01\x00\x00\xd8\x03\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00:E\x02\x00res/layout/wallpaper_cropper.xml", - "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?}\xc1\x15\x9eZ\x01\x00\x00!\x02\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00`G\x02\x00META-INF/MANIFEST.MF", - "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xe6\x98Ьo\x01\x00\x00\x84\x02\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfcH\x02\x00META-INF/CERT.SF", - "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xbfP\x96b\x86\x04\x00\x00\xb2\x06\x00\x00\x11\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa9J\x02\x00META-INF/CERT.RSA", - } - for i, s := range dirEnts { - var f File - err := readDirectoryHeader(&f, strings.NewReader(s)) - if err != nil { - t.Errorf("error reading #%d: %v", i, err) - } - } -} - -// Verify we return ErrUnexpectedEOF when length is short. -func TestIssue10957(t *testing.T) { - data := []byte("PK\x03\x040000000PK\x01\x0200000" + - "0000000000000000000\x00" + - "\x00\x00\x00\x00\x00000000000000PK\x01" + - "\x020000000000000000000" + - "00000\v\x00\x00\x00\x00\x00000000000" + - "00000000000000PK\x01\x0200" + - "00000000000000000000" + - "00\v\x00\x00\x00\x00\x00000000000000" + - "00000000000PK\x01\x020000<" + - "0\x00\x0000000000000000\v\x00\v" + - "\x00\x00\x00\x00\x0000000000\x00\x00\x00\x00000" + - "00000000PK\x01\x0200000000" + - "0000000000000000\v\x00\x00\x00" + - "\x00\x0000PK\x05\x06000000\x05\x000000" + - "\v\x00\x00\x00\x00\x00") - z, err := NewReader(bytes.NewReader(data), int64(len(data))) - if err != nil { - t.Fatal(err) - } - for i, f := range z.File { - r, err := f.Open() - if err != nil { - continue - } - if f.UncompressedSize64 < 1e6 { - n, err := io.Copy(ioutil.Discard, r) - if i == 3 && err != io.ErrUnexpectedEOF { - t.Errorf("File[3] error = %v; want io.ErrUnexpectedEOF", err) - } - if err == nil && uint64(n) != f.UncompressedSize64 { - t.Errorf("file %d: bad size: copied=%d; want=%d", i, n, f.UncompressedSize64) - } - } - r.Close() - } -} - -// Verify the number of files is sane. 
-func TestIssue10956(t *testing.T) { - data := []byte("PK\x06\x06PK\x06\a0000\x00\x00\x00\x00\x00\x00\x00\x00" + - "0000PK\x05\x06000000000000" + - "0000\v\x00000\x00\x00\x00\x00\x00\x00\x000") - _, err := NewReader(bytes.NewReader(data), int64(len(data))) - const want = "TOC declares impossible 3472328296227680304 files in 57 byte" - if err == nil && !strings.Contains(err.Error(), want) { - t.Errorf("error = %v; want %q", err, want) - } -} - -// Verify we return ErrUnexpectedEOF when reading truncated data descriptor. -func TestIssue11146(t *testing.T) { - data := []byte("PK\x03\x040000000000000000" + - "000000\x01\x00\x00\x000\x01\x00\x00\xff\xff0000" + - "0000000000000000PK\x01\x02" + - "0000\b0\b\x00000000000000" + - "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000000PK\x05\x06\x00\x00" + - "\x00\x0000\x01\x0000008\x00\x00\x00\x00\x00") - z, err := NewReader(bytes.NewReader(data), int64(len(data))) - if err != nil { - t.Fatal(err) - } - r, err := z.File[0].Open() - if err != nil { - t.Fatal(err) - } - _, err = ioutil.ReadAll(r) - if err != io.ErrUnexpectedEOF { - t.Errorf("File[0] error = %v; want io.ErrUnexpectedEOF", err) - } - r.Close() -} - -// Verify we do not treat non-zip64 archives as zip64 -func TestIssue12449(t *testing.T) { - data := []byte{ - 0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x08, 0x00, - 0x00, 0x00, 0x6b, 0xb4, 0xba, 0x46, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x03, 0x00, 0x18, 0x00, 0xca, 0x64, - 0x55, 0x75, 0x78, 0x0b, 0x00, 0x50, 0x4b, 0x05, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, - 0x00, 0x49, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, - 0x00, 0x31, 0x31, 0x31, 0x32, 0x32, 0x32, 0x0a, - 0x50, 0x4b, 0x07, 0x08, 0x1d, 0x88, 0x77, 0xb0, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x50, 0x4b, 0x01, 0x02, 0x14, 0x03, 0x14, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x6b, 0xb4, 0xba, 0x46, - 0x1d, 0x88, 0x77, 0xb0, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x18, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xa0, 0x81, 0x00, 0x00, 0x00, 0x00, 0xca, 0x64, - 0x55, 0x75, 0x78, 0x0b, 0x00, 0x50, 0x4b, 0x05, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, - 0x00, 0x49, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, - 0x00, 0x97, 0x2b, 0x49, 0x23, 0x05, 0xc5, 0x0b, - 0xa7, 0xd1, 0x52, 0xa2, 0x9c, 0x50, 0x4b, 0x06, - 0x07, 0xc8, 0x19, 0xc1, 0xaf, 0x94, 0x9c, 0x61, - 0x44, 0xbe, 0x94, 0x19, 0x42, 0x58, 0x12, 0xc6, - 0x5b, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x00, 0x01, 0x00, 0x69, 0x00, 0x00, - 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, - } - // Read in the archive. 
- _, err := NewReader(bytes.NewReader([]byte(data)), int64(len(data))) - if err != nil { - t.Errorf("Error reading the archive: %v", err) - } -} diff --git a/src/inpxcreator/internal/zip/register.go b/src/inpxcreator/internal/zip/register.go index 8fccbf7..4389246 100644 --- a/src/inpxcreator/internal/zip/register.go +++ b/src/inpxcreator/internal/zip/register.go @@ -8,7 +8,6 @@ import ( "compress/flate" "errors" "io" - "io/ioutil" "sync" ) @@ -64,52 +63,85 @@ func (w *pooledFlateWriter) Close() error { return err } -var ( - mu sync.RWMutex // guards compressor and decompressor maps +var flateReaderPool sync.Pool - compressors = map[uint16]Compressor{ - Store: func(w io.Writer) (io.WriteCloser, error) { return &nopCloser{w}, nil }, - Deflate: func(w io.Writer) (io.WriteCloser, error) { return newFlateWriter(w), nil }, +func newFlateReader(r io.Reader) io.ReadCloser { + fr, ok := flateReaderPool.Get().(io.ReadCloser) + if ok { + fr.(flate.Resetter).Reset(r, nil) + } else { + fr = flate.NewReader(r) } + return &pooledFlateReader{fr: fr} +} + +type pooledFlateReader struct { + mu sync.Mutex // guards Close and Read + fr io.ReadCloser +} - decompressors = map[uint16]Decompressor{ - Store: ioutil.NopCloser, - Deflate: flate.NewReader, +func (r *pooledFlateReader) Read(p []byte) (n int, err error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.fr == nil { + return 0, errors.New("Read after Close") } + return r.fr.Read(p) +} + +func (r *pooledFlateReader) Close() error { + r.mu.Lock() + defer r.mu.Unlock() + var err error + if r.fr != nil { + err = r.fr.Close() + flateReaderPool.Put(r.fr) + r.fr = nil + } + return err +} + +var ( + compressors sync.Map // map[uint16]Compressor + decompressors sync.Map // map[uint16]Decompressor ) +func init() { + compressors.Store(Store, Compressor(func(w io.Writer) (io.WriteCloser, error) { return &nopCloser{w}, nil })) + compressors.Store(Deflate, Compressor(func(w io.Writer) (io.WriteCloser, error) { return newFlateWriter(w), nil })) + + decompressors.Store(Store, Decompressor(io.NopCloser)) + decompressors.Store(Deflate, Decompressor(newFlateReader)) +} + // RegisterDecompressor allows custom decompressors for a specified method ID. // The common methods Store and Deflate are built in. func RegisterDecompressor(method uint16, dcomp Decompressor) { - mu.Lock() - defer mu.Unlock() - - if _, ok := decompressors[method]; ok { + if _, dup := decompressors.LoadOrStore(method, dcomp); dup { panic("decompressor already registered") } - decompressors[method] = dcomp } // RegisterCompressor registers custom compressors for a specified method ID. // The common methods Store and Deflate are built in. 
func RegisterCompressor(method uint16, comp Compressor) {
-	mu.Lock()
-	defer mu.Unlock()
-
-	if _, ok := compressors[method]; ok {
+	if _, dup := compressors.LoadOrStore(method, comp); dup {
 		panic("compressor already registered")
 	}
-	compressors[method] = comp
 }
 
 func compressor(method uint16) Compressor {
-	mu.RLock()
-	defer mu.RUnlock()
-	return compressors[method]
+	ci, ok := compressors.Load(method)
+	if !ok {
+		return nil
+	}
+	return ci.(Compressor)
 }
 
 func decompressor(method uint16) Decompressor {
-	mu.RLock()
-	defer mu.RUnlock()
-	return decompressors[method]
+	di, ok := decompressors.Load(method)
+	if !ok {
+		return nil
+	}
+	return di.(Decompressor)
 }
diff --git a/src/inpxcreator/internal/zip/struct.go b/src/inpxcreator/internal/zip/struct.go
index 5ee4f88..9c37084 100644
--- a/src/inpxcreator/internal/zip/struct.go
+++ b/src/inpxcreator/internal/zip/struct.go
@@ -5,7 +5,7 @@
 /*
 Package zip provides support for reading and writing ZIP archives.
 
-See: http://www.pkware.com/documents/casestudies/APPNOTE.TXT
+See the [ZIP specification] for details.
 
 This package does not support disk spanning.
 
@@ -16,19 +16,21 @@ fields. The 64 bit fields will always contain the correct value and
 for normal archives both fields will be the same. For files requiring
 the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit
 fields must be used instead.
+
+[ZIP specification]: https://www.pkware.com/appnote
 */
 package zip
 
 import (
-	"os"
+	"io/fs"
 	"path"
 	"time"
 )
 
 // Compression methods.
 const (
-	Store   uint16 = 0
-	Deflate uint16 = 8
+	Store   uint16 = 0 // no compression
+	Deflate uint16 = 8 // DEFLATE compressed
 )
 
 const (
@@ -42,60 +44,128 @@ const (
 	directoryHeaderLen       = 46 // + filename + extra + comment
 	directoryEndLen          = 22 // + comment
 	dataDescriptorLen        = 16 // four uint32: descriptor signature, crc32, compressed size, size
-	dataDescriptor64Len      = 24 // descriptor with 8 byte sizes
+	dataDescriptor64Len      = 24 // two uint32: signature, crc32 | two uint64: compressed size, size
 	directory64LocLen        = 20 //
 	directory64EndLen        = 56 // + extra
 
-	// Constants for the first byte in CreatorVersion
+	// Constants for the first byte in CreatorVersion.
 	creatorFAT    = 0
 	creatorUnix   = 3
 	creatorNTFS   = 11
 	creatorVFAT   = 14
 	creatorMacOSX = 19
 
-	// version numbers
+	// Version numbers.
 	zipVersion20 = 20 // 2.0
 	zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)
 
-	// limits for non zip64 files
+	// Limits for non zip64 files.
 	uint16max = (1 << 16) - 1
 	uint32max = (1 << 32) - 1
 
-	// extra header id's
-	zip64ExtraId = 0x0001 // zip64 Extended Information Extra Field
+	// Extra header IDs.
+	//
+	// IDs 0..31 are reserved for official use by PKWARE.
+	// IDs above that range are defined by third-party vendors.
+	// Since ZIP lacked high precision timestamps (nor an official specification
+	// of the timezone used for the date fields), many competing extra fields
+	// have been invented. Pervasive use effectively makes them "official".
+	//
+	// See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField
+	zip64ExtraID       = 0x0001 // Zip64 extended information
+	ntfsExtraID        = 0x000a // NTFS
+	unixExtraID        = 0x000d // UNIX
+	extTimeExtraID     = 0x5455 // Extended timestamp
+	infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension
 )
 
-// FileHeader describes a file within a zip file.
-// See the zip spec for details.
+// FileHeader describes a file within a ZIP file.
+// See the [ZIP specification] for details.
+//
+// [ZIP specification]: https://www.pkware.com/appnote
 type FileHeader struct {
 	// Name is the name of the file.
-	// It must be a relative path: it must not start with a drive
-	// letter (e.g. C:) or leading slash, and only forward slashes
-	// are allowed.
+	//
+	// It must be a relative path, not start with a drive letter (such as "C:"),
+	// and must use forward slashes instead of back slashes. A trailing slash
+	// indicates that this file is a directory and should have no data.
 	Name string
 
-	CreatorVersion   uint16
-	ReaderVersion    uint16
-	Flags            uint16
-	Method           uint16
-	ModifiedTime     uint16 // MS-DOS time
-	ModifiedDate     uint16 // MS-DOS date
-	CRC32            uint32
-	CompressedSize   uint32 // Deprecated: Use CompressedSize64 instead.
-	UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead.
-	CompressedSize64 uint64
+	// Comment is any arbitrary user-defined string shorter than 64KiB.
+	Comment string
+
+	// NonUTF8 indicates that Name and Comment are not encoded in UTF-8.
+	//
+	// By specification, the only other encoding permitted should be CP-437,
+	// but historically many ZIP readers interpret Name and Comment as whatever
+	// the system's local character encoding happens to be.
+	//
+	// This flag should only be set if the user intends to encode a non-portable
+	// ZIP file for a specific localized region. Otherwise, the Writer
+	// automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings.
+	NonUTF8 bool
+
+	CreatorVersion uint16
+	ReaderVersion  uint16
+	Flags          uint16
+
+	// Method is the compression method. If zero, Store is used.
+	Method uint16
+
+	// Modified is the modified time of the file.
+	//
+	// When reading, an extended timestamp is preferred over the legacy MS-DOS
+	// date field, and the offset between the times is used as the timezone.
+	// If only the MS-DOS date is present, the timezone is assumed to be UTC.
+	//
+	// When writing, an extended timestamp (which is timezone-agnostic) is
+	// always emitted. The legacy MS-DOS date field is encoded according to the
+	// location of the Modified time.
+	Modified time.Time
+
+	// ModifiedTime is an MS-DOS-encoded time.
+	//
+	// Deprecated: Use Modified instead.
+	ModifiedTime uint16
+
+	// ModifiedDate is an MS-DOS-encoded date.
+	//
+	// Deprecated: Use Modified instead.
+	ModifiedDate uint16
+
+	// CRC32 is the CRC32 checksum of the file content.
+	CRC32 uint32
+
+	// CompressedSize is the compressed size of the file in bytes.
+	// If either the uncompressed or compressed size of the file
+	// does not fit in 32 bits, CompressedSize is set to ^uint32(0).
+	//
+	// Deprecated: Use CompressedSize64 instead.
+	CompressedSize uint32
+
+	// UncompressedSize is the uncompressed size of the file in bytes.
+	// If either the uncompressed or compressed size of the file
+	// does not fit in 32 bits, UncompressedSize is set to ^uint32(0).
+	//
+	// Deprecated: Use UncompressedSize64 instead.
+	UncompressedSize uint32
+
+	// CompressedSize64 is the compressed size of the file in bytes.
+	CompressedSize64 uint64
+
+	// UncompressedSize64 is the uncompressed size of the file in bytes.
 	UncompressedSize64 uint64
-	Extra              []byte
-	ExternalAttrs      uint32 // Meaning depends on CreatorVersion
-	Comment            string
+
+	Extra         []byte
+	ExternalAttrs uint32 // Meaning depends on CreatorVersion
 }
 
-// FileInfo returns an os.FileInfo for the FileHeader.
-func (h *FileHeader) FileInfo() os.FileInfo {
+// FileInfo returns an fs.FileInfo for the FileHeader.
+func (h *FileHeader) FileInfo() fs.FileInfo { return headerFileInfo{h} } -// headerFileInfo implements os.FileInfo. +// headerFileInfo implements fs.FileInfo. type headerFileInfo struct { fh *FileHeader } @@ -107,17 +177,27 @@ func (fi headerFileInfo) Size() int64 { } return int64(fi.fh.UncompressedSize) } -func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } -func (fi headerFileInfo) ModTime() time.Time { return fi.fh.ModTime() } -func (fi headerFileInfo) Mode() os.FileMode { return fi.fh.Mode() } -func (fi headerFileInfo) Sys() interface{} { return fi.fh } +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { + if fi.fh.Modified.IsZero() { + return fi.fh.ModTime() + } + return fi.fh.Modified.UTC() +} +func (fi headerFileInfo) Mode() fs.FileMode { return fi.fh.Mode() } +func (fi headerFileInfo) Type() fs.FileMode { return fi.fh.Mode().Type() } +func (fi headerFileInfo) Sys() any { return fi.fh } + +func (fi headerFileInfo) Info() (fs.FileInfo, error) { return fi, nil } // FileInfoHeader creates a partially-populated FileHeader from an -// os.FileInfo. -// Because os.FileInfo's Name method returns only the base name of +// fs.FileInfo. +// Because fs.FileInfo's Name method returns only the base name of // the file it describes, it may be necessary to modify the Name field // of the returned header to provide the full path name of the file. -func FileInfoHeader(fi os.FileInfo) (*FileHeader, error) { +// If compression is desired, callers should set the FileHeader.Method +// field; it is unset by default. +func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) { size := fi.Size() fh := &FileHeader{ Name: fi.Name(), @@ -144,9 +224,24 @@ type directoryEnd struct { comment string } +// timeZone returns a *time.Location based on the provided offset. +// If the offset is non-sensible, then this uses an offset of zero. +func timeZone(offset time.Duration) *time.Location { + const ( + minOffset = -12 * time.Hour // E.g., Baker island at -12:00 + maxOffset = +14 * time.Hour // E.g., Line island at +14:00 + offsetAlias = 15 * time.Minute // E.g., Nepal at +5:45 + ) + offset = offset.Round(offsetAlias) + if offset < minOffset || maxOffset < offset { + offset = 0 + } + return time.FixedZone("", int(offset/time.Second)) +} + // msDosTimeToTime converts an MS-DOS date and time into a time.Time. // The resolution is 2s. -// See: http://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx +// See: https://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx func msDosTimeToTime(dosDate, dosTime uint16) time.Time { return time.Date( // date bits 0-4: day of month; 5-8: month; 9-15: years since 1980 @@ -166,23 +261,28 @@ func msDosTimeToTime(dosDate, dosTime uint16) time.Time { // timeToMsDosTime converts a time.Time to an MS-DOS date and time. // The resolution is 2s. -// See: http://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx +// See: https://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) { - t = t.In(time.UTC) fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9) fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11) return } -// ModTime returns the modification time in UTC. -// The resolution is 2s. +// ModTime returns the modification time in UTC using the legacy +// ModifiedDate and ModifiedTime fields. +// +// Deprecated: Use Modified instead. 
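+//
+// A quick sketch of the two paths (t is any time.Time value):
+//
+//	fh.Modified = t  // preferred: full resolution, timezone preserved
+//	fh.SetModTime(t) // deprecated: converts to UTC, then also fills the
+//	                 // 2s-resolution MS-DOS fields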
func (h *FileHeader) ModTime() time.Time { return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime) } -// SetModTime sets the ModifiedTime and ModifiedDate fields to the given time in UTC. -// The resolution is 2s. +// SetModTime sets the Modified, ModifiedTime, and ModifiedDate fields +// to the given time in UTC. +// +// Deprecated: Use Modified instead. func (h *FileHeader) SetModTime(t time.Time) { + t = t.UTC() // Convert to UTC for compatibility + h.Modified = t h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t) } @@ -206,7 +306,7 @@ const ( ) // Mode returns the permission and mode bits for the FileHeader. -func (h *FileHeader) Mode() (mode os.FileMode) { +func (h *FileHeader) Mode() (mode fs.FileMode) { switch h.CreatorVersion >> 8 { case creatorUnix, creatorMacOSX: mode = unixModeToFileMode(h.ExternalAttrs >> 16) @@ -214,18 +314,18 @@ func (h *FileHeader) Mode() (mode os.FileMode) { mode = msdosModeToFileMode(h.ExternalAttrs) } if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' { - mode |= os.ModeDir + mode |= fs.ModeDir } return mode } // SetMode changes the permission and mode bits for the FileHeader. -func (h *FileHeader) SetMode(mode os.FileMode) { +func (h *FileHeader) SetMode(mode fs.FileMode) { h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8 h.ExternalAttrs = fileModeToUnixMode(mode) << 16 // set MSDOS attributes too, as the original zip does. - if mode&os.ModeDir != 0 { + if mode&fs.ModeDir != 0 { h.ExternalAttrs |= msdosDir } if mode&0200 == 0 { @@ -234,13 +334,17 @@ func (h *FileHeader) SetMode(mode os.FileMode) { } // isZip64 reports whether the file size exceeds the 32 bit limit -func (fh *FileHeader) isZip64() bool { - return fh.CompressedSize64 >= uint32max || fh.UncompressedSize64 >= uint32max +func (h *FileHeader) isZip64() bool { + return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max +} + +func (f *FileHeader) hasDataDescriptor() bool { + return f.Flags&0x8 != 0 } -func msdosModeToFileMode(m uint32) (mode os.FileMode) { +func msdosModeToFileMode(m uint32) (mode fs.FileMode) { if m&msdosDir != 0 { - mode = os.ModeDir | 0777 + mode = fs.ModeDir | 0777 } else { mode = 0666 } @@ -250,64 +354,62 @@ func msdosModeToFileMode(m uint32) (mode os.FileMode) { return mode } -func fileModeToUnixMode(mode os.FileMode) uint32 { +func fileModeToUnixMode(mode fs.FileMode) uint32 { var m uint32 - switch mode & os.ModeType { + switch mode & fs.ModeType { default: m = s_IFREG - case os.ModeDir: + case fs.ModeDir: m = s_IFDIR - case os.ModeSymlink: + case fs.ModeSymlink: m = s_IFLNK - case os.ModeNamedPipe: + case fs.ModeNamedPipe: m = s_IFIFO - case os.ModeSocket: + case fs.ModeSocket: m = s_IFSOCK - case os.ModeDevice: - if mode&os.ModeCharDevice != 0 { - m = s_IFCHR - } else { - m = s_IFBLK - } + case fs.ModeDevice: + m = s_IFBLK + case fs.ModeDevice | fs.ModeCharDevice: + m = s_IFCHR } - if mode&os.ModeSetuid != 0 { + if mode&fs.ModeSetuid != 0 { m |= s_ISUID } - if mode&os.ModeSetgid != 0 { + if mode&fs.ModeSetgid != 0 { m |= s_ISGID } - if mode&os.ModeSticky != 0 { + if mode&fs.ModeSticky != 0 { m |= s_ISVTX } return m | uint32(mode&0777) } -func unixModeToFileMode(m uint32) os.FileMode { - mode := os.FileMode(m & 0777) +func unixModeToFileMode(m uint32) fs.FileMode { + mode := fs.FileMode(m & 0777) switch m & s_IFMT { case s_IFBLK: - mode |= os.ModeDevice + mode |= fs.ModeDevice case s_IFCHR: - mode |= os.ModeDevice | os.ModeCharDevice + mode |= fs.ModeDevice | fs.ModeCharDevice case s_IFDIR: - mode |= os.ModeDir + mode |= fs.ModeDir case 
s_IFIFO:
-		mode |= os.ModeNamedPipe
+		mode |= fs.ModeNamedPipe
 	case s_IFLNK:
-		mode |= os.ModeSymlink
+		mode |= fs.ModeSymlink
 	case s_IFREG:
 		// nothing to do
 	case s_IFSOCK:
-		mode |= os.ModeSocket
+		mode |= fs.ModeSocket
 	}
 	if m&s_ISGID != 0 {
-		mode |= os.ModeSetgid
+		mode |= fs.ModeSetgid
 	}
 	if m&s_ISUID != 0 {
-		mode |= os.ModeSetuid
+		mode |= fs.ModeSetuid
 	}
 	if m&s_ISVTX != 0 {
-		mode |= os.ModeSticky
+		mode |= fs.ModeSticky
 	}
 	return mode
 }
diff --git a/src/inpxcreator/internal/zip/writer.go b/src/inpxcreator/internal/zip/writer.go
index 534ee35..e1cd49c 100644
--- a/src/inpxcreator/internal/zip/writer.go
+++ b/src/inpxcreator/internal/zip/writer.go
@@ -11,9 +11,14 @@ import (
 	"hash"
 	"hash/crc32"
 	"io"
+	"strings"
+	"unicode/utf8"
 )
 
-// TODO(adg): support zip file comments
+var (
+	errLongName  = errors.New("zip: FileHeader.Name too long")
+	errLongExtra = errors.New("zip: FileHeader.Extra too long")
+)
 
 // Writer implements a zip file writer.
 type Writer struct {
@@ -22,12 +27,20 @@ type Writer struct {
 	last        *fileWriter
 	closed      bool
 	compressors map[uint16]Compressor
-	names map[string]int // filename -> index in dir slice.
+	comment     string
+
+	// To support the appending writer we need filename -> index in dir slice.
+	names map[string]int
+
+	// testHookCloseSizeOffset if non-nil is called with the size
+	// and offset of the central directory at Close.
+	testHookCloseSizeOffset func(size, offset uint64)
 }
 
 type header struct {
 	*FileHeader
 	offset uint64
+	raw    bool
 }
 
 // NewWriter returns a new Writer writing a zip file to w.
@@ -71,8 +84,18 @@ func (w *Writer) Flush() error {
 	return w.cw.w.(*bufio.Writer).Flush()
 }
 
+// SetComment sets the end-of-central-directory comment field.
+// It can only be called before Close.
+func (w *Writer) SetComment(comment string) error {
+	if len(comment) > uint16max {
+		return errors.New("zip: Writer.Comment too long")
+	}
+	w.comment = comment
+	return nil
+}
+
 // Close finishes writing the zip file by writing the central directory.
-// It does not (and can not) close the underlying writer.
+// It does not close the underlying writer.
 func (w *Writer) Close() error {
 	if w.last != nil && !w.last.closed {
 		if err := w.last.close(); err != nil {
@@ -87,14 +110,7 @@ func (w *Writer) Close() error {
 
 	// write central directory
 	start := w.cw.count
-	records := uint64(0)
 	for _, h := range w.dir {
-		if h.FileHeader == nil {
-			// This entry has been superceded by a later
-			// appended entry.
- continue - } - records++ var buf [directoryHeaderLen]byte b := writeBuf(buf[:]) b.uint32(uint32(directoryHeaderSignature)) @@ -115,7 +131,7 @@ func (w *Writer) Close() error { // append a zip64 extra block to Extra var buf [28]byte // 2x uint16 + 3x uint64 eb := writeBuf(buf[:]) - eb.uint16(zip64ExtraId) + eb.uint16(zip64ExtraID) eb.uint16(24) // size = 3x uint64 eb.uint64(h.UncompressedSize64) eb.uint64(h.CompressedSize64) @@ -125,6 +141,7 @@ func (w *Writer) Close() error { b.uint32(h.CompressedSize) b.uint32(h.UncompressedSize) } + b.uint16(uint16(len(h.Name))) b.uint16(uint16(len(h.Extra))) b.uint16(uint16(len(h.Comment))) @@ -150,10 +167,15 @@ func (w *Writer) Close() error { } end := w.cw.count + records := uint64(len(w.dir)) size := uint64(end - start) offset := uint64(start) - if records > uint16max || size > uint32max || offset > uint32max { + if f := w.testHookCloseSizeOffset; f != nil { + f(size, offset) + } + + if records >= uint16max || size >= uint32max || offset >= uint32max { var buf [directory64EndLen + directory64LocLen]byte b := writeBuf(buf[:]) @@ -179,7 +201,7 @@ func (w *Writer) Close() error { return err } - // store max values in the regular end record to signal that + // store max values in the regular end record to signal // that the zip64 values should be used instead records = uint16max size = uint32max @@ -190,24 +212,29 @@ func (w *Writer) Close() error { var buf [directoryEndLen]byte b := writeBuf(buf[:]) b.uint32(uint32(directoryEndSignature)) - b = b[4:] // skip over disk number and first disk number (2x uint16) - b.uint16(uint16(records)) // number of entries this disk - b.uint16(uint16(records)) // number of entries total - b.uint32(uint32(size)) // size of directory - b.uint32(uint32(offset)) // start of directory - // skipped size of comment (always zero) + b = b[4:] // skip over disk number and first disk number (2x uint16) + b.uint16(uint16(records)) // number of entries this disk + b.uint16(uint16(records)) // number of entries total + b.uint32(uint32(size)) // size of directory + b.uint32(uint32(offset)) // start of directory + b.uint16(uint16(len(w.comment))) // byte size of EOCD comment if _, err := w.cw.Write(buf[:]); err != nil { return err } + if _, err := io.WriteString(w.cw, w.comment); err != nil { + return err + } return w.cw.w.(*bufio.Writer).Flush() } // Create adds a file to the zip file using the provided name. // It returns a Writer to which the file contents should be written. +// The file contents will be compressed using the Deflate method. // The name must be a relative path: it must not start with a drive // letter (e.g. C:) or leading slash, and only forward slashes are -// allowed. +// allowed. To create a directory instead of a file, add a trailing +// slash to the name. // The file's contents must be written to the io.Writer before the next // call to Create, CreateHeader, or Close. func (w *Writer) Create(name string) (io.Writer, error) { @@ -218,30 +245,42 @@ func (w *Writer) Create(name string) (io.Writer, error) { return w.CreateHeader(header) } -func (w *Writer) closeLastWriter() error { - if w.last != nil && !w.last.closed { - err := w.last.close() - w.last = nil - return err +// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string +// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, +// or any other common encoding). 
+func detectUTF8(s string) (valid, require bool) {
+	for i := 0; i < len(s); {
+		r, size := utf8.DecodeRuneInString(s[i:])
+		i += size
+		// Officially, ZIP uses CP-437, but many readers use the system's
+		// local character encoding. Most encodings are compatible with a large
+		// subset of CP-437, which itself is ASCII-like.
+		//
+		// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
+		// characters with localized currency and overline characters.
+		if r < 0x20 || r > 0x7d || r == 0x5c {
+			if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
+				return false, false
+			}
+			require = true
+		}
 	}
-	return nil
+	return true, require
 }
 
-// CreateHeader adds a file to the zip file using the provided FileHeader
-// for the file metadata.
-// It returns a Writer to which the file contents should be written.
-//
-// The file's contents must be written to the io.Writer before the next
-// call to Create, CreateHeader, or Close. The provided FileHeader fh
-// must not be modified after a call to CreateHeader.
-func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
-	if err := w.closeLastWriter(); err != nil {
-		return nil, err
+// prepare performs the bookkeeping operations required at the start of
+// CreateHeader and CreateRaw.
+func (w *Writer) prepare(fh *FileHeader) error {
+	if w.last != nil && !w.last.closed {
+		if err := w.last.close(); err != nil {
+			return err
+		}
 	}
 	if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
 		// See https://golang.org/issue/11144 confusion.
-		return nil, errors.New("archive/zip: invalid duplicate FileHeader")
+		return errors.New("archive/zip: invalid duplicate FileHeader")
 	}
+	// appending writer support.
 	if i, ok := w.names[fh.Name]; ok {
 		// We're appending a file that existed already,
 		// so clear out the old entry so that it won't
@@ -249,75 +288,140 @@ func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
 		w.dir[i].FileHeader = nil
 		delete(w.names, fh.Name)
 	}
+	return nil
+}
+
+// CreateHeader adds a file to the zip archive using the provided FileHeader
+// for the file metadata. Writer takes ownership of fh and may mutate
+// its fields. The caller must not modify fh after calling CreateHeader.
+//
+// This returns a Writer to which the file contents should be written.
+// The file's contents must be written to the io.Writer before the next
+// call to Create, CreateHeader, CreateRaw, or Close.
+func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
+	if err := w.prepare(fh); err != nil {
+		return nil, err
+	}
 
-	fh.Flags |= 0x8 // we will write a data descriptor
+	// The ZIP format has a sad state of affairs regarding character encoding.
+	// Officially, the name and comment fields are supposed to be encoded
+	// in CP-437 (which is mostly compatible with ASCII), unless the UTF-8
+	// flag bit is set. However, there are several problems:
+	//
+	//	* Many ZIP readers still do not support UTF-8.
+	//	* If the UTF-8 flag is cleared, several readers simply interpret the
+	//	name and comment fields as whatever the local system encoding is.
+	//
+	// In order to avoid breaking readers without UTF-8 support,
+	// we avoid setting the UTF-8 flag if the strings are CP-437 compatible.
+	// However, if the strings require multibyte UTF-8 encoding and are
+	// valid UTF-8 strings, then we set the UTF-8 bit.
+	//
+	// For the case where the user explicitly wants to specify the encoding
+	// as UTF-8, they will need to set the flag bit themselves.
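+	//
+	// For example (a sketch of the decision made just below): a Name of
+	// "résumé.txt" is valid UTF-8 and needs multibyte encoding, so flag
+	// 0x800 is set; a pure-ASCII Name like "readme.txt" leaves it untouched.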
+	utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
+	utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
+	switch {
+	case fh.NonUTF8:
+		fh.Flags &^= 0x800
+	case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
+		fh.Flags |= 0x800
+	}
 
 	fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
 	fh.ReaderVersion = zipVersion20
 
-	fw := &fileWriter{
-		zipw:      w.cw,
-		compCount: &countWriter{w: w.cw},
-		crc32:     crc32.NewIEEE(),
-	}
-	comp := w.compressor(fh.Method)
-	if comp == nil {
-		return nil, ErrAlgorithm
-	}
-	var err error
-	fw.comp, err = comp(fw.compCount)
-	if err != nil {
-		return nil, err
-	}
-	fw.rawCount = &countWriter{w: fw.comp}
-
+	// If Modified is set, this takes precedence over MS-DOS timestamp fields.
+	if !fh.Modified.IsZero() {
+		// Contrary to the FileHeader.SetModTime method, we intentionally
+		// do not convert to UTC, because we assume the user intends to encode
+		// the date using the specified timezone. A user may want this control
+		// because many legacy ZIP readers interpret the timestamp according
+		// to the local timezone.
+		//
+		// The timezone is only non-UTC if the user sets the Modified
+		// field directly. All other approaches set UTC.
+		fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
+
+		// Use "extended timestamp" format since this is what Info-ZIP uses.
+		// Nearly every major ZIP implementation uses a different format,
+		// but at least most seem to be able to understand the other formats.
+		//
+		// This format happens to be identical for both local and central header
+		// if modification time is the only timestamp being encoded.
+		var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
+		mt := uint32(fh.Modified.Unix())
+		eb := writeBuf(mbuf[:])
+		eb.uint16(extTimeExtraID)
+		eb.uint16(5)  // Size: SizeOf(uint8) + SizeOf(uint32)
+		eb.uint8(1)   // Flags: ModTime
+		eb.uint32(mt) // ModTime
+		fh.Extra = append(fh.Extra, mbuf[:]...)
+	}
+
+	var (
+		ow io.Writer
+		fw *fileWriter
+	)
 	h := &header{
 		FileHeader: fh,
 		offset:     uint64(w.cw.count),
 	}
-	w.dir = append(w.dir, h)
-	fw.header = h
-	if err := writeHeader(w.cw, fh); err != nil {
-		return nil, err
-	}
+	if strings.HasSuffix(fh.Name, "/") {
+		// Set the compression method to Store to ensure data length is truly zero,
+		// which the writeHeader method always encodes for the size fields.
+		// This is necessary as most compression formats have non-zero lengths
+		// even when compressing an empty string.
+		fh.Method = Store
+		fh.Flags &^= 0x8 // we will not write a data descriptor
 
-	w.last = fw
-	return fw, nil
-}
+		// Explicitly clear sizes as they have no meaning for directories.
+		fh.CompressedSize = 0
+		fh.CompressedSize64 = 0
+		fh.UncompressedSize = 0
+		fh.UncompressedSize64 = 0
 
-// Copy copies the file f (obtained from a Reader) into w.
-// It copies the compressed form directly.
-func (w *Writer) Copy(f *File) error {
-	dataOffset, err := f.DataOffset()
-	if err != nil {
-		return err
-	}
-	if err := w.closeLastWriter(); err != nil {
-		return err
-	}
+		ow = dirWriter{}
+	} else {
 
-	fh := f.FileHeader
-	h := &header{
-		FileHeader: &fh,
-		offset:     uint64(w.cw.count),
+		fh.Flags |= 0x8 // we will write a data descriptor
+
+		fw = &fileWriter{
+			zipw:      w.cw,
+			compCount: &countWriter{w: w.cw},
+			crc32:     crc32.NewIEEE(),
+		}
+		comp := w.compressor(fh.Method)
+		if comp == nil {
+			return nil, ErrAlgorithm
+		}
+		var err error
+		fw.comp, err = comp(fw.compCount)
+		if err != nil {
+			return nil, err
+		}
+		fw.rawCount = &countWriter{w: fw.comp}
+		fw.header = h
+		ow = fw
 	}
-	fh.Flags |= 0x8 // we will write a data descriptor
 	w.dir = append(w.dir, h)
-
-	if err := writeHeader(w.cw, &fh); err != nil {
-		return err
+	if err := writeHeader(w.cw, h); err != nil {
+		return nil, err
 	}
+	// If we're creating a directory, fw is nil.
+	w.last = fw
+	return ow, nil
+}
 
-	r := io.NewSectionReader(f.zipr, dataOffset, int64(f.CompressedSize64))
-	if _, err := io.Copy(w.cw, r); err != nil {
-		return err
+func writeHeader(w io.Writer, h *header) error {
+	const maxUint16 = 1<<16 - 1
+	if len(h.Name) > maxUint16 {
+		return errLongName
+	}
+	if len(h.Extra) > maxUint16 {
+		return errLongExtra
 	}
-	return writeDesc(w.cw, &fh)
-}
-
-func writeHeader(w io.Writer, h *FileHeader) error {
 	var buf [fileHeaderLen]byte
 	b := writeBuf(buf[:])
 	b.uint32(uint32(fileHeaderSignature))
@@ -326,9 +430,20 @@ func writeHeader(w io.Writer, h *FileHeader) error {
 	b.uint16(h.Method)
 	b.uint16(h.ModifiedTime)
 	b.uint16(h.ModifiedDate)
-	b.uint32(0) // since we are writing a data descriptor crc32,
-	b.uint32(0) // compressed size,
-	b.uint32(0) // and uncompressed size should be zero
+	// In raw mode (caller does the compression), the values are either
+	// written here or in the trailing data descriptor based on the header
+	// flags.
+	if h.raw && !h.hasDataDescriptor() {
+		b.uint32(h.CRC32)
+		b.uint32(uint32(min64(h.CompressedSize64, uint32max)))
+		b.uint32(uint32(min64(h.UncompressedSize64, uint32max)))
+	} else {
+		// When this package handles the compression, these values are
+		// always written to the trailing data descriptor.
+		b.uint32(0) // crc32
+		b.uint32(0) // compressed size
+		b.uint32(0) // uncompressed size
+	}
 	b.uint16(uint16(len(h.Name)))
 	b.uint16(uint16(len(h.Extra)))
 	if _, err := w.Write(buf[:]); err != nil {
@@ -341,6 +456,65 @@ func writeHeader(w io.Writer, h *FileHeader) error {
 	return err
 }
 
+func min64(x, y uint64) uint64 {
+	if x < y {
+		return x
+	}
+	return y
+}
+
+// CreateRaw adds a file to the zip archive using the provided FileHeader and
+// returns a Writer to which the file contents should be written. The file's
+// contents must be written to the io.Writer before the next call to Create,
+// CreateHeader, CreateRaw, or Close.
+//
+// In contrast to CreateHeader, the bytes passed to Writer are not compressed.
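+//
+// A minimal sketch of transplanting one entry between archives without
+// recompressing it (error handling elided; src is a *File from a Reader):
+//
+//	r, _ := src.OpenRaw()
+//	w, _ := dst.CreateRaw(&src.FileHeader)
+//	io.Copy(w, r)
+//
+// This is exactly the path the Copy method below takes.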
+func (w *Writer) CreateRaw(fh *FileHeader) (io.Writer, error) { + if err := w.prepare(fh); err != nil { + return nil, err + } + + fh.CompressedSize = uint32(min64(fh.CompressedSize64, uint32max)) + fh.UncompressedSize = uint32(min64(fh.UncompressedSize64, uint32max)) + + h := &header{ + FileHeader: fh, + offset: uint64(w.cw.count), + raw: true, + } + w.dir = append(w.dir, h) + if err := writeHeader(w.cw, h); err != nil { + return nil, err + } + + if strings.HasSuffix(fh.Name, "/") { + w.last = nil + return dirWriter{}, nil + } + + fw := &fileWriter{ + header: h, + zipw: w.cw, + } + w.last = fw + return fw, nil +} + +// Copy copies the file f (obtained from a Reader) into w. It copies the raw +// form directly bypassing decompression, compression, and validation. +func (w *Writer) Copy(f *File) error { + r, err := f.OpenRaw() + if err != nil { + return err + } + fw, err := w.CreateRaw(&f.FileHeader) + if err != nil { + return err + } + _, err = io.Copy(fw, r) + return err +} + // RegisterCompressor registers or overrides a custom compressor for a specific // method ID. If a compressor for a given method is not found, Writer will // default to looking up the compressor at the package level. @@ -359,6 +533,15 @@ func (w *Writer) compressor(method uint16) Compressor { return comp } +type dirWriter struct{} + +func (dirWriter) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + return 0, errors.New("zip: write to directory") +} + type fileWriter struct { *header zipw io.Writer @@ -373,6 +556,9 @@ func (w *fileWriter) Write(p []byte) (int, error) { if w.closed { return 0, errors.New("zip: write to closed file") } + if w.raw { + return w.zipw.Write(p) + } w.crc32.Write(p) return w.rawCount.Write(p) } @@ -382,6 +568,9 @@ func (w *fileWriter) close() error { return errors.New("zip: file closed twice") } w.closed = true + if w.raw { + return w.writeDataDescriptor() + } if err := w.comp.Close(); err != nil { return err } @@ -401,32 +590,35 @@ func (w *fileWriter) close() error { fh.UncompressedSize = uint32(fh.UncompressedSize64) } - return writeDesc(w.zipw, fh) + return w.writeDataDescriptor() } -func writeDesc(w io.Writer, fh *FileHeader) error { +func (w *fileWriter) writeDataDescriptor() error { + if !w.hasDataDescriptor() { + return nil + } // Write data descriptor. This is more complicated than one would // think, see e.g. comments in zipfile.c:putextended() and // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588. // The approach here is to write 8 byte sizes if needed without // adding a zip64 extra in the local header (too late anyway). 
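+	// The layout written below is (field sizes in bytes):
+	//	non-zip64: sig(4) | crc32(4) | csize(4) | usize(4) = dataDescriptorLen
+	//	zip64:     sig(4) | crc32(4) | csize(8) | usize(8) = dataDescriptor64Len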
var buf []byte - if fh.isZip64() { + if w.isZip64() { buf = make([]byte, dataDescriptor64Len) } else { buf = make([]byte, dataDescriptorLen) } b := writeBuf(buf) b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X - b.uint32(fh.CRC32) - if fh.isZip64() { - b.uint64(fh.CompressedSize64) - b.uint64(fh.UncompressedSize64) + b.uint32(w.CRC32) + if w.isZip64() { + b.uint64(w.CompressedSize64) + b.uint64(w.UncompressedSize64) } else { - b.uint32(fh.CompressedSize) - b.uint32(fh.UncompressedSize) + b.uint32(w.CompressedSize) + b.uint32(w.UncompressedSize) } - _, err := w.Write(buf) + _, err := w.zipw.Write(buf) return err } @@ -451,6 +643,11 @@ func (w nopCloser) Close() error { type writeBuf []byte +func (b *writeBuf) uint8(v uint8) { + (*b)[0] = v + *b = (*b)[1:] +} + func (b *writeBuf) uint16(v uint16) { binary.LittleEndian.PutUint16(*b, v) *b = (*b)[2:] diff --git a/src/inpxcreator/internal/zip/writer_test.go b/src/inpxcreator/internal/zip/writer_test.go deleted file mode 100644 index 5c37cee..0000000 --- a/src/inpxcreator/internal/zip/writer_test.go +++ /dev/null @@ -1,291 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package zip - -import ( - "bytes" - "io" - "io/ioutil" - "math/rand" - "os" - "testing" -) - -// TODO(adg): a more sophisticated test suite - -type WriteTest struct { - Name string - Data []byte - Method uint16 - Mode os.FileMode -} - -var writeTests = []WriteTest{ - { - Name: "foo", - Data: []byte("Rabbits, guinea pigs, gophers, marsupial rats, and quolls."), - Method: Store, - Mode: 0666, - }, - { - Name: "bar", - Data: nil, // large data set in the test - Method: Deflate, - Mode: 0644, - }, - { - Name: "setuid", - Data: []byte("setuid file"), - Method: Deflate, - Mode: 0755 | os.ModeSetuid, - }, - { - Name: "setgid", - Data: []byte("setgid file"), - Method: Deflate, - Mode: 0755 | os.ModeSetgid, - }, - { - Name: "symlink", - Data: []byte("../link/target"), - Method: Deflate, - Mode: 0755 | os.ModeSymlink, - }, -} - -func TestWriter(t *testing.T) { - largeData := make([]byte, 1<<17) - for i := range largeData { - largeData[i] = byte(rand.Int()) - } - writeTests[1].Data = largeData - defer func() { - writeTests[1].Data = nil - }() - - // write a zip file - buf := new(bytes.Buffer) - w := NewWriter(buf) - - for _, wt := range writeTests { - testCreate(t, w, &wt) - } - - if err := w.Close(); err != nil { - t.Fatal(err) - } - - // read it back - r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) - if err != nil { - t.Fatal(err) - } - for i, wt := range writeTests { - testReadFile(t, r.File[i], &wt) - } -} - -func TestWriterCopy(t *testing.T) { - // make a zip file - buf := new(bytes.Buffer) - w := NewWriter(buf) - for _, wt := range writeTests { - testCreate(t, w, &wt) - } - if err := w.Close(); err != nil { - t.Fatal(err) - } - - // read it back - src, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) - if err != nil { - t.Fatal(err) - } - for i, wt := range writeTests { - testReadFile(t, src.File[i], &wt) - } - - // make a new zip file copying the old compressed data. 
- buf2 := new(bytes.Buffer) - dst := NewWriter(buf2) - for _, f := range src.File { - if err := dst.Copy(f); err != nil { - t.Fatal(err) - } - } - if err := dst.Close(); err != nil { - t.Fatal(err) - } - - // read the new one back - r, err := NewReader(bytes.NewReader(buf2.Bytes()), int64(buf2.Len())) - if err != nil { - t.Fatal(err) - } - for i, wt := range writeTests { - testReadFile(t, r.File[i], &wt) - } -} - -func TestAppend(t *testing.T) { - // write a zip file - buf := new(bytes.Buffer) - w := NewWriter(buf) - - for _, wt := range writeTests { - testCreate(t, w, &wt) - } - - if err := w.Close(); err != nil { - t.Fatal(err) - } - - // read it back - r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) - if err != nil { - t.Fatal(err) - } - - // append a file to it. - abuf := new(bytes.Buffer) - abuf.Write(buf.Bytes()[:r.AppendOffset()]) - w = r.Append(abuf) - - wt := WriteTest{ - Name: "foo", - Data: []byte("Badgers, canines, weasels, owls, and snakes"), - Method: Store, - Mode: 0755, - } - testCreate(t, w, &wt) - - if err := w.Close(); err != nil { - t.Fatal(err) - } - - // read the whole thing back. - allBytes := abuf.Bytes() - - r, err = NewReader(bytes.NewReader(allBytes), int64(len(allBytes))) - if err != nil { - t.Fatal(err) - } - - writeTests := append(writeTests[1:], wt) - for i, wt := range writeTests { - testReadFile(t, r.File[i], &wt) - } -} - -func TestWriterOffset(t *testing.T) { - largeData := make([]byte, 1<<17) - for i := range largeData { - largeData[i] = byte(rand.Int()) - } - writeTests[1].Data = largeData - defer func() { - writeTests[1].Data = nil - }() - - // write a zip file - buf := new(bytes.Buffer) - existingData := []byte{1, 2, 3, 1, 2, 3, 1, 2, 3} - n, _ := buf.Write(existingData) - w := NewWriter(buf) - w.SetOffset(int64(n)) - - for _, wt := range writeTests { - testCreate(t, w, &wt) - } - - if err := w.Close(); err != nil { - t.Fatal(err) - } - - // read it back - r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) - if err != nil { - t.Fatal(err) - } - for i, wt := range writeTests { - testReadFile(t, r.File[i], &wt) - } -} - -func TestWriterFlush(t *testing.T) { - var buf bytes.Buffer - w := NewWriter(struct{ io.Writer }{&buf}) - _, err := w.Create("foo") - if err != nil { - t.Fatal(err) - } - if buf.Len() > 0 { - t.Fatalf("Unexpected %d bytes already in buffer", buf.Len()) - } - if err := w.Flush(); err != nil { - t.Fatal(err) - } - if buf.Len() == 0 { - t.Fatal("No bytes written after Flush") - } -} - -func testCreate(t *testing.T, w *Writer, wt *WriteTest) { - header := &FileHeader{ - Name: wt.Name, - Method: wt.Method, - } - if wt.Mode != 0 { - header.SetMode(wt.Mode) - } - f, err := w.CreateHeader(header) - if err != nil { - t.Fatal(err) - } - _, err = f.Write(wt.Data) - if err != nil { - t.Fatal(err) - } -} - -func testReadFile(t *testing.T, f *File, wt *WriteTest) { - if f.Name != wt.Name { - t.Fatalf("File name: got %q, want %q", f.Name, wt.Name) - } - testFileMode(t, wt.Name, f, wt.Mode) - rc, err := f.Open() - if err != nil { - t.Fatal("opening:", err) - } - b, err := ioutil.ReadAll(rc) - if err != nil { - t.Fatal("reading:", err) - } - err = rc.Close() - if err != nil { - t.Fatal("closing:", err) - } - if !bytes.Equal(b, wt.Data) { - t.Errorf("File contents %q, want %q", b, wt.Data) - } -} - -func BenchmarkCompressedZipGarbage(b *testing.B) { - b.ReportAllocs() - var buf bytes.Buffer - bigBuf := bytes.Repeat([]byte("a"), 1<<20) - for i := 0; i < b.N; i++ { - buf.Reset() - zw := NewWriter(&buf) - for j := 0; j < 
3; j++ { - w, _ := zw.CreateHeader(&FileHeader{ - Name: "foo", - Method: Deflate, - }) - w.Write(bigBuf) - } - zw.Close() - } -} diff --git a/src/inpxcreator/internal/zip/zip_test.go b/src/inpxcreator/internal/zip/zip_test.go deleted file mode 100644 index f785abf..0000000 --- a/src/inpxcreator/internal/zip/zip_test.go +++ /dev/null @@ -1,471 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Tests that involve both reading and writing. - -package zip - -import ( - "bytes" - "fmt" - "hash" - "internal/testenv" - "io" - "io/ioutil" - "sort" - "strings" - "testing" - "time" -) - -func TestOver65kFiles(t *testing.T) { - if testing.Short() && testenv.Builder() == "" { - t.Skip("skipping in short mode") - } - buf := new(bytes.Buffer) - w := NewWriter(buf) - const nFiles = (1 << 16) + 42 - for i := 0; i < nFiles; i++ { - _, err := w.CreateHeader(&FileHeader{ - Name: fmt.Sprintf("%d.dat", i), - Method: Store, // avoid Issue 6136 and Issue 6138 - }) - if err != nil { - t.Fatalf("creating file %d: %v", i, err) - } - } - if err := w.Close(); err != nil { - t.Fatalf("Writer.Close: %v", err) - } - s := buf.String() - zr, err := NewReader(strings.NewReader(s), int64(len(s))) - if err != nil { - t.Fatalf("NewReader: %v", err) - } - if got := len(zr.File); got != nFiles { - t.Fatalf("File contains %d files, want %d", got, nFiles) - } - for i := 0; i < nFiles; i++ { - want := fmt.Sprintf("%d.dat", i) - if zr.File[i].Name != want { - t.Fatalf("File(%d) = %q, want %q", i, zr.File[i].Name, want) - } - } -} - -func TestModTime(t *testing.T) { - var testTime = time.Date(2009, time.November, 10, 23, 45, 58, 0, time.UTC) - fh := new(FileHeader) - fh.SetModTime(testTime) - outTime := fh.ModTime() - if !outTime.Equal(testTime) { - t.Errorf("times don't match: got %s, want %s", outTime, testTime) - } -} - -func testHeaderRoundTrip(fh *FileHeader, wantUncompressedSize uint32, wantUncompressedSize64 uint64, t *testing.T) { - fi := fh.FileInfo() - fh2, err := FileInfoHeader(fi) - if err != nil { - t.Fatal(err) - } - if got, want := fh2.Name, fh.Name; got != want { - t.Errorf("Name: got %s, want %s\n", got, want) - } - if got, want := fh2.UncompressedSize, wantUncompressedSize; got != want { - t.Errorf("UncompressedSize: got %d, want %d\n", got, want) - } - if got, want := fh2.UncompressedSize64, wantUncompressedSize64; got != want { - t.Errorf("UncompressedSize64: got %d, want %d\n", got, want) - } - if got, want := fh2.ModifiedTime, fh.ModifiedTime; got != want { - t.Errorf("ModifiedTime: got %d, want %d\n", got, want) - } - if got, want := fh2.ModifiedDate, fh.ModifiedDate; got != want { - t.Errorf("ModifiedDate: got %d, want %d\n", got, want) - } - - if sysfh, ok := fi.Sys().(*FileHeader); !ok && sysfh != fh { - t.Errorf("Sys didn't return original *FileHeader") - } -} - -func TestFileHeaderRoundTrip(t *testing.T) { - fh := &FileHeader{ - Name: "foo.txt", - UncompressedSize: 987654321, - ModifiedTime: 1234, - ModifiedDate: 5678, - } - testHeaderRoundTrip(fh, fh.UncompressedSize, uint64(fh.UncompressedSize), t) -} - -func TestFileHeaderRoundTrip64(t *testing.T) { - fh := &FileHeader{ - Name: "foo.txt", - UncompressedSize64: 9876543210, - ModifiedTime: 1234, - ModifiedDate: 5678, - } - testHeaderRoundTrip(fh, uint32max, fh.UncompressedSize64, t) -} - -type repeatedByte struct { - off int64 - b byte - n int64 -} - -// rleBuffer is a run-length-encoded byte buffer. 
-// It's an io.Writer (like a bytes.Buffer) and also an io.ReaderAt, -// allowing random-access reads. -type rleBuffer struct { - buf []repeatedByte -} - -func (r *rleBuffer) Size() int64 { - if len(r.buf) == 0 { - return 0 - } - last := &r.buf[len(r.buf)-1] - return last.off + last.n -} - -func (r *rleBuffer) Write(p []byte) (n int, err error) { - var rp *repeatedByte - if len(r.buf) > 0 { - rp = &r.buf[len(r.buf)-1] - // Fast path, if p is entirely the same byte repeated. - if lastByte := rp.b; len(p) > 0 && p[0] == lastByte { - all := true - for _, b := range p { - if b != lastByte { - all = false - break - } - } - if all { - rp.n += int64(len(p)) - return len(p), nil - } - } - } - - for _, b := range p { - if rp == nil || rp.b != b { - r.buf = append(r.buf, repeatedByte{r.Size(), b, 1}) - rp = &r.buf[len(r.buf)-1] - } else { - rp.n++ - } - } - return len(p), nil -} - -func (r *rleBuffer) ReadAt(p []byte, off int64) (n int, err error) { - if len(p) == 0 { - return - } - skipParts := sort.Search(len(r.buf), func(i int) bool { - part := &r.buf[i] - return part.off+part.n > off - }) - parts := r.buf[skipParts:] - if len(parts) > 0 { - skipBytes := off - parts[0].off - for len(parts) > 0 { - part := parts[0] - for i := skipBytes; i < part.n; i++ { - if n == len(p) { - return - } - p[n] = part.b - n++ - } - parts = parts[1:] - skipBytes = 0 - } - } - if n != len(p) { - err = io.ErrUnexpectedEOF - } - return -} - -// Just testing the rleBuffer used in the Zip64 test above. Not used by the zip code. -func TestRLEBuffer(t *testing.T) { - b := new(rleBuffer) - var all []byte - writes := []string{"abcdeee", "eeeeeee", "eeeefghaaiii"} - for _, w := range writes { - b.Write([]byte(w)) - all = append(all, w...) - } - if len(b.buf) != 10 { - t.Fatalf("len(b.buf) = %d; want 10", len(b.buf)) - } - - for i := 0; i < len(all); i++ { - for j := 0; j < len(all)-i; j++ { - buf := make([]byte, j) - n, err := b.ReadAt(buf, int64(i)) - if err != nil || n != len(buf) { - t.Errorf("ReadAt(%d, %d) = %d, %v; want %d, nil", i, j, n, err, len(buf)) - } - if !bytes.Equal(buf, all[i:i+j]) { - t.Errorf("ReadAt(%d, %d) = %q; want %q", i, j, buf, all[i:i+j]) - } - } - } -} - -// fakeHash32 is a dummy Hash32 that always returns 0. -type fakeHash32 struct { - hash.Hash32 -} - -func (fakeHash32) Write(p []byte) (int, error) { return len(p), nil } -func (fakeHash32) Sum32() uint32 { return 0 } - -func TestZip64(t *testing.T) { - if testing.Short() { - t.Skip("slow test; skipping") - } - const size = 1 << 32 // before the "END\n" part - buf := testZip64(t, size) - testZip64DirectoryRecordLength(buf, t) -} - -func TestZip64EdgeCase(t *testing.T) { - if testing.Short() { - t.Skip("slow test; skipping") - } - // Test a zip file with uncompressed size 0xFFFFFFFF. - // That's the magic marker for a 64-bit file, so even though - // it fits in a 32-bit field we must use the 64-bit field. - // Go 1.5 and earlier got this wrong, - // writing an invalid zip file. 
- const size = 1<<32 - 1 - int64(len("END\n")) // before the "END\n" part - buf := testZip64(t, size) - testZip64DirectoryRecordLength(buf, t) -} - -func testZip64(t testing.TB, size int64) *rleBuffer { - const chunkSize = 1024 - chunks := int(size / chunkSize) - // write size bytes plus "END\n" to a zip file - buf := new(rleBuffer) - w := NewWriter(buf) - f, err := w.CreateHeader(&FileHeader{ - Name: "huge.txt", - Method: Store, - }) - if err != nil { - t.Fatal(err) - } - f.(*fileWriter).crc32 = fakeHash32{} - chunk := make([]byte, chunkSize) - for i := range chunk { - chunk[i] = '.' - } - for i := 0; i < chunks; i++ { - _, err := f.Write(chunk) - if err != nil { - t.Fatal("write chunk:", err) - } - } - if frag := int(size % chunkSize); frag > 0 { - _, err := f.Write(chunk[:frag]) - if err != nil { - t.Fatal("write chunk:", err) - } - } - end := []byte("END\n") - _, err = f.Write(end) - if err != nil { - t.Fatal("write end:", err) - } - if err := w.Close(); err != nil { - t.Fatal(err) - } - - // read back zip file and check that we get to the end of it - r, err := NewReader(buf, int64(buf.Size())) - if err != nil { - t.Fatal("reader:", err) - } - f0 := r.File[0] - rc, err := f0.Open() - if err != nil { - t.Fatal("opening:", err) - } - rc.(*checksumReader).hash = fakeHash32{} - for i := 0; i < chunks; i++ { - _, err := io.ReadFull(rc, chunk) - if err != nil { - t.Fatal("read:", err) - } - } - if frag := int(size % chunkSize); frag > 0 { - _, err := io.ReadFull(rc, chunk[:frag]) - if err != nil { - t.Fatal("read:", err) - } - } - gotEnd, err := ioutil.ReadAll(rc) - if err != nil { - t.Fatal("read end:", err) - } - if !bytes.Equal(gotEnd, end) { - t.Errorf("End of zip64 archive %q, want %q", gotEnd, end) - } - err = rc.Close() - if err != nil { - t.Fatal("closing:", err) - } - if size+int64(len("END\n")) >= 1<<32-1 { - if got, want := f0.UncompressedSize, uint32(uint32max); got != want { - t.Errorf("UncompressedSize %#x, want %#x", got, want) - } - } - - if got, want := f0.UncompressedSize64, uint64(size)+uint64(len(end)); got != want { - t.Errorf("UncompressedSize64 %#x, want %#x", got, want) - } - - return buf -} - -// Issue 9857 -func testZip64DirectoryRecordLength(buf *rleBuffer, t *testing.T) { - d := make([]byte, 1024) - if _, err := buf.ReadAt(d, buf.Size()-int64(len(d))); err != nil { - t.Fatal("read:", err) - } - - sigOff := findSignatureInBlock(d) - dirOff, err := findDirectory64End(buf, buf.Size()-int64(len(d))+int64(sigOff)) - if err != nil { - t.Fatal("findDirectory64End:", err) - } - - d = make([]byte, directory64EndLen) - if _, err := buf.ReadAt(d, dirOff); err != nil { - t.Fatal("read:", err) - } - - b := readBuf(d) - if sig := b.uint32(); sig != directory64EndSignature { - t.Fatalf("Expected directory64EndSignature (%d), got %d", directory64EndSignature, sig) - } - - size := b.uint64() - if size != directory64EndLen-12 { - t.Fatalf("Expected length of %d, got %d", directory64EndLen-12, size) - } -} - -func testInvalidHeader(h *FileHeader, t *testing.T) { - var buf bytes.Buffer - z := NewWriter(&buf) - - f, err := z.CreateHeader(h) - if err != nil { - t.Fatalf("error creating header: %v", err) - } - if _, err := f.Write([]byte("hi")); err != nil { - t.Fatalf("error writing content: %v", err) - } - if err := z.Close(); err != nil { - t.Fatalf("error closing zip writer: %v", err) - } - - b := buf.Bytes() - if _, err = NewReader(bytes.NewReader(b), int64(len(b))); err != ErrFormat { - t.Fatalf("got %v, expected ErrFormat", err) - } -} - -func testValidHeader(h *FileHeader, t 
*testing.T) { - var buf bytes.Buffer - z := NewWriter(&buf) - - f, err := z.CreateHeader(h) - if err != nil { - t.Fatalf("error creating header: %v", err) - } - if _, err := f.Write([]byte("hi")); err != nil { - t.Fatalf("error writing content: %v", err) - } - if err := z.Close(); err != nil { - t.Fatalf("error closing zip writer: %v", err) - } - - b := buf.Bytes() - zf, err := NewReader(bytes.NewReader(b), int64(len(b))) - if err != nil { - t.Fatalf("got %v, expected nil", err) - } - zh := zf.File[0].FileHeader - if zh.Name != h.Name || zh.Method != h.Method || zh.UncompressedSize64 != uint64(len("hi")) { - t.Fatalf("got %q/%d/%d expected %q/%d/%d", zh.Name, zh.Method, zh.UncompressedSize64, h.Name, h.Method, len("hi")) - } -} - -// Issue 4302. -func TestHeaderInvalidTagAndSize(t *testing.T) { - const timeFormat = "20060102T150405.000.txt" - - ts := time.Now() - filename := ts.Format(timeFormat) - - h := FileHeader{ - Name: filename, - Method: Deflate, - Extra: []byte(ts.Format(time.RFC3339Nano)), // missing tag and len, but Extra is best-effort parsing - } - h.SetModTime(ts) - - testValidHeader(&h, t) -} - -func TestHeaderTooShort(t *testing.T) { - h := FileHeader{ - Name: "foo.txt", - Method: Deflate, - Extra: []byte{zip64ExtraId}, // missing size and second half of tag, but Extra is best-effort parsing - } - testValidHeader(&h, t) -} - -func TestHeaderIgnoredSize(t *testing.T) { - h := FileHeader{ - Name: "foo.txt", - Method: Deflate, - Extra: []byte{zip64ExtraId & 0xFF, zip64ExtraId >> 8, 24, 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}, // bad size but shouldn't be consulted - } - testValidHeader(&h, t) -} - -// Issue 4393. It is valid to have an extra data header -// which contains no body. -func TestZeroLengthHeader(t *testing.T) { - h := FileHeader{ - Name: "extadata.txt", - Method: Deflate, - Extra: []byte{ - 85, 84, 5, 0, 3, 154, 144, 195, 77, // tag 21589 size 5 - 85, 120, 0, 0, // tag 30805 size 0 - }, - } - testValidHeader(&h, t) -} - -// Just benchmarking how fast the Zip64 test above is. Not related to -// our zip performance, since the test above disabled CRC32 and flate. -func BenchmarkZip64Test(b *testing.B) { - for i := 0; i < b.N; i++ { - testZip64(b, 1<<26) - } -}
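As a quick orientation to the rewritten writer above, here is a minimal, self-contained sketch (not part of the patch; the import path is assumed) that round-trips an archive through Writer.Copy, which now moves the raw compressed bytes via OpenRaw/CreateRaw:

package main

import (
	"bytes"
	"log"

	zip "inpxcreator/internal/zip" // assumed import path for this internal fork
)

func main() {
	// Write a small source archive in memory.
	var src bytes.Buffer
	zw := zip.NewWriter(&src)
	w, err := zw.Create("hello.txt")
	if err != nil {
		log.Fatal(err)
	}
	if _, err := w.Write([]byte("hello, zip")); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}

	// Read it back and copy every entry into a new archive.
	// Copy goes through OpenRaw/CreateRaw, so the already-compressed
	// bytes pass through without being decompressed and recompressed.
	zr, err := zip.NewReader(bytes.NewReader(src.Bytes()), int64(src.Len()))
	if err != nil {
		log.Fatal(err)
	}
	var dst bytes.Buffer
	dw := zip.NewWriter(&dst)
	for _, f := range zr.File {
		if err := dw.Copy(f); err != nil {
			log.Fatal(err)
		}
	}
	if err := dw.Close(); err != nil {
		log.Fatal(err)
	}
}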