├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── generic-feature-request.md │ └── new-format-request.md └── workflows │ ├── macos-latest.yml │ ├── ubuntu-latest.yml │ └── windows-latest.yml ├── .gitignore ├── 7z.go ├── LICENSE ├── README.md ├── archives.go ├── archives_test.go ├── brotli.go ├── brotli_test.go ├── bz2.go ├── formats.go ├── formats_test.go ├── fs.go ├── fs_test.go ├── go.mod ├── go.sum ├── gz.go ├── interfaces.go ├── lz4.go ├── lzip.go ├── minlz.go ├── rar.go ├── rar_test.go ├── sz.go ├── tar.go ├── testdata ├── self-tar.tar ├── symlinks.zip ├── test.part01.rar ├── test.part02.rar ├── test.zip └── unordered.zip ├── xz.go ├── zip.go ├── zip_test.go ├── zlib.go └── zstd.go /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [mholt] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: For behaviors which violate documentation or cause incorrect results 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 15 | 16 | ## What version of the package or command are you using? 17 | 18 | 19 | 20 | ## What are you trying to do? 21 | 22 | 23 | 24 | ## What steps did you take? 25 | 26 | 27 | 28 | ## What did you expect to happen, and what actually happened instead? 29 | 30 | 31 | 32 | ## How do you think this should be fixed? 33 | 34 | 35 | 36 | ## Please link to any related issues, pull requests, and/or discussion 37 | 38 | 39 | 40 | ## Bonus: What do you use this package for, and do you have any other suggestions or feedback? 41 | 42 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/generic-feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Generic feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: feature request 6 | assignees: '' 7 | 8 | --- 9 | 10 | 14 | 15 | ## What would you like to have changed? 16 | 17 | 18 | 19 | ## Why is this feature a useful, necessary, and/or important addition to this project? 20 | 21 | 22 | 23 | ## What alternatives are there, or what are you doing in the meantime to work around the lack of this feature? 24 | 25 | 26 | 27 | ## Please link to any relevant issues, pull requests, or other discussions. 28 | 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-format-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: New format request 3 | about: Request a new archival or compression format 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 14 | 15 | ## Introduce the format you are requesting. 16 | 17 | 18 | 19 | 20 | ## What do YOU use this format for? 21 | 22 | 23 | 24 | 25 | ## What is the format's conventional file extension(s)? 26 | 27 | 28 | 29 | 30 | ## What is the format's typical header bytes? 31 | 32 | 33 | 34 | 35 | ## What is the format's MIME type? 36 | 37 | 38 | 39 | 40 | ## Please link to the format's formal or official specification(s). 41 | 42 | 43 | 44 | 45 | ## Which Go libraries could be used to implement this format? 46 | 47 | 48 | -------------------------------------------------------------------------------- /.github/workflows/macos-latest.yml: -------------------------------------------------------------------------------- 1 | name: Mac 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | 7 | build-and-test: 8 | 9 | strategy: 10 | matrix: 11 | go-version: [1.23] 12 | runs-on: macos-latest 13 | steps: 14 | - name: Install Go 15 | uses: actions/setup-go@v5 16 | with: 17 | go-version: ${{ matrix.go-version }} 18 | 19 | - name: Checkout code 20 | uses: actions/checkout@v4 21 | 22 | - name: Test 23 | run: go test -v -race ./... 24 | -------------------------------------------------------------------------------- /.github/workflows/ubuntu-latest.yml: -------------------------------------------------------------------------------- 1 | name: Linux 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | 7 | build-and-test: 8 | 9 | strategy: 10 | matrix: 11 | go-version: [1.23] 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Install Go 15 | uses: actions/setup-go@v5 16 | with: 17 | go-version: ${{ matrix.go-version }} 18 | 19 | - name: Checkout code 20 | uses: actions/checkout@v4 21 | 22 | - name: Test 23 | run: go test -v -race ./... 24 | -------------------------------------------------------------------------------- /.github/workflows/windows-latest.yml: -------------------------------------------------------------------------------- 1 | name: Windows 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | 7 | build-and-test: 8 | 9 | strategy: 10 | matrix: 11 | go-version: [1.23] 12 | runs-on: windows-latest 13 | steps: 14 | - name: Install Go 15 | uses: actions/setup-go@v5 16 | with: 17 | go-version: ${{ matrix.go-version }} 18 | 19 | - name: Checkout code 20 | uses: actions/checkout@v4 21 | 22 | - name: Test 23 | run: go test -v -race ./... 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _gitignore -------------------------------------------------------------------------------- /7z.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "io/fs" 10 | "log" 11 | "strings" 12 | 13 | "github.com/bodgit/sevenzip" 14 | ) 15 | 16 | func init() { 17 | RegisterFormat(SevenZip{}) 18 | 19 | // looks like the sevenzip package registers a lot of decompressors for us automatically: 20 | // https://github.com/bodgit/sevenzip/blob/46c5197162c784318b98b9a3f80289a9aa1ca51a/register.go#L38-L61 21 | } 22 | 23 | type SevenZip struct { 24 | // If true, errors encountered during reading or writing 25 | // a file within an archive will be logged and the 26 | // operation will continue on remaining files. 27 | ContinueOnError bool 28 | 29 | // The password, if dealing with an encrypted archive. 30 | Password string 31 | } 32 | 33 | func (SevenZip) Extension() string { return ".7z" } 34 | func (SevenZip) MediaType() string { return "application/x-7z-compressed" } 35 | 36 | func (z SevenZip) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 37 | var mr MatchResult 38 | 39 | // match filename 40 | if strings.Contains(strings.ToLower(filename), z.Extension()) { 41 | mr.ByName = true 42 | } 43 | 44 | // match file header 45 | buf, err := readAtMost(stream, len(sevenZipHeader)) 46 | if err != nil { 47 | return mr, err 48 | } 49 | mr.ByStream = bytes.Equal(buf, sevenZipHeader) 50 | 51 | return mr, nil 52 | } 53 | 54 | // Archive is not implemented for 7z because I do not know of a pure-Go 7z writer. 55 | 56 | // Extract extracts files from z, implementing the Extractor interface. Uniquely, however, 57 | // sourceArchive must be an io.ReaderAt and io.Seeker, which are oddly disjoint interfaces 58 | // from io.Reader which is what the method signature requires. We chose this signature for 59 | // the interface because we figure you can Read() from anything you can ReadAt() or Seek() 60 | // with. Due to the nature of the zip archive format, if sourceArchive is not an io.Seeker 61 | // and io.ReaderAt, an error is returned. 62 | func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { 63 | sra, ok := sourceArchive.(seekReaderAt) 64 | if !ok { 65 | return fmt.Errorf("input type must be an io.ReaderAt and io.Seeker because of zip format constraints") 66 | } 67 | 68 | size, err := streamSizeBySeeking(sra) 69 | if err != nil { 70 | return fmt.Errorf("determining stream size: %w", err) 71 | } 72 | 73 | zr, err := sevenzip.NewReaderWithPassword(sra, size, z.Password) 74 | if err != nil { 75 | return err 76 | } 77 | 78 | // important to initialize to non-nil, empty value due to how fileIsIncluded works 79 | skipDirs := skipList{} 80 | 81 | for i, f := range zr.File { 82 | if err := ctx.Err(); err != nil { 83 | return err // honor context cancellation 84 | } 85 | 86 | if fileIsIncluded(skipDirs, f.Name) { 87 | continue 88 | } 89 | 90 | fi := f.FileInfo() 91 | file := FileInfo{ 92 | FileInfo: fi, 93 | Header: f.FileHeader, 94 | NameInArchive: f.Name, 95 | Open: func() (fs.File, error) { 96 | openedFile, err := f.Open() 97 | if err != nil { 98 | return nil, err 99 | } 100 | return fileInArchive{openedFile, fi}, nil 101 | }, 102 | } 103 | 104 | err := handleFile(ctx, file) 105 | if errors.Is(err, fs.SkipAll) { 106 | break 107 | } else if errors.Is(err, fs.SkipDir) && file.IsDir() { 108 | skipDirs.add(f.Name) 109 | } else if err != nil { 110 | if z.ContinueOnError { 111 | log.Printf("[ERROR] %s: %v", f.Name, err) 112 | continue 113 | } 114 | return fmt.Errorf("handling file %d: %s: %w", i, f.Name, err) 115 | } 116 | } 117 | 118 | return nil 119 | } 120 | 121 | // https://py7zr.readthedocs.io/en/latest/archive_format.html#signature 122 | var sevenZipHeader = []byte("7z\xBC\xAF\x27\x1C") 123 | 124 | // Interface guard 125 | var _ Extractor = SevenZip{} 126 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Matthew Holt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # archives [![Go Reference](https://pkg.go.dev/badge/github.com/mholt/archives.svg)](https://pkg.go.dev/github.com/mholt/archives) [![Linux](https://github.com/mholt/archives/actions/workflows/ubuntu-latest.yml/badge.svg)](https://github.com/mholt/archives/actions/workflows/ubuntu-latest.yml) [![Mac](https://github.com/mholt/archives/actions/workflows/macos-latest.yml/badge.svg)](https://github.com/mholt/archives/actions/workflows/macos-latest.yml) [![Windows](https://github.com/mholt/archives/actions/workflows/windows-latest.yml/badge.svg)](https://github.com/mholt/archives/actions/workflows/windows-latest.yml) 2 | 3 | Introducing **mholt/archives** - a cross-platform, multi-format Go library for working with archives and compression formats with a unified API and as virtual file systems compatible with [`io/fs`](https://pkg.go.dev/io/fs). 4 | 5 | 6 | ## Features 7 | 8 | - Stream-oriented APIs 9 | - Automatically identify archive and compression formats: 10 | - By file name 11 | - By stream peeking (headers) 12 | - Traverse directories, archives, and other files uniformly as [`io/fs`](https://pkg.go.dev/io/fs) file systems: 13 | - [`FileFS`](https://pkg.go.dev/github.com/mholt/archives#FileFS) 14 | - [`DirFS`](https://pkg.go.dev/github.com/mholt/archives#DirFS) 15 | - [`ArchiveFS`](https://pkg.go.dev/github.com/mholt/archives#ArchiveFS) 16 | - Seamlessly walk into archive files using [`DeepFS`](https://pkg.go.dev/github.com/mholt/archives#DeepFS) 17 | - Compress and decompress files 18 | - Create and extract archive files 19 | - Walk or traverse into archive files 20 | - Extract only specific files from archives 21 | - Insert into (append to) .tar and .zip archives without re-creating entire archive 22 | - Numerous archive and compression formats supported 23 | - Read from password-protected 7-Zip and RAR files 24 | - Extensible (add more formats just by registering them) 25 | - Cross-platform, static binary 26 | - Pure Go (no cgo) 27 | - Multithreaded Gzip 28 | - Adjustable compression levels 29 | - Super-fast Snappy implementation (via [S2](https://github.com/klauspost/compress/blob/master/s2/README.md)) 30 | 31 | ### Supported compression formats 32 | 33 | - brotli (.br) 34 | - bzip2 (.bz2) 35 | - flate (.zip) 36 | - gzip (.gz) 37 | - lz4 (.lz4) 38 | - lzip (.lz) 39 | - minlz (.mz) 40 | - snappy (.sz) and S2 (.s2) 41 | - xz (.xz) 42 | - zlib (.zz) 43 | - zstandard (.zst) 44 | 45 | ### Supported archive formats 46 | 47 | - .zip 48 | - .tar (including any compressed variants like .tar.gz) 49 | - .rar (read-only) 50 | - .7z (read-only) 51 | 52 | ## Command line utility 53 | 54 | There is an independently-maintained command line tool called [**`arc`**](https://github.com/jm33-m0/arc) currently in development that will expose many of the functions of this library to a shell. 55 | 56 | ## Library use 57 | 58 | ```bash 59 | $ go get github.com/mholt/archives 60 | ``` 61 | 62 | 63 | ### Create archive 64 | 65 | Creating archives can be done entirely without needing a real disk or storage device. All you need is a list of [`FileInfo` structs](https://pkg.go.dev/github.com/mholt/archives#FileInfo), which can be implemented without a real file system. 66 | 67 | However, creating archives from a disk is very common, so you can use the [`FilesFromDisk()` function](https://pkg.go.dev/github.com/mholt/archives#FilesFromDisk) to help you map filenames on disk to their paths in the archive. 68 | 69 | In this example, we add 4 files and a directory (which includes its contents recursively) to a .tar.gz file: 70 | 71 | ```go 72 | ctx := context.TODO() 73 | 74 | // map files on disk to their paths in the archive using default settings (second arg) 75 | files, err := archives.FilesFromDisk(ctx, nil, map[string]string{ 76 | "/path/on/disk/file1.txt": "file1.txt", 77 | "/path/on/disk/file2.txt": "subfolder/file2.txt", 78 | "/path/on/disk/file3.txt": "", // put in root of archive as file3.txt 79 | "/path/on/disk/file4.txt": "subfolder/", // put in subfolder as file4.txt 80 | "/path/on/disk/folder": "Custom Folder", // contents added recursively 81 | }) 82 | if err != nil { 83 | return err 84 | } 85 | 86 | // create the output file we'll write to 87 | out, err := os.Create("example.tar.gz") 88 | if err != nil { 89 | return err 90 | } 91 | defer out.Close() 92 | 93 | // we can use the CompressedArchive type to gzip a tarball 94 | // (since we're writing, we only set Archival, but if you're 95 | // going to read, set Extraction) 96 | format := archives.CompressedArchive{ 97 | Compression: archives.Gz{}, 98 | Archival: archives.Tar{}, 99 | } 100 | 101 | // create the archive 102 | err = format.Archive(ctx, out, files) 103 | if err != nil { 104 | return err 105 | } 106 | ``` 107 | 108 | ### Extract archive 109 | 110 | Extracting an archive, extracting _from_ an archive, and walking an archive are all the same function. 111 | 112 | Simply use your format type (e.g. `Zip`) to call `Extract()`. You'll pass in a context (for cancellation), the input stream, and a callback function to handle each file. 113 | 114 | ```go 115 | // the type that will be used to read the input stream 116 | var format archives.Zip 117 | 118 | err := format.Extract(ctx, input, func(ctx context.Context, f archives.FileInfo) error { 119 | // do something with the file here; or, if you only want a specific file or directory, 120 | // just return until you come across the desired f.NameInArchive value(s) 121 | return nil 122 | }) 123 | if err != nil { 124 | return err 125 | } 126 | ``` 127 | 128 | ### Identifying formats 129 | 130 | When you have an input stream with unknown contents, this package can identify it for you. It will try matching based on filename and/or the header (which peeks at the stream): 131 | 132 | ```go 133 | // unless your stream is an io.Seeker, use the returned stream value to 134 | // ensure you re-read the bytes consumed during Identify() 135 | format, stream, err := archives.Identify(ctx, "filename.tar.zst", stream) 136 | if err != nil { 137 | return err 138 | } 139 | 140 | // you can now type-assert format to whatever you need 141 | 142 | // want to extract something? 143 | if ex, ok := format.(archives.Extractor); ok { 144 | // ... proceed to extract 145 | } 146 | 147 | // or maybe it's compressed and you want to decompress it? 148 | if decomp, ok := format.(archives.Decompressor); ok { 149 | rc, err := decomp.OpenReader(unknownFile) 150 | if err != nil { 151 | return err 152 | } 153 | defer rc.Close() 154 | 155 | // read from rc to get decompressed data 156 | } 157 | ``` 158 | 159 | `Identify()` works by reading an arbitrary number of bytes from the beginning of the stream (just enough to check for file headers). It buffers them and returns a new reader that lets you re-read them anew. If your input stream is `io.Seeker` however, no buffer is created as it uses `Seek()` instead, and the returned stream is the same as the input. 160 | 161 | ### Virtual file systems 162 | 163 | This is my favorite feature. 164 | 165 | Let's say you have a directory on disk, an archive, a compressed archive, any other regular file, or a stream of any of the above! You don't really care; you just want to use it uniformly no matter what it is. 166 | 167 | Simply create a file system: 168 | 169 | ```go 170 | // filename could be: 171 | // - a folder ("/home/you/Desktop") 172 | // - an archive ("example.zip") 173 | // - a compressed archive ("example.tar.gz") 174 | // - a regular file ("example.txt") 175 | // - a compressed regular file ("example.txt.gz") 176 | // and/or the last argument could be a stream of any of the above 177 | fsys, err := archives.FileSystem(ctx, filename, nil) 178 | if err != nil { 179 | return err 180 | } 181 | ``` 182 | 183 | This is a fully-featured `fs.FS`, so you can open files and read directories, no matter what kind of file the input was. 184 | 185 | For example, to open a specific file: 186 | 187 | ```go 188 | f, err := fsys.Open("file") 189 | if err != nil { 190 | return err 191 | } 192 | defer f.Close() 193 | ``` 194 | 195 | If you opened a regular file or archive, you can read from it. If it's a compressed file, reads are automatically decompressed. 196 | 197 | If you opened a directory (either real or in an archive), you can list its contents: 198 | 199 | ```go 200 | if dir, ok := f.(fs.ReadDirFile); ok { 201 | // 0 gets all entries, but you can pass > 0 to paginate 202 | entries, err := dir.ReadDir(0) 203 | if err != nil { 204 | return err 205 | } 206 | for _, e := range entries { 207 | fmt.Println(e.Extension()) 208 | } 209 | } 210 | ``` 211 | 212 | Or get a directory listing this way: 213 | 214 | ```go 215 | entries, err := fsys.ReadDir("Playlists") 216 | if err != nil { 217 | return err 218 | } 219 | for _, e := range entries { 220 | fmt.Println(e.Extension()) 221 | } 222 | ``` 223 | 224 | Or maybe you want to walk all or part of the file system, but skip a folder named `.git`: 225 | 226 | ```go 227 | err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { 228 | if err != nil { 229 | return err 230 | } 231 | if path == ".git" { 232 | return fs.SkipDir 233 | } 234 | fmt.Println("Walking:", path, "Dir?", d.IsDir()) 235 | return nil 236 | }) 237 | if err != nil { 238 | return err 239 | } 240 | ``` 241 | 242 | The `archives` package lets you do it all. 243 | 244 | **Important .tar note:** Tar files do not efficiently implement file system semantics due to their historical roots in sequential-access design for tapes. File systems inherently assume some index facilitating random access, but tar files need to be read from the beginning to access something at the end. This is especially slow when the archive is compressed. Optimizations have been implemented to amortize `ReadDir()` calls so that `fs.WalkDir()` only has to scan the archive once, but they use more memory. Open calls require another scan to find the file. It may be more efficient to use `Tar.Extract()` directly if file system semantics are not important to you. 245 | 246 | #### Use with `http.FileServer` 247 | 248 | It can be used with http.FileServer to browse archives and directories in a browser. However, due to how http.FileServer works, don't directly use http.FileServer with compressed files; instead wrap it like following: 249 | 250 | ```go 251 | fileServer := http.FileServer(http.FS(archiveFS)) 252 | http.HandleFunc("/", func(writer http.ResponseWriter, request *http.Request) { 253 | // disable range request 254 | writer.Header().Set("Accept-Ranges", "none") 255 | request.Header.Del("Range") 256 | 257 | // disable content-type sniffing 258 | ctype := mime.TypeByExtension(filepath.Ext(request.URL.Path)) 259 | writer.Header()["Content-Type"] = nil 260 | if ctype != "" { 261 | writer.Header().Set("Content-Type", ctype) 262 | } 263 | fileServer.ServeHTTP(writer, request) 264 | }) 265 | ``` 266 | 267 | http.FileServer will try to sniff the Content-Type by default if it can't be inferred from file name. To do this, the http package will try to read from the file and then Seek back to file start, which the libray can't achieve currently. The same goes with Range requests. Seeking in archives is not currently supported by this package due to limitations in dependencies. 268 | 269 | If Content-Type is desirable, you can [register it](https://pkg.go.dev/mime#AddExtensionType) yourself. 270 | 271 | ### Compress data 272 | 273 | Compression formats let you open writers to compress data: 274 | 275 | ```go 276 | // wrap underlying writer w 277 | compressor, err := archives.Zstd{}.OpenWriter(w) 278 | if err != nil { 279 | return err 280 | } 281 | defer compressor.Close() 282 | 283 | // writes to compressor will be compressed 284 | ``` 285 | 286 | ### Decompress data 287 | 288 | Similarly, compression formats let you open readers to decompress data: 289 | 290 | ```go 291 | // wrap underlying reader r 292 | decompressor, err := archives.Snappy{}.OpenReader(r) 293 | if err != nil { 294 | return err 295 | } 296 | defer decompressor.Close() 297 | 298 | // reads from decompressor will be decompressed 299 | ``` 300 | 301 | ### Append to tarball and zip archives 302 | 303 | Tar and Zip archives can be appended to without creating a whole new archive by calling `Insert()` on a tar or zip stream. However, for tarballs, this requires that the tarball is not compressed (due to complexities with modifying compression dictionaries). 304 | 305 | Here is an example that appends a file to a tarball on disk: 306 | 307 | ```go 308 | tarball, err := os.OpenFile("example.tar", os.O_RDWR, 0644) 309 | if err != nil { 310 | return err 311 | } 312 | defer tarball.Close() 313 | 314 | // prepare a text file for the root of the archive 315 | files, err := archives.FilesFromDisk(nil, map[string]string{ 316 | "/home/you/lastminute.txt": "", 317 | }) 318 | 319 | err := archives.Tar{}.Insert(context.Background(), tarball, files) 320 | if err != nil { 321 | return err 322 | } 323 | ``` 324 | 325 | The code is similar for inserting into a Zip archive, except you'll call `Insert()` on a `Zip{}` value instead. 326 | 327 | 328 | ### Traverse into archives while walking 329 | 330 | If you are traversing/walking the file system using [`fs.WalkDir()`](https://pkg.go.dev/io/fs#WalkDir), the [**`DeepFS`**](https://pkg.go.dev/github.com/mholt/archives#DeepFS) type lets you walk the contents of archives (and compressed archives!) transparently as if the archive file was a regular directory on disk. 331 | 332 | Simply root your DeepFS at a real path, then walk away: 333 | 334 | ```go 335 | fsys := &archives.DeepFS{Root: "/some/dir"} 336 | 337 | err := fs.WalkDir(fsys, ".", func(fpath string, d fs.DirEntry, err error) error { 338 | ... 339 | }) 340 | ``` 341 | 342 | You'll notice that paths within archives look like `/some/dir/archive.zip/foo/bar.txt`. If you pass a path like that into `fsys.Open()`, it will split the path at the end of the archive file (`/some/dir/archive.zip`) and use the remainder of the path (`foo/bar.txt`) inside the archive. 343 | -------------------------------------------------------------------------------- /archives.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "io/fs" 8 | "os" 9 | "path" 10 | "path/filepath" 11 | "strings" 12 | "time" 13 | ) 14 | 15 | // FileInfo is a virtualized, generalized file abstraction for interacting with archives. 16 | type FileInfo struct { 17 | fs.FileInfo 18 | 19 | // The file header as used/provided by the archive format. 20 | // Typically, you do not need to set this field when creating 21 | // an archive. 22 | Header any 23 | 24 | // The path of the file as it appears in the archive. 25 | // This is equivalent to Header.Name (for most Header 26 | // types). We require it to be specified here because 27 | // it is such a common field and we want to preserve 28 | // format-agnosticism (no type assertions) for basic 29 | // operations. 30 | // 31 | // When extracting, this name or path may not have 32 | // been sanitized; it should not be trusted at face 33 | // value. Consider using path.Clean() before using. 34 | // 35 | // If this is blank when inserting a file into an 36 | // archive, the filename's base may be assumed 37 | // by default to be the name in the archive. 38 | NameInArchive string 39 | 40 | // For symbolic and hard links, the target of the link. 41 | // Not supported by all archive formats. 42 | LinkTarget string 43 | 44 | // A callback function that opens the file to read its 45 | // contents. The file must be closed when reading is 46 | // complete. 47 | Open func() (fs.File, error) 48 | } 49 | 50 | func (f FileInfo) Stat() (fs.FileInfo, error) { return f.FileInfo, nil } 51 | 52 | // FilesFromDisk is an opinionated function that returns a list of FileInfos 53 | // by walking the directories in the filenames map. The keys are the names on 54 | // disk, and the values become their associated names in the archive. 55 | // 56 | // Map keys that specify directories on disk will be walked and added to the 57 | // archive recursively, rooted at the named directory. They should use the 58 | // platform's path separator (backslash on Windows; slash on everything else). 59 | // For convenience, map keys that end in a separator ('/', or '\' on Windows) 60 | // will enumerate contents only, without adding the folder itself to the archive. 61 | // 62 | // Map values should typically use slash ('/') as the separator regardless of 63 | // the platform, as most archive formats standardize on that rune as the 64 | // directory separator for filenames within an archive. For convenience, map 65 | // values that are empty string are interpreted as the base name of the file 66 | // (sans path) in the root of the archive; and map values that end in a slash 67 | // will use the base name of the file in that folder of the archive. 68 | // 69 | // File gathering will adhere to the settings specified in options. 70 | // 71 | // This function is used primarily when preparing a list of files to add to 72 | // an archive. 73 | func FilesFromDisk(ctx context.Context, options *FromDiskOptions, filenames map[string]string) ([]FileInfo, error) { 74 | var files []FileInfo 75 | for rootOnDisk, rootInArchive := range filenames { 76 | if err := ctx.Err(); err != nil { 77 | return nil, err 78 | } 79 | 80 | walkErr := filepath.WalkDir(rootOnDisk, func(filename string, d fs.DirEntry, err error) error { 81 | if err := ctx.Err(); err != nil { 82 | return err 83 | } 84 | if err != nil { 85 | return err 86 | } 87 | 88 | info, err := d.Info() 89 | if err != nil { 90 | return err 91 | } 92 | 93 | nameInArchive := nameOnDiskToNameInArchive(filename, rootOnDisk, rootInArchive) 94 | // this is the root folder and we are adding its contents to target rootInArchive 95 | if info.IsDir() && nameInArchive == "" { 96 | return nil 97 | } 98 | 99 | // handle symbolic links 100 | var linkTarget string 101 | if isSymlink(info) { 102 | if options != nil && options.FollowSymlinks { 103 | filename, info, err = followSymlink(filename) 104 | if err != nil { 105 | return err 106 | } 107 | } else { 108 | // preserve symlinks 109 | linkTarget, err = os.Readlink(filename) 110 | if err != nil { 111 | return fmt.Errorf("%s: readlink: %w", filename, err) 112 | } 113 | } 114 | } 115 | 116 | // handle file attributes 117 | if options != nil && options.ClearAttributes { 118 | info = noAttrFileInfo{info} 119 | } 120 | 121 | file := FileInfo{ 122 | FileInfo: info, 123 | NameInArchive: nameInArchive, 124 | LinkTarget: linkTarget, 125 | Open: func() (fs.File, error) { 126 | return os.Open(filename) 127 | }, 128 | } 129 | 130 | files = append(files, file) 131 | return nil 132 | }) 133 | if walkErr != nil { 134 | return nil, walkErr 135 | } 136 | } 137 | return files, nil 138 | } 139 | 140 | // nameOnDiskToNameInArchive converts a filename from disk to a name in an archive, 141 | // respecting rules defined by FilesFromDisk. nameOnDisk is the full filename on disk 142 | // which is expected to be prefixed by rootOnDisk (according to fs.WalkDirFunc godoc) 143 | // and which will be placed into a folder rootInArchive in the archive. 144 | func nameOnDiskToNameInArchive(nameOnDisk, rootOnDisk, rootInArchive string) string { 145 | // These manipulations of rootInArchive could be done just once instead of on 146 | // every walked file since they don't rely on nameOnDisk which is the only 147 | // variable that changes during the walk, but combining all the logic into this 148 | // one function is easier to reason about and test. I suspect the performance 149 | // penalty is insignificant. 150 | if strings.HasSuffix(rootOnDisk, string(filepath.Separator)) { 151 | // "map keys that end in a separator will enumerate contents only, 152 | // without adding the folder itself to the archive." 153 | rootInArchive = trimTopDir(rootInArchive) 154 | } else if rootInArchive == "" { 155 | // "map values that are empty string are interpreted as the base name 156 | // of the file (sans path) in the root of the archive" 157 | rootInArchive = filepath.Base(rootOnDisk) 158 | } 159 | if rootInArchive == "." { 160 | // an in-archive root of "." is an escape hatch for the above rule 161 | // where an empty in-archive root means to use the base name of the 162 | // file; if the user does not want this, they can specify a "." to 163 | // still put it in the root of the archive 164 | rootInArchive = "" 165 | } 166 | if strings.HasSuffix(rootInArchive, "/") { 167 | // "map values that end in a slash will use the base name of the file in 168 | // that folder of the archive." 169 | rootInArchive += filepath.Base(rootOnDisk) 170 | } 171 | truncPath := strings.TrimPrefix(nameOnDisk, rootOnDisk) 172 | return path.Join(rootInArchive, filepath.ToSlash(truncPath)) 173 | } 174 | 175 | // trimTopDir strips the top or first directory from the path. 176 | // It expects a forward-slashed path. 177 | // 178 | // Examples: "a/b/c" => "b/c", "/a/b/c" => "b/c" 179 | func trimTopDir(dir string) string { 180 | return strings.TrimPrefix(dir, topDir(dir)+"/") 181 | } 182 | 183 | // topDir returns the top or first directory in the path. 184 | // It expects a forward-slashed path. 185 | // 186 | // Examples: "a/b/c" => "a", "/a/b/c" => "/a" 187 | func topDir(dir string) string { 188 | var start int 189 | if len(dir) > 0 && dir[0] == '/' { 190 | start = 1 191 | } 192 | if pos := strings.Index(dir[start:], "/"); pos >= 0 { 193 | return dir[:pos+start] 194 | } 195 | return dir 196 | } 197 | 198 | // noAttrFileInfo is used to zero out some file attributes (issue #280). 199 | type noAttrFileInfo struct{ fs.FileInfo } 200 | 201 | // Mode preserves only the type and permission bits. 202 | func (no noAttrFileInfo) Mode() fs.FileMode { 203 | return no.FileInfo.Mode() & (fs.ModeType | fs.ModePerm) 204 | } 205 | func (noAttrFileInfo) ModTime() time.Time { return time.Time{} } 206 | func (noAttrFileInfo) Sys() any { return nil } 207 | 208 | // FromDiskOptions specifies various options for gathering files from disk. 209 | type FromDiskOptions struct { 210 | // If true, symbolic links will be dereferenced, meaning that 211 | // the link will not be added as a link, but what the link 212 | // points to will be added as a file. 213 | FollowSymlinks bool 214 | 215 | // If true, some file attributes will not be preserved. 216 | // Name, size, type, and permissions will still be preserved. 217 | ClearAttributes bool 218 | } 219 | 220 | // FileHandler is a callback function that is used to handle files as they are read 221 | // from an archive; it is kind of like fs.WalkDirFunc. Handler functions that open 222 | // their files must not overlap or run concurrently, as files may be read from the 223 | // same sequential stream; always close the file before returning. 224 | // 225 | // If the special error value fs.SkipDir is returned, the directory of the file 226 | // (or the file itself if it is a directory) will not be walked. Note that because 227 | // archive contents are not necessarily ordered, skipping directories requires 228 | // memory, and skipping lots of directories may run up your memory bill. 229 | // 230 | // Any other returned error will terminate a walk and be returned to the caller. 231 | type FileHandler func(ctx context.Context, info FileInfo) error 232 | 233 | // openAndCopyFile opens file for reading, copies its 234 | // contents to w, then closes file. 235 | func openAndCopyFile(file FileInfo, w io.Writer) error { 236 | fileReader, err := file.Open() 237 | if err != nil { 238 | return err 239 | } 240 | defer fileReader.Close() 241 | // When file is in use and size is being written to, creating the compressed 242 | // file will fail with "archive/tar: write too long." Using CopyN gracefully 243 | // handles this. 244 | _, err = io.Copy(w, fileReader) 245 | if err != nil && err != io.EOF { 246 | return err 247 | } 248 | return nil 249 | } 250 | 251 | // fileIsIncluded returns true if filename is included according to 252 | // filenameList; meaning it is in the list, its parent folder/path 253 | // is in the list, or the list is nil. 254 | func fileIsIncluded(filenameList []string, filename string) bool { 255 | // include all files if there is no specific list 256 | if filenameList == nil { 257 | return true 258 | } 259 | for _, fn := range filenameList { 260 | // exact matches are of course included 261 | if filename == fn { 262 | return true 263 | } 264 | // also consider the file included if its parent folder/path is in the list 265 | if strings.HasPrefix(filename, strings.TrimSuffix(fn, "/")+"/") { 266 | return true 267 | } 268 | } 269 | return false 270 | } 271 | 272 | func isSymlink(info fs.FileInfo) bool { 273 | return info.Mode()&os.ModeSymlink != 0 274 | } 275 | 276 | // streamSizeBySeeking determines the size of the stream by 277 | // seeking to the end, then back again, so the resulting 278 | // seek position upon returning is the same as when called 279 | // (assuming no errors). 280 | func streamSizeBySeeking(s io.Seeker) (int64, error) { 281 | currentPosition, err := s.Seek(0, io.SeekCurrent) 282 | if err != nil { 283 | return 0, fmt.Errorf("getting current offset: %w", err) 284 | } 285 | maxPosition, err := s.Seek(0, io.SeekEnd) 286 | if err != nil { 287 | return 0, fmt.Errorf("fast-forwarding to end: %w", err) 288 | } 289 | _, err = s.Seek(currentPosition, io.SeekStart) 290 | if err != nil { 291 | return 0, fmt.Errorf("returning to prior offset %d: %w", currentPosition, err) 292 | } 293 | return maxPosition, nil 294 | } 295 | 296 | // skipList keeps a list of non-intersecting paths 297 | // as long as its add method is used. Identical 298 | // elements are rejected, more specific paths are 299 | // replaced with broader ones, and more specific 300 | // paths won't be added when a broader one already 301 | // exists in the list. Trailing slashes are ignored. 302 | type skipList []string 303 | 304 | func (s *skipList) add(dir string) { 305 | trimmedDir := strings.TrimSuffix(dir, "/") 306 | var dontAdd bool 307 | for i := 0; i < len(*s); i++ { 308 | trimmedElem := strings.TrimSuffix((*s)[i], "/") 309 | if trimmedDir == trimmedElem { 310 | return 311 | } 312 | // don't add dir if a broader path already exists in the list 313 | if strings.HasPrefix(trimmedDir, trimmedElem+"/") { 314 | dontAdd = true 315 | continue 316 | } 317 | // if dir is broader than a path in the list, remove more specific path in list 318 | if strings.HasPrefix(trimmedElem, trimmedDir+"/") { 319 | *s = append((*s)[:i], (*s)[i+1:]...) 320 | i-- 321 | } 322 | } 323 | if !dontAdd { 324 | *s = append(*s, dir) 325 | } 326 | } 327 | 328 | // followSymlink follows a symlink until it finds a non-symlink, 329 | // returning the target path, file info, and any error that occurs. 330 | // It also checks for symlink loops and maximum depth. 331 | func followSymlink(filename string) (string, os.FileInfo, error) { 332 | visited := make(map[string]bool) 333 | visited[filename] = true 334 | // Limit in Linux kernel: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/namei.c?id=v3.5#n624 335 | const maxDepth = 40 336 | 337 | for { 338 | linkPath, err := os.Readlink(filename) 339 | if err != nil { 340 | return "", nil, fmt.Errorf("%s: readlink: %w", filename, err) 341 | } 342 | if !filepath.IsAbs(linkPath) { 343 | linkPath = filepath.Join(filepath.Dir(filename), linkPath) 344 | } 345 | info, err := os.Lstat(linkPath) 346 | if err != nil { 347 | return "", nil, fmt.Errorf("%s: statting dereferenced symlink: %w", filename, err) 348 | } 349 | 350 | // Not a symlink, we've found the target, return it 351 | if info.Mode()&os.ModeSymlink == 0 { 352 | return linkPath, info, nil 353 | } 354 | 355 | if visited[linkPath] { 356 | return "", nil, fmt.Errorf("%s: symlink loop", filename) 357 | } 358 | 359 | if len(visited) >= maxDepth { 360 | return "", nil, fmt.Errorf("%s: maximum symlink depth (%d) exceeded", filename, maxDepth) 361 | } 362 | 363 | visited[linkPath] = true 364 | filename = linkPath 365 | } 366 | } 367 | -------------------------------------------------------------------------------- /archives_test.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "reflect" 8 | "runtime" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | func TestTrimTopDir(t *testing.T) { 14 | for i, test := range []struct { 15 | input string 16 | want string 17 | }{ 18 | {input: "a/b/c", want: "b/c"}, 19 | {input: "a", want: "a"}, 20 | {input: "abc/def", want: "def"}, 21 | {input: "/abc/def", want: "def"}, 22 | } { 23 | t.Run(test.input, func(t *testing.T) { 24 | got := trimTopDir(test.input) 25 | if got != test.want { 26 | t.Errorf("Test %d: want: '%s', got: '%s')", i, test.want, got) 27 | } 28 | }) 29 | } 30 | } 31 | 32 | func TestTopDir(t *testing.T) { 33 | for _, tc := range []struct { 34 | input string 35 | want string 36 | }{ 37 | {input: "a/b/c", want: "a"}, 38 | {input: "a", want: "a"}, 39 | {input: "abc/def", want: "abc"}, 40 | {input: "/abc/def", want: "/abc"}, 41 | } { 42 | t.Run(tc.input, func(t *testing.T) { 43 | got := topDir(tc.input) 44 | if got != tc.want { 45 | t.Errorf("want: '%s', got: '%s')", tc.want, got) 46 | } 47 | }) 48 | } 49 | } 50 | 51 | func TestFileIsIncluded(t *testing.T) { 52 | for i, tc := range []struct { 53 | included []string 54 | candidate string 55 | expect bool 56 | }{ 57 | { 58 | included: []string{"a"}, 59 | candidate: "a", 60 | expect: true, 61 | }, 62 | { 63 | included: []string{"a", "b", "a/b"}, 64 | candidate: "b", 65 | expect: true, 66 | }, 67 | { 68 | included: []string{"a", "b", "c/d"}, 69 | candidate: "c/d/e", 70 | expect: true, 71 | }, 72 | { 73 | included: []string{"a"}, 74 | candidate: "a/b/c", 75 | expect: true, 76 | }, 77 | { 78 | included: []string{"a"}, 79 | candidate: "aa/b/c", 80 | expect: false, 81 | }, 82 | { 83 | included: []string{"a", "b", "c/d"}, 84 | candidate: "b/c", 85 | expect: true, 86 | }, 87 | { 88 | included: []string{"a/"}, 89 | candidate: "a", 90 | expect: false, 91 | }, 92 | { 93 | included: []string{"a/"}, 94 | candidate: "a/", 95 | expect: true, 96 | }, 97 | { 98 | included: []string{"a"}, 99 | candidate: "a/", 100 | expect: true, 101 | }, 102 | { 103 | included: []string{"a/b"}, 104 | candidate: "a/", 105 | expect: false, 106 | }, 107 | } { 108 | actual := fileIsIncluded(tc.included, tc.candidate) 109 | if actual != tc.expect { 110 | t.Errorf("Test %d (included=%v candidate=%v): expected %t but got %t", 111 | i, tc.included, tc.candidate, tc.expect, actual) 112 | } 113 | } 114 | } 115 | 116 | func TestSkipList(t *testing.T) { 117 | for i, tc := range []struct { 118 | start skipList 119 | add string 120 | expect skipList 121 | }{ 122 | { 123 | start: skipList{"a", "b", "c"}, 124 | add: "d", 125 | expect: skipList{"a", "b", "c", "d"}, 126 | }, 127 | { 128 | start: skipList{"a", "b", "c"}, 129 | add: "b", 130 | expect: skipList{"a", "b", "c"}, 131 | }, 132 | { 133 | start: skipList{"a", "b", "c"}, 134 | add: "b/c", // don't add because b implies b/c 135 | expect: skipList{"a", "b", "c"}, 136 | }, 137 | { 138 | start: skipList{"a", "b", "c"}, 139 | add: "b/c/", // effectively same as above 140 | expect: skipList{"a", "b", "c"}, 141 | }, 142 | { 143 | start: skipList{"a", "b/", "c"}, 144 | add: "b", // effectively same as b/ 145 | expect: skipList{"a", "b/", "c"}, 146 | }, 147 | { 148 | start: skipList{"a", "b/c", "c"}, 149 | add: "b", // replace b/c because b is broader 150 | expect: skipList{"a", "c", "b"}, 151 | }, 152 | } { 153 | start := make(skipList, len(tc.start)) 154 | copy(start, tc.start) 155 | 156 | tc.start.add(tc.add) 157 | 158 | if !reflect.DeepEqual(tc.start, tc.expect) { 159 | t.Errorf("Test %d (start=%v add=%v): expected %v but got %v", 160 | i, start, tc.add, tc.expect, tc.start) 161 | } 162 | } 163 | } 164 | 165 | func TestNameOnDiskToNameInArchive(t *testing.T) { 166 | for i, tc := range []struct { 167 | windows bool // only run this test on Windows 168 | rootOnDisk string // user says they want to archive this file/folder 169 | nameOnDisk string // the walk encounters a file with this name (with rootOnDisk as a prefix) 170 | rootInArchive string // file should be placed in this dir within the archive (rootInArchive becomes a prefix) 171 | expect string // final filename in archive 172 | }{ 173 | { 174 | rootOnDisk: "a", 175 | nameOnDisk: "a/b/c", 176 | rootInArchive: "", 177 | expect: "a/b/c", 178 | }, 179 | { 180 | rootOnDisk: "a/b", 181 | nameOnDisk: "a/b/c", 182 | rootInArchive: "", 183 | expect: "b/c", 184 | }, 185 | { 186 | rootOnDisk: "a/b/", 187 | nameOnDisk: "a/b/c", 188 | rootInArchive: "", 189 | expect: "c", 190 | }, 191 | { 192 | rootOnDisk: "a/b/", 193 | nameOnDisk: "a/b/c", 194 | rootInArchive: ".", 195 | expect: "c", 196 | }, 197 | { 198 | rootOnDisk: "a/b/c", 199 | nameOnDisk: "a/b/c", 200 | rootInArchive: "", 201 | expect: "c", 202 | }, 203 | { 204 | rootOnDisk: "a/b", 205 | nameOnDisk: "a/b/c", 206 | rootInArchive: "foo", 207 | expect: "foo/c", 208 | }, 209 | { 210 | rootOnDisk: "a", 211 | nameOnDisk: "a/b/c", 212 | rootInArchive: "foo", 213 | expect: "foo/b/c", 214 | }, 215 | { 216 | rootOnDisk: "a", 217 | nameOnDisk: "a/b/c", 218 | rootInArchive: "foo/", 219 | expect: "foo/a/b/c", 220 | }, 221 | { 222 | rootOnDisk: "a/", 223 | nameOnDisk: "a/b/c", 224 | rootInArchive: "foo", 225 | expect: "foo/b/c", 226 | }, 227 | { 228 | rootOnDisk: "a/", 229 | nameOnDisk: "a/b/c", 230 | rootInArchive: "foo", 231 | expect: "foo/b/c", 232 | }, 233 | { 234 | windows: true, 235 | rootOnDisk: `C:\foo`, 236 | nameOnDisk: `C:\foo\bar`, 237 | rootInArchive: "", 238 | expect: "foo/bar", 239 | }, 240 | { 241 | windows: true, 242 | rootOnDisk: `C:\foo`, 243 | nameOnDisk: `C:\foo\bar`, 244 | rootInArchive: "subfolder", 245 | expect: "subfolder/bar", 246 | }, 247 | } { 248 | if !strings.HasPrefix(tc.nameOnDisk, tc.rootOnDisk) { 249 | t.Errorf("Test %d: Invalid test case! Filename (on disk) will have rootOnDisk as a prefix according to the fs.WalkDirFunc godoc.", i) 250 | continue 251 | } 252 | if tc.windows && runtime.GOOS != "windows" { 253 | t.Logf("Test %d: Skipping test that is only compatible with Windows", i) 254 | continue 255 | } 256 | if !tc.windows && runtime.GOOS == "windows" { 257 | t.Logf("Test %d: Skipping test that is not compatible with Windows", i) 258 | continue 259 | } 260 | 261 | actual := nameOnDiskToNameInArchive(tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive) 262 | if actual != tc.expect { 263 | t.Errorf("Test %d: Got '%s' but expected '%s' (nameOnDisk=%s rootOnDisk=%s rootInArchive=%s)", 264 | i, actual, tc.expect, tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive) 265 | } 266 | } 267 | } 268 | 269 | func TestFollowSymlink(t *testing.T) { 270 | // Create temp directory for tests 271 | tmpDir := t.TempDir() 272 | 273 | fixSeparators := func(path string) string { 274 | if runtime.GOOS == "windows" { 275 | return strings.ReplaceAll(path, "/", "\\") 276 | } 277 | return path 278 | } 279 | 280 | t.Run("single symlink to regular file", func(t *testing.T) { 281 | // Create a regular file 282 | targetFile := filepath.Join(tmpDir, "target.txt") 283 | if err := os.WriteFile(targetFile, []byte("test content"), 0644); err != nil { 284 | t.Fatal(err) 285 | } 286 | 287 | // Create symlink to the file 288 | symlinkFile := filepath.Join(tmpDir, "link.txt") 289 | if err := os.Symlink(targetFile, symlinkFile); err != nil { 290 | t.Fatal(err) 291 | } 292 | 293 | // Test followSymlink 294 | finalPath, info, err := followSymlink(symlinkFile) 295 | if err != nil { 296 | t.Fatalf("followSymlink failed: %v", err) 297 | } 298 | 299 | if finalPath != fixSeparators(targetFile) { 300 | t.Errorf("expected final path %s, got %s", fixSeparators(targetFile), finalPath) 301 | } 302 | 303 | if info.IsDir() { 304 | t.Error("expected file, got directory") 305 | } 306 | 307 | if info.Mode()&os.ModeSymlink != 0 { 308 | t.Error("expected regular file, got symlink") 309 | } 310 | }) 311 | 312 | t.Run("chain of symlinks", func(t *testing.T) { 313 | // Create a regular file 314 | targetFile := filepath.Join(tmpDir, "chain_target.txt") 315 | if err := os.WriteFile(targetFile, []byte("chain content"), 0644); err != nil { 316 | t.Fatal(err) 317 | } 318 | 319 | // Create first symlink pointing to the file 320 | link1 := filepath.Join(tmpDir, "chain_link1.txt") 321 | if err := os.Symlink(targetFile, link1); err != nil { 322 | t.Fatal(err) 323 | } 324 | 325 | // Create second symlink pointing to first symlink 326 | link2 := filepath.Join(tmpDir, "chain_link2.txt") 327 | if err := os.Symlink(link1, link2); err != nil { 328 | t.Fatal(err) 329 | } 330 | 331 | // Test followSymlink on the chain 332 | finalPath, info, err := followSymlink(link2) 333 | if err != nil { 334 | t.Fatalf("followSymlink failed: %v", err) 335 | } 336 | 337 | if finalPath != fixSeparators(targetFile) { 338 | t.Errorf("expected final path %s, got %s", fixSeparators(targetFile), finalPath) 339 | } 340 | 341 | if info.Mode()&os.ModeSymlink != 0 { 342 | t.Error("expected regular file, got symlink") 343 | } 344 | }) 345 | 346 | t.Run("symlink loop detection", func(t *testing.T) { 347 | // Create circular symlinks 348 | loop1 := filepath.Join(tmpDir, "loop1.txt") 349 | loop2 := filepath.Join(tmpDir, "loop2.txt") 350 | 351 | // Create symlinks that point to each other 352 | if err := os.Symlink(loop2, loop1); err != nil { 353 | t.Fatal(err) 354 | } 355 | if err := os.Symlink(loop1, loop2); err != nil { 356 | t.Fatal(err) 357 | } 358 | 359 | // Test followSymlink should detect the loop 360 | _, _, err := followSymlink(loop1) 361 | if err == nil { 362 | t.Error("expected error for symlink loop, got nil") 363 | } 364 | if !strings.Contains(err.Error(), "symlink loop") { 365 | t.Errorf("expected 'symlink loop' error, got: %v", err) 366 | } 367 | }) 368 | 369 | t.Run("relative path symlink", func(t *testing.T) { 370 | // Create subdirectory 371 | subDir := filepath.Join(tmpDir, "subdir") 372 | if err := os.Mkdir(subDir, 0755); err != nil { 373 | t.Fatal(err) 374 | } 375 | 376 | // Create target file in subdirectory 377 | targetFile := filepath.Join(tmpDir, "relative_target.txt") 378 | if err := os.WriteFile(targetFile, []byte("relative content"), 0644); err != nil { 379 | t.Fatal(err) 380 | } 381 | 382 | // Create symlink with relative path from tmpDir to subdir/target 383 | symlinkFile := filepath.Join(subDir, "relative_link.txt") 384 | if err := os.Symlink("../relative_target.txt", symlinkFile); err != nil { 385 | t.Fatal(err) 386 | } 387 | 388 | // Test followSymlink 389 | finalPath, info, err := followSymlink(symlinkFile) 390 | if err != nil { 391 | t.Fatalf("followSymlink failed: %v", err) 392 | } 393 | 394 | if finalPath != fixSeparators(targetFile) { 395 | t.Errorf("expected final path %s, got %s", targetFile, finalPath) 396 | } 397 | 398 | if info.Mode()&os.ModeSymlink != 0 { 399 | t.Error("expected regular file, got symlink") 400 | } 401 | }) 402 | 403 | t.Run("absolute path symlink", func(t *testing.T) { 404 | // Create target file 405 | targetFile := filepath.Join(tmpDir, "abs_target.txt") 406 | if err := os.WriteFile(targetFile, []byte("absolute content"), 0644); err != nil { 407 | t.Fatal(err) 408 | } 409 | 410 | // Create symlink with absolute path 411 | symlinkFile := filepath.Join(tmpDir, "abs_link.txt") 412 | if err := os.Symlink(targetFile, symlinkFile); err != nil { 413 | t.Fatal(err) 414 | } 415 | 416 | // Test followSymlink 417 | finalPath, info, err := followSymlink(symlinkFile) 418 | if err != nil { 419 | t.Fatalf("followSymlink failed: %v", err) 420 | } 421 | 422 | if finalPath != fixSeparators(targetFile) { 423 | t.Errorf("expected final path %s, got %s", fixSeparators(targetFile), finalPath) 424 | } 425 | 426 | if info.Mode()&os.ModeSymlink != 0 { 427 | t.Error("expected regular file, got symlink") 428 | } 429 | }) 430 | 431 | t.Run("broken symlink", func(t *testing.T) { 432 | // Create symlink pointing to non-existent file 433 | brokenLink := filepath.Join(tmpDir, "broken_link.txt") 434 | nonExistentTarget := filepath.Join(tmpDir, "nonexistent.txt") 435 | if err := os.Symlink(nonExistentTarget, brokenLink); err != nil { 436 | t.Fatal(err) 437 | } 438 | 439 | // Test followSymlink should return error 440 | _, _, err := followSymlink(brokenLink) 441 | if err == nil { 442 | t.Error("expected error for broken symlink, got nil") 443 | } 444 | if !strings.Contains(err.Error(), "statting dereferenced symlink") { 445 | t.Errorf("expected 'statting dereferenced symlink' error, got: %v", err) 446 | } 447 | }) 448 | 449 | t.Run("symlink to directory", func(t *testing.T) { 450 | // Create target directory 451 | targetDir := filepath.Join(tmpDir, "target_dir") 452 | if err := os.Mkdir(targetDir, 0755); err != nil { 453 | t.Fatal(err) 454 | } 455 | 456 | // Create symlink to directory 457 | symlinkDir := filepath.Join(tmpDir, "link_dir") 458 | if err := os.Symlink(targetDir, symlinkDir); err != nil { 459 | t.Fatal(err) 460 | } 461 | 462 | // Test followSymlink 463 | finalPath, info, err := followSymlink(symlinkDir) 464 | if err != nil { 465 | t.Fatalf("followSymlink failed: %v", err) 466 | } 467 | 468 | if finalPath != fixSeparators(targetDir) { 469 | t.Errorf("expected final path %s, got %s", fixSeparators(targetDir), finalPath) 470 | } 471 | 472 | if !info.IsDir() { 473 | t.Error("expected directory, got file") 474 | } 475 | 476 | if info.Mode()&os.ModeSymlink != 0 { 477 | t.Error("expected regular directory, got symlink") 478 | } 479 | }) 480 | 481 | t.Run("maximum symlink depth exceeded", func(t *testing.T) { 482 | // Create target file 483 | targetFile := filepath.Join(tmpDir, "depth_target.txt") 484 | if err := os.WriteFile(targetFile, []byte("depth content"), 0644); err != nil { 485 | t.Fatal(err) 486 | } 487 | 488 | // Create a chain of 41 symlinks (exceeding the limit of 40) 489 | prevLink := targetFile 490 | var links []string 491 | for i := 0; i < 41; i++ { 492 | linkName := filepath.Join(tmpDir, fmt.Sprintf("depth_link_%d.txt", i)) 493 | if err := os.Symlink(prevLink, linkName); err != nil { 494 | t.Fatal(err) 495 | } 496 | links = append(links, linkName) 497 | prevLink = linkName 498 | } 499 | 500 | // Test followSymlink should return depth error 501 | _, _, err := followSymlink(links[len(links)-1]) 502 | if err == nil { 503 | t.Error("expected error for maximum depth exceeded, got nil") 504 | } 505 | if !strings.Contains(err.Error(), "maximum symlink depth") { 506 | t.Errorf("expected 'maximum symlink depth' error, got: %v", err) 507 | } 508 | if !strings.Contains(err.Error(), "40") { 509 | t.Errorf("expected error to mention depth limit of 40, got: %v", err) 510 | } 511 | }) 512 | } 513 | -------------------------------------------------------------------------------- /brotli.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "strings" 8 | 9 | "github.com/andybalholm/brotli" 10 | ) 11 | 12 | func init() { 13 | RegisterFormat(Brotli{}) 14 | } 15 | 16 | // Brotli facilitates brotli compression. 17 | type Brotli struct { 18 | Quality int 19 | } 20 | 21 | func (Brotli) Extension() string { return ".br" } 22 | func (Brotli) MediaType() string { return "application/x-br" } 23 | 24 | func (br Brotli) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 25 | var mr MatchResult 26 | 27 | // match filename 28 | if strings.Contains(strings.ToLower(filename), br.Extension()) { 29 | mr.ByName = true 30 | } 31 | 32 | if stream != nil { 33 | // brotli does not have well-defined file headers or a magic number; 34 | // the best way to match the stream is probably to try decoding part 35 | // of it, but we'll just have to guess a large-enough size that is 36 | // still small enough for the smallest streams we'll encounter 37 | input := &bytes.Buffer{} 38 | r := brotli.NewReader(io.TeeReader(stream, input)) 39 | buf := make([]byte, 16) 40 | 41 | // First gauntlet - can the reader even read 16 bytes without an error? 42 | n, err := r.Read(buf) 43 | if err != nil { 44 | return mr, nil 45 | } 46 | buf = buf[:n] 47 | inputBytes := input.Bytes() 48 | 49 | // Second gauntlet - do the decompressed bytes exist in the raw input? 50 | // If they don't appear in the first 4 bytes (to account for the up to 51 | // 32 bits of initial brotli header) or at all, then chances are the 52 | // input was compressed. 53 | idx := bytes.Index(inputBytes, buf) 54 | if idx < 4 { 55 | mr.ByStream = true 56 | return mr, nil 57 | } 58 | 59 | // The input is assumed to be compressed data, but we still can't be 100% sure. 60 | // Try reading more data until we encounter an error. 61 | for n < 128 { 62 | nn, err := r.Read(buf) 63 | switch err { 64 | case io.EOF: 65 | // If we've reached EOF, we return assuming it's compressed. 66 | mr.ByStream = true 67 | return mr, nil 68 | case io.ErrUnexpectedEOF: 69 | // If we've encountered a short read, that's probably due to invalid reads due 70 | // to the fact it isn't compressed data at all. 71 | return mr, nil 72 | case nil: 73 | // No error, no problem. Continue reading. 74 | n += nn 75 | default: 76 | // If we encounter any other error, return it. 77 | return mr, nil 78 | } 79 | } 80 | 81 | // If we haven't encountered an error by now, the input is probably compressed. 82 | mr.ByStream = true 83 | } 84 | 85 | return mr, nil 86 | } 87 | 88 | func (br Brotli) OpenWriter(w io.Writer) (io.WriteCloser, error) { 89 | return brotli.NewWriterLevel(w, br.Quality), nil 90 | } 91 | 92 | func (Brotli) OpenReader(r io.Reader) (io.ReadCloser, error) { 93 | return io.NopCloser(brotli.NewReader(r)), nil 94 | } 95 | -------------------------------------------------------------------------------- /brotli_test.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "testing" 7 | ) 8 | 9 | func TestBrotli_Match_Stream(t *testing.T) { 10 | testTxt := []byte("this is text, but it has to be long enough to match brotli which doesn't have a magic number") 11 | type testcase struct { 12 | name string 13 | input []byte 14 | matches bool 15 | } 16 | for _, tc := range []testcase{ 17 | { 18 | name: "uncompressed yaml", 19 | input: []byte("---\nthis-is-not-brotli: \"it is actually yaml\""), 20 | matches: false, 21 | }, 22 | { 23 | name: "uncompressed text", 24 | input: testTxt, 25 | matches: false, 26 | }, 27 | { 28 | name: "text compressed with brotli quality 0", 29 | input: compress(t, ".br", testTxt, Brotli{Quality: 0}.OpenWriter), 30 | matches: true, 31 | }, 32 | { 33 | name: "text compressed with brotli quality 1", 34 | input: compress(t, ".br", testTxt, Brotli{Quality: 1}.OpenWriter), 35 | matches: true, 36 | }, 37 | { 38 | name: "text compressed with brotli quality 2", 39 | input: compress(t, ".br", testTxt, Brotli{Quality: 2}.OpenWriter), 40 | matches: true, 41 | }, 42 | { 43 | name: "text compressed with brotli quality 3", 44 | input: compress(t, ".br", testTxt, Brotli{Quality: 3}.OpenWriter), 45 | matches: true, 46 | }, 47 | { 48 | name: "text compressed with brotli quality 4", 49 | input: compress(t, ".br", testTxt, Brotli{Quality: 4}.OpenWriter), 50 | matches: true, 51 | }, 52 | { 53 | name: "text compressed with brotli quality 5", 54 | input: compress(t, ".br", testTxt, Brotli{Quality: 5}.OpenWriter), 55 | matches: true, 56 | }, 57 | { 58 | name: "text compressed with brotli quality 6", 59 | input: compress(t, ".br", testTxt, Brotli{Quality: 6}.OpenWriter), 60 | matches: true, 61 | }, 62 | { 63 | name: "text compressed with brotli quality 7", 64 | input: compress(t, ".br", testTxt, Brotli{Quality: 7}.OpenWriter), 65 | matches: true, 66 | }, 67 | { 68 | name: "text compressed with brotli quality 8", 69 | input: compress(t, ".br", testTxt, Brotli{Quality: 8}.OpenWriter), 70 | matches: true, 71 | }, 72 | { 73 | name: "text compressed with brotli quality 9", 74 | input: compress(t, ".br", testTxt, Brotli{Quality: 9}.OpenWriter), 75 | matches: true, 76 | }, 77 | { 78 | name: "text compressed with brotli quality 10", 79 | input: compress(t, ".br", testTxt, Brotli{Quality: 10}.OpenWriter), 80 | matches: true, 81 | }, 82 | { 83 | name: "text compressed with brotli quality 11", 84 | input: compress(t, ".br", testTxt, Brotli{Quality: 11}.OpenWriter), 85 | matches: true, 86 | }, 87 | } { 88 | t.Run(tc.name, func(t *testing.T) { 89 | r := bytes.NewBuffer(tc.input) 90 | 91 | mr, err := Brotli{}.Match(context.Background(), "", r) 92 | if err != nil { 93 | t.Errorf("Brotli.OpenReader() error = %v", err) 94 | return 95 | } 96 | 97 | if mr.ByStream != tc.matches { 98 | t.Logf("input: %s", tc.input) 99 | t.Error("Brotli.Match() expected ByStream to be", tc.matches, "but got", mr.ByStream) 100 | } 101 | }) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /bz2.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "strings" 8 | 9 | "github.com/dsnet/compress/bzip2" 10 | ) 11 | 12 | func init() { 13 | RegisterFormat(Bz2{}) 14 | } 15 | 16 | // Bz2 facilitates bzip2 compression. 17 | type Bz2 struct { 18 | CompressionLevel int 19 | } 20 | 21 | func (Bz2) Extension() string { return ".bz2" } 22 | func (Bz2) MediaType() string { return "application/x-bzip2" } 23 | 24 | func (bz Bz2) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 25 | var mr MatchResult 26 | 27 | // match filename 28 | if strings.Contains(strings.ToLower(filename), bz.Extension()) { 29 | mr.ByName = true 30 | } 31 | 32 | // match file header 33 | buf, err := readAtMost(stream, len(bzip2Header)) 34 | if err != nil { 35 | return mr, err 36 | } 37 | mr.ByStream = bytes.Equal(buf, bzip2Header) 38 | 39 | return mr, nil 40 | } 41 | 42 | func (bz Bz2) OpenWriter(w io.Writer) (io.WriteCloser, error) { 43 | return bzip2.NewWriter(w, &bzip2.WriterConfig{ 44 | Level: bz.CompressionLevel, 45 | }) 46 | } 47 | 48 | func (Bz2) OpenReader(r io.Reader) (io.ReadCloser, error) { 49 | return bzip2.NewReader(r, nil) 50 | } 51 | 52 | var bzip2Header = []byte("BZh") 53 | -------------------------------------------------------------------------------- /formats.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "path" 10 | "path/filepath" 11 | "strings" 12 | ) 13 | 14 | // RegisterFormat registers a format. It should be called during init. 15 | // Duplicate formats by name are not allowed and will panic. 16 | func RegisterFormat(format Format) { 17 | name := strings.Trim(strings.ToLower(format.Extension()), ".") 18 | if _, ok := formats[name]; ok { 19 | panic("format " + name + " is already registered") 20 | } 21 | formats[name] = format 22 | } 23 | 24 | // Identify iterates the registered formats and returns the one that 25 | // matches the given filename and/or stream. It is capable of identifying 26 | // compressed files (.gz, .xz...), archive files (.tar, .zip...), and 27 | // compressed archive files (tar.gz, tar.bz2...). The returned Format 28 | // value can be type-asserted to ascertain its capabilities. 29 | // 30 | // If no matching formats were found, special error NoMatch is returned. 31 | // 32 | // If stream is nil then it will only match on file name and the 33 | // returned io.Reader will be nil. 34 | // 35 | // If stream is non-nil, it will be returned in the same read position 36 | // as it was before Identify() was called, by virtue of buffering the 37 | // peeked bytes. However, if the stream is an io.Seeker, Seek() must 38 | // work, no extra buffering will be performed, and the original input 39 | // value will be returned at the original position by seeking. 40 | func Identify(ctx context.Context, filename string, stream io.Reader) (Format, io.Reader, error) { 41 | var compression Compression 42 | var archival Archival 43 | var extraction Extraction 44 | 45 | filename = path.Base(filepath.ToSlash(filename)) 46 | 47 | rewindableStream, err := newRewindReader(stream) 48 | if err != nil { 49 | return nil, nil, err 50 | } 51 | 52 | // try compression format first, since that's the outer "layer" if combined 53 | for name, format := range formats { 54 | cf, isCompression := format.(Compression) 55 | if !isCompression { 56 | continue 57 | } 58 | 59 | matchResult, err := identifyOne(ctx, format, filename, rewindableStream, nil) 60 | if err != nil { 61 | return nil, rewindableStream.reader(), fmt.Errorf("matching %s: %w", name, err) 62 | } 63 | 64 | // if matched, wrap input stream with decompression 65 | // so we can see if it contains an archive within 66 | if matchResult.Matched() { 67 | compression = cf 68 | break 69 | } 70 | } 71 | 72 | // try archival and extraction formats next 73 | for name, format := range formats { 74 | ar, isArchive := format.(Archival) 75 | ex, isExtract := format.(Extraction) 76 | if !isArchive && !isExtract { 77 | continue 78 | } 79 | 80 | matchResult, err := identifyOne(ctx, format, filename, rewindableStream, compression) 81 | if err != nil { 82 | return nil, rewindableStream.reader(), fmt.Errorf("matching %s: %w", name, err) 83 | } 84 | 85 | if matchResult.Matched() { 86 | archival = ar 87 | extraction = ex 88 | break 89 | } 90 | } 91 | 92 | // the stream should be rewound by identifyOne; then return the most specific type of match 93 | bufferedStream := rewindableStream.reader() 94 | switch { 95 | case compression != nil && archival == nil && extraction == nil: 96 | return compression, bufferedStream, nil 97 | case compression == nil && archival != nil && extraction == nil: 98 | return archival, bufferedStream, nil 99 | case compression == nil && archival == nil && extraction != nil: 100 | return extraction, bufferedStream, nil 101 | case compression == nil && archival != nil && extraction != nil: 102 | // archival and extraction are always set together, so they must be the same 103 | return archival, bufferedStream, nil 104 | case compression != nil && extraction != nil: 105 | // in practice, this is only used for compressed tar files, and the tar format can 106 | // both read and write, so the archival value should always work too; but keep in 107 | // mind that Identify() is used on existing files to be read, not new files to write 108 | return CompressedArchive{archival, extraction, compression}, bufferedStream, nil 109 | default: 110 | return nil, bufferedStream, NoMatch 111 | } 112 | } 113 | 114 | func identifyOne(ctx context.Context, format Format, filename string, stream *rewindReader, comp Compression) (mr MatchResult, err error) { 115 | defer stream.rewind() 116 | 117 | if filename == "." { 118 | filename = "" 119 | } 120 | 121 | // if looking within a compressed format, wrap the stream in a 122 | // reader that can decompress it so we can match the "inner" format 123 | // (yes, we have to make a new reader every time we do a match, 124 | // because we reset/seek the stream each time and that can mess up 125 | // the compression reader's state if we don't discard it also) 126 | if comp != nil && stream != nil { 127 | decompressedStream, openErr := comp.OpenReader(stream) 128 | if openErr != nil { 129 | return MatchResult{}, openErr 130 | } 131 | defer decompressedStream.Close() 132 | mr, err = format.Match(ctx, filename, decompressedStream) 133 | } else { 134 | // Make sure we pass a nil io.Reader not a *rewindReader(nil) 135 | var r io.Reader 136 | if stream != nil { 137 | r = stream 138 | } 139 | mr, err = format.Match(ctx, filename, r) 140 | } 141 | 142 | // if the error is EOF, we can just ignore it. 143 | // Just means we have a small input file. 144 | if errors.Is(err, io.EOF) { 145 | err = nil 146 | } 147 | return mr, err 148 | } 149 | 150 | // readAtMost reads at most n bytes from the stream. A nil, empty, or short 151 | // stream is not an error. The returned slice of bytes may have length < n 152 | // without an error. 153 | func readAtMost(stream io.Reader, n int) ([]byte, error) { 154 | if stream == nil || n <= 0 { 155 | return []byte{}, nil 156 | } 157 | 158 | buf := make([]byte, n) 159 | nr, err := io.ReadFull(stream, buf) 160 | 161 | // Return the bytes read if there was no error OR if the 162 | // error was EOF (stream was empty) or UnexpectedEOF (stream 163 | // had less than n). We ignore those errors because we aren't 164 | // required to read the full n bytes; so an empty or short 165 | // stream is not actually an error. 166 | if err == nil || 167 | errors.Is(err, io.EOF) || 168 | errors.Is(err, io.ErrUnexpectedEOF) { 169 | return buf[:nr], nil 170 | } 171 | 172 | return nil, err 173 | } 174 | 175 | // CompressedArchive represents an archive which is compressed externally 176 | // (for example, a gzipped tar file, .tar.gz.) It combines a compression 177 | // format on top of an archival/extraction format and provides both 178 | // functionalities in a single type, allowing archival and extraction 179 | // operations transparently through compression and decompression. However, 180 | // compressed archives have some limitations; for example, files cannot be 181 | // inserted/appended because of complexities with modifying existing 182 | // compression state (perhaps this could be overcome, but I'm not about to 183 | // try it). 184 | type CompressedArchive struct { 185 | Archival 186 | Extraction 187 | Compression 188 | } 189 | 190 | // Name returns a concatenation of the archive and compression format extensions. 191 | func (ca CompressedArchive) Extension() string { 192 | var name string 193 | if ca.Archival != nil { 194 | name += ca.Archival.Extension() 195 | } else if ca.Extraction != nil { 196 | name += ca.Extraction.Extension() 197 | } 198 | name += ca.Compression.Extension() 199 | return name 200 | } 201 | 202 | // MediaType returns the compression format's MIME type, since 203 | // a compressed archive is fundamentally a compressed file. 204 | func (ca CompressedArchive) MediaType() string { return ca.Compression.MediaType() } 205 | 206 | // Match matches if the input matches both the compression and archival/extraction format. 207 | func (ca CompressedArchive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) { 208 | var conglomerate MatchResult 209 | 210 | if ca.Compression != nil { 211 | matchResult, err := ca.Compression.Match(ctx, filename, stream) 212 | if err != nil { 213 | return MatchResult{}, err 214 | } 215 | if !matchResult.Matched() { 216 | return matchResult, nil 217 | } 218 | 219 | // wrap the reader with the decompressor so we can 220 | // attempt to match the archive by reading the stream 221 | rc, err := ca.Compression.OpenReader(stream) 222 | if err != nil { 223 | return matchResult, err 224 | } 225 | defer rc.Close() 226 | stream = rc 227 | 228 | conglomerate = matchResult 229 | } 230 | 231 | if ca.Archival != nil { 232 | matchResult, err := ca.Archival.Match(ctx, filename, stream) 233 | if err != nil { 234 | return MatchResult{}, err 235 | } 236 | if !matchResult.Matched() { 237 | return matchResult, nil 238 | } 239 | conglomerate.ByName = conglomerate.ByName || matchResult.ByName 240 | conglomerate.ByStream = conglomerate.ByStream || matchResult.ByStream 241 | } 242 | 243 | return conglomerate, nil 244 | } 245 | 246 | // Archive writes an archive to the output stream while compressing the result. 247 | func (ca CompressedArchive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error { 248 | if ca.Archival == nil { 249 | return fmt.Errorf("no archival format") 250 | } 251 | if ca.Compression != nil { 252 | wc, err := ca.Compression.OpenWriter(output) 253 | if err != nil { 254 | return err 255 | } 256 | defer wc.Close() 257 | output = wc 258 | } 259 | return ca.Archival.Archive(ctx, output, files) 260 | } 261 | 262 | // ArchiveAsync adds files to the output archive while compressing the result asynchronously. 263 | func (ca CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error { 264 | if ca.Archival == nil { 265 | return fmt.Errorf("no archival format") 266 | } 267 | do, ok := ca.Archival.(ArchiverAsync) 268 | if !ok { 269 | return fmt.Errorf("%T archive does not support async writing", ca.Archival) 270 | } 271 | if ca.Compression != nil { 272 | wc, err := ca.Compression.OpenWriter(output) 273 | if err != nil { 274 | return err 275 | } 276 | defer wc.Close() 277 | output = wc 278 | } 279 | return do.ArchiveAsync(ctx, output, jobs) 280 | } 281 | 282 | // Extract reads files out of a compressed archive while decompressing the results. 283 | func (ca CompressedArchive) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { 284 | if ca.Extraction == nil { 285 | return fmt.Errorf("no extraction format") 286 | } 287 | if ca.Compression != nil { 288 | rc, err := ca.Compression.OpenReader(sourceArchive) 289 | if err != nil { 290 | return err 291 | } 292 | defer rc.Close() 293 | sourceArchive = rc 294 | } 295 | return ca.Extraction.Extract(ctx, sourceArchive, handleFile) 296 | } 297 | 298 | // MatchResult returns true if the format was matched either 299 | // by name, stream, or both. Name usually refers to matching 300 | // by file extension, and stream usually refers to reading 301 | // the first few bytes of the stream (its header). A stream 302 | // match is generally stronger, as filenames are not always 303 | // indicative of their contents if they even exist at all. 304 | type MatchResult struct { 305 | ByName, ByStream bool 306 | } 307 | 308 | // Matched returns true if a match was made by either name or stream. 309 | func (mr MatchResult) Matched() bool { return mr.ByName || mr.ByStream } 310 | 311 | func (mr MatchResult) String() string { 312 | return fmt.Sprintf("{ByName=%v ByStream=%v}", mr.ByName, mr.ByStream) 313 | } 314 | 315 | // rewindReader is a Reader that can be rewound (reset) to re-read what 316 | // was already read and then continue to read more from the underlying 317 | // stream. When no more rewinding is necessary, call reader() to get a 318 | // new reader that first reads the buffered bytes, then continues to 319 | // read from the stream. This is useful for "peeking" a stream an 320 | // arbitrary number of bytes. Loosely based on the Connection type 321 | // from https://github.com/mholt/caddy-l4. 322 | // 323 | // If the reader is also an io.Seeker, no buffer is used, and instead 324 | // the stream seeks back to the starting position. 325 | type rewindReader struct { 326 | io.Reader 327 | start int64 328 | buf *bytes.Buffer 329 | bufReader io.Reader 330 | } 331 | 332 | func newRewindReader(r io.Reader) (*rewindReader, error) { 333 | if r == nil { 334 | return nil, nil 335 | } 336 | 337 | rr := &rewindReader{Reader: r} 338 | 339 | // avoid buffering if we have a seeker we can use 340 | if seeker, ok := r.(io.Seeker); ok { 341 | var err error 342 | rr.start, err = seeker.Seek(0, io.SeekCurrent) 343 | if err != nil { 344 | return nil, fmt.Errorf("seek to determine current position: %w", err) 345 | } 346 | } else { 347 | rr.buf = new(bytes.Buffer) 348 | } 349 | 350 | return rr, nil 351 | } 352 | 353 | func (rr *rewindReader) Read(p []byte) (n int, err error) { 354 | if rr == nil { 355 | panic("reading from nil rewindReader") 356 | } 357 | 358 | // if there is a buffer we should read from, start 359 | // with that; we only read from the underlying stream 360 | // after the buffer has been "depleted" 361 | if rr.bufReader != nil { 362 | n, err = rr.bufReader.Read(p) 363 | if err == io.EOF { 364 | rr.bufReader = nil 365 | err = nil 366 | } 367 | if n == len(p) { 368 | return 369 | } 370 | } 371 | 372 | // buffer has been depleted or we are not using one, 373 | // so read from underlying stream 374 | nr, err := rr.Reader.Read(p[n:]) 375 | 376 | // anything that was read needs to be written to 377 | // the buffer (if used), even if there was an error 378 | if nr > 0 && rr.buf != nil { 379 | if nw, errw := rr.buf.Write(p[n : n+nr]); errw != nil { 380 | return nw, errw 381 | } 382 | } 383 | 384 | // up to now, n was how many bytes were read from 385 | // the buffer, and nr was how many bytes were read 386 | // from the stream; add them to return total count 387 | n += nr 388 | 389 | return 390 | } 391 | 392 | // rewind resets the stream to the beginning by causing 393 | // Read() to start reading from the beginning of the 394 | // stream, or, if buffering, the buffered bytes. 395 | func (rr *rewindReader) rewind() { 396 | if rr == nil { 397 | return 398 | } 399 | if ras, ok := rr.Reader.(io.Seeker); ok { 400 | if _, err := ras.Seek(rr.start, io.SeekStart); err == nil { 401 | return 402 | } 403 | } 404 | rr.bufReader = bytes.NewReader(rr.buf.Bytes()) 405 | } 406 | 407 | // reader returns a reader that reads first from the buffered 408 | // bytes (if buffering), then from the underlying stream; if a 409 | // Seeker, the stream will be seeked back to the start. After 410 | // calling this, no more rewinding is allowed since reads from 411 | // the stream are not recorded, so rewinding properly is impossible. 412 | // If the underlying reader implements io.Seeker, then the 413 | // underlying reader will be used directly. 414 | func (rr *rewindReader) reader() io.Reader { 415 | if rr == nil { 416 | return nil 417 | } 418 | if ras, ok := rr.Reader.(io.Seeker); ok { 419 | if _, err := ras.Seek(rr.start, io.SeekStart); err == nil { 420 | return rr.Reader 421 | } 422 | } 423 | return io.MultiReader(bytes.NewReader(rr.buf.Bytes()), rr.Reader) 424 | } 425 | 426 | // NoMatch is a special error returned if there are no matching formats. 427 | var NoMatch = fmt.Errorf("no formats matched") 428 | 429 | // Registered formats. 430 | var formats = make(map[string]Format) 431 | 432 | // Interface guards 433 | var ( 434 | _ Format = (*CompressedArchive)(nil) 435 | _ Archiver = (*CompressedArchive)(nil) 436 | _ ArchiverAsync = (*CompressedArchive)(nil) 437 | _ Extractor = (*CompressedArchive)(nil) 438 | _ Compressor = (*CompressedArchive)(nil) 439 | _ Decompressor = (*CompressedArchive)(nil) 440 | ) 441 | -------------------------------------------------------------------------------- /formats_test.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "io" 8 | "io/fs" 9 | "math/rand" 10 | "os" 11 | "strings" 12 | "testing" 13 | "time" 14 | ) 15 | 16 | func TestRewindReader(t *testing.T) { 17 | data := "the header\nthe body\n" 18 | 19 | r, err := newRewindReader(strings.NewReader(data)) 20 | if err != nil { 21 | t.Errorf("creating rewindReader: %v", err) 22 | } 23 | 24 | buf := make([]byte, 10) // enough for 'the header' 25 | 26 | // test rewinding reads 27 | for i := 0; i < 10; i++ { 28 | r.rewind() 29 | n, err := r.Read(buf) 30 | if err != nil { 31 | t.Errorf("Read failed: %s", err) 32 | } 33 | if string(buf[:n]) != "the header" { 34 | t.Errorf("iteration %d: expected 'the header' but got '%s' (n=%d)", i, string(buf[:n]), n) 35 | } 36 | } 37 | 38 | // get the reader from header reader and make sure we can read all of the data out 39 | r.rewind() 40 | finalReader := r.reader() 41 | buf = make([]byte, len(data)) 42 | n, err := io.ReadFull(finalReader, buf) 43 | if err != nil { 44 | t.Errorf("ReadFull failed: %s (n=%d)", err, n) 45 | } 46 | if string(buf) != data { 47 | t.Errorf("expected '%s' but got '%s'", string(data), string(buf)) 48 | } 49 | } 50 | 51 | func TestCompression(t *testing.T) { 52 | seed := time.Now().UnixNano() 53 | t.Logf("seed: %d", seed) 54 | r := rand.New(rand.NewSource(seed)) 55 | 56 | contents := make([]byte, 1024) 57 | r.Read(contents) 58 | 59 | compressed := new(bytes.Buffer) 60 | 61 | testOK := func(t *testing.T, comp Compression, testFilename string) { 62 | // compress into buffer 63 | compressed.Reset() 64 | wc, err := comp.OpenWriter(compressed) 65 | checkErr(t, err, "opening writer") 66 | _, err = wc.Write(contents) 67 | checkErr(t, err, "writing contents") 68 | checkErr(t, wc.Close(), "closing writer") 69 | 70 | // make sure Identify correctly chooses this compression method 71 | format, stream, err := Identify(context.Background(), testFilename, compressed) 72 | checkErr(t, err, "identifying") 73 | if format.Extension() != comp.Extension() { 74 | t.Errorf("expected format %s but got %s", comp.Extension(), format.Extension()) 75 | } 76 | 77 | // read the contents back out and compare 78 | decompReader, err := format.(Decompressor).OpenReader(stream) 79 | checkErr(t, err, "opening with decompressor '%s'", format.Extension()) 80 | data, err := io.ReadAll(decompReader) 81 | checkErr(t, err, "reading decompressed data") 82 | checkErr(t, decompReader.Close(), "closing decompressor") 83 | if !bytes.Equal(data, contents) { 84 | t.Errorf("not equal to original") 85 | } 86 | } 87 | 88 | var cannotIdentifyFromStream = map[string]bool{Brotli{}.Extension(): true} 89 | 90 | for _, f := range formats { 91 | // only test compressors 92 | comp, ok := f.(Compression) 93 | if !ok { 94 | continue 95 | } 96 | 97 | t.Run(f.Extension()+"_with_extension", func(t *testing.T) { 98 | testOK(t, comp, "file"+f.Extension()) 99 | }) 100 | if !cannotIdentifyFromStream[f.Extension()] { 101 | t.Run(f.Extension()+"_without_extension", func(t *testing.T) { 102 | testOK(t, comp, "") 103 | }) 104 | } 105 | } 106 | } 107 | 108 | func checkErr(t *testing.T, err error, msgFmt string, args ...any) { 109 | t.Helper() 110 | if err == nil { 111 | return 112 | } 113 | args = append(args, err) 114 | t.Fatalf(msgFmt+": %s", args...) 115 | } 116 | 117 | func TestIdentifyDoesNotMatchContentFromTrimmedKnownHeaderHaving0Suffix(t *testing.T) { 118 | // Using the outcome of `n, err := io.ReadFull(stream, buf)` without minding n 119 | // may lead to a mis-characterization for cases with known header ending with 0x0 120 | // because the default byte value in a declared array is 0. 121 | // This test guards against those cases. 122 | tests := []struct { 123 | name string 124 | header []byte 125 | }{ 126 | { 127 | name: "rar_v5.0", 128 | header: rarHeaderV5_0, 129 | }, 130 | { 131 | name: "rar_v1.5", 132 | header: rarHeaderV1_5, 133 | }, 134 | { 135 | name: "xz", 136 | header: xzHeader, 137 | }, 138 | } 139 | for _, tt := range tests { 140 | t.Run(tt.name, func(t *testing.T) { 141 | headerLen := len(tt.header) 142 | if headerLen == 0 || tt.header[headerLen-1] != 0 { 143 | t.Errorf("header expected to end with 0: header=%v", tt.header) 144 | return 145 | } 146 | headerTrimmed := tt.header[:headerLen-1] 147 | stream := bytes.NewReader(headerTrimmed) 148 | got, _, err := Identify(context.Background(), "", stream) 149 | if got != nil { 150 | t.Errorf("no Format expected for trimmed know %s header: found Format= %v", tt.name, got.Extension()) 151 | return 152 | } 153 | if !errors.Is(err, NoMatch) { 154 | t.Errorf("NoMatch expected for for trimmed know %s header: err :=%#v", tt.name, err) 155 | return 156 | } 157 | 158 | }) 159 | } 160 | } 161 | 162 | func TestIdentifyCanAssessSmallOrNoContent(t *testing.T) { 163 | type args struct { 164 | stream io.ReadSeeker 165 | } 166 | tests := []struct { 167 | name string 168 | args args 169 | }{ 170 | { 171 | name: "should return nomatch for an empty stream", 172 | args: args{ 173 | stream: bytes.NewReader([]byte{}), 174 | }, 175 | }, 176 | { 177 | name: "should return nomatch for a stream with content size less than known header", 178 | args: args{ 179 | stream: bytes.NewReader([]byte{'a'}), 180 | }, 181 | }, 182 | { 183 | name: "should return nomatch for a stream with content size greater then known header size and not supported format", 184 | args: args{ 185 | stream: bytes.NewReader([]byte(strings.Repeat("this is a txt content", 2))), 186 | }, 187 | }, 188 | } 189 | for _, tt := range tests { 190 | t.Run(tt.name, func(t *testing.T) { 191 | got, _, err := Identify(context.Background(), "", tt.args.stream) 192 | if got != nil { 193 | t.Errorf("no Format expected for non archive and not compressed stream: found Format=%#v", got) 194 | return 195 | } 196 | if !errors.Is(err, NoMatch) { 197 | t.Errorf("NoMatch expected for non archive and not compressed stream: %#v", err) 198 | return 199 | } 200 | 201 | }) 202 | } 203 | } 204 | 205 | func compress( 206 | t *testing.T, compName string, content []byte, 207 | openwriter func(w io.Writer) (io.WriteCloser, error), 208 | ) []byte { 209 | buf := bytes.NewBuffer(make([]byte, 0, 128)) 210 | cwriter, err := openwriter(buf) 211 | if err != nil { 212 | t.Errorf("fail to open compression writer: compression-name=%s, err=%#v", compName, err) 213 | return nil 214 | } 215 | _, err = cwriter.Write(content) 216 | if err != nil { 217 | cerr := cwriter.Close() 218 | t.Errorf( 219 | "fail to write using compression writer: compression-name=%s, err=%#v, close-err=%#v", 220 | compName, err, cerr) 221 | return nil 222 | } 223 | err = cwriter.Close() 224 | if err != nil { 225 | t.Errorf("fail to close compression writer: compression-name=%s, err=%#v", compName, err) 226 | return nil 227 | } 228 | return buf.Bytes() 229 | } 230 | 231 | func archive(t *testing.T, arch Archiver, fname string, fileInfo fs.FileInfo) []byte { 232 | files := []FileInfo{ 233 | {FileInfo: fileInfo, NameInArchive: "tmp.txt", 234 | Open: func() (fs.File, error) { 235 | return os.Open(fname) 236 | }}, 237 | } 238 | buf := bytes.NewBuffer(make([]byte, 0, 128)) 239 | err := arch.Archive(context.TODO(), buf, files) 240 | if err != nil { 241 | t.Errorf("fail to create archive: err=%#v", err) 242 | return nil 243 | } 244 | return buf.Bytes() 245 | 246 | } 247 | 248 | type writeNopCloser struct{ io.Writer } 249 | 250 | func (wnc writeNopCloser) Close() error { return nil } 251 | 252 | func newWriteNopCloser(w io.Writer) (io.WriteCloser, error) { 253 | return writeNopCloser{w}, nil 254 | } 255 | 256 | func newTmpTextFile(t *testing.T, content string) (string, fs.FileInfo) { 257 | tmpTxtFile, err := os.CreateTemp("", "TestIdentifyFindFormatByStreamContent-tmp-*.txt") 258 | if err != nil { 259 | t.Errorf("fail to create tmp test file for archive tests: err=%v", err) 260 | return "", nil 261 | } 262 | fname := tmpTxtFile.Name() 263 | 264 | if _, err = tmpTxtFile.Write([]byte(content)); err != nil { 265 | t.Errorf("fail to write content to tmp-txt-file: err=%#v", err) 266 | return "", nil 267 | } 268 | if err = tmpTxtFile.Close(); err != nil { 269 | t.Errorf("fail to close tmp-txt-file: err=%#v", err) 270 | return "", nil 271 | } 272 | fi, err := os.Stat(fname) 273 | if err != nil { 274 | t.Errorf("fail to get tmp-txt-file stats: err=%v", err) 275 | return "", nil 276 | } 277 | 278 | return fname, fi 279 | } 280 | 281 | func TestIdentifyFindFormatByStreamContent(t *testing.T) { 282 | tmpTxtFileName, tmpTxtFileInfo := newTmpTextFile(t, "this is text that has to be long enough for brotli to match") 283 | t.Cleanup(func() { 284 | os.RemoveAll(tmpTxtFileName) 285 | }) 286 | 287 | tests := []struct { 288 | name string 289 | content []byte 290 | openCompressionWriter func(w io.Writer) (io.WriteCloser, error) 291 | compressorName string 292 | wantFormatName string 293 | }{ 294 | { 295 | name: "should recognize brotli", 296 | openCompressionWriter: Brotli{}.OpenWriter, 297 | content: []byte("this is text, but it has to be long enough to match brotli which doesn't have a magic number"), 298 | compressorName: ".br", 299 | wantFormatName: ".br", 300 | }, 301 | { 302 | name: "should recognize bz2", 303 | openCompressionWriter: Bz2{}.OpenWriter, 304 | content: []byte("this is text"), 305 | compressorName: ".bz2", 306 | wantFormatName: ".bz2", 307 | }, 308 | { 309 | name: "should recognize gz", 310 | openCompressionWriter: Gz{}.OpenWriter, 311 | content: []byte("this is text"), 312 | compressorName: ".gz", 313 | wantFormatName: ".gz", 314 | }, 315 | { 316 | name: "should recognize lz4", 317 | openCompressionWriter: Lz4{}.OpenWriter, 318 | content: []byte("this is text"), 319 | compressorName: ".lz4", 320 | wantFormatName: ".lz4", 321 | }, 322 | { 323 | name: "should recognize lz", 324 | openCompressionWriter: Lzip{}.OpenWriter, 325 | content: []byte("this is text"), 326 | compressorName: ".lz", 327 | wantFormatName: ".lz", 328 | }, 329 | { 330 | name: "should recognize sz", 331 | openCompressionWriter: Sz{}.OpenWriter, 332 | content: []byte("this is text"), 333 | compressorName: ".sz", 334 | wantFormatName: ".sz", 335 | }, 336 | { 337 | name: "should recognize xz", 338 | openCompressionWriter: Xz{}.OpenWriter, 339 | content: []byte("this is text"), 340 | compressorName: ".xz", 341 | wantFormatName: ".xz", 342 | }, 343 | { 344 | name: "should recognize zst", 345 | openCompressionWriter: Zstd{}.OpenWriter, 346 | content: []byte("this is text"), 347 | compressorName: ".zst", 348 | wantFormatName: ".zst", 349 | }, 350 | { 351 | name: "should recognize tar", 352 | openCompressionWriter: newWriteNopCloser, 353 | content: archive(t, Tar{}, tmpTxtFileName, tmpTxtFileInfo), 354 | compressorName: "", 355 | wantFormatName: ".tar", 356 | }, 357 | { 358 | name: "should recognize tar.gz", 359 | openCompressionWriter: Gz{}.OpenWriter, 360 | content: archive(t, Tar{}, tmpTxtFileName, tmpTxtFileInfo), 361 | compressorName: ".gz", 362 | wantFormatName: ".tar.gz", 363 | }, 364 | { 365 | name: "should recognize zip", 366 | openCompressionWriter: newWriteNopCloser, 367 | content: archive(t, Zip{}, tmpTxtFileName, tmpTxtFileInfo), 368 | compressorName: "", 369 | wantFormatName: ".zip", 370 | }, 371 | { 372 | name: "should recognize rar by v5.0 header", 373 | openCompressionWriter: newWriteNopCloser, 374 | content: rarHeaderV5_0[:], 375 | compressorName: "", 376 | wantFormatName: ".rar", 377 | }, 378 | { 379 | name: "should recognize rar by v1.5 header", 380 | openCompressionWriter: newWriteNopCloser, 381 | content: rarHeaderV1_5[:], 382 | compressorName: "", 383 | wantFormatName: ".rar", 384 | }, 385 | { 386 | name: "should recognize zz", 387 | openCompressionWriter: Zlib{}.OpenWriter, 388 | content: []byte("this is text"), 389 | compressorName: ".zz", 390 | wantFormatName: ".zz", 391 | }, 392 | } 393 | for _, tt := range tests { 394 | t.Run(tt.name, func(t *testing.T) { 395 | stream := bytes.NewReader(compress(t, tt.compressorName, tt.content, tt.openCompressionWriter)) 396 | got, _, err := Identify(context.Background(), "", stream) 397 | if err != nil { 398 | t.Errorf("should have found a corresponding Format, but got err=%+v", err) 399 | return 400 | } 401 | if tt.wantFormatName != got.Extension() { 402 | t.Errorf("unexpected format found: expected=%s actual=%s", tt.wantFormatName, got.Extension()) 403 | return 404 | } 405 | 406 | }) 407 | } 408 | } 409 | 410 | func TestIdentifyAndOpenZip(t *testing.T) { 411 | f, err := os.Open("testdata/test.zip") 412 | checkErr(t, err, "opening zip") 413 | defer f.Close() 414 | 415 | format, reader, err := Identify(context.Background(), "test.zip", f) 416 | checkErr(t, err, "identifying zip") 417 | if format.Extension() != ".zip" { 418 | t.Errorf("unexpected format found: expected=.zip actual=%s", format.Extension()) 419 | } 420 | 421 | err = format.(Extractor).Extract(context.Background(), reader, func(ctx context.Context, f FileInfo) error { 422 | rc, err := f.Open() 423 | if err != nil { 424 | return err 425 | } 426 | defer rc.Close() 427 | _, err = io.ReadAll(rc) 428 | return err 429 | }) 430 | checkErr(t, err, "extracting zip") 431 | } 432 | 433 | func TestIdentifyASCIIFileStartingWithX(t *testing.T) { 434 | // Create a temporary file starting with the letter 'x' 435 | tmpFile, err := os.CreateTemp("", "TestIdentifyASCIIFileStartingWithX-tmp-*.txt") 436 | if err != nil { 437 | t.Errorf("fail to create tmp test file for archive tests: err=%v", err) 438 | } 439 | defer os.Remove(tmpFile.Name()) 440 | 441 | _, err = tmpFile.Write([]byte("xThis is a test file")) 442 | if err != nil { 443 | t.Errorf("Failed to write to temp file: %v", err) 444 | } 445 | tmpFile.Close() 446 | 447 | // Open the file and use the Identify function 448 | file, err := os.Open(tmpFile.Name()) 449 | if err != nil { 450 | t.Errorf("Failed to open temp file: %v", err) 451 | } 452 | defer file.Close() 453 | 454 | _, _, err = Identify(context.Background(), tmpFile.Name(), file) 455 | if !errors.Is(err, NoMatch) { 456 | t.Errorf("Identify failed: %v", err) 457 | } 458 | } 459 | 460 | func TestIdentifyStreamNil(t *testing.T) { 461 | format, _, err := Identify(context.Background(), "test.tar.zst", nil) 462 | checkErr(t, err, "identifying tar.zst") 463 | if format.Extension() != ".tar.zst" { 464 | t.Errorf("unexpected format found: expected=.tar.zst actual=%s", format.Extension()) 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /fs_test.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | _ "embed" 7 | "fmt" 8 | "io" 9 | "io/fs" 10 | "log" 11 | "net/http" 12 | "os" 13 | "path" 14 | "path/filepath" 15 | "reflect" 16 | "sort" 17 | "testing" 18 | ) 19 | 20 | func TestPathWithoutTopDir(t *testing.T) { 21 | for i, tc := range []struct { 22 | input, expect string 23 | }{ 24 | { 25 | input: "a/b/c", 26 | expect: "b/c", 27 | }, 28 | { 29 | input: "b/c", 30 | expect: "c", 31 | }, 32 | { 33 | input: "c", 34 | expect: "c", 35 | }, 36 | { 37 | input: "", 38 | expect: "", 39 | }, 40 | } { 41 | if actual := pathWithoutTopDir(tc.input); actual != tc.expect { 42 | t.Errorf("Test %d (input=%s): Expected '%s' but got '%s'", i, tc.input, tc.expect, actual) 43 | } 44 | } 45 | } 46 | 47 | func TestSplitPath(t *testing.T) { 48 | d := DeepFS{} 49 | for i, testCase := range []struct { 50 | input, expectedReal, expectedInner string 51 | }{ 52 | { 53 | input: "/", 54 | expectedReal: "/", 55 | expectedInner: "", 56 | }, 57 | { 58 | input: "foo", 59 | expectedReal: "foo", 60 | expectedInner: "", 61 | }, 62 | { 63 | input: "foo/bar", 64 | expectedReal: filepath.Join("foo", "bar"), 65 | expectedInner: "", 66 | }, 67 | { 68 | input: "foo.zip", 69 | expectedReal: filepath.Join("foo.zip"), 70 | expectedInner: ".", 71 | }, 72 | { 73 | input: "foo.zip/a", 74 | expectedReal: "foo.zip", 75 | expectedInner: "a", 76 | }, 77 | { 78 | input: "foo.zip/a/b", 79 | expectedReal: "foo.zip", 80 | expectedInner: "a/b", 81 | }, 82 | { 83 | input: "a/b/foobar.zip/c", 84 | expectedReal: filepath.Join("a", "b", "foobar.zip"), 85 | expectedInner: "c", 86 | }, 87 | { 88 | input: "a/foo.zip/b/test.tar", 89 | expectedReal: filepath.Join("a", "foo.zip"), 90 | expectedInner: "b/test.tar", 91 | }, 92 | { 93 | input: "a/foo.zip/b/test.tar/c", 94 | expectedReal: filepath.Join("a", "foo.zip"), 95 | expectedInner: "b/test.tar/c", 96 | }, 97 | } { 98 | actualReal, actualInner := d.SplitPath(testCase.input) 99 | if actualReal != testCase.expectedReal { 100 | t.Errorf("Test %d (input=%q): expected real path %q but got %q", i, testCase.input, testCase.expectedReal, actualReal) 101 | } 102 | if actualInner != testCase.expectedInner { 103 | t.Errorf("Test %d (input=%q): expected inner path %q but got %q", i, testCase.input, testCase.expectedInner, actualInner) 104 | } 105 | } 106 | } 107 | 108 | func TestPathContainsArchive(t *testing.T) { 109 | for i, testCase := range []struct { 110 | input string 111 | expected bool 112 | }{ 113 | { 114 | input: "", 115 | expected: false, 116 | }, 117 | { 118 | input: "foo", 119 | expected: false, 120 | }, 121 | { 122 | input: "foo.zip", 123 | expected: true, 124 | }, 125 | { 126 | input: "a/b/c.tar.gz", 127 | expected: true, 128 | }, 129 | { 130 | input: "a/b/c.tar.gz/d", 131 | expected: true, 132 | }, 133 | { 134 | input: "a/b/c.txt", 135 | expected: false, 136 | }, 137 | } { 138 | actual := PathContainsArchive(testCase.input) 139 | if actual != testCase.expected { 140 | t.Errorf("Test %d (input=%q): expected %v but got %v", i, testCase.input, testCase.expected, actual) 141 | } 142 | } 143 | } 144 | 145 | var ( 146 | //go:embed testdata/test.zip 147 | testZIP []byte 148 | //go:embed testdata/unordered.zip 149 | unorderZip []byte 150 | ) 151 | 152 | func TestSelfTar(t *testing.T) { 153 | fn := "testdata/self-tar.tar" 154 | fh, err := os.Open(fn) 155 | if err != nil { 156 | t.Errorf("Could not load test tar: %v", fn) 157 | } 158 | fstat, err := os.Stat(fn) 159 | if err != nil { 160 | t.Errorf("Could not stat test tar: %v", fn) 161 | } 162 | fsys := &ArchiveFS{ 163 | Stream: io.NewSectionReader(fh, 0, fstat.Size()), 164 | Format: Tar{}, 165 | } 166 | var count int 167 | err = fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { 168 | if count > 10 { 169 | t.Error("walking test tar appears to be recursing in error") 170 | return fmt.Errorf("recursing tar: %v", fn) 171 | } 172 | count++ 173 | return nil 174 | }) 175 | if err != nil { 176 | t.Error(err) 177 | } 178 | } 179 | 180 | func ExampleArchiveFS_Stream() { 181 | fsys := &ArchiveFS{ 182 | Stream: io.NewSectionReader(bytes.NewReader(testZIP), 0, int64(len(testZIP))), 183 | Format: Zip{}, 184 | } 185 | // You can serve the contents in a web server: 186 | http.Handle("/static", http.StripPrefix("/static", 187 | http.FileServer(http.FS(fsys)))) 188 | 189 | // Or read the files using fs functions: 190 | dis, err := fsys.ReadDir(".") 191 | if err != nil { 192 | log.Fatal(err) 193 | } 194 | for _, di := range dis { 195 | fmt.Println(di.Name()) 196 | b, err := fs.ReadFile(fsys, path.Join(".", di.Name())) 197 | if err != nil { 198 | log.Fatal(err) 199 | } 200 | fmt.Println(bytes.Contains(b, []byte("granted"))) 201 | } 202 | // Output: 203 | // LICENSE 204 | // true 205 | } 206 | 207 | func TestArchiveFS_ReadDir(t *testing.T) { 208 | for _, tc := range []struct { 209 | name string 210 | archive ArchiveFS 211 | want map[string][]string 212 | }{ 213 | { 214 | name: "test.zip", 215 | archive: ArchiveFS{ 216 | Stream: io.NewSectionReader(bytes.NewReader(testZIP), 0, int64(len(testZIP))), 217 | Format: Zip{}, 218 | }, 219 | // unzip -l testdata/test.zip 220 | want: map[string][]string{ 221 | ".": {"LICENSE"}, 222 | }, 223 | }, 224 | { 225 | name: "unordered.zip", 226 | archive: ArchiveFS{ 227 | Stream: io.NewSectionReader(bytes.NewReader(unorderZip), 0, int64(len(unorderZip))), 228 | Format: Zip{}, 229 | }, 230 | // unzip -l testdata/unordered.zip, note entry 1/1 and 1/2 are separated by contents of directory 2 231 | want: map[string][]string{ 232 | ".": {"1", "2"}, 233 | "1": {"1", "2"}, 234 | "2": {"1"}, 235 | }, 236 | }, 237 | } { 238 | tc := tc 239 | t.Run(tc.name, func(t *testing.T) { 240 | t.Parallel() 241 | fsys := tc.archive 242 | for baseDir, wantLS := range tc.want { 243 | t.Run(fmt.Sprintf("ReadDir(%q)", baseDir), func(t *testing.T) { 244 | dis, err := fsys.ReadDir(baseDir) 245 | if err != nil { 246 | t.Error(err) 247 | } 248 | 249 | dirs := []string{} 250 | for _, di := range dis { 251 | dirs = append(dirs, di.Name()) 252 | } 253 | 254 | // Stabilize the sort order 255 | sort.Strings(dirs) 256 | 257 | if !reflect.DeepEqual(wantLS, dirs) { 258 | t.Errorf("ReadDir() got: %v, want: %v", dirs, wantLS) 259 | } 260 | }) 261 | 262 | // Uncomment to reproduce https://github.com/mholt/archiver/issues/340. 263 | t.Run(fmt.Sprintf("Open(%s)", baseDir), func(t *testing.T) { 264 | f, err := fsys.Open(baseDir) 265 | if err != nil { 266 | t.Errorf("fsys.Open(%q): %#v %s", baseDir, err, err) 267 | return 268 | } 269 | 270 | rdf, ok := f.(fs.ReadDirFile) 271 | if !ok { 272 | t.Errorf("fsys.Open(%q) did not return a fs.ReadDirFile, got: %#v", baseDir, f) 273 | } 274 | 275 | dis, err := rdf.ReadDir(-1) 276 | if err != nil { 277 | t.Error(err) 278 | } 279 | 280 | dirs := []string{} 281 | for _, di := range dis { 282 | dirs = append(dirs, di.Name()) 283 | } 284 | 285 | // Stabilize the sort order 286 | sort.Strings(dirs) 287 | 288 | if !reflect.DeepEqual(wantLS, dirs) { 289 | t.Errorf("Open().ReadDir(-1) got: %v, want: %v", dirs, wantLS) 290 | } 291 | }) 292 | } 293 | }) 294 | } 295 | } 296 | 297 | func TestFileSystem(t *testing.T) { 298 | ctx := context.Background() 299 | filename := "testdata/test.zip" 300 | 301 | checkFS := func(t *testing.T, fsys fs.FS) { 302 | license, err := fsys.Open("LICENSE") 303 | if err != nil { 304 | t.Fatal(err) 305 | } 306 | b, err := io.ReadAll(license) 307 | if err != nil { 308 | t.Fatal(err) 309 | } 310 | if len(b) == 0 { 311 | t.Fatal("empty file") 312 | } 313 | err = license.Close() 314 | if err != nil { 315 | t.Fatal(err) 316 | } 317 | } 318 | 319 | t.Run("filename", func(t *testing.T) { 320 | fsys, err := FileSystem(ctx, filename, nil) 321 | if err != nil { 322 | t.Fatal(err) 323 | } 324 | checkFS(t, fsys) 325 | }) 326 | 327 | t.Run("stream", func(t *testing.T) { 328 | f, err := os.Open(filename) 329 | if err != nil { 330 | t.Fatal(err) 331 | } 332 | t.Cleanup(func() { 333 | err = f.Close() 334 | if err != nil { 335 | t.Error(err) 336 | } 337 | }) 338 | fsys, err := FileSystem(ctx, "", f) 339 | if err != nil { 340 | t.Fatal(err) 341 | } 342 | checkFS(t, fsys) 343 | }) 344 | 345 | t.Run("filename and stream", func(t *testing.T) { 346 | f, err := os.Open(filename) 347 | if err != nil { 348 | t.Fatal(err) 349 | } 350 | t.Cleanup(func() { 351 | err = f.Close() 352 | if err != nil { 353 | t.Error(err) 354 | } 355 | }) 356 | fsys, err := FileSystem(ctx, "test.zip", f) 357 | if err != nil { 358 | t.Fatal(err) 359 | } 360 | checkFS(t, fsys) 361 | }) 362 | } 363 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mholt/archives 2 | 3 | go 1.22.2 4 | 5 | toolchain go1.23.2 6 | 7 | require ( 8 | github.com/andybalholm/brotli v1.1.2-0.20250424173009-453214e765f3 9 | github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 10 | github.com/klauspost/compress v1.17.11 11 | github.com/klauspost/pgzip v1.2.6 12 | github.com/nwaples/rardecode/v2 v2.1.0 13 | github.com/therootcompany/xz v1.0.1 14 | github.com/ulikunitz/xz v0.5.12 15 | ) 16 | 17 | require ( 18 | github.com/STARRY-S/zip v0.2.1 19 | github.com/bodgit/sevenzip v1.6.0 20 | github.com/minio/minlz v1.0.0 21 | github.com/pierrec/lz4/v4 v4.1.21 22 | github.com/sorairolake/lzip-go v0.3.5 23 | golang.org/x/text v0.20.0 24 | ) 25 | 26 | require ( 27 | github.com/bodgit/plumbing v1.3.0 // indirect 28 | github.com/bodgit/windows v1.0.1 // indirect 29 | github.com/hashicorp/errwrap v1.1.0 // indirect 30 | github.com/hashicorp/go-multierror v1.1.1 // indirect 31 | github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect 32 | go4.org v0.0.0-20230225012048-214862532bf5 // indirect 33 | ) 34 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 3 | cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= 4 | cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= 5 | cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= 6 | cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= 7 | cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= 8 | cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= 9 | cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= 10 | cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= 11 | cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= 12 | cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= 13 | cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= 14 | cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= 15 | cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= 16 | cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= 17 | dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= 18 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 19 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= 20 | github.com/STARRY-S/zip v0.2.1 h1:pWBd4tuSGm3wtpoqRZZ2EAwOmcHK6XFf7bU9qcJXyFg= 21 | github.com/STARRY-S/zip v0.2.1/go.mod h1:xNvshLODWtC4EJ702g7cTYn13G53o1+X9BWnPFpcWV4= 22 | github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= 23 | github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= 24 | github.com/andybalholm/brotli v1.1.2-0.20250424173009-453214e765f3 h1:8PmGpDEZl9yDpcdEr6Odf23feCxK3LNUNMxjXg41pZQ= 25 | github.com/andybalholm/brotli v1.1.2-0.20250424173009-453214e765f3/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= 26 | github.com/bodgit/plumbing v1.3.0 h1:pf9Itz1JOQgn7vEOE7v7nlEfBykYqvUYioC61TwWCFU= 27 | github.com/bodgit/plumbing v1.3.0/go.mod h1:JOTb4XiRu5xfnmdnDJo6GmSbSbtSyufrsyZFByMtKEs= 28 | github.com/bodgit/sevenzip v1.6.0 h1:a4R0Wu6/P1o1pP/3VV++aEOcyeBxeO/xE2Y9NSTrr6A= 29 | github.com/bodgit/sevenzip v1.6.0/go.mod h1:zOBh9nJUof7tcrlqJFv1koWRrhz3LbDbUNngkuZxLMc= 30 | github.com/bodgit/windows v1.0.1 h1:tF7K6KOluPYygXa3Z2594zxlkbKPAOvqr97etrGNIz4= 31 | github.com/bodgit/windows v1.0.1/go.mod h1:a6JLwrB4KrTR5hBpp8FI9/9W9jJfeQ2h4XDXU74ZCdM= 32 | github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 33 | github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= 34 | github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= 35 | github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= 36 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 37 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 38 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 39 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 40 | github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 h1:2tV76y6Q9BB+NEBasnqvs7e49aEBFI8ejC89PSnWH+4= 41 | github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= 42 | github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= 43 | github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= 44 | github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= 45 | github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= 46 | github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= 47 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 48 | github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 49 | github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 50 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 51 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 52 | github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 53 | github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= 54 | github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= 55 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 56 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 57 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 58 | github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= 59 | github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 60 | github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 61 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 62 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 63 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 64 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 65 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 66 | github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= 67 | github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= 68 | github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= 69 | github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= 70 | github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= 71 | github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= 72 | github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= 73 | github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= 74 | github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= 75 | github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= 76 | github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= 77 | github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= 78 | github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 79 | github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 80 | github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= 81 | github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= 82 | github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= 83 | github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= 84 | github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= 85 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 86 | github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= 87 | github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= 88 | github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= 89 | github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= 90 | github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= 91 | github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= 92 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 93 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 94 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 95 | github.com/minio/minlz v1.0.0 h1:Kj7aJZ1//LlTP1DM8Jm7lNKvvJS2m74gyyXXn3+uJWQ= 96 | github.com/minio/minlz v1.0.0/go.mod h1:qT0aEB35q79LLornSzeDH75LBf3aH1MV+jB5w9Wasec= 97 | github.com/nwaples/rardecode/v2 v2.1.0 h1:JQl9ZoBPDy+nIZGb1mx8+anfHp/LV3NE2MjMiv0ct/U= 98 | github.com/nwaples/rardecode/v2 v2.1.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw= 99 | github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= 100 | github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= 101 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 102 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 103 | github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 104 | github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= 105 | github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= 106 | github.com/sorairolake/lzip-go v0.3.5 h1:ms5Xri9o1JBIWvOFAorYtUNik6HI3HgBTkISiqu0Cwg= 107 | github.com/sorairolake/lzip-go v0.3.5/go.mod h1:N0KYq5iWrMXI0ZEXKXaS9hCyOjZUQdBDEIbXfoUwbdk= 108 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 109 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 110 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 111 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 112 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 113 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 114 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 115 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 116 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 117 | github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw= 118 | github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY= 119 | github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= 120 | github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= 121 | github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= 122 | github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= 123 | github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= 124 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 125 | go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= 126 | go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= 127 | go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= 128 | go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= 129 | go4.org v0.0.0-20230225012048-214862532bf5 h1:nifaUDeh+rPaBCMPMQHZmvJf+QdpLFnuQPwx+LxVmtc= 130 | go4.org v0.0.0-20230225012048-214862532bf5/go.mod h1:F57wTi5Lrj6WLyswp5EYV1ncrEbFGHD4hhz6S1ZYeaU= 131 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 132 | golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 133 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 134 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 135 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 136 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 137 | golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 138 | golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= 139 | golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= 140 | golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= 141 | golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= 142 | golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= 143 | golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= 144 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= 145 | golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 146 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 147 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 148 | golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 149 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 150 | golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 151 | golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 152 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 153 | golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= 154 | golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= 155 | golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= 156 | golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= 157 | golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= 158 | golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= 159 | golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= 160 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 161 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 162 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 163 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 164 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 165 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 166 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 167 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 168 | golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 169 | golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 170 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 171 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 172 | golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 173 | golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 174 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 175 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 176 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 177 | golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= 178 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 179 | golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 180 | golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 181 | golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 182 | golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 183 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 184 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 185 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 186 | golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 187 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 188 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 189 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 190 | golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= 191 | golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 192 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 193 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 194 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 195 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 196 | golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 197 | golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 198 | golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 199 | golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 200 | golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 201 | golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 202 | golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 203 | golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 204 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 205 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 206 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 207 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 208 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 209 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 210 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 211 | golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= 212 | golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 213 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 214 | golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 215 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 216 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 217 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 218 | golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 219 | golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= 220 | golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= 221 | golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 222 | golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 223 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 224 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 225 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 226 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 227 | golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 228 | golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 229 | golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 230 | golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 231 | golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 232 | golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 233 | golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 234 | golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 235 | golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 236 | golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 237 | golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 238 | golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 239 | golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 240 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 241 | golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 242 | golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= 243 | golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= 244 | golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= 245 | golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= 246 | golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= 247 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 248 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 249 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 250 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 251 | google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= 252 | google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= 253 | google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= 254 | google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= 255 | google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= 256 | google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= 257 | google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= 258 | google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= 259 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 260 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 261 | google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 262 | google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= 263 | google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= 264 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 265 | google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 266 | google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 267 | google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 268 | google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 269 | google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 270 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 271 | google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= 272 | google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= 273 | google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= 274 | google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= 275 | google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= 276 | google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= 277 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 278 | google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= 279 | google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= 280 | google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= 281 | google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 282 | google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 283 | google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 284 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 285 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 286 | gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= 287 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 288 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 289 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 290 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 291 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 292 | honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 293 | honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 294 | honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 295 | honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= 296 | rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= 297 | rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= 298 | rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= 299 | -------------------------------------------------------------------------------- /gz.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "strings" 8 | 9 | "github.com/klauspost/compress/gzip" 10 | "github.com/klauspost/pgzip" 11 | ) 12 | 13 | func init() { 14 | RegisterFormat(Gz{}) 15 | } 16 | 17 | // Gz facilitates gzip compression. 18 | type Gz struct { 19 | // Gzip compression level. See https://pkg.go.dev/compress/flate#pkg-constants 20 | // for some predefined constants. If 0, DefaultCompression is assumed rather 21 | // than no compression. 22 | CompressionLevel int 23 | 24 | // DisableMultistream controls whether the reader supports multistream files. 25 | // See https://pkg.go.dev/compress/gzip#example-Reader.Multistream 26 | DisableMultistream bool 27 | 28 | // Use a fast parallel Gzip implementation. This is only 29 | // effective for large streams (about 1 MB or greater). 30 | Multithreaded bool 31 | } 32 | 33 | func (Gz) Extension() string { return ".gz" } 34 | func (Gz) MediaType() string { return "application/gzip" } 35 | 36 | func (gz Gz) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 37 | var mr MatchResult 38 | 39 | // match filename 40 | if strings.Contains(strings.ToLower(filename), gz.Extension()) { 41 | mr.ByName = true 42 | } 43 | 44 | // match file header 45 | buf, err := readAtMost(stream, len(gzHeader)) 46 | if err != nil { 47 | return mr, err 48 | } 49 | mr.ByStream = bytes.Equal(buf, gzHeader) 50 | 51 | return mr, nil 52 | } 53 | 54 | func (gz Gz) OpenWriter(w io.Writer) (io.WriteCloser, error) { 55 | // assume default compression level if 0, rather than no 56 | // compression, since no compression on a gzipped file 57 | // doesn't make any sense in our use cases 58 | level := gz.CompressionLevel 59 | if level == 0 { 60 | level = gzip.DefaultCompression 61 | } 62 | 63 | var wc io.WriteCloser 64 | var err error 65 | if gz.Multithreaded { 66 | wc, err = pgzip.NewWriterLevel(w, level) 67 | } else { 68 | wc, err = gzip.NewWriterLevel(w, level) 69 | } 70 | return wc, err 71 | } 72 | 73 | func (gz Gz) OpenReader(r io.Reader) (io.ReadCloser, error) { 74 | if gz.Multithreaded { 75 | gzR, err := pgzip.NewReader(r) 76 | if gzR != nil && gz.DisableMultistream { 77 | gzR.Multistream(false) 78 | } 79 | return gzR, err 80 | } 81 | 82 | gzR, err := gzip.NewReader(r) 83 | if gzR != nil && gz.DisableMultistream { 84 | gzR.Multistream(false) 85 | } 86 | return gzR, err 87 | } 88 | 89 | // magic number at the beginning of gzip files 90 | var gzHeader = []byte{0x1f, 0x8b} 91 | -------------------------------------------------------------------------------- /interfaces.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "context" 5 | "io" 6 | ) 7 | 8 | // Format represents a way of getting data out of something else. 9 | // A format usually represents compression or an archive (or both). 10 | type Format interface { 11 | // Extension returns the conventional file extension for this 12 | // format. 13 | Extension() string 14 | 15 | // MediaType returns the MIME type ("content type") of this 16 | // format (see RFC 2046). 17 | MediaType() string 18 | 19 | // Match returns true if the given name/stream is recognized. 20 | // One of the arguments is optional: filename might be empty 21 | // if working with an unnamed stream, or stream might be empty 22 | // if only working with a file on disk; but both may also be 23 | // specified. The filename should consist only of the base name, 24 | // not path components, and is typically used for matching by 25 | // file extension. However, matching by reading the stream is 26 | // preferred as it is more accurate. Match reads only as many 27 | // bytes as needed to determine a match. 28 | Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) 29 | } 30 | 31 | // Compression is a compression format with both compress and decompress methods. 32 | type Compression interface { 33 | Format 34 | Compressor 35 | Decompressor 36 | } 37 | 38 | // Archival is an archival format that can create/write archives. 39 | type Archival interface { 40 | Format 41 | Archiver 42 | Extractor 43 | } 44 | 45 | // Extraction is an archival format that extract from (read) archives. 46 | type Extraction interface { 47 | Format 48 | Extractor 49 | } 50 | 51 | // Compressor can compress data by wrapping a writer. 52 | type Compressor interface { 53 | // OpenWriter wraps w with a new writer that compresses what is written. 54 | // The writer must be closed when writing is finished. 55 | OpenWriter(w io.Writer) (io.WriteCloser, error) 56 | } 57 | 58 | // Decompressor can decompress data by wrapping a reader. 59 | type Decompressor interface { 60 | // OpenReader wraps r with a new reader that decompresses what is read. 61 | // The reader must be closed when reading is finished. 62 | OpenReader(r io.Reader) (io.ReadCloser, error) 63 | } 64 | 65 | // Archiver can create a new archive. 66 | type Archiver interface { 67 | // Archive writes an archive file to output with the given files. 68 | // 69 | // Context cancellation must be honored. 70 | Archive(ctx context.Context, output io.Writer, files []FileInfo) error 71 | } 72 | 73 | // ArchiveAsyncJob contains a File to be archived and a channel that 74 | // the result of the archiving should be returned on. 75 | // EXPERIMENTAL: Subject to change or removal. 76 | type ArchiveAsyncJob struct { 77 | File FileInfo 78 | Result chan<- error 79 | } 80 | 81 | // ArchiverAsync is an Archiver that can also create archives 82 | // asynchronously by pumping files into a channel as they are 83 | // discovered. 84 | // EXPERIMENTAL: Subject to change or removal. 85 | type ArchiverAsync interface { 86 | Archiver 87 | 88 | // Use ArchiveAsync if you can't pre-assemble a list of all 89 | // the files for the archive. Close the jobs channel after 90 | // all the files have been sent. 91 | // 92 | // This won't return until the channel is closed. 93 | ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error 94 | } 95 | 96 | // Extractor can extract files from an archive. 97 | type Extractor interface { 98 | // Extract walks entries in the archive and calls handleFile for each 99 | // entry in the archive. 100 | // 101 | // Any files opened in the FileHandler should be closed when it returns, 102 | // as there is no guarantee the files can be read outside the handler 103 | // or after the walk has proceeded to the next file. 104 | // 105 | // Context cancellation must be honored. 106 | Extract(ctx context.Context, archive io.Reader, handleFile FileHandler) error 107 | } 108 | 109 | // Inserter can insert files into an existing archive. 110 | // EXPERIMENTAL: Subject to change. 111 | type Inserter interface { 112 | // Insert inserts the files into archive. 113 | // 114 | // Context cancellation must be honored. 115 | Insert(ctx context.Context, archive io.ReadWriteSeeker, files []FileInfo) error 116 | } 117 | -------------------------------------------------------------------------------- /lz4.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "strings" 8 | 9 | "github.com/pierrec/lz4/v4" 10 | ) 11 | 12 | func init() { 13 | RegisterFormat(Lz4{}) 14 | } 15 | 16 | // Lz4 facilitates LZ4 compression. 17 | type Lz4 struct { 18 | CompressionLevel int 19 | } 20 | 21 | func (Lz4) Extension() string { return ".lz4" } 22 | func (Lz4) MediaType() string { return "application/x-lz4" } 23 | 24 | func (lz Lz4) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 25 | var mr MatchResult 26 | 27 | // match filename 28 | if strings.Contains(strings.ToLower(filename), lz.Extension()) { 29 | mr.ByName = true 30 | } 31 | 32 | // match file header 33 | buf, err := readAtMost(stream, len(lz4Header)) 34 | if err != nil { 35 | return mr, err 36 | } 37 | mr.ByStream = bytes.Equal(buf, lz4Header) 38 | 39 | return mr, nil 40 | } 41 | 42 | func (lz Lz4) OpenWriter(w io.Writer) (io.WriteCloser, error) { 43 | lzw := lz4.NewWriter(w) 44 | options := []lz4.Option{ 45 | lz4.CompressionLevelOption(lz4.CompressionLevel(lz.CompressionLevel)), 46 | } 47 | if err := lzw.Apply(options...); err != nil { 48 | return nil, err 49 | } 50 | return lzw, nil 51 | } 52 | 53 | func (Lz4) OpenReader(r io.Reader) (io.ReadCloser, error) { 54 | return io.NopCloser(lz4.NewReader(r)), nil 55 | } 56 | 57 | var lz4Header = []byte{0x04, 0x22, 0x4d, 0x18} 58 | -------------------------------------------------------------------------------- /lzip.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "path/filepath" 8 | "strings" 9 | 10 | "github.com/sorairolake/lzip-go" 11 | ) 12 | 13 | func init() { 14 | RegisterFormat(Lzip{}) 15 | } 16 | 17 | // Lzip facilitates lzip compression. 18 | type Lzip struct{} 19 | 20 | func (Lzip) Extension() string { return ".lz" } 21 | func (Lzip) MediaType() string { return "application/x-lzip" } 22 | 23 | func (lz Lzip) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 24 | var mr MatchResult 25 | 26 | // match filename 27 | if filepath.Ext(strings.ToLower(filename)) == lz.Extension() { 28 | mr.ByName = true 29 | } 30 | 31 | // match file header 32 | buf, err := readAtMost(stream, len(lzipHeader)) 33 | if err != nil { 34 | return mr, err 35 | } 36 | mr.ByStream = bytes.Equal(buf, lzipHeader) 37 | 38 | return mr, nil 39 | } 40 | 41 | func (Lzip) OpenWriter(w io.Writer) (io.WriteCloser, error) { 42 | return lzip.NewWriter(w), nil 43 | } 44 | 45 | func (Lzip) OpenReader(r io.Reader) (io.ReadCloser, error) { 46 | lzr, err := lzip.NewReader(r) 47 | if err != nil { 48 | return nil, err 49 | } 50 | return io.NopCloser(lzr), err 51 | } 52 | 53 | // magic number at the beginning of lzip files 54 | // https://datatracker.ietf.org/doc/html/draft-diaz-lzip-09#section-2 55 | var lzipHeader = []byte("LZIP") 56 | -------------------------------------------------------------------------------- /minlz.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "path/filepath" 8 | "strings" 9 | 10 | "github.com/minio/minlz" 11 | ) 12 | 13 | func init() { 14 | RegisterFormat(MinLZ{}) 15 | } 16 | 17 | // MinLZ facilitates MinLZ compression. See 18 | // https://github.com/minio/minlz/blob/main/SPEC.md 19 | // and 20 | // https://blog.min.io/minlz-compression-algorithm/. 21 | type MinLZ struct{} 22 | 23 | func (MinLZ) Extension() string { return ".mz" } 24 | func (MinLZ) MediaType() string { return "application/x-minlz-compressed" } 25 | 26 | func (mz MinLZ) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 27 | var mr MatchResult 28 | 29 | // match filename 30 | if filepath.Ext(strings.ToLower(filename)) == ".mz" { 31 | mr.ByName = true 32 | } 33 | 34 | // match file header 35 | buf, err := readAtMost(stream, len(mzHeader)) 36 | if err != nil { 37 | return mr, err 38 | } 39 | mr.ByStream = bytes.Equal(buf, mzHeader) 40 | 41 | return mr, nil 42 | } 43 | 44 | func (MinLZ) OpenWriter(w io.Writer) (io.WriteCloser, error) { 45 | return minlz.NewWriter(w), nil 46 | } 47 | 48 | func (MinLZ) OpenReader(r io.Reader) (io.ReadCloser, error) { 49 | mr := minlz.NewReader(r) 50 | return io.NopCloser(mr), nil 51 | } 52 | 53 | var mzHeader = []byte("\xff\x06\x00\x00MinLz") 54 | -------------------------------------------------------------------------------- /rar.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "io/fs" 10 | "log" 11 | "os" 12 | "path" 13 | "strings" 14 | "time" 15 | 16 | "github.com/nwaples/rardecode/v2" 17 | ) 18 | 19 | func init() { 20 | RegisterFormat(Rar{}) 21 | } 22 | 23 | type rarReader interface { 24 | Next() (*rardecode.FileHeader, error) 25 | io.Reader 26 | io.WriterTo 27 | } 28 | 29 | type Rar struct { 30 | // If true, errors encountered during reading or writing 31 | // a file within an archive will be logged and the 32 | // operation will continue on remaining files. 33 | ContinueOnError bool 34 | 35 | // Password to open archives. 36 | Password string 37 | 38 | // Name for a multi-volume archive. When Name is specified, 39 | // the named file is extracted (rather than any io.Reader that 40 | // may be passed to Extract). If the archive is a multi-volume 41 | // archive, this name will also be used by the decoder to derive 42 | // the filename of the next volume in the volume set. 43 | Name string 44 | 45 | // FS is an fs.FS exposing the files of the archive. Unless Name is 46 | // also specified, this does nothing. When Name is also specified, 47 | // FS defines the fs.FS that from which the archive will be opened, 48 | // and in the case of a multi-volume archive, from where each subsequent 49 | // volume of the volume set will be loaded. 50 | // 51 | // Typically this should be a DirFS pointing at the directory containing 52 | // the volumes of the archive. 53 | FS fs.FS 54 | } 55 | 56 | func (Rar) Extension() string { return ".rar" } 57 | func (Rar) MediaType() string { return "application/vnd.rar" } 58 | 59 | func (r Rar) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 60 | var mr MatchResult 61 | 62 | // match filename 63 | if strings.Contains(strings.ToLower(filename), r.Extension()) { 64 | mr.ByName = true 65 | } 66 | 67 | // match file header (there are two versions; allocate buffer for larger one) 68 | buf, err := readAtMost(stream, len(rarHeaderV5_0)) 69 | if err != nil { 70 | return mr, err 71 | } 72 | 73 | matchedV1_5 := len(buf) >= len(rarHeaderV1_5) && 74 | bytes.Equal(rarHeaderV1_5, buf[:len(rarHeaderV1_5)]) 75 | matchedV5_0 := len(buf) >= len(rarHeaderV5_0) && 76 | bytes.Equal(rarHeaderV5_0, buf[:len(rarHeaderV5_0)]) 77 | 78 | mr.ByStream = matchedV1_5 || matchedV5_0 79 | 80 | return mr, nil 81 | } 82 | 83 | // Archive is not implemented for RAR because it is patent-encumbered. 84 | 85 | func (r Rar) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { 86 | var options []rardecode.Option 87 | if r.Password != "" { 88 | options = append(options, rardecode.Password(r.Password)) 89 | } 90 | 91 | if r.FS != nil { 92 | options = append(options, rardecode.FileSystem(r.FS)) 93 | } 94 | 95 | var ( 96 | rr rarReader 97 | err error 98 | ) 99 | 100 | // If a name has been provided, then the sourceArchive stream is ignored 101 | // and the archive is opened directly via the filesystem (or provided FS). 102 | if r.Name != "" { 103 | var or *rardecode.ReadCloser 104 | if or, err = rardecode.OpenReader(r.Name, options...); err == nil { 105 | rr = or 106 | defer or.Close() 107 | } 108 | } else { 109 | rr, err = rardecode.NewReader(sourceArchive, options...) 110 | } 111 | if err != nil { 112 | return err 113 | } 114 | 115 | // important to initialize to non-nil, empty value due to how fileIsIncluded works 116 | skipDirs := skipList{} 117 | 118 | for { 119 | if err := ctx.Err(); err != nil { 120 | return err // honor context cancellation 121 | } 122 | 123 | hdr, err := rr.Next() 124 | if err == io.EOF { 125 | break 126 | } 127 | if err != nil { 128 | if r.ContinueOnError { 129 | log.Printf("[ERROR] Advancing to next file in rar archive: %v", err) 130 | continue 131 | } 132 | return err 133 | } 134 | if fileIsIncluded(skipDirs, hdr.Name) { 135 | continue 136 | } 137 | 138 | info := rarFileInfo{hdr} 139 | file := FileInfo{ 140 | FileInfo: info, 141 | Header: hdr, 142 | NameInArchive: hdr.Name, 143 | Open: func() (fs.File, error) { 144 | return fileInArchive{io.NopCloser(rr), info}, nil 145 | }, 146 | } 147 | 148 | err = handleFile(ctx, file) 149 | if errors.Is(err, fs.SkipAll) { 150 | break 151 | } else if errors.Is(err, fs.SkipDir) && file.IsDir() { 152 | skipDirs.add(hdr.Name) 153 | } else if err != nil { 154 | return fmt.Errorf("handling file: %s: %w", hdr.Name, err) 155 | } 156 | } 157 | 158 | return nil 159 | } 160 | 161 | // rarFileInfo satisfies the fs.FileInfo interface for RAR entries. 162 | type rarFileInfo struct { 163 | fh *rardecode.FileHeader 164 | } 165 | 166 | func (rfi rarFileInfo) Name() string { return path.Base(rfi.fh.Name) } 167 | func (rfi rarFileInfo) Size() int64 { return rfi.fh.UnPackedSize } 168 | func (rfi rarFileInfo) Mode() os.FileMode { return rfi.fh.Mode() } 169 | func (rfi rarFileInfo) ModTime() time.Time { return rfi.fh.ModificationTime } 170 | func (rfi rarFileInfo) IsDir() bool { return rfi.fh.IsDir } 171 | func (rfi rarFileInfo) Sys() any { return nil } 172 | 173 | var ( 174 | rarHeaderV1_5 = []byte("Rar!\x1a\x07\x00") // v1.5 175 | rarHeaderV5_0 = []byte("Rar!\x1a\x07\x01\x00") // v5.0 176 | ) 177 | 178 | // Interface guard 179 | var _ Extractor = Rar{} 180 | -------------------------------------------------------------------------------- /rar_test.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "context" 5 | "crypto/sha1" 6 | "encoding/hex" 7 | "io" 8 | "testing" 9 | ) 10 | 11 | func TestRarExtractMultiVolume(t *testing.T) { 12 | // Test files testdata/test.part*.rar were created by: 13 | // seq 0 2000 > test.txt 14 | // rar a -v1k test.rar test.txt 15 | rar := Rar{ 16 | Name: "test.part01.rar", 17 | FS: DirFS("testdata"), 18 | } 19 | 20 | const expectedSHA1Sum = "4da7f88f69b44a3fdb705667019a65f4c6e058a3" 21 | if err := rar.Extract(context.Background(), nil, func(_ context.Context, info FileInfo) error { 22 | f, err := info.Open() 23 | if err != nil { 24 | return err 25 | } 26 | defer f.Close() 27 | 28 | h := sha1.New() 29 | if _, err = io.Copy(h, f); err != nil { 30 | return err 31 | } 32 | 33 | if got := hex.EncodeToString(h.Sum(nil)); got != expectedSHA1Sum { 34 | t.Errorf("expected %s, got %s", expectedSHA1Sum, got) 35 | } 36 | return nil 37 | }); err != nil { 38 | t.Error(err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /sz.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "strings" 8 | 9 | "github.com/klauspost/compress/s2" 10 | ) 11 | 12 | func init() { 13 | RegisterFormat(Sz{}) 14 | } 15 | 16 | // Sz facilitates Snappy compression. It uses S2 17 | // for reading and writing, but by default will 18 | // write Snappy-compatible data. 19 | type Sz struct { 20 | // Configurable S2 extension. 21 | S2 S2 22 | } 23 | 24 | // S2 is an extension of Snappy that can read Snappy 25 | // streams and write Snappy-compatible streams, but 26 | // can also be configured to write Snappy-incompatible 27 | // streams for greater gains. See 28 | // https://pkg.go.dev/github.com/klauspost/compress/s2 29 | // for details and the documentation for each option. 30 | type S2 struct { 31 | // reader options 32 | MaxBlockSize int 33 | AllocBlock int 34 | IgnoreStreamIdentifier bool 35 | IgnoreCRC bool 36 | 37 | // writer options 38 | AddIndex bool 39 | Compression S2Level 40 | BlockSize int 41 | Concurrency int 42 | FlushOnWrite bool 43 | Padding int 44 | SnappyIncompatible bool 45 | } 46 | 47 | func (Sz) Extension() string { return ".sz" } 48 | func (Sz) MediaType() string { return "application/x-snappy-framed" } 49 | 50 | func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 51 | var mr MatchResult 52 | 53 | // match filename 54 | if strings.Contains(strings.ToLower(filename), sz.Extension()) || 55 | strings.Contains(strings.ToLower(filename), ".s2") { 56 | mr.ByName = true 57 | } 58 | 59 | // match file header 60 | buf, err := readAtMost(stream, len(snappyHeader)) 61 | if err != nil { 62 | return mr, err 63 | } 64 | mr.ByStream = bytes.Equal(buf, snappyHeader) 65 | 66 | return mr, nil 67 | } 68 | 69 | func (sz Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) { 70 | var opts []s2.WriterOption 71 | if sz.S2.AddIndex { 72 | opts = append(opts, s2.WriterAddIndex()) 73 | } 74 | switch sz.S2.Compression { 75 | case S2LevelNone: 76 | opts = append(opts, s2.WriterUncompressed()) 77 | case S2LevelBetter: 78 | opts = append(opts, s2.WriterBetterCompression()) 79 | case S2LevelBest: 80 | opts = append(opts, s2.WriterBestCompression()) 81 | } 82 | if sz.S2.BlockSize != 0 { 83 | opts = append(opts, s2.WriterBlockSize(sz.S2.BlockSize)) 84 | } 85 | if sz.S2.Concurrency != 0 { 86 | opts = append(opts, s2.WriterConcurrency(sz.S2.Concurrency)) 87 | } 88 | if sz.S2.FlushOnWrite { 89 | opts = append(opts, s2.WriterFlushOnWrite()) 90 | } 91 | if sz.S2.Padding != 0 { 92 | opts = append(opts, s2.WriterPadding(sz.S2.Padding)) 93 | } 94 | if !sz.S2.SnappyIncompatible { 95 | // this option is inverted because by default we should 96 | // probably write Snappy-compatible streams 97 | opts = append(opts, s2.WriterSnappyCompat()) 98 | } 99 | return s2.NewWriter(w, opts...), nil 100 | } 101 | 102 | func (sz Sz) OpenReader(r io.Reader) (io.ReadCloser, error) { 103 | var opts []s2.ReaderOption 104 | if sz.S2.AllocBlock != 0 { 105 | opts = append(opts, s2.ReaderAllocBlock(sz.S2.AllocBlock)) 106 | } 107 | if sz.S2.IgnoreCRC { 108 | opts = append(opts, s2.ReaderIgnoreCRC()) 109 | } 110 | if sz.S2.IgnoreStreamIdentifier { 111 | opts = append(opts, s2.ReaderIgnoreStreamIdentifier()) 112 | } 113 | if sz.S2.MaxBlockSize != 0 { 114 | opts = append(opts, s2.ReaderMaxBlockSize(sz.S2.MaxBlockSize)) 115 | } 116 | return io.NopCloser(s2.NewReader(r, opts...)), nil 117 | } 118 | 119 | // Compression level for S2 (Snappy/Sz extension). 120 | // EXPERIMENTAL: May be changed or removed without a major version bump. 121 | type S2Level int 122 | 123 | // Compression levels for S2. 124 | // EXPERIMENTAL: May be changed or removed without a major version bump. 125 | const ( 126 | S2LevelNone S2Level = 0 127 | S2LevelFast S2Level = 1 128 | S2LevelBetter S2Level = 2 129 | S2LevelBest S2Level = 3 130 | ) 131 | 132 | // https://github.com/google/snappy/blob/master/framing_format.txt - contains "sNaPpY" 133 | var snappyHeader = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59} 134 | -------------------------------------------------------------------------------- /tar.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "archive/tar" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "io/fs" 10 | "log" 11 | "strings" 12 | ) 13 | 14 | func init() { 15 | RegisterFormat(Tar{}) 16 | } 17 | 18 | type Tar struct { 19 | // If true, use GNU header format 20 | FormatGNU bool 21 | 22 | // If true, preserve only numeric user and group id 23 | NumericUIDGID bool 24 | 25 | // If true, errors encountered during reading or writing 26 | // a file within an archive will be logged and the 27 | // operation will continue on remaining files. 28 | ContinueOnError bool 29 | 30 | // User ID of the file owner 31 | Uid int 32 | 33 | // Group ID of the file owner 34 | Gid int 35 | 36 | // Username of the file owner 37 | Uname string 38 | 39 | // Group name of the file owner 40 | Gname string 41 | } 42 | 43 | func (Tar) Extension() string { return ".tar" } 44 | func (Tar) MediaType() string { return "application/x-tar" } 45 | 46 | func (t Tar) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 47 | var mr MatchResult 48 | 49 | // match filename 50 | if strings.Contains(strings.ToLower(filename), t.Extension()) { 51 | mr.ByName = true 52 | } 53 | 54 | // match file header 55 | if stream != nil { 56 | r := tar.NewReader(stream) 57 | _, err := r.Next() 58 | mr.ByStream = err == nil 59 | } 60 | 61 | return mr, nil 62 | } 63 | 64 | func (t Tar) Archive(ctx context.Context, output io.Writer, files []FileInfo) error { 65 | tw := tar.NewWriter(output) 66 | defer tw.Close() 67 | 68 | for _, file := range files { 69 | if err := t.writeFileToArchive(ctx, tw, file); err != nil { 70 | if t.ContinueOnError && ctx.Err() == nil { // context errors should always abort 71 | log.Printf("[ERROR] %v", err) 72 | continue 73 | } 74 | return err 75 | } 76 | } 77 | 78 | return nil 79 | } 80 | 81 | func (t Tar) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error { 82 | tw := tar.NewWriter(output) 83 | defer tw.Close() 84 | 85 | for job := range jobs { 86 | job.Result <- t.writeFileToArchive(ctx, tw, job.File) 87 | } 88 | 89 | return nil 90 | } 91 | 92 | func (t Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file FileInfo) error { 93 | if err := ctx.Err(); err != nil { 94 | return err // honor context cancellation 95 | } 96 | 97 | hdr, err := tar.FileInfoHeader(file, file.LinkTarget) 98 | if err != nil { 99 | return fmt.Errorf("file %s: creating header: %w", file.NameInArchive, err) 100 | } 101 | hdr.Name = file.NameInArchive // complete path, since FileInfoHeader() only has base name 102 | if hdr.Name == "" { 103 | hdr.Name = file.Name() // assume base name of file I guess 104 | } 105 | if t.FormatGNU { 106 | hdr.Format = tar.FormatGNU 107 | } 108 | if t.NumericUIDGID { 109 | hdr.Uname = "" 110 | hdr.Gname = "" 111 | } 112 | if t.Uid != 0 { 113 | hdr.Uid = t.Uid 114 | } 115 | if t.Gid != 0 { 116 | hdr.Gid = t.Gid 117 | } 118 | if t.Uname != "" { 119 | hdr.Uname = t.Uname 120 | } 121 | if t.Gname != "" { 122 | hdr.Gname = t.Gname 123 | } 124 | 125 | if err := tw.WriteHeader(hdr); err != nil { 126 | return fmt.Errorf("file %s: writing header: %w", file.NameInArchive, err) 127 | } 128 | 129 | // only proceed to write a file body if there is actually a body 130 | // (for example, directories and links don't have a body) 131 | if hdr.Typeflag != tar.TypeReg { 132 | return nil 133 | } 134 | 135 | if err := openAndCopyFile(file, tw); err != nil { 136 | return fmt.Errorf("file %s: writing data: %w", file.NameInArchive, err) 137 | } 138 | 139 | return nil 140 | } 141 | 142 | func (t Tar) Insert(ctx context.Context, into io.ReadWriteSeeker, files []FileInfo) error { 143 | // Tar files may end with some, none, or a lot of zero-byte padding. The spec says 144 | // it should end with two 512-byte trailer records consisting solely of null/0 145 | // bytes: https://www.gnu.org/software/tar/manual/html_node/Standard.html. However, 146 | // in my experiments using the `tar` command, I've found that is not the case, 147 | // and Colin Percival (author of tarsnap) confirmed this: 148 | // - https://twitter.com/cperciva/status/1476774314623913987 149 | // - https://twitter.com/cperciva/status/1476776999758663680 150 | // So while this solution on Stack Overflow makes sense if you control the 151 | // writer: https://stackoverflow.com/a/18330903/1048862 - and I did get it 152 | // to work in that case -- it is not a general solution. Seems that the only 153 | // reliable thing to do is scan the entire archive to find the last file, 154 | // read its size, then use that to compute the end of content and thus the 155 | // true length of end-of-archive padding. This is slightly more complex than 156 | // just adding the size of the last file to the current stream/seek position, 157 | // because we have to align to 512-byte blocks precisely. I don't actually 158 | // fully know why this works, but in my testing on a few different files it 159 | // did work, whereas other solutions only worked on 1 specific file. *shrug* 160 | // 161 | // Another option is to scan the file for the last contiguous series of 0s, 162 | // without interpreting the tar format at all, and to find the nearest 163 | // blocksize-offset and start writing there. Problem is that you wouldn't 164 | // know if you just overwrote some of the last file if it ends with all 0s. 165 | // Sigh. 166 | var lastFileSize, lastStreamPos int64 167 | tr := tar.NewReader(into) 168 | for { 169 | hdr, err := tr.Next() 170 | if err == io.EOF { 171 | break 172 | } 173 | if err != nil { 174 | return err 175 | } 176 | lastStreamPos, err = into.Seek(0, io.SeekCurrent) 177 | if err != nil { 178 | return err 179 | } 180 | lastFileSize = hdr.Size 181 | } 182 | 183 | // we can now compute the precise location to write the new file to (I think) 184 | const blockSize = 512 // (as of Go 1.17, this is also a hard-coded const in the archive/tar package) 185 | newOffset := lastStreamPos + lastFileSize 186 | newOffset += blockSize - (newOffset % blockSize) // shift to next-nearest block boundary 187 | _, err := into.Seek(newOffset, io.SeekStart) 188 | if err != nil { 189 | return err 190 | } 191 | 192 | tw := tar.NewWriter(into) 193 | defer tw.Close() 194 | 195 | for i, file := range files { 196 | if err := ctx.Err(); err != nil { 197 | return err // honor context cancellation 198 | } 199 | err = t.writeFileToArchive(ctx, tw, file) 200 | if err != nil { 201 | if t.ContinueOnError && ctx.Err() == nil { 202 | log.Printf("[ERROR] appending file %d into archive: %s: %v", i, file.Name(), err) 203 | continue 204 | } 205 | return fmt.Errorf("appending file %d into archive: %s: %w", i, file.Name(), err) 206 | } 207 | } 208 | 209 | return nil 210 | } 211 | 212 | func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { 213 | tr := tar.NewReader(sourceArchive) 214 | 215 | // important to initialize to non-nil, empty value due to how fileIsIncluded works 216 | skipDirs := skipList{} 217 | 218 | for { 219 | if err := ctx.Err(); err != nil { 220 | return err // honor context cancellation 221 | } 222 | 223 | hdr, err := tr.Next() 224 | if err == io.EOF { 225 | break 226 | } 227 | if err != nil { 228 | if t.ContinueOnError && ctx.Err() == nil { 229 | log.Printf("[ERROR] Advancing to next file in tar archive: %v", err) 230 | continue 231 | } 232 | return err 233 | } 234 | if fileIsIncluded(skipDirs, hdr.Name) { 235 | continue 236 | } 237 | if hdr.Typeflag == tar.TypeXGlobalHeader { 238 | // ignore the pax global header from git-generated tarballs 239 | continue 240 | } 241 | 242 | info := hdr.FileInfo() 243 | file := FileInfo{ 244 | FileInfo: info, 245 | Header: hdr, 246 | NameInArchive: hdr.Name, 247 | LinkTarget: hdr.Linkname, 248 | Open: func() (fs.File, error) { 249 | return fileInArchive{io.NopCloser(tr), info}, nil 250 | }, 251 | } 252 | 253 | err = handleFile(ctx, file) 254 | if errors.Is(err, fs.SkipAll) { 255 | // At first, I wasn't sure if fs.SkipAll implied that the rest of the entries 256 | // should still be iterated and just "skipped" (i.e. no-ops) or if the walk 257 | // should stop; both have the same net effect, one is just less efficient... 258 | // apparently the name of fs.StopWalk was the preferred name, but it still 259 | // became fs.SkipAll because of semantics with documentation; see 260 | // https://github.com/golang/go/issues/47209 -- anyway, the walk should stop. 261 | break 262 | } else if errors.Is(err, fs.SkipDir) && file.IsDir() { 263 | skipDirs.add(hdr.Name) 264 | } else if err != nil { 265 | return fmt.Errorf("handling file: %s: %w", hdr.Name, err) 266 | } 267 | } 268 | 269 | return nil 270 | } 271 | 272 | // Interface guards 273 | var ( 274 | _ Archiver = (*Tar)(nil) 275 | _ ArchiverAsync = (*Tar)(nil) 276 | _ Extractor = (*Tar)(nil) 277 | _ Inserter = (*Tar)(nil) 278 | ) 279 | -------------------------------------------------------------------------------- /testdata/self-tar.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mholt/archives/8ed85e5752627a4b298f3e4f1474e873980d0897/testdata/self-tar.tar -------------------------------------------------------------------------------- /testdata/symlinks.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mholt/archives/8ed85e5752627a4b298f3e4f1474e873980d0897/testdata/symlinks.zip -------------------------------------------------------------------------------- /testdata/test.part01.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mholt/archives/8ed85e5752627a4b298f3e4f1474e873980d0897/testdata/test.part01.rar -------------------------------------------------------------------------------- /testdata/test.part02.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mholt/archives/8ed85e5752627a4b298f3e4f1474e873980d0897/testdata/test.part02.rar -------------------------------------------------------------------------------- /testdata/test.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mholt/archives/8ed85e5752627a4b298f3e4f1474e873980d0897/testdata/test.zip -------------------------------------------------------------------------------- /testdata/unordered.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mholt/archives/8ed85e5752627a4b298f3e4f1474e873980d0897/testdata/unordered.zip -------------------------------------------------------------------------------- /xz.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "strings" 8 | 9 | fastxz "github.com/therootcompany/xz" 10 | "github.com/ulikunitz/xz" 11 | ) 12 | 13 | func init() { 14 | RegisterFormat(Xz{}) 15 | } 16 | 17 | // Xz facilitates xz compression. 18 | type Xz struct{} 19 | 20 | func (Xz) Extension() string { return ".xz" } 21 | func (Xz) MediaType() string { return "application/x-xz" } 22 | 23 | func (x Xz) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 24 | var mr MatchResult 25 | 26 | // match filename 27 | if strings.Contains(strings.ToLower(filename), x.Extension()) { 28 | mr.ByName = true 29 | } 30 | 31 | // match file header 32 | buf, err := readAtMost(stream, len(xzHeader)) 33 | if err != nil { 34 | return mr, err 35 | } 36 | mr.ByStream = bytes.Equal(buf, xzHeader) 37 | 38 | return mr, nil 39 | } 40 | 41 | func (Xz) OpenWriter(w io.Writer) (io.WriteCloser, error) { 42 | return xz.NewWriter(w) 43 | } 44 | 45 | func (Xz) OpenReader(r io.Reader) (io.ReadCloser, error) { 46 | xr, err := fastxz.NewReader(r, 0) 47 | if err != nil { 48 | return nil, err 49 | } 50 | return io.NopCloser(xr), err 51 | } 52 | 53 | // magic number at the beginning of xz files; see section 2.1.1.1 54 | // of https://tukaani.org/xz/xz-file-format.txt 55 | var xzHeader = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00} 56 | -------------------------------------------------------------------------------- /zip.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "io/fs" 10 | "log" 11 | "os" 12 | "path" 13 | "strings" 14 | 15 | szip "github.com/STARRY-S/zip" 16 | "golang.org/x/text/encoding" 17 | 18 | "github.com/dsnet/compress/bzip2" 19 | "github.com/klauspost/compress/zip" 20 | "github.com/klauspost/compress/zstd" 21 | "github.com/ulikunitz/xz" 22 | ) 23 | 24 | func init() { 25 | RegisterFormat(Zip{}) 26 | 27 | // TODO: What about custom flate levels too 28 | zip.RegisterCompressor(ZipMethodBzip2, func(out io.Writer) (io.WriteCloser, error) { 29 | return bzip2.NewWriter(out, &bzip2.WriterConfig{ /*TODO: Level: z.CompressionLevel*/ }) 30 | }) 31 | zip.RegisterCompressor(ZipMethodZstd, func(out io.Writer) (io.WriteCloser, error) { 32 | return zstd.NewWriter(out) 33 | }) 34 | zip.RegisterCompressor(ZipMethodXz, func(out io.Writer) (io.WriteCloser, error) { 35 | return xz.NewWriter(out) 36 | }) 37 | 38 | zip.RegisterDecompressor(ZipMethodBzip2, func(r io.Reader) io.ReadCloser { 39 | bz2r, err := bzip2.NewReader(r, nil) 40 | if err != nil { 41 | return nil 42 | } 43 | return bz2r 44 | }) 45 | zip.RegisterDecompressor(ZipMethodZstd, func(r io.Reader) io.ReadCloser { 46 | zr, err := zstd.NewReader(r) 47 | if err != nil { 48 | return nil 49 | } 50 | return zr.IOReadCloser() 51 | }) 52 | zip.RegisterDecompressor(ZipMethodXz, func(r io.Reader) io.ReadCloser { 53 | xr, err := xz.NewReader(r) 54 | if err != nil { 55 | return nil 56 | } 57 | return io.NopCloser(xr) 58 | }) 59 | } 60 | 61 | type Zip struct { 62 | // Only compress files which are not already in a 63 | // compressed format (determined simply by examining 64 | // file extension). 65 | SelectiveCompression bool 66 | 67 | // The method or algorithm for compressing stored files. 68 | Compression uint16 69 | 70 | // If true, errors encountered during reading or writing 71 | // a file within an archive will be logged and the 72 | // operation will continue on remaining files. 73 | ContinueOnError bool 74 | 75 | // For files in zip archives that do not have UTF-8 76 | // encoded filenames and comments, specify the character 77 | // encoding here. 78 | TextEncoding encoding.Encoding 79 | } 80 | 81 | func (Zip) Extension() string { return ".zip" } 82 | func (Zip) MediaType() string { return "application/zip" } 83 | 84 | func (z Zip) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 85 | var mr MatchResult 86 | 87 | // match filename 88 | if strings.Contains(strings.ToLower(filename), z.Extension()) { 89 | mr.ByName = true 90 | } 91 | 92 | // match file header 93 | for _, hdr := range zipHeaders { 94 | buf, err := readAtMost(stream, len(hdr)) 95 | if err != nil { 96 | return mr, err 97 | } 98 | if bytes.Equal(buf, hdr) { 99 | mr.ByStream = true 100 | break 101 | } 102 | } 103 | 104 | return mr, nil 105 | } 106 | 107 | func (z Zip) Archive(ctx context.Context, output io.Writer, files []FileInfo) error { 108 | zw := zip.NewWriter(output) 109 | defer zw.Close() 110 | 111 | for i, file := range files { 112 | if err := z.archiveOneFile(ctx, zw, i, file); err != nil { 113 | return err 114 | } 115 | } 116 | 117 | return nil 118 | } 119 | 120 | func (z Zip) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error { 121 | zw := zip.NewWriter(output) 122 | defer zw.Close() 123 | 124 | var i int 125 | for job := range jobs { 126 | job.Result <- z.archiveOneFile(ctx, zw, i, job.File) 127 | i++ 128 | } 129 | 130 | return nil 131 | } 132 | 133 | func (z Zip) archiveOneFile(ctx context.Context, zw *zip.Writer, idx int, file FileInfo) error { 134 | if err := ctx.Err(); err != nil { 135 | return err // honor context cancellation 136 | } 137 | 138 | hdr, err := zip.FileInfoHeader(file) 139 | if err != nil { 140 | return fmt.Errorf("getting info for file %d: %s: %w", idx, file.Name(), err) 141 | } 142 | hdr.Name = file.NameInArchive // complete path, since FileInfoHeader() only has base name 143 | if hdr.Name == "" { 144 | hdr.Name = file.Name() // assume base name of file I guess 145 | } 146 | 147 | // customize header based on file properties 148 | if file.IsDir() { 149 | if !strings.HasSuffix(hdr.Name, "/") { 150 | hdr.Name += "/" // required 151 | } 152 | hdr.Method = zip.Store 153 | } else if z.SelectiveCompression { 154 | // only enable compression on compressable files 155 | ext := strings.ToLower(path.Ext(hdr.Name)) 156 | if _, ok := compressedFormats[ext]; ok { 157 | hdr.Method = zip.Store 158 | } else { 159 | hdr.Method = z.Compression 160 | } 161 | } else { 162 | hdr.Method = z.Compression 163 | } 164 | 165 | w, err := zw.CreateHeader(hdr) 166 | if err != nil { 167 | return fmt.Errorf("creating header for file %d: %s: %w", idx, file.Name(), err) 168 | } 169 | 170 | // file won't be considered a symlink if FollowSymlinks in FilesFromDisk is true 171 | if isSymlink(file) { 172 | _, err := w.Write([]byte(file.LinkTarget)) 173 | if err != nil { 174 | return fmt.Errorf("writing link target for file %d: %s: %w", idx, file.Name(), err) 175 | } 176 | return nil 177 | } 178 | 179 | // directories have no file body 180 | if file.IsDir() { 181 | return nil 182 | } 183 | 184 | if err := openAndCopyFile(file, w); err != nil { 185 | return fmt.Errorf("writing file %d: %s: %w", idx, file.Name(), err) 186 | } 187 | 188 | return nil 189 | } 190 | 191 | // Extract extracts files from z, implementing the Extractor interface. Uniquely, however, 192 | // sourceArchive must be an io.ReaderAt and io.Seeker, which are oddly disjoint interfaces 193 | // from io.Reader which is what the method signature requires. We chose this signature for 194 | // the interface because we figure you can Read() from anything you can ReadAt() or Seek() 195 | // with. Due to the nature of the zip archive format, if sourceArchive is not an io.Seeker 196 | // and io.ReaderAt, an error is returned. 197 | func (z Zip) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { 198 | sra, ok := sourceArchive.(seekReaderAt) 199 | if !ok { 200 | return fmt.Errorf("input type must be an io.ReaderAt and io.Seeker because of zip format constraints") 201 | } 202 | 203 | size, err := streamSizeBySeeking(sra) 204 | if err != nil { 205 | return fmt.Errorf("determining stream size: %w", err) 206 | } 207 | 208 | zr, err := zip.NewReader(sra, size) 209 | if err != nil { 210 | return err 211 | } 212 | 213 | // important to initialize to non-nil, empty value due to how fileIsIncluded works 214 | skipDirs := skipList{} 215 | 216 | for i, f := range zr.File { 217 | if err := ctx.Err(); err != nil { 218 | return err // honor context cancellation 219 | } 220 | 221 | // ensure filename and comment are UTF-8 encoded (issue #147 and PR #305) 222 | z.decodeText(&f.FileHeader) 223 | 224 | if fileIsIncluded(skipDirs, f.Name) { 225 | continue 226 | } 227 | 228 | info := f.FileInfo() 229 | linkTarget, err := z.getLinkTarget(f) 230 | if err != nil { 231 | return fmt.Errorf("getting link target for file %d: %s: %w", i, f.Name, err) 232 | } 233 | 234 | file := FileInfo{ 235 | FileInfo: info, 236 | Header: f.FileHeader, 237 | NameInArchive: f.Name, 238 | LinkTarget: linkTarget, 239 | Open: func() (fs.File, error) { 240 | openedFile, err := f.Open() 241 | if err != nil { 242 | return nil, err 243 | } 244 | return fileInArchive{openedFile, info}, nil 245 | }, 246 | } 247 | 248 | err = handleFile(ctx, file) 249 | if errors.Is(err, fs.SkipAll) { 250 | break 251 | } else if errors.Is(err, fs.SkipDir) && file.IsDir() { 252 | skipDirs.add(f.Name) 253 | } else if err != nil { 254 | if z.ContinueOnError { 255 | log.Printf("[ERROR] %s: %v", f.Name, err) 256 | continue 257 | } 258 | return fmt.Errorf("handling file %d: %s: %w", i, f.Name, err) 259 | } 260 | } 261 | 262 | return nil 263 | } 264 | 265 | // decodeText decodes the name and comment fields from hdr into UTF-8. 266 | // It is a no-op if the text is already UTF-8 encoded or if z.TextEncoding 267 | // is not specified. 268 | func (z Zip) decodeText(hdr *zip.FileHeader) { 269 | if hdr.NonUTF8 && z.TextEncoding != nil { 270 | dec := z.TextEncoding.NewDecoder() 271 | filename, err := dec.String(hdr.Name) 272 | if err == nil { 273 | hdr.Name = filename 274 | } 275 | if hdr.Comment != "" { 276 | comment, err := dec.String(hdr.Comment) 277 | if err == nil { 278 | hdr.Comment = comment 279 | } 280 | } 281 | } 282 | } 283 | 284 | func (z Zip) getLinkTarget(f *zip.File) (string, error) { 285 | info := f.FileInfo() 286 | // Exit early if not a symlink 287 | if info.Mode()&os.ModeSymlink == 0 { 288 | return "", nil 289 | } 290 | 291 | // Open the file and read the link target 292 | file, err := f.Open() 293 | if err != nil { 294 | return "", err 295 | } 296 | defer file.Close() 297 | 298 | const maxLinkTargetSize = 32768 299 | linkTargetBytes, err := io.ReadAll(io.LimitReader(file, maxLinkTargetSize)) 300 | if err != nil { 301 | return "", err 302 | } 303 | 304 | if len(linkTargetBytes) == maxLinkTargetSize { 305 | return "", fmt.Errorf("link target is too large: %d bytes", len(linkTargetBytes)) 306 | } 307 | 308 | return string(linkTargetBytes), nil 309 | } 310 | 311 | // Insert appends the listed files into the provided Zip archive stream. 312 | // If the filename already exists in the archive, it will be replaced. 313 | func (z Zip) Insert(ctx context.Context, into io.ReadWriteSeeker, files []FileInfo) error { 314 | // following very simple example at https://github.com/STARRY-S/zip?tab=readme-ov-file#usage 315 | zu, err := szip.NewUpdater(into) 316 | if err != nil { 317 | return err 318 | } 319 | defer zu.Close() 320 | 321 | for idx, file := range files { 322 | if err := ctx.Err(); err != nil { 323 | return err // honor context cancellation 324 | } 325 | 326 | hdr, err := szip.FileInfoHeader(file) 327 | if err != nil { 328 | return fmt.Errorf("getting info for file %d: %s: %w", idx, file.NameInArchive, err) 329 | } 330 | hdr.Name = file.NameInArchive // complete path, since FileInfoHeader() only has base name 331 | if hdr.Name == "" { 332 | hdr.Name = file.Name() // assume base name of file I guess 333 | } 334 | 335 | // customize header based on file properties 336 | if file.IsDir() { 337 | if !strings.HasSuffix(hdr.Name, "/") { 338 | hdr.Name += "/" // required 339 | } 340 | hdr.Method = zip.Store 341 | } else if z.SelectiveCompression { 342 | // only enable compression on compressable files 343 | ext := strings.ToLower(path.Ext(hdr.Name)) 344 | if _, ok := compressedFormats[ext]; ok { 345 | hdr.Method = zip.Store 346 | } else { 347 | hdr.Method = z.Compression 348 | } 349 | } 350 | 351 | w, err := zu.Append(hdr.Name, szip.APPEND_MODE_OVERWRITE) 352 | if err != nil { 353 | return fmt.Errorf("inserting file header: %d: %s: %w", idx, file.Name(), err) 354 | } 355 | 356 | // directories have no file body 357 | if file.IsDir() { 358 | return nil 359 | } 360 | if err := openAndCopyFile(file, w); err != nil { 361 | if z.ContinueOnError && ctx.Err() == nil { 362 | log.Printf("[ERROR] appending file %d into archive: %s: %v", idx, file.Name(), err) 363 | continue 364 | } 365 | return fmt.Errorf("copying inserted file %d: %s: %w", idx, file.Name(), err) 366 | } 367 | } 368 | 369 | return nil 370 | } 371 | 372 | type seekReaderAt interface { 373 | io.ReaderAt 374 | io.Seeker 375 | } 376 | 377 | // Additional compression methods not offered by archive/zip. 378 | // See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.4.5. 379 | const ( 380 | ZipMethodBzip2 = 12 381 | // TODO: LZMA: Disabled - because 7z isn't able to unpack ZIP+LZMA ZIP+LZMA2 archives made this way - and vice versa. 382 | // ZipMethodLzma = 14 383 | ZipMethodZstd = 93 384 | ZipMethodXz = 95 385 | ) 386 | 387 | // compressedFormats is a (non-exhaustive) set of lowercased 388 | // file extensions for formats that are typically already 389 | // compressed. Compressing files that are already compressed 390 | // is inefficient, so use this set of extensions to avoid that. 391 | var compressedFormats = map[string]struct{}{ 392 | ".7z": {}, 393 | ".avi": {}, 394 | ".br": {}, 395 | ".bz2": {}, 396 | ".cab": {}, 397 | ".docx": {}, 398 | ".gif": {}, 399 | ".gz": {}, 400 | ".jar": {}, 401 | ".jpeg": {}, 402 | ".jpg": {}, 403 | ".lz": {}, 404 | ".lz4": {}, 405 | ".lzma": {}, 406 | ".m4v": {}, 407 | ".mov": {}, 408 | ".mp3": {}, 409 | ".mp4": {}, 410 | ".mpeg": {}, 411 | ".mpg": {}, 412 | ".png": {}, 413 | ".pptx": {}, 414 | ".rar": {}, 415 | ".sz": {}, 416 | ".tbz2": {}, 417 | ".tgz": {}, 418 | ".tsz": {}, 419 | ".txz": {}, 420 | ".xlsx": {}, 421 | ".xz": {}, 422 | ".zip": {}, 423 | ".zipx": {}, 424 | } 425 | 426 | var zipHeaders = [][]byte{ 427 | []byte("PK\x03\x04"), // normal 428 | []byte("PK\x05\x06"), // empty 429 | } 430 | 431 | // Interface guards 432 | var ( 433 | _ Archiver = Zip{} 434 | _ ArchiverAsync = Zip{} 435 | _ Extractor = Zip{} 436 | ) 437 | -------------------------------------------------------------------------------- /zip_test.go: -------------------------------------------------------------------------------- 1 | package archives_test 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | "reflect" 10 | "runtime" 11 | "sort" 12 | "testing" 13 | 14 | "github.com/mholt/archives" 15 | ) 16 | 17 | func TestZip_ExtractZipWithSymlinks(t *testing.T) { 18 | zipFile, err := os.Open("testdata/symlinks.zip") 19 | if err != nil { 20 | t.Errorf("failed to open zip file: %v", err) 21 | } 22 | defer zipFile.Close() 23 | 24 | zip := archives.Zip{} 25 | extractedFiles := []string{} 26 | zip.Extract(context.Background(), zipFile, func(ctx context.Context, file archives.FileInfo) error { 27 | extractedFiles = append(extractedFiles, file.Name()) 28 | if file.Name() == "symlinked" { 29 | if file.LinkTarget != "../a/hello" { 30 | t.Errorf("expected symlink target to be '../a/hello', got %s", file.LinkTarget) 31 | } 32 | } 33 | return nil 34 | }) 35 | 36 | if len(extractedFiles) != 5 { 37 | t.Errorf("expected 5 files to be extracted, got %d", len(extractedFiles)) 38 | } 39 | sort.Strings(extractedFiles) 40 | expectedFiles := []string{"a", "b", "hello", "symlinked", "zip_test"} 41 | if !reflect.DeepEqual(extractedFiles, expectedFiles) { 42 | t.Errorf("expected files to be %v, got %v", expectedFiles, extractedFiles) 43 | } 44 | } 45 | 46 | type symlinkTestCase struct { 47 | name string 48 | followSymlinks bool 49 | expectSymlinks bool 50 | } 51 | 52 | func TestZip_ArchiveZipWithSymlinks(t *testing.T) { 53 | testCases := []symlinkTestCase{ 54 | { 55 | name: "preserve symlinks", 56 | followSymlinks: false, 57 | expectSymlinks: true, 58 | }, 59 | { 60 | name: "follow symlinks", 61 | followSymlinks: true, 62 | expectSymlinks: false, 63 | }, 64 | } 65 | 66 | for _, tc := range testCases { 67 | t.Run(tc.name, func(t *testing.T) { 68 | testSymlinkArchiving(t, tc) 69 | }) 70 | } 71 | } 72 | 73 | func testSymlinkArchiving(t *testing.T, tc symlinkTestCase) { 74 | testDir := setupTestDir(t) 75 | archivePath := filepath.Join(testDir.tempDir, "test_with_symlinks.zip") 76 | 77 | ctx := context.Background() 78 | files, err := archives.FilesFromDisk( 79 | ctx, 80 | &archives.FromDiskOptions{FollowSymlinks: tc.followSymlinks}, 81 | testDir.sources, 82 | ) 83 | if err != nil { 84 | t.Fatalf("failed to get files: %v", err) 85 | } 86 | 87 | archive := createAndArchive(t, archivePath, files) 88 | defer archive.Close() 89 | 90 | extractDir := extractArchive(t, archive, archivePath) 91 | verifyExtractedContent(t, extractDir, tc.expectSymlinks) 92 | } 93 | 94 | type testDirectorySetup struct { 95 | tempDir string 96 | file1Path string 97 | file2Path string 98 | subDir string 99 | file3Path string 100 | symlinkToFile string 101 | symlinkToDir string 102 | relativeSymlink string 103 | sources map[string]string 104 | } 105 | 106 | func setupTestDir(t *testing.T) *testDirectorySetup { 107 | tempDir := t.TempDir() 108 | 109 | setup := &testDirectorySetup{ 110 | tempDir: tempDir, 111 | file1Path: filepath.Join(tempDir, "file1.txt"), 112 | file2Path: filepath.Join(tempDir, "file2.txt"), 113 | subDir: filepath.Join(tempDir, "subdir"), 114 | symlinkToFile: filepath.Join(tempDir, "symlink_to_file.txt"), 115 | symlinkToDir: filepath.Join(tempDir, "symlink_to_dir"), 116 | } 117 | setup.file3Path = filepath.Join(setup.subDir, "file3.txt") 118 | setup.relativeSymlink = filepath.Join(setup.subDir, "relative_symlink.txt") 119 | 120 | createFile(t, setup.file1Path, "content of file 1") 121 | createFile(t, setup.file2Path, "content of file 2") 122 | createDir(t, setup.subDir) 123 | createFile(t, setup.file3Path, "content of file 3") 124 | createSymlink(t, "file1.txt", setup.symlinkToFile) 125 | createSymlink(t, "subdir", setup.symlinkToDir) 126 | createSymlink(t, "../file2.txt", setup.relativeSymlink) 127 | 128 | setup.sources = map[string]string{ 129 | setup.file1Path: "", 130 | setup.file2Path: "", 131 | setup.subDir: "", 132 | setup.symlinkToFile: "", 133 | setup.symlinkToDir: "", 134 | setup.relativeSymlink: "", 135 | } 136 | 137 | return setup 138 | } 139 | 140 | func createFile(t *testing.T, path, content string) { 141 | if err := os.WriteFile(path, []byte(content), 0644); err != nil { 142 | t.Fatalf("failed to write %s: %v", path, err) 143 | } 144 | } 145 | 146 | func createDir(t *testing.T, path string) { 147 | if err := os.MkdirAll(path, 0755); err != nil { 148 | t.Fatalf("failed to create directory %s: %v", path, err) 149 | } 150 | } 151 | 152 | func createSymlink(t *testing.T, target, linkPath string) { 153 | if err := os.Symlink(target, linkPath); err != nil { 154 | t.Fatalf("failed to create symlink %s -> %s: %v", linkPath, target, err) 155 | } 156 | } 157 | 158 | func createAndArchive(t *testing.T, archivePath string, files []archives.FileInfo) *os.File { 159 | archive, err := os.Create(archivePath) 160 | if err != nil { 161 | t.Fatalf("failed to create archive: %v", err) 162 | } 163 | 164 | zip := archives.Zip{} 165 | ctx := context.Background() 166 | if err := zip.Archive(ctx, archive, files); err != nil { 167 | t.Fatalf("failed to archive files: %v", err) 168 | } 169 | if err := archive.Close(); err != nil { 170 | t.Fatalf("failed to close archive: %v", err) 171 | } 172 | 173 | archive, err = os.Open(archivePath) 174 | if err != nil { 175 | t.Fatalf("failed to open archive: %v", err) 176 | } 177 | return archive 178 | } 179 | 180 | func extractArchive(t *testing.T, archive *os.File, archivePath string) string { 181 | extractDir := filepath.Join(filepath.Dir(archivePath), "extracted") 182 | if err := os.MkdirAll(extractDir, 0755); err != nil { 183 | t.Fatalf("failed to create extract directory: %v", err) 184 | } 185 | 186 | zip := archives.Zip{} 187 | ctx := context.Background() 188 | err := zip.Extract(ctx, archive, func(ctx context.Context, file archives.FileInfo) error { 189 | if file.IsDir() { 190 | return os.MkdirAll(filepath.Join(extractDir, file.NameInArchive), file.Mode()) 191 | } 192 | 193 | os.MkdirAll(filepath.Dir(filepath.Join(extractDir, file.NameInArchive)), 0755) 194 | if file.Mode()&os.ModeSymlink != 0 { 195 | if file.LinkTarget == "" { 196 | return fmt.Errorf("symlink target is empty") 197 | } 198 | return os.Symlink(file.LinkTarget, filepath.Join(extractDir, file.NameInArchive)) 199 | } 200 | 201 | handle, err := file.Open() 202 | if err != nil { 203 | return err 204 | } 205 | defer handle.Close() 206 | dest, err := os.Create(filepath.Join(extractDir, file.NameInArchive)) 207 | if err != nil { 208 | return err 209 | } 210 | defer dest.Close() 211 | _, err = io.Copy(dest, handle) 212 | return err 213 | }) 214 | if err != nil { 215 | t.Fatalf("failed to extract archive: %v", err) 216 | } 217 | return extractDir 218 | } 219 | 220 | func verifyExtractedContent(t *testing.T, extractDir string, expectSymlinks bool) { 221 | verifyFileContent(t, extractDir, "file1.txt", "content of file 1") 222 | verifyFileContent(t, extractDir, "file2.txt", "content of file 2") 223 | verifyFileContent(t, extractDir, "subdir/file3.txt", "content of file 3") 224 | 225 | if expectSymlinks { 226 | verifySymlink(t, extractDir, "symlink_to_file.txt", "file1.txt") 227 | verifySymlink(t, extractDir, "symlink_to_dir", "subdir") 228 | relativePath := "../file2.txt" 229 | if runtime.GOOS == "windows" { 230 | relativePath = "..\\file2.txt" 231 | } 232 | verifySymlink(t, extractDir, "relative_symlink.txt", relativePath) 233 | } else { 234 | verifyFileContent(t, extractDir, "symlink_to_file.txt", "content of file 1") 235 | verifyFileContent(t, extractDir, "relative_symlink.txt", "content of file 2") 236 | verifyIsDirectory(t, extractDir, "symlink_to_dir") 237 | } 238 | } 239 | 240 | func verifyFileContent(t *testing.T, baseDir, relativePath, expectedContent string) { 241 | filePath := filepath.Join(baseDir, relativePath) 242 | content, err := os.ReadFile(filePath) 243 | if err != nil { 244 | t.Errorf("failed to read %s: %v", relativePath, err) 245 | return 246 | } 247 | if string(content) != expectedContent { 248 | t.Errorf("expected content %q in %s, got %q", expectedContent, relativePath, string(content)) 249 | } 250 | } 251 | 252 | func verifySymlink(t *testing.T, baseDir, relativePath, expectedTarget string) { 253 | filePath := filepath.Join(baseDir, relativePath) 254 | stat, err := os.Lstat(filePath) 255 | if err != nil { 256 | t.Errorf("failed to lstat %s: %v", relativePath, err) 257 | return 258 | } 259 | if stat.Mode()&os.ModeSymlink == 0 { 260 | t.Errorf("expected %s to be a symlink, got mode %s", relativePath, stat.Mode()) 261 | return 262 | } 263 | target, err := os.Readlink(filePath) 264 | if err != nil { 265 | t.Errorf("failed to read symlink %s: %v", relativePath, err) 266 | return 267 | } 268 | if target != expectedTarget { 269 | t.Errorf("expected symlink %s to point to %s, got %s", relativePath, expectedTarget, target) 270 | } 271 | } 272 | 273 | func verifyIsDirectory(t *testing.T, baseDir, relativePath string) { 274 | filePath := filepath.Join(baseDir, relativePath) 275 | stat, err := os.Lstat(filePath) 276 | if err != nil { 277 | t.Errorf("failed to lstat %s: %v", relativePath, err) 278 | return 279 | } 280 | if !stat.IsDir() { 281 | t.Errorf("expected %s to be a directory, got mode %s", relativePath, stat.Mode()) 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /zlib.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "strings" 7 | 8 | "github.com/klauspost/compress/zlib" 9 | ) 10 | 11 | func init() { 12 | RegisterFormat(Zlib{}) 13 | } 14 | 15 | // Zlib facilitates zlib compression. 16 | type Zlib struct { 17 | CompressionLevel int 18 | } 19 | 20 | func (Zlib) Extension() string { return ".zz" } 21 | func (Zlib) MediaType() string { return "application/zlib" } 22 | 23 | func (zz Zlib) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 24 | var mr MatchResult 25 | 26 | // match filename 27 | if strings.Contains(strings.ToLower(filename), zz.Extension()) { 28 | mr.ByName = true 29 | } 30 | 31 | // match file header 32 | buf, err := readAtMost(stream, 2) 33 | // If an error occurred or buf is not 2 bytes we can't check the header 34 | if err != nil || len(buf) < 2 { 35 | return mr, err 36 | } 37 | 38 | mr.ByStream = isValidZlibHeader(buf[0], buf[1]) 39 | 40 | return mr, nil 41 | } 42 | 43 | func (zz Zlib) OpenWriter(w io.Writer) (io.WriteCloser, error) { 44 | level := zz.CompressionLevel 45 | if level == 0 { 46 | level = zlib.DefaultCompression 47 | } 48 | return zlib.NewWriterLevel(w, level) 49 | } 50 | 51 | func (Zlib) OpenReader(r io.Reader) (io.ReadCloser, error) { 52 | return zlib.NewReader(r) 53 | } 54 | 55 | func isValidZlibHeader(first, second byte) bool { 56 | // Define all 32 valid zlib headers, see https://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like/54915442#54915442 57 | validHeaders := map[uint16]struct{}{ 58 | 0x081D: {}, 0x085B: {}, 0x0899: {}, 0x08D7: {}, 59 | 0x1819: {}, 0x1857: {}, 0x1895: {}, 0x18D3: {}, 60 | 0x2815: {}, 0x2853: {}, 0x2891: {}, 0x28CF: {}, 61 | 0x3811: {}, 0x384F: {}, 0x388D: {}, 0x38CB: {}, 62 | 0x480D: {}, 0x484B: {}, 0x4889: {}, 0x48C7: {}, 63 | 0x5809: {}, 0x5847: {}, 0x5885: {}, 0x58C3: {}, 64 | 0x6805: {}, 0x6843: {}, 0x6881: {}, 0x68DE: {}, 65 | 0x7801: {}, 0x785E: {}, 0x789C: {}, 0x78DA: {}, 66 | } 67 | 68 | // Combine the first and second bytes into a single 16-bit, big-endian value 69 | header := uint16(first)<<8 | uint16(second) 70 | 71 | // Check if the header is in the map of valid headers 72 | _, isValid := validHeaders[header] 73 | return isValid 74 | } 75 | -------------------------------------------------------------------------------- /zstd.go: -------------------------------------------------------------------------------- 1 | package archives 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "strings" 8 | 9 | "github.com/klauspost/compress/zstd" 10 | ) 11 | 12 | func init() { 13 | RegisterFormat(Zstd{}) 14 | } 15 | 16 | // Zstd facilitates Zstandard compression. 17 | type Zstd struct { 18 | EncoderOptions []zstd.EOption 19 | DecoderOptions []zstd.DOption 20 | } 21 | 22 | func (Zstd) Extension() string { return ".zst" } 23 | func (Zstd) MediaType() string { return "application/zstd" } 24 | 25 | func (zs Zstd) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { 26 | var mr MatchResult 27 | 28 | // match filename 29 | if strings.Contains(strings.ToLower(filename), zs.Extension()) { 30 | mr.ByName = true 31 | } 32 | 33 | // match file header 34 | buf, err := readAtMost(stream, len(zstdHeader)) 35 | if err != nil { 36 | return mr, err 37 | } 38 | mr.ByStream = bytes.Equal(buf, zstdHeader) 39 | 40 | return mr, nil 41 | } 42 | 43 | func (zs Zstd) OpenWriter(w io.Writer) (io.WriteCloser, error) { 44 | return zstd.NewWriter(w, zs.EncoderOptions...) 45 | } 46 | 47 | func (zs Zstd) OpenReader(r io.Reader) (io.ReadCloser, error) { 48 | zr, err := zstd.NewReader(r, zs.DecoderOptions...) 49 | if err != nil { 50 | return nil, err 51 | } 52 | return errorCloser{zr}, nil 53 | } 54 | 55 | type errorCloser struct { 56 | *zstd.Decoder 57 | } 58 | 59 | func (ec errorCloser) Close() error { 60 | ec.Decoder.Close() 61 | return nil 62 | } 63 | 64 | // magic number at the beginning of Zstandard files 65 | // https://github.com/facebook/zstd/blob/6211bfee5ec24dc825c11751c33aa31d618b5f10/doc/zstd_compression_format.md 66 | var zstdHeader = []byte{0x28, 0xb5, 0x2f, 0xfd} 67 | --------------------------------------------------------------------------------