├── .github └── workflows │ └── test.yaml ├── LICENSE ├── README.md ├── cancelable_reader.go ├── cancelable_reader_test.go ├── evil_generator └── main.go ├── extract.go ├── extract_test.go ├── extractor.go ├── extractor_test.go ├── go.mod ├── go.sum ├── loggingfs_test.go ├── safejoin_test.go ├── testdata ├── .gitignore ├── archive-with-backslashes.zip ├── archive.mistery ├── archive.tar.bz2 ├── archive.tar.gz ├── archive.tar.xz ├── archive.tar.zst ├── archive.zip ├── permissions.tar ├── permissions.zip ├── singlefile.bz2 ├── singlefile.gz └── zipslip │ ├── evil-link-traversal.tar │ ├── evil-win.tar │ ├── evil.tar │ └── evil.zip ├── umask_unix_test.go └── umask_windows_test.go /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | build-and-test: 9 | strategy: 10 | matrix: 11 | os: [ubuntu-latest, macOS-latest] 12 | 13 | runs-on: ${{ matrix.os }} 14 | 15 | steps: 16 | - uses: actions/checkout@v1 17 | - uses: actions/setup-go@v1 18 | with: 19 | go-version: "1.22" 20 | - name: Build native 21 | run: go build -v ./... 22 | shell: bash 23 | - name: Run unit tests 24 | run: go test -v -race -bench=. ./... 
25 | shell: bash 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 codeclysm 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Extract 2 | 3 | [![Build Status](https://github.com/codeclysm/extract/actions/workflows/test.yaml/badge.svg?branch=master)](https://github.com/codeclysm/extract/actions/workflows/test.yaml) 4 | [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/codeclysm/extract/master/LICENSE) 5 | [![Godoc Reference](https://img.shields.io/badge/Godoc-Reference-blue.svg)](https://godoc.org/github.com/codeclysm/extract) 6 | 7 | import "github.com/codeclysm/extract/v4" 8 | 9 | Package extract lets you extract archives in zip, tar.gz or tar.bz2 formats 10 | easily. 11 | 12 | Most of the time you'll just need to call the proper function with a Reader and 13 | a destination: 14 | 15 | ```go 16 | file, _ := os.Open("path/to/file.tar.bz2") 17 | extract.Bz2(context.TODO(), file, "/path/where/to/extract", nil) 18 | ``` 19 | 20 | or also: 21 | 22 | ```go 23 | data, _ := os.ReadFile("path/to/file.tar.bz2") 24 | buffer := bytes.NewBuffer(data) 25 | extract.Bz2(context.TODO(), buffer, "/path/where/to/extract", nil) 26 | ``` 27 | 28 | Sometimes you'll want a bit more control over the files, such as extracting a 29 | subfolder of the archive.
In these cases you can specify a renamer func that will 30 | change the path for every file: 31 | 32 | ```go 33 | var shift = func(path string) string { 34 | parts := strings.Split(path, string(filepath.Separator)) 35 | parts = parts[1:] 36 | return strings.Join(parts, string(filepath.Separator)) 37 | } 38 | extract.Bz2(context.TODO(), file, "/path/where/to/extract", shift) 39 | ``` 40 | 41 | If you don't know which archive you're dealing with (life really is always a surprise) you can use Archive, which will infer the type of archive from the first bytes: 42 | 43 | ```go 44 | extract.Archive(context.TODO(), file, "/path/where/to/extract", nil) 45 | ``` 46 | 47 | If you need more control over how your files will be extracted you can use an Extractor. 48 | 49 | It needs an FS object that implements the FS interface: 50 | 51 | ```go 52 | type FS interface { 53 | Link(string, string) error 54 | MkdirAll(string, os.FileMode) error 55 | OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) 56 | Symlink(string, string) error 57 | Remove(path string) error 58 | Stat(name string) (os.FileInfo, error) 59 | Chmod(name string, mode os.FileMode) error 60 | } 61 | ``` 62 | 63 | which contains only the functions required to perform an extraction. This way it's easy to wrap os functions to 64 | chroot the path, or scramble the files, or send an event for each operation or even reimplement them for an in-memory store, I don't know.
// copyCancel copies src into dst like io.Copy, but stops as soon as ctx is
// done. It returns the number of bytes copied and the first error encountered.
func copyCancel(ctx context.Context, dst io.Writer, src io.Reader) (int64, error) {
	return io.Copy(dst, newCancelableReader(ctx, src))
}

// errInterrupted is the sentinel matched by errors.Is for any error produced
// by a cancelableReader whose context is done.
var errInterrupted = errors.New("interrupted")

// interruptedError is returned by cancelableReader.Read once the context is
// done. Its message is exactly "interrupted"; it unwraps to the context error
// so callers can tell a cancellation from an expired deadline with
// errors.Is(err, context.Canceled) or errors.Is(err, context.DeadlineExceeded).
type interruptedError struct {
	cause error
}

// Error returns the fixed "interrupted" message.
func (e *interruptedError) Error() string { return errInterrupted.Error() }

// Is reports whether target is the errInterrupted sentinel.
func (e *interruptedError) Is(target error) bool { return target == errInterrupted }

// Unwrap exposes the context error that caused the interruption.
func (e *interruptedError) Unwrap() error { return e.cause }

// cancelableReader wraps an io.Reader and refuses to read once its context
// is done.
type cancelableReader struct {
	ctx context.Context
	src io.Reader
}

// Read forwards to the wrapped reader unless the context is already done, in
// which case it returns 0 and an "interrupted" error without touching src.
func (r *cancelableReader) Read(p []byte) (int, error) {
	if err := r.ctx.Err(); err != nil {
		return 0, &interruptedError{cause: err}
	}
	return r.src.Read(p)
}

// newCancelableReader returns a reader over src that stops when ctx is done.
func newCancelableReader(ctx context.Context, src io.Reader) *cancelableReader {
	return &cancelableReader{
		ctx: ctx,
		src: src,
	}
}
len(b), readed) 35 | } 36 | 37 | func TestCancelableReaderWithInterruption(t *testing.T) { 38 | var b [100000]byte 39 | ctx, cancel := context.WithCancel(context.Background()) 40 | reader := newCancelableReader(ctx, bytes.NewReader(b[:])) 41 | defer cancel() 42 | 43 | go func() { 44 | time.Sleep(100 * time.Millisecond) 45 | cancel() 46 | }() 47 | 48 | var buff [1000]byte 49 | readed := 0 50 | for { 51 | n, err := reader.Read(buff[:]) 52 | if err != nil { 53 | fmt.Println("exit error:", err) 54 | require.Equal(t, "interrupted", err.Error()) 55 | break 56 | } 57 | require.NotZero(t, n) 58 | time.Sleep(10 * time.Millisecond) 59 | readed += n 60 | } 61 | fmt.Println("Readed", readed, "out of", len(b)) 62 | require.True(t, readed < len(b)) 63 | } 64 | -------------------------------------------------------------------------------- /evil_generator/main.go: -------------------------------------------------------------------------------- 1 | // This utility is used to generate the archives used as testdata for zipslip vulnerability 2 | package main 3 | 4 | //go:generate go run main.go ../testdata/zipslip 5 | 6 | import ( 7 | "archive/tar" 8 | "archive/zip" 9 | "bytes" 10 | "log" 11 | "os" 12 | 13 | "github.com/arduino/go-paths-helper" 14 | ) 15 | 16 | func main() { 17 | if len(os.Args) != 2 { 18 | log.Fatal("Missing output directory") 19 | } 20 | outputDir := paths.New(os.Args[1]) 21 | if outputDir.IsNotDir() { 22 | log.Fatalf("Output path %s is not a directory", outputDir) 23 | } 24 | 25 | generateEvilZipSlip(outputDir) 26 | generateEvilSymLinkPathTraversalTar(outputDir) 27 | } 28 | 29 | func generateEvilZipSlip(outputDir *paths.Path) { 30 | evilPathTraversalFiles := []string{ 31 | "..", 32 | "../../../../../../../../../../../../../../../../../../../../tmp/evil.txt", 33 | "some/path/../../../../../../../../../../../../../../../../../../../../tmp/evil.txt", 34 | "/../../../../../../../../../../../../../../../../../../../../tmp/evil.txt", 35 | 
"/some/path/../../../../../../../../../../../../../../../../../../../../tmp/evil.txt", 36 | } 37 | winSpecificPathTraversalFiles := []string{ 38 | "..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\tmp\\evil.txt", 39 | "some\\path\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\tmp\\evil.txt", 40 | "\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\tmp\\evil.txt", 41 | "\\some\\path\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\tmp\\evil.txt", 42 | } 43 | winSpecificPathTraversalFiles = append(winSpecificPathTraversalFiles, evilPathTraversalFiles...) 44 | 45 | // Generate evil zip 46 | { 47 | buf := new(bytes.Buffer) 48 | w := zip.NewWriter(buf) 49 | for _, file := range winSpecificPathTraversalFiles { 50 | if f, err := w.Create(file); err != nil { 51 | log.Fatal(err) 52 | } else if _, err = f.Write([]byte("TEST")); err != nil { 53 | log.Fatal(err) 54 | } 55 | } 56 | if err := w.Close(); err != nil { 57 | log.Fatal(err) 58 | } 59 | if err := outputDir.Join("evil.zip").WriteFile(buf.Bytes()); err != nil { 60 | log.Fatal(err) 61 | } 62 | } 63 | 64 | // Generate evil tar 65 | { 66 | buf := new(bytes.Buffer) 67 | w := tar.NewWriter(buf) 68 | for _, file := range evilPathTraversalFiles { 69 | if err := w.WriteHeader(&tar.Header{ 70 | Name: file, 71 | Size: 4, 72 | Mode: 0666, 73 | }); err != nil { 74 | log.Fatal(err) 75 | } 76 | if _, err := w.Write([]byte("TEST")); err != nil { 77 | log.Fatal(err) 78 | } 79 | } 80 | if err := w.Close(); err != nil { 81 | log.Fatal(err) 82 | } 83 | if err := outputDir.Join("evil.tar").WriteFile(buf.Bytes()); err != nil { 84 | log.Fatal(err) 85 | } 86 | } 87 | 88 | // Generate evil tar for windows 89 | { 90 | buf := new(bytes.Buffer) 91 | w := tar.NewWriter(buf) 92 | for _, file := range winSpecificPathTraversalFiles { 93 | if err := w.WriteHeader(&tar.Header{ 94 | Name: file, 95 | Size: 4, 96 | Mode: 0666, 97 
| }); err != nil { 98 | log.Fatal(err) 99 | } 100 | if _, err := w.Write([]byte("TEST")); err != nil { 101 | log.Fatal(err) 102 | } 103 | } 104 | if err := w.Close(); err != nil { 105 | log.Fatal(err) 106 | } 107 | if err := outputDir.Join("evil-win.tar").WriteFile(buf.Bytes()); err != nil { 108 | log.Fatal(err) 109 | } 110 | } 111 | } 112 | 113 | func generateEvilSymLinkPathTraversalTar(outputDir *paths.Path) { 114 | outputTarFile, err := outputDir.Join("evil-link-traversal.tar").Create() 115 | if err != nil { 116 | log.Fatal(err) 117 | } 118 | defer outputTarFile.Close() 119 | 120 | tw := tar.NewWriter(outputTarFile) 121 | defer tw.Close() 122 | 123 | if err := tw.WriteHeader(&tar.Header{ 124 | Name: "leak", Linkname: "../../../../../../../../../../../../../../../tmp/something-important", 125 | Mode: 0o0777, Size: 0, Typeflag: tar.TypeLink, 126 | }); err != nil { 127 | log.Fatal(err) 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /extract.go: -------------------------------------------------------------------------------- 1 | // Package extract allows to extract archives in zip, tar.gz or tar.bz2 formats 2 | // easily. 3 | // 4 | // Most of the time you'll just need to call the proper function with a Reader and 5 | // a destination: 6 | // 7 | // file, _ := os.Open("path/to/file.tar.bz2") 8 | // extract.Bz2(context.TODO, file, "/path/where/to/extract", nil) 9 | // 10 | // ``` 11 | // 12 | // Sometimes you'll want a bit more control over the files, such as extracting a 13 | // subfolder of the archive. 
In this cases you can specify a renamer func that will 14 | // change the path for every file: 15 | // 16 | // var shift = func(path string) string { 17 | // parts := strings.Split(path, string(filepath.Separator)) 18 | // parts = parts[1:] 19 | // return strings.Join(parts, string(filepath.Separator)) 20 | // } 21 | // extract.Bz2(context.TODO, file, "/path/where/to/extract", shift) 22 | // 23 | // ``` 24 | // 25 | // If you don't know which archive you're dealing with (life really is always a surprise) you can use Archive, which will infer the type of archive from the first bytes 26 | // 27 | // extract.Archive(context.TODO, file, "/path/where/to/extract", nil) 28 | package extract 29 | 30 | import ( 31 | "context" 32 | "io" 33 | "os" 34 | ) 35 | 36 | // Renamer is a function that can be used to rename the files when you're extracting 37 | // them. For example you may want to only extract files with a certain pattern. 38 | // If you return an empty string they won't be extracted. 39 | type Renamer func(string) string 40 | 41 | // Archive extracts a generic archived stream of data in the specified location. 42 | // It automatically detects the archive type and accepts a rename function to 43 | // handle the names of the files. 44 | // If the file is not an archive, an error is returned. 45 | func Archive(ctx context.Context, body io.Reader, location string, rename Renamer) error { 46 | extractor := Extractor{FS: fs{}} 47 | return extractor.Archive(ctx, body, location, rename) 48 | } 49 | 50 | // Zstd extracts a .zst or .tar.zst archived stream of data in the specified location. 51 | // It accepts a rename function to handle the names of the files (see the example) 52 | func Zstd(ctx context.Context, body io.Reader, location string, rename Renamer) error { 53 | extractor := Extractor{FS: fs{}} 54 | return extractor.Zstd(ctx, body, location, rename) 55 | } 56 | 57 | // Xz extracts a .xz or .tar.xz archived stream of data in the specified location. 
58 | // It accepts a rename function to handle the names of the files (see the example) 59 | func Xz(ctx context.Context, body io.Reader, location string, rename Renamer) error { 60 | extractor := Extractor{FS: fs{}} 61 | return extractor.Xz(ctx, body, location, rename) 62 | } 63 | 64 | // Bz2 extracts a .bz2 or .tar.bz2 archived stream of data in the specified location. 65 | // It accepts a rename function to handle the names of the files (see the example) 66 | func Bz2(ctx context.Context, body io.Reader, location string, rename Renamer) error { 67 | extractor := Extractor{FS: fs{}} 68 | return extractor.Bz2(ctx, body, location, rename) 69 | } 70 | 71 | // Gz extracts a .gz or .tar.gz archived stream of data in the specified location. 72 | // It accepts a rename function to handle the names of the files (see the example) 73 | func Gz(ctx context.Context, body io.Reader, location string, rename Renamer) error { 74 | extractor := Extractor{FS: fs{}} 75 | return extractor.Gz(ctx, body, location, rename) 76 | } 77 | 78 | // Tar extracts a .tar archived stream of data in the specified location. 79 | // It accepts a rename function to handle the names of the files (see the example) 80 | func Tar(ctx context.Context, body io.Reader, location string, rename Renamer) error { 81 | extractor := Extractor{FS: fs{}} 82 | return extractor.Tar(ctx, body, location, rename) 83 | } 84 | 85 | // Zip extracts a .zip archived stream of data in the specified location. 86 | // It accepts a rename function to handle the names of the files (see the example). 
// fs is the filesystem backend used by the package-level extraction helpers,
// which build their Extractor with FS: fs{}. Every method forwards directly
// to the function of the same name in package os.
type fs struct{}

// Link creates newname as a hard link to oldname by calling os.Link.
func (f fs) Link(oldname, newname string) error {
	return os.Link(oldname, newname)
}

// MkdirAll creates path, along with any missing parents, by calling
// os.MkdirAll with the given permissions.
func (f fs) MkdirAll(path string, perm os.FileMode) error {
	return os.MkdirAll(path, perm)
}

// Symlink creates newname as a symbolic link to oldname by calling os.Symlink.
func (f fs) Symlink(oldname, newname string) error {
	return os.Symlink(oldname, newname)
}

// OpenFile opens the named file with the given flags and permissions by
// calling os.OpenFile.
func (f fs) OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) {
	return os.OpenFile(name, flag, perm)
}

// Remove deletes the named file or directory by calling os.Remove.
func (f fs) Remove(path string) error {
	return os.Remove(path)
}

// Stat returns the os.FileInfo for the named file by calling os.Stat.
func (f fs) Stat(name string) (os.FileInfo, error) {
	return os.Stat(name)
}

// Chmod changes the mode of the named file by calling os.Chmod.
func (f fs) Chmod(name string, mode os.FileMode) error {
	return os.Chmod(name, mode)
}
extract.Renamer 40 | Files Files 41 | }{ 42 | {"standard bz2", "testdata/archive.tar.bz2", nil, Files{ 43 | "": "dir", 44 | "/archive": "dir", 45 | "/archive/folder": "dir", 46 | "/archive/folderlink": "link", 47 | "/archive/folder/file1.txt": "folder/File1", 48 | "/archive/file1.txt": "File1", 49 | "/archive/file2.txt": "File2", 50 | "/archive/link.txt": "File1", 51 | }}, 52 | {"shift bz2", "testdata/archive.tar.bz2", shift, Files{ 53 | "": "dir", 54 | "/folder": "dir", 55 | "/folderlink": "link", 56 | "/folder/file1.txt": "folder/File1", 57 | "/file1.txt": "File1", 58 | "/file2.txt": "File2", 59 | "/link.txt": "File1", 60 | }}, 61 | {"subfolder bz2", "testdata/archive.tar.bz2", subfolder, Files{ 62 | "": "dir", 63 | "/archive": "dir", 64 | "/archive/folder": "dir", 65 | "/archive/folder/file1.txt": "folder/File1", 66 | "/archive/folderlink": "link", 67 | }}, 68 | {"not tarred bz2", "testdata/singlefile.bz2", nil, Files{ 69 | "": "singlefile", 70 | }}, 71 | 72 | {"standard gz", "testdata/archive.tar.gz", nil, Files{ 73 | "": "dir", 74 | "/archive": "dir", 75 | "/archive/folder": "dir", 76 | "/archive/folderlink": "link", 77 | "/archive/folder/file1.txt": "folder/File1", 78 | "/archive/file1.txt": "File1", 79 | "/archive/file2.txt": "File2", 80 | "/archive/link.txt": "File1", 81 | }}, 82 | {"shift gz", "testdata/archive.tar.gz", shift, Files{ 83 | "": "dir", 84 | "/folder": "dir", 85 | "/folderlink": "link", 86 | "/folder/file1.txt": "folder/File1", 87 | "/file1.txt": "File1", 88 | "/file2.txt": "File2", 89 | "/link.txt": "File1", 90 | }}, 91 | {"subfolder gz", "testdata/archive.tar.gz", subfolder, Files{ 92 | "": "dir", 93 | "/archive": "dir", 94 | "/archive/folder": "dir", 95 | "/archive/folder/file1.txt": "folder/File1", 96 | "/archive/folderlink": "link", 97 | }}, 98 | {"not tarred gz", "testdata/singlefile.gz", nil, Files{ 99 | "": "singlefile", 100 | }}, 101 | // Note that the zip format doesn't support hard links 102 | {"standard zip", 
"testdata/archive.zip", nil, Files{ 103 | "": "dir", 104 | "/archive": "dir", 105 | "/archive/folder": "dir", 106 | "/archive/folderlink": "link", 107 | "/archive/folder/file1.txt": "folder/File1", 108 | "/archive/file1.txt": "File1", 109 | "/archive/file2.txt": "File2", 110 | "/archive/link.txt": "File1", 111 | }}, 112 | {"shift zip", "testdata/archive.zip", shift, Files{ 113 | "": "dir", 114 | "/folder": "dir", 115 | "/folderlink": "link", 116 | "/folder/file1.txt": "folder/File1", 117 | "/file1.txt": "File1", 118 | "/file2.txt": "File2", 119 | "/link.txt": "File1", 120 | }}, 121 | {"subfolder zip", "testdata/archive.zip", subfolder, Files{ 122 | "": "dir", 123 | "/archive": "dir", 124 | "/archive/folder": "dir", 125 | "/archive/folder/file1.txt": "folder/File1", 126 | "/archive/folderlink": "link", 127 | }}, 128 | 129 | {"standard inferred", "testdata/archive.mistery", nil, Files{ 130 | "": "dir", 131 | "/archive": "dir", 132 | "/archive/folder": "dir", 133 | "/archive/folderlink": "link", 134 | "/archive/folder/file1.txt": "folder/File1", 135 | "/archive/file1.txt": "File1", 136 | "/archive/file2.txt": "File2", 137 | "/archive/link.txt": "File1", 138 | }}, 139 | {"shift inferred", "testdata/archive.mistery", shift, Files{ 140 | "": "dir", 141 | "/folder": "dir", 142 | "/folderlink": "link", 143 | "/folder/file1.txt": "folder/File1", 144 | "/file1.txt": "File1", 145 | "/file2.txt": "File2", 146 | "/link.txt": "File1", 147 | }}, 148 | {"subfolder inferred", "testdata/archive.mistery", subfolder, Files{ 149 | "": "dir", 150 | "/archive": "dir", 151 | "/archive/folder": "dir", 152 | "/archive/folder/file1.txt": "folder/File1", 153 | "/archive/folderlink": "link", 154 | }}, 155 | 156 | {"standard zip with backslashes", "testdata/archive-with-backslashes.zip", nil, Files{ 157 | "": "dir", 158 | "/AZ3166": "dir", 159 | "/AZ3166/libraries": "dir", 160 | "/AZ3166/libraries/AzureIoT": "dir", 161 | "/AZ3166/libraries/AzureIoT/keywords.txt": "Azure", 162 | "/AZ3166/cores": 
"dir", 163 | "/AZ3166/cores/arduino": "dir", 164 | "/AZ3166/cores/arduino/azure-iot-sdk-c": "dir", 165 | "/AZ3166/cores/arduino/azure-iot-sdk-c/umqtt": "dir", 166 | "/AZ3166/cores/arduino/azure-iot-sdk-c/umqtt/src": "dir", 167 | }}, 168 | {"shift zip with backslashes", "testdata/archive-with-backslashes.zip", shift, Files{ 169 | "": "dir", 170 | "/libraries": "dir", 171 | "/libraries/AzureIoT": "dir", 172 | "/libraries/AzureIoT/keywords.txt": "Azure", 173 | "/cores": "dir", 174 | "/cores/arduino": "dir", 175 | "/cores/arduino/azure-iot-sdk-c": "dir", 176 | "/cores/arduino/azure-iot-sdk-c/umqtt": "dir", 177 | "/cores/arduino/azure-iot-sdk-c/umqtt/src": "dir", 178 | }}, 179 | } 180 | 181 | func TestArchiveFailure(t *testing.T) { 182 | err := extract.Archive(context.Background(), strings.NewReader("not an archive"), "", nil) 183 | require.Error(t, err) 184 | require.Contains(t, err.Error(), "Not a supported archive") 185 | } 186 | 187 | func TestExtract(t *testing.T) { 188 | for _, test := range ExtractCases { 189 | dir, _ := os.MkdirTemp("", "") 190 | dir = filepath.Join(dir, "test") 191 | data, err := os.ReadFile(test.Archive) 192 | if err != nil { 193 | t.Fatal(err) 194 | } 195 | buffer := bytes.NewBuffer(data) 196 | 197 | switch filepath.Ext(test.Archive) { 198 | case ".bz2": 199 | err = extract.Bz2(context.Background(), buffer, dir, test.Renamer) 200 | case ".gz": 201 | err = extract.Gz(context.Background(), buffer, dir, test.Renamer) 202 | case ".zip": 203 | err = extract.Zip(context.Background(), buffer, dir, test.Renamer) 204 | case ".mistery": 205 | err = extract.Archive(context.Background(), buffer, dir, test.Renamer) 206 | default: 207 | t.Fatal("unknown error") 208 | } 209 | 210 | if err != nil { 211 | t.Fatal(test.Name, ": Should not fail: "+err.Error()) 212 | } 213 | 214 | testWalk(t, dir, test.Files) 215 | 216 | err = os.RemoveAll(dir) 217 | if err != nil { 218 | t.Fatal(err) 219 | } 220 | } 221 | } 222 | 223 | func TestExtractIdempotency(t *testing.T) 
{ 224 | for _, test := range ExtractCases { 225 | t.Run(test.Name, func(t *testing.T) { 226 | dir, _ := os.MkdirTemp("", "") 227 | dir = filepath.Join(dir, "test") 228 | data, err := os.ReadFile(test.Archive) 229 | if err != nil { 230 | t.Fatal(err) 231 | } 232 | 233 | var extractFn func(context.Context, io.Reader, string, extract.Renamer) error 234 | switch filepath.Ext(test.Archive) { 235 | case ".bz2": 236 | extractFn = extract.Bz2 237 | case ".gz": 238 | extractFn = extract.Gz 239 | case ".zip": 240 | extractFn = extract.Zip 241 | case ".mistery": 242 | extractFn = extract.Archive 243 | default: 244 | t.Fatal("unknown error") 245 | } 246 | 247 | buffer := bytes.NewBuffer(data) 248 | if err = extractFn(context.Background(), buffer, dir, test.Renamer); err != nil { 249 | t.Fatal(test.Name, ": Should not fail first extraction: "+err.Error()) 250 | } 251 | 252 | buffer = bytes.NewBuffer(data) 253 | if err = extractFn(context.Background(), buffer, dir, test.Renamer); err != nil { 254 | t.Fatal(test.Name, ": Should not fail second extraction: "+err.Error()) 255 | } 256 | 257 | testWalk(t, dir, test.Files) 258 | 259 | err = os.RemoveAll(dir) 260 | if err != nil { 261 | t.Fatal(err) 262 | } 263 | }) 264 | } 265 | } 266 | 267 | func BenchmarkArchive(b *testing.B) { 268 | dir, _ := os.MkdirTemp("", "") 269 | data, _ := os.ReadFile("testdata/archive.tar.bz2") 270 | 271 | b.StartTimer() 272 | 273 | for i := 0; i < b.N; i++ { 274 | buffer := bytes.NewBuffer(data) 275 | err := extract.Archive(context.Background(), buffer, filepath.Join(dir, strconv.Itoa(i)), nil) 276 | if err != nil { 277 | b.Error(err) 278 | } 279 | } 280 | 281 | b.StopTimer() 282 | 283 | err := os.RemoveAll(dir) 284 | if err != nil { 285 | b.Error(err) 286 | } 287 | } 288 | 289 | func BenchmarkTarBz2(b *testing.B) { 290 | dir, _ := os.MkdirTemp("", "") 291 | data, _ := os.ReadFile("testdata/archive.tar.bz2") 292 | 293 | b.StartTimer() 294 | 295 | for i := 0; i < b.N; i++ { 296 | buffer := 
bytes.NewBuffer(data) 297 | err := extract.Bz2(context.Background(), buffer, filepath.Join(dir, strconv.Itoa(i)), nil) 298 | if err != nil { 299 | b.Error(err) 300 | } 301 | } 302 | 303 | b.StopTimer() 304 | 305 | err := os.RemoveAll(dir) 306 | if err != nil { 307 | b.Error(err) 308 | } 309 | } 310 | 311 | func BenchmarkTarGz(b *testing.B) { 312 | dir, _ := os.MkdirTemp("", "") 313 | data, _ := os.ReadFile("testdata/archive.tar.gz") 314 | 315 | b.StartTimer() 316 | 317 | for i := 0; i < b.N; i++ { 318 | buffer := bytes.NewBuffer(data) 319 | err := extract.Gz(context.Background(), buffer, filepath.Join(dir, strconv.Itoa(i)), nil) 320 | if err != nil { 321 | b.Error(err) 322 | } 323 | } 324 | 325 | b.StopTimer() 326 | 327 | err := os.RemoveAll(dir) 328 | if err != nil { 329 | b.Error(err) 330 | } 331 | } 332 | 333 | func BenchmarkZip(b *testing.B) { 334 | dir, _ := os.MkdirTemp("", "") 335 | data, _ := os.ReadFile("testdata/archive.zip") 336 | 337 | b.StartTimer() 338 | 339 | for i := 0; i < b.N; i++ { 340 | buffer := bytes.NewBuffer(data) 341 | err := extract.Zip(context.Background(), buffer, filepath.Join(dir, strconv.Itoa(i)), nil) 342 | if err != nil { 343 | b.Error(err) 344 | } 345 | } 346 | 347 | b.StopTimer() 348 | 349 | err := os.RemoveAll(dir) 350 | if err != nil { 351 | b.Error(err) 352 | } 353 | } 354 | 355 | func testWalk(t *testing.T, dir string, testFiles Files) { 356 | files := Files{} 357 | filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { 358 | path = strings.Replace(path, dir, "", 1) 359 | 360 | if info.IsDir() { 361 | files[path] = "dir" 362 | } else if info.Mode()&os.ModeSymlink != 0 { 363 | files[path] = "link" 364 | } else { 365 | data, err := os.ReadFile(filepath.Join(dir, path)) 366 | require.NoError(t, err) 367 | files[path] = strings.TrimSpace(string(data)) 368 | } 369 | 370 | return nil 371 | }) 372 | 373 | for file, kind := range files { 374 | k, ok := testFiles[file] 375 | if !ok { 376 | t.Error(file + " should not 
exist") 377 | continue 378 | } 379 | 380 | if kind != k { 381 | t.Error(file + " should be " + k + ", not " + kind) 382 | continue 383 | } 384 | } 385 | 386 | for file, kind := range testFiles { 387 | k, ok := files[file] 388 | if !ok { 389 | t.Error(file + " should exist") 390 | continue 391 | } 392 | 393 | if kind != k { 394 | t.Error(file + " should be " + kind + ", not " + k) 395 | continue 396 | } 397 | } 398 | } 399 | 400 | func TestTarGzMemoryConsumption(t *testing.T) { 401 | archive := paths.New("testdata/big.tar.gz") 402 | download(t, "http://downloads.arduino.cc/gcc-arm-none-eabi-4.8.3-2014q1-windows.tar.gz", archive) 403 | 404 | tmpDir, err := paths.MkTempDir("", "") 405 | require.NoError(t, err) 406 | defer tmpDir.RemoveAll() 407 | 408 | f, err := archive.Open() 409 | require.NoError(t, err) 410 | 411 | var m, m2 runtime.MemStats 412 | runtime.GC() 413 | runtime.ReadMemStats(&m) 414 | 415 | err = extract.Archive(context.Background(), f, tmpDir.String(), nil) 416 | require.NoError(t, err) 417 | 418 | runtime.ReadMemStats(&m2) 419 | heapUsed := m2.HeapInuse - m.HeapInuse 420 | if m2.HeapInuse < m.HeapInuse { 421 | heapUsed = 0 422 | } 423 | fmt.Println("Heap memory used during the test:", heapUsed) 424 | require.True(t, heapUsed < 5000000, "heap consumption should be less than 5M but is %d", heapUsed) 425 | } 426 | 427 | func TestZipMemoryConsumption(t *testing.T) { 428 | archive := paths.New("testdata/big.zip") 429 | download(t, "http://downloads.arduino.cc/tools/gcc-arm-none-eabi-7-2017-q4-major-win32-arduino1.zip", archive) 430 | 431 | tmpDir, err := paths.MkTempDir("", "") 432 | require.NoError(t, err) 433 | defer tmpDir.RemoveAll() 434 | 435 | f, err := archive.Open() 436 | require.NoError(t, err) 437 | 438 | var m, m2 runtime.MemStats 439 | runtime.GC() 440 | runtime.ReadMemStats(&m) 441 | 442 | err = extract.Archive(context.Background(), f, tmpDir.String(), nil) 443 | require.NoError(t, err) 444 | 445 | runtime.ReadMemStats(&m2) 446 | heapUsed := 
m2.HeapInuse - m.HeapInuse 447 | if m2.HeapInuse < m.HeapInuse { 448 | heapUsed = 0 449 | } 450 | fmt.Println("Heap memory used during the test:", heapUsed) 451 | require.True(t, heapUsed < 10000000, "heap consumption should be less than 10M but is %d", heapUsed) 452 | } 453 | 454 | func download(t require.TestingT, url string, file *paths.Path) { 455 | if file.Exist() { 456 | return 457 | } 458 | 459 | fmt.Printf("Downloading %s in %s\n", url, file) 460 | resp, err := http.Get(url) 461 | require.NoError(t, err) 462 | defer resp.Body.Close() 463 | 464 | out, err := file.Create() 465 | require.NoError(t, err) 466 | 467 | _, err = io.Copy(out, resp.Body) 468 | out.Close() 469 | if err != nil { 470 | file.Remove() 471 | } 472 | require.NoError(t, err) 473 | } 474 | -------------------------------------------------------------------------------- /extractor.go: -------------------------------------------------------------------------------- 1 | package extract 2 | 3 | import ( 4 | "archive/tar" 5 | "archive/zip" 6 | "bytes" 7 | "compress/bzip2" 8 | "compress/gzip" 9 | "context" 10 | "fmt" 11 | "io" 12 | "os" 13 | "path/filepath" 14 | "strings" 15 | 16 | filetype "github.com/h2non/filetype" 17 | "github.com/h2non/filetype/types" 18 | "github.com/juju/errors" 19 | "github.com/klauspost/compress/zstd" 20 | "github.com/ulikunitz/xz" 21 | ) 22 | 23 | // Extractor is more sophisticated than the base functions. It allows to write over an interface 24 | // rather than directly on the filesystem 25 | type Extractor struct { 26 | FS interface { 27 | // Link creates newname as a hard link to the oldname file. If there is an error, it will be of type *LinkError. 28 | Link(oldname, newname string) error 29 | 30 | // MkdirAll creates the directory path and all his parents if needed. 31 | MkdirAll(path string, perm os.FileMode) error 32 | 33 | // OpenFile opens the named file with specified flag (O_RDONLY etc.). 
34 | OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) 35 | 36 | // Symlink creates newname as a symbolic link to oldname. 37 | Symlink(oldname, newname string) error 38 | 39 | // Remove removes the named file or (empty) directory. 40 | Remove(path string) error 41 | 42 | // Stat returns a FileInfo describing the named file. 43 | Stat(name string) (os.FileInfo, error) 44 | 45 | // Chmod changes the mode of the named file to mode. 46 | // If the file is a symbolic link, it changes the mode of the link's target. 47 | Chmod(name string, mode os.FileMode) error 48 | } 49 | } 50 | 51 | // Archive extracts a generic archived stream of data in the specified location. 52 | // It automatically detects the archive type and accepts a rename function to 53 | // handle the names of the files. 54 | // If the file is not an archive, an error is returned. 55 | func (e *Extractor) Archive(ctx context.Context, body io.Reader, location string, rename Renamer) error { 56 | body, kind, err := match(body) 57 | if err != nil { 58 | errors.Annotatef(err, "Detect archive type") 59 | } 60 | 61 | switch kind.Extension { 62 | case "zip": 63 | return e.Zip(ctx, body, location, rename) 64 | case "gz": 65 | return e.Gz(ctx, body, location, rename) 66 | case "bz2": 67 | return e.Bz2(ctx, body, location, rename) 68 | case "xz": 69 | return e.Xz(ctx, body, location, rename) 70 | case "zst": 71 | return e.Zstd(ctx, body, location, rename) 72 | case "tar": 73 | return e.Tar(ctx, body, location, rename) 74 | default: 75 | return errors.New("Not a supported archive: " + kind.Extension) 76 | } 77 | } 78 | 79 | func (e *Extractor) Zstd(ctx context.Context, body io.Reader, location string, rename Renamer) error { 80 | reader, err := zstd.NewReader(body) 81 | if err != nil { 82 | return errors.Annotatef(err, "opening zstd: detect") 83 | } 84 | 85 | body, kind, err := match(reader) 86 | if err != nil { 87 | return errors.Annotatef(err, "extract zstd: detect") 88 | } 89 | 90 | if 
kind.Extension == "tar" { 91 | return e.Tar(ctx, body, location, rename) 92 | } 93 | 94 | err = e.copy(ctx, location, 0666, body) 95 | if err != nil { 96 | return err 97 | } 98 | return nil 99 | } 100 | 101 | func (e *Extractor) Xz(ctx context.Context, body io.Reader, location string, rename Renamer) error { 102 | reader, err := xz.NewReader(body) 103 | if err != nil { 104 | return errors.Annotatef(err, "opening xz: detect") 105 | } 106 | 107 | body, kind, err := match(reader) 108 | if err != nil { 109 | return errors.Annotatef(err, "extract xz: detect") 110 | } 111 | 112 | if kind.Extension == "tar" { 113 | return e.Tar(ctx, body, location, rename) 114 | } 115 | 116 | err = e.copy(ctx, location, 0666, body) 117 | if err != nil { 118 | return err 119 | } 120 | return nil 121 | } 122 | 123 | // Bz2 extracts a .bz2 or .tar.bz2 archived stream of data in the specified location. 124 | // It accepts a rename function to handle the names of the files (see the example) 125 | func (e *Extractor) Bz2(ctx context.Context, body io.Reader, location string, rename Renamer) error { 126 | reader := bzip2.NewReader(body) 127 | 128 | body, kind, err := match(reader) 129 | if err != nil { 130 | return errors.Annotatef(err, "extract bz2: detect") 131 | } 132 | 133 | if kind.Extension == "tar" { 134 | return e.Tar(ctx, body, location, rename) 135 | } 136 | 137 | err = e.copy(ctx, location, 0666, body) 138 | if err != nil { 139 | return err 140 | } 141 | return nil 142 | } 143 | 144 | // Gz extracts a .gz or .tar.gz archived stream of data in the specified location. 
145 | // It accepts a rename function to handle the names of the files (see the example) 146 | func (e *Extractor) Gz(ctx context.Context, body io.Reader, location string, rename Renamer) error { 147 | reader, err := gzip.NewReader(body) 148 | if err != nil { 149 | return errors.Annotatef(err, "Gunzip") 150 | } 151 | 152 | body, kind, err := match(reader) 153 | if err != nil { 154 | return err 155 | } 156 | 157 | if kind.Extension == "tar" { 158 | return e.Tar(ctx, body, location, rename) 159 | } 160 | err = e.copy(ctx, location, 0666, body) 161 | if err != nil { 162 | return err 163 | } 164 | return nil 165 | } 166 | 167 | type link struct { 168 | Name string 169 | Path string 170 | } 171 | 172 | // Tar extracts a .tar archived stream of data in the specified location. 173 | // It accepts a rename function to handle the names of the files (see the example) 174 | func (e *Extractor) Tar(ctx context.Context, body io.Reader, location string, rename Renamer) error { 175 | links := []*link{} 176 | symlinks := []*link{} 177 | 178 | // We make the first pass creating the directory structure, or we could end up 179 | // attempting to create a file where there's no folder 180 | tr := tar.NewReader(body) 181 | for { 182 | select { 183 | case <-ctx.Done(): 184 | return errors.New("interrupted") 185 | default: 186 | } 187 | 188 | header, err := tr.Next() 189 | if err == io.EOF { 190 | break 191 | } 192 | 193 | if err != nil { 194 | return errors.Annotatef(err, "Read tar stream") 195 | } 196 | 197 | path := header.Name 198 | if rename != nil { 199 | path = rename(path) 200 | } 201 | 202 | if path == "" { 203 | continue 204 | } 205 | 206 | if path, err = safeJoin(location, path); err != nil { 207 | continue 208 | } 209 | 210 | info := header.FileInfo() 211 | 212 | switch header.Typeflag { 213 | case tar.TypeDir: 214 | if err := e.FS.MkdirAll(path, info.Mode()); err != nil { 215 | return errors.Annotatef(err, "Create directory %s", path) 216 | } 217 | case tar.TypeReg, 
tar.TypeRegA: 218 | if err := e.copy(ctx, path, info.Mode(), tr); err != nil { 219 | return errors.Annotatef(err, "Create file %s", path) 220 | } 221 | case tar.TypeLink: 222 | name := header.Linkname 223 | if rename != nil { 224 | name = rename(name) 225 | } 226 | 227 | name, err = safeJoin(location, name) 228 | if err != nil { 229 | continue 230 | } 231 | links = append(links, &link{Path: path, Name: name}) 232 | case tar.TypeSymlink: 233 | symlinks = append(symlinks, &link{Path: path, Name: header.Linkname}) 234 | } 235 | } 236 | 237 | // Now we make another pass creating the links 238 | for i := range links { 239 | select { 240 | case <-ctx.Done(): 241 | return errors.New("interrupted") 242 | default: 243 | } 244 | _ = e.FS.Remove(links[i].Path) 245 | if err := e.FS.Link(links[i].Name, links[i].Path); err != nil { 246 | return errors.Annotatef(err, "Create link %s", links[i].Path) 247 | } 248 | } 249 | 250 | if err := e.extractSymlinks(ctx, symlinks); err != nil { 251 | return err 252 | } 253 | 254 | return nil 255 | } 256 | 257 | func (e *Extractor) extractSymlinks(ctx context.Context, symlinks []*link) error { 258 | for _, symlink := range symlinks { 259 | select { 260 | case <-ctx.Done(): 261 | return errors.New("interrupted") 262 | default: 263 | } 264 | 265 | // Make a placeholder and replace it after unpacking everything 266 | _ = e.FS.Remove(symlink.Path) 267 | f, err := e.FS.OpenFile(symlink.Path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.FileMode(0666)) 268 | if err != nil { 269 | return fmt.Errorf("creating symlink placeholder %s: %w", symlink.Path, err) 270 | } 271 | if err := f.Close(); err != nil { 272 | return fmt.Errorf("creating symlink placeholder %s: %w", symlink.Path, err) 273 | } 274 | } 275 | 276 | for _, symlink := range symlinks { 277 | select { 278 | case <-ctx.Done(): 279 | return errors.New("interrupted") 280 | default: 281 | } 282 | _ = e.FS.Remove(symlink.Path) 283 | if err := e.FS.Symlink(symlink.Name, symlink.Path); err != nil { 284 | 
return errors.Annotatef(err, "Create link %s", symlink.Path) 285 | } 286 | } 287 | 288 | return nil 289 | } 290 | 291 | // Zip extracts a .zip archived stream of data in the specified location. 292 | // It accepts a rename function to handle the names of the files (see the example). 293 | func (e *Extractor) Zip(ctx context.Context, body io.Reader, location string, rename Renamer) error { 294 | var bodySize int64 295 | bodyReaderAt, isReaderAt := (body).(io.ReaderAt) 296 | if bodySeeker, isSeeker := (body).(io.Seeker); isReaderAt && isSeeker { 297 | // get the size by seeking to the end 298 | endPos, err := bodySeeker.Seek(0, io.SeekEnd) 299 | if err != nil { 300 | return fmt.Errorf("failed to seek to the end of the body: %s", err) 301 | } 302 | // reset the reader to the beginning 303 | if _, err := bodySeeker.Seek(0, io.SeekStart); err != nil { 304 | return fmt.Errorf("failed to seek to the beginning of the body: %w", err) 305 | } 306 | bodySize = endPos 307 | } else { 308 | // read the whole body into a buffer. Not sure this is the best way to do it 309 | buffer := bytes.NewBuffer([]byte{}) 310 | copyCancel(ctx, buffer, body) 311 | bodyReaderAt = bytes.NewReader(buffer.Bytes()) 312 | bodySize = int64(buffer.Len()) 313 | } 314 | archive, err := zip.NewReader(bodyReaderAt, bodySize) 315 | if err != nil { 316 | return errors.Annotatef(err, "Read the zip file") 317 | } 318 | 319 | links := []*link{} 320 | 321 | // We make the first pass creating the directory structure, or we could end up 322 | // attempting to create a file where there's no folder 323 | for _, header := range archive.File { 324 | select { 325 | case <-ctx.Done(): 326 | return errors.New("interrupted") 327 | default: 328 | } 329 | 330 | path := header.Name 331 | 332 | // Replace backslash with forward slash. There are archives in the wild made with 333 | // buggy compressors that use backslash as path separator. 
The ZIP format explicitly 334 | // denies the use of "\" so we just replace it with slash "/". 335 | // Moreover it seems that folders are stored as "files" but with a final "\" in the 336 | // filename... oh, well... 337 | forceDir := strings.HasSuffix(path, "\\") 338 | path = strings.Replace(path, "\\", "/", -1) 339 | 340 | if rename != nil { 341 | path = rename(path) 342 | } 343 | 344 | if path == "" { 345 | continue 346 | } 347 | 348 | if path, err = safeJoin(location, path); err != nil { 349 | continue 350 | } 351 | 352 | info := header.FileInfo() 353 | 354 | switch { 355 | case info.IsDir() || forceDir: 356 | dirMode := info.Mode() | os.ModeDir | 0100 357 | if _, err := e.FS.Stat(path); err == nil { 358 | // directory already created, update permissions 359 | if err := e.FS.Chmod(path, dirMode); err != nil { 360 | return errors.Annotatef(err, "Set permissions %s", path) 361 | } 362 | } else if err := e.FS.MkdirAll(path, dirMode); err != nil { 363 | return errors.Annotatef(err, "Create directory %s", path) 364 | } 365 | // We only check for symlinks because hard links aren't possible 366 | case info.Mode()&os.ModeSymlink != 0: 367 | if f, err := header.Open(); err != nil { 368 | return errors.Annotatef(err, "Open link %s", path) 369 | } else if name, err := io.ReadAll(f); err != nil { 370 | return errors.Annotatef(err, "Read address of link %s", path) 371 | } else { 372 | links = append(links, &link{Path: path, Name: string(name)}) 373 | f.Close() 374 | } 375 | default: 376 | if f, err := header.Open(); err != nil { 377 | return errors.Annotatef(err, "Open file %s", path) 378 | } else if err := e.copy(ctx, path, info.Mode(), f); err != nil { 379 | return errors.Annotatef(err, "Create file %s", path) 380 | } else { 381 | f.Close() 382 | } 383 | } 384 | } 385 | 386 | if err := e.extractSymlinks(ctx, links); err != nil { 387 | return err 388 | } 389 | 390 | return nil 391 | } 392 | 393 | func (e *Extractor) copy(ctx context.Context, path string, mode os.FileMode, 
src io.Reader) error { 394 | // We add the execution permission to be able to create files inside it 395 | err := e.FS.MkdirAll(filepath.Dir(path), mode|os.ModeDir|0100) 396 | if err != nil { 397 | return err 398 | } 399 | _ = e.FS.Remove(path) 400 | file, err := e.FS.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, mode) 401 | if err != nil { 402 | return err 403 | } 404 | defer file.Close() 405 | _, err = copyCancel(ctx, file, src) 406 | return err 407 | } 408 | 409 | // match reads the first 512 bytes, calls types.Match and returns a reader 410 | // for the whole stream 411 | func match(r io.Reader) (io.Reader, types.Type, error) { 412 | buffer := make([]byte, 512) 413 | 414 | n, err := r.Read(buffer) 415 | if err != nil && err != io.EOF { 416 | return nil, types.Unknown, err 417 | } 418 | 419 | if seeker, ok := r.(io.Seeker); ok { 420 | // if the stream is seekable, we just rewind it 421 | if _, err := seeker.Seek(0, io.SeekStart); err != nil { 422 | return nil, types.Unknown, err 423 | } 424 | } else { 425 | // otherwise we create a new reader that will prepend the buffer 426 | r = io.MultiReader(bytes.NewBuffer(buffer[:n]), r) 427 | } 428 | 429 | typ, err := filetype.Match(buffer) 430 | 431 | return r, typ, err 432 | } 433 | 434 | // safeJoin performs a filepath.Join of 'parent' and 'subdir' but returns an error 435 | // if the resulting path points outside of 'parent'. 
436 | func safeJoin(parent, subdir string) (string, error) { 437 | res := filepath.Join(parent, subdir) 438 | if !strings.HasSuffix(parent, string(os.PathSeparator)) { 439 | parent += string(os.PathSeparator) 440 | } 441 | if !strings.HasPrefix(res, parent) { 442 | return res, errors.Errorf("unsafe path join: '%s' with '%s'", parent, subdir) 443 | } 444 | return res, nil 445 | } 446 | -------------------------------------------------------------------------------- /extractor_test.go: -------------------------------------------------------------------------------- 1 | package extract_test 2 | 3 | import ( 4 | "archive/tar" 5 | "archive/zip" 6 | "bytes" 7 | "context" 8 | "fmt" 9 | "os" 10 | "path/filepath" 11 | "runtime" 12 | "strconv" 13 | "strings" 14 | "testing" 15 | 16 | "github.com/arduino/go-paths-helper" 17 | "github.com/codeclysm/extract/v4" 18 | "github.com/stretchr/testify/require" 19 | ) 20 | 21 | func TestExtractors(t *testing.T) { 22 | type archiveTest struct { 23 | name string 24 | file *paths.Path 25 | } 26 | testCases := []archiveTest{ 27 | {"TarGz", paths.New("testdata/archive.tar.gz")}, 28 | {"TarBz2", paths.New("testdata/archive.tar.bz2")}, 29 | {"TarXz", paths.New("testdata/archive.tar.xz")}, 30 | {"TarZstd", paths.New("testdata/archive.tar.zst")}, 31 | {"Zip", paths.New("testdata/archive.zip")}, 32 | } 33 | for _, test := range testCases { 34 | t.Run(test.name, func(t *testing.T) { 35 | testArchive(t, test.file) 36 | }) 37 | } 38 | } 39 | 40 | func testArchive(t *testing.T, archivePath *paths.Path) { 41 | tmp, err := paths.MkTempDir("", "") 42 | require.NoError(t, err) 43 | defer tmp.RemoveAll() 44 | 45 | data, err := archivePath.ReadFile() 46 | require.NoError(t, err) 47 | 48 | buffer := bytes.NewBuffer(data) 49 | 50 | extractor := extract.Extractor{ 51 | FS: MockDisk{ 52 | Base: tmp.String(), 53 | }, 54 | } 55 | err = extractor.Archive(context.Background(), buffer, "/", nil) 56 | require.NoError(t, err) 57 | 58 | files := Files{ 59 | "": "dir", 
60 | "/archive": "dir", 61 | "/archive/folder": "dir", 62 | "/archive/folderlink": "link", 63 | "/archive/folder/file1.txt": "folder/File1", 64 | "/archive/file1.txt": "File1", 65 | "/archive/file2.txt": "File2", 66 | "/archive/link.txt": "File1", 67 | } 68 | testWalk(t, tmp.String(), files) 69 | } 70 | 71 | func TestZipSlipHardening(t *testing.T) { 72 | t.Run("ZipTraversal", func(t *testing.T) { 73 | logger := &LoggingFS{} 74 | extractor := extract.Extractor{FS: logger} 75 | data, err := os.Open("testdata/zipslip/evil.zip") 76 | require.NoError(t, err) 77 | require.NoError(t, extractor.Zip(context.Background(), data, "/tmp/test", nil)) 78 | require.NoError(t, data.Close()) 79 | fmt.Print(logger) 80 | require.Empty(t, logger.Journal) 81 | }) 82 | 83 | t.Run("TarTraversal", func(t *testing.T) { 84 | logger := &LoggingFS{} 85 | extractor := extract.Extractor{FS: logger} 86 | data, err := os.Open("testdata/zipslip/evil.tar") 87 | require.NoError(t, err) 88 | require.NoError(t, extractor.Tar(context.Background(), data, "/tmp/test", nil)) 89 | require.NoError(t, data.Close()) 90 | fmt.Print(logger) 91 | require.Empty(t, logger.Journal) 92 | }) 93 | 94 | t.Run("TarLinkTraversal", func(t *testing.T) { 95 | logger := &LoggingFS{} 96 | extractor := extract.Extractor{FS: logger} 97 | data, err := os.Open("testdata/zipslip/evil-link-traversal.tar") 98 | require.NoError(t, err) 99 | require.NoError(t, extractor.Tar(context.Background(), data, "/tmp/test", nil)) 100 | require.NoError(t, data.Close()) 101 | fmt.Print(logger) 102 | require.Empty(t, logger.Journal) 103 | }) 104 | 105 | t.Run("WindowsTarTraversal", func(t *testing.T) { 106 | if runtime.GOOS != "windows" { 107 | t.Skip("Skipped on non-Windows host") 108 | } 109 | logger := &LoggingFS{} 110 | extractor := extract.Extractor{FS: logger} 111 | data, err := os.Open("testdata/zipslip/evil-win.tar") 112 | require.NoError(t, err) 113 | require.NoError(t, extractor.Tar(context.Background(), data, "/tmp/test", nil)) 114 | 
require.NoError(t, data.Close()) 115 | fmt.Print(logger) 116 | require.Empty(t, logger.Journal) 117 | }) 118 | } 119 | 120 | func mkTempDir(t *testing.T) *paths.Path { 121 | tmp, err := paths.MkTempDir("", "test") 122 | require.NoError(t, err) 123 | t.Cleanup(func() { tmp.RemoveAll() }) 124 | return tmp 125 | } 126 | 127 | func TestSymLinkMazeHardening(t *testing.T) { 128 | addTarSymlink := func(t *testing.T, tw *tar.Writer, new, old string) { 129 | err := tw.WriteHeader(&tar.Header{ 130 | Mode: 0o0777, Typeflag: tar.TypeSymlink, Name: new, Linkname: old, 131 | }) 132 | require.NoError(t, err) 133 | } 134 | addZipSymlink := func(t *testing.T, zw *zip.Writer, new, old string) { 135 | h := &zip.FileHeader{Name: new, Method: zip.Deflate} 136 | h.SetMode(os.ModeSymlink) 137 | w, err := zw.CreateHeader(h) 138 | require.NoError(t, err) 139 | _, err = w.Write([]byte(old)) 140 | require.NoError(t, err) 141 | } 142 | 143 | t.Run("TarWithSymlinkToAbsPath", func(t *testing.T) { 144 | // Create target dir 145 | tmp := mkTempDir(t) 146 | targetDir := tmp.Join("test") 147 | require.NoError(t, targetDir.Mkdir()) 148 | 149 | // Make a tar archive with symlink maze 150 | outputTar := bytes.NewBuffer(nil) 151 | tw := tar.NewWriter(outputTar) 152 | addTarSymlink(t, tw, "aaa", tmp.String()) 153 | addTarSymlink(t, tw, "aaa/sym", "something") 154 | require.NoError(t, tw.Close()) 155 | 156 | // Run extract 157 | extractor := extract.Extractor{FS: &LoggingFS{}} 158 | require.Error(t, extractor.Tar(context.Background(), outputTar, targetDir.String(), nil)) 159 | require.NoFileExists(t, tmp.Join("sym").String()) 160 | }) 161 | 162 | t.Run("ZipWithSymlinkToAbsPath", func(t *testing.T) { 163 | // Create target dir 164 | tmp := mkTempDir(t) 165 | targetDir := tmp.Join("test") 166 | require.NoError(t, targetDir.Mkdir()) 167 | 168 | // Make a zip archive with symlink maze 169 | outputZip := bytes.NewBuffer(nil) 170 | zw := zip.NewWriter(outputZip) 171 | addZipSymlink(t, zw, "aaa", tmp.String()) 
172 | addZipSymlink(t, zw, "aaa/sym", "something") 173 | require.NoError(t, zw.Close()) 174 | 175 | // Run extract 176 | extractor := extract.Extractor{FS: &LoggingFS{}} 177 | err := extractor.Zip(context.Background(), outputZip, targetDir.String(), nil) 178 | require.NoFileExists(t, tmp.Join("sym").String()) 179 | require.Error(t, err) 180 | }) 181 | 182 | t.Run("TarWithSymlinkToRelativeExternalPath", func(t *testing.T) { 183 | // Create target dir 184 | tmp := mkTempDir(t) 185 | targetDir := tmp.Join("test") 186 | require.NoError(t, targetDir.Mkdir()) 187 | checkDir := tmp.Join("secret") 188 | require.NoError(t, checkDir.MkdirAll()) 189 | 190 | // Make a tar archive with regular symlink maze 191 | outputTar := bytes.NewBuffer(nil) 192 | tw := tar.NewWriter(outputTar) 193 | addTarSymlink(t, tw, "aaa", "../secret") 194 | addTarSymlink(t, tw, "aaa/sym", "something") 195 | require.NoError(t, tw.Close()) 196 | 197 | extractor := extract.Extractor{FS: &LoggingFS{}} 198 | require.Error(t, extractor.Tar(context.Background(), outputTar, targetDir.String(), nil)) 199 | require.NoFileExists(t, checkDir.Join("sym").String()) 200 | }) 201 | 202 | t.Run("TarWithSymlinkToInternalPath", func(t *testing.T) { 203 | // Create target dir 204 | tmp := mkTempDir(t) 205 | targetDir := tmp.Join("test") 206 | require.NoError(t, targetDir.Mkdir()) 207 | 208 | // Make a tar archive with regular symlink maze 209 | outputTar := bytes.NewBuffer(nil) 210 | tw := tar.NewWriter(outputTar) 211 | require.NoError(t, tw.WriteHeader(&tar.Header{Mode: 0o0777, Typeflag: tar.TypeDir, Name: "tmp"})) 212 | addTarSymlink(t, tw, "aaa", "tmp") 213 | addTarSymlink(t, tw, "aaa/sym", "something") 214 | require.NoError(t, tw.Close()) 215 | 216 | extractor := extract.Extractor{FS: &LoggingFS{}} 217 | require.Error(t, extractor.Tar(context.Background(), outputTar, targetDir.String(), nil)) 218 | require.NoFileExists(t, targetDir.Join("tmp", "sym").String()) 219 | }) 220 | 221 | 
t.Run("TarWithDoubleSymlinkToExternalPath", func(t *testing.T) { 222 | // Create target dir 223 | tmp := mkTempDir(t) 224 | targetDir := tmp.Join("test") 225 | require.NoError(t, targetDir.Mkdir()) 226 | fmt.Println("TMP:", tmp) 227 | fmt.Println("TARGET DIR:", targetDir) 228 | 229 | // Make a tar archive with regular symlink maze 230 | outputTar := bytes.NewBuffer(nil) 231 | tw := tar.NewWriter(outputTar) 232 | tw.WriteHeader(&tar.Header{Name: "fake", Mode: 0777, Typeflag: tar.TypeDir}) 233 | addTarSymlink(t, tw, "sym-maze", tmp.String()) 234 | addTarSymlink(t, tw, "sym-maze", "fake") 235 | addTarSymlink(t, tw, "sym-maze/oops", "/tmp/something") 236 | require.NoError(t, tw.Close()) 237 | 238 | extractor := extract.Extractor{FS: &LoggingFS{}} 239 | require.Error(t, extractor.Tar(context.Background(), outputTar, targetDir.String(), nil)) 240 | require.NoFileExists(t, tmp.Join("oops").String()) 241 | }) 242 | 243 | t.Run("TarWithSymlinkToExternalPathWithoutMazing", func(t *testing.T) { 244 | // Create target dir 245 | tmp := mkTempDir(t) 246 | targetDir := tmp.Join("test") 247 | require.NoError(t, targetDir.Mkdir()) 248 | 249 | // Make a tar archive with valid symlink maze 250 | outputTar := bytes.NewBuffer(nil) 251 | tw := tar.NewWriter(outputTar) 252 | require.NoError(t, tw.WriteHeader(&tar.Header{Mode: 0o0777, Typeflag: tar.TypeDir, Name: "tmp"})) 253 | addTarSymlink(t, tw, "aaa", "../tmp") 254 | require.NoError(t, tw.Close()) 255 | 256 | extractor := extract.Extractor{FS: &LoggingFS{}} 257 | require.NoError(t, extractor.Tar(context.Background(), outputTar, targetDir.String(), nil)) 258 | st, err := targetDir.Join("aaa").Lstat() 259 | require.NoError(t, err) 260 | require.Equal(t, "aaa", st.Name()) 261 | }) 262 | } 263 | 264 | func TestUnixPermissions(t *testing.T) { 265 | // Disable user's umask to enable creation of files with any permission, restore it after the test 266 | userUmask := UnixUmaskZero() 267 | defer UnixUmask(userUmask) 268 | 269 | 
archiveFilenames := []string{ 270 | "testdata/permissions.zip", 271 | "testdata/permissions.tar", 272 | } 273 | for _, archiveFilename := range archiveFilenames { 274 | tmp, err := paths.MkTempDir("", "") 275 | require.NoError(t, err) 276 | defer tmp.RemoveAll() 277 | 278 | f, err := paths.New(archiveFilename).Open() 279 | require.NoError(t, err) 280 | err = extract.Archive(context.Background(), f, tmp.String(), nil) 281 | require.NoError(t, err) 282 | 283 | filepath.Walk(tmp.String(), func(path string, info os.FileInfo, _ error) error { 284 | filename := filepath.Base(path) 285 | // Desired permissions indicated by part of the filenames inside the zip/tar files 286 | if strings.HasPrefix(filename, "dir") { 287 | desiredPermString := strings.Split(filename, "dir")[1] 288 | desiredPerms, _ := strconv.ParseUint(desiredPermString, 8, 32) 289 | require.Equal(t, os.ModeDir|os.FileMode(OsDirPerms(desiredPerms)), info.Mode()) 290 | } else if strings.HasPrefix(filename, "file") { 291 | desiredPermString := strings.Split(filename, "file")[1] 292 | desiredPerms, _ := strconv.ParseUint(desiredPermString, 8, 32) 293 | require.Equal(t, os.FileMode(OsFilePerms(desiredPerms)), info.Mode()) 294 | } 295 | return nil 296 | }) 297 | } 298 | } 299 | 300 | func TestZipDirectoryPermissions(t *testing.T) { 301 | // Disable user's umask to enable creation of files with any permission, restore it after the test 302 | userUmask := UnixUmaskZero() 303 | defer UnixUmask(userUmask) 304 | 305 | // This arduino library has files before their containing directories in the zip, 306 | // so a good test case that these directory permissions are created correctly 307 | archive := paths.New("testdata/filesbeforedirectories.zip") 308 | download(t, "https://downloads.arduino.cc/libraries/github.com/arduino-libraries/LiquidCrystal-1.0.7.zip", archive) 309 | 310 | tmp, err := paths.MkTempDir("", "") 311 | require.NoError(t, err) 312 | defer tmp.RemoveAll() 313 | 314 | f, err := archive.Open() 315 | 
require.NoError(t, err) 316 | err = extract.Archive(context.Background(), f, tmp.String(), nil) 317 | require.NoError(t, err) 318 | 319 | filepath.Walk(tmp.String(), func(path string, info os.FileInfo, _ error) error { 320 | // Test files and directories (excluding the parent) match permissions from the zip file 321 | if path != tmp.String() { 322 | if info.IsDir() { 323 | require.Equal(t, os.ModeDir|os.FileMode(OsDirPerms(0755)), info.Mode()) 324 | } else { 325 | require.Equal(t, os.FileMode(OsFilePerms(0644)), info.Mode()) 326 | } 327 | } 328 | return nil 329 | }) 330 | } 331 | 332 | // MockDisk is a disk that chroots to a directory 333 | type MockDisk struct { 334 | Base string 335 | } 336 | 337 | func (m MockDisk) Link(oldname, newname string) error { 338 | oldname = filepath.Join(m.Base, oldname) 339 | newname = filepath.Join(m.Base, newname) 340 | return os.Link(oldname, newname) 341 | } 342 | 343 | func (m MockDisk) MkdirAll(path string, perm os.FileMode) error { 344 | path = filepath.Join(m.Base, path) 345 | return os.MkdirAll(path, perm) 346 | } 347 | 348 | func (m MockDisk) Symlink(oldname, newname string) error { 349 | oldname = filepath.Join(m.Base, oldname) 350 | newname = filepath.Join(m.Base, newname) 351 | return os.Symlink(oldname, newname) 352 | } 353 | 354 | func (m MockDisk) OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) { 355 | name = filepath.Join(m.Base, name) 356 | return os.OpenFile(name, flag, perm) 357 | } 358 | 359 | func (m MockDisk) Remove(path string) error { 360 | return os.Remove(filepath.Join(m.Base, path)) 361 | } 362 | 363 | func (m MockDisk) Stat(name string) (os.FileInfo, error) { 364 | name = filepath.Join(m.Base, name) 365 | return os.Stat(name) 366 | } 367 | 368 | func (m MockDisk) Chmod(name string, mode os.FileMode) error { 369 | name = filepath.Join(m.Base, name) 370 | return os.Chmod(name, mode) 371 | } 372 | -------------------------------------------------------------------------------- /go.mod: 
-------------------------------------------------------------------------------- 1 | module github.com/codeclysm/extract/v4 2 | 3 | go 1.22 4 | 5 | toolchain go1.22.3 6 | 7 | require ( 8 | github.com/arduino/go-paths-helper v1.12.1 9 | github.com/h2non/filetype v1.1.3 10 | github.com/juju/errors v0.0.0-20181118221551-089d3ea4e4d5 11 | github.com/klauspost/compress v1.15.13 12 | github.com/stretchr/testify v1.9.0 13 | github.com/ulikunitz/xz v0.5.12 14 | golang.org/x/sys v0.16.0 15 | ) 16 | 17 | require ( 18 | github.com/davecgh/go-spew v1.1.1 // indirect 19 | github.com/juju/testing v0.0.0-20200510222523-6c8c298c77a0 // indirect 20 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect 21 | github.com/pmezard/go-difflib v1.0.0 // indirect 22 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect 23 | gopkg.in/yaml.v3 v3.0.1 // indirect 24 | ) 25 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/arduino/go-paths-helper v1.12.1 h1:WkxiVUxBjKWlLMiMuYy8DcmVrkxdP7aKxQOAq7r2lVM= 2 | github.com/arduino/go-paths-helper v1.12.1/go.mod h1:jcpW4wr0u69GlXhTYydsdsqAjLaYK5n7oWHfKqOG6LM= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg= 6 | github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY= 7 | github.com/juju/clock v0.0.0-20180524022203-d293bb356ca4/go.mod h1:nD0vlnrUjcjJhqN5WuCWZyzfd5AHZAC9/ajvbSx69xA= 8 | github.com/juju/errors v0.0.0-20150916125642-1b5e39b83d18/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q= 9 | github.com/juju/errors v0.0.0-20181118221551-089d3ea4e4d5 h1:rhqTjzJlm7EbkELJDKMTU7udov+Se0xZkWmugr6zGok= 10 | github.com/juju/errors 
v0.0.0-20181118221551-089d3ea4e4d5/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q= 11 | github.com/juju/loggo v0.0.0-20170605014607-8232ab8918d9 h1:Y+lzErDTURqeXqlqYi4YBYbDd7ycU74gW1ADt57/bgY= 12 | github.com/juju/loggo v0.0.0-20170605014607-8232ab8918d9/go.mod h1:vgyd7OREkbtVEN/8IXZe5Ooef3LQePvuBm9UWj6ZL8U= 13 | github.com/juju/retry v0.0.0-20160928201858-1998d01ba1c3/go.mod h1:OohPQGsr4pnxwD5YljhQ+TZnuVRYpa5irjugL1Yuif4= 14 | github.com/juju/testing v0.0.0-20200510222523-6c8c298c77a0 h1:+WWUkhnTjV6RNOxkcwk79qrjeyHEHvBzlneueBsatX4= 15 | github.com/juju/testing v0.0.0-20200510222523-6c8c298c77a0/go.mod h1:hpGvhGHPVbNBraRLZEhoQwFLMrjK8PSlO4D3nDjKYXo= 16 | github.com/juju/utils v0.0.0-20180808125547-9dfc6dbfb02b/go.mod h1:6/KLg8Wz/y2KVGWEpkK9vMNGkOnu4k/cqs8Z1fKjTOk= 17 | github.com/juju/version v0.0.0-20161031051906-1f41e27e54f2/go.mod h1:kE8gK5X0CImdr7qpSKl3xB2PmpySSmfj7zVbkZFs81U= 18 | github.com/klauspost/compress v1.15.13 h1:NFn1Wr8cfnenSJSA46lLq4wHCcBzKTSjnBIexDMMOV0= 19 | github.com/klauspost/compress v1.15.13/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= 20 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 21 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 22 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 23 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 24 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 25 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 26 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 27 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 28 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 29 | github.com/ulikunitz/xz v0.5.12 
h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= 30 | github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= 31 | golang.org/x/crypto v0.0.0-20180214000028-650f4a345ab4/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 32 | golang.org/x/net v0.0.0-20180406214816-61147c48b25b/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 33 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 34 | golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= 35 | golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 36 | gopkg.in/check.v1 v1.0.0-20160105164936-4f90aeace3a2/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 37 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= 38 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 39 | gopkg.in/mgo.v2 v2.0.0-20160818015218-f2b6f6c918c4 h1:hILp2hNrRnYjZpmIbx70psAHbBSEcQ1NIzDcUbJ1b6g= 40 | gopkg.in/mgo.v2 v2.0.0-20160818015218-f2b6f6c918c4/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= 41 | gopkg.in/yaml.v2 v2.0.0-20170712054546-1be3d31502d6 h1:CvAnnm1XvMjfib69SZzDwgWfOk+PxYz0hA0HBupilBA= 42 | gopkg.in/yaml.v2 v2.0.0-20170712054546-1be3d31502d6/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= 43 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 44 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 45 | -------------------------------------------------------------------------------- /loggingfs_test.go: -------------------------------------------------------------------------------- 1 | package extract_test 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | ) 7 | 8 | // LoggingFS is a disk that logs every operation, useful for unit-testing. 
type LoggingFS struct {
	// Journal holds one entry per filesystem call, in call order.
	Journal []*LoggedOp
}

// LoggedOp is an operation logged in a LoggingFS journal.
//
// Only the fields relevant to the given Op are populated; the others keep
// their zero value.
type LoggedOp struct {
	Op      string      // operation name: link, symlink, mkdirall, open, remove, stat or chmod
	Path    string      // path the operation acted on (the new name for link/symlink)
	OldPath string      // link/symlink only: the existing name being pointed at
	Mode    os.FileMode // mkdirall/open/chmod only: the permission bits passed by the caller
	Info    os.FileInfo // stat only: the result returned by os.Stat
	Flags   int         // open only: the flags passed to os.OpenFile
	Err     error       // error returned by the underlying os call, nil on success
}

// String renders the operation as a single human-readable line, followed by
// either " success" or " error: <message>" depending on Err.
//
// It panics when Op is not one of the operations LoggingFS can record.
func (op *LoggedOp) String() string {
	var res string
	switch op.Op {
	case "link":
		res = fmt.Sprintf("link %s -> %s", op.Path, op.OldPath)
	case "symlink":
		res = fmt.Sprintf("symlink %s -> %s", op.Path, op.OldPath)
	case "mkdirall":
		res = fmt.Sprintf("mkdirall %v %s", op.Mode, op.Path)
	case "open":
		res = fmt.Sprintf("open %v %s (flags=%04x)", op.Mode, op.Path, op.Flags)
	case "remove":
		res = fmt.Sprintf("remove %v", op.Path)
	case "stat":
		res = fmt.Sprintf("stat %v -> %v", op.Path, op.Info)
	case "chmod":
		res = fmt.Sprintf("chmod %v %s", op.Mode, op.Path)
	default:
		panic("unknown LoggedOP " + op.Op)
	}
	if op.Err != nil {
		return res + " error: " + op.Err.Error()
	}
	return res + " success"
}

// record appends op to the journal and echoes it to standard output.
func (m *LoggingFS) record(op *LoggedOp) {
	m.Journal = append(m.Journal, op)
	fmt.Println("FS>", op)
}

// Link calls os.Link and journals the call together with its outcome.
func (m *LoggingFS) Link(oldname, newname string) error {
	err := os.Link(oldname, newname)
	m.record(&LoggedOp{
		Op:      "link",
		OldPath: oldname,
		Path:    newname,
		Err:     err,
	})
	return err
}

// MkdirAll calls os.MkdirAll and journals the call together with its outcome.
func (m *LoggingFS) MkdirAll(path string, perm os.FileMode) error {
	err := os.MkdirAll(path, perm)
	m.record(&LoggedOp{
		Op:   "mkdirall",
		Path: path,
		Mode: perm,
		Err:  err,
	})
	return err
}

// Symlink calls os.Symlink and journals the call together with its outcome.
func (m *LoggingFS) Symlink(oldname, newname string) error {
	err := os.Symlink(oldname, newname)
	m.record(&LoggedOp{
		Op:      "symlink",
		OldPath: oldname,
		Path:    newname,
		Err:     err,
	})
	return err
}

// OpenFile calls os.OpenFile and journals the call together with its outcome.
// The file returned by os.OpenFile is handed back to the caller unchanged.
func (m *LoggingFS) OpenFile(name string, flags int, perm os.FileMode) (*os.File, error) {
	f, err := os.OpenFile(name, flags, perm)
	m.record(&LoggedOp{
		Op:    "open",
		Path:  name,
		Mode:  perm,
		Flags: flags,
		Err:   err,
	})
	return f, err
}

// Remove calls os.Remove and journals the call together with its outcome.
func (m *LoggingFS) Remove(path string) error {
	err := os.Remove(path)
	m.record(&LoggedOp{
		Op:   "remove",
		Path: path,
		// Record the outcome like every other operation does; without it a
		// failed remove shows up in the journal as a success.
		Err: err,
	})
	return err
}

// Stat calls os.Stat and journals the call together with its result.
func (m *LoggingFS) Stat(path string) (os.FileInfo, error) {
	info, err := os.Stat(path)
	m.record(&LoggedOp{
		Op:   "stat",
		Path: path,
		Info: info,
		Err:  err,
	})
	return info, err
}

// Chmod calls os.Chmod and journals the call together with its outcome.
func (m *LoggingFS) Chmod(path string, mode os.FileMode) error {
	err := os.Chmod(path, mode)
	m.record(&LoggedOp{
		Op:   "chmod",
		Path: path,
		Mode: mode,
		Err:  err,
	})
	return err
}

// String returns the whole journal, one operation per line, each line
// terminated by a newline. An empty journal yields the empty string.
func (m *LoggingFS) String() string {
	// Accumulate into a byte slice so the cost stays linear in the journal size.
	var buf []byte
	for _, op := range m.Journal {
		buf = append(buf, op.String()...)
		buf = append(buf, '\n')
	}
	return string(buf)
}
safeJoin(parent, subdir) 16 | require.Error(t, err, "joining '%s' and '%s'", parent, subdir) 17 | } 18 | ok("/", "more/path") 19 | ok("/path", "more/path") 20 | ok("/path/", "more/path") 21 | ok("/path/subdir", "more/path") 22 | ok("/path/subdir/", "more/path") 23 | 24 | ok("/", "..") // ! since we are extracting to / is ok-ish to accept ".."? 25 | ko("/path", "..") 26 | ko("/path/", "..") 27 | ko("/path/subdir", "..") 28 | ko("/path/subdir/", "..") 29 | 30 | ok("/", "../pathpath") // ! since we are extracting to / is ok-ish to accept "../pathpath"? 31 | ko("/path", "../pathpath") 32 | ko("/path/", "../pathpath") 33 | ko("/path/subdir", "../pathpath") 34 | ko("/path/subdir/", "../pathpath") 35 | } 36 | -------------------------------------------------------------------------------- /testdata/.gitignore: -------------------------------------------------------------------------------- 1 | big.tar.gz 2 | big.zip 3 | filesbeforedirectories.zip 4 | -------------------------------------------------------------------------------- /testdata/archive-with-backslashes.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/archive-with-backslashes.zip -------------------------------------------------------------------------------- /testdata/archive.mistery: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/archive.mistery -------------------------------------------------------------------------------- /testdata/archive.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/archive.tar.bz2 -------------------------------------------------------------------------------- 
/testdata/archive.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/archive.tar.gz -------------------------------------------------------------------------------- /testdata/archive.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/archive.tar.xz -------------------------------------------------------------------------------- /testdata/archive.tar.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/archive.tar.zst -------------------------------------------------------------------------------- /testdata/archive.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/archive.zip -------------------------------------------------------------------------------- /testdata/permissions.tar: -------------------------------------------------------------------------------- 1 | dir500/0000500000175000017500000000000014553675315011065 5ustar chrischrisdir700/0000700000175000017500000000000014553675265011075 5ustar chrischrisdir750/0000750000175000017500000000000014553675263011105 5ustar chrischrisdir755/0000755000175000017500000000000014553675257011122 5ustar chrischrisdir775/0000775000175000017500000000000014553675256011125 5ustar chrischrisdir777/0000777000175000017500000000000014553675252011125 5ustar chrischrisfile4000000400000175000017500000000000014553675317011142 0ustar chrischrisfile6000000600000175000017500000000000014553675311011140 0ustar chrischrisfile6400000640000175000017500000000000014553675305011153 0ustar 
chrischrisfile6440000644000175000017500000000000014553675277011173 0ustar chrischrisfile6640000664000175000017500000000000014553675275011175 0ustar chrischrisfile6660000666000175000017500000000000014553675274011200 0ustar chrischris -------------------------------------------------------------------------------- /testdata/permissions.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/permissions.zip -------------------------------------------------------------------------------- /testdata/singlefile.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/singlefile.bz2 -------------------------------------------------------------------------------- /testdata/singlefile.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/singlefile.gz -------------------------------------------------------------------------------- /testdata/zipslip/evil-link-traversal.tar: -------------------------------------------------------------------------------- 1 | leak0000777000000000000000000000000000000000000020640 1../../../../../../../../../../../../../../../tmp/something-importantustar0000000000000000 -------------------------------------------------------------------------------- /testdata/zipslip/evil-win.tar: -------------------------------------------------------------------------------- 1 | ..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\tmp\evil.txt0000666000000000000000000000000400000000000020720 0ustar0000000000000000TESTsome\path\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\tmp\evil.txt0000666000000000000000000000000400000000000022751 
0ustar0000000000000000TEST\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\tmp\evil.txt0000666000000000000000000000000400000000000021054 0ustar0000000000000000TEST\some\path\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\tmp\evil.txt0000666000000000000000000000000400000000000023105 0ustar0000000000000000TEST..0000666000000000000000000000000400000000000007341 0ustar0000000000000000TEST../../../../../../../../../../../../../../../../../../../../tmp/evil.txt0000666000000000000000000000000400000000000017037 0ustar0000000000000000TESTsome/path/../../../../../../../../../../../../../../../../../../../../tmp/evil.txt0000666000000000000000000000000400000000000020736 0ustar0000000000000000TEST/../../../../../../../../../../../../../../../../../../../../tmp/evil.txt0000666000000000000000000000000400000000000017116 0ustar0000000000000000TEST/some/path/../../../../../../../../../../../../../../../../../../../../tmp/evil.txt0000666000000000000000000000000400000000000021015 0ustar0000000000000000TEST -------------------------------------------------------------------------------- /testdata/zipslip/evil.tar: -------------------------------------------------------------------------------- 1 | ..0000666000000000000000000000000400000000000007341 0ustar0000000000000000TEST../../../../../../../../../../../../../../../../../../../../tmp/evil.txt0000666000000000000000000000000400000000000017037 0ustar0000000000000000TESTsome/path/../../../../../../../../../../../../../../../../../../../../tmp/evil.txt0000666000000000000000000000000400000000000020736 0ustar0000000000000000TEST/../../../../../../../../../../../../../../../../../../../../tmp/evil.txt0000666000000000000000000000000400000000000017116 0ustar0000000000000000TEST/some/path/../../../../../../../../../../../../../../../../../../../../tmp/evil.txt0000666000000000000000000000000400000000000021015 0ustar0000000000000000TEST -------------------------------------------------------------------------------- 
/testdata/zipslip/evil.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeclysm/extract/eaeff3236ebe82039553f39bc285ee5c79caae3c/testdata/zipslip/evil.zip -------------------------------------------------------------------------------- /umask_unix_test.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package extract_test 4 | 5 | import "golang.org/x/sys/unix" 6 | 7 | func UnixUmaskZero() int { 8 | return unix.Umask(0) 9 | } 10 | 11 | func UnixUmask(userUmask int) { 12 | unix.Umask(userUmask) 13 | } 14 | 15 | func OsFilePerms(unixPerms uint64) uint64 { 16 | return unixPerms 17 | } 18 | 19 | func OsDirPerms(unixPerms uint64) uint64 { 20 | return unixPerms 21 | } 22 | -------------------------------------------------------------------------------- /umask_windows_test.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package extract_test 4 | 5 | func UnixUmaskZero() int { 6 | return 0 7 | } 8 | 9 | func UnixUmask(userUmask int) { 10 | } 11 | 12 | func OsFilePerms(unixPerms uint64) uint64 { 13 | // Go on Windows just uses 666/444 for files depending on whether "read only" is set 14 | globalPerms := unixPerms >> 6 15 | return globalPerms | (globalPerms << 3) | (globalPerms << 6) 16 | } 17 | 18 | func OsDirPerms(unixPerms uint64) uint64 { 19 | // Go on Windows just uses 777 for directories 20 | return 0777 21 | } 22 | --------------------------------------------------------------------------------