├── .gitignore ├── zip ├── archive.zip ├── symlinks.zip ├── symlinks2.zip ├── symlinks3.zip ├── winshort.zip ├── specialmodes.zip ├── case-insensitive.zip ├── BUILD.bazel ├── zip_unix.go ├── zip_darwin.go ├── zip_win.go ├── zip.go └── zip_test.go ├── go.mod ├── go.sum ├── repositories.bzl ├── BUILD.bazel ├── sanitizer ├── BUILD.bazel ├── sanitizer_nix.go ├── sanitizer_test.go ├── sanitizer.go ├── sanitizer_nix_test.go ├── sanitizer_win_test.go └── sanitizer_win.go ├── tar ├── BUILD.bazel ├── tar_unix.go ├── tar_darwin.go ├── tar_win.go ├── traverse.tar ├── case-insensitive.tar ├── specialfiles.tar ├── specialmodes.tar ├── xattr.tar ├── traverse-slash-at-the-end.tar ├── traverse-via-links.tar ├── tar.go ├── tar_test.go └── winshort.tar ├── README.md ├── CONTRIBUTING.md ├── WORKSPACE └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | /bazel-* 2 | .idea 3 | *.iml 4 | -------------------------------------------------------------------------------- /zip/archive.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/safearchive/HEAD/zip/archive.zip -------------------------------------------------------------------------------- /zip/symlinks.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/safearchive/HEAD/zip/symlinks.zip -------------------------------------------------------------------------------- /zip/symlinks2.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/safearchive/HEAD/zip/symlinks2.zip -------------------------------------------------------------------------------- /zip/symlinks3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/safearchive/HEAD/zip/symlinks3.zip 
-------------------------------------------------------------------------------- /zip/winshort.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/safearchive/HEAD/zip/winshort.zip -------------------------------------------------------------------------------- /zip/specialmodes.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/safearchive/HEAD/zip/specialmodes.zip -------------------------------------------------------------------------------- /zip/case-insensitive.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/safearchive/HEAD/zip/case-insensitive.zip -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/google/safearchive 2 | 3 | go 1.21 4 | 5 | require github.com/google/go-cmp v0.6.0 // indirect 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 2 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 3 | -------------------------------------------------------------------------------- /repositories.bzl: -------------------------------------------------------------------------------- 1 | load("@bazel_gazelle//:deps.bzl", "go_repository") 2 | 3 | def go_repositories(): 4 | go_repository( 5 | name = "go_cmp", 6 | build_file_proto_mode = "disable_global", 7 | importpath = "github.com/google/go-cmp", 8 | sum = "h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=", 9 | version = "v0.6.0", 10 | ) 11 | 
-------------------------------------------------------------------------------- /BUILD.bazel: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | load("@bazel_gazelle//:def.bzl", "gazelle") 4 | 5 | # gazelle:prefix github.com/google/safearchive 6 | # gazelle:go_naming_convention import_alias 7 | gazelle(name = "gazelle") 8 | 9 | load("@com_github_bazelbuild_buildtools//buildifier:def.bzl", "buildifier") 10 | 11 | buildifier( 12 | name = "buildifier", 13 | ) 14 | 15 | -------------------------------------------------------------------------------- /zip/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | package(default_visibility = ["//visibility:public"]) 6 | 7 | go_library( 8 | name = "zip", 9 | srcs = [ 10 | "zip.go", 11 | "zip_darwin.go", 12 | "zip_unix.go", 13 | "zip_win.go", 14 | ], 15 | importpath = "github.com/google/safearchive/zip", 16 | visibility = ["//visibility:public"], 17 | deps = ["//sanitizer"], 18 | ) 19 | 20 | alias( 21 | name = "go_default_library", 22 | actual = ":zip", 23 | visibility = ["//visibility:public"], 24 | ) 25 | 26 | go_test( 27 | name = "zip_test", 28 | size = "small", 29 | srcs = ["zip_test.go"], 30 | embed = [":zip"], 31 | embedsrcs = glob(["*.zip"]), 32 | deps = [ 33 | ], 34 | ) 35 | -------------------------------------------------------------------------------- /sanitizer/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | package(default_visibility = ["//visibility:public"]) 6 | 7 | go_library( 8 | name = "sanitizer", 9 | srcs = [ 10 | "sanitizer.go", 11 | "sanitizer_nix.go", 12 | "sanitizer_win.go", 13 | ], 14 | importpath = 
"github.com/google/safearchive/sanitizer", 15 | visibility = ["//visibility:public"], 16 | ) 17 | 18 | alias( 19 | name = "go_default_library", 20 | actual = ":sanitizer", 21 | visibility = ["//visibility:public"], 22 | ) 23 | 24 | go_test( 25 | name = "sanitizer_test", 26 | size = "small", 27 | srcs = [ 28 | "sanitizer_nix_test.go", 29 | "sanitizer_test.go", 30 | "sanitizer_win_test.go", 31 | ], 32 | embed = [":sanitizer"], 33 | ) 34 | -------------------------------------------------------------------------------- /tar/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | package(default_visibility = ["//visibility:public"]) 6 | 7 | go_library( 8 | name = "tar", 9 | srcs = [ 10 | "tar.go", 11 | "tar_darwin.go", 12 | "tar_unix.go", 13 | "tar_win.go", 14 | ], 15 | importpath = "github.com/google/safearchive/tar", 16 | visibility = ["//visibility:public"], 17 | deps = ["//sanitizer"], 18 | ) 19 | 20 | alias( 21 | name = "go_default_library", 22 | actual = ":tar", 23 | visibility = ["//visibility:public"], 24 | ) 25 | 26 | go_test( 27 | name = "tar_test", 28 | size = "small", 29 | srcs = ["tar_test.go"], 30 | embed = [":tar"], 31 | embedsrcs = glob(["*.tar"]), 32 | deps = [ 33 | "@go_cmp//cmp", 34 | "@go_cmp//cmp/cmpopts", 35 | ], 36 | ) 37 | -------------------------------------------------------------------------------- /tar/tar_unix.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build !windows && !darwin 16 | // +build !windows,!darwin 17 | 18 | package tar 19 | 20 | // DefaultSecurityMode is a set of security features that are enabled by default. 21 | const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal 22 | -------------------------------------------------------------------------------- /tar/tar_darwin.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build darwin 16 | // +build darwin 17 | 18 | package tar 19 | 20 | // DefaultSecurityMode is a set of security features that are enabled by default. 
21 | const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | PreventCaseInsensitiveSymlinkTraversal 22 | -------------------------------------------------------------------------------- /tar/tar_win.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build windows 16 | // +build windows 17 | 18 | package tar 19 | 20 | // DefaultSecurityMode is a set of security features that are enabled by default. 21 | const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | PreventCaseInsensitiveSymlinkTraversal | SkipWindowsShortFilenames 22 | -------------------------------------------------------------------------------- /zip/zip_unix.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build !windows && !darwin 16 | // +build !windows,!darwin 17 | 18 | package zip 19 | 20 | // DefaultSecurityMode enables path traversal security measures. This mode should be safe for all 21 | // existing integrations. 22 | const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal 23 | -------------------------------------------------------------------------------- /zip/zip_darwin.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build darwin 16 | // +build darwin 17 | 18 | package zip 19 | 20 | // DefaultSecurityMode enables path traversal security measures. This mode should be safe for all 21 | // existing integrations. 22 | const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | PreventCaseInsensitiveSymlinkTraversal 23 | -------------------------------------------------------------------------------- /zip/zip_win.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build windows 16 | // +build windows 17 | 18 | package zip 19 | 20 | // DefaultSecurityMode enables path traversal security measures. This mode should be safe for all 21 | // existing integrations. 22 | const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | PreventCaseInsensitiveSymlinkTraversal | SkipWindowsShortFilenames 23 | -------------------------------------------------------------------------------- /sanitizer/sanitizer_nix.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build !windows 16 | // +build !windows 17 | 18 | package sanitizer 19 | 20 | import ( 21 | "path/filepath" 22 | "strings" 23 | ) 24 | 25 | var ( 26 | nixReplacer = strings.NewReplacer(`\`, `/`) 27 | ) 28 | 29 | func sanitizePath(in string) string { 30 | 31 | // normalizing path separators (something filepath.Clean will do it for us on Windows, but not 32 | // on the other platforms) 33 | in = nixReplacer.Replace(in) 34 | 35 | return strings.TrimPrefix(filepath.Clean(nixPathSeparator+in), nixPathSeparator) 36 | } 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # safearchive 2 | 3 | **This is not an officially supported Google product.** 4 | 5 | Safe-by-construction libraries for processing `tar` and `zip` archives, to 6 | replace unsafe alternatives like `archive/tar` and `archive/zip` that are at 7 | risk of path traversal attacks. Besides crafted filename entries in the archive, 8 | this library also protects from symbolic link attacks. 9 | 10 | ## Usage 11 | 12 | These libraries are fully compatible with their golang core counterpart, so 13 | switching to them is as easy as changing the library import at the top, no 14 | further modifications are needed. 15 | 16 | The built-in security measures can be turned on or off one by one. Only those 17 | security checks are enabled by default that do not break existing setups. 
18 | 19 | You may enable the other features individually like this: 20 | 21 | ``` 22 | tr := tar.NewReader(buf) 23 | tr.SetSecurityMode(tr.GetSecurityMode() | tar.SanitizeFileMode | tar.DropXattrs) 24 | ``` 25 | 26 | or 27 | 28 | ``` 29 | tr.SetSecurityMode(tar.MaximumSecurityMode) 30 | ``` 31 | 32 | You may opt out from a certain feature like this: 33 | 34 | ``` 35 | tr.SetSecurityMode(tr.GetSecurityMode() &^ tar.SanitizeFileMode) 36 | ``` 37 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement (CLA). You (or your employer) retain the copyright to your 10 | contribution; this simply gives us permission to use and redistribute your 11 | contributions as part of the project. Head over to 12 | <https://cla.developers.google.com/> to see your current agreements on file or 13 | to sign a new one. 14 | 15 | You generally only need to submit a CLA once, so if you've already submitted one 16 | (even if it was for a different project), you probably don't need to do it 17 | again. 18 | 19 | ## Code Reviews 20 | 21 | All submissions, including submissions by project members, require review. We 22 | use GitHub pull requests for this purpose. Consult 23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 24 | information on using pull requests. 25 | 26 | ## Community Guidelines 27 | 28 | This project follows 29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 
30 | -------------------------------------------------------------------------------- /sanitizer/sanitizer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package sanitizer 16 | 17 | import ( 18 | "strings" 19 | "testing" 20 | ) 21 | 22 | func TestHasWindowsShortFilenames(t *testing.T) { 23 | tests := []struct { 24 | in string 25 | want bool 26 | }{ 27 | {in: "ANDROI~2", want: true}, 28 | {in: "foo/ANDROI~2", want: true}, 29 | {in: "ANDROI~2/bar", want: true}, 30 | {in: "foo/ANDROI~2/bar", want: true}, 31 | // Same with different case 32 | {in: "Androi~2", want: true}, 33 | {in: "foo/Androi~2", want: true}, 34 | {in: "Androi~2/bar", want: true}, 35 | {in: "foo/Androi~2/bar", want: true}, 36 | // File extension 37 | {in: "FOOOOO~1.JPG ", want: true}, 38 | {in: "foo/FOOOOO~1.JPG", want: true}, 39 | {in: "FOOOOO~1.JPG/bar", want: true}, 40 | {in: "foo/FOOOOO~1.JPG/bar", want: true}, 41 | // Not a short filename 42 | {in: "3D Objects", want: false}, 43 | {in: "Some~Stuff", want: false}, 44 | } 45 | for _, tc := range tests { 46 | for _, a := range []string{tc.in, strings.ReplaceAll(tc.in, "\\", "/")} { 47 | got := HasWindowsShortFilenames(a) 48 | if got != tc.want { 49 | t.Errorf("HasWindowsShortFilenames(%q) = %v, want %v", a, got, tc.want) 50 | } 51 | } 52 | } 53 | } 54 | 
-------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | workspace(name = "safearchive") 2 | 3 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") 4 | 5 | # Bazel's Go rules. 6 | # 7 | # Last updated: October 11, 2024. 8 | http_archive( 9 | name = "io_bazel_rules_go", 10 | sha256 = "f4a9314518ca6acfa16cc4ab43b0b8ce1e4ea64b81c38d8a3772883f153346b8", 11 | urls = [ 12 | "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.50.1/rules_go-v0.50.1.zip", 13 | "https://github.com/bazelbuild/rules_go/releases/download/v0.50.1/rules_go-v0.50.1.zip", 14 | ], 15 | ) 16 | 17 | http_archive( 18 | name = "bazel_gazelle", 19 | sha256 = "5982e5463f171da99e3bdaeff8c0f48283a7a5f396ec5282910b9e8a49c0dd7e", 20 | urls = [ 21 | "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.25.0/bazel-gazelle-v0.25.0.tar.gz", 22 | "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.25.0/bazel-gazelle-v0.25.0.tar.gz", 23 | ], 24 | ) 25 | 26 | load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies") 27 | # gazelle:repository go_repository name=org_golang_x_xerrors importpath=golang.org/x/xerrors 28 | 29 | load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_dependencies") 30 | load("//:repositories.bzl", "go_repositories") 31 | 32 | # gazelle:repository_macro repositories.bzl%go_repositories 33 | go_repositories() 34 | 35 | go_rules_dependencies() 36 | 37 | go_register_toolchains(version = "1.18.3") 38 | 39 | gazelle_dependencies() 40 | 41 | http_archive( 42 | name = "com_google_protobuf", 43 | sha256 = "3bd7828aa5af4b13b99c191e8b1e884ebfa9ad371b0ce264605d347f135d2568", 44 | strip_prefix = "protobuf-3.19.4", 45 | urls = [ 46 | "https://github.com/protocolbuffers/protobuf/archive/v3.19.4.tar.gz", 47 | ], 48 | ) 49 | 50 | load("@com_google_protobuf//:protobuf_deps.bzl", 
"protobuf_deps") 51 | 52 | protobuf_deps() 53 | 54 | http_archive( 55 | name = "com_github_bazelbuild_buildtools", 56 | sha256 = "ae34c344514e08c23e90da0e2d6cb700fcd28e80c02e23e4d5715dddcb42f7b3", 57 | strip_prefix = "buildtools-4.2.2", 58 | urls = [ 59 | "https://github.com/bazelbuild/buildtools/archive/refs/tags/4.2.2.tar.gz", 60 | ], 61 | ) 62 | -------------------------------------------------------------------------------- /sanitizer/sanitizer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package sanitizer is a lightweight library that facilitates the safearchive libraries to 16 | // prevent path traversal attempts by sanitize file paths. 17 | package sanitizer 18 | 19 | import ( 20 | "os" 21 | "regexp" 22 | "strings" 23 | ) 24 | 25 | const ( 26 | winPathSeparator = `\` 27 | nixPathSeparator = `/` 28 | ) 29 | 30 | var ( 31 | winShortFilenameRegex = regexp.MustCompile(`~\d+\.?`) 32 | ) 33 | 34 | // SanitizePath sanitizes the supplied path by purely lexical processing. 35 | // The return value is safe to be joined together with a base directory (if the basedir is empty 36 | // and no symlinks are present there). 37 | // Join(base, SanitizePath(path)) will always produce a path contained within base and Clean(path) 38 | // will always produce an unrooted path with no ".." 
path elements. 39 | // If the input path had a directory separator at the end, the sanitized version will preserve that. 40 | func SanitizePath(in string) string { 41 | sanitized := sanitizePath(in) 42 | 43 | // Add back trailing / if safe 44 | if len(in) > 0 && 45 | (in[len(in)-1] == nixPathSeparator[0] || in[len(in)-1] == winPathSeparator[0]) && 46 | len(sanitized) > 0 { 47 | sanitized = sanitized + string(os.PathSeparator) 48 | } 49 | 50 | return sanitized 51 | } 52 | 53 | // HasWindowsShortFilenames reports if any path component look like a Windows short filename. 54 | // Short filenames on Windows may look like this: 55 | // 1(3)~1.PNG 1 (3) (1).png 56 | // DOWNLO~1 Downloads 57 | // FOOOOO~1.JPG fooooooooo.png.gif.jpg 58 | func HasWindowsShortFilenames(in string) bool { 59 | in = strings.ReplaceAll(in, "\\", "/") 60 | parts := strings.Split(in, "/") 61 | for _, part := range parts { 62 | matched := winShortFilenameRegex.MatchString(part) 63 | if matched { 64 | return true 65 | } 66 | } 67 | return false 68 | } 69 | -------------------------------------------------------------------------------- /sanitizer/sanitizer_nix_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build !windows 16 | // +build !windows 17 | 18 | package sanitizer 19 | 20 | import ( 21 | "testing" 22 | ) 23 | 24 | func TestSanitizePathUnix(t *testing.T) { 25 | type testCase struct { 26 | input, expected string 27 | } 28 | 29 | testCases := map[string][]testCase{ 30 | "AbsolutePaths": []testCase{ 31 | {"/some/thing", `some/thing`}, 32 | {`C:\some\thing`, `C:/some/thing`}, 33 | {`c:\some\thing`, `c:/some/thing`}, 34 | {`C:/some/thing`, `C:/some/thing`}, 35 | {`\some\thing`, `some/thing`}, 36 | }, 37 | "UNCPaths": []testCase{ 38 | {`\\FILESHARE\stuff\thing`, `FILESHARE/stuff/thing`}, 39 | {`//FILESHARE/stuff/thing`, `FILESHARE/stuff/thing`}, 40 | }, 41 | "BackslashBackslashSpecial": []testCase{ 42 | {`\\.\C:\some\path`, `C:/some/path`}, 43 | {`//./C:/some\path`, `C:/some/path`}, 44 | {`/\.\C:\some\path`, `C:/some/path`}, 45 | {`\\?\Volume{96f0460f-a710-40e3-ad53-76530201cf29}\some.txt`, `?/Volume{96f0460f-a710-40e3-ad53-76530201cf29}/some.txt`}, 46 | }, 47 | "AlternativeDataStreams": []testCase{ 48 | {`something.txt:alternate`, `something.txt:alternate`}, 49 | {`something.txt::$DATA`, `something.txt::$DATA`}, 50 | }, 51 | "ReservedFilenames": []testCase{ 52 | {`somedir\LPT1`, `somedir/LPT1`}, 53 | {`somedir\LPT1\somefile`, `somedir/LPT1/somefile`}, 54 | }, 55 | "RelativePaths": []testCase{ 56 | {`../../some/thing`, `some/thing`}, 57 | {`../../some/thing`, `some/thing`}, 58 | {`..\..\some\thing`, `some/thing`}, 59 | }, 60 | "TrailingSlash": []testCase{ 61 | {`some/path/`, `some/path/`}, 62 | {`some/path\`, `some/path/`}, 63 | }, 64 | } 65 | 66 | for testName, tests := range testCases { 67 | t.Run(testName, func(t *testing.T) { 68 | for _, tc := range tests { 69 | sanitized := SanitizePath(tc.input) 70 | if tc.expected != sanitized { 71 | t.Errorf("SanitizePath(%q) = %q, want %q", tc.input, sanitized, tc.expected) 72 | } 73 | } 74 | }) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- 
/tar/traverse.tar: -------------------------------------------------------------------------------- 1 | readme.txt0000600000000000000000000000000000000000000011161 0ustar0000000000000000/gopher.txt0000600000000000000000000000000000000000000011267 0ustar0000000000000000../todo.txt0000600000000000000000000000000000000000000011104 0ustar0000000000000000 -------------------------------------------------------------------------------- /sanitizer/sanitizer_win_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build windows 16 | // +build windows 17 | 18 | package sanitizer 19 | 20 | import ( 21 | "testing" 22 | ) 23 | 24 | func TestSanitizePathWindows(t *testing.T) { 25 | type testCase struct { 26 | input, expected string 27 | } 28 | 29 | testCases := map[string][]testCase{ 30 | "AbsolutePaths": []testCase{ 31 | {"/some/thing", `some\thing`}, 32 | {`C:\some\thing`, `C\some\thing`}, 33 | {`c:\some\thing`, `c\some\thing`}, 34 | {`C:/some/thing`, `C\some\thing`}, 35 | {`\some\thing`, `some\thing`}, 36 | }, 37 | "FileExtensions": []testCase{ 38 | {`some.txt\thing`, `some.txt\thing`}, 39 | {`some.ext1.ext2\thing`, `some.ext1.ext2\thing`}, 40 | {`some.ext1.ext2`, `some.ext1.ext2`}, 41 | {`some.txt`, `some.txt`}, 42 | }, 43 | "UNCPaths": []testCase{ 44 | {`\\FILESHARE\stuff\thing`, `FILESHARE\stuff\thing`}, 45 | {`//FILESHARE/stuff/thing`, `FILESHARE\stuff\thing`}, 46 | }, 47 | "BackslashBackslashSpecial": []testCase{ 48 | {`\\.\C:\some\path`, `C\some\path`}, 49 | {`//./C:/some\path`, `C\some\path`}, 50 | {`/\.\C:\some\path`, `C\some\path`}, 51 | {`\\?\Volume{96f0460f-a710-40e3-ad53-76530201cf29}\some.txt`, `Volume{96f0460f-a710-40e3-ad53-76530201cf29}\some.txt`}, 52 | }, 53 | "NTprefix": []testCase{ 54 | {`\??\C:\some\path`, `C\some\path`}, 55 | {`\??\Volume{96f0460f-a710-40e3-ad53-76530201cf29}\some.txt`, `Volume{96f0460f-a710-40e3-ad53-76530201cf29}\some.txt`}, 56 | }, 57 | "AlternativeDataStreams": []testCase{ 58 | {`something.txt:alternate`, `something.txt\alternate`}, 59 | {`something.txt::$DATA`, `something.txt\$DATA`}, 60 | }, 61 | "ReservedFilenames": []testCase{ 62 | {`somedir\LPT` + ss1, `somedir\LPT` + ss1 + `-safe`}, 63 | {`somedir\LPT` + ss2, `somedir\LPT` + ss2 + `-safe`}, 64 | {`somedir\LPT` + ss3, `somedir\LPT` + ss3 + `-safe`}, 65 | {`somedir\CONIN$`, `somedir\CONIN$-safe`}, 66 | {`somedir\CONIN$ `, `somedir\CONIN$ -safe`}, 67 | {`somedir\CONIN$ .txt`, `somedir\CONIN$ -safe.txt`}, 68 | {`somedir\CONOUT$`, `somedir\CONOUT$-safe`}, 69 | 
{`somedir\CONOUT$ `, `somedir\CONOUT$ -safe`}, 70 | {`somedir\CONOUT$ .txt`, `somedir\CONOUT$ -safe.txt`}, 71 | {`somedir\LPT1`, `somedir\LPT1-safe`}, 72 | {`somedir\LPT1.foo`, `somedir\LPT1-safe.foo`}, 73 | {`somedir\LPT1 .foo`, `somedir\LPT1 -safe.foo`}, 74 | {`somedir\LPT1 .foo`, `somedir\LPT1 -safe.foo`}, 75 | {`somedir\LPT` + ss1 + ` .foo`, `somedir\LPT` + ss1 + ` -safe.foo`}, 76 | {`somedir\LPT1\somefile`, `somedir\LPT1-safe\somefile`}, 77 | {`somedir\LPT1.foo\somefile`, `somedir\LPT1-safe.foo\somefile`}, 78 | {`somedir\LPT1 .foo\somefile`, `somedir\LPT1 -safe.foo\somefile`}, 79 | {`somedir\LPT` + ss1 + `\somefile`, `somedir\LPT` + ss1 + `-safe\somefile`}, 80 | }, 81 | "RelativePaths": []testCase{ 82 | {`../../some/thing`, `some\thing`}, 83 | {`../../some/thing`, `some\thing`}, 84 | {`..\..\some\thing`, `some\thing`}, 85 | }, 86 | "QuestionMark": []testCase{ 87 | {`some?.txt`, `some\.txt`}, 88 | {`some.txt?`, `some.txt`}, 89 | }, 90 | "TrailingSlash": []testCase{ 91 | {`some\path/`, `some\path\`}, 92 | {`some\path\`, `some\path\`}, 93 | }, 94 | } 95 | 96 | for testName, tests := range testCases { 97 | t.Run(testName, func(t *testing.T) { 98 | for _, tc := range tests { 99 | sanitized := SanitizePath(tc.input) 100 | if tc.expected != sanitized { 101 | t.Errorf("SanitizePath(%q) = %q, want %q", tc.input, sanitized, tc.expected) 102 | } 103 | } 104 | }) 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /sanitizer/sanitizer_win.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build windows
// +build windows

package sanitizer

import (
	"path/filepath"
	"strings"
)

var (
	// replacer rewrites every ':', '/' and '?' in a path to a backslash.
	replacer = strings.NewReplacer(`:`, `\`, `/`, `\`, `?`, `\`)

	ss1 = "\u00B9" // Superscript One https://www.compart.com/en/unicode/U+00B9
	ss2 = "\u00B2" // Superscript Two https://www.compart.com/en/unicode/U+00B2
	ss3 = "\u00B3" // Superscript Three https://www.compart.com/en/unicode/U+00B3
)

// isReservedName reports if name is a Windows reserved device name or a console handle.
// It does not detect names with an extension, which are also reserved on some Windows versions.
//
// The recognized names are CON, CONIN$, CONOUT$, PRN, AUX, NUL, and COM/LPT
// followed by one of the digits 1-9 or by a superscript one, two or three.
// The three-letter prefix and the IN$/OUT$ suffixes are matched
// case-insensitively, and any amount of trailing whitespace after the
// reserved name is still considered a match.
//
// For details, search for PRN in
// https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file.
//
// This is borrowed from https://github.com/golang/go/blob/master/src/path/filepath/path_windows.go
// and fixed.
func isReservedName(name string) bool {
	nameLen := len(name)
	// Every reserved name starts with a three-byte prefix.
	if nameLen < 3 {
		return false
	}

	// reservedNameLen is the byte length of the reserved name found at the
	// start of name; it stays 0 when no reserved name was recognized.
	reservedNameLen := 0
	prefix := strings.ToUpper(name[0:3])
	switch prefix {
	case "CON":
		reservedNameLen = 3

		// Passing CONIN$ or CONOUT$ to CreateFile opens a console handle.
		// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea#consoles
		//
		// While CONIN$ and CONOUT$ aren't documented as being files,
		// they behave the same as CON. For example, ./CONIN$ also opens the console input.

		// Extend the match over the IN$ / OUT$ suffix when it is present.
		if nameLen >= 6 && name[5] == '$' && strings.EqualFold(name[3:6], "IN$") {
			reservedNameLen += 3
		}
		if nameLen >= 7 && name[6] == '$' && strings.EqualFold(name[3:7], "OUT$") {
			reservedNameLen += 4
		}

	case "PRN", "AUX", "NUL":
		reservedNameLen = 3
	case "COM", "LPT":
		// these two reserved names must be followed by a digit or a SUPERSCRIPT
		if nameLen >= 4 {
			switch name[3] {
			case '1', '2', '3', '4', '5', '6', '7', '8', '9':
				reservedNameLen = 4
			case ss1[0]: // unicode
				// The superscripts are two bytes long in UTF-8 and share their
				// first byte, so only the second byte tells them apart.
				if nameLen >= 5 {
					switch name[4] {
					case ss1[1], ss2[1], ss3[1]:
						reservedNameLen = 5
					}
				}
			}
		}
	}

	// All the reserved names may be followed by optional whitespaces
	if reservedNameLen != 0 && strings.TrimSpace(name[reservedNameLen:]) == "" {
		return true
	}

	return false
}

// sanitizePath turns in into a relative, backslash-separated path.
//
// It first rewrites ':', '/' and '?' to backslashes, then normalizes the
// result with filepath.Clean and strips the leading separators. Afterwards
// every path component whose part before the first '.' is a reserved name
// (see isReservedName) gets "-safe" appended to that part, in front of the
// extension.
//
// NOTE(review): winPathSeparator is declared elsewhere in the package; it is
// presumably `\` - confirm.
func sanitizePath(in string) string {
	// we get rid of : (ADS or drive letter specifier)
	in = replacer.Replace(in)

	// note: we do clean(trim(clean())) so even weird syntax like \\.\C:\something is sanitized safely
	tmp := filepath.Clean(strings.TrimLeft(filepath.Clean(winPathSeparator+in), winPathSeparator))

	sb := strings.Builder{}

	// time to deal with reserved path components (e.g. LPT1), if any
	// at this point, the path separators in tmp are already normalized (\)
	first := true
	for p := tmp; p != ""; {
		var part string
		// Peel off the next component; p keeps the remainder of the path.
		part, p, _ = strings.Cut(p, winPathSeparator)
		// Trim the extension and look for a reserved name.
		base, ext, _ := strings.Cut(part, ".")
		if first {
			first = false
		} else {
			sb.WriteString(winPathSeparator)
		}
		sb.WriteString(base)
		if isReservedName(base) {
			sb.WriteString("-safe")
		}
		// The '.' is written back only together with a non-empty extension,
		// so a component that ends in a bare '.' loses that dot.
		if ext != "" {
			sb.WriteString(".")
			sb.WriteString(ext)
		}
	}

	return sb.String()
}

-------------------------------------------------------------------------------- /tar/case-insensitive.tar: -------------------------------------------------------------------------------- 1 | tmp0000777000000000000000000000000014701733663010367 2/ustar rootrootTmp/test-file0000644000000000000000000000000514701733705012135 0ustar rootroothelo 2 | -------------------------------------------------------------------------------- /tar/specialfiles.tar: -------------------------------------------------------------------------------- 1 | fifo0000640376027102575230000000000014402107572012406 6ustar imrerprimarygroupnull0000644000000000000000000000000014402107651011706 3ustar rootroot00000010000003sda0000644000000000000000000000000014402110035011467 4ustar rootroot00000100000000dir/0000750376027102575230000000000014402110174012311 5ustar imrerprimarygroupregular.txt0000640376027102575230000000001014402107677013743 0ustar imrerprimarygroupregular 2 | symlink0000777376027102575230000000000014402110211015340 2regular.txtustar imrerprimarygrouphardlink0000640376027102575230000000000014402107677015460 1regular.txtustar imrerprimarygroup -------------------------------------------------------------------------------- /tar/specialmodes.tar: -------------------------------------------------------------------------------- 1 | setuidstuff.txt0006640376027102575230000000001414402313604014646 0ustar imrerprimarygroupsetuidstuff 2 | setuidstuff2.txt0006750376027102575230000000001514402313604014733 0ustar imrerprimarygroupsetuidstuff2 3 | tmpstuff.txt0001640376027102575230000000001114402313604014141 0ustar
imrerprimarygrouptmpstuff 4 | somedir/0000750376027102575230000000000014402313604013200 5ustar imrerprimarygroup -------------------------------------------------------------------------------- /tar/xattr.tar: -------------------------------------------------------------------------------- 1 | ./PaxHeaders/something.txt0000644000000000000000000000017114402124734013033 xustar0029 mtime=1678289372.94598663 2 | 29 atime=1678289372.94598663 3 | 30 ctime=1678289392.687447969 4 | 33 SCHILY.xattr.user.hello=world 5 | something.txt0000640376027102575230000000001214402124734015572 0ustar00imrerprimarygroup00000000000000something 6 | -------------------------------------------------------------------------------- /tar/traverse-slash-at-the-end.tar: -------------------------------------------------------------------------------- 1 | linktoroot/0000777376027102575230000000000014407052003014025 2/ustar imrerprimarygrouplinktoroot/root/.bashrc0000640376027102575230000000000514407052003016162 0ustar imrerprimarygrouppwnd 2 | -------------------------------------------------------------------------------- /tar/traverse-via-links.tar: -------------------------------------------------------------------------------- 1 | linktoroot0000777376027102575230000000000014402054301013744 2/ustar imrerprimarygrouplinktoroot/root/.bashrc0000664376027102575230000000000514402054367016202 0ustar imrerprimarygrouppwnd 2 | linktoescape0000777376027102575230000000000014402054542016577 2../outside.txtustar imrerprimarygrouplinktoescape0000664376027102575230000000000614402054570014151 0ustar imrerprimarygrouppwnd2 3 | -------------------------------------------------------------------------------- /zip/zip.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package zip is a drop-in replacement for archive/zip with a security focus. 16 | // 17 | // To prevent security implications (e.g. directory traversal) of attacker controlled crafted zip 18 | // archives, this library sanitizes 19 | // - file names (bogus filename entries like ../something are fixed on the fly) 20 | // - the file mode (removing special bits like setuid) 21 | // It also: 22 | // - skips symbolic link entries 23 | // - skips special file types silently (fifos, device nodes, char devices, etc.) 24 | // 25 | // Each of these features can be turned on or off one-by-one via the SetSecurityMode 26 | // method of the Reader/ReadCloser. 27 | // 28 | // Features turned on by default: 29 | // - SanitizeFilenames 30 | // - PreventSymlinkTraversal 31 | // These two features are compatible with all known legitimate use-cases.
//
// You may enable the other features individually like this:
//
//	tr, err := zip.OpenReader("some.zip")
//	// handle err
//	tr.SetSecurityMode(tr.GetSecurityMode() | zip.SanitizeFileMode | zip.SkipSpecialFiles)
//
// or
//
//	tr.SetSecurityMode(zip.MaximumSecurityMode)
//
// You may opt out from a certain feature like this:
//
//	tr.SetSecurityMode(tr.GetSecurityMode() &^ zip.SanitizeFileMode)
package zip

import (
	"archive/zip" // NOLINT
	"io"
	"io/fs"
	"strings"

	"github.com/google/safearchive/sanitizer"
)

// Compression method identifiers, re-exported from archive/zip.
const (
	// Store no compression
	Store uint16 = zip.Store
	// Deflate DEFLATE compressed
	Deflate uint16 = zip.Deflate
)

// Error values, re-exported from archive/zip so they compare equal to the
// upstream ones.
var (
	// ErrFormat not a valid zip file
	ErrFormat = zip.ErrFormat
	// ErrAlgorithm unsupported compression algorithm
	ErrAlgorithm = zip.ErrAlgorithm
	// ErrChecksum checksum error
	ErrChecksum = zip.ErrChecksum
)

// A Compressor returns a new compressing writer, writing to w.
// The WriteCloser's Close method must be used to flush pending data to w.
// The Compressor itself must be safe to invoke from multiple goroutines
// simultaneously, but each returned writer will be used only by
// one goroutine at a time.
type Compressor = zip.Compressor

// A Decompressor returns a new decompressing reader, reading from r.
// The ReadCloser's Close method must be used to release associated resources.
// The Decompressor itself must be safe to invoke from multiple goroutines
// simultaneously, but each returned reader will be used only by
// one goroutine at a time.
type Decompressor = zip.Decompressor

// A File is a single file in a ZIP archive.
// The file information is in the embedded FileHeader.
// The file content can be accessed by calling Open.
type File = zip.File

// FileHeader describes a file within a zip file.
// See the zip spec for details.
type FileHeader = zip.FileHeader

// Note: the upstream ReadCloser/Reader structs are wrapped (not aliased) so
// that the SetSecurityMode method can be added; it reapplies the security
// logic to the entries of the archive. Wrapping was chosen to stay signature
// compatible with archive/zip.

// A ReadCloser is a Reader that must be closed when no longer needed.
type ReadCloser struct {
	Reader
	// upstreamReadCloser is the archive/zip handle that Close releases.
	upstreamReadCloser *zip.ReadCloser
}

// A Reader serves content from a ZIP archive.
type Reader struct {
	*zip.Reader
	// originalFiles is the entry list that SetSecurityMode filters from.
	originalFiles []*zip.File
	// securityMode is the mode most recently passed to SetSecurityMode.
	securityMode SecurityMode
}

// Writer implements a zip file writer.
type Writer = zip.Writer

// SecurityMode is a bit set selecting the security features to enforce.
type SecurityMode int

// The individual security features. Each one is a distinct bit, so they can
// be combined with | and removed with &^.
const (
	// PreventSymlinkTraversal drops entries whose path leads through a symlink
	// entry seen earlier in the archive.
	// This feature is enabled by default.
	PreventSymlinkTraversal SecurityMode = 1 << iota
	// SkipSpecialFiles skips special files (e.g. block devices or fifos); links are still allowed.
	// This feature is not enabled by default.
	SkipSpecialFiles
	// SanitizeFileMode drops special file mode bits (e.g. setuid and the tmp bit).
	// This feature is not enabled by default.
	SanitizeFileMode
	// SanitizeFilenames sanitizes filenames (dropping .. path components and turning entries into relative ones).
	// This feature is enabled by default.
	SanitizeFilenames
	// PreventCaseInsensitiveSymlinkTraversal activates case insensitive symlink traversal detection.
	// This feature requires PreventSymlinkTraversal to be enabled as well.
	// By default, this is activated only on MacOS and Windows builds. If you are extracting to a
	// case insensitive filesystem on a Unix platform, you should activate this feature explicitly.
	PreventCaseInsensitiveSymlinkTraversal
	// SkipWindowsShortFilenames drops archive entries that have a path component that looks like a
	// Windows short filename (e.g. GIT~1).
	// By default, this is activated only on Windows builds. If you are extracting to a Windows
	// filesystem on a non-Windows platform, you should activate this feature explicitly.
	SkipWindowsShortFilenames
)

// MaximumSecurityMode enables all security features. Apps that care about file contents only
// and nothing unix specific (e.g. file modes or special devices) should use this mode.
const MaximumSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | SanitizeFileMode | SkipSpecialFiles | PreventCaseInsensitiveSymlinkTraversal | SkipWindowsShortFilenames

// isSpecialFile reports whether the mode of f has any of the device, named
// pipe, socket, char device or irregular bits set. Symlinks and directories
// are not considered special.
func isSpecialFile(f zip.File) bool {
	const specialBits = fs.ModeDevice | fs.ModeNamedPipe | fs.ModeSocket | fs.ModeCharDevice | fs.ModeIrregular
	return f.Mode()&specialBits != 0
}

// applyMagic sanitizes and/or filters the entries of this zip archive
// depending on the SecurityMode setting.
// See the SecurityMode constants above to learn more about what kind of
// security measures are currently supported.
158 | func applyMagic(files []*zip.File, securityMode SecurityMode) []*zip.File { 159 | 160 | symlinks := map[string]bool{} 161 | var re []*zip.File 162 | for _, fp := range files { 163 | // making a copy, since we change some fields (Name and ExternalAttrs) 164 | f := *fp 165 | 166 | if securityMode&SanitizeFilenames != 0 { 167 | // Sanitize filename 168 | f.Name = sanitizer.SanitizePath(f.Name) 169 | } 170 | 171 | if securityMode&SkipWindowsShortFilenames != 0 && sanitizer.HasWindowsShortFilenames(f.Name) { 172 | continue 173 | } 174 | 175 | if securityMode&PreventSymlinkTraversal != 0 { 176 | fName := sanitizer.SanitizePath(f.Name) 177 | fName = strings.TrimSuffix(fName, "/") 178 | if securityMode&PreventCaseInsensitiveSymlinkTraversal != 0 { 179 | fName = strings.ToLower(fName) 180 | } 181 | n := strings.Split(fName, "/") 182 | traversal := false 183 | for i := 1; i <= len(n); i++ { 184 | subPath := strings.Join(n[0:i], "/") 185 | if symlinks[subPath] { 186 | // a symlink has already been seen on this path. We need to drop this entry. 187 | traversal = true 188 | break 189 | } 190 | } 191 | if traversal { 192 | continue 193 | } 194 | if f.Mode()&fs.ModeSymlink != 0 { 195 | symlinks[fName] = true 196 | } 197 | } 198 | 199 | if securityMode&SkipSpecialFiles != 0 { 200 | if isSpecialFile(f) { 201 | continue 202 | } 203 | } 204 | 205 | if securityMode&SanitizeFileMode != 0 { 206 | amode := f.Mode() 207 | for _, m := range []fs.FileMode{fs.ModeTemporary, fs.ModeAppend, fs.ModeExclusive, fs.ModeSetuid, fs.ModeSetgid, fs.ModeSticky} { 208 | amode = amode &^ fs.FileMode(m) 209 | } 210 | f.SetMode(amode) 211 | } 212 | 213 | re = append(re, &f) 214 | } 215 | 216 | return re 217 | } 218 | 219 | // OpenReader will open the Zip file specified by name and return a ReadCloser. 
220 | func OpenReader(name string) (*ReadCloser, error) { 221 | o, err := zip.OpenReader(name) 222 | if err != nil { 223 | return nil, err 224 | } 225 | 226 | r := Reader{Reader: &o.Reader, originalFiles: o.File} 227 | rc := ReadCloser{Reader: r, upstreamReadCloser: o} 228 | rc.SetSecurityMode(DefaultSecurityMode) 229 | return &rc, nil 230 | } 231 | 232 | // SetSecurityMode applies the security rules on the set of files in the archive 233 | func (r *ReadCloser) SetSecurityMode(sm SecurityMode) { 234 | r.File = applyMagic(r.originalFiles, sm) 235 | r.securityMode = sm 236 | } 237 | 238 | // GetSecurityMode returns the currently enabled security rules 239 | func (r *ReadCloser) GetSecurityMode() SecurityMode { 240 | return r.securityMode 241 | } 242 | 243 | // Close closes the Zip file, rendering it unusable for I/O. 244 | func (r *ReadCloser) Close() error { 245 | r.originalFiles = nil 246 | return r.upstreamReadCloser.Close() 247 | } 248 | 249 | // NewReader returns a new Reader reading from r, which is assumed to 250 | // have the given size in bytes. 251 | func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 252 | o, err := zip.NewReader(r, size) 253 | if err != nil { 254 | return nil, err 255 | } 256 | re := Reader{Reader: o, originalFiles: o.File} 257 | re.SetSecurityMode(DefaultSecurityMode) 258 | return &re, nil 259 | } 260 | 261 | // SetSecurityMode applies the security rules on the set of files in the archive 262 | func (r *Reader) SetSecurityMode(sm SecurityMode) { 263 | r.File = applyMagic(r.originalFiles, sm) 264 | r.securityMode = sm 265 | } 266 | 267 | // GetSecurityMode returns the currently enabled security rules 268 | func (r *Reader) GetSecurityMode() SecurityMode { 269 | return r.securityMode 270 | } 271 | 272 | // FileInfoHeader creates a partially-populated FileHeader from an 273 | // fs.FileInfo. 
// Because fs.FileInfo's Name method returns only the base name of
// the file it describes, it may be necessary to modify the Name field
// of the returned header to provide the full path name of the file.
// If compression is desired, callers should set the FileHeader.Method
// field; it is unset by default.
//
// This is a direct pass-through to archive/zip's FileInfoHeader.
func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) {
	return zip.FileInfoHeader(fi)
}

// RegisterDecompressor allows custom decompressors for a specified method ID.
// The common methods Store and Deflate are built in.
//
// The call is forwarded unchanged to archive/zip's RegisterDecompressor.
func RegisterDecompressor(method uint16, dcomp Decompressor) {
	zip.RegisterDecompressor(method, dcomp)
}

// RegisterCompressor registers custom compressors for a specified method ID.
// The common methods Store and Deflate are built in.
//
// The call is forwarded unchanged to archive/zip's RegisterCompressor.
func RegisterCompressor(method uint16, comp Compressor) {
	zip.RegisterCompressor(method, comp)
}

// NewWriter returns a new Writer writing a zip file to w.
//
// This is a direct pass-through to archive/zip's NewWriter.
func NewWriter(w io.Writer) *Writer {
	return zip.NewWriter(w)
}

-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity.
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /zip/zip_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package zip 16 | 17 | import ( 18 | "bytes" 19 | _ "embed" 20 | "fmt" 21 | "io/fs" 22 | "os" 23 | "path/filepath" 24 | "strings" 25 | "testing" 26 | ) 27 | 28 | func isSlashRune(r rune) bool { return r == '/' || r == '\\' } 29 | 30 | // Check whether a path contains .. 
entries 31 | func containsDotDot(v string) bool { 32 | for _, ent := range strings.FieldsFunc(v, isSlashRune) { 33 | if ent == ".." { 34 | return true 35 | } 36 | } 37 | return false 38 | } 39 | 40 | var ( 41 | // Archive containing files: ../traverse, /absolute 42 | //go:embed archive.zip 43 | eArchiveZip []byte 44 | 45 | // Zip archive containing symbolic links 46 | //go:embed symlinks.zip 47 | eSymlinksZip []byte 48 | 49 | // Zip archive containing files with special file modes 50 | //go:embed specialmodes.zip 51 | eSpecialModesZip []byte 52 | 53 | /* 54 | // this archive looks like this: 55 | $ unzip -l symlinks2.zip 56 | Archive: symlinks2.zip 57 | Length Date Time Name 58 | --------- ---------- ----- ---- 59 | 1 2023-03-23 15:12 root 60 | 5 2023-03-23 15:12 root/poc.txt 61 | --------- ------- 62 | 6 2 files 63 | 64 | the entry with name root is a symbolic link pointing to /root 65 | */ 66 | //go:embed symlinks2.zip 67 | eSymlinks2Zip []byte 68 | 69 | /* 70 | // Same as the previous, but the root entry has a slash at the end: 71 | Archive: symlinks3.zip 72 | Length Date Time Name 73 | --------- ---------- ----- ---- 74 | 1 2023-03-23 15:18 root/ 75 | 5 2023-03-23 15:18 root/poc.txt 76 | --------- ------- 77 | 6 2 files 78 | 79 | */ 80 | //go:embed symlinks3.zip 81 | eSymlinks3Zip []byte 82 | 83 | /* 84 | // This archive attempts to traverse the path via case insensitive symlinks. 
85 | Archive: case-insensitive.zip 86 | Length Date Time Name 87 | --------- ---------- ----- ---- 88 | 1 2024-10-10 11:31 tmp 89 | 6 2024-10-10 11:31 Tmp/some-file 90 | --------- ------- 91 | 7 2 files 92 | */ 93 | //go:embed case-insensitive.zip 94 | eCaseInsensitiveSymlinksZip []byte 95 | 96 | /* 97 | Archive: winshort.zip 98 | Length Date Time Name 99 | --------- ---------- ----- ---- 100 | 5 2024-10-11 14:27 3D Objects 101 | 5 2024-10-11 14:27 Androi~2 102 | 5 2024-10-11 14:27 ANDROI~2 103 | 0 2024-10-11 14:27 foo/ 104 | 5 2024-10-11 14:27 FOOOOO~1.JPG 105 | 5 2024-10-11 14:27 Some~Stuff 106 | 0 2024-10-11 14:27 foo/ANDROI~2/ 107 | 5 2024-10-11 14:27 foo/ANDROI~2/bar 108 | 0 2024-10-11 14:27 foo/FOOOOO~1.JPG/ 109 | 5 2024-10-11 14:27 foo/FOOOOO~1.JPG/bar 110 | 0 2024-10-11 14:27 foo/Androi~2/ 111 | 5 2024-10-11 14:27 foo/Androi~2/bar 112 | --------- ------- 113 | 40 12 files 114 | */ 115 | //go:embed winshort.zip 116 | eWinShortFilenamesZip []byte 117 | ) 118 | 119 | func TestSafezip(t *testing.T) { 120 | r, err := NewReader(bytes.NewReader(eArchiveZip), int64(len(eArchiveZip))) 121 | if err != nil { 122 | t.Fatalf("zip.NewReader() error = %v", err) 123 | } 124 | 125 | if len(r.File) != 2 { 126 | t.Fatalf("unexpected number of files in the archive: %d", len(r.File)) 127 | } 128 | 129 | for _, f := range r.File { 130 | if strings.HasPrefix(f.Name, "/") { 131 | t.Errorf("f.Name has unwanted '/' prefix: %q", f.Name) 132 | } 133 | if containsDotDot(f.Name) { 134 | t.Errorf("f.Name contains unwanted '..': %q", f.Name) 135 | } 136 | } 137 | } 138 | 139 | func commonTestsBefore(t *testing.T, files []*File) { 140 | if len(files) != 2 { 141 | t.Fatalf("unexpected number of files in the archive (before): %d", len(files)) 142 | } 143 | 144 | if containsDotDot(files[0].Name) { 145 | t.Errorf("f.Name contains unwanted '..': %q", files[0].Name) 146 | } 147 | if strings.HasPrefix(files[1].Name, "/") { 148 | t.Errorf("f.Name has unwanted '/' prefix: %q", files[1].Name) 149 | 
// archiveToPath stores archive in a file named "tmp.zip" inside a fresh
// per-test temporary directory and returns that file's path. The calling
// test is aborted if the file cannot be written.
func archiveToPath(t *testing.T, archive []byte) string {
	t.Helper()

	target := filepath.Join(t.TempDir(), "tmp.zip")
	if writeErr := os.WriteFile(target, archive, 0o644); writeErr != nil {
		t.Fatalf("os.WriteFile(%q) error = %v", target, writeErr)
	}
	return target
}
!= "thisisalink.txt" { 215 | t.Fatalf("unexpected entry: %q", r.File[0].Name) 216 | } 217 | 218 | r.SetSecurityMode(DefaultSecurityMode &^ PreventSymlinkTraversal) 219 | 220 | if len(r.File) != 1 { 221 | t.Fatalf("symlink check disabled, symlink entry should show up, but it didn't: %d", len(r.File)) 222 | } 223 | if r.File[0].Name != "thisisalink.txt" { 224 | t.Fatalf("unexpected entry: %q", r.File[0].Name) 225 | } 226 | } 227 | 228 | func TestSpecialModes(t *testing.T) { 229 | r, err := OpenReader(archiveToPath(t, eSpecialModesZip)) 230 | r.SetSecurityMode(r.GetSecurityMode() | SanitizeFileMode) 231 | if err != nil { 232 | t.Fatalf("zip.OpenReader() error = %v", err) 233 | } 234 | 235 | if len(r.File) != 4 { 236 | t.Fatalf("we expected all entries: %d", len(r.File)) 237 | } 238 | 239 | if r.File[0].Name != "setuidstuff.txt" { 240 | t.Errorf("unexpected 1st entry: %s", r.File[0].Name) 241 | } 242 | if r.File[0].Mode() != 0640 { 243 | t.Errorf("unexpected 1st entry file mode: %d", r.File[0].Mode()) 244 | } 245 | 246 | if r.File[1].Name != "setuidstuff2.txt" { 247 | t.Errorf("unexpected 2nd entry: %s", r.File[1].Name) 248 | } 249 | if r.File[1].Mode() != 0750 { 250 | t.Errorf("unexpected 2nd entry file mode: %d", r.File[1].Mode()) 251 | } 252 | 253 | if r.File[2].Name != "tmpstuff.txt" { 254 | t.Errorf("unexpected 3rd entry: %s", r.File[2].Name) 255 | } 256 | if r.File[2].Mode() != 0640 { 257 | t.Errorf("unexpected 3rd entry file mode: %d", r.File[2].Mode()) 258 | } 259 | 260 | if r.File[3].Name != "somedir/" { 261 | t.Errorf("unexpected 4th entry: %s", r.File[3].Name) 262 | } 263 | if r.File[3].Mode() != (fs.ModeDir | 0750) { 264 | t.Errorf("unexpected 4th entry file mode: %d", r.File[3].Mode()) 265 | } 266 | 267 | // now assessing how these entries would have looked like if we didn't sanitize them: 268 | r.SetSecurityMode(DefaultSecurityMode &^ SanitizeFileMode) 269 | 270 | if len(r.File) != 4 { 271 | t.Fatalf("we expected all entries, still: %d", len(r.File)) 272 
| } 273 | 274 | if r.File[0].Name != "setuidstuff.txt" { 275 | t.Errorf("unexpected 1st entry: %s", r.File[0].Name) 276 | } 277 | if r.File[0].Mode() != (fs.ModeSetuid | fs.ModeSetgid | 0640) { 278 | t.Errorf("unexpected 1st entry file mode: %d", r.File[0].Mode()) 279 | } 280 | 281 | if r.File[1].Name != "setuidstuff2.txt" { 282 | t.Errorf("unexpected 2nd entry: %s", r.File[1].Name) 283 | } 284 | if r.File[1].Mode() != (fs.ModeSetuid | fs.ModeSetgid | 0750) { 285 | t.Errorf("unexpected 2nd entry file mode: %d", r.File[1].Mode()) 286 | } 287 | 288 | if r.File[2].Name != "tmpstuff.txt" { 289 | t.Errorf("unexpected 3rd entry: %s", r.File[2].Name) 290 | } 291 | if r.File[2].Mode() != (fs.ModeSticky | 0640) { 292 | t.Errorf("unexpected 3rd entry file mode: %d", r.File[2].Mode()) 293 | } 294 | 295 | if r.File[3].Name != "somedir/" { 296 | t.Errorf("unexpected 4th entry: %s", r.File[3].Name) 297 | } 298 | if r.File[3].Mode() != (fs.ModeDir | 0750) { 299 | t.Errorf("unexpected 4th entry file mode: %d", r.File[3].Mode()) 300 | } 301 | } 302 | 303 | func TestSymlinks2(t *testing.T) { 304 | r, err := OpenReader(archiveToPath(t, eSymlinks2Zip)) 305 | if err != nil { 306 | t.Fatalf("zip.OpenReader() error = %v", err) 307 | } 308 | 309 | if len(r.File) != 1 { 310 | t.Fatalf("we expected the symlink entry to be present, but not the follow up entry: %d", len(r.File)) 311 | } 312 | if r.File[0].Name != "root" { 313 | t.Errorf("unexpected entry: %q", r.File[0].Name) 314 | } 315 | } 316 | 317 | func TestSymlinks3(t *testing.T) { 318 | r, err := OpenReader(archiveToPath(t, eSymlinks3Zip)) 319 | if err != nil { 320 | t.Fatalf("zip.OpenReader() error = %v", err) 321 | } 322 | 323 | if len(r.File) != 1 { 324 | t.Fatalf("we expected the symlink entry to be present, but not the follow up entry: %d", len(r.File)) 325 | } 326 | if r.File[0].Name != "root/" { 327 | t.Errorf("unexpected entry: %q", r.File[0].Name) 328 | } 329 | } 330 | 331 | func TestSymlinksCaseInsensitive(t *testing.T) { 332 
| path := archiveToPath(t, eCaseInsensitiveSymlinksZip) 333 | r, err := OpenReader(path) 334 | if err != nil { 335 | t.Fatalf("Error opening zip. OpenReader(%v) = %v, want nil", path, err) 336 | } 337 | r.SetSecurityMode(r.GetSecurityMode() | PreventCaseInsensitiveSymlinkTraversal) 338 | 339 | if len(r.File) != 1 { 340 | t.Fatalf("Unexpected number of files in the archive. len(OpenReader(%v).File) = %d, want 1.", path, len(r.File)) 341 | } 342 | if r.File[0].Name != "tmp" { 343 | t.Errorf("Unexpected entry. OpenReader(%v).File[0].Name = %v, want %v", path, r.File[0].Name, "tmp") 344 | } 345 | } 346 | 347 | func TestTypes(t *testing.T) { 348 | archivePath := archiveToPath(t, eArchiveZip) 349 | archive := eArchiveZip 350 | 351 | r1, err := NewReader(bytes.NewReader(archive), int64(len(archive))) 352 | if err != nil { 353 | t.Fatalf("zip.NewReader() error = %v", err) 354 | } 355 | newReaderType := fmt.Sprintf("%T", r1) 356 | 357 | r2, err := OpenReader(archivePath) 358 | if err != nil { 359 | t.Fatalf("zip.OpenReader() error = %v", err) 360 | } 361 | 362 | openReaderType := fmt.Sprintf("%T", r2.Reader) 363 | 364 | if "*"+openReaderType != newReaderType { 365 | t.Errorf("type of zip.OpenReader().Reader: %v, type of zip.NewReader(): %v", openReaderType, newReaderType) 366 | } 367 | } 368 | 369 | func TestWindowsShortFilenames(t *testing.T) { 370 | path := archiveToPath(t, eWinShortFilenamesZip) 371 | r, err := OpenReader(path) 372 | if err != nil { 373 | t.Fatalf("Error opening zip. OpenReader(%v) = %v, want nil", path, err) 374 | } 375 | r.SetSecurityMode(r.GetSecurityMode() | SkipWindowsShortFilenames) 376 | 377 | if len(r.File) != 3 { 378 | t.Fatalf("Unexpected number of files in the archive. len(OpenReader(%v).File) = %d, want 2.", path, len(r.File)) 379 | } 380 | 381 | for i, want := range []string{"3D Objects", "foo/", "Some~Stuff"} { 382 | if r.File[i].Name != want { 383 | t.Errorf("Unexpected entry. 
OpenReader(%v).File[%d].Name = %v, want %v", path, i, r.File[i].Name, want) 384 | } 385 | } 386 | } 387 | -------------------------------------------------------------------------------- /tar/tar.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package tar is a drop-in replacement for archive/tar with security focus. 16 | // 17 | // To prevent security implications (e.g. directory traversal) of attacker controlled crafted tar 18 | // archives, this library sanitizes 19 | // - file names (bugos filename entries like ../something are fixed on the fly) 20 | // - the file mode (removing special bits like setuid) 21 | // It also: 22 | // - skips special file types silently (fifos, device nodes, char devices, etc.) 23 | // - strips extended file system attributes 24 | // - skips files that would need to be extracted through a symbolic link 25 | // 26 | // Features turned on by default: 27 | // - SanitizeFilenames 28 | // - PreventSymlinkTraversal 29 | // These two features are compatible with all known legitimate use-cases. 
30 | // 31 | // You may enable the other features individually like this: 32 | // tr := tar.NewReader(buf) 33 | // tr.SetSecurityMode(tr.GetSecurityMode() | tar.SanitizeFileMode | tar.DropXattrs) 34 | // or 35 | // tr.SetSecurityMode(tar.MaximumSecurityMode) 36 | // 37 | // You may opt out from a certain feature like this: 38 | // tr.SetSecurityMode(tr.GetSecurityMode() &^ tar.SanitizeFileMode) 39 | // 40 | // Notes about PreventSymlinkTraversal. Consider the following archive: 41 | // $ tar tvf traverse-via-links.tar 42 | // lrwxrwxrwx username/groupname 0 2023-03-08 09:43 linktoroot -> / 43 | // -rw-rw-r-- username/groupname 5 2023-03-08 09:44 linktoroot/root/.bashrc 44 | // 45 | // If an archive like this is extracted blindly, the .bashrc file of the root user would 46 | // be overwritten. The safearchive/tar library prevents this by keeping track of symbolic links 47 | // and not emitting entries that would need to be extracted through a symbolic link. 48 | // This is a feature enabled by default (PreventSymlinkTraversal), as no legitimate archives should 49 | // contain entries that are to be extracted through a symbolic link. 50 | package tar 51 | 52 | import ( 53 | "archive/tar" // NOLINT 54 | "io" 55 | "io/fs" 56 | "strings" 57 | 58 | "github.com/google/safearchive/sanitizer" 59 | ) 60 | 61 | // Format represents the tar archive format. 62 | // 63 | // The original tar format was introduced in Unix V7. 64 | // Since then, there have been multiple competing formats attempting to 65 | // standardize or extend the V7 format to overcome its limitations. 66 | // The most common formats are the USTAR, PAX, and GNU formats, 67 | // each with their own advantages and limitations. 
68 | // 69 | // The following table captures the capabilities of each format: 70 | // 71 | // | USTAR | PAX | GNU 72 | // ------------------+--------+-----------+---------- 73 | // Name | 256B | unlimited | unlimited 74 | // Linkname | 100B | unlimited | unlimited 75 | // Size | uint33 | unlimited | uint89 76 | // Mode | uint21 | uint21 | uint57 77 | // Uid/Gid | uint21 | unlimited | uint57 78 | // Uname/Gname | 32B | unlimited | 32B 79 | // ModTime | uint33 | unlimited | int89 80 | // AccessTime | n/a | unlimited | int89 81 | // ChangeTime | n/a | unlimited | int89 82 | // Devmajor/Devminor | uint21 | uint21 | uint57 83 | // ------------------+--------+-----------+---------- 84 | // string encoding | ASCII | UTF-8 | binary 85 | // sub-second times | no | yes | no 86 | // sparse files | no | yes | yes 87 | // 88 | // The table's upper portion shows the Header fields, where each format reports 89 | // the maximum number of bytes allowed for each string field and 90 | // the integer type used to store each numeric field 91 | // (where timestamps are stored as the number of seconds since the Unix epoch). 92 | // 93 | // The table's lower portion shows specialized features of each format, 94 | // such as supported string encodings, support for sub-second timestamps, 95 | // or support for sparse files. 96 | // 97 | // The Writer currently provides no support for sparse files. 98 | type Format = tar.Format 99 | 100 | const ( 101 | // FormatUnknown indicates that the format is unknown. 102 | // @see archive/tar.FormatUnknown 103 | FormatUnknown = tar.FormatUnknown 104 | // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. 105 | // @see archive/tar.FormatUSTAR 106 | FormatUSTAR = tar.FormatUSTAR 107 | // FormatPAX represents the PAX header format defined in POSIX.1-2001. 108 | // @see archive/tar.FormatPAX 109 | FormatPAX = tar.FormatPAX 110 | // FormatGNU represents the GNU header format. 
111 | // @see archive/tar.FormatGNU 112 | FormatGNU = tar.FormatGNU 113 | ) 114 | 115 | // A Header represents a single header in a tar archive. 116 | // Some fields may not be populated. 117 | // 118 | // For forward compatibility, users that retrieve a Header from Reader.Next, 119 | // mutate it in some ways, and then pass it back to Writer.WriteHeader 120 | // should do so by creating a new Header and copying the fields 121 | // that they are interested in preserving. 122 | type Header = tar.Header 123 | 124 | // Type flags for Header.Typeflag. 125 | const ( 126 | // Type '0' indicates a regular file. 127 | TypeReg = tar.TypeReg 128 | TypeRegA = tar.TypeRegA // Deprecated: Use TypeReg instead. 129 | 130 | // Type '1' to '6' are header-only flags and may not have a data body. 131 | TypeLink = tar.TypeLink // Hard link 132 | TypeSymlink = tar.TypeSymlink // Symbolic link 133 | TypeChar = tar.TypeChar // Character device node 134 | TypeBlock = tar.TypeBlock // Block device node 135 | TypeDir = tar.TypeDir // Directory 136 | TypeFifo = tar.TypeFifo // FIFO node 137 | 138 | // Type '7' is reserved. 139 | TypeCont = tar.TypeCont 140 | 141 | // Type 'x' is used by the PAX format to store key-value records that 142 | // are only relevant to the next file. 143 | // This package transparently handles these types. 144 | TypeXHeader = tar.TypeXHeader 145 | 146 | // Type 'g' is used by the PAX format to store key-value records that 147 | // are relevant to all subsequent files. 148 | // This package only supports parsing and composing such headers, 149 | // but does not currently support persisting the global state across files. 150 | TypeXGlobalHeader = tar.TypeXGlobalHeader 151 | 152 | // Type 'S' indicates a sparse file in the GNU format. 153 | TypeGNUSparse = tar.TypeGNUSparse 154 | 155 | // Types 'L' and 'K' are used by the GNU format for a meta file 156 | // used to store the path or link name for the next file. 157 | // This package transparently handles these types. 
// SecurityMode is a bit set selecting which security features a Reader
// enforces. Individual flags are combined with | and removed with &^.
type SecurityMode int

// allowListedPaxKeys lists the PAX record keys that are kept when extended
// attributes are dropped.
var allowListedPaxKeys = []string{"ctime", "mtime", "atime"}

// Each flag occupies its own bit. Bit 3 (value 8) is not assigned to any flag
// in this block.
const (
	// SkipSpecialFiles security mode skips special files (e.g. block devices or fifos)
	SkipSpecialFiles SecurityMode = 1 << 0
	// SanitizeFileMode will drop special file modes (e.g. setuid and tmp bit)
	// This feature is not enabled by default.
	SanitizeFileMode SecurityMode = 1 << 1
	// SanitizeFilenames will sanitize filenames (dropping .. path components and turning entries into relative)
	// The very first version (early 2022) of this library featured this security measure only.
	// This feature is enabled by default.
	SanitizeFilenames SecurityMode = 1 << 2
	// DropXattrs will drop extended attributes from the header
	// This feature is not enabled by default.
	DropXattrs SecurityMode = 1 << 4
	// PreventSymlinkTraversal drops malicious entries that attempt to write to an outside location
	// through a symbolic link.
	// This feature is enabled by default.
	PreventSymlinkTraversal SecurityMode = 1 << 5
	// PreventCaseInsensitiveSymlinkTraversal activates case insensitive symlink traversal detection.
	// This feature requires PreventSymlinkTraversal to be enabled as well.
	// By default, this is activated only on MacOS and Windows builds. If you are extracting to a
	// case insensitive filesystem on a Unix platform, you should activate this feature explicitly.
	PreventCaseInsensitiveSymlinkTraversal SecurityMode = 1 << 6
	// SkipWindowsShortFilenames drops archive entries that have a path component that look like a
	// Windows short filename (e.g. GIT~1).
	// By default, this is activated only on Windows builds. If you are extracting to a Windows
	// filesystem on a non-Windows platform, you should activate this feature explicitly.
	SkipWindowsShortFilenames SecurityMode = 1 << 7
)

// MaximumSecurityMode is the union of every flag declared above.
// Recommended for integrations that need file contents only (and nothing unix specific).
const MaximumSecurityMode = SkipSpecialFiles |
	SanitizeFileMode |
	SanitizeFilenames |
	PreventSymlinkTraversal |
	DropXattrs |
	PreventCaseInsensitiveSymlinkTraversal |
	SkipWindowsShortFilenames
// leaveKeys returns a new map holding only those entries of in whose key
// appears in allowListedKeys.
//
// in is never modified. The result is always a non-nil map, even when in is
// nil, when no keys are allow-listed, or when nothing matches. A key listed
// more than once is simply copied once.
func leaveKeys(in map[string]string, allowListedKeys ...string) map[string]string {
	re := make(map[string]string, len(allowListedKeys))
	// Probe the input once per allow-listed key rather than comparing every
	// input key against every allow-listed key.
	for _, key := range allowListedKeys {
		if value, ok := in[key]; ok {
			re[key] = value
		}
	}
	return re
}
281 | func (tr *Reader) Next() (*tar.Header, error) { 282 | for { 283 | h, err := tr.unsafeReader.Next() 284 | if err != nil { 285 | return h, err 286 | } 287 | 288 | if tr.securityMode&SkipSpecialFiles != 0 { 289 | // non-safe entries are skipped 290 | if h.Typeflag != TypeReg && h.Typeflag != TypeDir && h.Typeflag != TypeSymlink { 291 | continue 292 | } 293 | } 294 | 295 | if tr.securityMode&SanitizeFileMode != 0 { 296 | // clearing out any potentially special bits (e.g. setuid) 297 | h.Mode = h.Mode & 0777 // &^ s_ISUID &^ s_ISGID &^ s_ISVTX 298 | } 299 | 300 | if tr.securityMode&SanitizeFilenames != 0 { 301 | // Sanitize h.Name 302 | h.Name = sanitizer.SanitizePath(h.Name) 303 | } 304 | 305 | if tr.securityMode&SkipWindowsShortFilenames != 0 && sanitizer.HasWindowsShortFilenames(h.Name) { 306 | continue 307 | } 308 | 309 | if tr.securityMode&PreventSymlinkTraversal != 0 { 310 | hName := sanitizer.SanitizePath(h.Name) 311 | hName = strings.TrimSuffix(hName, "/") 312 | if tr.securityMode&PreventCaseInsensitiveSymlinkTraversal != 0 { 313 | hName = strings.ToLower(hName) 314 | } 315 | 316 | n := strings.Split(hName, "/") 317 | traversal := false 318 | for i := 1; i <= len(n); i++ { 319 | subPath := strings.Join(n[0:i], "/") 320 | if tr.symlinks[subPath] { 321 | // a symlink has already been seen on this path. We need to drop this entry. 322 | traversal = true 323 | break 324 | } 325 | } 326 | if traversal { 327 | continue 328 | } 329 | if h.Linkname != "" { 330 | tr.symlinks[hName] = true 331 | } 332 | } 333 | 334 | if tr.securityMode&DropXattrs != 0 { 335 | // Dropping extended attributes, if present 336 | h.Xattrs = nil 337 | h.PAXRecords = leaveKeys(h.PAXRecords, allowListedPaxKeys...) 338 | } 339 | 340 | return h, err 341 | } 342 | } 343 | 344 | // Read reads from the current file in the tar archive. 345 | // It returns (0, io.EOF) when it reaches the end of that file, 346 | // until Next is called to advance to the next file. 
347 | // 348 | // If the current file is sparse, then the regions marked as a hole 349 | // are read back as NUL-bytes. 350 | // 351 | // Calling Read on special types like TypeLink, TypeSymlink, TypeChar, 352 | // TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what 353 | // the Header.Size claims. 354 | func (tr *Reader) Read(b []byte) (int, error) { 355 | return tr.unsafeReader.Read(b) 356 | } 357 | -------------------------------------------------------------------------------- /tar/tar_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package tar 16 | 17 | import ( 18 | "archive/tar" 19 | "bytes" 20 | _ "embed" 21 | "io" 22 | "reflect" 23 | "strings" 24 | "testing" 25 | 26 | "github.com/google/go-cmp/cmp" 27 | "github.com/google/go-cmp/cmp/cmpopts" 28 | ) 29 | 30 | var ( 31 | // Archive containing files: readme.txt, /gopher.txt, and ../todo.txt 32 | //go:embed traverse.tar 33 | eTraverseTar []byte 34 | 35 | /* 36 | $ tar tvf traverse-via-links.tar 37 | lrwxrwxrwx imrer/primarygroup 0 2023-03-08 09:43 linktoroot -> / 38 | -rw-rw-r-- imrer/primarygroup 5 2023-03-08 09:44 linktoroot/root/.bashrc 39 | lrwxrwxrwx imrer/primarygroup 0 2023-03-08 09:46 linktoescape -> ../outside.txt 40 | -rw-rw-r-- imrer/primarygroup 6 2023-03-08 09:46 linktoescape 41 | */ 42 | //go:embed traverse-via-links.tar 43 | eTraverseViaLinksTar []byte 44 | 45 | /* 46 | $ tar tvf traverse-slash-at-the-end.tar 47 | lrwxrwxrwx imrer/primarygroup 0 2023-03-23 13:28 linktoroot/ -> / 48 | -rw-r----- imrer/primarygroup 5 2023-03-23 13:28 linktoroot/root/.bashrc 49 | */ 50 | //go:embed traverse-slash-at-the-end.tar 51 | eTraverseSlashAtTheEndTar []byte 52 | 53 | /* 54 | The input archive we are testing looks like this: 55 | $ tar tvf specialfiles.tar 56 | prw-r----- imrer/primarygroup 0 2023-03-08 13:36 fifo 57 | crw-r--r-- root/root 1,3 2023-03-08 13:37 null 58 | brw-r--r-- root/root 8,0 2023-03-08 13:39 sda 59 | drwxr-x--- imrer/primarygroup 0 2023-03-08 13:41 dir/ 60 | -rw-r----- imrer/primarygroup 8 2023-03-08 13:38 regular.txt 61 | lrwxrwxrwx imrer/primarygroup 0 2023-03-08 13:41 symlink -> regular.txt 62 | hrw-r----- imrer/primarygroup 0 2023-03-08 13:38 hardlink link to regular.txt 63 | */ 64 | //go:embed specialfiles.tar 65 | eSpecialFilesTar []byte 66 | 67 | /* 68 | The input archive we are testing looks like this: 69 | $ tar tvf specialmodes.tar 70 | -rwSr-S--- imrer/primarygroup 12 2023-03-08 13:55 setuidstuff.txt 71 | -rwsr-s--- imrer/primarygroup 13 2023-03-08 13:55 setuidstuff2.txt 72 | -rw-r----T 
imrer/primarygroup 9 2023-03-08 13:55 tmpstuff.txt 73 | drwxr-x--- imrer/primarygroup 0 2023-03-09 08:23 somedir/ 74 | */ 75 | //go:embed specialmodes.tar 76 | eSpecialModesTar []byte 77 | 78 | /* 79 | archive normally containing: 80 | 2023/03/08 15:35:39 Contents of &{Typeflag:48 Name:something.txt Linkname: Size:10 Mode:416 Uid:1040569 Gid:89939 81 | Uname:imrer Gname:primarygroup ModTime:2023-03-08 15:29:32.94598663 +0000 UTC 82 | AccessTime:2023-03-08 15:29:32.94598663 +0000 UTC ChangeTime:2023-03-08 15:29:52.687447969 +0000 UTC 83 | Devmajor:0 Devminor:0 Xattrs:map[user.hello:world] 84 | PAXRecords:map[SCHILY.xattr.user.hello:world 85 | atime:1678289372.94598663 ctime:1678289392.687447969 mtime:1678289372.94598663] Format:PAX}: 86 | */ 87 | //go:embed xattr.tar 88 | eXattrTar []byte 89 | 90 | /* 91 | lrwxrwxrwx root/root 0 2024-10-10 11:17 tmp -> / 92 | -rw-r--r-- root/root 5 2024-10-10 11:17 Tmp/test-file 93 | */ 94 | //go:embed case-insensitive.tar 95 | eTraverseViaCaseInsensitiveLinksTar []byte 96 | 97 | /* 98 | -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 3D Objects 99 | -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 Androi~2 100 | -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 ANDROI~2 101 | drwxr-x--- imrer/primarygroup 0 2024-10-11 14:27 foo/ 102 | drwxr-x--- imrer/primarygroup 0 2024-10-11 14:27 foo/ANDROI~2/ 103 | -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 foo/ANDROI~2/bar 104 | drwxr-x--- imrer/primarygroup 0 2024-10-11 14:27 foo/FOOOOO~1.JPG/ 105 | -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 foo/FOOOOO~1.JPG/bar 106 | drwxr-x--- imrer/primarygroup 0 2024-10-11 14:27 foo/Androi~2/ 107 | -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 foo/Androi~2/bar 108 | -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 FOOOOO~1.JPG 109 | -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 Some~Stuff 110 | */ 111 | //go:embed winshort.tar 112 | eWinShortTar []byte 113 | ) 114 | 115 | func isSlashRune(r rune) bool { return r == '/' || r == '\\' } 116 
| 117 | // Check whether a path contains .. entries 118 | func containsDotDot(v string) bool { 119 | for _, ent := range strings.FieldsFunc(v, isSlashRune) { 120 | if ent == ".." { 121 | return true 122 | } 123 | } 124 | return false 125 | } 126 | 127 | // Based on example from: https://pkg.go.dev/archive/tar#pkg-overview 128 | func TestSafetar(t *testing.T) { 129 | buf := bytes.NewBuffer(eTraverseTar[:]) 130 | 131 | // Open and iterate through the files in the archive. 132 | tr := NewReader(buf) 133 | for { 134 | hdr, err := tr.Next() 135 | if err == io.EOF { 136 | break // End of archive 137 | } 138 | if err != nil { 139 | t.Fatal(err) 140 | } 141 | if strings.HasPrefix(hdr.Name, "/") { 142 | t.Errorf("hdr.Name has unwanted '/' prefix: %q", hdr.Name) 143 | } 144 | if containsDotDot(hdr.Name) { 145 | t.Errorf("hdr.Name contains unwanted '..': %q", hdr.Name) 146 | } 147 | } 148 | } 149 | 150 | func TestSafetarLinksDefaultMode(t *testing.T) { 151 | buf := bytes.NewBuffer(eTraverseViaLinksTar[:]) 152 | 153 | // default settings with PreventSymlinkTraversal 154 | tr := NewReader(buf) 155 | hdr, err := tr.Next() 156 | if err != nil { 157 | t.Fatal(err) 158 | } 159 | 160 | // first entry is supposed to be linktoroot/root/.bashrc (linktoroot symlink skipped) 161 | if hdr.Name != "linktoroot" { 162 | t.Errorf("unexpected 1st entry: %q", hdr.Name) 163 | } 164 | if hdr.Typeflag != TypeSymlink { 165 | t.Errorf("unexpected 1st entry type: %v", hdr.Typeflag) 166 | } 167 | if hdr.Linkname != "/" { 168 | t.Errorf("unexpected 1st entry Linkname: %v", hdr.Linkname) 169 | } 170 | 171 | hdr, err = tr.Next() 172 | if err != nil { 173 | t.Fatal(err) 174 | } 175 | if hdr.Name != "linktoescape" { 176 | t.Errorf("unexpected 2nd entry: %q", hdr.Name) 177 | } 178 | if hdr.Typeflag != TypeSymlink { 179 | t.Errorf("unexpected 2nd entry type: %v", hdr.Typeflag) 180 | } 181 | if hdr.Linkname != "../outside.txt" { 182 | t.Errorf("unexpected 2nd entry Linkname: %v", hdr.Linkname) 183 | } 184 | 
// TestSafetarLinksDefaultModeSlashAtTheEnd reads traverse-slash-at-the-end.tar
// with an unmodified reader and expects exactly one entry: the symbolic link
// "linktoroot/". The entry stored below that link must not be returned.
func TestSafetarLinksDefaultModeSlashAtTheEnd(t *testing.T) {
	// note the comment at sanitizePath:
	// "Add back trailing / if safe"
	// this test ensures the PreventSymlinkTraversal security check cannot be bypassed via
	// entries ending with slash

	buf := bytes.NewBuffer(eTraverseSlashAtTheEndTar[:])

	// default settings with PreventSymlinkTraversal
	tr := NewReader(buf)
	hdr, err := tr.Next()
	if err != nil {
		t.Fatal(err)
	}

	// first entry is supposed to be the symlink itself, with its trailing
	// slash kept and its link target unchanged
	if hdr.Name != "linktoroot/" {
		t.Errorf("unexpected 1st entry: %q", hdr.Name)
	}
	if hdr.Typeflag != TypeSymlink {
		t.Errorf("unexpected 1st entry type: %v", hdr.Typeflag)
	}
	if hdr.Linkname != "/" {
		t.Errorf("unexpected 1st entry Linkname: %v", hdr.Linkname)
	}

	// linktoroot/root/.bashrc sits below the symlink, so the reader must skip
	// it and report the end of the archive right away
	hdr, err = tr.Next()
	if hdr != nil {
		t.Errorf("unexpected entry: %v", hdr)
	}
	if err != io.EOF {
		t.Fatal(err)
	}
}
233 | tr := NewReader(buf) 234 | tr.SetSecurityMode(tr.GetSecurityMode() &^ PreventSymlinkTraversal) 235 | hdr, err := tr.Next() 236 | if err != nil { 237 | t.Fatal(err) 238 | } 239 | 240 | // first entry is supposed to be linktoroot pointing to the root 241 | if hdr.Name != "linktoroot" { 242 | t.Errorf("unexpected 1st entry: %q", hdr.Name) 243 | } 244 | if hdr.Typeflag != TypeSymlink { 245 | t.Errorf("unexpected 1st entry type: %v", hdr.Typeflag) 246 | } 247 | if hdr.Linkname != "/" { 248 | t.Errorf("unexpected 1st entry Linkname: %v", hdr.Linkname) 249 | } 250 | 251 | hdr, err = tr.Next() 252 | if err != nil { 253 | t.Fatal(err) 254 | } 255 | if hdr.Name != "linktoroot/root/.bashrc" { 256 | t.Errorf("unexpected 2nd entry: %q", hdr.Name) 257 | } 258 | if hdr.Typeflag != TypeReg { 259 | t.Errorf("unexpected 2nd entry type: %v", hdr.Typeflag) 260 | } 261 | if hdr.Linkname != "" { 262 | t.Errorf("unexpected 2nd entry Linkname: %v", hdr.Linkname) 263 | } 264 | 265 | hdr, err = tr.Next() 266 | if err != nil { 267 | t.Fatal(err) 268 | } 269 | if hdr.Name != "linktoescape" { 270 | t.Errorf("unexpected 3rd entry: %q", hdr.Name) 271 | } 272 | if hdr.Typeflag != TypeSymlink { 273 | t.Errorf("unexpected 3rd entry type: %v", hdr.Typeflag) 274 | } 275 | if hdr.Linkname != "../outside.txt" { 276 | t.Errorf("unexpected 3rd entry Linkname: %v", hdr.Linkname) 277 | } 278 | 279 | hdr, err = tr.Next() 280 | if err != nil { 281 | t.Fatal(err) 282 | } 283 | if hdr.Name != "linktoescape" { 284 | t.Errorf("unexpected 4th entry: %q", hdr.Name) 285 | } 286 | if hdr.Typeflag != TypeReg { 287 | t.Errorf("unexpected 4th entry type: %v", hdr.Typeflag) 288 | } 289 | if hdr.Linkname != "" { 290 | t.Errorf("unexpected 4th entry Linkname: %v", hdr.Linkname) 291 | } 292 | 293 | hdr, err = tr.Next() 294 | if hdr != nil { 295 | t.Errorf("unexpected entry: %v", hdr) 296 | } 297 | if err != io.EOF { 298 | t.Fatal(err) 299 | } 300 | } 301 | 302 | func TestSpecialFiles(t *testing.T) { 303 | buf := 
bytes.NewBuffer(eSpecialFilesTar[:]) 304 | 305 | // Open and iterate through the files in the archive. 306 | tr := NewReader(buf) 307 | tr.SetSecurityMode(tr.GetSecurityMode() | SkipSpecialFiles) 308 | hdr, err := tr.Next() 309 | if err != nil { 310 | t.Fatal(err) 311 | } 312 | 313 | if hdr.Name != "dir/" { 314 | t.Errorf("unexpected 1st entry: %q", hdr.Name) 315 | } 316 | if hdr.Typeflag != TypeDir { 317 | t.Errorf("unexpected 1st entry type: %v", hdr.Typeflag) 318 | } 319 | 320 | hdr, err = tr.Next() 321 | if err != nil { 322 | t.Fatal(err) 323 | } 324 | if hdr.Name != "regular.txt" { 325 | t.Errorf("unexpected 2nd entry: %q", hdr.Name) 326 | } 327 | if hdr.Typeflag != TypeReg { 328 | t.Errorf("unexpected 2nd entry type: %v", hdr.Typeflag) 329 | } 330 | 331 | hdr, err = tr.Next() 332 | if err != nil { 333 | t.Fatal(err) 334 | } 335 | if hdr.Name != "symlink" { 336 | t.Errorf("unexpected 3rd entry: %q", hdr.Name) 337 | } 338 | if hdr.Typeflag != TypeSymlink { 339 | t.Errorf("unexpected 3rd entry type: %v", hdr.Typeflag) 340 | } 341 | if hdr.Linkname != "regular.txt" { 342 | t.Errorf("unexpected 3rd entry Linkname: %v", hdr.Linkname) 343 | } 344 | 345 | hdr, err = tr.Next() 346 | if hdr != nil { 347 | t.Errorf("unexpected entry: %v", hdr) 348 | } 349 | if err != io.EOF { 350 | t.Fatal(err) 351 | } 352 | } 353 | 354 | func TestSpecialModes(t *testing.T) { 355 | buf := bytes.NewBuffer(eSpecialModesTar[:]) 356 | 357 | // Open and iterate through the files in the archive. 
358 | tr := NewReader(buf) 359 | tr.SetSecurityMode(tr.GetSecurityMode() | SanitizeFileMode) 360 | hdr, err := tr.Next() 361 | if err != nil { 362 | t.Fatal(err) 363 | } 364 | 365 | if hdr.Name != "setuidstuff.txt" { 366 | t.Errorf("unexpected 1st entry: %q", hdr.Name) 367 | } 368 | if hdr.Typeflag != TypeReg { 369 | t.Errorf("unexpected 1st entry type: %v", hdr.Typeflag) 370 | } 371 | if hdr.Mode != 0640 { 372 | t.Errorf("unexpected 1st entry mode: %v", hdr.Mode) 373 | } 374 | 375 | hdr, err = tr.Next() 376 | if err != nil { 377 | t.Fatal(err) 378 | } 379 | if hdr.Name != "setuidstuff2.txt" { 380 | t.Errorf("unexpected 2nd entry: %q", hdr.Name) 381 | } 382 | if hdr.Typeflag != TypeReg { 383 | t.Errorf("unexpected 2nd entry type: %v", hdr.Typeflag) 384 | } 385 | if hdr.Mode != 0750 { 386 | t.Errorf("unexpected 2nd entry mode: %v", hdr.Mode) 387 | } 388 | 389 | hdr, err = tr.Next() 390 | if err != nil { 391 | t.Fatal(err) 392 | } 393 | if hdr.Name != "tmpstuff.txt" { 394 | t.Errorf("unexpected 3rd entry: %q", hdr.Name) 395 | } 396 | if hdr.Typeflag != TypeReg { 397 | t.Errorf("unexpected 3rd entry type: %v", hdr.Typeflag) 398 | } 399 | if hdr.Mode != 0640 { 400 | t.Errorf("unexpected 3rd entry mode: %v", hdr.Mode) 401 | } 402 | 403 | hdr, err = tr.Next() 404 | if err != nil { 405 | t.Fatal(err) 406 | } 407 | if hdr.Name != "somedir/" { 408 | t.Errorf("unexpected 4st entry: %q", hdr.Name) 409 | } 410 | if hdr.Typeflag != TypeDir { 411 | t.Errorf("unexpected 4st entry type: %v", hdr.Typeflag) 412 | } 413 | if hdr.Mode != 0750 { 414 | t.Errorf("unexpected 4st entry mode: %v", hdr.Mode) 415 | } 416 | 417 | hdr, err = tr.Next() 418 | if hdr != nil { 419 | t.Errorf("unexpected entry: %v", hdr) 420 | } 421 | if err != io.EOF { 422 | t.Fatal(err) 423 | } 424 | } 425 | 426 | func testLeaveKeys(t *testing.T) { 427 | m := map[string]string{"foo": "bar", "Foo": "Bar"} 428 | n := leaveKeys(m, "foo") 429 | if !reflect.DeepEqual(n, map[string]string{"foo": "bar"}) { 430 | 
t.Errorf("function leaveKeys unexpected return: %v", n) 431 | } 432 | } 433 | 434 | // why isn't this part of golang core? 435 | func contains[T comparable](s []T, e T) bool { 436 | for _, v := range s { 437 | if v == e { 438 | return true 439 | } 440 | } 441 | return false 442 | } 443 | 444 | func TestXattrs(t *testing.T) { 445 | buf := bytes.NewBuffer(eXattrTar[:]) 446 | 447 | // Open and iterate through the files in the archive. 448 | members := 0 449 | tr := NewReader(buf) 450 | tr.SetSecurityMode(tr.GetSecurityMode() | DropXattrs) 451 | for { 452 | hdr, err := tr.Next() 453 | if err == io.EOF { 454 | break // End of archive 455 | } 456 | if err != nil { 457 | t.Fatal(err) 458 | } 459 | members++ 460 | if hdr.Name != "something.txt" { 461 | t.Errorf("hdr.Name is unexpected: %q", hdr.Name) 462 | } 463 | if hdr.Xattrs != nil { 464 | t.Errorf("hdr.Xattrs is non-nil: %+v", hdr.Xattrs) 465 | } 466 | if hdr.PAXRecords != nil { 467 | for _, k := range allowListedPaxKeys { 468 | if hdr.PAXRecords[k] == "" { 469 | t.Errorf("%s in hdr.PAXRecords is empty", k) 470 | } 471 | } 472 | for k := range hdr.PAXRecords { 473 | if !contains(allowListedPaxKeys, k) { 474 | t.Errorf("unexpected item in PAXRecords: %v", k) 475 | } 476 | } 477 | } else { 478 | t.Errorf("hdr.PAXRecords is nil") 479 | } 480 | } 481 | 482 | if members != 1 { 483 | t.Errorf("the Reader didn't yield any members") 484 | } 485 | } 486 | 487 | func TestSafetarLinksCaseInsensitive(t *testing.T) { 488 | buf := bytes.NewBuffer(eTraverseViaCaseInsensitiveLinksTar[:]) 489 | 490 | // default settings with PreventSymlinkTraversal 491 | tr := NewReader(buf) 492 | tr.SetSecurityMode(tr.GetSecurityMode() | PreventCaseInsensitiveSymlinkTraversal) 493 | hdr, err := tr.Next() 494 | if err != nil { 495 | t.Fatal(err) 496 | } 497 | 498 | // first entry is supposed to be tmp -> / 499 | want := &tar.Header{Name: "tmp", Typeflag: TypeSymlink, Linkname: "/"} 500 | opts := cmpopts.IgnoreFields(tar.Header{}, "Mode", "Uname", 
"Gname", "ModTime", "Format") 501 | if diff := cmp.Diff(hdr, want, opts); diff != "" { 502 | t.Errorf("Next() returned unexpected diff (-want +got):\n%s", diff) 503 | } 504 | 505 | hdr, err = tr.Next() 506 | if hdr != nil { 507 | t.Errorf("No more tar entries were expected. Next() = %+v, want nil", hdr) 508 | } 509 | if err != io.EOF { 510 | t.Fatal(err) 511 | } 512 | } 513 | 514 | func TestWindowsShortFilenames(t *testing.T) { 515 | buf := bytes.NewBuffer(eWinShortTar[:]) 516 | t.Logf("size of archive: %d", len(buf.Bytes())) 517 | tr := NewReader(buf) 518 | tr.SetSecurityMode(tr.GetSecurityMode() | SkipWindowsShortFilenames) 519 | 520 | for i, want := range []string{"3D Objects", "foo/", "Some~Stuff"} { 521 | hdr, err := tr.Next() 522 | if err != nil { 523 | t.Errorf("No errors were expected at entry %d. Next() = %+v, want nil", i, err) 524 | } 525 | if hdr.Name != want { 526 | t.Errorf("Unexpected entry %d. Next().Name = %v, want %v", i, hdr.Name, want) 527 | } 528 | } 529 | 530 | hdr, err := tr.Next() 531 | if hdr != nil { 532 | t.Errorf("No more tar entries were expected. 
Next() = %+v, want nil", hdr) 533 | } 534 | if err != io.EOF { 535 | t.Fatal(err) 536 | } 537 | } 538 | -------------------------------------------------------------------------------- /tar/winshort.tar: -------------------------------------------------------------------------------- 1 | 3D Objects0000640376027102575230000000000514702232674013307 0ustar imrerprimarygrouphelo 2 | Androi~20000640376027102575230000000000514702232674013163 0ustar imrerprimarygrouphelo 3 | ANDROI~20000640376027102575230000000000514702232674012723 0ustar imrerprimarygrouphelo 4 | foo/0000750376027102575230000000000014702232674012333 5ustar imrerprimarygroupfoo/ANDROI~2/0000750376027102575230000000000014702232674013567 5ustar imrerprimarygroupfoo/ANDROI~2/bar0000640376027102575230000000000514702232674014252 0ustar imrerprimarygrouphelo 5 | foo/FOOOOO~1.JPG/0000750376027102575230000000000014702232674014231 5ustar imrerprimarygroupfoo/FOOOOO~1.JPG/bar0000640376027102575230000000000514702232674014714 0ustar imrerprimarygrouphelo 6 | foo/Androi~2/0000750376027102575230000000000014702232674014027 5ustar imrerprimarygroupfoo/Androi~2/bar0000640376027102575230000000000514702232674014512 0ustar imrerprimarygrouphelo 7 | FOOOOO~1.JPG0000640376027102575230000000000514702232674013365 0ustar imrerprimarygrouphelo 8 | Some~Stuff0000640376027102575230000000000514702232674013600 0ustar imrerprimarygrouphelo 9 | --------------------------------------------------------------------------------