├── .github └── workflows │ └── unit-tests.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── cache ├── cache.go ├── cache_test.go ├── proto │ ├── cache.pb.go │ └── cache.proto └── testdata │ ├── hashes.json │ └── hashr-cache-gLinux ├── client ├── client.go ├── cloudspanner │ └── cloudspanner.go └── postgres │ └── postgres.go ├── common └── common.go ├── core └── hashr │ ├── hashr.go │ ├── hashr_test.go │ └── testdata │ ├── 20200106.00.00-ubuntu-desktop-export │ ├── hashes.json │ └── tmp │ │ └── hashdb │ │ └── 20200106.00.00-ubuntu-desktop │ │ ├── file.01 │ │ ├── file.02 │ │ ├── file.03 │ │ ├── file.04 │ │ ├── file.05 │ │ ├── file.06 │ │ ├── file.07 │ │ ├── file.08 │ │ ├── file.09 │ │ └── file.10 │ └── 20200106.00.00-ubuntu-laptop-export │ ├── hashes.json │ └── tmp │ └── hashr │ └── 20200106.00.00-ubuntu-laptop │ ├── file.01 │ ├── file.02 │ ├── file.03 │ ├── file.04 │ ├── file.05 │ ├── file.06 │ ├── file.07 │ ├── file.08 │ ├── file.09 │ └── file.10 ├── docker ├── Dockerfile ├── README.md └── cloudbuild.yaml ├── docs └── assets │ └── HashR.png ├── exporters ├── gcp │ ├── gcp.go │ ├── gcp_test.go │ └── testdata │ │ └── extraction │ │ ├── file.01 │ │ ├── file.02 │ │ ├── file.03 │ │ └── file.04 └── postgres │ ├── postgres.go │ ├── postgres_test.go │ └── testdata │ └── extraction │ ├── ._file.01 │ ├── ._file.02 │ ├── ._file.03 │ ├── ._file.04 │ ├── file.01 │ ├── file.02 │ ├── file.03 │ └── file.04 ├── go.mod ├── go.sum ├── hashr.go ├── importers ├── aws │ ├── aws.go │ └── aws_test.go ├── common │ ├── common.go │ ├── common_test.go │ └── testdata │ │ └── targz │ │ ├── dir1 │ │ ├── desktop.tar.gz │ │ ├── laptop.tar.gz │ │ └── server.tar.gz │ │ ├── dir2 │ │ ├── desktop.tar.gz │ │ ├── laptop.tar.gz │ │ └── server.tar.gz │ │ ├── dir3 │ │ ├── desktop.tar.gz │ │ ├── laptop.tar.gz │ │ └── server.tar.gz │ │ └── dir4 │ │ ├── desktop.tar.gz │ │ ├── laptop.tar.gz │ │ └── server.tar.gz ├── deb │ ├── deb.go │ ├── deb_test.go │ ├── generate_tests.sh │ └── testdata │ 
│ ├── 20200106.00.00 │ │ ├── ubuntu-desktop.deb │ │ ├── ubuntu-laptop.deb │ │ └── ubuntu-server.deb │ │ ├── 20200107.00.00 │ │ ├── ubuntu-desktop.deb │ │ ├── ubuntu-laptop.deb │ │ └── ubuntu-server.deb │ │ ├── 20200107.01.00 │ │ ├── ubuntu-desktop.deb │ │ ├── ubuntu-laptop.deb │ │ └── ubuntu-server.deb │ │ └── 20200108.00.00 │ │ ├── ubuntu-desktop.deb │ │ ├── ubuntu-laptop.deb │ │ └── ubuntu-server.deb ├── gcp │ ├── gcp.go │ ├── gcp_test.go │ └── testdata │ │ ├── ._ubuntu-1804-lts-drawfork-v20190613.tar.gz │ │ └── ubuntu-1804-lts-drawfork-v20190613.tar.gz ├── gcr │ ├── gcr.go │ └── gcr_test.go ├── importer.go.example ├── iso9660 │ ├── generate_tests.sh │ ├── iso9660.go │ ├── iso9660_test.go │ └── testdata │ │ ├── 20200106.00.00 │ │ ├── ubuntu-desktop.iso │ │ ├── ubuntu-laptop.iso │ │ └── ubuntu-server.iso │ │ ├── 20200107.00.00 │ │ ├── ubuntu-desktop.iso │ │ ├── ubuntu-laptop.iso │ │ └── ubuntu-server.iso │ │ ├── 20200107.01.00 │ │ ├── ubuntu-desktop.iso │ │ ├── ubuntu-laptop.iso │ │ └── ubuntu-server.iso │ │ └── 20200108.00.00 │ │ ├── ubuntu-desktop.iso │ │ ├── ubuntu-laptop.iso │ │ └── ubuntu-server.iso ├── rpm │ ├── generate_tests.sh │ ├── rpm.go │ ├── rpm_test.go │ └── testdata │ │ ├── 20200106.00.00 │ │ ├── ubuntu-desktop.rpm │ │ ├── ubuntu-laptop.rpm │ │ └── ubuntu-server.rpm │ │ ├── 20200107.00.00 │ │ ├── ubuntu-desktop.rpm │ │ ├── ubuntu-laptop.rpm │ │ └── ubuntu-server.rpm │ │ ├── 20200107.01.00 │ │ ├── ubuntu-desktop.rpm │ │ ├── ubuntu-laptop.rpm │ │ └── ubuntu-server.rpm │ │ └── 20200108.00.00 │ │ ├── ubuntu-desktop.rpm │ │ ├── ubuntu-laptop.rpm │ │ └── ubuntu-server.rpm ├── targz │ ├── targz.go │ ├── targz_test.go │ └── testdata │ │ ├── 20200106.00.00 │ │ ├── ubuntu-desktop.tar.gz │ │ ├── ubuntu-laptop.tar.gz │ │ └── ubuntu-server.tar.gz │ │ ├── 20200107.00.00 │ │ ├── ubuntu-desktop.tar.gz │ │ ├── ubuntu-laptop.tar.gz │ │ └── ubuntu-server.tar.gz │ │ ├── 20200107.01.00 │ │ ├── ubuntu-desktop.tar.gz │ │ ├── ubuntu-laptop.tar.gz │ │ └── 
ubuntu-server.tar.gz │ │ └── 20200108.00.00 │ │ ├── ubuntu-desktop.tar.gz │ │ ├── ubuntu-laptop.tar.gz │ │ └── ubuntu-server.tar.gz ├── windows │ └── windows.go ├── wsus │ ├── testdata │ │ ├── ._03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab │ │ ├── ._138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab │ │ ├── ._1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab │ │ ├── ._1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab │ │ ├── 03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab │ │ ├── 138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab │ │ ├── 1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab │ │ └── 1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab │ ├── wsus.go │ └── wsus_test.go └── zip │ ├── generate_tests.sh │ ├── testdata │ ├── 20200106.00.00 │ │ ├── ubuntu-desktop.jar │ │ ├── ubuntu-laptop.whl │ │ └── ubuntu-server.egg │ ├── 20200107.00.00 │ │ ├── ubuntu-desktop.zip │ │ ├── ubuntu-laptop.zip │ │ └── ubuntu-server.zip │ ├── 20200107.01.00 │ │ ├── ubuntu-desktop.zip │ │ ├── ubuntu-laptop.zip │ │ └── ubuntu-server.zip │ └── 20200108.00.00 │ │ ├── ubuntu-desktop.zip │ │ ├── ubuntu-laptop.zip │ │ └── ubuntu-server.zip │ ├── zip.go │ └── zip_test.go ├── processors └── local │ ├── local.go │ ├── local_test.go │ └── testdata │ ├── ._disk_2_xfs_volumes.raw │ └── disk_2_xfs_volumes.raw ├── scripts ├── CreateCloudSpannerExporterTables.ddl ├── CreateJobsTable.ddl ├── CreateJobsTable.sql ├── CreatePostgresExporterTables.sql ├── aws │ ├── AwsHashrUploaderPolicy.json │ ├── AwsHashrWorkerPolicy.json │ ├── hashr_aws_init.txt │ └── hashr_setup.sh └── hashr-archive └── storage ├── cloudspanner └── cloudspanner.go └── postgres └── postgres.go /.github/workflows/unit-tests.yml: -------------------------------------------------------------------------------- 1 | name: unit-tests 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened] 6 | 7 | jobs: 8 | BuildTest: 9 | 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | 14 | - name: Set up Go 15 | uses: actions/setup-go@v3 16 | 
with: 17 | go-version: 1.19 18 | 19 | - name: Install golint 20 | run: go install golang.org/x/lint/golint@latest 21 | 22 | - name: Run Spanner emulator 23 | run: docker run -d -p 9010:9010 -p 9020:9020 gcr.io/cloud-spanner-emulator/emulator 24 | 25 | - name: Run golint 26 | run: golint ./... 27 | 28 | - name: Build 29 | run: go build -v ./... 30 | 31 | - name: Test 32 | run: go test -timeout 2m -v ./... 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | hashr 2 | 3 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement (CLA). You (or your employer) retain the copyright to your 10 | contribution; this simply gives us permission to use and redistribute your 11 | contributions as part of the project. Head over to 12 | to see your current agreements on file or 13 | to sign a new one. 14 | 15 | You generally only need to submit a CLA once, so if you've already submitted one 16 | (even if it was for a different project), you probably don't need to do it 17 | again. 18 | 19 | ## Code Reviews 20 | 21 | All submissions, including submissions by project members, require review. We 22 | use GitHub pull requests for this purpose. Consult 23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 24 | information on using pull requests. 25 | 26 | ## Community Guidelines 27 | 28 | This project follows 29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 
-------------------------------------------------------------------------------- /cache/cache.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package cache provides functions that are used to interact with local cache. 16 | package cache 17 | 18 | import ( 19 | "encoding/json" 20 | "fmt" 21 | "io/ioutil" 22 | "os" 23 | "path/filepath" 24 | "sync" 25 | 26 | "github.com/golang/glog" 27 | "github.com/google/hashr/common" 28 | "google.golang.org/protobuf/proto" 29 | "google.golang.org/protobuf/types/known/timestamppb" 30 | 31 | cpb "github.com/google/hashr/cache/proto" 32 | ) 33 | 34 | func readJSON(extraction *common.Extraction) ([]common.Sample, error) { 35 | pathJSON := filepath.Join(extraction.Path, "hashes.json") 36 | var samples []common.Sample 37 | 38 | data, err := ioutil.ReadFile(pathJSON) 39 | if err != nil { 40 | return nil, fmt.Errorf("error while reading hashes.json file: %v", err) 41 | } 42 | 43 | err = json.Unmarshal(data, &samples) 44 | if err != nil { 45 | return nil, fmt.Errorf("error unmarshalling hashes.json file: %v", err) 46 | } 47 | 48 | for _, sample := range samples { 49 | for i := range sample.Paths { 50 | sample.Paths[i] = filepath.Join(extraction.Path, sample.Paths[i]) 51 | } 52 | } 53 | 54 | return samples, nil 55 | } 56 | 57 | // Save saves the cache to a local 
file. 58 | func Save(repoName, cacheDir string, cacheMap *sync.Map) error { 59 | // TODO(mlegin): Compress the file before saving it to disk. 60 | cachePath := filepath.Join(cacheDir, fmt.Sprintf("hashr-cache-%s", repoName)) 61 | 62 | cache := &cpb.Cache{Samples: make(map[string]*cpb.Entries)} 63 | cacheMap.Range(func(key, value interface{}) bool { 64 | hash, ok := key.(string) 65 | if !ok { 66 | glog.Exitf("Unexpected key type in cache map: %v", key) 67 | } 68 | 69 | entries, ok := value.(*cpb.Entries) 70 | if !ok { 71 | glog.Exitf("Unexpected value type in cache map: %v", key) 72 | } 73 | 74 | cache.Samples[hash] = entries 75 | 76 | return true 77 | }) 78 | 79 | data, err := proto.Marshal(cache) 80 | if err != nil { 81 | return fmt.Errorf("error marshalling %s repo cache: %v", repoName, err) 82 | } 83 | 84 | cacheFile, err := os.Create(cachePath) 85 | if err != nil { 86 | return fmt.Errorf("error opening %s repo cache file for write: %v", repoName, err) 87 | } 88 | 89 | _, err = cacheFile.Write(data) 90 | if err != nil { 91 | return fmt.Errorf("error writing to %s repo cache file: %v", repoName, err) 92 | } 93 | glog.Infof("Successfully saved %s repo cache to %s.", repoName, cachePath) 94 | 95 | return nil 96 | } 97 | 98 | // Load reads cache entries from a file stored locally. If the file is not present, the cache is 99 | // created in memory. 100 | func Load(repoName, cacheDir string) (*sync.Map, error) { 101 | var cacheMap sync.Map 102 | cachePath := filepath.Join(cacheDir, fmt.Sprintf("hashr-cache-%s", repoName)) 103 | if _, err := os.Stat(cachePath); os.IsNotExist(err) { 104 | glog.Infof("Cache for %s repo not found at %s. Creating new cache in memory.", repoName, cachePath) 105 | return &cacheMap, nil 106 | } 107 | 108 | data, err := ioutil.ReadFile(cachePath) 109 | if err != nil { 110 | // If there is an error while reading the file it might be corrupted. 
111 | if err := os.Remove(cachePath); err != nil { 112 | return nil, fmt.Errorf("error while trying to delete the %s repo cache file: %v", repoName, err) 113 | } 114 | return &cacheMap, nil 115 | } 116 | 117 | cache := &cpb.Cache{} 118 | if err := proto.Unmarshal(data, cache); err != nil { 119 | // If there is an error while unmarshalling the file it might be corrupted. 120 | if err := os.Remove(cachePath); err != nil { 121 | return nil, fmt.Errorf("error while trying to delete the %s repo cache file: %v", repoName, err) 122 | } 123 | return &cacheMap, nil 124 | } 125 | glog.Infof("Successfully loaded cache for %s repo from %s.", repoName, cachePath) 126 | 127 | for k, v := range cache.Samples { 128 | cacheMap.Store(k, v) 129 | } 130 | 131 | return &cacheMap, nil 132 | } 133 | 134 | // Check checks if files present in a given extraction are already in the local cache. 135 | func Check(extraction *common.Extraction, cache *sync.Map) ([]common.Sample, error) { 136 | samples, err := readJSON(extraction) 137 | if err != nil { 138 | return nil, fmt.Errorf("error while reading hashes.json file: %v", err) 139 | } 140 | 141 | var exports []common.Sample 142 | for _, sample := range samples { 143 | newCacheEntry := &cpb.CacheEntry{ 144 | SourceId: extraction.SourceID, 145 | SourceHash: extraction.SourceSHA256, 146 | } 147 | newExport := common.Sample{ 148 | Sha256: sample.Sha256, 149 | Paths: sample.Paths, 150 | } 151 | 152 | if sampleCache, ok := cache.Load(sample.Sha256); ok { 153 | // If the sample is already in the cache, add a new entry. 154 | sampleCache.(*cpb.Entries).Entries = append(sampleCache.(*cpb.Entries).Entries, newCacheEntry) 155 | sampleCache.(*cpb.Entries).LastUpdated = timestamppb.Now() 156 | } else { 157 | // Add a new sample to the cache. 
158 | cache.Store(sample.Sha256, &cpb.Entries{ 159 | LastUpdated: timestamppb.Now(), 160 | Entries: []*cpb.CacheEntry{newCacheEntry}, 161 | }) 162 | newExport.Upload = true 163 | } 164 | 165 | exports = append(exports, newExport) 166 | } 167 | 168 | return exports, nil 169 | } 170 | -------------------------------------------------------------------------------- /cache/proto/cache.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | syntax = "proto3"; 16 | 17 | package cachepb; 18 | 19 | import "google/protobuf/timestamp.proto"; 20 | 21 | option go_package = "github.com/google/hashr/cache/proto/cachepb"; 22 | 23 | message CacheEntry { 24 | string source_id = 1; 25 | string source_hash = 2; 26 | repeated string path = 3; 27 | } 28 | 29 | message Entries { 30 | google.protobuf.Timestamp last_updated = 1; 31 | repeated CacheEntry entries = 2; 32 | } 33 | 34 | message Cache { 35 | map samples = 1; 36 | } -------------------------------------------------------------------------------- /cache/testdata/hashes.json: -------------------------------------------------------------------------------- 1 | [{"sha256": "d5d66fe6a4559c59ad103ab40e01c4fc0df7eb8ba901d50e5ceae3909b2e0d61", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.09"]}, {"sha256": "4878dd6c7af7fecdf89832384d84ed93b78123e69e6a0097efac5320da2ac637", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.02"]}, {"sha256": "ca8a605cf72b21b89f9211af1550d7f943a2b844084241f60eddd9d6536c78ec", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.10"]}, {"sha256": "4741b2746859cbe24f529a4f3108c2d8b4ea5f442f8a3743ff3543c76f369c90", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.01"]}, {"sha256": "d889bcc21cffc076d6e9cf7e32d0dd801977141e6f71d4c96ae84e5f1765e71a", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.07"]}, {"sha256": "00632850049f80763ada81ec0cacf015dbd67fb1b956ec2acb8aa862e511b3bc", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.04"]}, {"sha256": "b1f8a81821e18bba696a52b5169524076f77bc588c02ab195f969df4e2650dce", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.03"]}, {"sha256": "8780622e75a9c1be4b30ae9e15d6d94249926aaa9139b7a563e42ee0eab70eea", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.05"]}, {"sha256": "99962d9e62c15c73527ca72b4e5e85809d4254326800eb2c65b35339029e02d1", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.06"]}, {"sha256": 
"e0a98ad618a3cef7f8754a2711322e398879f47e50ca491c75eca6ba476e421a", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.08"]}] 2 | -------------------------------------------------------------------------------- /cache/testdata/hashr-cache-gLinux: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/cache/testdata/hashr-cache-gLinux -------------------------------------------------------------------------------- /client/client.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "encoding/json" 7 | "flag" 8 | "fmt" 9 | 10 | "cloud.google.com/go/spanner" 11 | "github.com/google/hashr/client/cloudspanner" 12 | "github.com/google/hashr/client/postgres" 13 | _ "github.com/lib/pq" 14 | 15 | "github.com/golang/glog" 16 | ) 17 | 18 | var ( 19 | hashStorage = flag.String("hashStorage", "", "Storage used for computed hashes, can have one of the two values: postgres, cloudspanner") 20 | spannerDBPath = flag.String("spanner_db_path", "", "Path to spanner DB.") 21 | 22 | // Postgres DB flags 23 | postgresHost = flag.String("postgres_host", "localhost", "PostgreSQL instance address.") 24 | postgresPort = flag.Int("postgres_port", 5432, "PostgresSQL instance port.") 25 | postgresUser = flag.String("postgres_user", "hashr", "PostgresSQL user.") 26 | postgresPassword = flag.String("postgres_password", "hashr", "PostgresSQL password.") 27 | postgresDBName = flag.String("postgres_db", "hashr", "PostgresSQL database.") 28 | ) 29 | 30 | // Storage represents storage that is used to store data about processed sources. 
31 | type Storage interface { 32 | GetSamples(ctx context.Context) (map[string]map[string]string, error) 33 | } 34 | 35 | func main() { 36 | ctx := context.Background() 37 | flag.Parse() 38 | 39 | var storage Storage 40 | switch *hashStorage { 41 | case "postgres": 42 | psqlInfo := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=disable", 43 | *postgresHost, *postgresPort, *postgresUser, *postgresPassword, *postgresDBName) 44 | 45 | db, err := sql.Open("postgres", psqlInfo) 46 | if err != nil { 47 | glog.Exitf("Error initializing Postgres client: %v", err) 48 | } 49 | defer db.Close() 50 | 51 | storage, err = postgres.NewStorage(db) 52 | if err != nil { 53 | glog.Exitf("Error initializing Postgres storage: %v", err) 54 | } 55 | case "cloudspanner": 56 | spannerClient, err := spanner.NewClient(ctx, *spannerDBPath) 57 | if err != nil { 58 | glog.Exitf("Error initializing Spanner client: %v", err) 59 | } 60 | 61 | storage, err = cloudspanner.NewStorage(ctx, spannerClient) 62 | if err != nil { 63 | glog.Exitf("Error initializing Postgres storage: %v", err) 64 | } 65 | default: 66 | glog.Exit("hashStorage flag needs to have one of the two values: postgres, cloudspanner") 67 | 68 | } 69 | samples, err := storage.GetSamples(ctx) 70 | if err != nil { 71 | glog.Exitf("Error retriving samples: %v", err) 72 | } 73 | 74 | jsonData, err := json.Marshal(samples) 75 | if err != nil { 76 | fmt.Println("Error:", err) 77 | return 78 | } 79 | 80 | fmt.Println(string(jsonData)) 81 | } 82 | -------------------------------------------------------------------------------- /client/cloudspanner/cloudspanner.go: -------------------------------------------------------------------------------- 1 | package cloudspanner 2 | 3 | import ( 4 | "context" 5 | "strconv" 6 | 7 | "cloud.google.com/go/spanner" 8 | 9 | "google.golang.org/api/iterator" 10 | ) 11 | 12 | // Storage allows to interact with cloud spanner. 
13 | type Storage struct { 14 | spannerClient *spanner.Client 15 | } 16 | 17 | // NewStorage creates new Storage struct that allows to interact with cloud spanner. 18 | func NewStorage(ctx context.Context, spannerClient *spanner.Client) (*Storage, error) { 19 | return &Storage{spannerClient: spannerClient}, nil 20 | } 21 | 22 | // GetSamples fetches processing samples from cloud spanner. 23 | func (s *Storage) GetSamples(ctx context.Context) (map[string]map[string]string, error) { 24 | samples := make(map[string]map[string]string) 25 | iter := s.spannerClient.Single().Read(ctx, "samples", 26 | spanner.AllKeys(), []string{"sha256", "mimetype", "file_output", "size"}) 27 | defer iter.Stop() 28 | for { 29 | row, err := iter.Next() 30 | if err == iterator.Done { 31 | break 32 | } 33 | if err != nil { 34 | return nil, err 35 | } 36 | var sha256, mimetype, fileOutput string 37 | var size int64 38 | err = row.ColumnByName("sha256", &sha256) 39 | if err != nil { 40 | return nil, err 41 | } 42 | err = row.ColumnByName("mimetype", &mimetype) 43 | if err != nil { 44 | return nil, err 45 | } 46 | err = row.ColumnByName("file_output", &fileOutput) 47 | if err != nil { 48 | return nil, err 49 | } 50 | err = row.ColumnByName("size", &size) 51 | if err != nil { 52 | return nil, err 53 | } 54 | samples[sha256] = make(map[string]string) 55 | 56 | // Assign values to the nested map 57 | samples[sha256]["sha256"] = sha256 58 | samples[sha256]["mimetype"] = mimetype 59 | samples[sha256]["file_output"] = fileOutput 60 | samples[sha256]["size"] = strconv.FormatInt(size, 10) 61 | 62 | } 63 | return samples, nil 64 | } 65 | -------------------------------------------------------------------------------- /client/postgres/postgres.go: -------------------------------------------------------------------------------- 1 | // Package postgres implements PostgreSQL as a hashR storage. 
2 | package postgres 3 | 4 | import ( 5 | "context" 6 | "database/sql" 7 | "fmt" 8 | 9 | // Blank import below is needed for the SQL driver. 10 | _ "github.com/lib/pq" 11 | ) 12 | 13 | // Storage allows to interact with PostgreSQL instance. 14 | type Storage struct { 15 | sqlDB *sql.DB 16 | } 17 | 18 | // NewStorage creates new Storage struct that allows to interact with PostgreSQL instance and all the necessary tables, if they don't exist. 19 | func NewStorage(sqlDB *sql.DB) (*Storage, error) { 20 | return &Storage{sqlDB: sqlDB}, nil 21 | } 22 | 23 | // GetSamples fetches processed samples from postgres. 24 | func (s *Storage) GetSamples(ctx context.Context) (map[string]map[string]string, error) { 25 | exists, err := tableExists(s.sqlDB, "samples") 26 | if err != nil { 27 | return nil, err 28 | } 29 | 30 | samples := make(map[string]map[string]string) 31 | 32 | if exists { 33 | var sql = `SELECT * FROM samples;` 34 | 35 | rows, err := s.sqlDB.Query(sql) 36 | 37 | if err != nil { 38 | return nil, err 39 | } 40 | 41 | defer rows.Close() 42 | 43 | for rows.Next() { 44 | var sha256, mimetype, fileOutput, size string 45 | err := rows.Scan(&sha256, &mimetype, &fileOutput, &size) 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | samples[sha256] = make(map[string]string) 51 | 52 | // Assign values to the nested map 53 | samples[sha256]["sha256"] = sha256 54 | samples[sha256]["mimetype"] = mimetype 55 | samples[sha256]["file_output"] = fileOutput 56 | samples[sha256]["size"] = size 57 | } 58 | 59 | } else { 60 | return nil, fmt.Errorf("table samples does not exist") 61 | } 62 | 63 | return samples, nil 64 | } 65 | 66 | func tableExists(db *sql.DB, tableName string) (bool, error) { 67 | // Query to check if the table exists in PostgreSQL 68 | query := ` 69 | SELECT EXISTS ( 70 | SELECT 1 71 | FROM information_schema.tables 72 | WHERE table_name = $1 73 | ) 74 | ` 75 | 76 | var exists bool 77 | err := db.QueryRow(query, tableName).Scan(&exists) 78 | if err != nil { 
79 | return false, err 80 | } 81 | 82 | return exists, nil 83 | } 84 | -------------------------------------------------------------------------------- /common/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package common provides common data structures used in hashR. 16 | package common 17 | 18 | // Sample represent single file extracted from a given source. 19 | type Sample struct { 20 | Sha256 string `json:"sha256"` 21 | Paths []string `json:"paths"` 22 | Upload bool `json:"Upload"` 23 | } 24 | 25 | // Extraction contains information about image_export.py extraction. 26 | type Extraction struct { 27 | SourceID string 28 | RepoName string 29 | BaseDir string 30 | Path string 31 | SourceSHA256 string 32 | } 33 | -------------------------------------------------------------------------------- /core/hashr/hashr_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package hashr 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "os" 21 | "testing" 22 | 23 | "github.com/golang/glog" 24 | 25 | "github.com/google/hashr/common" 26 | 27 | "cloud.google.com/go/spanner" 28 | "google.golang.org/api/option" 29 | "google.golang.org/grpc" 30 | "google.golang.org/grpc/credentials/insecure" 31 | 32 | database "cloud.google.com/go/spanner/admin/database/apiv1" 33 | instance "cloud.google.com/go/spanner/admin/instance/apiv1" 34 | 35 | dbadminpb "google.golang.org/genproto/googleapis/spanner/admin/database/v1" 36 | instancepb "google.golang.org/genproto/googleapis/spanner/admin/instance/v1" 37 | ) 38 | 39 | const ( 40 | dbSchema = `CREATE TABLE jobs ( 41 | imported_at TIMESTAMP NOT NULL, 42 | id STRING(500), 43 | repo STRING(200), 44 | repo_path STRING(500), 45 | quick_sha256 STRING(100) NOT NULL, 46 | location STRING(1000), 47 | sha256 STRING(100), 48 | status STRING(50), 49 | error STRING(10000), 50 | preprocessing_duration INT64, 51 | processing_duration INT64, 52 | export_duration INT64, 53 | files_extracted INT64, 54 | files_exported INT64, 55 | ) PRIMARY KEY(quick_sha256)` 56 | ) 57 | 58 | type testImporter struct { 59 | } 60 | 61 | func (i *testImporter) RepoName() string { 62 | return "ubuntu" 63 | } 64 | 65 | func (i *testImporter) RepoPath() string { 66 | return "ubuntu" 67 | } 68 | 69 | func (i *testImporter) DiscoverRepo() ([]Source, error) { 70 | sources := []Source{ 71 | &testSource{id: "001", localPath: "/tmp/001", quickSha256hash: 
"7a3e6b16cb75f48fb897eff3ae732f3154f6d203b53f33660f01b4c3b6bc2df9", repoPath: "/tmp/"}, 72 | &testSource{id: "002", localPath: "/tmp/002", quickSha256hash: "a1dd6837f284625bdb1cb68f1dbc85c5dc4d8b05bae24c94ed5f55c477326ea2", repoPath: "/tmp/"}, 73 | } 74 | 75 | for _, source := range sources { 76 | file, err := os.Create(source.RemotePath()) 77 | if err != nil { 78 | return nil, fmt.Errorf("could not create dummy sources: %v", err) 79 | } 80 | file.Close() 81 | } 82 | 83 | return sources, nil 84 | } 85 | 86 | type testSource struct { 87 | id string 88 | localPath string 89 | quickSha256hash string 90 | repoPath string 91 | } 92 | 93 | func (s *testSource) Preprocess() (string, error) { 94 | return "", nil 95 | } 96 | func (s *testSource) QuickSHA256Hash() (string, error) { 97 | return s.quickSha256hash, nil 98 | } 99 | func (s *testSource) RemotePath() string { 100 | return s.localPath 101 | } 102 | func (s *testSource) ID() string { 103 | return s.id 104 | } 105 | func (s *testSource) RepoName() string { 106 | return "ubuntu" 107 | } 108 | func (s *testSource) RepoPath() string { 109 | return "ubuntu" 110 | } 111 | func (s *testSource) Local() bool { 112 | return false 113 | } 114 | func (s *testSource) LocalPath() string { 115 | return s.localPath 116 | } 117 | func (s *testSource) Description() string { 118 | return "" 119 | } 120 | 121 | type testProcessor struct { 122 | } 123 | 124 | func (p *testProcessor) ImageExport(sourcePath string) (string, error) { 125 | return "testdata/20200106.00.00-ubuntu-laptop-export", nil 126 | } 127 | 128 | type testExporter struct { 129 | } 130 | 131 | func (e *testExporter) Export(ctx context.Context, repoName, repoPath, sourceID, sourceHash, sourcePath, sourceDescription string, samples []common.Sample) error { 132 | return nil 133 | } 134 | 135 | func (e *testExporter) Name() string { 136 | return "testExporter" 137 | } 138 | 139 | // TestRun requires Spanner emulator to be running: 
https://cloud.google.com/spanner/docs/emulator. 140 | func TestRun(t *testing.T) { 141 | for _, tc := range []struct { 142 | export bool 143 | exportPath string 144 | exportWorkerCount int 145 | processingWorkerCount int 146 | purgeJobsFile bool 147 | }{ 148 | { 149 | export: false, 150 | exportPath: "/tmp/hashr-export", 151 | exportWorkerCount: 100, 152 | processingWorkerCount: 1, 153 | }, 154 | { 155 | export: true, 156 | exportWorkerCount: 100, 157 | processingWorkerCount: 1, 158 | purgeJobsFile: true, 159 | }, 160 | { 161 | export: false, 162 | exportPath: "/tmp/hashr-export", 163 | processingWorkerCount: 1, 164 | }, 165 | { 166 | export: false, 167 | exportPath: "/tmp/hashr-export", 168 | processingWorkerCount: 1, 169 | }, 170 | } { 171 | ctx := context.Background() 172 | 173 | o := []option.ClientOption{ 174 | option.WithEndpoint("localhost:9010"), 175 | option.WithoutAuthentication(), 176 | option.WithGRPCDialOption(grpc.WithTransportCredentials(insecure.NewCredentials())), 177 | } 178 | 179 | instanceAdmin, err := instance.NewInstanceAdminClient(ctx, o...) 180 | if err != nil { 181 | glog.Fatalf("error dialing instance admin: %v", err) 182 | } 183 | defer instanceAdmin.Close() 184 | 185 | if err := instanceAdmin.DeleteInstance(ctx, &instancepb.DeleteInstanceRequest{Name: "projects/hashr/instances/hashr"}); err != nil { 186 | glog.Warning(err) 187 | } 188 | 189 | op, err := instanceAdmin.CreateInstance(ctx, &instancepb.CreateInstanceRequest{ 190 | Parent: "projects/hashr", 191 | InstanceId: "hashr", 192 | Instance: &instancepb.Instance{ 193 | DisplayName: "hashr", 194 | NodeCount: 1, 195 | }, 196 | }) 197 | if err != nil { 198 | glog.Fatalf("error creating test spanner instance: %v", err) 199 | } 200 | if _, err := op.Wait(ctx); err != nil { 201 | glog.Fatalf("error creating test spanner instance: %v", err) 202 | } 203 | 204 | databaseAdmin, err := database.NewDatabaseAdminClient(ctx, o...) 
205 | if err != nil { 206 | glog.Fatalf("error creating database admin client for emulator: %v", err) 207 | } 208 | 209 | dbURI := "projects/hashr/instances/hashr/databases/hashr" 210 | op2, err := databaseAdmin.CreateDatabase(ctx, &dbadminpb.CreateDatabaseRequest{ 211 | Parent: "projects/hashr/instances/hashr", 212 | CreateStatement: "CREATE DATABASE hashr", 213 | ExtraStatements: []string{dbSchema}, 214 | }) 215 | if err != nil { 216 | glog.Fatalf("error creating test DB %v: %v", dbURI, err) 217 | } 218 | if _, err = op2.Wait(ctx); err != nil { 219 | glog.Fatalf("error creating test DB %v: %v", dbURI, err) 220 | } 221 | 222 | spannerStorage, err := newStorage(ctx, dbURI, o...) 223 | if err != nil { 224 | glog.Fatalf("error creating test spanner client: %v", err) 225 | } 226 | 227 | hdb := New([]Importer{&testImporter{}}, &testProcessor{}, []Exporter{&testExporter{}}, spannerStorage) 228 | hdb.CacheDir = "/tmp/" 229 | hdb.Export = tc.export 230 | hdb.ExportPath = tc.exportPath 231 | hdb.ExportWorkerCount = tc.exportWorkerCount 232 | hdb.ProcessingWorkerCount = tc.processingWorkerCount 233 | 234 | // This is a simple test to check the full processing logic with different number of workers. 235 | // The test should fail on any error. 236 | // TODO(mlegin): Add a test to check the telemetry stats of the whole Run. 237 | for i := 1; i <= tc.processingWorkerCount; i++ { 238 | hdb.ProcessingWorkerCount = i 239 | if err := hdb.Run(context.Background()); err != nil { 240 | t.Errorf("Unexpected error while running hashR: %v", err) 241 | } 242 | } 243 | } 244 | } 245 | 246 | // Storage allows to interact with cloud spanner. 247 | type fakeStorage struct { 248 | spannerClient *spanner.Client 249 | } 250 | 251 | // NewStorage creates new Storage struct that allows to interact with cloud spanner. 
252 | func newStorage(ctx context.Context, spannerDBPath string, opts ...option.ClientOption) (*fakeStorage, error) { 253 | spannerClient, err := spanner.NewClient(ctx, spannerDBPath, opts...) 254 | if err != nil { 255 | return nil, err 256 | } 257 | 258 | return &fakeStorage{spannerClient: spannerClient}, nil 259 | } 260 | 261 | // UpdateJobs updates cloud spanner table. 262 | func (s *fakeStorage) UpdateJobs(ctx context.Context, qHash string, p *ProcessingSource) error { 263 | return nil 264 | } 265 | 266 | // FetchJobs fetches processing jobs from cloud spanner. 267 | func (s *fakeStorage) FetchJobs(ctx context.Context) (map[string]string, error) { 268 | return make(map[string]string), nil 269 | } 270 | -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/hashes.json: -------------------------------------------------------------------------------- 1 | [{"sha256": "952b39dff291f84b330206a1131c06592c8055800071fc84888c9a3052e51543", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.09"]}, {"sha256": "a74bb803c7ff5bd875867fc3f4ceabb6fbe888eea6361b876111cb8060fe7e8c", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.02"]}, {"sha256": "79f5431b2eecae25c0b29ad8e5d8642d0575b015ed8008ff277dd2308cbdd173", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.10"]}, {"sha256": "c2e7f7d23b30766c2d55e847b349d0540f4847b263ee15521dc72023846884ea", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.01"]}, {"sha256": "2cbbbd2fa4045f092ed359cd6632e01e1e45006681949aa98cee7aa0edc6f771", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.07"]}, {"sha256": "efa02f852f81f973f2c10784bc5194de1d09f3e905ea296b22090ff3379ed6c1", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.04"]}, {"sha256": "2789f4b90b038d57e592d01e0cd13a98b398cc7a524c3e8a7faaaaaf59893e7d", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.03"]}, {"sha256": 
"ddf7c381937d07c67e509f18feec42a939bddf2ea7db985a4b045d583c95ec04", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.05"]}, {"sha256": "2fd1880876ca7640d04fae51fa988fe40505ab15f0f1a05ca6b0b5f09985c82a", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.06"]}, {"sha256": "8ab37107e0ed8d084afaf252fcdb2e66b99812ab12864b1cd12dfd5a44b25e5e", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.08"]}] -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.01: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.01 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.02: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.02 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.03: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.03 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.04: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.04 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.05: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.05 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.06: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.06 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.07: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.07 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.08: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.08 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.09: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.09 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.10: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.10 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/hashes.json: -------------------------------------------------------------------------------- 1 | [{"sha256": "d370aa6801e91d7fa48db7f4388c4e3858f58de124d9c4fd53cb9a25bbc2fa34", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.09"]}, {"sha256": "3f1ee77b201b6c4f1c37872e363387b247415293853a3f7eed25effee396b68f", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.02"]}, {"sha256": "b665f633c8f1e184972cb9ebc99490cf999fcae5d520fc8e144ee1124d05628b", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.10"]}, {"sha256": "8bc259fd7d49e3a94a2001e7ec276c51736a66167fc90e3453771b0e8e9fc17c", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.01"]}, {"sha256": 
"fa6d182f5bd8613830c118e0d7296baa59ae33b0d32a4557106cd15098b8bcf9", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.07"]}, {"sha256": "b6f453c6cb97193dbf52bdd8423d3c5f6308521af9254cd65f5cb9a777c6b203", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.04"]}, {"sha256": "b9b1fcb88ca7c884c4105c3f9e6f5c782521533ab529b84db41d82241a1b148e", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.03"]}, {"sha256": "e6af44bf176b209b8ca050e7834aef2b1b6bcc292acde3c456a8a81d2d47c37c", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.05"]}, {"sha256": "a649460a16c3a2d9a097f93e6e2d0c89c5a52ca5e1cc6d6ca03c64417905753d", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.06"]}, {"sha256": "f9788be264fc476a842f3e23950a1c0070b47948f95eeccc8c243c45afd62524", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.08"]}] -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.01: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.01 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.02: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.02 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.03: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.03 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.04: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.04 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.05: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.05 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.06: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.06 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.07: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.07 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.08: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.08 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.09: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.09 -------------------------------------------------------------------------------- /core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.10: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.10 -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use: docker build --no-cache --build-arg PPA_TRACK="[staging|stable]" 2 | 3 | # Stage 0 - hashr-builder 4 | FROM golang:alpine AS hashr-builder 5 | RUN apk add --no-cache build-base 6 | 7 | # Compile hashr statically 8 | RUN mkdir -p /opt/hashr/ 
9 | COPY . /opt/hashr/ 10 | RUN cd /opt/hashr/ && GOOS=linux GOARCH=amd64 go build -v -ldflags="-linkmode=external -extldflags=-static" -tags osusergo,netgo -o hashr hashr.go 11 | 12 | # Stage 1 - hashr 13 | FROM ubuntu:22.04 14 | 15 | ARG PPA_TRACK=stable 16 | ENV DEBIAN_FRONTEND=noninteractive 17 | 18 | RUN apt-get update && apt-get -y upgrade && apt-get -y install \ 19 | p7zip-full \ 20 | apt-transport-https \ 21 | apt-utils \ 22 | sudo \ 23 | curl \ 24 | file \ 25 | gpg \ 26 | software-properties-common \ 27 | && apt-get clean && rm -rf /var/cache/apt/* /var/lib/apt/lists/* 28 | 29 | RUN add-apt-repository -y ppa:gift/$PPA_TRACK 30 | RUN apt-get update && apt-get -y install plaso-tools 31 | 32 | RUN useradd -G disk,sudo -u 999 hashr 33 | RUN echo "hashr ALL = (root) NOPASSWD: /bin/mount,/bin/umount,/sbin/losetup,/bin/rm" > /etc/sudoers.d/hashr 34 | 35 | USER root 36 | 37 | WORKDIR /usr/local/bin 38 | COPY --from=hashr-builder /opt/hashr/hashr . 39 | 40 | VOLUME ["/data"] 41 | 42 | WORKDIR /home/hashr/ 43 | RUN chmod +x /usr/local/bin/hashr 44 | USER hashr 45 | ENTRYPOINT ["/usr/local/bin/hashr"] 46 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # HashR in docker 2 | 3 | Follow these steps to set-up HashR running in a docker container. 4 | 5 | If you want a local installation, check [these steps](https://github.com/google/hashr#setting-up-hashr). 
6 | 7 | ## Table of contents 8 | 9 | * [HashR docker image](#hashr-docker-image) 10 | * [Pull the HashR image](#pull-the-hashr-image) 11 | * [Build the HashR image](#build-the-hashr-image) 12 | * [Setup a database and importers](#setup-a-database-and-importers) 13 | * [Database](#database) 14 | * [Importers](#importers) 15 | * [Docker networking](#docker-networking) 16 | * [Run HashR](#run-hashr) 17 | * [Examples](#examples) 18 | 19 | 20 | ## HashR docker image 21 | 22 | You can either use our hosted docker image or build it yourself. 23 | 24 | ### Pull the HashR image 25 | 26 | The HashR docker image will provide the HashR binary and tools it needs to 27 | work. 28 | 29 | By default the latest tagged release will be pulled if not specified otherwise: 30 | 31 | ```shell 32 | docker pull us-docker.pkg.dev/osdfir-registry/hashr/release/hashr 33 | ``` 34 | 35 | Pulling a specific release tag: 36 | 37 | ```shell 38 | docker pull us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:v1.7.1 39 | ``` 40 | 41 | ### Build the HashR image 42 | 43 | From the repository root folder run the following command: 44 | 45 | ```shell 46 | docker build -f docker/Dockerfile . 47 | ``` 48 | 49 | ## Setup a database and importers 50 | 51 | ### Database 52 | 53 | You still need to provide your own database for HashR to store the results. 54 | Check the [Setting up storage for processing tasks](https://github.com/google/hashr#setting-up-storage-for-processing-tasks) step in the local installation 55 | guide. 56 | 57 | ### Importers 58 | 59 | Follow the [Setting up importers](https://github.com/google/hashr#setting-up-importers) 60 | guide to setup the importers you want to use. 61 | 62 | Come back here for running HashR in docker with specific importers. 63 | 64 | ### Docker networking 65 | 66 | Create a docker network that will be used by `hashr_postgresql` and the `hashr` 67 | container. 
68 | 69 | ```shell 70 | docker network create hashr_net 71 | ``` 72 | 73 | ```shell 74 | docker network connect hashr_net hashr_postgresql 75 | ``` 76 | 77 | ## Run HashR 78 | 79 | Get all available HashR flags 80 | 81 | ```shell 82 | docker run us-docker.pkg.dev/osdfir-registry/hashr/release/hashr -h 83 | ``` 84 | 85 | ### Examples 86 | 87 | > **NOTE** 88 | Ensure that the host directory mapped into `/data/` in the container is 89 | readable for all! 90 | 91 | Run HashR using the `iso9660` importer and export results to PostgreSQL: 92 | 93 | ```shell 94 | docker run -it \ 95 | --privileged \ 96 | --network hashr_net \ 97 | -v ${pwd}/ISO:/data/iso \ 98 | us-docker.pkg.dev/osdfir-registry/hashr/release/hashr \ 99 | -storage postgres \ 100 | -postgres_host hashr_postgresql \ 101 | -postgres_port 5432 \ 102 | -postgres_user hashr \ 103 | -postgres_password hashr \ 104 | -postgres_db hashr \ 105 | -importers iso9660 \ 106 | -iso_repo_path /data/iso/ \ 107 | -exporters postgres 108 | ``` 109 | 110 | Run HashR using the `deb` importer and export results to PostgreSQL: 111 | 112 | ```shell 113 | docker run -it \ 114 | --network hashr_net \ 115 | -v ${pwd}/DEB:/data/deb \ 116 | us-docker.pkg.dev/osdfir-registry/hashr/release/hashr \ 117 | -storage postgres \ 118 | -postgres_host hashr_postgresql \ 119 | -postgres_port 5432 \ 120 | -postgres_user hashr \ 121 | -postgres_password hashr \ 122 | -postgres_db hashr \ 123 | -importers deb \ 124 | -deb_repo_path /data/deb/ \ 125 | -exporters postgres 126 | ``` 127 | 128 | Run HashR using the `GCP` importer and export results to PostgreSQL: 129 | 130 | ```shell 131 | docker run -it \ 132 | --network hashr_net \ 133 | -v ${pwd}/hashr-sa-private-key.json:/creds/hashr-sa-private-key.json \ 134 | -e GOOGLE_APPLICATION_CREDENTIALS='/creds/hashr-sa-private-key.json' \ 135 | us-docker.pkg.dev/osdfir-registry/hashr/release/hashr \ 136 | -storage postgres \ 137 | -postgres_host hashr_postgresql \ 138 | -postgres_port 5432 \ 139 | 
-postgres_user hashr \ 140 | -postgres_password hashr \ 141 | -postgres_db hashr \ 142 | -importers GCP \ 143 | -gcp_projects debian-cloud,centos-cloud,rhel-cloud \ 144 | -hashr_gcp_project \ 145 | -hashr_gcs_bucket \ 146 | -exporters postgres 147 | ``` 148 | 149 | ### Debugging 150 | 151 | Here are some known issues that you can run into when using HashR with docker. 152 | 153 | #### Folder: permission denied 154 | 155 | If you get a permission error from HashR when working with docker volumes ensure 156 | that the folder you are mapping into the container has the same group id as the 157 | HashR group inside the container. Most likely this will be `1000`. To change 158 | the group, run: 159 | 160 | `sudo chown -R :1000 <folder>` 161 | 162 | #### mount: permission denied 163 | 164 | Some importers need to mount the provided file (e.g. ISO files). This is not 165 | supported inside the docker container by default. To work around this issue, use 166 | the `--privileged` flag with your `docker run` command. 167 | 168 | #### Debugging inside the container 169 | 170 | To debug problems inside the HashR container start an interactive shell 171 | with the following command: 172 | 173 | ``` 174 | docker run -it \ 175 | --network hashr_net \ 176 | --entrypoint=/bin/bash \ 177 | us-docker.pkg.dev/osdfir-registry/hashr/release/hashr 178 | ``` 179 | 180 | #### Logging output 181 | 182 | For debugging purposes you can send logging output to stderr by using the 183 | `--logtostderr=1` flag. 184 | 185 | In general, HashR logs its output to `/tmp/hashr.INFO`. 
186 | -------------------------------------------------------------------------------- /docker/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Google Cloud Build configuration for HashR release 2 | steps: 3 | - name: gcr.io/cloud-builders/docker 4 | args: 5 | [ 6 | "build", 7 | "-t", 8 | "us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:$TAG_NAME", 9 | "-t", 10 | "us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:latest", 11 | "-f", 12 | "docker/Dockerfile", 13 | ".", 14 | ] 15 | timeout: 4800s 16 | timeout: 4800s 17 | images: 18 | - us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:latest 19 | - us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:$TAG_NAME 20 | -------------------------------------------------------------------------------- /docs/assets/HashR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/docs/assets/HashR.png -------------------------------------------------------------------------------- /exporters/gcp/gcp.go: -------------------------------------------------------------------------------- 1 | package gcp 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "net/http" 8 | "os" 9 | "os/exec" 10 | "strings" 11 | "sync" 12 | 13 | "cloud.google.com/go/spanner" 14 | "github.com/golang/glog" 15 | "github.com/google/hashr/common" 16 | "google.golang.org/api/iterator" 17 | "google.golang.org/api/storage/v1" 18 | "google.golang.org/grpc/codes" 19 | ) 20 | 21 | const ( 22 | // Name contains name of the exporter. 23 | Name = "GCP" 24 | ) 25 | 26 | // Exporter is an instance of GCP Exporter. 27 | type Exporter struct { 28 | spannerClient *spanner.Client 29 | storageClient *storage.Service 30 | GCSBucket string 31 | uploadPayloads bool 32 | workerCount int 33 | wg sync.WaitGroup 34 | } 35 | 36 | // NewExporter creates new GCP exporter. 
37 | func NewExporter(spannerClient *spanner.Client, storageClient *storage.Service, GCSBucket string, uploadPayloads bool, workerCount int) (*Exporter, error) { 38 | return &Exporter{spannerClient: spannerClient, storageClient: storageClient, GCSBucket: GCSBucket, uploadPayloads: uploadPayloads, workerCount: workerCount}, nil 39 | } 40 | 41 | // Name returns exporter name. 42 | func (e *Exporter) Name() string { 43 | return Name 44 | } 45 | 46 | // Export exports extracted data to GCP (Spanner + GCS). 47 | func (e *Exporter) Export(ctx context.Context, sourceRepoName, sourceRepoPath, sourceID, sourceHash, sourcePath, sourceDescription string, samples []common.Sample) error { 48 | if err := e.insertSource(ctx, sourceHash, sourceID, sourcePath, sourceRepoName, sourceRepoPath, sourceDescription); err != nil { 49 | return fmt.Errorf("could not upload source data: %v", err) 50 | } 51 | 52 | jobs := make(chan common.Sample, len(samples)) 53 | for w := 1; w <= e.workerCount; w++ { 54 | e.wg.Add(1) 55 | go e.worker(ctx, sourceHash, jobs) 56 | } 57 | 58 | go func() { 59 | for _, sample := range samples { 60 | jobs <- sample 61 | } 62 | close(jobs) 63 | }() 64 | e.wg.Wait() 65 | 66 | return nil 67 | } 68 | 69 | func (e *Exporter) worker(ctx context.Context, sourceHash string, samples <-chan common.Sample) { 70 | defer e.wg.Done() 71 | for sample := range samples { 72 | if err := e.insertSample(ctx, sample); err != nil { 73 | glog.Errorf("skipping %s, could not insert sample data: %v", sample.Sha256, err) 74 | continue 75 | } 76 | 77 | if err := e.insertRelationship(ctx, sample, sourceHash); err != nil { 78 | glog.Errorf("skipping %s, could not insert source <-> sample relationship: %v", sample.Sha256, err) 79 | continue 80 | } 81 | } 82 | } 83 | 84 | func (e *Exporter) insertRelationship(ctx context.Context, sample common.Sample, sourceSha256 string) error { 85 | var paths, existingPaths []string 86 | 87 | for _, path := range sample.Paths { 88 | s := strings.Split(path, 
"/extracted/") 89 | if len(s) < 2 { 90 | glog.Warningf("sample path does not follow expected format: %s", path) 91 | continue 92 | } 93 | paths = append(paths, strings.TrimPrefix(strings.TrimPrefix(s[len(s)-1], "mnt"), "export")) 94 | } 95 | 96 | sql := spanner.Statement{ 97 | SQL: `SELECT sample_paths FROM samples_sources WHERE sample_sha256 = @sha256`, 98 | Params: map[string]interface{}{ 99 | "sha256": sample.Sha256, 100 | }, 101 | } 102 | 103 | iter := e.spannerClient.Single().Query(ctx, sql) 104 | defer iter.Stop() 105 | row, err := iter.Next() 106 | if err != iterator.Done { 107 | if err := row.Columns(&existingPaths); err != nil { 108 | return err 109 | } 110 | } 111 | if err != iterator.Done && err != nil { 112 | return err 113 | } 114 | 115 | _, err = e.spannerClient.Apply(ctx, []*spanner.Mutation{ 116 | spanner.InsertOrUpdate("samples_sources", 117 | []string{ 118 | "sample_sha256", 119 | "source_sha256", 120 | "sample_paths"}, 121 | []interface{}{ 122 | sample.Sha256, 123 | sourceSha256, 124 | append(existingPaths, paths...), 125 | })}) 126 | if err != nil { 127 | return fmt.Errorf("failed to insert data %v", err) 128 | } 129 | 130 | return nil 131 | } 132 | 133 | func (e *Exporter) insertSample(ctx context.Context, sample common.Sample) error { 134 | var samplePath string 135 | var fi os.FileInfo 136 | var err error 137 | // If sample has more than one path associated with it, take the first that is valid. 
138 | for _, path := range sample.Paths { 139 | if fi, err = os.Stat(path); err == nil { 140 | samplePath = path 141 | break 142 | } 143 | } 144 | 145 | file, err := os.Open(samplePath) 146 | if err != nil { 147 | return fmt.Errorf("could not open %v", samplePath) 148 | } 149 | defer file.Close() 150 | 151 | mimeType, err := getFileContentType(file) 152 | if err != nil { 153 | glog.Warningf("Could not get file content type: %v", err) 154 | } 155 | 156 | fileOutput, err := fileCmdOutput(samplePath) 157 | if err != nil { 158 | glog.Warningf("Could not get file cmd output: %v", err) 159 | } 160 | 161 | fileOutput = strings.TrimPrefix(fileOutput, fmt.Sprintf("%s%s", samplePath, ":")) 162 | _, err = e.spannerClient.Apply(ctx, []*spanner.Mutation{ 163 | spanner.Insert("samples", 164 | []string{ 165 | "sha256", 166 | "mimetype", 167 | "file_output", 168 | "size"}, 169 | []interface{}{ 170 | sample.Sha256, 171 | mimeType, 172 | fileOutput, 173 | fi.Size(), 174 | })}) 175 | if spanner.ErrCode(err) != codes.AlreadyExists && err != nil { 176 | return fmt.Errorf("failed to insert data %v", err) 177 | } 178 | 179 | if e.uploadPayloads && sample.Upload { 180 | fmt.Println(samplePath) 181 | file, err := os.Open(samplePath) 182 | if err != nil { 183 | return fmt.Errorf("error while opening file: %v", err) 184 | } 185 | 186 | fi, err := file.Stat() 187 | if err != nil { 188 | return fmt.Errorf("error while opening file: %v", err) 189 | } 190 | 191 | fmt.Println(fi.Size()) 192 | 193 | name := fmt.Sprintf("%s/%s", strings.ToUpper(sample.Sha256[0:2]), strings.ToUpper(sample.Sha256)) 194 | object := &storage.Object{ 195 | Name: name, 196 | } 197 | 198 | _, err = e.storageClient.Objects.Insert(e.GCSBucket, object).Media(file).Do() 199 | if err != nil { 200 | return fmt.Errorf("error uploading data to GCS: %v", err) 201 | } 202 | 203 | _, err = e.spannerClient.Apply(ctx, []*spanner.Mutation{ 204 | spanner.Insert("payloads", 205 | []string{ 206 | "sha256", 207 | "gcs_path"}, 208 | 
[]interface{}{ 209 | sample.Sha256, 210 | fmt.Sprintf("gs://%s/%s", e.GCSBucket, name), 211 | })}) 212 | if spanner.ErrCode(err) != codes.AlreadyExists && err != nil { 213 | return fmt.Errorf("failed to insert data %v", err) 214 | } 215 | } 216 | 217 | return nil 218 | } 219 | 220 | func (e *Exporter) insertSource(ctx context.Context, sourceHash, sourceID, sourcePath, sourceRepoName, sourceRepoPath, sourceDescription string) error { 221 | var sourceIDs []string 222 | 223 | sql := spanner.Statement{ 224 | SQL: `SELECT source_id FROM sources WHERE sha256 = @sha256`, 225 | Params: map[string]interface{}{ 226 | "sha256": sourceHash, 227 | }, 228 | } 229 | 230 | iter := e.spannerClient.Single().Query(ctx, sql) 231 | defer iter.Stop() 232 | row, err := iter.Next() 233 | if err != iterator.Done { 234 | if err := row.Columns(&sourceIDs); err != nil { 235 | return err 236 | } 237 | } 238 | if err != iterator.Done && err != nil { 239 | return err 240 | } 241 | 242 | _, err = e.spannerClient.Apply(ctx, []*spanner.Mutation{ 243 | spanner.InsertOrUpdate("sources", 244 | []string{ 245 | "sha256", 246 | "source_id", 247 | "source_path", 248 | "source_description", 249 | "repo_name", 250 | "repo_path"}, 251 | []interface{}{ 252 | sourceHash, 253 | append(sourceIDs, sourceID), 254 | sourcePath, 255 | sourceDescription, 256 | sourceRepoName, 257 | sourceRepoPath, 258 | })}) 259 | if err != nil { 260 | return fmt.Errorf("failed to insert data %v", err) 261 | } 262 | 263 | return nil 264 | } 265 | 266 | func getFileContentType(out *os.File) (string, error) { 267 | 268 | // Only the first 512 bytes are used to check the content type. 
269 | buffer := make([]byte, 512) 270 | 271 | _, err := out.Read(buffer) 272 | if err != nil { 273 | return "", err 274 | } 275 | 276 | contentType := http.DetectContentType(buffer) 277 | 278 | return contentType, nil 279 | } 280 | 281 | func fileCmdOutput(filepath string) (string, error) { 282 | cmd := exec.Command("/usr/bin/file", filepath) 283 | var stdout, stderr bytes.Buffer 284 | cmd.Stdout = &stdout 285 | cmd.Stderr = &stderr 286 | 287 | err := cmd.Run() 288 | if err != nil { 289 | return "", fmt.Errorf("error while executing %s: %v\nStdout: %v\nStderr: %v", "/usr/bin/file", err, stdout.String(), stderr.String()) 290 | } 291 | 292 | return strings.TrimSuffix(stdout.String(), "\n"), nil 293 | } 294 | -------------------------------------------------------------------------------- /exporters/gcp/gcp_test.go: -------------------------------------------------------------------------------- 1 | package gcp 2 | 3 | import ( 4 | "context" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/golang/glog" 9 | "github.com/google/hashr/common" 10 | "google.golang.org/api/option" 11 | "google.golang.org/grpc" 12 | "google.golang.org/grpc/credentials/insecure" 13 | 14 | "cloud.google.com/go/spanner" 15 | database "cloud.google.com/go/spanner/admin/database/apiv1" 16 | instance "cloud.google.com/go/spanner/admin/instance/apiv1" 17 | 18 | dbadminpb "google.golang.org/genproto/googleapis/spanner/admin/database/v1" 19 | instancepb "google.golang.org/genproto/googleapis/spanner/admin/instance/v1" 20 | ) 21 | 22 | const ( 23 | samplesTable = ` 24 | CREATE TABLE samples ( 25 | sha256 STRING(100), 26 | mimetype STRING(MAX), 27 | file_output STRING(MAX), 28 | size INT64 29 | ) PRIMARY KEY(sha256)` 30 | 31 | payloadsTable = ` 32 | CREATE TABLE payloads ( 33 | sha256 STRING(100), 34 | gcs_path STRING(200) 35 | ) PRIMARY KEY(sha256)` 36 | 37 | sourcesTable = ` 38 | CREATE TABLE sources ( 39 | sha256 STRING(100), 40 | source_id ARRAY, 41 | source_path STRING(MAX), 42 | 
source_description STRING(MAX), 43 | repo_name STRING(MAX), 44 | repo_path STRING(MAX), 45 | ) PRIMARY KEY(sha256)` 46 | 47 | samplesSourcesTable = `CREATE TABLE samples_sources ( 48 | sample_sha256 STRING(100), 49 | source_sha256 STRING(100), 50 | sample_paths ARRAY, 51 | CONSTRAINT FK_Sample FOREIGN KEY (sample_sha256) REFERENCES samples (sha256), 52 | CONSTRAINT FK_Source FOREIGN KEY (source_sha256) REFERENCES sources (sha256), 53 | ) PRIMARY KEY (sample_sha256, source_sha256)` 54 | ) 55 | 56 | func TestExport(t *testing.T) { 57 | ctx := context.Background() 58 | 59 | o := []option.ClientOption{ 60 | option.WithEndpoint("localhost:9010"), 61 | option.WithoutAuthentication(), 62 | option.WithGRPCDialOption(grpc.WithTransportCredentials(insecure.NewCredentials())), 63 | } 64 | 65 | instanceAdmin, err := instance.NewInstanceAdminClient(ctx, o...) 66 | if err != nil { 67 | glog.Fatalf("error dialing instance admin: %v", err) 68 | } 69 | defer instanceAdmin.Close() 70 | 71 | if err := instanceAdmin.DeleteInstance(ctx, &instancepb.DeleteInstanceRequest{Name: "projects/hashr/instances/hashr"}); err != nil { 72 | glog.Warning(err) 73 | } 74 | 75 | op, err := instanceAdmin.CreateInstance(ctx, &instancepb.CreateInstanceRequest{ 76 | Parent: "projects/hashr", 77 | InstanceId: "hashr", 78 | Instance: &instancepb.Instance{ 79 | DisplayName: "hashr", 80 | NodeCount: 1, 81 | }, 82 | }) 83 | if err != nil { 84 | glog.Fatalf("error creating test spanner instance: %v", err) 85 | } 86 | if _, err := op.Wait(ctx); err != nil { 87 | glog.Fatalf("error creating test spanner instance: %v", err) 88 | } 89 | 90 | databaseAdmin, err := database.NewDatabaseAdminClient(ctx, o...) 
91 | if err != nil { 92 | glog.Fatalf("error creating database admin client for emulator: %v", err) 93 | } 94 | 95 | dbURI := "projects/hashr/instances/hashr/databases/hashr" 96 | op2, err := databaseAdmin.CreateDatabase(ctx, &dbadminpb.CreateDatabaseRequest{ 97 | Parent: "projects/hashr/instances/hashr", 98 | CreateStatement: "CREATE DATABASE hashr", 99 | ExtraStatements: []string{samplesTable, sourcesTable, payloadsTable, samplesSourcesTable}, 100 | }) 101 | if err != nil { 102 | glog.Fatalf("error creating test DB %v: %v", dbURI, err) 103 | } 104 | if _, err = op2.Wait(ctx); err != nil { 105 | glog.Fatalf("error creating test DB %v: %v", dbURI, err) 106 | } 107 | 108 | spannerClient, err := spanner.NewClient(ctx, dbURI, o...) 109 | if err != nil { 110 | glog.Fatalf("error creating Spanner client %v: %v", dbURI, err) 111 | } 112 | 113 | exporter, err := NewExporter(spannerClient, nil, "gcs-bucket", false, 10) 114 | if err != nil { 115 | glog.Fatalf("error creating Cloud Spanner exporter: %v", err) 116 | } 117 | 118 | samples := []common.Sample{ 119 | { 120 | Sha256: "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", 121 | Paths: []string{filepath.Join("testdata/extraction", "file.01")}, 122 | Upload: true, 123 | }, 124 | { 125 | Sha256: "5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", 126 | Paths: []string{filepath.Join("testdata/extraction", "file.02")}, 127 | Upload: true, 128 | }, 129 | { 130 | Sha256: "9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", 131 | Paths: []string{filepath.Join("testdata/extraction", "file.03")}, 132 | Upload: true, 133 | }, 134 | } 135 | 136 | if err := exporter.Export(ctx, "GCP", "ubuntu", "ubuntu-1604-lts", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", "", "Official Ubuntu GCP image.", samples); err != nil { 137 | t.Fatalf("unexpected error while running Export() = %v", err) 138 | } 139 | 140 | } 141 | 
-------------------------------------------------------------------------------- /exporters/gcp/testdata/extraction/file.01: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/gcp/testdata/extraction/file.01 -------------------------------------------------------------------------------- /exporters/gcp/testdata/extraction/file.02: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/gcp/testdata/extraction/file.02 -------------------------------------------------------------------------------- /exporters/gcp/testdata/extraction/file.03: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/gcp/testdata/extraction/file.03 -------------------------------------------------------------------------------- /exporters/gcp/testdata/extraction/file.04: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/gcp/testdata/extraction/file.04 -------------------------------------------------------------------------------- /exporters/postgres/postgres_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package postgres 16 | 17 | import ( 18 | "context" 19 | "io" 20 | "os" 21 | "path/filepath" 22 | "testing" 23 | 24 | "github.com/google/hashr/common" 25 | 26 | "github.com/DATA-DOG/go-sqlmock" 27 | ) 28 | 29 | func TestExport(t *testing.T) { 30 | db, mock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual)) 31 | if err != nil { 32 | t.Fatalf("could not open a stub database connection: %v", err) 33 | } 34 | defer db.Close() 35 | 36 | mock.ExpectQuery(`SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_name=$1 );`).WithArgs("samples").WillReturnRows(mock.NewRows([]string{"t"}).AddRow("t")) 37 | mock.ExpectQuery(`SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_name=$1 );`).WithArgs("payloads").WillReturnRows(mock.NewRows([]string{"t"}).AddRow("t")) 38 | mock.ExpectQuery(`SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_name=$1 );`).WithArgs("sources").WillReturnRows(mock.NewRows([]string{"t"}).AddRow("t")) 39 | mock.ExpectQuery(`SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_name=$1 );`).WithArgs("samples_sources").WillReturnRows(mock.NewRows([]string{"t"}).AddRow("t")) 40 | 41 | postgresExporter, err := NewExporter(db, false) 42 | if err != nil { 43 | t.Fatalf("could not create Postgres exporter: %v", err) 44 | } 45 | 46 | mock.ExpectQuery(`SELECT sha256 FROM sources WHERE sha256=$1;`).WithArgs("07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc").WillReturnRows(mock.NewRows([]string{"sha256"})) 47 
| mock.ExpectExec(`INSERT INTO sources (sha256, sourceID, sourcePath, repoName, repoPath, sourceDescription) VALUES ($1, $2, $3, $4, $5, $6)`).WithArgs("07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", `{"ubuntu-1604-lts"}`, "", "GCP", "ubuntu", "Official Ubuntu GCP image.").WillReturnResult(sqlmock.NewResult(1, 1)) 48 | 49 | mock.ExpectQuery(`SELECT sha256 FROM samples WHERE sha256=$1;`).WithArgs("a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3").WillReturnRows(mock.NewRows([]string{"sha256"})) 50 | mock.ExpectExec(`INSERT INTO samples (sha256, size, mimetype, file_output) VALUES ($1, $2, $3, $4)`).WithArgs("a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", 8192, "application/octet-stream", " data").WillReturnResult(sqlmock.NewResult(1, 1)) 51 | mock.ExpectQuery("SELECT sample_sha256,source_sha256 FROM samples_sources WHERE sample_sha256=$1 AND source_sha256=$2;").WithArgs("a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc").WillReturnRows(mock.NewRows([]string{"a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc"})) 52 | mock.ExpectExec(`INSERT INTO samples_sources (sample_sha256, source_sha256, sample_paths) VALUES ($1, $2, $3)`).WithArgs("a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", `{"file.01"}`).WillReturnResult(sqlmock.NewResult(1, 1)) 53 | 54 | mock.ExpectQuery(`SELECT sha256 FROM samples WHERE sha256=$1;`).WithArgs("5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb").WillReturnRows(mock.NewRows([]string{"sha256"})) 55 | mock.ExpectExec(`INSERT INTO samples (sha256, size, mimetype, file_output) VALUES ($1, $2, $3, $4)`).WithArgs("5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", 7168, "application/octet-stream", " 
data").WillReturnResult(sqlmock.NewResult(1, 1)) 56 | mock.ExpectQuery("SELECT sample_sha256,source_sha256 FROM samples_sources WHERE sample_sha256=$1 AND source_sha256=$2;").WithArgs("5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc").WillReturnRows(mock.NewRows([]string{"5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc"})) 57 | mock.ExpectExec(`INSERT INTO samples_sources (sample_sha256, source_sha256, sample_paths) VALUES ($1, $2, $3)`).WithArgs("5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", `{"file.02"}`).WillReturnResult(sqlmock.NewResult(1, 1)) 58 | 59 | mock.ExpectQuery(`SELECT sha256 FROM samples WHERE sha256=$1;`).WithArgs("9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7").WillReturnRows(mock.NewRows([]string{"sha256"})) 60 | mock.ExpectExec(`INSERT INTO samples (sha256, size, mimetype, file_output) VALUES ($1, $2, $3, $4)`).WithArgs("9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", 5120, "application/octet-stream", " data").WillReturnResult(sqlmock.NewResult(1, 1)) 61 | mock.ExpectQuery("SELECT sample_sha256,source_sha256 FROM samples_sources WHERE sample_sha256=$1 AND source_sha256=$2;").WithArgs("9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc").WillReturnRows(mock.NewRows([]string{"9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc"})) 62 | mock.ExpectExec(`INSERT INTO samples_sources (sample_sha256, source_sha256, sample_paths) VALUES ($1, $2, $3)`).WithArgs("9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", 
`{"file.03"}`).WillReturnResult(sqlmock.NewResult(1, 1)) 63 | 64 | tempDir := "/tmp/extracted/" 65 | if err := os.MkdirAll(tempDir, 0777); err != nil { 66 | t.Fatalf("Could not create temp extraction directory(%s): %v", tempDir, err) 67 | } 68 | 69 | // We need to copy the file to the tmp dir, otherwise we'll end up opening symlinks. 70 | for _, filename := range []string{"file.01", "file.02", "file.03"} { 71 | in, err := os.Open(filepath.Join("testdata/extraction", filename)) 72 | if err != nil { 73 | t.Fatal(err) 74 | } 75 | out, err := os.Create(filepath.Join(tempDir, filename)) 76 | if err != nil { 77 | t.Fatal(err) 78 | } 79 | _, err = io.Copy(out, in) 80 | if err != nil { 81 | t.Fatal(err) 82 | } 83 | out.Close() 84 | } 85 | 86 | samples := []common.Sample{ 87 | { 88 | Sha256: "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", 89 | Paths: []string{filepath.Join(tempDir, "file.01")}, 90 | Upload: true, 91 | }, 92 | { 93 | Sha256: "5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", 94 | Paths: []string{filepath.Join(tempDir, "file.02")}, 95 | Upload: true, 96 | }, 97 | { 98 | Sha256: "9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", 99 | Paths: []string{filepath.Join(tempDir, "file.03")}, 100 | Upload: true, 101 | }, 102 | } 103 | 104 | if err := postgresExporter.Export(context.Background(), "GCP", "ubuntu", "ubuntu-1604-lts", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", "", "Official Ubuntu GCP image.", samples); err != nil { 105 | t.Fatalf("unexpected error while running Export() = %v", err) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /exporters/postgres/testdata/extraction/._file.01: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/._file.01 
-------------------------------------------------------------------------------- /exporters/postgres/testdata/extraction/._file.02: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/._file.02 -------------------------------------------------------------------------------- /exporters/postgres/testdata/extraction/._file.03: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/._file.03 -------------------------------------------------------------------------------- /exporters/postgres/testdata/extraction/._file.04: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/._file.04 -------------------------------------------------------------------------------- /exporters/postgres/testdata/extraction/file.01: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/file.01 -------------------------------------------------------------------------------- /exporters/postgres/testdata/extraction/file.02: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/file.02 -------------------------------------------------------------------------------- /exporters/postgres/testdata/extraction/file.03: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/file.03 -------------------------------------------------------------------------------- /exporters/postgres/testdata/extraction/file.04: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/file.04 -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/google/hashr 2 | 3 | go 1.18 4 | 5 | require ( 6 | cloud.google.com/go/spanner v1.53.1 7 | github.com/DATA-DOG/go-sqlmock v1.5.0 8 | github.com/Microsoft/go-winio v0.6.1 9 | github.com/aws/aws-sdk-go-v2 v1.24.1 10 | github.com/aws/aws-sdk-go-v2/config v1.26.3 11 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.11 12 | github.com/aws/aws-sdk-go-v2/service/ec2 v1.144.0 13 | github.com/aws/aws-sdk-go-v2/service/s3 v1.48.0 14 | github.com/golang/glog v1.2.0 15 | github.com/google/go-cmp v0.6.0 16 | github.com/google/go-containerregistry v0.17.0 17 | github.com/hooklift/iso9660 v1.0.0 18 | github.com/lib/pq v1.10.9 19 | github.com/sassoftware/go-rpmutils v0.2.0 20 | golang.org/x/crypto v0.21.0 21 | golang.org/x/oauth2 v0.15.0 22 | google.golang.org/api v0.153.0 23 | google.golang.org/genproto v0.0.0-20231127180814-3a041ad873d4 24 | google.golang.org/grpc v1.59.0 25 | google.golang.org/protobuf v1.33.0 26 | pault.ag/go/debian v0.16.0 27 | ) 28 | 29 | require ( 30 | cloud.google.com/go v0.111.0 // indirect 31 | cloud.google.com/go/compute v1.23.3 // indirect 32 | cloud.google.com/go/compute/metadata v0.2.3 // indirect 33 | cloud.google.com/go/iam v1.1.5 // indirect 34 | cloud.google.com/go/longrunning v0.5.4 // indirect 35 | github.com/DataDog/zstd v1.5.5 // indirect 36 | 
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 // indirect 37 | github.com/aws/aws-sdk-go-v2/credentials v1.16.14 // indirect 38 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 // indirect 39 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 // indirect 40 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 // indirect 41 | github.com/aws/aws-sdk-go-v2/internal/ini v1.7.2 // indirect 42 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10 // indirect 43 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4 // indirect 44 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.10 // indirect 45 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10 // indirect 46 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.10 // indirect 47 | github.com/aws/aws-sdk-go-v2/service/sso v1.18.6 // indirect 48 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.6 // indirect 49 | github.com/aws/aws-sdk-go-v2/service/sts v1.26.7 // indirect 50 | github.com/aws/smithy-go v1.19.0 // indirect 51 | github.com/c4milo/gotoolkit v0.0.0-20190525173301-67483a18c17a // indirect 52 | github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect 53 | github.com/cespare/xxhash/v2 v2.2.0 // indirect 54 | github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe // indirect 55 | github.com/cncf/xds/go v0.0.0-20231128003011-0fa0005c9caa // indirect 56 | github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect 57 | github.com/docker/cli v24.0.7+incompatible // indirect 58 | github.com/docker/distribution v2.8.3+incompatible // indirect 59 | github.com/docker/docker v24.0.9+incompatible // indirect 60 | github.com/docker/docker-credential-helpers v0.8.0 // indirect 61 | github.com/envoyproxy/go-control-plane v0.11.1 // indirect 62 | github.com/envoyproxy/protoc-gen-validate v1.0.2 // indirect 63 | github.com/go-logr/logr v1.3.0 // indirect 64 | github.com/go-logr/stdr v1.2.2 // 
indirect 65 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 66 | github.com/golang/protobuf v1.5.3 // indirect 67 | github.com/google/s2a-go v0.1.7 // indirect 68 | github.com/google/uuid v1.4.0 // indirect 69 | github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect 70 | github.com/googleapis/gax-go/v2 v2.12.0 // indirect 71 | github.com/hooklift/assert v0.1.0 // indirect 72 | github.com/jmespath/go-jmespath v0.4.0 // indirect 73 | github.com/kjk/lzma v0.0.0-20161016003348-3fd93898850d // indirect 74 | github.com/klauspost/compress v1.17.4 // indirect 75 | github.com/mitchellh/go-homedir v1.1.0 // indirect 76 | github.com/opencontainers/go-digest v1.0.0 // indirect 77 | github.com/opencontainers/image-spec v1.1.0-rc5 // indirect 78 | github.com/pkg/errors v0.9.1 // indirect 79 | github.com/sirupsen/logrus v1.9.3 // indirect 80 | github.com/ulikunitz/xz v0.5.11 // indirect 81 | github.com/vbatts/tar-split v0.11.5 // indirect 82 | github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect 83 | go.opencensus.io v0.24.0 // indirect 84 | go.opentelemetry.io/otel v1.21.0 // indirect 85 | go.opentelemetry.io/otel/metric v1.21.0 // indirect 86 | go.opentelemetry.io/otel/trace v1.21.0 // indirect 87 | golang.org/x/net v0.23.0 // indirect 88 | golang.org/x/sync v0.5.0 // indirect 89 | golang.org/x/sys v0.18.0 // indirect 90 | golang.org/x/text v0.14.0 // indirect 91 | golang.org/x/time v0.5.0 // indirect 92 | golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect 93 | google.golang.org/appengine v1.6.8 // indirect 94 | google.golang.org/genproto/googleapis/api v0.0.0-20231127180814-3a041ad873d4 // indirect 95 | google.golang.org/genproto/googleapis/rpc v0.0.0-20231127180814-3a041ad873d4 // indirect 96 | pault.ag/go/topsort v0.1.1 // indirect 97 | ) 98 | -------------------------------------------------------------------------------- /importers/aws/aws_test.go: 
-------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package aws implements AWS repository importer unit tests. 16 | 17 | package aws 18 | 19 | import ( 20 | "bytes" 21 | "context" 22 | "strconv" 23 | "testing" 24 | 25 | "github.com/aws/aws-sdk-go-v2/aws" 26 | "github.com/aws/aws-sdk-go-v2/service/ec2" 27 | "github.com/aws/aws-sdk-go-v2/service/ec2/types" 28 | ) 29 | 30 | type mockDescribeImagesAPI func(ctx context.Context, params *ec2.DescribeImagesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) 31 | 32 | func (m mockDescribeImagesAPI) DescribeImages(ctx context.Context, params *ec2.DescribeImagesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) { 33 | return m(ctx, params, optFns...) 
34 | } 35 | 36 | func TestDiscoveryRepo(t *testing.T) { 37 | cases := []struct { 38 | client func(t *testing.T) ec2DescribeImagesAPI 39 | architecture []string 40 | expect []byte 41 | }{ 42 | { 43 | client: func(t *testing.T) ec2DescribeImagesAPI { 44 | return mockDescribeImagesAPI(func(ctx context.Context, params *ec2.DescribeImagesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) { 45 | t.Helper() 46 | 47 | return &ec2.DescribeImagesOutput{ 48 | Images: []types.Image{ 49 | { 50 | ImageId: aws.String("ami-sample"), 51 | }, 52 | }, 53 | }, nil 54 | }) 55 | }, 56 | architecture: []string{"x86_64"}, 57 | expect: []byte("ami-sample"), 58 | }, 59 | } 60 | 61 | for i, tt := range cases { 62 | t.Run(strconv.Itoa(i), func(t *testing.T) { 63 | ctx := context.TODO() 64 | images, err := getAmazonImages(ctx, tt.client(t), tt.architecture) 65 | if err != nil { 66 | t.Fatalf("expect no error, got %v", err) 67 | } 68 | if e, a := tt.expect, []byte(*images[0].ImageId); bytes.Compare(e, a) != 0 { 69 | t.Errorf("expect %v, got %v", e, a) 70 | } 71 | }) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /importers/common/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | // Package common provides common functions used by hashR importers. 16 | package common 17 | 18 | import ( 19 | "archive/tar" 20 | "compress/gzip" 21 | "fmt" 22 | "io" 23 | "io/ioutil" 24 | "os" 25 | "path" 26 | "path/filepath" 27 | "strings" 28 | 29 | "github.com/golang/glog" 30 | ) 31 | 32 | // ExtractTarGz extracts tar.gz file to given output folder. If directory does not exist, it will 33 | // be created. 34 | func ExtractTarGz(tarGzPath, outputFolder string) error { 35 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) { 36 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil { 37 | return fmt.Errorf("error while creating target directory: %v", err2) 38 | } 39 | } 40 | 41 | gzFile, err := os.Open(tarGzPath) 42 | if err != nil { 43 | return err 44 | } 45 | defer gzFile.Close() 46 | 47 | gzReader, err := gzip.NewReader(gzFile) 48 | if err != nil { 49 | return err 50 | } 51 | 52 | tarReader := tar.NewReader(gzReader) 53 | 54 | glog.Infof("Extracting %s to %s", tarGzPath, outputFolder) 55 | 56 | for { 57 | header, err := tarReader.Next() 58 | 59 | switch { 60 | case err == io.EOF: 61 | return nil 62 | case err != nil: 63 | return err 64 | } 65 | 66 | if containsDotDot(header.Name) { 67 | glog.Warningf("not extracting %s, potential path traversal", header.Name) 68 | continue 69 | } 70 | destEntry := filepath.Join(outputFolder, header.Name) 71 | 72 | switch header.Typeflag { 73 | case tar.TypeDir: 74 | if _, err := os.Stat(destEntry); os.IsNotExist(err) { 75 | if err := os.MkdirAll(destEntry, 0755); err != nil { 76 | return fmt.Errorf("error while creating destination directory: %v", err) 77 | } 78 | } 79 | case tar.TypeReg: 80 | if _, err := os.Stat(filepath.Dir(destEntry)); os.IsNotExist(err) { 81 | if err := os.MkdirAll(filepath.Dir(destEntry), 0755); err != nil { 82 | return fmt.Errorf("error while creating destination directory: %v", err) 83 | } 84 | } 85 | 86 | destFile, err := os.Create(destEntry) 87 | if err != nil { 88 | return 
fmt.Errorf("error while creating destination file: %v", err) 89 | } 90 | 91 | _, err = io.Copy(destFile, tarReader) 92 | if err != nil { 93 | return fmt.Errorf("error while extracting destination file: %v", err) 94 | } 95 | destFile.Close() 96 | } 97 | } 98 | } 99 | 100 | func containsDotDot(v string) bool { 101 | if !strings.Contains(v, "..") { 102 | return false 103 | } 104 | for _, ent := range strings.FieldsFunc(v, isSlashRune) { 105 | if ent == ".." { 106 | return true 107 | } 108 | } 109 | return false 110 | } 111 | 112 | func isSlashRune(r rune) bool { return r == '/' || r == '\\' } 113 | 114 | // LocalTempDir creates local temporary directory. 115 | func LocalTempDir(sourceID string) (string, error) { 116 | tempDir, err := ioutil.TempDir("", fmt.Sprintf("hashr-%s-", sourceID)) 117 | if err != nil { 118 | return "", err 119 | } 120 | 121 | return tempDir, nil 122 | } 123 | 124 | // CopyToLocal copies a source to a local file system. 125 | func CopyToLocal(remotePath, sourceID string) (string, error) { 126 | tempDir, err := LocalTempDir(sourceID) 127 | if err != nil { 128 | return "", err 129 | } 130 | 131 | sourceFile, err := os.Open(remotePath) 132 | if err != nil { 133 | return "", err 134 | } 135 | 136 | destPath := path.Join(tempDir, filepath.Base(remotePath)) 137 | destFile, err := os.Create(destPath) 138 | if err != nil { 139 | return destPath, err 140 | } 141 | 142 | glog.Infof("Copying %s to %s", sourceID, destPath) 143 | 144 | _, err = io.Copy(destFile, sourceFile) 145 | if err != nil { 146 | return destPath, err 147 | } 148 | 149 | glog.Infof("Done copying %s", sourceID) 150 | return destPath, nil 151 | } 152 | -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir1/desktop.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir1/desktop.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir1/laptop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir1/laptop.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir1/server.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir1/server.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir2/desktop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir2/desktop.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir2/laptop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir2/laptop.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir2/server.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir2/server.tar.gz -------------------------------------------------------------------------------- 
/importers/common/testdata/targz/dir3/desktop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir3/desktop.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir3/laptop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir3/laptop.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir3/server.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir3/server.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir4/desktop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir4/desktop.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir4/laptop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir4/laptop.tar.gz -------------------------------------------------------------------------------- /importers/common/testdata/targz/dir4/server.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir4/server.tar.gz -------------------------------------------------------------------------------- /importers/deb/deb.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package deb implements deb package importer. 16 | package deb 17 | 18 | import ( 19 | "archive/tar" 20 | "crypto/sha256" 21 | "fmt" 22 | "io" 23 | "os" 24 | "path/filepath" 25 | "strings" 26 | 27 | "github.com/golang/glog" 28 | 29 | "github.com/google/hashr/core/hashr" 30 | "github.com/google/hashr/importers/common" 31 | 32 | "pault.ag/go/debian/deb" 33 | ) 34 | 35 | const ( 36 | // RepoName contains the repository name. 37 | RepoName = "deb" 38 | chunkSize = 1024 * 1024 * 10 // 10MB 39 | ) 40 | 41 | // Archive holds data related to deb archive. 42 | type Archive struct { 43 | filename string 44 | remotePath string 45 | localPath string 46 | quickSha256hash string 47 | repoPath string 48 | } 49 | 50 | func isSubElem(parent, sub string) (bool, error) { 51 | up := ".." + string(os.PathSeparator) 52 | 53 | // path-comparisons using filepath.Abs don't work reliably according to docs (no unique representation). 
54 | rel, err := filepath.Rel(parent, sub) 55 | if err != nil { 56 | return false, err 57 | } 58 | if !strings.HasPrefix(rel, up) && rel != ".." { 59 | return true, nil 60 | } 61 | return false, nil 62 | } 63 | 64 | func extractTar(tarfile *tar.Reader, outputFolder string) error { 65 | for { 66 | header, err := tarfile.Next() 67 | 68 | if err == io.EOF { 69 | break 70 | } 71 | 72 | if err != nil { 73 | return fmt.Errorf("error while unpacking deb package: %v", err) 74 | } 75 | 76 | name := header.Name 77 | 78 | switch header.Typeflag { 79 | case tar.TypeSymlink: 80 | continue 81 | 82 | case tar.TypeDir: 83 | continue 84 | 85 | // Go switch cases do not fall through: TypeRegA must share the TypeReg case body, otherwise old-style regular-file entries are silently skipped. 86 | case tar.TypeRegA, tar.TypeReg: 87 | unpackPath := filepath.Join(outputFolder, name) 88 | unpackFolder := filepath.Dir(unpackPath) 89 | if _, err := os.Stat(unpackFolder); os.IsNotExist(err) { 90 | if err2 := os.MkdirAll(unpackFolder, 0755); err2 != nil { 91 | return fmt.Errorf("error while creating target directory: %v", err2) 92 | } 93 | } 94 | 95 | fileIsSubelem, err := isSubElem(outputFolder, unpackPath) 96 | if err != nil || !fileIsSubelem { 97 | return fmt.Errorf("error, deb package tried to unpack file above parent") 98 | } 99 | 100 | unpackFileHandle, err := os.Create(unpackPath) 101 | if err != nil { 102 | return fmt.Errorf("error while creating destination file: %v", err) 103 | } 104 | defer unpackFileHandle.Close() 105 | _, err = io.Copy(unpackFileHandle, tarfile) 106 | if err != nil { 107 | return fmt.Errorf("error while writing to destination file: %v", err) 108 | } 109 | 110 | default: 111 | fmt.Printf("Unknown tar entry type: %c in file %s\n", header.Typeflag, name) 112 | } 113 | } 114 | 115 | return nil 116 | } 117 | 118 | func extractDeb(debPath, outputFolder string) error { 119 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) { 120 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil { 121 | return fmt.Errorf("error while creating target directory: %v", err2) 122 | } 123 | } 124 | 125 | fd, err :=
os.Open(debPath) 126 | if err != nil { 127 | return fmt.Errorf("failed to open deb file: %v", err) 128 | } 129 | defer fd.Close() 130 | 131 | debFile, err := deb.Load(fd, debPath) 132 | if err != nil { 133 | return fmt.Errorf("failed to parse deb file: %v", err) 134 | } 135 | 136 | err = extractTar(debFile.Data, outputFolder) 137 | if err != nil { 138 | return err 139 | } 140 | 141 | return nil 142 | } 143 | 144 | // Preprocess extracts the contents of a .deb file. 145 | func (a *Archive) Preprocess() (string, error) { 146 | var err error 147 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID()) 148 | if err != nil { 149 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err) 150 | } 151 | 152 | baseDir, _ := filepath.Split(a.localPath) 153 | extractionDir := filepath.Join(baseDir, "extracted") 154 | 155 | if err := extractDeb(a.localPath, extractionDir); err != nil { 156 | return "", err 157 | } 158 | 159 | return extractionDir, nil 160 | } 161 | 162 | // ID returns non-unique deb Archive ID. 163 | func (a *Archive) ID() string { 164 | return a.filename 165 | } 166 | 167 | // RepoName returns repository name. 168 | func (a *Archive) RepoName() string { 169 | return RepoName 170 | } 171 | 172 | // RepoPath returns repository path. 173 | func (a *Archive) RepoPath() string { 174 | return a.repoPath 175 | } 176 | 177 | // LocalPath returns local path to a deb Archive .deb file. 178 | func (a *Archive) LocalPath() string { 179 | return a.localPath 180 | } 181 | 182 | // RemotePath returns non-local path to a deb Archive .deb file. 183 | func (a *Archive) RemotePath() string { 184 | return a.remotePath 185 | } 186 | 187 | // Description provides additional description for a .deb file. 188 | func (a *Archive) Description() string { 189 | return "" 190 | } 191 | 192 | // QuickSHA256Hash calculates sha256 hash of .deb file. 
193 | func (a *Archive) QuickSHA256Hash() (string, error) { 194 | // Check if the quick hash was already calculated. 195 | if a.quickSha256hash != "" { 196 | return a.quickSha256hash, nil 197 | } 198 | 199 | f, err := os.Open(a.remotePath) 200 | if err != nil { 201 | return "", err 202 | } 203 | defer f.Close() 204 | 205 | fileInfo, err := f.Stat() 206 | if err != nil { 207 | return "", err 208 | } 209 | 210 | // Check if the file is smaller than 20MB, if so hash the whole file. 211 | if fileInfo.Size() < int64(chunkSize*2) { 212 | h := sha256.New() 213 | if _, err := io.Copy(h, f); err != nil { 214 | return "", err 215 | } 216 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil)) 217 | return a.quickSha256hash, nil 218 | } 219 | 220 | header := make([]byte, chunkSize) 221 | _, err = f.Read(header) 222 | if err != nil { 223 | return "", err 224 | } 225 | 226 | footer := make([]byte, chunkSize) 227 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize)) 228 | if err != nil { 229 | return "", err 230 | } 231 | 232 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...))) 233 | return a.quickSha256hash, nil 234 | } 235 | 236 | // NewRepo returns new instance of deb repository. 237 | func NewRepo(path string) *Repo { 238 | return &Repo{location: path} 239 | } 240 | 241 | // Repo holds data related to a deb repository. 242 | type Repo struct { 243 | location string 244 | files []string 245 | Archives []*Archive 246 | } 247 | 248 | // RepoName returns repository name. 249 | func (r *Repo) RepoName() string { 250 | return RepoName 251 | } 252 | 253 | // RepoPath returns repository path. 254 | func (r *Repo) RepoPath() string { 255 | return r.location 256 | } 257 | 258 | // DiscoverRepo traverses the repository and looks for files that are related to deb archives. 
259 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) { 260 | 261 | if err := filepath.Walk(r.location, walk(&r.files)); err != nil { 262 | return nil, err 263 | } 264 | 265 | for _, file := range r.files { 266 | _, filename := filepath.Split(file) 267 | 268 | if strings.HasSuffix(filename, ".deb") { 269 | r.Archives = append(r.Archives, &Archive{filename: filename, remotePath: file, repoPath: r.location}) 270 | } 271 | } 272 | 273 | var sources []hashr.Source 274 | for _, Archive := range r.Archives { 275 | sources = append(sources, Archive) 276 | } 277 | 278 | return sources, nil 279 | } 280 | 281 | func walk(files *[]string) filepath.WalkFunc { 282 | return func(path string, info os.FileInfo, err error) error { 283 | if err != nil { 284 | glog.Errorf("Could not open %s: %v", path, err) 285 | return nil 286 | } 287 | if info.IsDir() { 288 | return nil 289 | } 290 | if strings.HasSuffix(info.Name(), ".deb") { 291 | *files = append(*files, path) 292 | } 293 | 294 | return nil 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /importers/deb/generate_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | 3 | compressions=("gzip" "xz" "zstd" "none") 4 | i=0 5 | 6 | for tar in $(find -name '*.tar.gz'); do 7 | echo "$tar" 8 | filename=$(basename "$tar") 9 | tardir=$(dirname "$tar") 10 | tempdir=$(mktemp -d) 11 | tar -C "$tempdir" -xf "$tar" 12 | mkdir -p "$tempdir/DEBIAN" 13 | 14 | cat < "$tempdir/DEBIAN/control" 15 | Package: hashr-testdata 16 | Version: 1.0 17 | Architecture: arm64 18 | Maintainer: Example 19 | Description: This text does not matter. 
20 | EOF 21 | dpkg-deb -Z${compressions[$(expr $i % 4)+1]} --build --root-owner-group "$tempdir" 22 | rm -r "$tempdir" 23 | cp "$tempdir.deb" "$tardir/$(echo "$filename" | sed 's/.tar.gz/.deb/g')" 24 | rm "$tar" 25 | i=$(expr $i + 1) 26 | done 27 | -------------------------------------------------------------------------------- /importers/deb/testdata/20200106.00.00/ubuntu-desktop.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200106.00.00/ubuntu-desktop.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200106.00.00/ubuntu-laptop.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200106.00.00/ubuntu-laptop.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200106.00.00/ubuntu-server.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200106.00.00/ubuntu-server.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200107.00.00/ubuntu-desktop.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.00.00/ubuntu-desktop.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200107.00.00/ubuntu-laptop.deb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.00.00/ubuntu-laptop.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200107.00.00/ubuntu-server.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.00.00/ubuntu-server.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200107.01.00/ubuntu-desktop.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.01.00/ubuntu-desktop.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200107.01.00/ubuntu-laptop.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.01.00/ubuntu-laptop.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200107.01.00/ubuntu-server.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.01.00/ubuntu-server.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200108.00.00/ubuntu-desktop.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200108.00.00/ubuntu-desktop.deb 
-------------------------------------------------------------------------------- /importers/deb/testdata/20200108.00.00/ubuntu-laptop.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200108.00.00/ubuntu-laptop.deb -------------------------------------------------------------------------------- /importers/deb/testdata/20200108.00.00/ubuntu-server.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200108.00.00/ubuntu-server.deb -------------------------------------------------------------------------------- /importers/gcp/testdata/._ubuntu-1804-lts-drawfork-v20190613.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/gcp/testdata/._ubuntu-1804-lts-drawfork-v20190613.tar.gz -------------------------------------------------------------------------------- /importers/gcp/testdata/ubuntu-1804-lts-drawfork-v20190613.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/gcp/testdata/ubuntu-1804-lts-drawfork-v20190613.tar.gz -------------------------------------------------------------------------------- /importers/gcr/gcr.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package gcr implements Google Container Repository importer. 16 | package gcr 17 | 18 | import ( 19 | "archive/tar" 20 | "compress/gzip" 21 | "context" 22 | "fmt" 23 | "io" 24 | "os" 25 | "path/filepath" 26 | "strings" 27 | 28 | "github.com/golang/glog" 29 | "github.com/google/hashr/core/hashr" 30 | "github.com/google/hashr/importers/common" 31 | 32 | "github.com/google/go-containerregistry/pkg/authn" 33 | "github.com/google/go-containerregistry/pkg/crane" 34 | "github.com/google/go-containerregistry/pkg/name" 35 | "github.com/google/go-containerregistry/pkg/v1/google" 36 | "github.com/google/go-containerregistry/pkg/v1/remote" 37 | 38 | "golang.org/x/oauth2" 39 | ) 40 | 41 | const ( 42 | // RepoName contains the repository name. 43 | RepoName = "gcr" 44 | ) 45 | 46 | var ( 47 | auth authn.Authenticator 48 | opts google.Option 49 | remoteOpts []remote.Option 50 | ) 51 | 52 | // Preprocess extracts the contents of GCR image. 53 | func (i *image) Preprocess() (string, error) { 54 | imgID := fmt.Sprintf("%s@sha256:%s", i.id, i.quickHash) 55 | ref, err := name.ParseReference(imgID, name.StrictValidation) 56 | if err != nil { 57 | return "", fmt.Errorf("error parsing reference from image %q: %v", imgID, err) 58 | } 59 | 60 | fmt.Println(remoteOpts) 61 | // remote.Image(ref, ) 62 | // img, err := remote.Image(ref, remote.WithAuth(auth)) 63 | img, err := remote.Image(ref, remoteOpts...) 
64 | if err != nil { 65 | return "", fmt.Errorf("error retrieving src image %q: %v", imgID, err) 66 | } 67 | 68 | layers, err := img.Layers() 69 | if err != nil { 70 | return "", fmt.Errorf("error retrieving layers from image %q: %v", imgID, err) 71 | } 72 | 73 | tmpDir, err := common.LocalTempDir(strings.ReplaceAll(i.id, string(os.PathSeparator), "-")) 74 | if err != nil { 75 | return "", fmt.Errorf("error creating temp dir: %v", err) 76 | } 77 | 78 | i.localPath = filepath.Join(tmpDir, fmt.Sprintf("%s.tar", strings.ReplaceAll(imgID, "/", "_"))) 79 | 80 | if err := crane.Save(img, imgID, i.localPath); err != nil { 81 | return "", fmt.Errorf("error saving src image %q: %v", imgID, err) 82 | } 83 | 84 | for id, layer := range layers { 85 | hash, err := layer.Digest() 86 | if err != nil { 87 | return "", fmt.Errorf("error retrieving hash layer: %v", err) 88 | } 89 | 90 | r, err := layer.Compressed() 91 | if err != nil { 92 | return "", fmt.Errorf("error downloading layer %d: %v", id, err) 93 | } 94 | 95 | destFolder := filepath.Join(tmpDir, "extracted", hash.Hex) 96 | 97 | if err := extractTarGz(r, destFolder); err != nil { 98 | return "", fmt.Errorf("error extracting layer %d: %v", id, err) 99 | } 100 | 101 | if err := r.Close(); err != nil { 102 | return "", fmt.Errorf("error closing download for layer %d: %v", id, err) 103 | } 104 | } 105 | 106 | return filepath.Join(tmpDir, "extracted"), nil 107 | } 108 | 109 | // ID returns non-unique GCR image ID. 110 | func (i *image) ID() string { 111 | return fmt.Sprintf("%s@sha256:%s", i.id, i.quickHash) 112 | } 113 | 114 | // RepoName returns repository name. 115 | func (i *image) RepoName() string { 116 | return RepoName 117 | } 118 | 119 | // RepoPath returns repository path. 120 | func (i *image) RepoPath() string { 121 | return "" 122 | } 123 | 124 | // LocalPath returns local path to a GCR image. 
125 | func (i *image) LocalPath() string { 126 | return i.localPath 127 | } 128 | 129 | // RemotePath returns remote path to a GCR image. 130 | func (i *image) RemotePath() string { 131 | return i.remotePath 132 | } 133 | 134 | // QuickSHA256Hash return sha256 hash of a GCR image. 135 | func (i *image) QuickSHA256Hash() (string, error) { 136 | return i.quickHash, nil 137 | } 138 | 139 | // Description provides additional description for GCP image. 140 | func (i *image) Description() string { 141 | return i.description 142 | } 143 | 144 | // NewRepo returns new instance of a GCR repository. 145 | func NewRepo(ctx context.Context, oauth2Token oauth2.TokenSource, repositoryPath string) (*Repo, error) { 146 | repo, err := name.NewRepository(repositoryPath) 147 | if err != nil { 148 | return nil, fmt.Errorf("could not create a new Container Registry repository: %v", err) 149 | } 150 | 151 | auth = google.NewTokenSourceAuthenticator(oauth2Token) 152 | opts = google.WithAuth(auth) 153 | remoteOpts = append(remoteOpts, remote.WithAuth(auth)) 154 | 155 | return &Repo{path: repositoryPath, gcr: repo}, nil 156 | } 157 | 158 | // Repo holds data related to a GCR repository. 159 | type Repo struct { 160 | path string 161 | gcr name.Repository 162 | images []*image 163 | } 164 | 165 | // RepoName returns repository name. 166 | func (r *Repo) RepoName() string { 167 | return RepoName 168 | } 169 | 170 | // RepoPath returns repository path. 171 | func (r *Repo) RepoPath() string { 172 | return r.path 173 | } 174 | 175 | // DiscoverRepo traverses the GCR repository and return supported images. 
176 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) { 177 | if err := google.Walk(r.gcr, discoverImages(&r.images), opts); err != nil { 178 | return nil, fmt.Errorf("error while discovering %s GCR repository: %v", r.path, err) 179 | } 180 | 181 | var sources []hashr.Source 182 | for _, image := range r.images { 183 | sources = append(sources, image) 184 | } 185 | 186 | return sources, nil 187 | } 188 | 189 | type image struct { 190 | id string 191 | localPath string 192 | remotePath string 193 | quickHash string 194 | description string 195 | } 196 | 197 | func supportedMedia(mediaType string) bool { 198 | unsupportedMediaTypes := []string{ 199 | "application/vnd.docker.distribution.manifest.v1+json", 200 | "application/vnd.docker.distribution.manifest.v1+prettyjws", 201 | "application/vnd.oci.image.manifest.v1+json", 202 | } 203 | 204 | for _, unsupportedMediaType := range unsupportedMediaTypes { 205 | if strings.EqualFold(mediaType, unsupportedMediaType) { 206 | return false 207 | } 208 | } 209 | 210 | return true 211 | } 212 | 213 | func discoverImages(images *[]*image) google.WalkFunc { 214 | return func(repo name.Repository, tags *google.Tags, err error) error { 215 | if err != nil { 216 | return err 217 | } 218 | 219 | for digest, manifest := range tags.Manifests { 220 | if !supportedMedia(manifest.MediaType) { 221 | continue 222 | } 223 | 224 | if !strings.Contains(digest, "sha256:") { 225 | return fmt.Errorf("image digest is not in expected format: %s", digest) 226 | } 227 | 228 | parts := strings.Split(digest, ":") 229 | if len(parts[1]) != 64 { 230 | return fmt.Errorf("image digest is not in expected format: %s", digest) 231 | } 232 | 233 | *images = append(*images, &image{ 234 | id: repo.Name(), 235 | quickHash: parts[1], 236 | remotePath: repo.Name(), 237 | description: fmt.Sprintf("Tags: %s, Media Type: %s, Created on: %s, Uploaded on: %s", manifest.Tags, manifest.MediaType, manifest.Created.UTC().String(), manifest.Uploaded.UTC().String()), 
238 | }) 239 | } 240 | 241 | return nil 242 | } 243 | } 244 | 245 | func extractTarGz(r io.Reader, outputFolder string) error { 246 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) { 247 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil { 248 | return fmt.Errorf("error while creating target directory: %v", err2) 249 | } 250 | } 251 | 252 | gzReader, err := gzip.NewReader(r) 253 | if err != nil { 254 | return err 255 | } 256 | 257 | tarReader := tar.NewReader(gzReader) 258 | 259 | glog.Infof("Extracting to %s", outputFolder) 260 | 261 | for { 262 | header, err := tarReader.Next() 263 | 264 | switch { 265 | case err == io.EOF: 266 | return nil 267 | case err != nil: 268 | return err 269 | } 270 | 271 | if containsDotDot(header.Name) { 272 | glog.Warningf("not extracting %s, potential path traversal", header.Name) 273 | continue 274 | } 275 | destEntry := filepath.Join(outputFolder, header.Name) 276 | 277 | switch header.Typeflag { 278 | case tar.TypeDir: 279 | if _, err := os.Stat(destEntry); os.IsNotExist(err) { 280 | if err := os.MkdirAll(destEntry, 0755); err != nil { 281 | return fmt.Errorf("error while creating destination directory: %v", err) 282 | } 283 | } 284 | case tar.TypeReg: 285 | if _, err := os.Stat(filepath.Dir(destEntry)); os.IsNotExist(err) { 286 | if err := os.MkdirAll(filepath.Dir(destEntry), 0755); err != nil { 287 | return fmt.Errorf("error while creating destination directory: %v", err) 288 | } 289 | } 290 | 291 | destFile, err := os.Create(destEntry) 292 | if err != nil { 293 | return fmt.Errorf("error while creating destination file: %v", err) 294 | } 295 | 296 | _, err = io.Copy(destFile, tarReader) 297 | if err != nil { 298 | return fmt.Errorf("error while extracting destination file: %v", err) 299 | } 300 | destFile.Close() 301 | } 302 | } 303 | } 304 | 305 | func containsDotDot(v string) bool { 306 | if !strings.Contains(v, "..") { 307 | return false 308 | } 309 | for _, ent := range strings.FieldsFunc(v, isSlashRune) { 
310 | if ent == ".." { 311 | return true 312 | } 313 | } 314 | return false 315 | } 316 | 317 | func isSlashRune(r rune) bool { return r == '/' || r == '\\' } 318 | -------------------------------------------------------------------------------- /importers/gcr/gcr_test.go: -------------------------------------------------------------------------------- 1 | package gcr 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "crypto/ecdsa" 7 | "crypto/elliptic" 8 | "crypto/rand" 9 | "crypto/tls" 10 | "crypto/x509" 11 | "encoding/json" 12 | "encoding/pem" 13 | "math/big" 14 | "net" 15 | "net/http" 16 | "net/http/httptest" 17 | "path" 18 | "strings" 19 | "testing" 20 | "time" 21 | 22 | "github.com/golang/glog" 23 | "golang.org/x/oauth2" 24 | 25 | "github.com/google/go-cmp/cmp" 26 | "github.com/google/go-cmp/cmp/cmpopts" 27 | "github.com/google/go-containerregistry/pkg/name" 28 | "github.com/google/go-containerregistry/pkg/registry" 29 | "github.com/google/go-containerregistry/pkg/v1/google" 30 | "github.com/google/go-containerregistry/pkg/v1/partial" 31 | "github.com/google/go-containerregistry/pkg/v1/random" 32 | ) 33 | 34 | // Helper functions below (newTLSServer, etc.) 
have been copied from https://github.com/google/go-containerregistry 35 | 36 | type fakeRepo struct { 37 | h http.Handler 38 | repos map[string]google.Tags 39 | } 40 | 41 | func (fr *fakeRepo) ServeHTTP(w http.ResponseWriter, r *http.Request) { 42 | glog.Infof("%s %s", r.Method, r.URL) 43 | if strings.HasPrefix(r.URL.Path, "/v2/") && strings.HasSuffix(r.URL.Path, "/tags/list") { 44 | repo := strings.TrimSuffix(strings.TrimPrefix(r.URL.Path, "/v2/"), "/tags/list") 45 | if tags, ok := fr.repos[repo]; !ok { 46 | w.WriteHeader(http.StatusNotFound) 47 | } else { 48 | glog.Infof("%+v", tags) 49 | if err := json.NewEncoder(w).Encode(tags); err != nil { 50 | glog.Exit(err) 51 | } 52 | } 53 | } else { 54 | fr.h.ServeHTTP(w, r) 55 | } 56 | } 57 | 58 | func newFakeRepo(stuff map[name.Reference]partial.Describable) (*fakeRepo, error) { 59 | h := registry.New() 60 | repos := make(map[string]google.Tags) 61 | 62 | for ref, thing := range stuff { 63 | repo := ref.Context().RepositoryStr() 64 | tags, ok := repos[repo] 65 | if !ok { 66 | tags = google.Tags{ 67 | Name: repo, 68 | Children: []string{}, 69 | } 70 | } 71 | 72 | // Populate the "child" field. 73 | for parentPath := repo; parentPath != "."; parentPath = path.Dir(parentPath) { 74 | child, parent := path.Base(parentPath), path.Dir(parentPath) 75 | tags, ok := repos[parent] 76 | if !ok { 77 | tags = google.Tags{} 78 | } 79 | for _, c := range repos[parent].Children { 80 | if c == child { 81 | break 82 | } 83 | } 84 | tags.Children = append(tags.Children, child) 85 | repos[parent] = tags 86 | } 87 | 88 | // Populate the "manifests" and "tags" field. 
89 | d, err := thing.Digest() 90 | if err != nil { 91 | return nil, err 92 | } 93 | mt, err := thing.MediaType() 94 | if err != nil { 95 | return nil, err 96 | } 97 | if tags.Manifests == nil { 98 | tags.Manifests = make(map[string]google.ManifestInfo) 99 | } 100 | mi, ok := tags.Manifests[d.String()] 101 | if !ok { 102 | mi = google.ManifestInfo{ 103 | MediaType: string(mt), 104 | Tags: []string{}, 105 | } 106 | } 107 | if tag, ok := ref.(name.Tag); ok { 108 | tags.Tags = append(tags.Tags, tag.Identifier()) 109 | mi.Tags = append(mi.Tags, tag.Identifier()) 110 | } 111 | tags.Manifests[d.String()] = mi 112 | repos[repo] = tags 113 | } 114 | 115 | return &fakeRepo{h: h, repos: repos}, nil 116 | } 117 | 118 | func getTestRepo() (*fakeRepo, []*image, error) { 119 | image1, err := random.Image(1024, 5) 120 | if err != nil { 121 | return nil, nil, err 122 | } 123 | 124 | ha1, err := image1.Digest() 125 | if err != nil { 126 | return nil, nil, err 127 | } 128 | 129 | image1name := "registry.example.com/test/hashr/aaa" 130 | lr1, err := name.ParseReference(image1name) 131 | if err != nil { 132 | return nil, nil, err 133 | } 134 | 135 | ref1 := lr1.Context().Tag("foo") 136 | 137 | image2, err := random.Image(1024, 5) 138 | if err != nil { 139 | return nil, nil, err 140 | } 141 | 142 | ha2, err := image2.Digest() 143 | if err != nil { 144 | return nil, nil, err 145 | } 146 | 147 | image2name := "registry.example.com/test/hashr/bbb" 148 | lr2, err := name.ParseReference(image2name) 149 | if err != nil { 150 | return nil, nil, err 151 | } 152 | 153 | ref2 := lr2.Context().Tag("bar") 154 | wantImages := []*image{ 155 | { 156 | id: image1name, 157 | quickHash: ha1.Hex, 158 | description: "Tags: [foo], Media Type: application/vnd.docker.distribution.manifest.v2+json, Created on: 1754-08-30 22:43:41.129 +0000 UTC, Uploaded on: 1754-08-30 22:43:41.129 +0000 UTC", 159 | remotePath: image1name, 160 | }, 161 | { 162 | id: image2name, 163 | quickHash: ha2.Hex, 164 | description: 
"Tags: [bar], Media Type: application/vnd.docker.distribution.manifest.v2+json, Created on: 1754-08-30 22:43:41.129 +0000 UTC, Uploaded on: 1754-08-30 22:43:41.129 +0000 UTC", 165 | remotePath: image2name, 166 | }, 167 | } 168 | 169 | // Set up a fake registry. 170 | h, err := newFakeRepo(map[name.Reference]partial.Describable{ 171 | ref1: image1, 172 | ref2: image2, 173 | }) 174 | if err != nil { 175 | return nil, nil, err 176 | } 177 | 178 | return h, wantImages, nil 179 | } 180 | 181 | func TestDiscoverRepo(t *testing.T) { 182 | fakeRepo, wantImages, err := getTestRepo() 183 | if err != nil { 184 | t.Fatalf("could not create fake GCR repo: %v", err) 185 | } 186 | 187 | s, err := newTLSServer("registry.example.com", fakeRepo) 188 | if err != nil { 189 | glog.Exit(err) 190 | } 191 | defer s.Close() 192 | 193 | repo, err := NewRepo(context.Background(), oauth2.StaticTokenSource(&oauth2.Token{}), "registry.example.com/test/hashr") 194 | if err != nil { 195 | t.Fatalf("could not create new GCR repo: %v", err) 196 | } 197 | 198 | // Route requests to our test registry. 199 | opts = google.WithTransport(s.Client().Transport) 200 | 201 | gotSources, err := repo.DiscoverRepo() 202 | if err != nil { 203 | t.Fatalf("unexpected error in DiscoverRepo(): %v", err) 204 | } 205 | 206 | var gotImages []*image 207 | for _, source := range gotSources { 208 | if image, ok := source.(*image); ok { 209 | gotImages = append(gotImages, image) 210 | } else { 211 | t.Fatal("error while casting Source interface to Image struct") 212 | } 213 | } 214 | 215 | cmpOpts := []cmp.Option{ 216 | cmp.AllowUnexported(image{}), 217 | cmpopts.SortSlices(func(a, b *image) bool { 218 | return a.id < b.id 219 | }), 220 | } 221 | 222 | if !cmp.Equal(wantImages, gotImages, cmpOpts...) 
{ 223 | t.Errorf("DiscoverRepo() unexpected diff (-want/+got):\n%s", cmp.Diff(wantImages, gotImages, cmp.AllowUnexported(image{}))) 224 | } 225 | } 226 | 227 | func newTLSServer(domain string, handler http.Handler) (*httptest.Server, error) { 228 | s := httptest.NewUnstartedServer(handler) 229 | 230 | template := x509.Certificate{ 231 | SerialNumber: big.NewInt(1), 232 | NotBefore: time.Now().Add(-1 * time.Hour), 233 | NotAfter: time.Now().Add(time.Hour), 234 | IPAddresses: []net.IP{ 235 | net.IPv4(127, 0, 0, 1), 236 | net.IPv6loopback, 237 | }, 238 | DNSNames: []string{domain}, 239 | 240 | KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign, 241 | ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, 242 | BasicConstraintsValid: true, 243 | IsCA: true, 244 | } 245 | 246 | priv, err := ecdsa.GenerateKey(elliptic.P521(), rand.Reader) 247 | if err != nil { 248 | return nil, err 249 | } 250 | 251 | b, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv) 252 | if err != nil { 253 | return nil, err 254 | } 255 | 256 | pc := &bytes.Buffer{} 257 | if err := pem.Encode(pc, &pem.Block{Type: "CERTIFICATE", Bytes: b}); err != nil { 258 | return nil, err 259 | } 260 | 261 | ek, err := x509.MarshalECPrivateKey(priv) 262 | if err != nil { 263 | return nil, err 264 | } 265 | 266 | pk := &bytes.Buffer{} 267 | if err := pem.Encode(pk, &pem.Block{Type: "EC PRIVATE KEY", Bytes: ek}); err != nil { 268 | return nil, err 269 | } 270 | 271 | c, err := tls.X509KeyPair(pc.Bytes(), pk.Bytes()) 272 | if err != nil { 273 | return nil, err 274 | } 275 | s.TLS = &tls.Config{ 276 | Certificates: []tls.Certificate{c}, 277 | } 278 | s.StartTLS() 279 | 280 | certpool := x509.NewCertPool() 281 | certpool.AddCert(s.Certificate()) 282 | 283 | t := &http.Transport{ 284 | TLSClientConfig: &tls.Config{ 285 | RootCAs: certpool, 286 | }, 287 | DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { 
288 | return net.Dial(s.Listener.Addr().Network(), s.Listener.Addr().String()) 289 | }, 290 | } 291 | s.Client().Transport = t 292 | 293 | return s, nil 294 | } 295 | -------------------------------------------------------------------------------- /importers/importer.go.example: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package template 16 | 17 | import ( 18 | "context" 19 | 20 | "github.com/google/hashr/core/hashr" 21 | ) 22 | 23 | const ( 24 | // RepoName contains the repository name. 25 | RepoName = "windows" 26 | ) 27 | 28 | type image struct { 29 | id string 30 | localPath string 31 | remotePath string 32 | quickHash string 33 | } 34 | 35 | // Preprocess extracts the contents of Windows ISO file. 36 | func (i *image) Preprocess() (string, error) { 37 | return "", nil 38 | } 39 | 40 | // ID returns non-unique Windows ISO file ID. 41 | func (i *image) ID() string { 42 | return i.id 43 | } 44 | 45 | // RepoName returns repository name. 46 | func (i *image) RepoName() string { 47 | return RepoName 48 | } 49 | 50 | // RepoPath returns repository path. 51 | func (i *image) RepoPath() string { 52 | return "" 53 | } 54 | 55 | // LocalPath returns local path to a Windows ISO file. 
56 | func (i *image) LocalPath() string { 57 | return i.localPath 58 | } 59 | 60 | // RemotePath returns remote path to a Windows ISO file. 61 | func (i *image) RemotePath() string { 62 | return i.remotePath 63 | } 64 | 65 | // QuickSHA256Hash calculates sha256 hash of a Windows Update file metadata. 66 | func (i *image) QuickSHA256Hash() (string, error) { 67 | return i.quickHash, nil 68 | } 69 | 70 | // NewRepo returns new instance of a Windows ISO repository. 71 | func NewRepo(ctx context.Context, repositoryPath string) (*Repo, error) { 72 | return &Repo{path: repositoryPath}, nil 73 | } 74 | 75 | // Repo holds data related to a Windows WSUS repository. 76 | type Repo struct { 77 | path string 78 | } 79 | 80 | // RepoName returns repository name. 81 | func (r *Repo) RepoName() string { 82 | return RepoName 83 | } 84 | 85 | // RepoPath returns repository path. 86 | func (r *Repo) RepoPath() string { 87 | return r.path 88 | } 89 | 90 | // DiscoverRepo traverses the repository and looks for files that are related to WSUS packages. 91 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) { 92 | var sources []hashr.Source 93 | return sources, nil 94 | } 95 | -------------------------------------------------------------------------------- /importers/iso9660/generate_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for tar in $(find -name '*.tar.gz'); do 4 | echo "$tar" 5 | filename=$(basename "$tar") 6 | tardir=$(dirname "$tar") 7 | tempdir=$(mktemp -d) 8 | tar -C "$tempdir" -xf "$tar" 9 | 10 | cd "$tempdir" 11 | mkisofs -o data.iso . 
12 | cd - 13 | cp "$tempdir/data.iso" "$tardir/$(echo "$filename" | sed 's/.tar.gz/.iso/g')" 14 | rm -r "$tempdir" 15 | rm "$tar" 16 | done 17 | -------------------------------------------------------------------------------- /importers/iso9660/iso9660.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package iso9660 implements iso9660 repository importer. 16 | package iso9660 17 | 18 | import ( 19 | "crypto/sha256" 20 | "fmt" 21 | "io" 22 | "io/fs" 23 | "os" 24 | "path/filepath" 25 | "strings" 26 | 27 | "github.com/hooklift/iso9660" 28 | 29 | "github.com/golang/glog" 30 | 31 | "github.com/google/hashr/core/hashr" 32 | "github.com/google/hashr/importers/common" 33 | ) 34 | 35 | const ( 36 | // RepoName contains the repository name. 37 | RepoName = "iso9660" 38 | chunkSize = 1024 * 1024 * 10 // 10MB 39 | ) 40 | 41 | // Archive holds data related to the ISO file. 42 | type ISO9660 struct { 43 | filename string 44 | remotePath string 45 | localPath string 46 | quickSha256hash string 47 | repoPath string 48 | } 49 | 50 | // Preprocess extracts the contents of a .tar.gz file. 
51 | func (a *ISO9660) Preprocess() (string, error) { 52 | var err error 53 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID()) 54 | if err != nil { 55 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err) 56 | } 57 | 58 | baseDir, _ := filepath.Split(a.localPath) 59 | extractionDir := filepath.Join(baseDir, "extracted") 60 | 61 | if err := extractIso(a.localPath, extractionDir); err != nil { 62 | return "", err 63 | } 64 | 65 | return extractionDir, nil 66 | } 67 | 68 | func extractIso(isoPath, outputFolder string) error { 69 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) { 70 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil { 71 | return fmt.Errorf("error while creating target directory: %v", err2) 72 | } 73 | } 74 | 75 | // Step 1: Open ISO reader 76 | file, err := os.Open(isoPath) 77 | if err != nil { 78 | return fmt.Errorf("error opening ISO file: %v", err) 79 | } 80 | 81 | r, err := iso9660.NewReader(file) 82 | if err != nil { 83 | return fmt.Errorf("error parsing ISO file: %v", err) 84 | } 85 | 86 | // 2. 
Get the absolute destination path 87 | outputFolder, err = filepath.Abs(outputFolder) 88 | if err != nil { 89 | return err 90 | } 91 | 92 | // Step 3: Iterate over files 93 | for { 94 | f, err := r.Next() 95 | if err == io.EOF { 96 | break 97 | } 98 | 99 | if err != nil { 100 | return fmt.Errorf("error retrieving next file from ISO: %v", err) 101 | } 102 | 103 | err = unpackFile(f, outputFolder) 104 | if err != nil { 105 | return err 106 | } 107 | } 108 | 109 | return nil 110 | } 111 | 112 | func unpackFile(f fs.FileInfo, destination string) error { 113 | // Step 4: Create output path 114 | fp := filepath.Join(destination, f.Name()) 115 | if f.IsDir() { 116 | if err := os.MkdirAll(fp, f.Mode()); err != nil { 117 | return fmt.Errorf("error creating destination directory: %v", err) 118 | } 119 | return nil 120 | } 121 | 122 | parentDir, _ := filepath.Split(fp) 123 | if err := os.MkdirAll(parentDir, f.Mode()); err != nil { 124 | return fmt.Errorf("error while creating target directory: %v", err) 125 | } 126 | 127 | // Step 5: Create destination file 128 | freader := f.Sys().(io.Reader) 129 | ff, err := os.Create(fp) 130 | if err != nil { 131 | fmt.Errorf("error while creating destination file: %v", err) 132 | } 133 | defer func() { 134 | if err := ff.Close(); err != nil { 135 | fmt.Errorf("error while closing file: %v", err) 136 | } 137 | }() 138 | 139 | if err := ff.Chmod(f.Mode()); err != nil { 140 | fmt.Errorf("error while chmod: %v", err) 141 | } 142 | 143 | // Step 6: Extract file contents 144 | if _, err := io.Copy(ff, freader); err != nil { 145 | fmt.Errorf("error while extracting file data: %v", err) 146 | } 147 | return nil 148 | } 149 | 150 | // ID returns non-unique ISO file Archive ID. 151 | func (a *ISO9660) ID() string { 152 | return a.filename 153 | } 154 | 155 | // RepoName returns repository name. 156 | func (a *ISO9660) RepoName() string { 157 | return RepoName 158 | } 159 | 160 | // RepoPath returns repository path. 
161 | func (a *ISO9660) RepoPath() string { 162 | return a.repoPath 163 | } 164 | 165 | // LocalPath returns local path to a ISO file Archive .iso file. 166 | func (a *ISO9660) LocalPath() string { 167 | return a.localPath 168 | } 169 | 170 | // RemotePath returns non-local path to a ISO file Archive .iso file. 171 | func (a *ISO9660) RemotePath() string { 172 | return a.remotePath 173 | } 174 | 175 | // Description provides additional description for a .iso file. 176 | func (a *ISO9660) Description() string { 177 | return "" 178 | } 179 | 180 | // QuickSHA256Hash calculates sha256 hash of .iso file. 181 | func (a *ISO9660) QuickSHA256Hash() (string, error) { 182 | // Check if the quick hash was already calculated. 183 | if a.quickSha256hash != "" { 184 | return a.quickSha256hash, nil 185 | } 186 | 187 | f, err := os.Open(a.remotePath) 188 | if err != nil { 189 | return "", err 190 | } 191 | defer f.Close() 192 | 193 | fileInfo, err := f.Stat() 194 | if err != nil { 195 | return "", err 196 | } 197 | 198 | // Check if the file is smaller than 20MB, if so hash the whole file. 199 | if fileInfo.Size() < int64(chunkSize*2) { 200 | h := sha256.New() 201 | if _, err := io.Copy(h, f); err != nil { 202 | return "", err 203 | } 204 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil)) 205 | return a.quickSha256hash, nil 206 | } 207 | 208 | header := make([]byte, chunkSize) 209 | _, err = f.Read(header) 210 | if err != nil { 211 | return "", err 212 | } 213 | 214 | footer := make([]byte, chunkSize) 215 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize)) 216 | if err != nil { 217 | return "", err 218 | } 219 | 220 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...))) 221 | return a.quickSha256hash, nil 222 | } 223 | 224 | // NewRepo returns new instance of an ISO file repository. 225 | func NewRepo(path string) *Repo { 226 | return &Repo{location: path} 227 | } 228 | 229 | // Repo holds data related to an ISO file repository. 
230 | type Repo struct { 231 | location string 232 | files []string 233 | Archives []*ISO9660 234 | } 235 | 236 | // RepoName returns repository name. 237 | func (r *Repo) RepoName() string { 238 | return RepoName 239 | } 240 | 241 | // RepoPath returns repository path. 242 | func (r *Repo) RepoPath() string { 243 | return r.location 244 | } 245 | 246 | // DiscoverRepo traverses the repository and looks for files that are related to ISO file base Archives. 247 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) { 248 | if err := filepath.Walk(r.location, walk(&r.files)); err != nil { 249 | return nil, err 250 | } 251 | 252 | for _, file := range r.files { 253 | _, filename := filepath.Split(file) 254 | 255 | r.Archives = append(r.Archives, &ISO9660{filename: filename, remotePath: file, repoPath: r.location}) 256 | } 257 | 258 | var sources []hashr.Source 259 | for _, Archive := range r.Archives { 260 | sources = append(sources, Archive) 261 | } 262 | 263 | return sources, nil 264 | } 265 | 266 | func walk(files *[]string) filepath.WalkFunc { 267 | return func(path string, info os.FileInfo, err error) error { 268 | if err != nil { 269 | glog.Errorf("Could not open %s: %v", path, err) 270 | return nil 271 | } 272 | if info.IsDir() { 273 | return nil 274 | } 275 | 276 | if strings.HasSuffix(info.Name(), ".iso") { 277 | *files = append(*files, path) 278 | } 279 | 280 | return nil 281 | } 282 | } 283 | -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200106.00.00/ubuntu-desktop.iso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200106.00.00/ubuntu-desktop.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200106.00.00/ubuntu-laptop.iso: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200106.00.00/ubuntu-laptop.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200106.00.00/ubuntu-server.iso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200106.00.00/ubuntu-server.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200107.00.00/ubuntu-desktop.iso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.00.00/ubuntu-desktop.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200107.00.00/ubuntu-laptop.iso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.00.00/ubuntu-laptop.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200107.00.00/ubuntu-server.iso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.00.00/ubuntu-server.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200107.01.00/ubuntu-desktop.iso: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.01.00/ubuntu-desktop.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200107.01.00/ubuntu-laptop.iso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.01.00/ubuntu-laptop.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200107.01.00/ubuntu-server.iso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.01.00/ubuntu-server.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200108.00.00/ubuntu-desktop.iso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200108.00.00/ubuntu-desktop.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200108.00.00/ubuntu-laptop.iso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200108.00.00/ubuntu-laptop.iso -------------------------------------------------------------------------------- /importers/iso9660/testdata/20200108.00.00/ubuntu-server.iso: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200108.00.00/ubuntu-server.iso -------------------------------------------------------------------------------- /importers/rpm/generate_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for tar in $(find -name '*.tar.gz'); do 4 | echo "$tar" 5 | filename=$(basename "$tar") 6 | tardir=$(dirname "$tar") 7 | tempdir=$(mktemp -d) 8 | echo "$tempdir" 9 | mkdir -p "$tempdir/BUILDROOT" "$tempdir/BUILD" "$tempdir/RPMS" "$tempdir/SOURCES" "$tempdir/SPECS" "$tempdir/SRPMS" 10 | #tar -C "$tempdir/SOURCES" -xvf "$tar" 11 | mkdir -p "$tempdir/SOURCES/testdata-1.0" 12 | tar -C "$tempdir/SOURCES/testdata-1.0" -xf "$tar" 13 | cd "$tempdir/SOURCES/" 14 | tar -czf "$tempdir/SOURCES/testdata-1.0.tar.gz" "testdata-1.0" 15 | tar -tf "$tempdir/SOURCES/testdata-1.0.tar.gz" 16 | rm -rf "$tempdir/SOURCES/testdata-1.0" 17 | cd - 18 | 19 | cat < "$tempdir/SPECS/testdata.spec" 20 | Summary: Test data 21 | Name: testdata 22 | Version: 1.0 23 | Release: 1%{?dist} 24 | License: Apache 2.0 25 | Group: Development/Tools 26 | BuildArch: noarch 27 | Source0: %{name}-%{version}.tar.gz 28 | 29 | %description 30 | Just test data 31 | 32 | %prep 33 | %setup -q 34 | 35 | %install 36 | rm -rf "\$RPM_BUILD_ROOT" 37 | mkdir -p "\$RPM_BUILD_ROOT" 38 | cp -r . 
"\$RPM_BUILD_ROOT/" 39 | 40 | %clean 41 | rm -rf \$RPM_BUILD_ROOT 42 | 43 | %files 44 | /* 45 | 46 | 47 | %changelog 48 | * Fri Nov 18 2022 Carl Svensson - 0.0.1 49 | - Test data 50 | EOF 51 | rpmbuild --buildroot "$tempdir/BUILDROOT" --define "_topdir $tempdir" -bb "$tempdir/SPECS/testdata.spec" 52 | cp "$tempdir/RPMS/noarch/testdata-1.0-1.noarch.rpm" "$tardir/$(echo "$filename" | sed 's/.tar.gz/.rpm/g')" 53 | rm -r "$tempdir" 54 | rm "$tar" 55 | done 56 | -------------------------------------------------------------------------------- /importers/rpm/rpm.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package rpm implements rpm package importer. 16 | package rpm 17 | 18 | import ( 19 | "crypto/sha256" 20 | "fmt" 21 | "io" 22 | "os" 23 | "path/filepath" 24 | "strings" 25 | 26 | "github.com/golang/glog" 27 | 28 | "github.com/google/hashr/core/hashr" 29 | "github.com/google/hashr/importers/common" 30 | 31 | rpmutils "github.com/sassoftware/go-rpmutils" 32 | ) 33 | 34 | const ( 35 | // RepoName contains the repository name. 36 | RepoName = "rpm" 37 | chunkSize = 1024 * 1024 * 10 // 10MB 38 | ) 39 | 40 | // Archive holds data related to rpm archive. 
41 | type Archive struct { 42 | filename string 43 | remotePath string 44 | localPath string 45 | quickSha256hash string 46 | repoPath string 47 | } 48 | 49 | func extractRPM(rpmPath, outputFolder string) error { 50 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) { 51 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil { 52 | return fmt.Errorf("error while creating target directory: %v", err2) 53 | } 54 | } 55 | 56 | fd, err := os.Open(rpmPath) 57 | if err != nil { 58 | return fmt.Errorf("failed to open rpm file: %v", err) 59 | } 60 | defer fd.Close() 61 | 62 | rpmFile, err := rpmutils.ReadRpm(fd) 63 | if err != nil { 64 | return fmt.Errorf("failed to parse rpm file: %v", err) 65 | } 66 | 67 | err = rpmFile.ExpandPayload(outputFolder) 68 | if err != nil { 69 | return fmt.Errorf("failed to extract rpm file: %v", err) 70 | } 71 | 72 | return nil 73 | } 74 | 75 | // Preprocess extracts the contents of a .rpm file. 76 | func (a *Archive) Preprocess() (string, error) { 77 | var err error 78 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID()) 79 | if err != nil { 80 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err) 81 | } 82 | 83 | baseDir, _ := filepath.Split(a.localPath) 84 | extractionDir := filepath.Join(baseDir, "extracted") 85 | 86 | if err := extractRPM(a.localPath, extractionDir); err != nil { 87 | return "", err 88 | } 89 | 90 | return extractionDir, nil 91 | } 92 | 93 | // ID returns non-unique rpm Archive ID. 94 | func (a *Archive) ID() string { 95 | return a.filename 96 | } 97 | 98 | // RepoName returns repository name. 99 | func (a *Archive) RepoName() string { 100 | return RepoName 101 | } 102 | 103 | // RepoPath returns repository path. 104 | func (a *Archive) RepoPath() string { 105 | return a.repoPath 106 | } 107 | 108 | // LocalPath returns local path to a rpm Archive .rpm file. 
109 | func (a *Archive) LocalPath() string { 110 | return a.localPath 111 | } 112 | 113 | // RemotePath returns non-local path to a rpm Archive .rpm file. 114 | func (a *Archive) RemotePath() string { 115 | return a.remotePath 116 | } 117 | 118 | // Description provides additional description for a .rpm file. 119 | func (a *Archive) Description() string { 120 | return "" 121 | } 122 | 123 | // QuickSHA256Hash calculates sha256 hash of .rpm file. 124 | func (a *Archive) QuickSHA256Hash() (string, error) { 125 | // Check if the quick hash was already calculated. 126 | if a.quickSha256hash != "" { 127 | return a.quickSha256hash, nil 128 | } 129 | 130 | f, err := os.Open(a.remotePath) 131 | if err != nil { 132 | return "", err 133 | } 134 | defer f.Close() 135 | 136 | fileInfo, err := f.Stat() 137 | if err != nil { 138 | return "", err 139 | } 140 | 141 | // Check if the file is smaller than 20MB, if so hash the whole file. 142 | if fileInfo.Size() < int64(chunkSize*2) { 143 | h := sha256.New() 144 | if _, err := io.Copy(h, f); err != nil { 145 | return "", err 146 | } 147 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil)) 148 | return a.quickSha256hash, nil 149 | } 150 | 151 | header := make([]byte, chunkSize) 152 | _, err = f.Read(header) 153 | if err != nil { 154 | return "", err 155 | } 156 | 157 | footer := make([]byte, chunkSize) 158 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize)) 159 | if err != nil { 160 | return "", err 161 | } 162 | 163 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...))) 164 | return a.quickSha256hash, nil 165 | } 166 | 167 | // NewRepo returns new instance of rpm repository. 168 | func NewRepo(path string) *Repo { 169 | return &Repo{location: path} 170 | } 171 | 172 | // Repo holds data related to a rpm repository. 173 | type Repo struct { 174 | location string 175 | files []string 176 | Archives []*Archive 177 | } 178 | 179 | // RepoName returns repository name. 
180 | func (r *Repo) RepoName() string { 181 | return RepoName 182 | } 183 | 184 | // RepoPath returns repository path. 185 | func (r *Repo) RepoPath() string { 186 | return r.location 187 | } 188 | 189 | // DiscoverRepo traverses the repository and looks for files that are related to rpm archives. 190 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) { 191 | 192 | if err := filepath.Walk(r.location, walk(&r.files)); err != nil { 193 | return nil, err 194 | } 195 | 196 | for _, file := range r.files { 197 | _, filename := filepath.Split(file) 198 | 199 | if strings.HasSuffix(filename, ".rpm") { 200 | r.Archives = append(r.Archives, &Archive{filename: filename, remotePath: file, repoPath: r.location}) 201 | } 202 | } 203 | 204 | var sources []hashr.Source 205 | for _, Archive := range r.Archives { 206 | sources = append(sources, Archive) 207 | } 208 | 209 | return sources, nil 210 | } 211 | 212 | func walk(files *[]string) filepath.WalkFunc { 213 | return func(path string, info os.FileInfo, err error) error { 214 | if err != nil { 215 | glog.Errorf("Could not open %s: %v", path, err) 216 | return nil 217 | } 218 | if info.IsDir() { 219 | return nil 220 | } 221 | if strings.HasSuffix(info.Name(), ".rpm") { 222 | *files = append(*files, path) 223 | } 224 | 225 | return nil 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /importers/rpm/testdata/20200106.00.00/ubuntu-desktop.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200106.00.00/ubuntu-desktop.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200106.00.00/ubuntu-laptop.rpm: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200106.00.00/ubuntu-laptop.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200106.00.00/ubuntu-server.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200106.00.00/ubuntu-server.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200107.00.00/ubuntu-desktop.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.00.00/ubuntu-desktop.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200107.00.00/ubuntu-laptop.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.00.00/ubuntu-laptop.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200107.00.00/ubuntu-server.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.00.00/ubuntu-server.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200107.01.00/ubuntu-desktop.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.01.00/ubuntu-desktop.rpm 
-------------------------------------------------------------------------------- /importers/rpm/testdata/20200107.01.00/ubuntu-laptop.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.01.00/ubuntu-laptop.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200107.01.00/ubuntu-server.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.01.00/ubuntu-server.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200108.00.00/ubuntu-desktop.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200108.00.00/ubuntu-desktop.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200108.00.00/ubuntu-laptop.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200108.00.00/ubuntu-laptop.rpm -------------------------------------------------------------------------------- /importers/rpm/testdata/20200108.00.00/ubuntu-server.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200108.00.00/ubuntu-server.rpm -------------------------------------------------------------------------------- /importers/targz/targz.go: 
-------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package targz implements targz repository importer. 16 | package targz 17 | 18 | import ( 19 | "crypto/sha256" 20 | "fmt" 21 | "io" 22 | "os" 23 | "path/filepath" 24 | "strings" 25 | 26 | "github.com/golang/glog" 27 | 28 | "github.com/google/hashr/core/hashr" 29 | "github.com/google/hashr/importers/common" 30 | ) 31 | 32 | const ( 33 | // RepoName contains the repository name. 34 | RepoName = "targz" 35 | chunkSize = 1024 * 1024 * 10 // 10MB 36 | ) 37 | 38 | // Archive holds data related to targz archive. 39 | type Archive struct { 40 | filename string 41 | remotePath string 42 | localPath string 43 | quickSha256hash string 44 | repoPath string 45 | } 46 | 47 | // Preprocess extracts the contents of a .tar.gz file. 
48 | func (a *Archive) Preprocess() (string, error) { 49 | var err error 50 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID()) 51 | if err != nil { 52 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err) 53 | } 54 | 55 | baseDir, _ := filepath.Split(a.localPath) 56 | extractionDir := filepath.Join(baseDir, "extracted") 57 | 58 | if err := common.ExtractTarGz(a.localPath, extractionDir); err != nil { 59 | return "", err 60 | } 61 | 62 | return extractionDir, nil 63 | } 64 | 65 | // ID returns non-unique targz Archive ID. 66 | func (a *Archive) ID() string { 67 | return a.filename 68 | } 69 | 70 | // RepoName returns repository name. 71 | func (a *Archive) RepoName() string { 72 | return RepoName 73 | } 74 | 75 | // RepoPath returns repository path. 76 | func (a *Archive) RepoPath() string { 77 | return a.repoPath 78 | } 79 | 80 | // LocalPath returns local path to a targz Archive .tar.gz file. 81 | func (a *Archive) LocalPath() string { 82 | return a.localPath 83 | } 84 | 85 | // RemotePath returns non-local path to a targz Archive .tar.gz file. 86 | func (a *Archive) RemotePath() string { 87 | return a.remotePath 88 | } 89 | 90 | // Description provides additional description for a .tar.gz file. 91 | func (a *Archive) Description() string { 92 | return "" 93 | } 94 | 95 | // QuickSHA256Hash calculates sha256 hash of .tar.gz file. 96 | func (a *Archive) QuickSHA256Hash() (string, error) { 97 | // Check if the quick hash was already calculated. 98 | if a.quickSha256hash != "" { 99 | return a.quickSha256hash, nil 100 | } 101 | 102 | f, err := os.Open(a.remotePath) 103 | if err != nil { 104 | return "", err 105 | } 106 | defer f.Close() 107 | 108 | fileInfo, err := f.Stat() 109 | if err != nil { 110 | return "", err 111 | } 112 | 113 | // Check if the file is smaller than 20MB, if so hash the whole file. 
114 | if fileInfo.Size() < int64(chunkSize*2) { 115 | h := sha256.New() 116 | if _, err := io.Copy(h, f); err != nil { 117 | return "", err 118 | } 119 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil)) 120 | return a.quickSha256hash, nil 121 | } 122 | 123 | header := make([]byte, chunkSize) 124 | _, err = f.Read(header) 125 | if err != nil { 126 | return "", err 127 | } 128 | 129 | footer := make([]byte, chunkSize) 130 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize)) 131 | if err != nil { 132 | return "", err 133 | } 134 | 135 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...))) 136 | return a.quickSha256hash, nil 137 | } 138 | 139 | // NewRepo returns new instance of targz repository. 140 | func NewRepo(path string) *Repo { 141 | return &Repo{location: path} 142 | } 143 | 144 | // Repo holds data related to a targz repository. 145 | type Repo struct { 146 | location string 147 | files []string 148 | Archives []*Archive 149 | } 150 | 151 | // RepoName returns repository name. 152 | func (r *Repo) RepoName() string { 153 | return RepoName 154 | } 155 | 156 | // RepoPath returns repository path. 157 | func (r *Repo) RepoPath() string { 158 | return r.location 159 | } 160 | 161 | // DiscoverRepo traverses the repository and looks for files that are related to targz base Archives. 
162 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) { 163 | 164 | if err := filepath.Walk(r.location, walk(&r.files)); err != nil { 165 | return nil, err 166 | } 167 | 168 | for _, file := range r.files { 169 | _, filename := filepath.Split(file) 170 | 171 | if strings.HasSuffix(filename, ".tar.gz") { 172 | r.Archives = append(r.Archives, &Archive{filename: filename, remotePath: file, repoPath: r.location}) 173 | } 174 | } 175 | 176 | var sources []hashr.Source 177 | for _, Archive := range r.Archives { 178 | sources = append(sources, Archive) 179 | } 180 | 181 | return sources, nil 182 | } 183 | 184 | func walk(files *[]string) filepath.WalkFunc { 185 | return func(path string, info os.FileInfo, err error) error { 186 | if err != nil { 187 | glog.Errorf("Could not open %s: %v", path, err) 188 | return nil 189 | } 190 | if info.IsDir() { 191 | return nil 192 | } 193 | if strings.HasSuffix(info.Name(), ".tar.gz") || strings.HasSuffix(info.Name(), ".tar.gz.sig") { 194 | *files = append(*files, path) 195 | } 196 | 197 | return nil 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /importers/targz/testdata/20200106.00.00/ubuntu-desktop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200106.00.00/ubuntu-desktop.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200106.00.00/ubuntu-laptop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200106.00.00/ubuntu-laptop.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200106.00.00/ubuntu-server.tar.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200106.00.00/ubuntu-server.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200107.00.00/ubuntu-desktop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.00.00/ubuntu-desktop.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200107.00.00/ubuntu-laptop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.00.00/ubuntu-laptop.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200107.00.00/ubuntu-server.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.00.00/ubuntu-server.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200107.01.00/ubuntu-desktop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.01.00/ubuntu-desktop.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200107.01.00/ubuntu-laptop.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.01.00/ubuntu-laptop.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200107.01.00/ubuntu-server.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.01.00/ubuntu-server.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200108.00.00/ubuntu-desktop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200108.00.00/ubuntu-desktop.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200108.00.00/ubuntu-laptop.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200108.00.00/ubuntu-laptop.tar.gz -------------------------------------------------------------------------------- /importers/targz/testdata/20200108.00.00/ubuntu-server.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200108.00.00/ubuntu-server.tar.gz -------------------------------------------------------------------------------- /importers/windows/windows.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in 
compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package windows implements Windows ISO-13346 repository importer. 16 | package windows 17 | 18 | import ( 19 | "bytes" 20 | "context" 21 | "crypto/sha256" 22 | "fmt" 23 | "io" 24 | "os" 25 | "os/exec" 26 | "path/filepath" 27 | "strconv" 28 | "strings" 29 | "time" 30 | 31 | "github.com/Microsoft/go-winio/wim" 32 | "github.com/golang/glog" 33 | "github.com/google/hashr/core/hashr" 34 | "github.com/google/hashr/importers/common" 35 | ) 36 | 37 | const ( 38 | // RepoName contains the repository name. 39 | RepoName = "windows" 40 | ) 41 | 42 | // Preprocess extracts the contents of Windows ISO file. 
43 | func (w *wimImage) Preprocess() (string, error) { 44 | var err error 45 | w.localPath, err = common.CopyToLocal(w.remotePath, w.id) 46 | if err != nil { 47 | return "", fmt.Errorf("error while copying %s to %s: %v", w.remotePath, w.localPath, err) 48 | } 49 | 50 | baseDir, _ := filepath.Split(w.localPath) 51 | 52 | extractionDir := filepath.Join(baseDir, "extracted") 53 | 54 | mountDir := filepath.Join(baseDir, "mnt") 55 | if err := os.MkdirAll(mountDir, 0755); err != nil { 56 | return "", fmt.Errorf("could not create mount directory: %v", err) 57 | } 58 | 59 | _, err = shellCommand("sudo", "mount", w.localPath, mountDir) 60 | if err != nil { 61 | return "", fmt.Errorf("error while executing mount cmd: %v", err) 62 | } 63 | 64 | installWimPath := filepath.Join(mountDir, "/sources/install.wim") 65 | 66 | wimFile, err := os.Open(installWimPath) 67 | if err != nil { 68 | return "", fmt.Errorf("error while opening %s: %v", installWimPath, err) 69 | } 70 | 71 | reader, err := wim.NewReader(wimFile) 72 | if err != nil { 73 | return "", fmt.Errorf("error while creating wim reader %s: %v", installWimPath, err) 74 | } 75 | 76 | for _, image := range reader.Image { 77 | if image.Name == w.imageName { 78 | glog.Infof("Extracting files from %s located in %s to %s", image.Name, w.localPath, extractionDir) 79 | err := extractWimImage(image, extractionDir) 80 | if err != nil { 81 | return "", fmt.Errorf("error while extracting wim image %s: %v", image.Name, err) 82 | } 83 | glog.Infof("Done extracting files from %s", image.Name) 84 | } 85 | } 86 | 87 | time.Sleep(time.Second * 10) 88 | _, err = shellCommand("sudo", "umount", "-fl", mountDir) 89 | if err != nil { 90 | return "", fmt.Errorf("error while executing umount cmd: %v", err) 91 | } 92 | 93 | return extractionDir, nil 94 | } 95 | func extractWimImage(image *wim.Image, extractionDir string) error { 96 | rootDir, err := image.Open() 97 | if err != nil { 98 | return fmt.Errorf("error while opening wim file %s: %v", 
image.Name, err) 99 | } 100 | 101 | if err := extractWimFolder(rootDir, rootDir.Name, extractionDir); err != nil { 102 | return err 103 | } 104 | 105 | return nil 106 | } 107 | 108 | func extractWimFolder(wimFile *wim.File, path, extractionDir string) error { 109 | files, err := wimFile.Readdir() 110 | if err != nil { 111 | return fmt.Errorf("error while opening wim file %s: %v", wimFile.Name, err) 112 | } 113 | for _, file := range files { 114 | dstPath := filepath.Join(extractionDir, path, file.Name) 115 | if file.IsDir() { 116 | if err := os.MkdirAll(dstPath, 0755); err != nil { 117 | glog.Errorf("Could not create destination directory %s: %v", dstPath, err) 118 | continue 119 | } 120 | if err := extractWimFolder(file, filepath.Join(path, file.Name), extractionDir); err != nil { 121 | glog.Warningf("Failed to extract Wim folder %s: %v", file.Name, err) 122 | } 123 | } else { 124 | if err := copyFile(file, dstPath); err != nil { 125 | glog.Errorf("Could not copy to destination file %s: %v", dstPath, err) 126 | continue 127 | } 128 | } 129 | } 130 | 131 | return nil 132 | } 133 | 134 | func copyFile(file *wim.File, dstPath string) error { 135 | destFile, err := os.Create(dstPath) 136 | if err != nil { 137 | return fmt.Errorf("error while creating destination file: %v", err) 138 | } 139 | 140 | content, err := file.Open() 141 | if err != nil { 142 | return fmt.Errorf("error while opening wim %s file for reading: %v", file.Name, err) 143 | } 144 | 145 | _, err = io.Copy(destFile, content) 146 | if err != nil { 147 | return fmt.Errorf("error while copying destination file %s: %v", file.Name, err) 148 | } 149 | 150 | destFile.Close() 151 | content.Close() 152 | 153 | return nil 154 | } 155 | 156 | var execute = func(name string, args ...string) *exec.Cmd { 157 | glog.Infof("name: %v, args: %v", name, args) 158 | return exec.Command(name, args...) 
159 | } 160 | 161 | func shellCommand(binary string, args ...string) (string, error) { 162 | cmd := execute(binary, args...) 163 | var stdout, stderr bytes.Buffer 164 | cmd.Stdout = &stdout 165 | cmd.Stderr = &stderr 166 | 167 | err := cmd.Run() 168 | if err != nil { 169 | return "", fmt.Errorf("error while executing %s: %v\nStdout: %v\nStderr: %v", binary, err, stdout.String(), stderr.String()) 170 | } 171 | 172 | return stdout.String(), nil 173 | } 174 | 175 | // ID returns non-unique Windows ISO file ID. 176 | func (w *wimImage) ID() string { 177 | return w.id 178 | } 179 | 180 | // RepoName returns repository name. 181 | func (w *wimImage) RepoName() string { 182 | return RepoName 183 | } 184 | 185 | // RepoPath returns repository path. 186 | func (w *wimImage) RepoPath() string { 187 | return w.repoPath 188 | } 189 | 190 | // LocalPath returns local path to a Windows ISO file. 191 | func (w *wimImage) LocalPath() string { 192 | return w.localPath 193 | } 194 | 195 | // RemotePath returns remote path to a Windows ISO file. 196 | func (w *wimImage) RemotePath() string { 197 | return w.remotePath 198 | } 199 | 200 | // QuickSHA256Hash calculates sha256 hash of a Windows ISO file. 201 | func (w *wimImage) QuickSHA256Hash() (string, error) { 202 | return w.quickHash, nil 203 | } 204 | 205 | // Description provides additional description for a Windows ISO file. 206 | func (w *wimImage) Description() string { 207 | return "" 208 | } 209 | 210 | // NewRepo returns new instance of a Windows ISO repository. 211 | func NewRepo(ctx context.Context, repositoryPath string) (*Repo, error) { 212 | return &Repo{path: repositoryPath}, nil 213 | } 214 | 215 | // Repo holds data related to a Windows repository. 
type Repo struct {
	path      string      // repository root walked for .iso files
	files     []string    // .iso paths collected by walk()
	wimImages []*wimImage // images discovered inside the ISOs' install.wim files
}

// wimImage describes a single Windows image found inside an ISO's install.wim.
type wimImage struct {
	id         string // derived from image name + Windows version numbers
	imageName  string // image name as reported by the wim header
	localPath  string // local copy of the containing ISO
	remotePath string // original location of the containing ISO
	quickHash  string // sha256 over image metadata, computed at discovery time
	repoPath   string // repository root this ISO came from
}

// RepoName returns repository name.
func (r *Repo) RepoName() string {
	return RepoName
}

// RepoPath returns repository path.
func (r *Repo) RepoPath() string {
	return r.path
}

// DiscoverRepo traverses the repository and looks for .iso files.
// Each ISO is mounted, its /sources/install.wim is opened, and one wimImage
// source is recorded per image found inside it.
func (r *Repo) DiscoverRepo() ([]hashr.Source, error) {

	if err := filepath.Walk(r.path, walk(&r.files)); err != nil {
		return nil, err
	}

	for _, filePath := range r.files {
		// Temp dir name is the ISO path relative to the repo root with
		// separators flattened to dashes.
		tempDir, err := common.LocalTempDir(strings.ReplaceAll(strings.TrimPrefix(filePath, r.path+string(os.PathSeparator)), string(os.PathSeparator), "-"))
		if err != nil {
			return nil, fmt.Errorf("error while creating temp dir: %v", err)
		}

		mountDir := filepath.Join(tempDir, "mnt")
		if err := os.MkdirAll(mountDir, 0755); err != nil {
			return nil, fmt.Errorf("could not create mount directory: %v", err)
		}

		_, err = shellCommand("sudo", "mount", filePath, mountDir)
		if err != nil {
			return nil, fmt.Errorf("error while executing mount cmd: %v", err)
		}
		// NOTE(review): the error returns below leave the ISO mounted and
		// (after os.Open succeeds) leak wimFile — it is only closed on the
		// success path further down. Worth fixing with deferred cleanup.

		installWimPath := filepath.Join(mountDir, "/sources/install.wim")

		wimFile, err := os.Open(installWimPath)
		if err != nil {
			return nil, fmt.Errorf("error while opening %s: %v", installWimPath, err)
		}

		reader, err := wim.NewReader(wimFile)
		if err != nil {
			return nil, fmt.Errorf("error while creating wim reader %s: %v", installWimPath, err)
		}

		glog.Infof("Opened %s wim file", installWimPath)

		for _, image := range reader.Image {
			glog.Infof("Found %s image in %s", image.Name, installWimPath)
			r.wimImages = append(r.wimImages, &wimImage{
				imageName: image.Name,
				// e.g. "Windows10Pro-10.0-19041-0sp".
				id: fmt.Sprintf("%s-%d.%d-%d-%dsp", strings.ReplaceAll(image.Name, " ", ""), image.Windows.Version.Major, image.Windows.Version.Minor, image.Windows.Version.Build, image.Windows.Version.SPBuild),
				localPath:  filePath,
				remotePath: filePath,
				repoPath:   r.path,
				// Quick hash over stable metadata, so the ISO never has to be
				// re-read to decide whether it was already processed.
				quickHash: fmt.Sprintf("%x", sha256.Sum256([]byte(image.CreationTime.Time().String()+
					image.Name+
					image.Windows.ProductName+
					strconv.Itoa(image.Windows.Version.Build)+
					strconv.Itoa(image.Windows.Version.Major)+
					strconv.Itoa(image.Windows.Version.Minor)+
					strconv.Itoa(image.Windows.Version.SPBuild)))),
			})
		}

		wimFile.Close()

		// Let outstanding reads settle before the forced lazy unmount.
		time.Sleep(time.Second * 10)
		_, err = shellCommand("sudo", "umount", "-fl", mountDir)
		if err != nil {
			return nil, fmt.Errorf("error while executing umount cmd: %v", err)
		}
	}

	var sources []hashr.Source
	for _, wimImage := range r.wimImages {
		sources = append(sources, wimImage)
	}

	return sources, nil
}

// walk returns a WalkFunc that records every .iso file (case-insensitive
// extension match) into files. Unreadable entries are logged and skipped.
func walk(files *[]string) filepath.WalkFunc {
	return func(path string, info os.FileInfo, err error) error {
		if err != nil {
			glog.Errorf("Could not open %s: %v", path, err)
			return nil
		}
		if info.IsDir() {
			return nil
		}

		if strings.EqualFold(filepath.Ext(info.Name()), ".iso") {
			*files = append(*files, path)
		}

		return nil
	}
}
--------------------------------------------------------------------------------
/importers/wsus/testdata/._03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/._03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab
-------------------------------------------------------------------------------- /importers/wsus/testdata/._138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/._138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab -------------------------------------------------------------------------------- /importers/wsus/testdata/._1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/._1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab -------------------------------------------------------------------------------- /importers/wsus/testdata/._1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/._1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab -------------------------------------------------------------------------------- /importers/wsus/testdata/03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab -------------------------------------------------------------------------------- /importers/wsus/testdata/138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab 
--------------------------------------------------------------------------------
/importers/wsus/testdata/1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab
--------------------------------------------------------------------------------
/importers/wsus/testdata/1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab
--------------------------------------------------------------------------------
/importers/zip/generate_tests.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Regenerates zip importer test data: repacks every .tar.gz found under the
# current directory into a .zip with identical contents, then removes the
# original tarball.
#
# Fixes over the previous version:
#  - "find" is given its (POSIX-required) path operand "." — the bare form
#    only works with GNU find.
#  - the output name is derived with ${filename%.tar.gz} instead of
#    sed 's/.tar.gz/.zip/g', whose unescaped dots and /g flag could mangle
#    names containing similar substrings.
#  - cd failures abort instead of zipping/copying from the wrong directory.
# NOTE: paths must not contain whitespace (word splitting on find output).

for tar in $(find . -name '*.tar.gz'); do
	echo "$tar"
	filename=$(basename "$tar")
	tardir=$(dirname "$tar")
	tempdir=$(mktemp -d)
	tar -C "$tempdir" -xf "$tar"

	cd "$tempdir" || exit 1
	zip -r data.zip .
	cd - || exit 1
	cp "$tempdir/data.zip" "$tardir/${filename%.tar.gz}.zip"
	rm -r "$tempdir"
	rm "$tar"
done
--------------------------------------------------------------------------------
/importers/zip/testdata/20200106.00.00/ubuntu-desktop.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200106.00.00/ubuntu-desktop.jar
--------------------------------------------------------------------------------
/importers/zip/testdata/20200106.00.00/ubuntu-laptop.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200106.00.00/ubuntu-laptop.whl
--------------------------------------------------------------------------------
/importers/zip/testdata/20200106.00.00/ubuntu-server.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200106.00.00/ubuntu-server.egg
--------------------------------------------------------------------------------
/importers/zip/testdata/20200107.00.00/ubuntu-desktop.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.00.00/ubuntu-desktop.zip
--------------------------------------------------------------------------------
/importers/zip/testdata/20200107.00.00/ubuntu-laptop.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.00.00/ubuntu-laptop.zip
-------------------------------------------------------------------------------- /importers/zip/testdata/20200107.00.00/ubuntu-server.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.00.00/ubuntu-server.zip -------------------------------------------------------------------------------- /importers/zip/testdata/20200107.01.00/ubuntu-desktop.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.01.00/ubuntu-desktop.zip -------------------------------------------------------------------------------- /importers/zip/testdata/20200107.01.00/ubuntu-laptop.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.01.00/ubuntu-laptop.zip -------------------------------------------------------------------------------- /importers/zip/testdata/20200107.01.00/ubuntu-server.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.01.00/ubuntu-server.zip -------------------------------------------------------------------------------- /importers/zip/testdata/20200108.00.00/ubuntu-desktop.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200108.00.00/ubuntu-desktop.zip -------------------------------------------------------------------------------- /importers/zip/testdata/20200108.00.00/ubuntu-laptop.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200108.00.00/ubuntu-laptop.zip -------------------------------------------------------------------------------- /importers/zip/testdata/20200108.00.00/ubuntu-server.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200108.00.00/ubuntu-server.zip -------------------------------------------------------------------------------- /importers/zip/zip.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package zip implements zip repository importer. 16 | package zip 17 | 18 | import ( 19 | "archive/zip" 20 | "crypto/sha256" 21 | "fmt" 22 | "io" 23 | "os" 24 | "path/filepath" 25 | "strings" 26 | 27 | "github.com/golang/glog" 28 | 29 | "github.com/google/hashr/core/hashr" 30 | "github.com/google/hashr/importers/common" 31 | ) 32 | 33 | const ( 34 | // RepoName contains the repository name. 35 | RepoName = "zip" 36 | chunkSize = 1024 * 1024 * 10 // 10MB 37 | ) 38 | 39 | // Archive holds data related to zip archive. 
40 | type Archive struct { 41 | filename string 42 | remotePath string 43 | localPath string 44 | quickSha256hash string 45 | repoPath string 46 | } 47 | 48 | // Preprocess extracts the contents of a .zip file. 49 | func (a *Archive) Preprocess() (string, error) { 50 | var err error 51 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID()) 52 | if err != nil { 53 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err) 54 | } 55 | 56 | baseDir, _ := filepath.Split(a.localPath) 57 | extractionDir := filepath.Join(baseDir, "extracted") 58 | 59 | if err := extractZip(a.localPath, extractionDir); err != nil { 60 | return "", err 61 | } 62 | 63 | return extractionDir, nil 64 | } 65 | 66 | func extractZip(zipPath, outputFolder string) error { 67 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) { 68 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil { 69 | return fmt.Errorf("error while creating target directory: %v", err2) 70 | } 71 | } 72 | 73 | // 1. Open the zip file 74 | zipReader, err := zip.OpenReader(zipPath) 75 | if err != nil { 76 | return fmt.Errorf("failed to open zip file: %v", err) 77 | } 78 | defer zipReader.Close() 79 | 80 | // 2. Get the absolute destination path 81 | outputFolder, err = filepath.Abs(outputFolder) 82 | if err != nil { 83 | return err 84 | } 85 | 86 | // 3. Iterate over zip files inside the archive and unzip each of them 87 | for _, f := range zipReader.File { 88 | err := unzipFile(f, outputFolder) 89 | if err != nil { 90 | return err 91 | } 92 | } 93 | 94 | return nil 95 | } 96 | 97 | func unzipFile(f *zip.File, destination string) error { 98 | // 4. Check if file paths are not vulnerable to Zip Slip 99 | filePath := filepath.Join(destination, f.Name) 100 | if !strings.HasPrefix(filePath, filepath.Clean(destination)+string(os.PathSeparator)) { 101 | return fmt.Errorf("invalid file path: %s", filePath) 102 | } 103 | 104 | // 5. 
Create directory tree 105 | if f.FileInfo().IsDir() { 106 | if err := os.MkdirAll(filePath, os.ModePerm); err != nil { 107 | return err 108 | } 109 | return nil 110 | } 111 | 112 | if err := os.MkdirAll(filepath.Dir(filePath), os.ModePerm); err != nil { 113 | return err 114 | } 115 | 116 | // 6. Create a destination file for unzipped content 117 | destinationFile, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) 118 | if err != nil { 119 | return err 120 | } 121 | defer destinationFile.Close() 122 | 123 | // 7. Unzip the content of a file and copy it to the destination file 124 | zippedFile, err := f.Open() 125 | if err != nil { 126 | return err 127 | } 128 | defer zippedFile.Close() 129 | 130 | if _, err := io.Copy(destinationFile, zippedFile); err != nil { 131 | return err 132 | } 133 | return nil 134 | } 135 | 136 | // ID returns non-unique zip Archive ID. 137 | func (a *Archive) ID() string { 138 | return a.filename 139 | } 140 | 141 | // RepoName returns repository name. 142 | func (a *Archive) RepoName() string { 143 | return RepoName 144 | } 145 | 146 | // RepoPath returns repository path. 147 | func (a *Archive) RepoPath() string { 148 | return a.repoPath 149 | } 150 | 151 | // LocalPath returns local path to a zip Archive .zip file. 152 | func (a *Archive) LocalPath() string { 153 | return a.localPath 154 | } 155 | 156 | // RemotePath returns non-local path to a zip Archive .zip file. 157 | func (a *Archive) RemotePath() string { 158 | return a.remotePath 159 | } 160 | 161 | // Description provides additional description for a .zip file. 162 | func (a *Archive) Description() string { 163 | return "" 164 | } 165 | 166 | // QuickSHA256Hash calculates sha256 hash of .zip file. 167 | func (a *Archive) QuickSHA256Hash() (string, error) { 168 | // Check if the quick hash was already calculated. 
169 | if a.quickSha256hash != "" { 170 | return a.quickSha256hash, nil 171 | } 172 | 173 | f, err := os.Open(a.remotePath) 174 | if err != nil { 175 | return "", err 176 | } 177 | defer f.Close() 178 | 179 | fileInfo, err := f.Stat() 180 | if err != nil { 181 | return "", err 182 | } 183 | 184 | // Check if the file is smaller than 20MB, if so hash the whole file. 185 | if fileInfo.Size() < int64(chunkSize*2) { 186 | h := sha256.New() 187 | if _, err := io.Copy(h, f); err != nil { 188 | return "", err 189 | } 190 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil)) 191 | return a.quickSha256hash, nil 192 | } 193 | 194 | header := make([]byte, chunkSize) 195 | _, err = f.Read(header) 196 | if err != nil { 197 | return "", err 198 | } 199 | 200 | footer := make([]byte, chunkSize) 201 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize)) 202 | if err != nil { 203 | return "", err 204 | } 205 | 206 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...))) 207 | return a.quickSha256hash, nil 208 | } 209 | 210 | // NewRepo returns new instance of zip repository. 211 | func NewRepo(path string, fileExtensions string) *Repo { 212 | exts := strings.Split(fileExtensions, ",") 213 | for i, ext := range exts { 214 | exts[i] = "." + ext 215 | } 216 | 217 | return &Repo{location: path, fileExtensions: exts} 218 | } 219 | 220 | // Repo holds data related to a zip repository. 221 | type Repo struct { 222 | location string 223 | fileExtensions []string 224 | files []string 225 | Archives []*Archive 226 | } 227 | 228 | // RepoName returns repository name. 229 | func (r *Repo) RepoName() string { 230 | return RepoName 231 | } 232 | 233 | // RepoPath returns repository path. 234 | func (r *Repo) RepoPath() string { 235 | return r.location 236 | } 237 | 238 | // DiscoverRepo traverses the repository and looks for files that are related to zip base Archives. 
239 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) { 240 | if err := filepath.Walk(r.location, walk(&r.files, r.fileExtensions)); err != nil { 241 | return nil, err 242 | } 243 | 244 | for _, file := range r.files { 245 | _, filename := filepath.Split(file) 246 | 247 | r.Archives = append(r.Archives, &Archive{filename: filename, remotePath: file, repoPath: r.location}) 248 | } 249 | 250 | var sources []hashr.Source 251 | for _, Archive := range r.Archives { 252 | sources = append(sources, Archive) 253 | } 254 | 255 | return sources, nil 256 | } 257 | 258 | func walk(files *[]string, extensions []string) filepath.WalkFunc { 259 | return func(path string, info os.FileInfo, err error) error { 260 | if err != nil { 261 | glog.Errorf("Could not open %s: %v", path, err) 262 | return nil 263 | } 264 | if info.IsDir() { 265 | return nil 266 | } 267 | 268 | for _, ext := range extensions { 269 | if strings.HasSuffix(info.Name(), ext) { 270 | *files = append(*files, path) 271 | break 272 | } 273 | } 274 | 275 | return nil 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /processors/local/local.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package local provides functions to process data locally. 
16 | package local 17 | 18 | import ( 19 | "bytes" 20 | "fmt" 21 | "os/exec" 22 | "path/filepath" 23 | 24 | "github.com/golang/glog" 25 | ) 26 | 27 | var execute = func(name string, args ...string) *exec.Cmd { 28 | glog.Infof("name: %v, args: %v", name, args) 29 | return exec.Command(name, args...) 30 | } 31 | 32 | // Processor is an instance of local processor. 33 | type Processor struct { 34 | } 35 | 36 | // New returns new local processor instance. 37 | func New() *Processor { 38 | return &Processor{} 39 | } 40 | 41 | func shellCommand(binary string, args ...string) (string, error) { 42 | cmd := execute(binary, args...) 43 | var stdout, stderr bytes.Buffer 44 | cmd.Stdout = &stdout 45 | cmd.Stderr = &stderr 46 | 47 | err := cmd.Run() 48 | if err != nil { 49 | return "", fmt.Errorf("error while executing %s: %v\nStdout: %v\nStderr: %v", binary, err, stdout.String(), stderr.String()) 50 | } 51 | 52 | return stdout.String(), nil 53 | } 54 | 55 | // ImageExport runs image_export.py binary locally. 56 | func (p *Processor) ImageExport(sourcePath string) (string, error) { 57 | // TODO(mlegin): check if image_export.py is present on the local machine. 58 | baseDir := filepath.Dir(sourcePath) 59 | exportDir := filepath.Join(baseDir, "export") 60 | logFile := filepath.Join(baseDir, "image_export.log") 61 | 62 | dockerArgs := []string{"run", "--rm", "-v", "/tmp/:/tmp", "log2timeline/plaso", "image_export", "--logfile", logFile, "--partitions", "all", "--volumes", "all", "-w", exportDir, sourcePath} 63 | localArgs := []string{"--logfile", logFile, "--partitions", "all", "--volumes", "all", "-w", exportDir, sourcePath} 64 | var err error 65 | 66 | if inDockerContainer() { 67 | _, err = shellCommand("image_export.py", localArgs...) 68 | } else { 69 | _, err = shellCommand("docker", dockerArgs...) 
70 | } 71 | 72 | if err != nil { 73 | return "", fmt.Errorf("error while running image_export: %v", err) 74 | } 75 | 76 | return exportDir, nil 77 | } 78 | 79 | func inDockerContainer() bool { 80 | _, err := shellCommand("which", "image_export.py") 81 | return err == nil 82 | } 83 | -------------------------------------------------------------------------------- /processors/local/local_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package local 16 | 17 | import ( 18 | "fmt" 19 | "io" 20 | "io/ioutil" 21 | "os" 22 | "os/exec" 23 | "path/filepath" 24 | "testing" 25 | ) 26 | 27 | func TestExecute(t *testing.T) { 28 | bytes, err := execute("echo", "test").Output() 29 | if err != nil { 30 | t.Fatalf("unexpected error while running test echo cmd: %v", err) 31 | } 32 | if got, want := string(bytes), "test\n"; got != want { 33 | t.Errorf("echo = %s; want = %s", got, want) 34 | } 35 | } 36 | 37 | func TestImageExport(t *testing.T) { 38 | execute = fakeExecute 39 | tempDir, err := ioutil.TempDir("", "hashr-test") 40 | if err != nil { 41 | t.Fatalf("error while creating temp directory: %v", err) 42 | } 43 | defer os.RemoveAll(tempDir) 44 | 45 | sourceFile, err := os.Open("testdata/disk_2_xfs_volumes.raw") 46 | if err != nil { 47 | t.Fatalf("unexpected error while opening test WIM file: %v", err) 48 | } 49 | 50 | xfsTempPath := filepath.Join(tempDir, "disk_2_xfs_volumes.raw") 51 | destFile, err := os.Create(xfsTempPath) 52 | if err != nil { 53 | t.Fatalf("unexpected error creating temp destination file: %v", err) 54 | } 55 | 56 | _, err = io.Copy(destFile, sourceFile) 57 | if err != nil { 58 | t.Fatalf("unexpected error while copying to temp destination file: %v", err) 59 | } 60 | 61 | processor := New() 62 | gotOut, err := processor.ImageExport(xfsTempPath) 63 | if err != nil { 64 | t.Fatalf("unexpected error while running ImageExport(): %v", err) 65 | } 66 | 67 | wantOut := filepath.Join(tempDir, "export") 68 | 69 | if gotOut != wantOut { 70 | t.Errorf("ImageExport() = %s; want = %s", gotOut, wantOut) 71 | } 72 | 73 | } 74 | 75 | func fakeExecute(command string, args ...string) *exec.Cmd { 76 | var mockStdOut string 77 | 78 | cs := []string{"-test.run=TestHelperProcess", "--", command} 79 | cs = append(cs, args...) 80 | cmd := exec.Command(os.Args[0], cs...) 
81 | cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1", 82 | "STDOUT=" + mockStdOut} 83 | return cmd 84 | } 85 | 86 | // This isn't a real test. It's used as a helper process. 87 | func TestHelperProcess(t *testing.T) { 88 | if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" { 89 | return 90 | } 91 | 92 | fmt.Fprint(os.Stdout, os.Getenv("STDOUT")) 93 | os.Exit(0) 94 | } 95 | -------------------------------------------------------------------------------- /processors/local/testdata/._disk_2_xfs_volumes.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/processors/local/testdata/._disk_2_xfs_volumes.raw -------------------------------------------------------------------------------- /processors/local/testdata/disk_2_xfs_volumes.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/processors/local/testdata/disk_2_xfs_volumes.raw -------------------------------------------------------------------------------- /scripts/CreateCloudSpannerExporterTables.ddl: -------------------------------------------------------------------------------- 1 | -- Copyright 2022 Google LLC 2 | -- 3 | -- Licensed under the Apache License, Version 2.0 (the "License"); 4 | -- you may not use this file except in compliance with the License. 5 | -- You may obtain a copy of the License at 6 | -- 7 | -- https:--www.apache.org/licenses/LICENSE-2.0 8 | -- 9 | -- Unless required by applicable law or agreed to in writing, software 10 | -- distributed under the License is distributed on an "AS IS" BASIS, 11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | -- See the License for the specific language governing permissions and 13 | -- limitations under the License. 
-- Exporter schema for Cloud Spanner. ARRAY columns must declare an element
-- type in Spanner DDL; a bare "ARRAY" is rejected. STRING(MAX) matches the
-- PostgreSQL exporter schema (text[]).

CREATE TABLE samples (
  sha256 STRING(100),
  mimetype STRING(MAX),
  file_output STRING(MAX),
  size INT64
) PRIMARY KEY(sha256);

CREATE TABLE payloads (
  sha256 STRING(100),
  gcs_path STRING(200)
) PRIMARY KEY(sha256);

CREATE TABLE sources (
  sha256 STRING(100),
  source_id ARRAY<STRING(MAX)>,
  source_path STRING(MAX),
  source_description STRING(MAX),
  repo_name STRING(MAX),
  repo_path STRING(MAX),
) PRIMARY KEY(sha256);

CREATE TABLE samples_sources (
  sample_sha256 STRING(100),
  source_sha256 STRING(100),
  sample_paths ARRAY<STRING(MAX)>,
  CONSTRAINT FK_Sample FOREIGN KEY (sample_sha256) REFERENCES samples (sha256),
  CONSTRAINT FK_Source FOREIGN KEY (source_sha256) REFERENCES sources (sha256),
) PRIMARY KEY (sample_sha256, source_sha256);
-- One row per processing job, keyed by the source's quick hash so
-- already-processed sources can be skipped on later runs.
CREATE TABLE jobs (
  quick_sha256 VARCHAR(100) PRIMARY KEY,  -- quick (partial) SHA-256 identifying the source
  imported_at INT NOT NULL,               -- import time; presumably Unix seconds — confirm against storage layer
  id text,                                -- source identifier within its repository
  repo text,                              -- repository name
  repo_path text,                         -- repository location
  location text,                          -- remote path of the source
  sha256 VARCHAR(100),                    -- full SHA-256 of the source
  status VARCHAR(50),                     -- processing status
  error text,                             -- last error message, if any
  preprocessing_duration INT,             -- durations below are whole seconds
  processing_duration INT,
  export_duration INT,
  files_extracted INT,
  files_exported INT
);
-- Extracted file samples, keyed by content hash.
CREATE TABLE samples (
  sha256 VARCHAR(100) PRIMARY KEY,
  mimetype text,       -- detected MIME type
  file_output text,    -- output of file(1)-style identification
  size INT             -- size in bytes
);

-- Raw file contents for a sample (stored inline, unlike the Spanner
-- exporter which stores a GCS path).
CREATE TABLE payloads (
  sha256 VARCHAR(100) PRIMARY KEY,
  payload bytea
);

-- Source images/archives the samples were extracted from.
CREATE TABLE sources (
  sha256 VARCHAR(100) PRIMARY KEY,
  sourceID text[],
  sourcePath text,
  sourceDescription text,
  repoName text,
  repoPath text
);

-- Many-to-many link between samples and sources, with the paths where the
-- sample was found inside each source.
CREATE TABLE samples_sources (
  sample_sha256 VARCHAR(100) REFERENCES samples(sha256) NOT NULL,
  source_sha256 VARCHAR(100) REFERENCES sources(sha256) NOT NULL,
  sample_paths text[],
  PRIMARY KEY (sample_sha256, source_sha256)
);
"ec2:AttachVolume", 27 | "ec2:CopySnapshot", 28 | "ec2:DeregisterImage", 29 | "ec2:DescribeInstances", 30 | "ec2:DescribeTags", 31 | "ec2:DescribeSnapshotAttribute", 32 | "ec2:DescribeInstanceAttribute", 33 | "s3:ListJobs", 34 | "ec2:CopyImage", 35 | "ec2:DescribeSnapshots", 36 | "ec2:DescribeVolumeAttribute", 37 | "ec2:CreateVolume", 38 | "ec2:DescribeImages", 39 | "ec2:DeleteVolume", 40 | "ec2:DescribeVolumeStatus", 41 | "ec2:CreateDefaultSubnet", 42 | "ec2:DescribeAvailabilityZones", 43 | "ec2:DescribeImageAttribute", 44 | "ec2:DescribeVolumes", 45 | "ec2:CreateSnapshot", 46 | "ec2:DescribeInstanceTypes", 47 | "ec2:DescribeInstanceStatus" 48 | ], 49 | "Resource": "*" 50 | } 51 | ] 52 | } 53 | -------------------------------------------------------------------------------- /scripts/aws/hashr_aws_init.txt: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Download hashr-archive from github 4 | HASHR_ARCHIVE_SRC=https://raw.githubusercontent.com/google/hashr/main/scripts/hashr-archive 5 | 6 | wget -O /tmp/hashr-archive ${HASHR_ARCHIVE_SRC} 7 | 8 | sudo mv /tmp/hashr-archive /usr/local/sbin/hashr-archive 9 | sudo chmod +x /usr/local/sbin/hashr-archive 10 | 11 | # Creating data directory 12 | sudo mkdir -p /data 13 | sudo chown -R $USER /data 14 | 15 | -------------------------------------------------------------------------------- /scripts/aws/hashr_setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Script to create AWS EC2 instances as HashR workers. 
#

# AWS configuration
AWS_PROFILE="default"
AWS_REGION="ap-southeast-2"

# AWS instance source
IMAGE_ID="ami-09eebd0b9bd845bf1"
INSTANCE_TYPE="t2.micro"
INSTANCE_COUNT=2
KEY_NAME="HashrAwsKey"
USER="ec2-user"
USER_DATA="file://hashr_aws_init.txt"
WORKER_TAG_INUSE_NAME="InUse"
WORKER_TAG_INUSE_VALUE="false"
WORKER_TAG_ROLE_NAME="role"
WORKER_TAG_ROLE_VALUE="hashr-worker"

SECURITY_GROUP_NAME="hashr-security-group"
SECURITY_GROUP_ID=""

# NOTE: You should change this to limit exposure.
SECURITY_SOURCE_CIDR="0.0.0.0/0"
WORKER_AWS_CONFIG_FILE="hashr.uploader.tar.gz"

SCRIPT_DIR=`dirname $0`
logfile=${SCRIPT_DIR}/hashr_aws_setup.log
touch $logfile

# Creates the SSH key pair ${KEY_NAME} if it does not already exist and saves
# the private key under ~/.ssh.
create_key_pair() {
  local keyPairId

  echo "Creating AWS key pair ${KEY_NAME}"

  keyPairId=`aws --profile ${AWS_PROFILE} ec2 describe-key-pairs --filters Name=key-name,Values=${KEY_NAME} | jq -r '.KeyPairs[0].KeyPairId'`
  if [ "${keyPairId}" == "null" ]; then
    aws --profile ${AWS_PROFILE} ec2 create-key-pair --key-name ${KEY_NAME} | jq -r '.KeyMaterial' > $HOME/.ssh/${KEY_NAME}
    chmod 600 ${HOME}/.ssh/${KEY_NAME}

    keyPairId=`aws --profile ${AWS_PROFILE} ec2 describe-key-pairs --filters Name=key-name,Values=${KEY_NAME} | jq -r '.KeyPairs[0].KeyPairId'`
    echo -e " - Created a new AWS key pair ${keyPairId}"
    return
  fi

  echo -e " Key pair ${KEY_NAME} exists with ID ${keyPairId}"
}

# Ensures the worker security group exists and allows inbound SSH from
# ${SECURITY_SOURCE_CIDR}; sets SECURITY_GROUP_ID.
create_security_group_id() {
  local securityGroupId

  echo "Setting up security group ${SECURITY_GROUP_NAME}"

  SECURITY_GROUP_ID=`aws --profile ${AWS_PROFILE} ec2 describe-security-groups --filters Name=group-name,Values=${SECURITY_GROUP_NAME} | jq -r '.SecurityGroups[].GroupId'`
  if [ "${SECURITY_GROUP_ID}" == "" ]; then
    securityGroupId=`aws --profile ${AWS_PROFILE} ec2 create-security-group --group-name ${SECURITY_GROUP_NAME} --description "Security group for HashR AWS worker" | jq -r '.GroupId'`
    # Append to the log; a single ">" here truncated earlier log entries.
    aws --profile ${AWS_PROFILE} ec2 authorize-security-group-ingress --group-id ${securityGroupId} --protocol tcp --port 22 --cidr "${SECURITY_SOURCE_CIDR}" >> $logfile 2>&1

    SECURITY_GROUP_ID=${securityGroupId}
    sleep 5

    echo -e " - Created security group ${SECURITY_GROUP_NAME} (${securityGroupId})"
  else
    echo -e " - Security group ${SECURITY_GROUP_NAME} exists ${SECURITY_GROUP_ID}"
  fi
}

# Polls until the instance reaches the requested state, giving up after
# 5 attempts (10s apart). Returns 0 on success, 1 on timeout.
check_instance_status() {
  local instanceId="$1"
  local instanceState="$2"
  local instanceStateName=""

  local count=0
  while true
  do
    if [ $count -ge 5 ]; then
      echo "Something went wrong. The instance $instanceId should be up by now"
      return 1
    fi

    instanceStateName=`aws --profile ${AWS_PROFILE} ec2 describe-instances --instance-ids ${instanceId} | jq -r '.Reservations[].Instances[0].State.Name'`
    echo "  Current state of ${instanceId} is ${instanceStateName}"
    if [ "${instanceStateName}" == "${instanceState}" ]; then
      return 0
    fi

    sleep 10
    count=$((count + 1))
  done
}

# Copies the worker's AWS credentials archive to the instance and unpacks it.
copy_aws_config() {
  local instanceId="$1"
  local publicDnsName=""
  local sshOptions="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
  # Fixed: was declared "securityGroupname" (case typo) while the code below
  # uses "securityGroupName".
  local securityGroupName=""

  echo "  - Copying AWS configuration to instance ${instanceId}"
  securityGroupName=`aws --profile ${AWS_PROFILE} ec2 describe-instance-attribute --instance-id ${instanceId} --attribute groupSet | jq -r '.Groups[].GroupName'`
  # Fixed: was [ "$securityGroupName}" != ... ] — a malformed expansion that
  # compared the value with a stray "}" appended, so the branch misfired.
  if [ "${securityGroupName}" != "${SECURITY_GROUP_NAME}" ]; then
    aws --profile ${AWS_PROFILE} ec2 modify-instance-attribute --instance-id ${instanceId} --groups ${SECURITY_GROUP_ID}
    sleep 5
  fi

  publicDnsName=`aws --profile ${AWS_PROFILE} ec2 describe-instances --instance-id ${instanceId} | jq -r '.Reservations[].Instances[0].PublicDnsName'`
  scp -i ~/.ssh/${KEY_NAME} ${sshOptions} ${SCRIPT_DIR}/${WORKER_AWS_CONFIG_FILE} ${USER}@${publicDnsName}:~/ >> $logfile 2>&1
  ssh -i ~/.ssh/${KEY_NAME} ${sshOptions} ${USER}@${publicDnsName} "tar -zxf ~/${WORKER_AWS_CONFIG_FILE} -C ~/" >> $logfile 2>&1
}

# Launches ${INSTANCE_COUNT} worker instances, grows each root volume to 50GB
# (requires a stop/start cycle), and installs the AWS config on each.
run_ec2_instance() {
  local volumeId=""

  echo "Running ${INSTANCE_COUNT} EC2 instances"

  for instanceId in `aws --profile ${AWS_PROFILE} ec2 run-instances --image-id ${IMAGE_ID} --count ${INSTANCE_COUNT} --instance-type ${INSTANCE_TYPE} --key-name ${KEY_NAME} --security-group-ids ${SECURITY_GROUP_ID} --associate-public-ip-address --tag-specifications 'ResourceType=instance,Tags=[{Key=role,Value=hashr-worker},{Key=InUse,Value=false},]' --user-data ${USER_DATA} | jq -r '.Instances[].InstanceId'`
  do
    # We want to make sure the instance is in the running state before we increase
    # the size of the disk.
    echo " - Checking if ${instanceId} is running"
    check_instance_status ${instanceId} "running"
    if [ $? -eq 1 ]; then
      exit 1
    fi

    # Increase the size of the disk.
    # Fixed: the attachment.instance-id filter previously hard-coded
    # "i-094382052d8b0c550", so every loop iteration resized that one
    # instance's volume instead of the current instance's.
    volumeId=`aws --profile ${AWS_PROFILE} ec2 describe-volumes --filters Name=attachment.instance-id,Values=${instanceId} Name=attachment.device,Values=/dev/xvda | jq -r '.Volumes[0].VolumeId'`
    aws --profile ${AWS_PROFILE} ec2 modify-volume --volume-id ${volumeId} --size 50 >> $logfile 2>&1

    # We need to restart the instance to take effect of the new disk size.
    aws --profile ${AWS_PROFILE} ec2 stop-instances --instance-id ${instanceId} >> $logfile 2>&1
    echo " - Checking if ${instanceId} is stopped"
    check_instance_status ${instanceId} "stopped"
    if [ $? -eq 1 ]; then
      exit 1
    fi

    aws --profile ${AWS_PROFILE} ec2 start-instances --instance-id ${instanceId} >> $logfile 2>&1
    echo " - Checking if ${instanceId} is running"
    check_instance_status ${instanceId} "running"
    if [ $? -eq 1 ]; then
      exit 1
    fi

    copy_aws_config ${instanceId}

    echo -e " - Created HashR worker ${instanceId}"
  done
}

remove_key_pair() {
  echo "Removing key pair ${KEY_NAME}"
  aws --profile ${AWS_PROFILE} ec2 delete-key-pair --key-name ${KEY_NAME}
}

# Deletes the security group unless instances still reference it.
remove_security_group() {
  local securityGroupId

  securityGroupId=`aws --profile ${AWS_PROFILE} ec2 describe-security-groups --filters Name=group-name,Values=${SECURITY_GROUP_NAME} | jq -r '.SecurityGroups[].GroupId'`
  echo "Security group ID ${securityGroupId} for ${SECURITY_GROUP_NAME}"

  if [ "${securityGroupId}" == "" ]; then
    echo " - No security group ID for security group ${SECURITY_GROUP_NAME}"
  else
    # Check if security-group-id is still in use
    instances=`aws --profile ${AWS_PROFILE} ec2 describe-instances --filters Name=instance.group-id,Values=${securityGroupId} | jq -r '.Reservations[].Instances[].InstanceId'`
    if [ "${instances}" != "" ]; then
      echo -e "Security group ${securityGroupId} (${SECURITY_GROUP_NAME}) is in use in the following instances:\n${instances}"
    else
      # Delete security group.
      echo "Removing security group ${SECURITY_GROUP_NAME} (${securityGroupId})"
      aws --profile ${AWS_PROFILE} ec2 delete-security-group --group-id ${securityGroupId}
    fi
  fi
}

# Terminates every instance tagged with the hashr-worker role.
remove_instances() {
  echo "Removing EC2 worker instances"

  for instanceId in `aws --profile ${AWS_PROFILE} ec2 describe-instances --filters Name=tag-value,Values=${WORKER_TAG_ROLE_VALUE} | jq -r '.Reservations[].Instances[].InstanceId'`
  do
    echo " - Removing the worker instance ${instanceId}"
    aws --profile ${AWS_PROFILE} ec2 terminate-instances --instance-id ${instanceId} >> $logfile 2>&1
  done
}

# Main
case "$1" in
  setup)
    dirpath=`dirname $0`
    if [ ! -f ${dirpath}/${WORKER_AWS_CONFIG_FILE} ]; then
      echo "No AWS configuration file (${WORKER_AWS_CONFIG_FILE}) for worker"
      exit 1
    fi

    create_key_pair

    sleep 5
    create_security_group_id

    sleep 5
    run_ec2_instance
    ;;
  create-key)
    echo "Creating keypair ${KEY_NAME}"
    create_key_pair
    ;;
  create-sg)
    echo "Creating security group ${SECURITY_GROUP_NAME}"
    create_security_group_id
    ;;
  remove-key)
    echo "Removing key pair ${KEY_NAME}"
    remove_key_pair
    ;;
  remove-sg)
    echo "Removing security group ${SECURITY_GROUP_NAME}"
    remove_security_group
    ;;
  remove-instance)
    echo "Removing EC2 worker instances"
    remove_instances
    ;;
  remove-all)
    echo "Removing HashR AWS instances, security group, and key pair"
    remove_instances

    sleep 5
    remove_security_group

    sleep 5
    remove_key_pair
    ;;
  *)
    echo "Usage: `basename $0` {setup|create-key|create-sg|remove-key|remove-sg|remove-instance|remove-all}" || true
    exit 1
esac

exit 0
#!/bin/bash
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# A HashR - AWS disk archival creation script.

name=`basename $0`
workingdir=/data
logfile=${workingdir}/hashr-archive.`hostname`.log

if [ ! -f ${logfile} ]; then
  sudo touch ${logfile}
  sudo chmod 666 ${logfile}
fi

if [ $# -ne 3 ]; then
  # Fixed: message previously said "two arguments" but three are required.
  echo "${name}: three arguments required"
  exit
fi

device="$1"
imageid="$2"
bucket="$3"

tarGzName="${imageid}.tar.gz"
tarGzPath=${workingdir}/${tarGzName}

# Appends a UTC-timestamped message to the log file.
function log_message() {
  echo -e "`date --utc +'%Y-%m-%dT%H:%M:%SZ'` ${name}: $1" >> $logfile
}

# Logs the message padded to a fixed width with a COMPLETED/FAILED marker
# based on the given exit status.
function check_execution_status() {
  local statusCode=$1
  local statusMessage="$2"
  local MAX_SIZE=90

  messageSize=${#statusMessage}
  paddingSize=`expr ${MAX_SIZE} - ${messageSize}`

  padding=""
  if [ ${paddingSize} -gt 0 ]; then
    for i in $(seq 0 ${paddingSize})
    do
      padding+=" "
    done
  fi

  if [ ${statusCode} -eq 0 ]; then
    log_message "${statusMessage} ${padding} [ COMPLETED ]"
  else
    log_message "${statusMessage} ${padding} [ FAILED ]"
  fi
}

if [ "${device}" == "" ]; then
  echo "${name}: Device (param1) is required"
  # Fixed: previously called the undefined function "log_messsag".
  log_message "Device (param1) is required"
  exit 1
fi

if [[ "${device}" =~ ^/dev/[a-z]{3,4}$ ]]; then
  log_message "Device ${device} is valid"
else
  echo "${name}: Device pattern does not match"
  log_message "${device} does not match required pattern"
  exit 1
fi

if [ "${imageid}" == "" ]; then
  echo "imageid (param2) is required"
  log_message "imageid (param2) is required"
  exit 1
fi

if [ "${bucket}" == "" ]; then
  echo "${name}: S3 bucket (param3) is required"
  log_message "S3 bucket (param3) is required"
  exit 1
fi

cd ${workingdir}

log_message "Creating raw disk image"
sudo dd if=${device} of=${imageid} bs=1M >> ${logfile} 2>&1
check_execution_status $? "Creating raw disk image ${workingdir}/${imageid} from ${device}"

log_message "Creating raw disk archive"
sudo tar -C ${workingdir} -zcf ${tarGzPath} ${imageid} >> ${logfile} 2>&1
check_execution_status $? "Creating raw disk archive ${tarGzName}"

log_message "Uploading disk image ${tarGzPath} to ${bucket}"
aws s3 cp ${tarGzPath} s3://${bucket} >> ${logfile} 2>&1
check_execution_status $? "Disk upload to ${bucket} completed"

log_message "Removing disk image"
sudo rm -f ${imageid} >> ${logfile} 2>&1
check_execution_status $? "Removing disk ${imageid}"

log_message "Removing disk image ${tarGzPath}"
sudo rm -f ${tarGzPath} >> ${logfile} 2>&1
check_execution_status $? "Disk removal ${tarGzPath} completed"

log_message "Creation done file ${tarGzPath}.done"
sudo touch ${tarGzPath}.done
check_execution_status $? "Creation of ${tarGzPath}.done completed"
// Package cloudspanner implements cloud spanner as a hashR storage.
package cloudspanner

import (
	"context"
	"fmt"
	"time"

	"github.com/google/hashr/core/hashr"

	"cloud.google.com/go/spanner"

	"google.golang.org/api/iterator"
)

// Storage allows to interact with cloud spanner.
type Storage struct {
	spannerClient *spanner.Client
}

// NewStorage creates new Storage struct that allows to interact with cloud spanner.
// The error result is currently always nil; it is kept so all hashR storage
// implementations share the same constructor shape.
func NewStorage(ctx context.Context, spannerClient *spanner.Client) (*Storage, error) {
	return &Storage{spannerClient: spannerClient}, nil
}

// UpdateJobs inserts or updates (keyed on quick_sha256) the processing state
// of a single source in the jobs table.
func (s *Storage) UpdateJobs(ctx context.Context, qHash string, p *hashr.ProcessingSource) error {
	_, err := s.spannerClient.Apply(ctx, []*spanner.Mutation{
		spanner.InsertOrUpdate("jobs",
			[]string{
				"quick_sha256",
				"imported_at",
				"id",
				"repo",
				"repo_path",
				"location",
				"sha256",
				"status",
				"error",
				"preprocessing_duration",
				"processing_duration",
				"export_duration",
				"files_extracted",
				"files_exported"},
			[]interface{}{
				qHash,
				// ImportedAt holds Unix seconds; converted to a TIMESTAMP here.
				time.Unix(p.ImportedAt, 0),
				p.ID,
				p.Repo,
				p.RepoPath,
				p.RemoteSourcePath,
				p.Sha256,
				p.Status,
				p.Error,
				// Durations are persisted as whole seconds (INT64 columns).
				int64(p.PreprocessingDuration.Seconds()),
				int64(p.ProcessingDuration.Seconds()),
				int64(p.ExportDuration.Seconds()),
				p.SampleCount,
				p.ExportCount,
			})})
	if err != nil {
		return fmt.Errorf("failed to insert data %v", err)
	}

	return nil
}

// FetchJobs returns a map of quick_sha256 -> status for every row in the
// jobs table, which lets the core skip already-processed sources.
func (s *Storage) FetchJobs(ctx context.Context) (map[string]string, error) {
	processed := make(map[string]string)
	iter := s.spannerClient.Single().Read(ctx, "jobs",
		spanner.AllKeys(), []string{"quick_sha256", "status"})
	defer iter.Stop()
	for {
		row, err := iter.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return nil, err
		}
		var quickSha256, status string
		err = row.ColumnByName("quick_sha256", &quickSha256)
		if err != nil {
			return nil, err
		}
		err = row.ColumnByName("status", &status)
		if err != nil {
			return nil, err
		}
		processed[quickSha256] = status
	}
	return processed, nil
}
type Storage struct {
	sqlDB *sql.DB // open connection pool to the PostgreSQL instance
}

// NewStorage creates new Storage struct that allows to interact with PostgreSQL instance and all the necessary tables, if they don't exist.
func NewStorage(sqlDB *sql.DB) (*Storage, error) {
	// Check if the "jobs" table exists.
	exists, err := tableExists(sqlDB, "jobs")
	if err != nil {
		return nil, fmt.Errorf("error while checking if jobs table exists: %v", err)
	}

	if !exists {
		// Schema mirrors scripts/CreateJobsTable.sql. imported_at is an
		// integer; presumably Unix seconds — confirm against callers.
		sql := `CREATE TABLE jobs (
		quick_sha256 VARCHAR(100) PRIMARY KEY,
		imported_at INT NOT NULL,
		id text,
		repo text,
		repo_path text,
		location text,
		sha256 VARCHAR(100),
		status VARCHAR(50),
		error text,
		preprocessing_duration INT,
		processing_duration INT,
		export_duration INT,
		files_extracted INT,
		files_exported INT
		)`
		_, err = sqlDB.Exec(sql)
		if err != nil {
			return nil, fmt.Errorf("error while creating jobs table: %v", err)
		}
	}

	return &Storage{sqlDB: sqlDB}, nil
}

// rowExists reports whether a jobs row with the given quick_sha256 exists.
func (s *Storage) rowExists(qHash string) (bool, error) {
	sqlStatement := `SELECT quick_sha256 FROM jobs WHERE quick_sha256=$1;`
	var quickSha256 string
	row := s.sqlDB.QueryRow(sqlStatement, qHash)
	switch err := row.Scan(&quickSha256); err {
	case sql.ErrNoRows:
		return false, nil
	case nil:
		return true, nil
	default:
		return false, err
	}
}

// UpdateJobs inserts or updates (keyed on quick_sha256) the processing state
// of a single source in the PostgreSQL jobs table.
// (Previous comment said "cloud spanner" — copy-paste from the spanner storage.)
//
// NOTE(review): the check-then-INSERT/UPDATE below is not atomic; concurrent
// writers could race. Consider INSERT ... ON CONFLICT (quick_sha256) DO UPDATE.
func (s *Storage) UpdateJobs(ctx context.Context, qHash string, p *hashr.ProcessingSource) error {
	exists, err := s.rowExists(qHash)
	if err != nil {
		return err
	}

	var sql string
	if exists {
		sql = `
		UPDATE jobs SET imported_at = $2, id = $3, repo = $4, repo_path = $5, location = $6, sha256 = $7, status = $8, error = $9, preprocessing_duration = $10, processing_duration = $11, export_duration = $12, files_extracted = $13, files_exported = $14
		WHERE quick_sha256 = $1`
	} else {
		sql = `
		INSERT INTO jobs (quick_sha256, imported_at, id, repo, repo_path, location, sha256, status, error, preprocessing_duration, processing_duration, export_duration, files_extracted, files_exported)
		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)`
	}

	// Durations are persisted as whole seconds.
	_, err = s.sqlDB.Exec(sql, qHash, p.ImportedAt, p.ID, p.Repo, p.RepoPath, p.RemoteSourcePath, p.Sha256, p.Status, p.Error, int(p.PreprocessingDuration.Seconds()), int(p.ProcessingDuration.Seconds()), int(p.ExportDuration.Seconds()), p.SampleCount, p.ExportCount)
	if err != nil {
		return err
	}
	return nil
}

// FetchJobs returns a map of quick_sha256 -> status for every row in the
// PostgreSQL jobs table, which lets the core skip already-processed sources.
// (Previous comment said "cloud spanner" — copy-paste from the spanner storage.)
func (s *Storage) FetchJobs(ctx context.Context) (map[string]string, error) {
	processed := make(map[string]string)

	rows, err := s.sqlDB.Query("SELECT quick_sha256, status FROM jobs")
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	for rows.Next() {
		var quickSha256, status string
		err = rows.Scan(&quickSha256, &status)
		if err != nil {
			return nil, err
		}
		processed[quickSha256] = status
	}
	err = rows.Err()
	if err != nil {
		return nil, err
	}

	return processed, nil
}

// tableExists reports whether tableName exists according to
// information_schema (any schema visible to the connection's search path).
func tableExists(db *sql.DB, tableName string) (bool, error) {
	// Query to check if the table exists in PostgreSQL
	query := `
	SELECT EXISTS (
		SELECT 1
		FROM information_schema.tables
		WHERE table_name = $1
	)
	`

	var exists bool
	err := db.QueryRow(query, tableName).Scan(&exists)
	if err != nil {
		return false, err
	}

	return exists, nil
}