├── .github
└── workflows
│ └── unit-tests.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── cache
├── cache.go
├── cache_test.go
├── proto
│ ├── cache.pb.go
│ └── cache.proto
└── testdata
│ ├── hashes.json
│ └── hashr-cache-gLinux
├── client
├── client.go
├── cloudspanner
│ └── cloudspanner.go
└── postgres
│ └── postgres.go
├── common
└── common.go
├── core
└── hashr
│ ├── hashr.go
│ ├── hashr_test.go
│ └── testdata
│ ├── 20200106.00.00-ubuntu-desktop-export
│ ├── hashes.json
│ └── tmp
│ │ └── hashdb
│ │ └── 20200106.00.00-ubuntu-desktop
│ │ ├── file.01
│ │ ├── file.02
│ │ ├── file.03
│ │ ├── file.04
│ │ ├── file.05
│ │ ├── file.06
│ │ ├── file.07
│ │ ├── file.08
│ │ ├── file.09
│ │ └── file.10
│ └── 20200106.00.00-ubuntu-laptop-export
│ ├── hashes.json
│ └── tmp
│ └── hashr
│ └── 20200106.00.00-ubuntu-laptop
│ ├── file.01
│ ├── file.02
│ ├── file.03
│ ├── file.04
│ ├── file.05
│ ├── file.06
│ ├── file.07
│ ├── file.08
│ ├── file.09
│ └── file.10
├── docker
├── Dockerfile
├── README.md
└── cloudbuild.yaml
├── docs
└── assets
│ └── HashR.png
├── exporters
├── gcp
│ ├── gcp.go
│ ├── gcp_test.go
│ └── testdata
│ │ └── extraction
│ │ ├── file.01
│ │ ├── file.02
│ │ ├── file.03
│ │ └── file.04
└── postgres
│ ├── postgres.go
│ ├── postgres_test.go
│ └── testdata
│ └── extraction
│ ├── ._file.01
│ ├── ._file.02
│ ├── ._file.03
│ ├── ._file.04
│ ├── file.01
│ ├── file.02
│ ├── file.03
│ └── file.04
├── go.mod
├── go.sum
├── hashr.go
├── importers
├── aws
│ ├── aws.go
│ └── aws_test.go
├── common
│ ├── common.go
│ ├── common_test.go
│ └── testdata
│ │ └── targz
│ │ ├── dir1
│ │ ├── desktop.tar.gz
│ │ ├── laptop.tar.gz
│ │ └── server.tar.gz
│ │ ├── dir2
│ │ ├── desktop.tar.gz
│ │ ├── laptop.tar.gz
│ │ └── server.tar.gz
│ │ ├── dir3
│ │ ├── desktop.tar.gz
│ │ ├── laptop.tar.gz
│ │ └── server.tar.gz
│ │ └── dir4
│ │ ├── desktop.tar.gz
│ │ ├── laptop.tar.gz
│ │ └── server.tar.gz
├── deb
│ ├── deb.go
│ ├── deb_test.go
│ ├── generate_tests.sh
│ └── testdata
│ │ ├── 20200106.00.00
│ │ ├── ubuntu-desktop.deb
│ │ ├── ubuntu-laptop.deb
│ │ └── ubuntu-server.deb
│ │ ├── 20200107.00.00
│ │ ├── ubuntu-desktop.deb
│ │ ├── ubuntu-laptop.deb
│ │ └── ubuntu-server.deb
│ │ ├── 20200107.01.00
│ │ ├── ubuntu-desktop.deb
│ │ ├── ubuntu-laptop.deb
│ │ └── ubuntu-server.deb
│ │ └── 20200108.00.00
│ │ ├── ubuntu-desktop.deb
│ │ ├── ubuntu-laptop.deb
│ │ └── ubuntu-server.deb
├── gcp
│ ├── gcp.go
│ ├── gcp_test.go
│ └── testdata
│ │ ├── ._ubuntu-1804-lts-drawfork-v20190613.tar.gz
│ │ └── ubuntu-1804-lts-drawfork-v20190613.tar.gz
├── gcr
│ ├── gcr.go
│ └── gcr_test.go
├── importer.go.example
├── iso9660
│ ├── generate_tests.sh
│ ├── iso9660.go
│ ├── iso9660_test.go
│ └── testdata
│ │ ├── 20200106.00.00
│ │ ├── ubuntu-desktop.iso
│ │ ├── ubuntu-laptop.iso
│ │ └── ubuntu-server.iso
│ │ ├── 20200107.00.00
│ │ ├── ubuntu-desktop.iso
│ │ ├── ubuntu-laptop.iso
│ │ └── ubuntu-server.iso
│ │ ├── 20200107.01.00
│ │ ├── ubuntu-desktop.iso
│ │ ├── ubuntu-laptop.iso
│ │ └── ubuntu-server.iso
│ │ └── 20200108.00.00
│ │ ├── ubuntu-desktop.iso
│ │ ├── ubuntu-laptop.iso
│ │ └── ubuntu-server.iso
├── rpm
│ ├── generate_tests.sh
│ ├── rpm.go
│ ├── rpm_test.go
│ └── testdata
│ │ ├── 20200106.00.00
│ │ ├── ubuntu-desktop.rpm
│ │ ├── ubuntu-laptop.rpm
│ │ └── ubuntu-server.rpm
│ │ ├── 20200107.00.00
│ │ ├── ubuntu-desktop.rpm
│ │ ├── ubuntu-laptop.rpm
│ │ └── ubuntu-server.rpm
│ │ ├── 20200107.01.00
│ │ ├── ubuntu-desktop.rpm
│ │ ├── ubuntu-laptop.rpm
│ │ └── ubuntu-server.rpm
│ │ └── 20200108.00.00
│ │ ├── ubuntu-desktop.rpm
│ │ ├── ubuntu-laptop.rpm
│ │ └── ubuntu-server.rpm
├── targz
│ ├── targz.go
│ ├── targz_test.go
│ └── testdata
│ │ ├── 20200106.00.00
│ │ ├── ubuntu-desktop.tar.gz
│ │ ├── ubuntu-laptop.tar.gz
│ │ └── ubuntu-server.tar.gz
│ │ ├── 20200107.00.00
│ │ ├── ubuntu-desktop.tar.gz
│ │ ├── ubuntu-laptop.tar.gz
│ │ └── ubuntu-server.tar.gz
│ │ ├── 20200107.01.00
│ │ ├── ubuntu-desktop.tar.gz
│ │ ├── ubuntu-laptop.tar.gz
│ │ └── ubuntu-server.tar.gz
│ │ └── 20200108.00.00
│ │ ├── ubuntu-desktop.tar.gz
│ │ ├── ubuntu-laptop.tar.gz
│ │ └── ubuntu-server.tar.gz
├── windows
│ └── windows.go
├── wsus
│ ├── testdata
│ │ ├── ._03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab
│ │ ├── ._138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab
│ │ ├── ._1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab
│ │ ├── ._1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab
│ │ ├── 03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab
│ │ ├── 138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab
│ │ ├── 1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab
│ │ └── 1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab
│ ├── wsus.go
│ └── wsus_test.go
└── zip
│ ├── generate_tests.sh
│ ├── testdata
│ ├── 20200106.00.00
│ │ ├── ubuntu-desktop.jar
│ │ ├── ubuntu-laptop.whl
│ │ └── ubuntu-server.egg
│ ├── 20200107.00.00
│ │ ├── ubuntu-desktop.zip
│ │ ├── ubuntu-laptop.zip
│ │ └── ubuntu-server.zip
│ ├── 20200107.01.00
│ │ ├── ubuntu-desktop.zip
│ │ ├── ubuntu-laptop.zip
│ │ └── ubuntu-server.zip
│ └── 20200108.00.00
│ │ ├── ubuntu-desktop.zip
│ │ ├── ubuntu-laptop.zip
│ │ └── ubuntu-server.zip
│ ├── zip.go
│ └── zip_test.go
├── processors
└── local
│ ├── local.go
│ ├── local_test.go
│ └── testdata
│ ├── ._disk_2_xfs_volumes.raw
│ └── disk_2_xfs_volumes.raw
├── scripts
├── CreateCloudSpannerExporterTables.ddl
├── CreateJobsTable.ddl
├── CreateJobsTable.sql
├── CreatePostgresExporterTables.sql
├── aws
│ ├── AwsHashrUploaderPolicy.json
│ ├── AwsHashrWorkerPolicy.json
│ ├── hashr_aws_init.txt
│ └── hashr_setup.sh
└── hashr-archive
└── storage
├── cloudspanner
└── cloudspanner.go
└── postgres
└── postgres.go
/.github/workflows/unit-tests.yml:
--------------------------------------------------------------------------------
1 | name: unit-tests
2 |
3 | on:
4 | pull_request:
5 | types: [opened, synchronize, reopened]
6 |
7 | jobs:
8 | BuildTest:
9 |
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v3
13 |
14 | - name: Set up Go
15 | uses: actions/setup-go@v3
16 | with:
17 | go-version: 1.19
18 |
19 | - name: Install golint
20 | run: go install golang.org/x/lint/golint@latest
21 |
22 | - name: Run Spanner emulator
23 | run: docker run -d -p 9010:9010 -p 9020:9020 gcr.io/cloud-spanner-emulator/emulator
24 |
25 | - name: Run golint
26 | run: golint ./...
27 |
28 | - name: Build
29 | run: go build -v ./...
30 |
31 | - name: Test
32 | run: go test -timeout 2m -v ./...
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | hashr
2 |
3 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement (CLA). You (or your employer) retain the copyright to your
10 | contribution; this simply gives us permission to use and redistribute your
11 | contributions as part of the project. Head over to
12 | <https://cla.developers.google.com/> to see your current agreements on file
13 | or to sign a new one.
14 |
15 | You generally only need to submit a CLA once, so if you've already submitted one
16 | (even if it was for a different project), you probably don't need to do it
17 | again.
18 |
19 | ## Code Reviews
20 |
21 | All submissions, including submissions by project members, require review. We
22 | use GitHub pull requests for this purpose. Consult
23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
24 | information on using pull requests.
25 |
26 | ## Community Guidelines
27 |
28 | This project follows
29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
--------------------------------------------------------------------------------
/cache/cache.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package cache provides functions that are used to interact with local cache.
16 | package cache
17 |
18 | import (
19 | "encoding/json"
20 | "fmt"
21 | "io/ioutil"
22 | "os"
23 | "path/filepath"
24 | "sync"
25 |
26 | "github.com/golang/glog"
27 | "github.com/google/hashr/common"
28 | "google.golang.org/protobuf/proto"
29 | "google.golang.org/protobuf/types/known/timestamppb"
30 |
31 | cpb "github.com/google/hashr/cache/proto"
32 | )
33 |
34 | func readJSON(extraction *common.Extraction) ([]common.Sample, error) {
35 | pathJSON := filepath.Join(extraction.Path, "hashes.json")
36 | var samples []common.Sample
37 |
38 | data, err := ioutil.ReadFile(pathJSON)
39 | if err != nil {
40 | return nil, fmt.Errorf("error while reading hashes.json file: %v", err)
41 | }
42 |
43 | err = json.Unmarshal(data, &samples)
44 | if err != nil {
45 | return nil, fmt.Errorf("error unmarshalling hashes.json file: %v", err)
46 | }
47 |
48 | for _, sample := range samples {
49 | for i := range sample.Paths {
50 | sample.Paths[i] = filepath.Join(extraction.Path, sample.Paths[i])
51 | }
52 | }
53 |
54 | return samples, nil
55 | }
56 |
57 | // Save saves the cache to a local file.
58 | func Save(repoName, cacheDir string, cacheMap *sync.Map) error {
59 | // TODO(mlegin): Compress the file before saving it to disk.
60 | cachePath := filepath.Join(cacheDir, fmt.Sprintf("hashr-cache-%s", repoName))
61 |
62 | cache := &cpb.Cache{Samples: make(map[string]*cpb.Entries)}
63 | cacheMap.Range(func(key, value interface{}) bool {
64 | hash, ok := key.(string)
65 | if !ok {
66 | glog.Exitf("Unexpected key type in cache map: %v", key)
67 | }
68 |
69 | entries, ok := value.(*cpb.Entries)
70 | if !ok {
71 | glog.Exitf("Unexpected value type in cache map: %v", key)
72 | }
73 |
74 | cache.Samples[hash] = entries
75 |
76 | return true
77 | })
78 |
79 | data, err := proto.Marshal(cache)
80 | if err != nil {
81 | return fmt.Errorf("error marshalling %s repo cache: %v", repoName, err)
82 | }
83 |
84 | cacheFile, err := os.Create(cachePath)
85 | if err != nil {
86 | return fmt.Errorf("error opening %s repo cache file for write: %v", repoName, err)
87 | }
88 |
89 | _, err = cacheFile.Write(data)
90 | if err != nil {
91 | return fmt.Errorf("error writing to %s repo cache file: %v", repoName, err)
92 | }
93 | glog.Infof("Successfully saved %s repo cache to %s.", repoName, cachePath)
94 |
95 | return nil
96 | }
97 |
// Load reads cache entries from a file stored locally. If the file is not present, the cache is
// created in memory.
//
// If the file exists but cannot be read or unmarshalled, it is treated as
// corrupted: the file is deleted and an empty in-memory cache is returned
// (only a failed delete is reported as an error).
func Load(repoName, cacheDir string) (*sync.Map, error) {
	var cacheMap sync.Map
	cachePath := filepath.Join(cacheDir, fmt.Sprintf("hashr-cache-%s", repoName))
	if _, err := os.Stat(cachePath); os.IsNotExist(err) {
		glog.Infof("Cache for %s repo not found at %s. Creating new cache in memory.", repoName, cachePath)
		return &cacheMap, nil
	}

	data, err := ioutil.ReadFile(cachePath)
	if err != nil {
		// If there is an error while reading the file it might be corrupted.
		// Delete it and fall back to a fresh in-memory cache.
		if err := os.Remove(cachePath); err != nil {
			return nil, fmt.Errorf("error while trying to delete the %s repo cache file: %v", repoName, err)
		}
		return &cacheMap, nil
	}

	cache := &cpb.Cache{}
	if err := proto.Unmarshal(data, cache); err != nil {
		// If there is an error while unmarshalling the file it might be corrupted.
		if err := os.Remove(cachePath); err != nil {
			return nil, fmt.Errorf("error while trying to delete the %s repo cache file: %v", repoName, err)
		}
		return &cacheMap, nil
	}
	glog.Infof("Successfully loaded cache for %s repo from %s.", repoName, cachePath)

	// Copy the decoded proto map into the sync.Map consumed by Check/Save.
	for k, v := range cache.Samples {
		cacheMap.Store(k, v)
	}

	return &cacheMap, nil
}
133 |
// Check checks if files present in a given extraction are already in the local cache.
//
// For every sample in the extraction's hashes.json it records a new cache
// entry (source ID + source SHA-256). Samples whose SHA-256 was not already
// cached are marked Upload=true so callers know they still need exporting;
// already-cached samples only gain an additional entry.
// NOTE(review): entries are appended on every call, so re-processing the same
// source accumulates duplicate entries — confirm this is intended.
func Check(extraction *common.Extraction, cache *sync.Map) ([]common.Sample, error) {
	samples, err := readJSON(extraction)
	if err != nil {
		return nil, fmt.Errorf("error while reading hashes.json file: %v", err)
	}

	var exports []common.Sample
	for _, sample := range samples {
		newCacheEntry := &cpb.CacheEntry{
			SourceId:   extraction.SourceID,
			SourceHash: extraction.SourceSHA256,
		}
		newExport := common.Sample{
			Sha256: sample.Sha256,
			Paths:  sample.Paths,
		}

		if sampleCache, ok := cache.Load(sample.Sha256); ok {
			// If the sample is already in the cache, add a new entry.
			sampleCache.(*cpb.Entries).Entries = append(sampleCache.(*cpb.Entries).Entries, newCacheEntry)
			sampleCache.(*cpb.Entries).LastUpdated = timestamppb.Now()
		} else {
			// Add a new sample to the cache and flag it for upload.
			cache.Store(sample.Sha256, &cpb.Entries{
				LastUpdated: timestamppb.Now(),
				Entries:     []*cpb.CacheEntry{newCacheEntry},
			})
			newExport.Upload = true
		}

		exports = append(exports, newExport)
	}

	return exports, nil
}
170 |
--------------------------------------------------------------------------------
/cache/proto/cache.proto:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | syntax = "proto3";
16 |
17 | package cachepb;
18 |
19 | import "google/protobuf/timestamp.proto";
20 |
21 | option go_package = "github.com/google/hashr/cache/proto/cachepb";
22 |
23 | message CacheEntry {
24 | string source_id = 1;
25 | string source_hash = 2;
26 | repeated string path = 3;
27 | }
28 |
29 | message Entries {
30 | google.protobuf.Timestamp last_updated = 1;
31 | repeated CacheEntry entries = 2;
32 | }
33 |
// Cache maps a sample's SHA-256 hex digest to the cache entries recorded for
// that sample (cache.go builds exactly map[string]*cpb.Entries from it).
// The bare "map samples = 1;" was invalid proto3: map fields require explicit
// key and value types.
message Cache {
  map<string, Entries> samples = 1;
}
--------------------------------------------------------------------------------
/cache/testdata/hashes.json:
--------------------------------------------------------------------------------
1 | [{"sha256": "d5d66fe6a4559c59ad103ab40e01c4fc0df7eb8ba901d50e5ceae3909b2e0d61", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.09"]}, {"sha256": "4878dd6c7af7fecdf89832384d84ed93b78123e69e6a0097efac5320da2ac637", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.02"]}, {"sha256": "ca8a605cf72b21b89f9211af1550d7f943a2b844084241f60eddd9d6536c78ec", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.10"]}, {"sha256": "4741b2746859cbe24f529a4f3108c2d8b4ea5f442f8a3743ff3543c76f369c90", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.01"]}, {"sha256": "d889bcc21cffc076d6e9cf7e32d0dd801977141e6f71d4c96ae84e5f1765e71a", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.07"]}, {"sha256": "00632850049f80763ada81ec0cacf015dbd67fb1b956ec2acb8aa862e511b3bc", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.04"]}, {"sha256": "b1f8a81821e18bba696a52b5169524076f77bc588c02ab195f969df4e2650dce", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.03"]}, {"sha256": "8780622e75a9c1be4b30ae9e15d6d94249926aaa9139b7a563e42ee0eab70eea", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.05"]}, {"sha256": "99962d9e62c15c73527ca72b4e5e85809d4254326800eb2c65b35339029e02d1", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.06"]}, {"sha256": "e0a98ad618a3cef7f8754a2711322e398879f47e50ca491c75eca6ba476e421a", "paths": ["/gLinuxTestRepo/20200227.00.00/export/file.08"]}]
2 |
--------------------------------------------------------------------------------
/cache/testdata/hashr-cache-gLinux:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/cache/testdata/hashr-cache-gLinux
--------------------------------------------------------------------------------
/client/client.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "database/sql"
6 | "encoding/json"
7 | "flag"
8 | "fmt"
9 |
10 | "cloud.google.com/go/spanner"
11 | "github.com/google/hashr/client/cloudspanner"
12 | "github.com/google/hashr/client/postgres"
13 | _ "github.com/lib/pq"
14 |
15 | "github.com/golang/glog"
16 | )
17 |
var (
	hashStorage   = flag.String("hashStorage", "", "Storage used for computed hashes, can have one of the two values: postgres, cloudspanner")
	spannerDBPath = flag.String("spanner_db_path", "", "Path to spanner DB.")

	// Postgres DB flags. Help text fixed: "PostgresSQL" -> "PostgreSQL".
	postgresHost     = flag.String("postgres_host", "localhost", "PostgreSQL instance address.")
	postgresPort     = flag.Int("postgres_port", 5432, "PostgreSQL instance port.")
	postgresUser     = flag.String("postgres_user", "hashr", "PostgreSQL user.")
	postgresPassword = flag.String("postgres_password", "hashr", "PostgreSQL password.")
	postgresDBName   = flag.String("postgres_db", "hashr", "PostgreSQL database.")
)
29 |
// Storage represents storage that is used to store data about processed sources.
// Implemented by client/postgres and client/cloudspanner.
type Storage interface {
	// GetSamples returns, per sample SHA-256, a map of attribute name
	// ("sha256", "mimetype", "file_output", "size") to its string value.
	GetSamples(ctx context.Context) (map[string]map[string]string, error)
}
34 |
35 | func main() {
36 | ctx := context.Background()
37 | flag.Parse()
38 |
39 | var storage Storage
40 | switch *hashStorage {
41 | case "postgres":
42 | psqlInfo := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=disable",
43 | *postgresHost, *postgresPort, *postgresUser, *postgresPassword, *postgresDBName)
44 |
45 | db, err := sql.Open("postgres", psqlInfo)
46 | if err != nil {
47 | glog.Exitf("Error initializing Postgres client: %v", err)
48 | }
49 | defer db.Close()
50 |
51 | storage, err = postgres.NewStorage(db)
52 | if err != nil {
53 | glog.Exitf("Error initializing Postgres storage: %v", err)
54 | }
55 | case "cloudspanner":
56 | spannerClient, err := spanner.NewClient(ctx, *spannerDBPath)
57 | if err != nil {
58 | glog.Exitf("Error initializing Spanner client: %v", err)
59 | }
60 |
61 | storage, err = cloudspanner.NewStorage(ctx, spannerClient)
62 | if err != nil {
63 | glog.Exitf("Error initializing Postgres storage: %v", err)
64 | }
65 | default:
66 | glog.Exit("hashStorage flag needs to have one of the two values: postgres, cloudspanner")
67 |
68 | }
69 | samples, err := storage.GetSamples(ctx)
70 | if err != nil {
71 | glog.Exitf("Error retriving samples: %v", err)
72 | }
73 |
74 | jsonData, err := json.Marshal(samples)
75 | if err != nil {
76 | fmt.Println("Error:", err)
77 | return
78 | }
79 |
80 | fmt.Println(string(jsonData))
81 | }
82 |
--------------------------------------------------------------------------------
/client/cloudspanner/cloudspanner.go:
--------------------------------------------------------------------------------
1 | package cloudspanner
2 |
3 | import (
4 | "context"
5 | "strconv"
6 |
7 | "cloud.google.com/go/spanner"
8 |
9 | "google.golang.org/api/iterator"
10 | )
11 |
// Storage allows to interact with cloud spanner.
type Storage struct {
	// spannerClient is the shared client used for all reads.
	spannerClient *spanner.Client
}

// NewStorage creates new Storage struct that allows to interact with cloud spanner.
// The ctx parameter is currently unused; it is kept for interface symmetry
// with other storage constructors.
func NewStorage(ctx context.Context, spannerClient *spanner.Client) (*Storage, error) {
	return &Storage{spannerClient: spannerClient}, nil
}
21 |
// GetSamples fetches processing samples from cloud spanner.
//
// It reads every row of the "samples" table and returns, per sample SHA-256,
// a map of attribute name ("sha256", "mimetype", "file_output", "size") to
// its string value. The int64 size column is rendered as a decimal string.
func (s *Storage) GetSamples(ctx context.Context) (map[string]map[string]string, error) {
	samples := make(map[string]map[string]string)
	iter := s.spannerClient.Single().Read(ctx, "samples",
		spanner.AllKeys(), []string{"sha256", "mimetype", "file_output", "size"})
	defer iter.Stop()
	for {
		row, err := iter.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return nil, err
		}
		var sha256, mimetype, fileOutput string
		var size int64
		err = row.ColumnByName("sha256", &sha256)
		if err != nil {
			return nil, err
		}
		err = row.ColumnByName("mimetype", &mimetype)
		if err != nil {
			return nil, err
		}
		err = row.ColumnByName("file_output", &fileOutput)
		if err != nil {
			return nil, err
		}
		err = row.ColumnByName("size", &size)
		if err != nil {
			return nil, err
		}
		samples[sha256] = make(map[string]string)

		// Assign values to the nested map
		samples[sha256]["sha256"] = sha256
		samples[sha256]["mimetype"] = mimetype
		samples[sha256]["file_output"] = fileOutput
		samples[sha256]["size"] = strconv.FormatInt(size, 10)

	}
	return samples, nil
}
65 |
--------------------------------------------------------------------------------
/client/postgres/postgres.go:
--------------------------------------------------------------------------------
1 | // Package postgres implements PostgreSQL as a hashR storage.
2 | package postgres
3 |
4 | import (
5 | "context"
6 | "database/sql"
7 | "fmt"
8 |
9 | // Blank import below is needed for the SQL driver.
10 | _ "github.com/lib/pq"
11 | )
12 |
// Storage allows to interact with PostgreSQL instance.
type Storage struct {
	// sqlDB is the pooled database handle used for all queries.
	sqlDB *sql.DB
}

// NewStorage creates new Storage struct that allows to interact with a
// PostgreSQL instance. It only wraps the handle; it does not create any
// tables (see scripts/CreatePostgresExporterTables.sql for schema setup).
func NewStorage(sqlDB *sql.DB) (*Storage, error) {
	return &Storage{sqlDB: sqlDB}, nil
}
22 |
23 | // GetSamples fetches processed samples from postgres.
24 | func (s *Storage) GetSamples(ctx context.Context) (map[string]map[string]string, error) {
25 | exists, err := tableExists(s.sqlDB, "samples")
26 | if err != nil {
27 | return nil, err
28 | }
29 |
30 | samples := make(map[string]map[string]string)
31 |
32 | if exists {
33 | var sql = `SELECT * FROM samples;`
34 |
35 | rows, err := s.sqlDB.Query(sql)
36 |
37 | if err != nil {
38 | return nil, err
39 | }
40 |
41 | defer rows.Close()
42 |
43 | for rows.Next() {
44 | var sha256, mimetype, fileOutput, size string
45 | err := rows.Scan(&sha256, &mimetype, &fileOutput, &size)
46 | if err != nil {
47 | return nil, err
48 | }
49 |
50 | samples[sha256] = make(map[string]string)
51 |
52 | // Assign values to the nested map
53 | samples[sha256]["sha256"] = sha256
54 | samples[sha256]["mimetype"] = mimetype
55 | samples[sha256]["file_output"] = fileOutput
56 | samples[sha256]["size"] = size
57 | }
58 |
59 | } else {
60 | return nil, fmt.Errorf("table samples does not exist")
61 | }
62 |
63 | return samples, nil
64 | }
65 |
// tableExists reports whether a table named tableName exists, by querying
// information_schema. NOTE(review): this matches the name across all schemas
// visible to the connection; add a table_schema predicate if that matters.
func tableExists(db *sql.DB, tableName string) (bool, error) {
	// Query to check if the table exists in PostgreSQL
	query := `
	SELECT EXISTS (
		SELECT 1
		FROM information_schema.tables
		WHERE table_name = $1
	)
	`

	var exists bool
	err := db.QueryRow(query, tableName).Scan(&exists)
	if err != nil {
		return false, err
	}

	return exists, nil
}
84 |
--------------------------------------------------------------------------------
/common/common.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package common provides common data structures used in hashR.
16 | package common
17 |
// Sample represent single file extracted from a given source.
type Sample struct {
	// Sha256 is the hex-encoded SHA-256 digest of the file contents.
	Sha256 string `json:"sha256"`
	// Paths lists the locations of the file within the extraction.
	Paths []string `json:"paths"`
	// Upload marks samples not yet seen in the cache that should be exported.
	// NOTE(review): the JSON key "Upload" is capitalized unlike the other
	// keys — presumably intentional; changing it would break consumers of
	// existing hashes.json files.
	Upload bool `json:"Upload"`
}
24 |
// Extraction contains information about image_export.py extraction.
type Extraction struct {
	// SourceID identifies the source the extraction came from.
	SourceID string
	// RepoName is the name of the repository the source belongs to.
	RepoName string
	// BaseDir is the base directory of the extraction output.
	BaseDir string
	// Path is the directory containing the extraction (and its hashes.json).
	Path string
	// SourceSHA256 is the hex-encoded SHA-256 digest of the source itself.
	SourceSHA256 string
}
33 |
--------------------------------------------------------------------------------
/core/hashr/hashr_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package hashr
16 |
17 | import (
18 | "context"
19 | "fmt"
20 | "os"
21 | "testing"
22 |
23 | "github.com/golang/glog"
24 |
25 | "github.com/google/hashr/common"
26 |
27 | "cloud.google.com/go/spanner"
28 | "google.golang.org/api/option"
29 | "google.golang.org/grpc"
30 | "google.golang.org/grpc/credentials/insecure"
31 |
32 | database "cloud.google.com/go/spanner/admin/database/apiv1"
33 | instance "cloud.google.com/go/spanner/admin/instance/apiv1"
34 |
35 | dbadminpb "google.golang.org/genproto/googleapis/spanner/admin/database/v1"
36 | instancepb "google.golang.org/genproto/googleapis/spanner/admin/instance/v1"
37 | )
38 |
const (
	// dbSchema is the Cloud Spanner DDL for the jobs table hashR uses to
	// track processing state; it is applied when TestRun creates the test
	// database on the emulator.
	dbSchema = `CREATE TABLE jobs (
		imported_at TIMESTAMP NOT NULL,
		id STRING(500),
		repo STRING(200),
		repo_path STRING(500),
		quick_sha256 STRING(100) NOT NULL,
		location STRING(1000),
		sha256 STRING(100),
		status STRING(50),
		error STRING(10000),
		preprocessing_duration INT64,
		processing_duration INT64,
		export_duration INT64,
		files_extracted INT64,
		files_exported INT64,
	) PRIMARY KEY(quick_sha256)`
)
57 |
58 | type testImporter struct {
59 | }
60 |
61 | func (i *testImporter) RepoName() string {
62 | return "ubuntu"
63 | }
64 |
65 | func (i *testImporter) RepoPath() string {
66 | return "ubuntu"
67 | }
68 |
69 | func (i *testImporter) DiscoverRepo() ([]Source, error) {
70 | sources := []Source{
71 | &testSource{id: "001", localPath: "/tmp/001", quickSha256hash: "7a3e6b16cb75f48fb897eff3ae732f3154f6d203b53f33660f01b4c3b6bc2df9", repoPath: "/tmp/"},
72 | &testSource{id: "002", localPath: "/tmp/002", quickSha256hash: "a1dd6837f284625bdb1cb68f1dbc85c5dc4d8b05bae24c94ed5f55c477326ea2", repoPath: "/tmp/"},
73 | }
74 |
75 | for _, source := range sources {
76 | file, err := os.Create(source.RemotePath())
77 | if err != nil {
78 | return nil, fmt.Errorf("could not create dummy sources: %v", err)
79 | }
80 | file.Close()
81 | }
82 |
83 | return sources, nil
84 | }
85 |
// testSource is a stub Source with canned values; most accessors return the
// stored fields or fixed constants.
type testSource struct {
	id              string
	localPath       string
	quickSha256hash string
	repoPath        string
}

// Preprocess is a no-op for the stub source.
func (s *testSource) Preprocess() (string, error) {
	return "", nil
}
func (s *testSource) QuickSHA256Hash() (string, error) {
	return s.quickSha256hash, nil
}

// RemotePath deliberately returns the local path — the stub treats local and
// remote locations as the same file.
func (s *testSource) RemotePath() string {
	return s.localPath
}
func (s *testSource) ID() string {
	return s.id
}
func (s *testSource) RepoName() string {
	return "ubuntu"
}
func (s *testSource) RepoPath() string {
	return "ubuntu"
}

// Local reports false so the pipeline exercises its remote-source code path.
func (s *testSource) Local() bool {
	return false
}
func (s *testSource) LocalPath() string {
	return s.localPath
}
func (s *testSource) Description() string {
	return ""
}
120 |
121 | type testProcessor struct {
122 | }
123 |
124 | func (p *testProcessor) ImageExport(sourcePath string) (string, error) {
125 | return "testdata/20200106.00.00-ubuntu-laptop-export", nil
126 | }
127 |
128 | type testExporter struct {
129 | }
130 |
131 | func (e *testExporter) Export(ctx context.Context, repoName, repoPath, sourceID, sourceHash, sourcePath, sourceDescription string, samples []common.Sample) error {
132 | return nil
133 | }
134 |
135 | func (e *testExporter) Name() string {
136 | return "testExporter"
137 | }
138 |
// TestRun requires Spanner emulator to be running: https://cloud.google.com/spanner/docs/emulator.
//
// For each test case it recreates a "hashr" instance and database on the
// local emulator, builds a hashR pipeline from the stub importer/processor/
// exporter above, and runs it with 1..processingWorkerCount workers,
// failing on any error from Run.
func TestRun(t *testing.T) {
	for _, tc := range []struct {
		export                bool
		exportPath            string
		exportWorkerCount     int
		processingWorkerCount int
		purgeJobsFile         bool
	}{
		{
			export:                false,
			exportPath:            "/tmp/hashr-export",
			exportWorkerCount:     100,
			processingWorkerCount: 1,
		},
		{
			export:                true,
			exportWorkerCount:     100,
			processingWorkerCount: 1,
			purgeJobsFile:         true,
		},
		{
			export:                false,
			exportPath:            "/tmp/hashr-export",
			processingWorkerCount: 1,
		},
		{
			export:                false,
			exportPath:            "/tmp/hashr-export",
			processingWorkerCount: 1,
		},
	} {
		ctx := context.Background()

		// Dial options pointing at the local emulator (no auth, no TLS).
		o := []option.ClientOption{
			option.WithEndpoint("localhost:9010"),
			option.WithoutAuthentication(),
			option.WithGRPCDialOption(grpc.WithTransportCredentials(insecure.NewCredentials())),
		}

		instanceAdmin, err := instance.NewInstanceAdminClient(ctx, o...)
		if err != nil {
			glog.Fatalf("error dialing instance admin: %v", err)
		}
		defer instanceAdmin.Close()

		// Best-effort cleanup of a leftover instance from a previous run;
		// failure here (e.g. instance absent) is only warned about.
		if err := instanceAdmin.DeleteInstance(ctx, &instancepb.DeleteInstanceRequest{Name: "projects/hashr/instances/hashr"}); err != nil {
			glog.Warning(err)
		}

		op, err := instanceAdmin.CreateInstance(ctx, &instancepb.CreateInstanceRequest{
			Parent:     "projects/hashr",
			InstanceId: "hashr",
			Instance: &instancepb.Instance{
				DisplayName: "hashr",
				NodeCount:   1,
			},
		})
		if err != nil {
			glog.Fatalf("error creating test spanner instance: %v", err)
		}
		if _, err := op.Wait(ctx); err != nil {
			glog.Fatalf("error creating test spanner instance: %v", err)
		}

		databaseAdmin, err := database.NewDatabaseAdminClient(ctx, o...)
		if err != nil {
			glog.Fatalf("error creating database admin client for emulator: %v", err)
		}

		// Create the database with the jobs-table schema defined above.
		dbURI := "projects/hashr/instances/hashr/databases/hashr"
		op2, err := databaseAdmin.CreateDatabase(ctx, &dbadminpb.CreateDatabaseRequest{
			Parent:          "projects/hashr/instances/hashr",
			CreateStatement: "CREATE DATABASE hashr",
			ExtraStatements: []string{dbSchema},
		})
		if err != nil {
			glog.Fatalf("error creating test DB %v: %v", dbURI, err)
		}
		if _, err = op2.Wait(ctx); err != nil {
			glog.Fatalf("error creating test DB %v: %v", dbURI, err)
		}

		spannerStorage, err := newStorage(ctx, dbURI, o...)
		if err != nil {
			glog.Fatalf("error creating test spanner client: %v", err)
		}

		hdb := New([]Importer{&testImporter{}}, &testProcessor{}, []Exporter{&testExporter{}}, spannerStorage)
		hdb.CacheDir = "/tmp/"
		hdb.Export = tc.export
		hdb.ExportPath = tc.exportPath
		hdb.ExportWorkerCount = tc.exportWorkerCount
		hdb.ProcessingWorkerCount = tc.processingWorkerCount

		// This is a simple test to check the full processing logic with different number of workers.
		// The test should fail on any error.
		// TODO(mlegin): Add a test to check the telemetry stats of the whole Run.
		for i := 1; i <= tc.processingWorkerCount; i++ {
			hdb.ProcessingWorkerCount = i
			if err := hdb.Run(context.Background()); err != nil {
				t.Errorf("Unexpected error while running hashR: %v", err)
			}
		}
	}
}
245 |
// fakeStorage is a test double for hashR's job storage: it holds a real
// spanner client (pointed at the emulator) but stubs out job persistence.
type fakeStorage struct {
	spannerClient *spanner.Client // client for the emulator-backed test database
}
250 |
251 | // NewStorage creates new Storage struct that allows to interact with cloud spanner.
252 | func newStorage(ctx context.Context, spannerDBPath string, opts ...option.ClientOption) (*fakeStorage, error) {
253 | spannerClient, err := spanner.NewClient(ctx, spannerDBPath, opts...)
254 | if err != nil {
255 | return nil, err
256 | }
257 |
258 | return &fakeStorage{spannerClient: spannerClient}, nil
259 | }
260 |
// UpdateJobs is a no-op stub: the test does not verify that job state is
// persisted, only that the pipeline runs end to end.
func (s *fakeStorage) UpdateJobs(ctx context.Context, qHash string, p *ProcessingSource) error {
	return nil
}
265 |
// FetchJobs always reports an empty job map, so every source is treated as
// unprocessed on each Run.
func (s *fakeStorage) FetchJobs(ctx context.Context) (map[string]string, error) {
	return make(map[string]string), nil
}
270 |
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/hashes.json:
--------------------------------------------------------------------------------
1 | [{"sha256": "952b39dff291f84b330206a1131c06592c8055800071fc84888c9a3052e51543", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.09"]}, {"sha256": "a74bb803c7ff5bd875867fc3f4ceabb6fbe888eea6361b876111cb8060fe7e8c", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.02"]}, {"sha256": "79f5431b2eecae25c0b29ad8e5d8642d0575b015ed8008ff277dd2308cbdd173", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.10"]}, {"sha256": "c2e7f7d23b30766c2d55e847b349d0540f4847b263ee15521dc72023846884ea", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.01"]}, {"sha256": "2cbbbd2fa4045f092ed359cd6632e01e1e45006681949aa98cee7aa0edc6f771", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.07"]}, {"sha256": "efa02f852f81f973f2c10784bc5194de1d09f3e905ea296b22090ff3379ed6c1", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.04"]}, {"sha256": "2789f4b90b038d57e592d01e0cd13a98b398cc7a524c3e8a7faaaaaf59893e7d", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.03"]}, {"sha256": "ddf7c381937d07c67e509f18feec42a939bddf2ea7db985a4b045d583c95ec04", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.05"]}, {"sha256": "2fd1880876ca7640d04fae51fa988fe40505ab15f0f1a05ca6b0b5f09985c82a", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.06"]}, {"sha256": "8ab37107e0ed8d084afaf252fcdb2e66b99812ab12864b1cd12dfd5a44b25e5e", "paths": ["tmp/hashr/20200106.00.00-ubuntu-desktop/file.08"]}]
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.01:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.01
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.02:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.02
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.03:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.03
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.04:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.04
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.05:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.05
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.06:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.06
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.07:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.07
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.08:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.08
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.09:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.09
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.10:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-desktop-export/tmp/hashdb/20200106.00.00-ubuntu-desktop/file.10
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/hashes.json:
--------------------------------------------------------------------------------
1 | [{"sha256": "d370aa6801e91d7fa48db7f4388c4e3858f58de124d9c4fd53cb9a25bbc2fa34", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.09"]}, {"sha256": "3f1ee77b201b6c4f1c37872e363387b247415293853a3f7eed25effee396b68f", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.02"]}, {"sha256": "b665f633c8f1e184972cb9ebc99490cf999fcae5d520fc8e144ee1124d05628b", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.10"]}, {"sha256": "8bc259fd7d49e3a94a2001e7ec276c51736a66167fc90e3453771b0e8e9fc17c", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.01"]}, {"sha256": "fa6d182f5bd8613830c118e0d7296baa59ae33b0d32a4557106cd15098b8bcf9", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.07"]}, {"sha256": "b6f453c6cb97193dbf52bdd8423d3c5f6308521af9254cd65f5cb9a777c6b203", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.04"]}, {"sha256": "b9b1fcb88ca7c884c4105c3f9e6f5c782521533ab529b84db41d82241a1b148e", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.03"]}, {"sha256": "e6af44bf176b209b8ca050e7834aef2b1b6bcc292acde3c456a8a81d2d47c37c", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.05"]}, {"sha256": "a649460a16c3a2d9a097f93e6e2d0c89c5a52ca5e1cc6d6ca03c64417905753d", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.06"]}, {"sha256": "f9788be264fc476a842f3e23950a1c0070b47948f95eeccc8c243c45afd62524", "paths": ["tmp/hashr/20200106.00.00-ubuntu-laptop/file.08"]}]
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.01:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.01
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.02:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.02
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.03:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.03
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.04:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.04
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.05:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.05
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.06:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.06
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.07:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.07
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.08:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.08
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.09:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.09
--------------------------------------------------------------------------------
/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.10:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/core/hashr/testdata/20200106.00.00-ubuntu-laptop-export/tmp/hashr/20200106.00.00-ubuntu-laptop/file.10
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
# Use: docker build --no-cache --build-arg PPA_TRACK="[staging|stable]"

# Stage 0 - hashr-builder
# Builder stage: compiles the hashr binary. Nothing from this stage ships in
# the final image except the resulting static binary.
FROM golang:alpine AS hashr-builder
# build-base supplies gcc/musl needed for the external (cgo) static link below.
RUN apk add --no-cache build-base

# Compile hashr statically
RUN mkdir -p /opt/hashr/
COPY . /opt/hashr/
# -linkmode=external -extldflags=-static plus the osusergo,netgo tags produce a
# fully static binary that runs on the Ubuntu runtime image without a Go toolchain.
RUN cd /opt/hashr/ && GOOS=linux GOARCH=amd64 go build -v -ldflags="-linkmode=external -extldflags=-static" -tags osusergo,netgo -o hashr hashr.go

# Stage 1 - hashr
# Runtime stage: static binary plus the external tools hashr shells out to.
FROM ubuntu:22.04

ARG PPA_TRACK=stable
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get -y upgrade && apt-get -y install \
    p7zip-full \
    apt-transport-https \
    apt-utils \
    sudo \
    curl \
    file \
    gpg \
    software-properties-common \
    && apt-get clean && rm -rf /var/cache/apt/* /var/lib/apt/lists/*

# plaso-tools comes from the GIFT PPA; PPA_TRACK selects the staging or stable track.
RUN add-apt-repository -y ppa:gift/$PPA_TRACK
RUN apt-get update && apt-get -y install plaso-tools

# Run as an unprivileged user; the sudoers entry whitelists only the
# mount-related commands some importers need (e.g. loop-mounting images).
RUN useradd -G disk,sudo -u 999 hashr
RUN echo "hashr ALL = (root) NOPASSWD: /bin/mount,/bin/umount,/sbin/losetup,/bin/rm" > /etc/sudoers.d/hashr

USER root

WORKDIR /usr/local/bin
COPY --from=hashr-builder /opt/hashr/hashr .

# Mount point for input images/repositories provided by the host.
VOLUME ["/data"]

WORKDIR /home/hashr/
RUN chmod +x /usr/local/bin/hashr
USER hashr
ENTRYPOINT ["/usr/local/bin/hashr"]
46 |
--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
1 | # HashR in docker
2 |
3 | Follow these steps to set-up HashR running in a docker container.
4 |
5 | If you want a local installation, check [these steps](https://github.com/google/hashr#setting-up-hashr).
6 |
7 | ## Table of contents
8 |
9 | * [HashR docker image](#hashr-docker-image)
10 | * [Pull the HashR image](#pull-the-hashr-image)
11 | * [Build the HashR image](#build-the-hashr-image)
12 | * [Setup a database and importers](#setup-a-database-and-importers)
13 | * [Database](#database)
14 | * [Importers](#importers)
15 | * [Docker networking](#docker-networking)
16 | * [Run HashR](#run-hashr)
17 | * [Examples](#examples)
18 |
19 |
20 | ## HashR docker image
21 |
22 | You can either use our hosted docker image or build it yourself.
23 |
24 | ### Pull the HashR image
25 |
26 | The HashR docker image will provide the HashR binary and tools it needs to
27 | work.
28 |
29 | By default the latest tagged release will be pulled if not specified otherwise:
30 |
31 | ```shell
32 | docker pull us-docker.pkg.dev/osdfir-registry/hashr/release/hashr
33 | ```
34 |
35 | Pulling a specific release tag:
36 |
37 | ```shell
38 | docker pull us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:v1.7.1
39 | ```
40 |
41 | ### Build the HashR image
42 |
43 | From the repository root folder run the following command:
44 |
45 | ```shell
46 | docker build -f docker/Dockerfile .
47 | ```
48 |
49 | ## Setup a database and importers
50 |
51 | ### Database
52 |
53 | You still need to provide your own database for HashR to store the results.
54 | Check the [Setting up storage for processing tasks](https://github.com/google/hashr#setting-up-storage-for-processing-tasks) step in the local installation
55 | guide.
56 |
57 | ### Importers
58 |
59 | Follow the [Setting up importers](https://github.com/google/hashr#setting-up-importers)
60 | guide to setup the importers you want to use.
61 |
62 | Come back here for running HashR in docker with specific importers.
63 |
64 | ### Docker networking
65 |
66 | Create a docker network that will be used by `hashr_postgresql` and the `hashr`
67 | container.
68 |
69 | ```shell
70 | docker network create hashr_net
71 | ```
72 |
73 | ```shell
74 | docker network connect hashr_net hashr_postgresql
75 | ```
76 |
77 | ## Run HashR
78 |
79 | Get all availalbe HashR flags
80 |
81 | ```shell
82 | docker run us-docker.pkg.dev/osdfir-registry/hashr/release/hashr -h
83 | ```
84 |
85 | ### Examples
86 |
87 | > **NOTE**
88 | Ensure that the host directory mapped into `/data/` in the container is
89 | readable for all!
90 |
91 | Run HashR using the `iso9660` importer and export results to PostgreSQL:
92 |
93 | ```shell
94 | docker run -it \
95 | --privileged \
96 | --network hashr_net \
  -v $(pwd)/ISO:/data/iso \
98 | us-docker.pkg.dev/osdfir-registry/hashr/release/hashr \
99 | -storage postgres \
100 | -postgres_host hashr_postgresql \
101 | -postgres_port 5432 \
102 | -postgres_user hashr \
103 | -postgres_password hashr \
104 | -postgres_db hashr \
105 | -importers iso9660 \
106 | -iso_repo_path /data/iso/ \
107 | -exporters postgres
108 | ```
109 |
110 | Run HashR using the `deb` importer and export results to PostgreSQL:
111 |
112 | ```shell
113 | docker run -it \
114 | --network hashr_net \
  -v $(pwd)/DEB:/data/deb \
116 | us-docker.pkg.dev/osdfir-registry/hashr/release/hashr \
117 | -storage postgres \
118 | -postgres_host hashr_postgresql \
119 | -postgres_port 5432 \
120 | -postgres_user hashr \
121 | -postgres_password hashr \
122 | -postgres_db hashr \
123 | -importers deb \
124 | -deb_repo_path /data/deb/ \
125 | -exporters postgres
126 | ```
127 |
Run HashR using the `GCP` importer and export results to PostgreSQL:
129 |
130 | ```shell
131 | docker run -it \
132 | --network hashr_net \
  -v $(pwd)/hashr-sa-private-key.json:/creds/hashr-sa-private-key.json \
134 | -e GOOGLE_APPLICATION_CREDENTIALS='/creds/hashr-sa-private-key.json' \
135 | us-docker.pkg.dev/osdfir-registry/hashr/release/hashr \
136 | -storage postgres \
137 | -postgres_host hashr_postgresql \
138 | -postgres_port 5432 \
139 | -postgres_user hashr \
140 | -postgres_password hashr \
141 | -postgres_db hashr \
142 | -importers GCP \
143 | -gcp_projects debian-cloud,centos-cloud,rhel-cloud \
144 | -hashr_gcp_project \
145 | -hashr_gcs_bucket \
146 | -exporters postgres
147 | ```
148 |
149 | ### Debugging
150 |
151 | Here are some known issues that you can run into when using HashR with docker.
152 |
153 | #### Folder: permission denied
154 |
155 | If you get a permission error from HashR when working with docker volumes ensure
156 | that the folder you are mapping into the container has the same group id as the
HashR group inside the container. Most likely this will be group id `1000`. To change
158 | the group, run:
159 |
`sudo chown -R :1000 <folder>`
161 |
162 | #### mount: permission denied
163 |
164 | Some importers need to mount the provided file (e.g. ISO files). This is not
165 | supported inside the docker container by default. To workaround this issue, use
166 | the `--privileged` flag with your `docker run` command.
167 |
168 | #### Debugging inside the container
169 |
170 | To debug problems inside the HashR container start an interactive shell like
171 | with the following command:
172 |
173 | ```
174 | docker run -it \
175 | --network hashr_net \
176 | --entrypoint=/bin/bash \
177 | us-docker.pkg.dev/osdfir-registry/hashr/release/hashr
178 | ```
179 |
180 | #### Logging output
181 |
182 | For debugging purposes you can send logging output to stderr by using the
`--logtostderr=1` flag.
184 |
By default, hashr logs its output to `/tmp/hashr.INFO`.
186 |
--------------------------------------------------------------------------------
/docker/cloudbuild.yaml:
--------------------------------------------------------------------------------
# Google Cloud Build configuration for HashR release
steps:
  - name: gcr.io/cloud-builders/docker
    args:
      [
        "build",
        "-t",
        "us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:$TAG_NAME",
        "-t",
        "us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:latest",
        "-f",
        "docker/Dockerfile",
        ".",
      ]
    timeout: 4800s
# Overall build deadline (80 minutes). The duplicate top-level `timeout` key
# that was here is invalid YAML (duplicate mapping key) and has been removed.
timeout: 4800s
images:
  - us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:latest
  - us-docker.pkg.dev/osdfir-registry/hashr/release/hashr:$TAG_NAME
20 |
--------------------------------------------------------------------------------
/docs/assets/HashR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/docs/assets/HashR.png
--------------------------------------------------------------------------------
/exporters/gcp/gcp.go:
--------------------------------------------------------------------------------
1 | package gcp
2 |
import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"os"
	"os/exec"
	"strings"
	"sync"

	"cloud.google.com/go/spanner"
	"github.com/golang/glog"
	"github.com/google/hashr/common"
	"google.golang.org/api/iterator"
	"google.golang.org/api/storage/v1"
	"google.golang.org/grpc/codes"
)
20 |
const (
	// Name contains the name of the exporter.
	Name = "GCP"
)
25 |
// Exporter is an instance of GCP Exporter.
// It writes sample and source metadata to Spanner and, when enabled, uploads
// sample payloads to a GCS bucket.
type Exporter struct {
	spannerClient *spanner.Client // destination Spanner database
	storageClient *storage.Service // GCS client used for payload uploads
	GCSBucket     string // destination bucket for payload objects
	uploadPayloads bool // if true, payloads flagged with sample.Upload are sent to GCS
	workerCount   int // number of concurrent insert workers per Export call
	wg            sync.WaitGroup // tracks in-flight workers; implies Export is not safe to call concurrently on one Exporter
}
35 |
36 | // NewExporter creates new GCP exporter.
37 | func NewExporter(spannerClient *spanner.Client, storageClient *storage.Service, GCSBucket string, uploadPayloads bool, workerCount int) (*Exporter, error) {
38 | return &Exporter{spannerClient: spannerClient, storageClient: storageClient, GCSBucket: GCSBucket, uploadPayloads: uploadPayloads, workerCount: workerCount}, nil
39 | }
40 |
// Name returns exporter name (the package-level Name constant).
func (e *Exporter) Name() string {
	return Name
}
45 |
46 | // Export exports extracted data to GCP (Spanner + GCS).
47 | func (e *Exporter) Export(ctx context.Context, sourceRepoName, sourceRepoPath, sourceID, sourceHash, sourcePath, sourceDescription string, samples []common.Sample) error {
48 | if err := e.insertSource(ctx, sourceHash, sourceID, sourcePath, sourceRepoName, sourceRepoPath, sourceDescription); err != nil {
49 | return fmt.Errorf("could not upload source data: %v", err)
50 | }
51 |
52 | jobs := make(chan common.Sample, len(samples))
53 | for w := 1; w <= e.workerCount; w++ {
54 | e.wg.Add(1)
55 | go e.worker(ctx, sourceHash, jobs)
56 | }
57 |
58 | go func() {
59 | for _, sample := range samples {
60 | jobs <- sample
61 | }
62 | close(jobs)
63 | }()
64 | e.wg.Wait()
65 |
66 | return nil
67 | }
68 |
// worker consumes samples from the channel until it is closed, inserting each
// sample's metadata and its source<->sample relationship into Spanner.
// Per-sample failures are logged and skipped; the worker never aborts the batch.
func (e *Exporter) worker(ctx context.Context, sourceHash string, samples <-chan common.Sample) {
	defer e.wg.Done()
	for sample := range samples {
		if err := e.insertSample(ctx, sample); err != nil {
			glog.Errorf("skipping %s, could not insert sample data: %v", sample.Sha256, err)
			continue
		}

		// Only record the relationship once the sample row exists.
		if err := e.insertRelationship(ctx, sample, sourceHash); err != nil {
			glog.Errorf("skipping %s, could not insert source <-> sample relationship: %v", sample.Sha256, err)
			continue
		}
	}
}
83 |
84 | func (e *Exporter) insertRelationship(ctx context.Context, sample common.Sample, sourceSha256 string) error {
85 | var paths, existingPaths []string
86 |
87 | for _, path := range sample.Paths {
88 | s := strings.Split(path, "/extracted/")
89 | if len(s) < 2 {
90 | glog.Warningf("sample path does not follow expected format: %s", path)
91 | continue
92 | }
93 | paths = append(paths, strings.TrimPrefix(strings.TrimPrefix(s[len(s)-1], "mnt"), "export"))
94 | }
95 |
96 | sql := spanner.Statement{
97 | SQL: `SELECT sample_paths FROM samples_sources WHERE sample_sha256 = @sha256`,
98 | Params: map[string]interface{}{
99 | "sha256": sample.Sha256,
100 | },
101 | }
102 |
103 | iter := e.spannerClient.Single().Query(ctx, sql)
104 | defer iter.Stop()
105 | row, err := iter.Next()
106 | if err != iterator.Done {
107 | if err := row.Columns(&existingPaths); err != nil {
108 | return err
109 | }
110 | }
111 | if err != iterator.Done && err != nil {
112 | return err
113 | }
114 |
115 | _, err = e.spannerClient.Apply(ctx, []*spanner.Mutation{
116 | spanner.InsertOrUpdate("samples_sources",
117 | []string{
118 | "sample_sha256",
119 | "source_sha256",
120 | "sample_paths"},
121 | []interface{}{
122 | sample.Sha256,
123 | sourceSha256,
124 | append(existingPaths, paths...),
125 | })})
126 | if err != nil {
127 | return fmt.Errorf("failed to insert data %v", err)
128 | }
129 |
130 | return nil
131 | }
132 |
133 | func (e *Exporter) insertSample(ctx context.Context, sample common.Sample) error {
134 | var samplePath string
135 | var fi os.FileInfo
136 | var err error
137 | // If sample has more than one path associated with it, take the first that is valid.
138 | for _, path := range sample.Paths {
139 | if fi, err = os.Stat(path); err == nil {
140 | samplePath = path
141 | break
142 | }
143 | }
144 |
145 | file, err := os.Open(samplePath)
146 | if err != nil {
147 | return fmt.Errorf("could not open %v", samplePath)
148 | }
149 | defer file.Close()
150 |
151 | mimeType, err := getFileContentType(file)
152 | if err != nil {
153 | glog.Warningf("Could not get file content type: %v", err)
154 | }
155 |
156 | fileOutput, err := fileCmdOutput(samplePath)
157 | if err != nil {
158 | glog.Warningf("Could not get file cmd output: %v", err)
159 | }
160 |
161 | fileOutput = strings.TrimPrefix(fileOutput, fmt.Sprintf("%s%s", samplePath, ":"))
162 | _, err = e.spannerClient.Apply(ctx, []*spanner.Mutation{
163 | spanner.Insert("samples",
164 | []string{
165 | "sha256",
166 | "mimetype",
167 | "file_output",
168 | "size"},
169 | []interface{}{
170 | sample.Sha256,
171 | mimeType,
172 | fileOutput,
173 | fi.Size(),
174 | })})
175 | if spanner.ErrCode(err) != codes.AlreadyExists && err != nil {
176 | return fmt.Errorf("failed to insert data %v", err)
177 | }
178 |
179 | if e.uploadPayloads && sample.Upload {
180 | fmt.Println(samplePath)
181 | file, err := os.Open(samplePath)
182 | if err != nil {
183 | return fmt.Errorf("error while opening file: %v", err)
184 | }
185 |
186 | fi, err := file.Stat()
187 | if err != nil {
188 | return fmt.Errorf("error while opening file: %v", err)
189 | }
190 |
191 | fmt.Println(fi.Size())
192 |
193 | name := fmt.Sprintf("%s/%s", strings.ToUpper(sample.Sha256[0:2]), strings.ToUpper(sample.Sha256))
194 | object := &storage.Object{
195 | Name: name,
196 | }
197 |
198 | _, err = e.storageClient.Objects.Insert(e.GCSBucket, object).Media(file).Do()
199 | if err != nil {
200 | return fmt.Errorf("error uploading data to GCS: %v", err)
201 | }
202 |
203 | _, err = e.spannerClient.Apply(ctx, []*spanner.Mutation{
204 | spanner.Insert("payloads",
205 | []string{
206 | "sha256",
207 | "gcs_path"},
208 | []interface{}{
209 | sample.Sha256,
210 | fmt.Sprintf("gs://%s/%s", e.GCSBucket, name),
211 | })})
212 | if spanner.ErrCode(err) != codes.AlreadyExists && err != nil {
213 | return fmt.Errorf("failed to insert data %v", err)
214 | }
215 | }
216 |
217 | return nil
218 | }
219 |
220 | func (e *Exporter) insertSource(ctx context.Context, sourceHash, sourceID, sourcePath, sourceRepoName, sourceRepoPath, sourceDescription string) error {
221 | var sourceIDs []string
222 |
223 | sql := spanner.Statement{
224 | SQL: `SELECT source_id FROM sources WHERE sha256 = @sha256`,
225 | Params: map[string]interface{}{
226 | "sha256": sourceHash,
227 | },
228 | }
229 |
230 | iter := e.spannerClient.Single().Query(ctx, sql)
231 | defer iter.Stop()
232 | row, err := iter.Next()
233 | if err != iterator.Done {
234 | if err := row.Columns(&sourceIDs); err != nil {
235 | return err
236 | }
237 | }
238 | if err != iterator.Done && err != nil {
239 | return err
240 | }
241 |
242 | _, err = e.spannerClient.Apply(ctx, []*spanner.Mutation{
243 | spanner.InsertOrUpdate("sources",
244 | []string{
245 | "sha256",
246 | "source_id",
247 | "source_path",
248 | "source_description",
249 | "repo_name",
250 | "repo_path"},
251 | []interface{}{
252 | sourceHash,
253 | append(sourceIDs, sourceID),
254 | sourcePath,
255 | sourceDescription,
256 | sourceRepoName,
257 | sourceRepoPath,
258 | })})
259 | if err != nil {
260 | return fmt.Errorf("failed to insert data %v", err)
261 | }
262 |
263 | return nil
264 | }
265 |
266 | func getFileContentType(out *os.File) (string, error) {
267 |
268 | // Only the first 512 bytes are used to check the content type.
269 | buffer := make([]byte, 512)
270 |
271 | _, err := out.Read(buffer)
272 | if err != nil {
273 | return "", err
274 | }
275 |
276 | contentType := http.DetectContentType(buffer)
277 |
278 | return contentType, nil
279 | }
280 |
// fileCmdOutput runs file(1) on the given path and returns its stdout with the
// trailing newline stripped. On failure the error includes both streams.
func fileCmdOutput(filepath string) (string, error) {
	const fileBin = "/usr/bin/file"

	var stdout, stderr bytes.Buffer
	cmd := exec.Command(fileBin, filepath)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("error while executing %s: %v\nStdout: %v\nStderr: %v", fileBin, err, stdout.String(), stderr.String())
	}

	return strings.TrimSuffix(stdout.String(), "\n"), nil
}
294 |
--------------------------------------------------------------------------------
/exporters/gcp/gcp_test.go:
--------------------------------------------------------------------------------
1 | package gcp
2 |
3 | import (
4 | "context"
5 | "path/filepath"
6 | "testing"
7 |
8 | "github.com/golang/glog"
9 | "github.com/google/hashr/common"
10 | "google.golang.org/api/option"
11 | "google.golang.org/grpc"
12 | "google.golang.org/grpc/credentials/insecure"
13 |
14 | "cloud.google.com/go/spanner"
15 | database "cloud.google.com/go/spanner/admin/database/apiv1"
16 | instance "cloud.google.com/go/spanner/admin/instance/apiv1"
17 |
18 | dbadminpb "google.golang.org/genproto/googleapis/spanner/admin/database/v1"
19 | instancepb "google.golang.org/genproto/googleapis/spanner/admin/instance/v1"
20 | )
21 |
// Spanner DDL statements used to build the test database schema inside the
// emulator before exercising the exporter.
const (
	// samplesTable holds one row per unique sample, keyed by its SHA-256.
	samplesTable = `
CREATE TABLE samples (
sha256 STRING(100),
mimetype STRING(MAX),
file_output STRING(MAX),
size INT64
) PRIMARY KEY(sha256)`

	// payloadsTable maps a sample hash to its GCS object path.
	payloadsTable = `
CREATE TABLE payloads (
sha256 STRING(100),
gcs_path STRING(200)
) PRIMARY KEY(sha256)`

	// sourcesTable holds one row per source image, keyed by its SHA-256.
	sourcesTable = `
CREATE TABLE sources (
sha256 STRING(100),
source_id ARRAY,
source_path STRING(MAX),
source_description STRING(MAX),
repo_name STRING(MAX),
repo_path STRING(MAX),
) PRIMARY KEY(sha256)`

	// samplesSourcesTable is the many-to-many join table between samples and
	// sources, with foreign keys to both parents.
	samplesSourcesTable = `CREATE TABLE samples_sources (
sample_sha256 STRING(100),
source_sha256 STRING(100),
sample_paths ARRAY,
CONSTRAINT FK_Sample FOREIGN KEY (sample_sha256) REFERENCES samples (sha256),
CONSTRAINT FK_Source FOREIGN KEY (source_sha256) REFERENCES sources (sha256),
) PRIMARY KEY (sample_sha256, source_sha256)`
)
55 |
56 | func TestExport(t *testing.T) {
57 | ctx := context.Background()
58 |
59 | o := []option.ClientOption{
60 | option.WithEndpoint("localhost:9010"),
61 | option.WithoutAuthentication(),
62 | option.WithGRPCDialOption(grpc.WithTransportCredentials(insecure.NewCredentials())),
63 | }
64 |
65 | instanceAdmin, err := instance.NewInstanceAdminClient(ctx, o...)
66 | if err != nil {
67 | glog.Fatalf("error dialing instance admin: %v", err)
68 | }
69 | defer instanceAdmin.Close()
70 |
71 | if err := instanceAdmin.DeleteInstance(ctx, &instancepb.DeleteInstanceRequest{Name: "projects/hashr/instances/hashr"}); err != nil {
72 | glog.Warning(err)
73 | }
74 |
75 | op, err := instanceAdmin.CreateInstance(ctx, &instancepb.CreateInstanceRequest{
76 | Parent: "projects/hashr",
77 | InstanceId: "hashr",
78 | Instance: &instancepb.Instance{
79 | DisplayName: "hashr",
80 | NodeCount: 1,
81 | },
82 | })
83 | if err != nil {
84 | glog.Fatalf("error creating test spanner instance: %v", err)
85 | }
86 | if _, err := op.Wait(ctx); err != nil {
87 | glog.Fatalf("error creating test spanner instance: %v", err)
88 | }
89 |
90 | databaseAdmin, err := database.NewDatabaseAdminClient(ctx, o...)
91 | if err != nil {
92 | glog.Fatalf("error creating database admin client for emulator: %v", err)
93 | }
94 |
95 | dbURI := "projects/hashr/instances/hashr/databases/hashr"
96 | op2, err := databaseAdmin.CreateDatabase(ctx, &dbadminpb.CreateDatabaseRequest{
97 | Parent: "projects/hashr/instances/hashr",
98 | CreateStatement: "CREATE DATABASE hashr",
99 | ExtraStatements: []string{samplesTable, sourcesTable, payloadsTable, samplesSourcesTable},
100 | })
101 | if err != nil {
102 | glog.Fatalf("error creating test DB %v: %v", dbURI, err)
103 | }
104 | if _, err = op2.Wait(ctx); err != nil {
105 | glog.Fatalf("error creating test DB %v: %v", dbURI, err)
106 | }
107 |
108 | spannerClient, err := spanner.NewClient(ctx, dbURI, o...)
109 | if err != nil {
110 | glog.Fatalf("error creating Spanner client %v: %v", dbURI, err)
111 | }
112 |
113 | exporter, err := NewExporter(spannerClient, nil, "gcs-bucket", false, 10)
114 | if err != nil {
115 | glog.Fatalf("error creating Cloud Spanner exporter: %v", err)
116 | }
117 |
118 | samples := []common.Sample{
119 | {
120 | Sha256: "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3",
121 | Paths: []string{filepath.Join("testdata/extraction", "file.01")},
122 | Upload: true,
123 | },
124 | {
125 | Sha256: "5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb",
126 | Paths: []string{filepath.Join("testdata/extraction", "file.02")},
127 | Upload: true,
128 | },
129 | {
130 | Sha256: "9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7",
131 | Paths: []string{filepath.Join("testdata/extraction", "file.03")},
132 | Upload: true,
133 | },
134 | }
135 |
136 | if err := exporter.Export(ctx, "GCP", "ubuntu", "ubuntu-1604-lts", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", "", "Official Ubuntu GCP image.", samples); err != nil {
137 | t.Fatalf("unexpected error while running Export() = %v", err)
138 | }
139 |
140 | }
141 |
--------------------------------------------------------------------------------
/exporters/gcp/testdata/extraction/file.01:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/gcp/testdata/extraction/file.01
--------------------------------------------------------------------------------
/exporters/gcp/testdata/extraction/file.02:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/gcp/testdata/extraction/file.02
--------------------------------------------------------------------------------
/exporters/gcp/testdata/extraction/file.03:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/gcp/testdata/extraction/file.03
--------------------------------------------------------------------------------
/exporters/gcp/testdata/extraction/file.04:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/gcp/testdata/extraction/file.04
--------------------------------------------------------------------------------
/exporters/postgres/postgres_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package postgres
16 |
17 | import (
18 | "context"
19 | "io"
20 | "os"
21 | "path/filepath"
22 | "testing"
23 |
24 | "github.com/google/hashr/common"
25 |
26 | "github.com/DATA-DOG/go-sqlmock"
27 | )
28 |
29 | func TestExport(t *testing.T) {
30 | db, mock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual))
31 | if err != nil {
32 | t.Fatalf("could not open a stub database connection: %v", err)
33 | }
34 | defer db.Close()
35 |
36 | mock.ExpectQuery(`SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_name=$1 );`).WithArgs("samples").WillReturnRows(mock.NewRows([]string{"t"}).AddRow("t"))
37 | mock.ExpectQuery(`SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_name=$1 );`).WithArgs("payloads").WillReturnRows(mock.NewRows([]string{"t"}).AddRow("t"))
38 | mock.ExpectQuery(`SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_name=$1 );`).WithArgs("sources").WillReturnRows(mock.NewRows([]string{"t"}).AddRow("t"))
39 | mock.ExpectQuery(`SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_name=$1 );`).WithArgs("samples_sources").WillReturnRows(mock.NewRows([]string{"t"}).AddRow("t"))
40 |
41 | postgresExporter, err := NewExporter(db, false)
42 | if err != nil {
43 | t.Fatalf("could not create Postgres exporter: %v", err)
44 | }
45 |
46 | mock.ExpectQuery(`SELECT sha256 FROM sources WHERE sha256=$1;`).WithArgs("07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc").WillReturnRows(mock.NewRows([]string{"sha256"}))
47 | mock.ExpectExec(`INSERT INTO sources (sha256, sourceID, sourcePath, repoName, repoPath, sourceDescription) VALUES ($1, $2, $3, $4, $5, $6)`).WithArgs("07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", `{"ubuntu-1604-lts"}`, "", "GCP", "ubuntu", "Official Ubuntu GCP image.").WillReturnResult(sqlmock.NewResult(1, 1))
48 |
49 | mock.ExpectQuery(`SELECT sha256 FROM samples WHERE sha256=$1;`).WithArgs("a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3").WillReturnRows(mock.NewRows([]string{"sha256"}))
50 | mock.ExpectExec(`INSERT INTO samples (sha256, size, mimetype, file_output) VALUES ($1, $2, $3, $4)`).WithArgs("a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", 8192, "application/octet-stream", " data").WillReturnResult(sqlmock.NewResult(1, 1))
51 | mock.ExpectQuery("SELECT sample_sha256,source_sha256 FROM samples_sources WHERE sample_sha256=$1 AND source_sha256=$2;").WithArgs("a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc").WillReturnRows(mock.NewRows([]string{"a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc"}))
52 | mock.ExpectExec(`INSERT INTO samples_sources (sample_sha256, source_sha256, sample_paths) VALUES ($1, $2, $3)`).WithArgs("a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", `{"file.01"}`).WillReturnResult(sqlmock.NewResult(1, 1))
53 |
54 | mock.ExpectQuery(`SELECT sha256 FROM samples WHERE sha256=$1;`).WithArgs("5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb").WillReturnRows(mock.NewRows([]string{"sha256"}))
55 | mock.ExpectExec(`INSERT INTO samples (sha256, size, mimetype, file_output) VALUES ($1, $2, $3, $4)`).WithArgs("5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", 7168, "application/octet-stream", " data").WillReturnResult(sqlmock.NewResult(1, 1))
56 | mock.ExpectQuery("SELECT sample_sha256,source_sha256 FROM samples_sources WHERE sample_sha256=$1 AND source_sha256=$2;").WithArgs("5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc").WillReturnRows(mock.NewRows([]string{"5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc"}))
57 | mock.ExpectExec(`INSERT INTO samples_sources (sample_sha256, source_sha256, sample_paths) VALUES ($1, $2, $3)`).WithArgs("5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", `{"file.02"}`).WillReturnResult(sqlmock.NewResult(1, 1))
58 |
59 | mock.ExpectQuery(`SELECT sha256 FROM samples WHERE sha256=$1;`).WithArgs("9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7").WillReturnRows(mock.NewRows([]string{"sha256"}))
60 | mock.ExpectExec(`INSERT INTO samples (sha256, size, mimetype, file_output) VALUES ($1, $2, $3, $4)`).WithArgs("9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", 5120, "application/octet-stream", " data").WillReturnResult(sqlmock.NewResult(1, 1))
61 | mock.ExpectQuery("SELECT sample_sha256,source_sha256 FROM samples_sources WHERE sample_sha256=$1 AND source_sha256=$2;").WithArgs("9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc").WillReturnRows(mock.NewRows([]string{"9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc"}))
62 | mock.ExpectExec(`INSERT INTO samples_sources (sample_sha256, source_sha256, sample_paths) VALUES ($1, $2, $3)`).WithArgs("9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", `{"file.03"}`).WillReturnResult(sqlmock.NewResult(1, 1))
63 |
64 | tempDir := "/tmp/extracted/"
65 | if err := os.MkdirAll(tempDir, 0777); err != nil {
66 | t.Fatalf("Could not create temp extraction directory(%s): %v", tempDir, err)
67 | }
68 |
69 | // We need to copy the file to the tmp dir, otherwise we'll end up opening symlinks.
70 | for _, filename := range []string{"file.01", "file.02", "file.03"} {
71 | in, err := os.Open(filepath.Join("testdata/extraction", filename))
72 | if err != nil {
73 | t.Fatal(err)
74 | }
75 | out, err := os.Create(filepath.Join(tempDir, filename))
76 | if err != nil {
77 | t.Fatal(err)
78 | }
79 | _, err = io.Copy(out, in)
80 | if err != nil {
81 | t.Fatal(err)
82 | }
83 | out.Close()
84 | }
85 |
86 | samples := []common.Sample{
87 | {
88 | Sha256: "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3",
89 | Paths: []string{filepath.Join(tempDir, "file.01")},
90 | Upload: true,
91 | },
92 | {
93 | Sha256: "5c7a0f6e38f86f4db12130e5ca9f734f4def519b9a884ee8ea9fc45f9626c6fb",
94 | Paths: []string{filepath.Join(tempDir, "file.02")},
95 | Upload: true,
96 | },
97 | {
98 | Sha256: "9ad2027cae0d7b0f041a6fc1e3124ad4046b2665068c44c74546ad9811e81ec7",
99 | Paths: []string{filepath.Join(tempDir, "file.03")},
100 | Upload: true,
101 | },
102 | }
103 |
104 | if err := postgresExporter.Export(context.Background(), "GCP", "ubuntu", "ubuntu-1604-lts", "07123e1f482356c415f684407a3b8723e10b2cbbc0b8fcd6282c49d37c9c1abc", "", "Official Ubuntu GCP image.", samples); err != nil {
105 | t.Fatalf("unexpected error while running Export() = %v", err)
106 | }
107 | }
108 |
--------------------------------------------------------------------------------
/exporters/postgres/testdata/extraction/._file.01:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/._file.01
--------------------------------------------------------------------------------
/exporters/postgres/testdata/extraction/._file.02:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/._file.02
--------------------------------------------------------------------------------
/exporters/postgres/testdata/extraction/._file.03:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/._file.03
--------------------------------------------------------------------------------
/exporters/postgres/testdata/extraction/._file.04:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/._file.04
--------------------------------------------------------------------------------
/exporters/postgres/testdata/extraction/file.01:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/file.01
--------------------------------------------------------------------------------
/exporters/postgres/testdata/extraction/file.02:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/file.02
--------------------------------------------------------------------------------
/exporters/postgres/testdata/extraction/file.03:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/file.03
--------------------------------------------------------------------------------
/exporters/postgres/testdata/extraction/file.04:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/exporters/postgres/testdata/extraction/file.04
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/google/hashr
2 |
3 | go 1.18
4 |
5 | require (
6 | cloud.google.com/go/spanner v1.53.1
7 | github.com/DATA-DOG/go-sqlmock v1.5.0
8 | github.com/Microsoft/go-winio v0.6.1
9 | github.com/aws/aws-sdk-go-v2 v1.24.1
10 | github.com/aws/aws-sdk-go-v2/config v1.26.3
11 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.11
12 | github.com/aws/aws-sdk-go-v2/service/ec2 v1.144.0
13 | github.com/aws/aws-sdk-go-v2/service/s3 v1.48.0
14 | github.com/golang/glog v1.2.0
15 | github.com/google/go-cmp v0.6.0
16 | github.com/google/go-containerregistry v0.17.0
17 | github.com/hooklift/iso9660 v1.0.0
18 | github.com/lib/pq v1.10.9
19 | github.com/sassoftware/go-rpmutils v0.2.0
20 | golang.org/x/crypto v0.21.0
21 | golang.org/x/oauth2 v0.15.0
22 | google.golang.org/api v0.153.0
23 | google.golang.org/genproto v0.0.0-20231127180814-3a041ad873d4
24 | google.golang.org/grpc v1.59.0
25 | google.golang.org/protobuf v1.33.0
26 | pault.ag/go/debian v0.16.0
27 | )
28 |
29 | require (
30 | cloud.google.com/go v0.111.0 // indirect
31 | cloud.google.com/go/compute v1.23.3 // indirect
32 | cloud.google.com/go/compute/metadata v0.2.3 // indirect
33 | cloud.google.com/go/iam v1.1.5 // indirect
34 | cloud.google.com/go/longrunning v0.5.4 // indirect
35 | github.com/DataDog/zstd v1.5.5 // indirect
36 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 // indirect
37 | github.com/aws/aws-sdk-go-v2/credentials v1.16.14 // indirect
38 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 // indirect
39 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 // indirect
40 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 // indirect
41 | github.com/aws/aws-sdk-go-v2/internal/ini v1.7.2 // indirect
42 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10 // indirect
43 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4 // indirect
44 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.10 // indirect
45 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10 // indirect
46 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.10 // indirect
47 | github.com/aws/aws-sdk-go-v2/service/sso v1.18.6 // indirect
48 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.6 // indirect
49 | github.com/aws/aws-sdk-go-v2/service/sts v1.26.7 // indirect
50 | github.com/aws/smithy-go v1.19.0 // indirect
51 | github.com/c4milo/gotoolkit v0.0.0-20190525173301-67483a18c17a // indirect
52 | github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect
53 | github.com/cespare/xxhash/v2 v2.2.0 // indirect
54 | github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe // indirect
55 | github.com/cncf/xds/go v0.0.0-20231128003011-0fa0005c9caa // indirect
56 | github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect
57 | github.com/docker/cli v24.0.7+incompatible // indirect
58 | github.com/docker/distribution v2.8.3+incompatible // indirect
59 | github.com/docker/docker v24.0.9+incompatible // indirect
60 | github.com/docker/docker-credential-helpers v0.8.0 // indirect
61 | github.com/envoyproxy/go-control-plane v0.11.1 // indirect
62 | github.com/envoyproxy/protoc-gen-validate v1.0.2 // indirect
63 | github.com/go-logr/logr v1.3.0 // indirect
64 | github.com/go-logr/stdr v1.2.2 // indirect
65 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
66 | github.com/golang/protobuf v1.5.3 // indirect
67 | github.com/google/s2a-go v0.1.7 // indirect
68 | github.com/google/uuid v1.4.0 // indirect
69 | github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
70 | github.com/googleapis/gax-go/v2 v2.12.0 // indirect
71 | github.com/hooklift/assert v0.1.0 // indirect
72 | github.com/jmespath/go-jmespath v0.4.0 // indirect
73 | github.com/kjk/lzma v0.0.0-20161016003348-3fd93898850d // indirect
74 | github.com/klauspost/compress v1.17.4 // indirect
75 | github.com/mitchellh/go-homedir v1.1.0 // indirect
76 | github.com/opencontainers/go-digest v1.0.0 // indirect
77 | github.com/opencontainers/image-spec v1.1.0-rc5 // indirect
78 | github.com/pkg/errors v0.9.1 // indirect
79 | github.com/sirupsen/logrus v1.9.3 // indirect
80 | github.com/ulikunitz/xz v0.5.11 // indirect
81 | github.com/vbatts/tar-split v0.11.5 // indirect
82 | github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
83 | go.opencensus.io v0.24.0 // indirect
84 | go.opentelemetry.io/otel v1.21.0 // indirect
85 | go.opentelemetry.io/otel/metric v1.21.0 // indirect
86 | go.opentelemetry.io/otel/trace v1.21.0 // indirect
87 | golang.org/x/net v0.23.0 // indirect
88 | golang.org/x/sync v0.5.0 // indirect
89 | golang.org/x/sys v0.18.0 // indirect
90 | golang.org/x/text v0.14.0 // indirect
91 | golang.org/x/time v0.5.0 // indirect
92 | golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
93 | google.golang.org/appengine v1.6.8 // indirect
94 | google.golang.org/genproto/googleapis/api v0.0.0-20231127180814-3a041ad873d4 // indirect
95 | google.golang.org/genproto/googleapis/rpc v0.0.0-20231127180814-3a041ad873d4 // indirect
96 | pault.ag/go/topsort v0.1.1 // indirect
97 | )
98 |
--------------------------------------------------------------------------------
/importers/aws/aws_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package aws implements AWS repository importer unit tests.
16 |
17 | package aws
18 |
19 | import (
20 | "bytes"
21 | "context"
22 | "strconv"
23 | "testing"
24 |
25 | "github.com/aws/aws-sdk-go-v2/aws"
26 | "github.com/aws/aws-sdk-go-v2/service/ec2"
27 | "github.com/aws/aws-sdk-go-v2/service/ec2/types"
28 | )
29 |
// mockDescribeImagesAPI is a function adapter that satisfies the
// ec2DescribeImagesAPI interface, letting tests stub the DescribeImages call
// with a closure.
type mockDescribeImagesAPI func(ctx context.Context, params *ec2.DescribeImagesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error)

// DescribeImages implements ec2DescribeImagesAPI by delegating to the mock
// function value itself.
func (m mockDescribeImagesAPI) DescribeImages(ctx context.Context, params *ec2.DescribeImagesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) {
	return m(ctx, params, optFns...)
}
35 |
36 | func TestDiscoveryRepo(t *testing.T) {
37 | cases := []struct {
38 | client func(t *testing.T) ec2DescribeImagesAPI
39 | architecture []string
40 | expect []byte
41 | }{
42 | {
43 | client: func(t *testing.T) ec2DescribeImagesAPI {
44 | return mockDescribeImagesAPI(func(ctx context.Context, params *ec2.DescribeImagesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) {
45 | t.Helper()
46 |
47 | return &ec2.DescribeImagesOutput{
48 | Images: []types.Image{
49 | {
50 | ImageId: aws.String("ami-sample"),
51 | },
52 | },
53 | }, nil
54 | })
55 | },
56 | architecture: []string{"x86_64"},
57 | expect: []byte("ami-sample"),
58 | },
59 | }
60 |
61 | for i, tt := range cases {
62 | t.Run(strconv.Itoa(i), func(t *testing.T) {
63 | ctx := context.TODO()
64 | images, err := getAmazonImages(ctx, tt.client(t), tt.architecture)
65 | if err != nil {
66 | t.Fatalf("expect no error, got %v", err)
67 | }
68 | if e, a := tt.expect, []byte(*images[0].ImageId); bytes.Compare(e, a) != 0 {
69 | t.Errorf("expect %v, got %v", e, a)
70 | }
71 | })
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/importers/common/common.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package common provides common functions used by hashR importers.
16 | package common
17 |
18 | import (
19 | "archive/tar"
20 | "compress/gzip"
21 | "fmt"
22 | "io"
23 | "io/ioutil"
24 | "os"
25 | "path"
26 | "path/filepath"
27 | "strings"
28 |
29 | "github.com/golang/glog"
30 | )
31 |
32 | // ExtractTarGz extracts tar.gz file to given output folder. If directory does not exist, it will
33 | // be created.
34 | func ExtractTarGz(tarGzPath, outputFolder string) error {
35 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) {
36 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil {
37 | return fmt.Errorf("error while creating target directory: %v", err2)
38 | }
39 | }
40 |
41 | gzFile, err := os.Open(tarGzPath)
42 | if err != nil {
43 | return err
44 | }
45 | defer gzFile.Close()
46 |
47 | gzReader, err := gzip.NewReader(gzFile)
48 | if err != nil {
49 | return err
50 | }
51 |
52 | tarReader := tar.NewReader(gzReader)
53 |
54 | glog.Infof("Extracting %s to %s", tarGzPath, outputFolder)
55 |
56 | for {
57 | header, err := tarReader.Next()
58 |
59 | switch {
60 | case err == io.EOF:
61 | return nil
62 | case err != nil:
63 | return err
64 | }
65 |
66 | if containsDotDot(header.Name) {
67 | glog.Warningf("not extracting %s, potential path traversal", header.Name)
68 | continue
69 | }
70 | destEntry := filepath.Join(outputFolder, header.Name)
71 |
72 | switch header.Typeflag {
73 | case tar.TypeDir:
74 | if _, err := os.Stat(destEntry); os.IsNotExist(err) {
75 | if err := os.MkdirAll(destEntry, 0755); err != nil {
76 | return fmt.Errorf("error while creating destination directory: %v", err)
77 | }
78 | }
79 | case tar.TypeReg:
80 | if _, err := os.Stat(filepath.Dir(destEntry)); os.IsNotExist(err) {
81 | if err := os.MkdirAll(filepath.Dir(destEntry), 0755); err != nil {
82 | return fmt.Errorf("error while creating destination directory: %v", err)
83 | }
84 | }
85 |
86 | destFile, err := os.Create(destEntry)
87 | if err != nil {
88 | return fmt.Errorf("error while creating destination file: %v", err)
89 | }
90 |
91 | _, err = io.Copy(destFile, tarReader)
92 | if err != nil {
93 | return fmt.Errorf("error while extracting destination file: %v", err)
94 | }
95 | destFile.Close()
96 | }
97 | }
98 | }
99 |
// containsDotDot reports whether v contains a ".." path element when split on
// either forward or backward slashes, i.e. whether the path could escape its
// intended root.
func containsDotDot(v string) bool {
	// Fast path: no ".." substring means no ".." element.
	if !strings.Contains(v, "..") {
		return false
	}
	slash := func(r rune) bool { return r == '/' || r == '\\' }
	for _, part := range strings.FieldsFunc(v, slash) {
		if part == ".." {
			return true
		}
	}
	return false
}
111 |
// isSlashRune reports whether r is a forward or backward slash.
func isSlashRune(r rune) bool {
	switch r {
	case '/', '\\':
		return true
	default:
		return false
	}
}
113 |
// LocalTempDir creates a local temporary directory whose name embeds the
// given source ID ("hashr-<sourceID>-<random>") and returns its path.
func LocalTempDir(sourceID string) (string, error) {
	return ioutil.TempDir("", fmt.Sprintf("hashr-%s-", sourceID))
}
123 |
124 | // CopyToLocal copies a source to a local file system.
125 | func CopyToLocal(remotePath, sourceID string) (string, error) {
126 | tempDir, err := LocalTempDir(sourceID)
127 | if err != nil {
128 | return "", err
129 | }
130 |
131 | sourceFile, err := os.Open(remotePath)
132 | if err != nil {
133 | return "", err
134 | }
135 |
136 | destPath := path.Join(tempDir, filepath.Base(remotePath))
137 | destFile, err := os.Create(destPath)
138 | if err != nil {
139 | return destPath, err
140 | }
141 |
142 | glog.Infof("Copying %s to %s", sourceID, destPath)
143 |
144 | _, err = io.Copy(destFile, sourceFile)
145 | if err != nil {
146 | return destPath, err
147 | }
148 |
149 | glog.Infof("Done copying %s", sourceID)
150 | return destPath, nil
151 | }
152 |
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir1/desktop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir1/desktop.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir1/laptop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir1/laptop.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir1/server.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir1/server.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir2/desktop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir2/desktop.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir2/laptop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir2/laptop.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir2/server.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir2/server.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir3/desktop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir3/desktop.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir3/laptop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir3/laptop.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir3/server.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir3/server.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir4/desktop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir4/desktop.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir4/laptop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir4/laptop.tar.gz
--------------------------------------------------------------------------------
/importers/common/testdata/targz/dir4/server.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/common/testdata/targz/dir4/server.tar.gz
--------------------------------------------------------------------------------
/importers/deb/deb.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package deb implements deb package importer.
16 | package deb
17 |
18 | import (
19 | "archive/tar"
20 | "crypto/sha256"
21 | "fmt"
22 | "io"
23 | "os"
24 | "path/filepath"
25 | "strings"
26 |
27 | "github.com/golang/glog"
28 |
29 | "github.com/google/hashr/core/hashr"
30 | "github.com/google/hashr/importers/common"
31 |
32 | "pault.ag/go/debian/deb"
33 | )
34 |
const (
	// RepoName contains the repository name.
	RepoName = "deb"
	// chunkSize is the number of bytes hashed from the head and from the
	// tail of a package when computing the quick hash.
	chunkSize = 1024 * 1024 * 10 // 10MB
)
40 |
// Archive holds data related to deb archive.
type Archive struct {
	filename        string // base name of the .deb file
	remotePath      string // original (possibly remote) path of the package
	localPath       string // path of the local copy, set by Preprocess
	quickSha256hash string // cached result of QuickSHA256Hash
	repoPath        string // root of the repository the package was found in
}
49 |
// isSubElem reports whether sub lies inside the directory tree rooted at
// parent. The comparison goes through filepath.Rel because absolute-path
// string comparison is not reliable (paths have no unique representation).
func isSubElem(parent, sub string) (bool, error) {
	rel, err := filepath.Rel(parent, sub)
	if err != nil {
		return false, err
	}
	escape := ".." + string(os.PathSeparator)
	if rel == ".." || strings.HasPrefix(rel, escape) {
		return false, nil
	}
	return true, nil
}
63 |
64 | func extractTar(tarfile *tar.Reader, outputFolder string) error {
65 | for {
66 | header, err := tarfile.Next()
67 |
68 | if err == io.EOF {
69 | break
70 | }
71 |
72 | if err != nil {
73 | return fmt.Errorf("error while unpacking deb package: %v", err)
74 | }
75 |
76 | name := header.Name
77 |
78 | switch header.Typeflag {
79 | case tar.TypeSymlink:
80 | continue
81 |
82 | case tar.TypeDir:
83 | continue
84 |
85 | case tar.TypeRegA:
86 | case tar.TypeReg:
87 | unpackPath := filepath.Join(outputFolder, name)
88 | unpackFolder := filepath.Dir(unpackPath)
89 | if _, err := os.Stat(unpackFolder); os.IsNotExist(err) {
90 | if err2 := os.MkdirAll(unpackFolder, 0755); err2 != nil {
91 | return fmt.Errorf("error while creating target directory: %v", err2)
92 | }
93 | }
94 |
95 | fileIsSubelem, err := isSubElem(outputFolder, unpackPath)
96 | if err != nil || !fileIsSubelem {
97 | return fmt.Errorf("error, deb package tried to unpack file above parent")
98 | }
99 |
100 | unpackFileHandle, err := os.Create(unpackPath)
101 | if err != nil {
102 | return fmt.Errorf("error while creating destination file: %v", err)
103 | }
104 | defer unpackFileHandle.Close()
105 | _, err = io.Copy(unpackFileHandle, tarfile)
106 | if err != nil {
107 | return fmt.Errorf("error while writing to destination file: %v", err)
108 | }
109 |
110 | default:
111 | fmt.Printf("Unknown tar entry type: %c in file %s\n", header.Typeflag, name)
112 | }
113 | }
114 |
115 | return nil
116 | }
117 |
118 | func extractDeb(debPath, outputFolder string) error {
119 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) {
120 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil {
121 | return fmt.Errorf("error while creating target directory: %v", err2)
122 | }
123 | }
124 |
125 | fd, err := os.Open(debPath)
126 | if err != nil {
127 | return fmt.Errorf("failed to open deb file: %v", err)
128 | }
129 | defer fd.Close()
130 |
131 | debFile, err := deb.Load(fd, debPath)
132 | if err != nil {
133 | return fmt.Errorf("failed to parse deb file: %v", err)
134 | }
135 |
136 | err = extractTar(debFile.Data, outputFolder)
137 | if err != nil {
138 | return err
139 | }
140 |
141 | return nil
142 | }
143 |
144 | // Preprocess extracts the contents of a .deb file.
145 | func (a *Archive) Preprocess() (string, error) {
146 | var err error
147 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID())
148 | if err != nil {
149 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err)
150 | }
151 |
152 | baseDir, _ := filepath.Split(a.localPath)
153 | extractionDir := filepath.Join(baseDir, "extracted")
154 |
155 | if err := extractDeb(a.localPath, extractionDir); err != nil {
156 | return "", err
157 | }
158 |
159 | return extractionDir, nil
160 | }
161 |
// ID returns non-unique deb Archive ID (the package file name).
func (a *Archive) ID() string {
	return a.filename
}

// RepoName returns repository name.
func (a *Archive) RepoName() string {
	return RepoName
}

// RepoPath returns repository path.
func (a *Archive) RepoPath() string {
	return a.repoPath
}

// LocalPath returns local path to a deb Archive .deb file.
// Empty until Preprocess has copied the package locally.
func (a *Archive) LocalPath() string {
	return a.localPath
}

// RemotePath returns non-local path to a deb Archive .deb file.
func (a *Archive) RemotePath() string {
	return a.remotePath
}

// Description provides additional description for a .deb file.
// Always empty for the deb importer.
func (a *Archive) Description() string {
	return ""
}
191 |
192 | // QuickSHA256Hash calculates sha256 hash of .deb file.
193 | func (a *Archive) QuickSHA256Hash() (string, error) {
194 | // Check if the quick hash was already calculated.
195 | if a.quickSha256hash != "" {
196 | return a.quickSha256hash, nil
197 | }
198 |
199 | f, err := os.Open(a.remotePath)
200 | if err != nil {
201 | return "", err
202 | }
203 | defer f.Close()
204 |
205 | fileInfo, err := f.Stat()
206 | if err != nil {
207 | return "", err
208 | }
209 |
210 | // Check if the file is smaller than 20MB, if so hash the whole file.
211 | if fileInfo.Size() < int64(chunkSize*2) {
212 | h := sha256.New()
213 | if _, err := io.Copy(h, f); err != nil {
214 | return "", err
215 | }
216 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil))
217 | return a.quickSha256hash, nil
218 | }
219 |
220 | header := make([]byte, chunkSize)
221 | _, err = f.Read(header)
222 | if err != nil {
223 | return "", err
224 | }
225 |
226 | footer := make([]byte, chunkSize)
227 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize))
228 | if err != nil {
229 | return "", err
230 | }
231 |
232 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...)))
233 | return a.quickSha256hash, nil
234 | }
235 |
236 | // NewRepo returns new instance of deb repository.
237 | func NewRepo(path string) *Repo {
238 | return &Repo{location: path}
239 | }
240 |
// Repo holds data related to a deb repository.
type Repo struct {
	location string     // filesystem root that is scanned for .deb files
	files    []string   // paths collected by walk during DiscoverRepo
	Archives []*Archive // archives built from the discovered files
}
247 |
// RepoName returns repository name.
func (r *Repo) RepoName() string {
	return RepoName
}

// RepoPath returns repository path (the scanned filesystem location).
func (r *Repo) RepoPath() string {
	return r.location
}
257 |
258 | // DiscoverRepo traverses the repository and looks for files that are related to deb archives.
259 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) {
260 |
261 | if err := filepath.Walk(r.location, walk(&r.files)); err != nil {
262 | return nil, err
263 | }
264 |
265 | for _, file := range r.files {
266 | _, filename := filepath.Split(file)
267 |
268 | if strings.HasSuffix(filename, ".deb") {
269 | r.Archives = append(r.Archives, &Archive{filename: filename, remotePath: file, repoPath: r.location})
270 | }
271 | }
272 |
273 | var sources []hashr.Source
274 | for _, Archive := range r.Archives {
275 | sources = append(sources, Archive)
276 | }
277 |
278 | return sources, nil
279 | }
280 |
281 | func walk(files *[]string) filepath.WalkFunc {
282 | return func(path string, info os.FileInfo, err error) error {
283 | if err != nil {
284 | glog.Errorf("Could not open %s: %v", path, err)
285 | return nil
286 | }
287 | if info.IsDir() {
288 | return nil
289 | }
290 | if strings.HasSuffix(info.Name(), ".deb") {
291 | *files = append(*files, path)
292 | }
293 |
294 | return nil
295 | }
296 | }
297 |
--------------------------------------------------------------------------------
/importers/deb/generate_tests.sh:
--------------------------------------------------------------------------------
#!/bin/zsh
# Regenerates the .deb test fixtures from the .tar.gz fixtures in this tree.
# Each tarball is unpacked, given a minimal DEBIAN/control, rebuilt as a .deb
# (cycling through the four dpkg compression methods) and the tarball removed.

compressions=("gzip" "xz" "zstd" "none")
i=0

for tar in $(find . -name '*.tar.gz'); do
  echo "$tar"
  filename=$(basename "$tar")
  tardir=$(dirname "$tar")
  tempdir=$(mktemp -d)
  tar -C "$tempdir" -xf "$tar"
  mkdir -p "$tempdir/DEBIAN"

  # Write the control file via a heredoc. The original used `cat <`, which
  # READS from the (nonexistent) control file and left the EOF marker
  # terminating nothing, so the control lines ran as shell commands.
  cat > "$tempdir/DEBIAN/control" <<EOF
Package: hashr-testdata
Version: 1.0
Architecture: arm64
Maintainer: Example
Description: This text does not matter.
EOF
  # zsh arrays are 1-indexed, hence the +1.
  dpkg-deb -Z${compressions[$(expr $i % 4)+1]} --build --root-owner-group "$tempdir"
  # dpkg-deb writes "$tempdir.deb" next to the build root; copy it before
  # cleaning up.
  cp "$tempdir.deb" "$tardir/$(echo "$filename" | sed 's/.tar.gz/.deb/g')"
  rm -r "$tempdir"
  rm "$tar"
  i=$(expr $i + 1)
done
27 |
--------------------------------------------------------------------------------
/importers/deb/testdata/20200106.00.00/ubuntu-desktop.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200106.00.00/ubuntu-desktop.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200106.00.00/ubuntu-laptop.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200106.00.00/ubuntu-laptop.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200106.00.00/ubuntu-server.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200106.00.00/ubuntu-server.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200107.00.00/ubuntu-desktop.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.00.00/ubuntu-desktop.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200107.00.00/ubuntu-laptop.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.00.00/ubuntu-laptop.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200107.00.00/ubuntu-server.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.00.00/ubuntu-server.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200107.01.00/ubuntu-desktop.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.01.00/ubuntu-desktop.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200107.01.00/ubuntu-laptop.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.01.00/ubuntu-laptop.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200107.01.00/ubuntu-server.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200107.01.00/ubuntu-server.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200108.00.00/ubuntu-desktop.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200108.00.00/ubuntu-desktop.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200108.00.00/ubuntu-laptop.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200108.00.00/ubuntu-laptop.deb
--------------------------------------------------------------------------------
/importers/deb/testdata/20200108.00.00/ubuntu-server.deb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/deb/testdata/20200108.00.00/ubuntu-server.deb
--------------------------------------------------------------------------------
/importers/gcp/testdata/._ubuntu-1804-lts-drawfork-v20190613.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/gcp/testdata/._ubuntu-1804-lts-drawfork-v20190613.tar.gz
--------------------------------------------------------------------------------
/importers/gcp/testdata/ubuntu-1804-lts-drawfork-v20190613.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/gcp/testdata/ubuntu-1804-lts-drawfork-v20190613.tar.gz
--------------------------------------------------------------------------------
/importers/gcr/gcr.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package gcr implements Google Container Repository importer.
16 | package gcr
17 |
18 | import (
19 | "archive/tar"
20 | "compress/gzip"
21 | "context"
22 | "fmt"
23 | "io"
24 | "os"
25 | "path/filepath"
26 | "strings"
27 |
28 | "github.com/golang/glog"
29 | "github.com/google/hashr/core/hashr"
30 | "github.com/google/hashr/importers/common"
31 |
32 | "github.com/google/go-containerregistry/pkg/authn"
33 | "github.com/google/go-containerregistry/pkg/crane"
34 | "github.com/google/go-containerregistry/pkg/name"
35 | "github.com/google/go-containerregistry/pkg/v1/google"
36 | "github.com/google/go-containerregistry/pkg/v1/remote"
37 |
38 | "golang.org/x/oauth2"
39 | )
40 |
const (
	// RepoName contains the repository name.
	RepoName = "gcr"
)

// Package-level state initialized by NewRepo and shared by DiscoverRepo and
// Preprocess.
var (
	auth       authn.Authenticator // authenticator built from the caller's token source
	opts       google.Option       // auth option used when walking the registry
	remoteOpts []remote.Option     // options used when pulling images
)
51 |
52 | // Preprocess extracts the contents of GCR image.
53 | func (i *image) Preprocess() (string, error) {
54 | imgID := fmt.Sprintf("%s@sha256:%s", i.id, i.quickHash)
55 | ref, err := name.ParseReference(imgID, name.StrictValidation)
56 | if err != nil {
57 | return "", fmt.Errorf("error parsing reference from image %q: %v", imgID, err)
58 | }
59 |
60 | fmt.Println(remoteOpts)
61 | // remote.Image(ref, )
62 | // img, err := remote.Image(ref, remote.WithAuth(auth))
63 | img, err := remote.Image(ref, remoteOpts...)
64 | if err != nil {
65 | return "", fmt.Errorf("error retrieving src image %q: %v", imgID, err)
66 | }
67 |
68 | layers, err := img.Layers()
69 | if err != nil {
70 | return "", fmt.Errorf("error retrieving layers from image %q: %v", imgID, err)
71 | }
72 |
73 | tmpDir, err := common.LocalTempDir(strings.ReplaceAll(i.id, string(os.PathSeparator), "-"))
74 | if err != nil {
75 | return "", fmt.Errorf("error creating temp dir: %v", err)
76 | }
77 |
78 | i.localPath = filepath.Join(tmpDir, fmt.Sprintf("%s.tar", strings.ReplaceAll(imgID, "/", "_")))
79 |
80 | if err := crane.Save(img, imgID, i.localPath); err != nil {
81 | return "", fmt.Errorf("error saving src image %q: %v", imgID, err)
82 | }
83 |
84 | for id, layer := range layers {
85 | hash, err := layer.Digest()
86 | if err != nil {
87 | return "", fmt.Errorf("error retrieving hash layer: %v", err)
88 | }
89 |
90 | r, err := layer.Compressed()
91 | if err != nil {
92 | return "", fmt.Errorf("error downloading layer %d: %v", id, err)
93 | }
94 |
95 | destFolder := filepath.Join(tmpDir, "extracted", hash.Hex)
96 |
97 | if err := extractTarGz(r, destFolder); err != nil {
98 | return "", fmt.Errorf("error extracting layer %d: %v", id, err)
99 | }
100 |
101 | if err := r.Close(); err != nil {
102 | return "", fmt.Errorf("error closing download for layer %d: %v", id, err)
103 | }
104 | }
105 |
106 | return filepath.Join(tmpDir, "extracted"), nil
107 | }
108 |
// ID returns non-unique GCR image ID in "<repo>@sha256:<digest>" form.
func (i *image) ID() string {
	return fmt.Sprintf("%s@sha256:%s", i.id, i.quickHash)
}

// RepoName returns repository name.
func (i *image) RepoName() string {
	return RepoName
}

// RepoPath returns repository path. Always empty for GCR images.
func (i *image) RepoPath() string {
	return ""
}

// LocalPath returns local path to a GCR image.
// Empty until Preprocess has saved the image tarball.
func (i *image) LocalPath() string {
	return i.localPath
}

// RemotePath returns remote path to a GCR image.
func (i *image) RemotePath() string {
	return i.remotePath
}

// QuickSHA256Hash return sha256 hash of a GCR image.
// The digest comes from the registry manifest, so no image data is read.
func (i *image) QuickSHA256Hash() (string, error) {
	return i.quickHash, nil
}

// Description provides additional description for GCP image.
func (i *image) Description() string {
	return i.description
}
143 |
144 | // NewRepo returns new instance of a GCR repository.
145 | func NewRepo(ctx context.Context, oauth2Token oauth2.TokenSource, repositoryPath string) (*Repo, error) {
146 | repo, err := name.NewRepository(repositoryPath)
147 | if err != nil {
148 | return nil, fmt.Errorf("could not create a new Container Registry repository: %v", err)
149 | }
150 |
151 | auth = google.NewTokenSourceAuthenticator(oauth2Token)
152 | opts = google.WithAuth(auth)
153 | remoteOpts = append(remoteOpts, remote.WithAuth(auth))
154 |
155 | return &Repo{path: repositoryPath, gcr: repo}, nil
156 | }
157 |
// Repo holds data related to a GCR repository.
type Repo struct {
	path   string          // repository path as given to NewRepo
	gcr    name.Repository // parsed registry repository
	images []*image        // images found by DiscoverRepo
}

// RepoName returns repository name.
func (r *Repo) RepoName() string {
	return RepoName
}

// RepoPath returns repository path.
func (r *Repo) RepoPath() string {
	return r.path
}
174 |
175 | // DiscoverRepo traverses the GCR repository and return supported images.
176 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) {
177 | if err := google.Walk(r.gcr, discoverImages(&r.images), opts); err != nil {
178 | return nil, fmt.Errorf("error while discovering %s GCR repository: %v", r.path, err)
179 | }
180 |
181 | var sources []hashr.Source
182 | for _, image := range r.images {
183 | sources = append(sources, image)
184 | }
185 |
186 | return sources, nil
187 | }
188 |
// image describes a single container image discovered in a GCR repository.
type image struct {
	id          string // repository name, e.g. "gcr.io/project/name"
	localPath   string // local tarball path, set by Preprocess
	remotePath  string // remote repository path
	quickHash   string // hex sha256 digest taken from the registry manifest
	description string // human-readable tags/media-type/timestamps summary
}
196 |
// supportedMedia reports whether a manifest media type can be processed by
// this importer. Schema-1 Docker manifests and OCI manifests are excluded;
// the comparison is case-insensitive.
func supportedMedia(mediaType string) bool {
	switch strings.ToLower(mediaType) {
	case "application/vnd.docker.distribution.manifest.v1+json",
		"application/vnd.docker.distribution.manifest.v1+prettyjws",
		"application/vnd.oci.image.manifest.v1+json":
		return false
	}
	return true
}
212 |
213 | func discoverImages(images *[]*image) google.WalkFunc {
214 | return func(repo name.Repository, tags *google.Tags, err error) error {
215 | if err != nil {
216 | return err
217 | }
218 |
219 | for digest, manifest := range tags.Manifests {
220 | if !supportedMedia(manifest.MediaType) {
221 | continue
222 | }
223 |
224 | if !strings.Contains(digest, "sha256:") {
225 | return fmt.Errorf("image digest is not in expected format: %s", digest)
226 | }
227 |
228 | parts := strings.Split(digest, ":")
229 | if len(parts[1]) != 64 {
230 | return fmt.Errorf("image digest is not in expected format: %s", digest)
231 | }
232 |
233 | *images = append(*images, &image{
234 | id: repo.Name(),
235 | quickHash: parts[1],
236 | remotePath: repo.Name(),
237 | description: fmt.Sprintf("Tags: %s, Media Type: %s, Created on: %s, Uploaded on: %s", manifest.Tags, manifest.MediaType, manifest.Created.UTC().String(), manifest.Uploaded.UTC().String()),
238 | })
239 | }
240 |
241 | return nil
242 | }
243 | }
244 |
245 | func extractTarGz(r io.Reader, outputFolder string) error {
246 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) {
247 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil {
248 | return fmt.Errorf("error while creating target directory: %v", err2)
249 | }
250 | }
251 |
252 | gzReader, err := gzip.NewReader(r)
253 | if err != nil {
254 | return err
255 | }
256 |
257 | tarReader := tar.NewReader(gzReader)
258 |
259 | glog.Infof("Extracting to %s", outputFolder)
260 |
261 | for {
262 | header, err := tarReader.Next()
263 |
264 | switch {
265 | case err == io.EOF:
266 | return nil
267 | case err != nil:
268 | return err
269 | }
270 |
271 | if containsDotDot(header.Name) {
272 | glog.Warningf("not extracting %s, potential path traversal", header.Name)
273 | continue
274 | }
275 | destEntry := filepath.Join(outputFolder, header.Name)
276 |
277 | switch header.Typeflag {
278 | case tar.TypeDir:
279 | if _, err := os.Stat(destEntry); os.IsNotExist(err) {
280 | if err := os.MkdirAll(destEntry, 0755); err != nil {
281 | return fmt.Errorf("error while creating destination directory: %v", err)
282 | }
283 | }
284 | case tar.TypeReg:
285 | if _, err := os.Stat(filepath.Dir(destEntry)); os.IsNotExist(err) {
286 | if err := os.MkdirAll(filepath.Dir(destEntry), 0755); err != nil {
287 | return fmt.Errorf("error while creating destination directory: %v", err)
288 | }
289 | }
290 |
291 | destFile, err := os.Create(destEntry)
292 | if err != nil {
293 | return fmt.Errorf("error while creating destination file: %v", err)
294 | }
295 |
296 | _, err = io.Copy(destFile, tarReader)
297 | if err != nil {
298 | return fmt.Errorf("error while extracting destination file: %v", err)
299 | }
300 | destFile.Close()
301 | }
302 | }
303 | }
304 |
// containsDotDot reports whether any slash- or backslash-separated element
// of v is "..", which would let a path escape its extraction root.
func containsDotDot(v string) bool {
	if !strings.Contains(v, "..") {
		return false
	}
	for _, part := range strings.FieldsFunc(v, isSlashRune) {
		if part == ".." {
			return true
		}
	}
	return false
}

// isSlashRune reports whether r is a forward or backward slash.
func isSlashRune(r rune) bool {
	switch r {
	case '/', '\\':
		return true
	}
	return false
}
318 |
--------------------------------------------------------------------------------
/importers/gcr/gcr_test.go:
--------------------------------------------------------------------------------
1 | package gcr
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "crypto/ecdsa"
7 | "crypto/elliptic"
8 | "crypto/rand"
9 | "crypto/tls"
10 | "crypto/x509"
11 | "encoding/json"
12 | "encoding/pem"
13 | "math/big"
14 | "net"
15 | "net/http"
16 | "net/http/httptest"
17 | "path"
18 | "strings"
19 | "testing"
20 | "time"
21 |
22 | "github.com/golang/glog"
23 | "golang.org/x/oauth2"
24 |
25 | "github.com/google/go-cmp/cmp"
26 | "github.com/google/go-cmp/cmp/cmpopts"
27 | "github.com/google/go-containerregistry/pkg/name"
28 | "github.com/google/go-containerregistry/pkg/registry"
29 | "github.com/google/go-containerregistry/pkg/v1/google"
30 | "github.com/google/go-containerregistry/pkg/v1/partial"
31 | "github.com/google/go-containerregistry/pkg/v1/random"
32 | )
33 |
34 | // Helper functions below (newTLSServer, etc.) have been copied from https://github.com/google/go-containerregistry
35 |
// fakeRepo is an in-memory stand-in for a GCR registry: tags/list requests
// are answered from repos, everything else is delegated to h.
type fakeRepo struct {
	h     http.Handler           // backing registry handler
	repos map[string]google.Tags // repo path -> tags/manifest metadata
}
40 |
41 | func (fr *fakeRepo) ServeHTTP(w http.ResponseWriter, r *http.Request) {
42 | glog.Infof("%s %s", r.Method, r.URL)
43 | if strings.HasPrefix(r.URL.Path, "/v2/") && strings.HasSuffix(r.URL.Path, "/tags/list") {
44 | repo := strings.TrimSuffix(strings.TrimPrefix(r.URL.Path, "/v2/"), "/tags/list")
45 | if tags, ok := fr.repos[repo]; !ok {
46 | w.WriteHeader(http.StatusNotFound)
47 | } else {
48 | glog.Infof("%+v", tags)
49 | if err := json.NewEncoder(w).Encode(tags); err != nil {
50 | glog.Exit(err)
51 | }
52 | }
53 | } else {
54 | fr.h.ServeHTTP(w, r)
55 | }
56 | }
57 |
58 | func newFakeRepo(stuff map[name.Reference]partial.Describable) (*fakeRepo, error) {
59 | h := registry.New()
60 | repos := make(map[string]google.Tags)
61 |
62 | for ref, thing := range stuff {
63 | repo := ref.Context().RepositoryStr()
64 | tags, ok := repos[repo]
65 | if !ok {
66 | tags = google.Tags{
67 | Name: repo,
68 | Children: []string{},
69 | }
70 | }
71 |
72 | // Populate the "child" field.
73 | for parentPath := repo; parentPath != "."; parentPath = path.Dir(parentPath) {
74 | child, parent := path.Base(parentPath), path.Dir(parentPath)
75 | tags, ok := repos[parent]
76 | if !ok {
77 | tags = google.Tags{}
78 | }
79 | for _, c := range repos[parent].Children {
80 | if c == child {
81 | break
82 | }
83 | }
84 | tags.Children = append(tags.Children, child)
85 | repos[parent] = tags
86 | }
87 |
88 | // Populate the "manifests" and "tags" field.
89 | d, err := thing.Digest()
90 | if err != nil {
91 | return nil, err
92 | }
93 | mt, err := thing.MediaType()
94 | if err != nil {
95 | return nil, err
96 | }
97 | if tags.Manifests == nil {
98 | tags.Manifests = make(map[string]google.ManifestInfo)
99 | }
100 | mi, ok := tags.Manifests[d.String()]
101 | if !ok {
102 | mi = google.ManifestInfo{
103 | MediaType: string(mt),
104 | Tags: []string{},
105 | }
106 | }
107 | if tag, ok := ref.(name.Tag); ok {
108 | tags.Tags = append(tags.Tags, tag.Identifier())
109 | mi.Tags = append(mi.Tags, tag.Identifier())
110 | }
111 | tags.Manifests[d.String()] = mi
112 | repos[repo] = tags
113 | }
114 |
115 | return &fakeRepo{h: h, repos: repos}, nil
116 | }
117 |
118 | func getTestRepo() (*fakeRepo, []*image, error) {
119 | image1, err := random.Image(1024, 5)
120 | if err != nil {
121 | return nil, nil, err
122 | }
123 |
124 | ha1, err := image1.Digest()
125 | if err != nil {
126 | return nil, nil, err
127 | }
128 |
129 | image1name := "registry.example.com/test/hashr/aaa"
130 | lr1, err := name.ParseReference(image1name)
131 | if err != nil {
132 | return nil, nil, err
133 | }
134 |
135 | ref1 := lr1.Context().Tag("foo")
136 |
137 | image2, err := random.Image(1024, 5)
138 | if err != nil {
139 | return nil, nil, err
140 | }
141 |
142 | ha2, err := image2.Digest()
143 | if err != nil {
144 | return nil, nil, err
145 | }
146 |
147 | image2name := "registry.example.com/test/hashr/bbb"
148 | lr2, err := name.ParseReference(image2name)
149 | if err != nil {
150 | return nil, nil, err
151 | }
152 |
153 | ref2 := lr2.Context().Tag("bar")
154 | wantImages := []*image{
155 | {
156 | id: image1name,
157 | quickHash: ha1.Hex,
158 | description: "Tags: [foo], Media Type: application/vnd.docker.distribution.manifest.v2+json, Created on: 1754-08-30 22:43:41.129 +0000 UTC, Uploaded on: 1754-08-30 22:43:41.129 +0000 UTC",
159 | remotePath: image1name,
160 | },
161 | {
162 | id: image2name,
163 | quickHash: ha2.Hex,
164 | description: "Tags: [bar], Media Type: application/vnd.docker.distribution.manifest.v2+json, Created on: 1754-08-30 22:43:41.129 +0000 UTC, Uploaded on: 1754-08-30 22:43:41.129 +0000 UTC",
165 | remotePath: image2name,
166 | },
167 | }
168 |
169 | // Set up a fake registry.
170 | h, err := newFakeRepo(map[name.Reference]partial.Describable{
171 | ref1: image1,
172 | ref2: image2,
173 | })
174 | if err != nil {
175 | return nil, nil, err
176 | }
177 |
178 | return h, wantImages, nil
179 | }
180 |
181 | func TestDiscoverRepo(t *testing.T) {
182 | fakeRepo, wantImages, err := getTestRepo()
183 | if err != nil {
184 | t.Fatalf("could not create fake GCR repo: %v", err)
185 | }
186 |
187 | s, err := newTLSServer("registry.example.com", fakeRepo)
188 | if err != nil {
189 | glog.Exit(err)
190 | }
191 | defer s.Close()
192 |
193 | repo, err := NewRepo(context.Background(), oauth2.StaticTokenSource(&oauth2.Token{}), "registry.example.com/test/hashr")
194 | if err != nil {
195 | t.Fatalf("could not create new GCR repo: %v", err)
196 | }
197 |
198 | // Route requests to our test registry.
199 | opts = google.WithTransport(s.Client().Transport)
200 |
201 | gotSources, err := repo.DiscoverRepo()
202 | if err != nil {
203 | t.Fatalf("unexpected error in DiscoverRepo(): %v", err)
204 | }
205 |
206 | var gotImages []*image
207 | for _, source := range gotSources {
208 | if image, ok := source.(*image); ok {
209 | gotImages = append(gotImages, image)
210 | } else {
211 | t.Fatal("error while casting Source interface to Image struct")
212 | }
213 | }
214 |
215 | cmpOpts := []cmp.Option{
216 | cmp.AllowUnexported(image{}),
217 | cmpopts.SortSlices(func(a, b *image) bool {
218 | return a.id < b.id
219 | }),
220 | }
221 |
222 | if !cmp.Equal(wantImages, gotImages, cmpOpts...) {
223 | t.Errorf("DiscoverRepo() unexpected diff (-want/+got):\n%s", cmp.Diff(wantImages, gotImages, cmp.AllowUnexported(image{})))
224 | }
225 | }
226 |
227 | func newTLSServer(domain string, handler http.Handler) (*httptest.Server, error) {
228 | s := httptest.NewUnstartedServer(handler)
229 |
230 | template := x509.Certificate{
231 | SerialNumber: big.NewInt(1),
232 | NotBefore: time.Now().Add(-1 * time.Hour),
233 | NotAfter: time.Now().Add(time.Hour),
234 | IPAddresses: []net.IP{
235 | net.IPv4(127, 0, 0, 1),
236 | net.IPv6loopback,
237 | },
238 | DNSNames: []string{domain},
239 |
240 | KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign,
241 | ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
242 | BasicConstraintsValid: true,
243 | IsCA: true,
244 | }
245 |
246 | priv, err := ecdsa.GenerateKey(elliptic.P521(), rand.Reader)
247 | if err != nil {
248 | return nil, err
249 | }
250 |
251 | b, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv)
252 | if err != nil {
253 | return nil, err
254 | }
255 |
256 | pc := &bytes.Buffer{}
257 | if err := pem.Encode(pc, &pem.Block{Type: "CERTIFICATE", Bytes: b}); err != nil {
258 | return nil, err
259 | }
260 |
261 | ek, err := x509.MarshalECPrivateKey(priv)
262 | if err != nil {
263 | return nil, err
264 | }
265 |
266 | pk := &bytes.Buffer{}
267 | if err := pem.Encode(pk, &pem.Block{Type: "EC PRIVATE KEY", Bytes: ek}); err != nil {
268 | return nil, err
269 | }
270 |
271 | c, err := tls.X509KeyPair(pc.Bytes(), pk.Bytes())
272 | if err != nil {
273 | return nil, err
274 | }
275 | s.TLS = &tls.Config{
276 | Certificates: []tls.Certificate{c},
277 | }
278 | s.StartTLS()
279 |
280 | certpool := x509.NewCertPool()
281 | certpool.AddCert(s.Certificate())
282 |
283 | t := &http.Transport{
284 | TLSClientConfig: &tls.Config{
285 | RootCAs: certpool,
286 | },
287 | DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
288 | return net.Dial(s.Listener.Addr().Network(), s.Listener.Addr().String())
289 | },
290 | }
291 | s.Client().Transport = t
292 |
293 | return s, nil
294 | }
295 |
--------------------------------------------------------------------------------
/importers/importer.go.example:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package template
16 |
17 | import (
18 | "context"
19 |
20 | "github.com/google/hashr/core/hashr"
21 | )
22 |
// Template placeholder: replace "windows" with the new importer's name.
const (
	// RepoName contains the repository name.
	RepoName = "windows"
)
27 |
// image represents a single discoverable source in this repository; the
// fields back the hashr.Source accessor methods below.
type image struct {
	id         string // non-unique identifier returned by ID()
	localPath  string // local copy of the source, set during preprocessing
	remotePath string // original location of the source
	quickHash  string // precomputed sha256 quick hash
}
34 |
35 | // Preprocess extracts the contents of Windows ISO file.
36 | func (i *image) Preprocess() (string, error) {
37 | return "", nil
38 | }
39 |
40 | // ID returns non-unique Windows ISO file ID.
41 | func (i *image) ID() string {
42 | return i.id
43 | }
44 |
45 | // RepoName returns repository name.
46 | func (i *image) RepoName() string {
47 | return RepoName
48 | }
49 |
50 | // RepoPath returns repository path.
51 | func (i *image) RepoPath() string {
52 | return ""
53 | }
54 |
55 | // LocalPath returns local path to a Windows ISO file.
56 | func (i *image) LocalPath() string {
57 | return i.localPath
58 | }
59 |
60 | // RemotePath returns remote path to a Windows ISO file.
61 | func (i *image) RemotePath() string {
62 | return i.remotePath
63 | }
64 |
65 | // QuickSHA256Hash calculates sha256 hash of a Windows Update file metadata.
66 | func (i *image) QuickSHA256Hash() (string, error) {
67 | return i.quickHash, nil
68 | }
69 |
70 | // NewRepo returns new instance of a Windows ISO repository.
71 | func NewRepo(ctx context.Context, repositoryPath string) (*Repo, error) {
72 | return &Repo{path: repositoryPath}, nil
73 | }
74 |
// Repo holds data related to a Windows ISO repository.
// NOTE(review): the original comment said "WSUS repository", which conflicts
// with the ISO wording used elsewhere in this template — confirm intent.
type Repo struct {
	path string // root path of the repository
}
79 |
80 | // RepoName returns repository name.
81 | func (r *Repo) RepoName() string {
82 | return RepoName
83 | }
84 |
85 | // RepoPath returns repository path.
86 | func (r *Repo) RepoPath() string {
87 | return r.path
88 | }
89 |
90 | // DiscoverRepo traverses the repository and looks for files that are related to WSUS packages.
91 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) {
92 | var sources []hashr.Source
93 | return sources, nil
94 | }
95 |
--------------------------------------------------------------------------------
/importers/iso9660/generate_tests.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Converts every .tar.gz fixture under the current tree into an .iso image
# with the same contents, then removes the original tarball.

# Pipe find into a read loop instead of `for $(find ...)`: the original
# word-split paths on whitespace.
find . -name '*.tar.gz' | while IFS= read -r tar; do
  echo "$tar"
  filename=$(basename "$tar")
  tardir=$(dirname "$tar")
  tempdir=$(mktemp -d)
  tar -C "$tempdir" -xf "$tar"

  # Build the ISO from inside the extraction dir (subshell avoids the
  # fragile cd / cd - dance) so archived paths are relative.
  (cd "$tempdir" && mkisofs -o data.iso .)
  # Parameter expansion replaces the old `sed 's/.tar.gz/.iso/g'`, whose
  # unescaped dots and /g flag could mangle unrelated parts of the name.
  cp "$tempdir/data.iso" "$tardir/${filename%.tar.gz}.iso"
  rm -r "$tempdir"
  rm "$tar"
done
17 |
--------------------------------------------------------------------------------
/importers/iso9660/iso9660.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package iso9660 implements iso9660 repository importer.
16 | package iso9660
17 |
18 | import (
19 | "crypto/sha256"
20 | "fmt"
21 | "io"
22 | "io/fs"
23 | "os"
24 | "path/filepath"
25 | "strings"
26 |
27 | "github.com/hooklift/iso9660"
28 |
29 | "github.com/golang/glog"
30 |
31 | "github.com/google/hashr/core/hashr"
32 | "github.com/google/hashr/importers/common"
33 | )
34 |
const (
	// RepoName contains the repository name.
	RepoName = "iso9660"
	// chunkSize is the quick-hash sample size taken from each end of large files.
	chunkSize = 1024 * 1024 * 10 // 10MB
)
40 |
// ISO9660 holds data related to a single ISO file in the repository.
type ISO9660 struct {
	filename        string // base name of the .iso file; returned by ID()
	remotePath      string // path of the .iso file inside the repository
	localPath       string // local copy created by Preprocess
	quickSha256hash string // cached result of QuickSHA256Hash
	repoPath        string // repository root this archive was discovered under
}
49 |
50 | // Preprocess extracts the contents of a .tar.gz file.
51 | func (a *ISO9660) Preprocess() (string, error) {
52 | var err error
53 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID())
54 | if err != nil {
55 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err)
56 | }
57 |
58 | baseDir, _ := filepath.Split(a.localPath)
59 | extractionDir := filepath.Join(baseDir, "extracted")
60 |
61 | if err := extractIso(a.localPath, extractionDir); err != nil {
62 | return "", err
63 | }
64 |
65 | return extractionDir, nil
66 | }
67 |
68 | func extractIso(isoPath, outputFolder string) error {
69 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) {
70 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil {
71 | return fmt.Errorf("error while creating target directory: %v", err2)
72 | }
73 | }
74 |
75 | // Step 1: Open ISO reader
76 | file, err := os.Open(isoPath)
77 | if err != nil {
78 | return fmt.Errorf("error opening ISO file: %v", err)
79 | }
80 |
81 | r, err := iso9660.NewReader(file)
82 | if err != nil {
83 | return fmt.Errorf("error parsing ISO file: %v", err)
84 | }
85 |
86 | // 2. Get the absolute destination path
87 | outputFolder, err = filepath.Abs(outputFolder)
88 | if err != nil {
89 | return err
90 | }
91 |
92 | // Step 3: Iterate over files
93 | for {
94 | f, err := r.Next()
95 | if err == io.EOF {
96 | break
97 | }
98 |
99 | if err != nil {
100 | return fmt.Errorf("error retrieving next file from ISO: %v", err)
101 | }
102 |
103 | err = unpackFile(f, outputFolder)
104 | if err != nil {
105 | return err
106 | }
107 | }
108 |
109 | return nil
110 | }
111 |
112 | func unpackFile(f fs.FileInfo, destination string) error {
113 | // Step 4: Create output path
114 | fp := filepath.Join(destination, f.Name())
115 | if f.IsDir() {
116 | if err := os.MkdirAll(fp, f.Mode()); err != nil {
117 | return fmt.Errorf("error creating destination directory: %v", err)
118 | }
119 | return nil
120 | }
121 |
122 | parentDir, _ := filepath.Split(fp)
123 | if err := os.MkdirAll(parentDir, f.Mode()); err != nil {
124 | return fmt.Errorf("error while creating target directory: %v", err)
125 | }
126 |
127 | // Step 5: Create destination file
128 | freader := f.Sys().(io.Reader)
129 | ff, err := os.Create(fp)
130 | if err != nil {
131 | fmt.Errorf("error while creating destination file: %v", err)
132 | }
133 | defer func() {
134 | if err := ff.Close(); err != nil {
135 | fmt.Errorf("error while closing file: %v", err)
136 | }
137 | }()
138 |
139 | if err := ff.Chmod(f.Mode()); err != nil {
140 | fmt.Errorf("error while chmod: %v", err)
141 | }
142 |
143 | // Step 6: Extract file contents
144 | if _, err := io.Copy(ff, freader); err != nil {
145 | fmt.Errorf("error while extracting file data: %v", err)
146 | }
147 | return nil
148 | }
149 |
150 | // ID returns non-unique ISO file Archive ID.
151 | func (a *ISO9660) ID() string {
152 | return a.filename
153 | }
154 |
155 | // RepoName returns repository name.
156 | func (a *ISO9660) RepoName() string {
157 | return RepoName
158 | }
159 |
160 | // RepoPath returns repository path.
161 | func (a *ISO9660) RepoPath() string {
162 | return a.repoPath
163 | }
164 |
165 | // LocalPath returns local path to a ISO file Archive .iso file.
166 | func (a *ISO9660) LocalPath() string {
167 | return a.localPath
168 | }
169 |
170 | // RemotePath returns non-local path to a ISO file Archive .iso file.
171 | func (a *ISO9660) RemotePath() string {
172 | return a.remotePath
173 | }
174 |
175 | // Description provides additional description for a .iso file.
176 | func (a *ISO9660) Description() string {
177 | return ""
178 | }
179 |
180 | // QuickSHA256Hash calculates sha256 hash of .iso file.
181 | func (a *ISO9660) QuickSHA256Hash() (string, error) {
182 | // Check if the quick hash was already calculated.
183 | if a.quickSha256hash != "" {
184 | return a.quickSha256hash, nil
185 | }
186 |
187 | f, err := os.Open(a.remotePath)
188 | if err != nil {
189 | return "", err
190 | }
191 | defer f.Close()
192 |
193 | fileInfo, err := f.Stat()
194 | if err != nil {
195 | return "", err
196 | }
197 |
198 | // Check if the file is smaller than 20MB, if so hash the whole file.
199 | if fileInfo.Size() < int64(chunkSize*2) {
200 | h := sha256.New()
201 | if _, err := io.Copy(h, f); err != nil {
202 | return "", err
203 | }
204 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil))
205 | return a.quickSha256hash, nil
206 | }
207 |
208 | header := make([]byte, chunkSize)
209 | _, err = f.Read(header)
210 | if err != nil {
211 | return "", err
212 | }
213 |
214 | footer := make([]byte, chunkSize)
215 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize))
216 | if err != nil {
217 | return "", err
218 | }
219 |
220 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...)))
221 | return a.quickSha256hash, nil
222 | }
223 |
224 | // NewRepo returns new instance of an ISO file repository.
225 | func NewRepo(path string) *Repo {
226 | return &Repo{location: path}
227 | }
228 |
// Repo holds data related to an ISO file repository.
type Repo struct {
	location string     // root directory walked for .iso files
	files    []string   // paths of .iso files collected by walk
	Archives []*ISO9660 // archives built from files by DiscoverRepo
}
235 |
236 | // RepoName returns repository name.
237 | func (r *Repo) RepoName() string {
238 | return RepoName
239 | }
240 |
241 | // RepoPath returns repository path.
242 | func (r *Repo) RepoPath() string {
243 | return r.location
244 | }
245 |
246 | // DiscoverRepo traverses the repository and looks for files that are related to ISO file base Archives.
247 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) {
248 | if err := filepath.Walk(r.location, walk(&r.files)); err != nil {
249 | return nil, err
250 | }
251 |
252 | for _, file := range r.files {
253 | _, filename := filepath.Split(file)
254 |
255 | r.Archives = append(r.Archives, &ISO9660{filename: filename, remotePath: file, repoPath: r.location})
256 | }
257 |
258 | var sources []hashr.Source
259 | for _, Archive := range r.Archives {
260 | sources = append(sources, Archive)
261 | }
262 |
263 | return sources, nil
264 | }
265 |
266 | func walk(files *[]string) filepath.WalkFunc {
267 | return func(path string, info os.FileInfo, err error) error {
268 | if err != nil {
269 | glog.Errorf("Could not open %s: %v", path, err)
270 | return nil
271 | }
272 | if info.IsDir() {
273 | return nil
274 | }
275 |
276 | if strings.HasSuffix(info.Name(), ".iso") {
277 | *files = append(*files, path)
278 | }
279 |
280 | return nil
281 | }
282 | }
283 |
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200106.00.00/ubuntu-desktop.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200106.00.00/ubuntu-desktop.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200106.00.00/ubuntu-laptop.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200106.00.00/ubuntu-laptop.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200106.00.00/ubuntu-server.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200106.00.00/ubuntu-server.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200107.00.00/ubuntu-desktop.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.00.00/ubuntu-desktop.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200107.00.00/ubuntu-laptop.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.00.00/ubuntu-laptop.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200107.00.00/ubuntu-server.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.00.00/ubuntu-server.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200107.01.00/ubuntu-desktop.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.01.00/ubuntu-desktop.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200107.01.00/ubuntu-laptop.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.01.00/ubuntu-laptop.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200107.01.00/ubuntu-server.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200107.01.00/ubuntu-server.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200108.00.00/ubuntu-desktop.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200108.00.00/ubuntu-desktop.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200108.00.00/ubuntu-laptop.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200108.00.00/ubuntu-laptop.iso
--------------------------------------------------------------------------------
/importers/iso9660/testdata/20200108.00.00/ubuntu-server.iso:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/iso9660/testdata/20200108.00.00/ubuntu-server.iso
--------------------------------------------------------------------------------
/importers/rpm/generate_tests.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Rebuilds every .tar.gz fixture under the current tree as a noarch .rpm
# containing the same files, then removes the original tarball.

# Pipe find into a read loop instead of `for $(find ...)`: the original
# word-split paths on whitespace.
find . -name '*.tar.gz' | while IFS= read -r tar; do
  echo "$tar"
  filename=$(basename "$tar")
  tardir=$(dirname "$tar")
  tempdir=$(mktemp -d)
  echo "$tempdir"
  mkdir -p "$tempdir/BUILDROOT" "$tempdir/BUILD" "$tempdir/RPMS" "$tempdir/SOURCES" "$tempdir/SPECS" "$tempdir/SRPMS"
  # Repack the fixture as testdata-1.0.tar.gz, the Source0 rpmbuild expects.
  mkdir -p "$tempdir/SOURCES/testdata-1.0"
  tar -C "$tempdir/SOURCES/testdata-1.0" -xf "$tar"
  (cd "$tempdir/SOURCES" && tar -czf testdata-1.0.tar.gz testdata-1.0 && tar -tf testdata-1.0.tar.gz)
  rm -rf "$tempdir/SOURCES/testdata-1.0"

  # Write the spec file. The original used `cat < file`, which tries to READ
  # a nonexistent spec; the heredoc must be redirected INTO the file.
  cat > "$tempdir/SPECS/testdata.spec" << EOF
Summary: Test data
Name: testdata
Version: 1.0
Release: 1%{?dist}
License: Apache 2.0
Group: Development/Tools
BuildArch: noarch
Source0: %{name}-%{version}.tar.gz

%description
Just test data

%prep
%setup -q

%install
rm -rf "\$RPM_BUILD_ROOT"
mkdir -p "\$RPM_BUILD_ROOT"
cp -r . "\$RPM_BUILD_ROOT/"

%clean
rm -rf \$RPM_BUILD_ROOT

%files
/*


%changelog
* Fri Nov 18 2022 Carl Svensson - 0.0.1
- Test data
EOF
  rpmbuild --buildroot "$tempdir/BUILDROOT" --define "_topdir $tempdir" -bb "$tempdir/SPECS/testdata.spec"
  cp "$tempdir/RPMS/noarch/testdata-1.0-1.noarch.rpm" "$tardir/${filename%.tar.gz}.rpm"
  rm -r "$tempdir"
  rm "$tar"
done
56 |
--------------------------------------------------------------------------------
/importers/rpm/rpm.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package rpm implements rpm package importer.
16 | package rpm
17 |
18 | import (
19 | "crypto/sha256"
20 | "fmt"
21 | "io"
22 | "os"
23 | "path/filepath"
24 | "strings"
25 |
26 | "github.com/golang/glog"
27 |
28 | "github.com/google/hashr/core/hashr"
29 | "github.com/google/hashr/importers/common"
30 |
31 | rpmutils "github.com/sassoftware/go-rpmutils"
32 | )
33 |
const (
	// RepoName contains the repository name.
	RepoName = "rpm"
	// chunkSize is the quick-hash sample size taken from each end of large files.
	chunkSize = 1024 * 1024 * 10 // 10MB
)
39 |
// Archive holds data related to a single rpm package in the repository.
type Archive struct {
	filename        string // base name of the .rpm file; returned by ID()
	remotePath      string // path of the .rpm file inside the repository
	localPath       string // local copy created by Preprocess
	quickSha256hash string // cached result of QuickSHA256Hash
	repoPath        string // repository root this archive was discovered under
}
48 |
49 | func extractRPM(rpmPath, outputFolder string) error {
50 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) {
51 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil {
52 | return fmt.Errorf("error while creating target directory: %v", err2)
53 | }
54 | }
55 |
56 | fd, err := os.Open(rpmPath)
57 | if err != nil {
58 | return fmt.Errorf("failed to open rpm file: %v", err)
59 | }
60 | defer fd.Close()
61 |
62 | rpmFile, err := rpmutils.ReadRpm(fd)
63 | if err != nil {
64 | return fmt.Errorf("failed to parse rpm file: %v", err)
65 | }
66 |
67 | err = rpmFile.ExpandPayload(outputFolder)
68 | if err != nil {
69 | return fmt.Errorf("failed to extract rpm file: %v", err)
70 | }
71 |
72 | return nil
73 | }
74 |
75 | // Preprocess extracts the contents of a .rpm file.
76 | func (a *Archive) Preprocess() (string, error) {
77 | var err error
78 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID())
79 | if err != nil {
80 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err)
81 | }
82 |
83 | baseDir, _ := filepath.Split(a.localPath)
84 | extractionDir := filepath.Join(baseDir, "extracted")
85 |
86 | if err := extractRPM(a.localPath, extractionDir); err != nil {
87 | return "", err
88 | }
89 |
90 | return extractionDir, nil
91 | }
92 |
93 | // ID returns non-unique rpm Archive ID.
94 | func (a *Archive) ID() string {
95 | return a.filename
96 | }
97 |
98 | // RepoName returns repository name.
99 | func (a *Archive) RepoName() string {
100 | return RepoName
101 | }
102 |
103 | // RepoPath returns repository path.
104 | func (a *Archive) RepoPath() string {
105 | return a.repoPath
106 | }
107 |
108 | // LocalPath returns local path to a rpm Archive .rpm file.
109 | func (a *Archive) LocalPath() string {
110 | return a.localPath
111 | }
112 |
113 | // RemotePath returns non-local path to a rpm Archive .rpm file.
114 | func (a *Archive) RemotePath() string {
115 | return a.remotePath
116 | }
117 |
118 | // Description provides additional description for a .rpm file.
119 | func (a *Archive) Description() string {
120 | return ""
121 | }
122 |
123 | // QuickSHA256Hash calculates sha256 hash of .rpm file.
124 | func (a *Archive) QuickSHA256Hash() (string, error) {
125 | // Check if the quick hash was already calculated.
126 | if a.quickSha256hash != "" {
127 | return a.quickSha256hash, nil
128 | }
129 |
130 | f, err := os.Open(a.remotePath)
131 | if err != nil {
132 | return "", err
133 | }
134 | defer f.Close()
135 |
136 | fileInfo, err := f.Stat()
137 | if err != nil {
138 | return "", err
139 | }
140 |
141 | // Check if the file is smaller than 20MB, if so hash the whole file.
142 | if fileInfo.Size() < int64(chunkSize*2) {
143 | h := sha256.New()
144 | if _, err := io.Copy(h, f); err != nil {
145 | return "", err
146 | }
147 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil))
148 | return a.quickSha256hash, nil
149 | }
150 |
151 | header := make([]byte, chunkSize)
152 | _, err = f.Read(header)
153 | if err != nil {
154 | return "", err
155 | }
156 |
157 | footer := make([]byte, chunkSize)
158 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize))
159 | if err != nil {
160 | return "", err
161 | }
162 |
163 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...)))
164 | return a.quickSha256hash, nil
165 | }
166 |
167 | // NewRepo returns new instance of rpm repository.
168 | func NewRepo(path string) *Repo {
169 | return &Repo{location: path}
170 | }
171 |
// Repo holds data related to a rpm repository.
type Repo struct {
	location string     // root directory walked for .rpm files
	files    []string   // paths of .rpm files collected by walk
	Archives []*Archive // archives built from files by DiscoverRepo
}
178 |
179 | // RepoName returns repository name.
180 | func (r *Repo) RepoName() string {
181 | return RepoName
182 | }
183 |
184 | // RepoPath returns repository path.
185 | func (r *Repo) RepoPath() string {
186 | return r.location
187 | }
188 |
189 | // DiscoverRepo traverses the repository and looks for files that are related to rpm archives.
190 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) {
191 |
192 | if err := filepath.Walk(r.location, walk(&r.files)); err != nil {
193 | return nil, err
194 | }
195 |
196 | for _, file := range r.files {
197 | _, filename := filepath.Split(file)
198 |
199 | if strings.HasSuffix(filename, ".rpm") {
200 | r.Archives = append(r.Archives, &Archive{filename: filename, remotePath: file, repoPath: r.location})
201 | }
202 | }
203 |
204 | var sources []hashr.Source
205 | for _, Archive := range r.Archives {
206 | sources = append(sources, Archive)
207 | }
208 |
209 | return sources, nil
210 | }
211 |
212 | func walk(files *[]string) filepath.WalkFunc {
213 | return func(path string, info os.FileInfo, err error) error {
214 | if err != nil {
215 | glog.Errorf("Could not open %s: %v", path, err)
216 | return nil
217 | }
218 | if info.IsDir() {
219 | return nil
220 | }
221 | if strings.HasSuffix(info.Name(), ".rpm") {
222 | *files = append(*files, path)
223 | }
224 |
225 | return nil
226 | }
227 | }
228 |
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200106.00.00/ubuntu-desktop.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200106.00.00/ubuntu-desktop.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200106.00.00/ubuntu-laptop.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200106.00.00/ubuntu-laptop.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200106.00.00/ubuntu-server.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200106.00.00/ubuntu-server.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200107.00.00/ubuntu-desktop.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.00.00/ubuntu-desktop.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200107.00.00/ubuntu-laptop.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.00.00/ubuntu-laptop.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200107.00.00/ubuntu-server.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.00.00/ubuntu-server.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200107.01.00/ubuntu-desktop.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.01.00/ubuntu-desktop.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200107.01.00/ubuntu-laptop.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.01.00/ubuntu-laptop.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200107.01.00/ubuntu-server.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200107.01.00/ubuntu-server.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200108.00.00/ubuntu-desktop.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200108.00.00/ubuntu-desktop.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200108.00.00/ubuntu-laptop.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200108.00.00/ubuntu-laptop.rpm
--------------------------------------------------------------------------------
/importers/rpm/testdata/20200108.00.00/ubuntu-server.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/rpm/testdata/20200108.00.00/ubuntu-server.rpm
--------------------------------------------------------------------------------
/importers/targz/targz.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package targz implements targz repository importer.
16 | package targz
17 |
18 | import (
19 | "crypto/sha256"
20 | "fmt"
21 | "io"
22 | "os"
23 | "path/filepath"
24 | "strings"
25 |
26 | "github.com/golang/glog"
27 |
28 | "github.com/google/hashr/core/hashr"
29 | "github.com/google/hashr/importers/common"
30 | )
31 |
const (
	// RepoName contains the repository name.
	RepoName = "targz"
	// chunkSize is both the size of the header/footer chunks hashed by
	// QuickSHA256Hash and half of its whole-file-hash threshold (2*chunkSize).
	chunkSize = 1024 * 1024 * 10 // 10MB
)
37 |
// Archive holds data related to targz archive.
type Archive struct {
	// filename is the archive's base file name; it doubles as the ID.
	filename string
	// remotePath is the archive's path in the repository being imported.
	remotePath string
	// localPath is set by Preprocess to the local copy of the archive.
	localPath string
	// quickSha256hash caches the result of QuickSHA256Hash.
	quickSha256hash string
	// repoPath is the repository root this archive was discovered under.
	repoPath string
}
46 |
47 | // Preprocess extracts the contents of a .tar.gz file.
48 | func (a *Archive) Preprocess() (string, error) {
49 | var err error
50 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID())
51 | if err != nil {
52 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err)
53 | }
54 |
55 | baseDir, _ := filepath.Split(a.localPath)
56 | extractionDir := filepath.Join(baseDir, "extracted")
57 |
58 | if err := common.ExtractTarGz(a.localPath, extractionDir); err != nil {
59 | return "", err
60 | }
61 |
62 | return extractionDir, nil
63 | }
64 |
// ID returns non-unique targz Archive ID.
// This is the archive's base filename, so archives with the same name in
// different directories share an ID.
func (a *Archive) ID() string {
	return a.filename
}
69 |
// RepoName returns repository name.
// This is the constant "targz" shared by all archives from this importer.
func (a *Archive) RepoName() string {
	return RepoName
}
74 |
// RepoPath returns repository path.
// This is the repository root, not the archive's own path.
func (a *Archive) RepoPath() string {
	return a.repoPath
}
79 |
// LocalPath returns local path to a targz Archive .tar.gz file.
// It is empty until Preprocess has copied the archive locally.
func (a *Archive) LocalPath() string {
	return a.localPath
}
84 |
// RemotePath returns non-local path to a targz Archive .tar.gz file,
// i.e. the path at which it was discovered in the repository.
func (a *Archive) RemotePath() string {
	return a.remotePath
}
89 |
// Description provides additional description for a .tar.gz file.
// targz archives carry no extra metadata, so this is always empty.
func (a *Archive) Description() string {
	return ""
}
94 |
95 | // QuickSHA256Hash calculates sha256 hash of .tar.gz file.
96 | func (a *Archive) QuickSHA256Hash() (string, error) {
97 | // Check if the quick hash was already calculated.
98 | if a.quickSha256hash != "" {
99 | return a.quickSha256hash, nil
100 | }
101 |
102 | f, err := os.Open(a.remotePath)
103 | if err != nil {
104 | return "", err
105 | }
106 | defer f.Close()
107 |
108 | fileInfo, err := f.Stat()
109 | if err != nil {
110 | return "", err
111 | }
112 |
113 | // Check if the file is smaller than 20MB, if so hash the whole file.
114 | if fileInfo.Size() < int64(chunkSize*2) {
115 | h := sha256.New()
116 | if _, err := io.Copy(h, f); err != nil {
117 | return "", err
118 | }
119 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil))
120 | return a.quickSha256hash, nil
121 | }
122 |
123 | header := make([]byte, chunkSize)
124 | _, err = f.Read(header)
125 | if err != nil {
126 | return "", err
127 | }
128 |
129 | footer := make([]byte, chunkSize)
130 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize))
131 | if err != nil {
132 | return "", err
133 | }
134 |
135 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...)))
136 | return a.quickSha256hash, nil
137 | }
138 |
// NewRepo returns new instance of targz repository rooted at the given
// file system path.
func NewRepo(path string) *Repo {
	return &Repo{location: path}
}
143 |
// Repo holds data related to a targz repository.
type Repo struct {
	// location is the root directory walked for .tar.gz files.
	location string
	// files holds all candidate paths collected by DiscoverRepo's walk.
	files []string
	// Archives holds one entry per discovered .tar.gz file.
	Archives []*Archive
}
150 |
// RepoName returns repository name, the constant "targz".
func (r *Repo) RepoName() string {
	return RepoName
}
155 |
// RepoPath returns repository path, i.e. the root passed to NewRepo.
func (r *Repo) RepoPath() string {
	return r.location
}
160 |
161 | // DiscoverRepo traverses the repository and looks for files that are related to targz base Archives.
162 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) {
163 |
164 | if err := filepath.Walk(r.location, walk(&r.files)); err != nil {
165 | return nil, err
166 | }
167 |
168 | for _, file := range r.files {
169 | _, filename := filepath.Split(file)
170 |
171 | if strings.HasSuffix(filename, ".tar.gz") {
172 | r.Archives = append(r.Archives, &Archive{filename: filename, remotePath: file, repoPath: r.location})
173 | }
174 | }
175 |
176 | var sources []hashr.Source
177 | for _, Archive := range r.Archives {
178 | sources = append(sources, Archive)
179 | }
180 |
181 | return sources, nil
182 | }
183 |
184 | func walk(files *[]string) filepath.WalkFunc {
185 | return func(path string, info os.FileInfo, err error) error {
186 | if err != nil {
187 | glog.Errorf("Could not open %s: %v", path, err)
188 | return nil
189 | }
190 | if info.IsDir() {
191 | return nil
192 | }
193 | if strings.HasSuffix(info.Name(), ".tar.gz") || strings.HasSuffix(info.Name(), ".tar.gz.sig") {
194 | *files = append(*files, path)
195 | }
196 |
197 | return nil
198 | }
199 | }
200 |
--------------------------------------------------------------------------------
/importers/targz/testdata/20200106.00.00/ubuntu-desktop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200106.00.00/ubuntu-desktop.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200106.00.00/ubuntu-laptop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200106.00.00/ubuntu-laptop.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200106.00.00/ubuntu-server.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200106.00.00/ubuntu-server.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200107.00.00/ubuntu-desktop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.00.00/ubuntu-desktop.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200107.00.00/ubuntu-laptop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.00.00/ubuntu-laptop.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200107.00.00/ubuntu-server.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.00.00/ubuntu-server.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200107.01.00/ubuntu-desktop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.01.00/ubuntu-desktop.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200107.01.00/ubuntu-laptop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.01.00/ubuntu-laptop.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200107.01.00/ubuntu-server.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200107.01.00/ubuntu-server.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200108.00.00/ubuntu-desktop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200108.00.00/ubuntu-desktop.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200108.00.00/ubuntu-laptop.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200108.00.00/ubuntu-laptop.tar.gz
--------------------------------------------------------------------------------
/importers/targz/testdata/20200108.00.00/ubuntu-server.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/targz/testdata/20200108.00.00/ubuntu-server.tar.gz
--------------------------------------------------------------------------------
/importers/windows/windows.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package windows implements Windows ISO-13346 repository importer.
16 | package windows
17 |
18 | import (
19 | "bytes"
20 | "context"
21 | "crypto/sha256"
22 | "fmt"
23 | "io"
24 | "os"
25 | "os/exec"
26 | "path/filepath"
27 | "strconv"
28 | "strings"
29 | "time"
30 |
31 | "github.com/Microsoft/go-winio/wim"
32 | "github.com/golang/glog"
33 | "github.com/google/hashr/core/hashr"
34 | "github.com/google/hashr/importers/common"
35 | )
36 |
const (
	// RepoName contains the repository name.
	// It identifies sources produced by this importer.
	RepoName = "windows"
)
41 |
42 | // Preprocess extracts the contents of Windows ISO file.
43 | func (w *wimImage) Preprocess() (string, error) {
44 | var err error
45 | w.localPath, err = common.CopyToLocal(w.remotePath, w.id)
46 | if err != nil {
47 | return "", fmt.Errorf("error while copying %s to %s: %v", w.remotePath, w.localPath, err)
48 | }
49 |
50 | baseDir, _ := filepath.Split(w.localPath)
51 |
52 | extractionDir := filepath.Join(baseDir, "extracted")
53 |
54 | mountDir := filepath.Join(baseDir, "mnt")
55 | if err := os.MkdirAll(mountDir, 0755); err != nil {
56 | return "", fmt.Errorf("could not create mount directory: %v", err)
57 | }
58 |
59 | _, err = shellCommand("sudo", "mount", w.localPath, mountDir)
60 | if err != nil {
61 | return "", fmt.Errorf("error while executing mount cmd: %v", err)
62 | }
63 |
64 | installWimPath := filepath.Join(mountDir, "/sources/install.wim")
65 |
66 | wimFile, err := os.Open(installWimPath)
67 | if err != nil {
68 | return "", fmt.Errorf("error while opening %s: %v", installWimPath, err)
69 | }
70 |
71 | reader, err := wim.NewReader(wimFile)
72 | if err != nil {
73 | return "", fmt.Errorf("error while creating wim reader %s: %v", installWimPath, err)
74 | }
75 |
76 | for _, image := range reader.Image {
77 | if image.Name == w.imageName {
78 | glog.Infof("Extracting files from %s located in %s to %s", image.Name, w.localPath, extractionDir)
79 | err := extractWimImage(image, extractionDir)
80 | if err != nil {
81 | return "", fmt.Errorf("error while extracting wim image %s: %v", image.Name, err)
82 | }
83 | glog.Infof("Done extracting files from %s", image.Name)
84 | }
85 | }
86 |
87 | time.Sleep(time.Second * 10)
88 | _, err = shellCommand("sudo", "umount", "-fl", mountDir)
89 | if err != nil {
90 | return "", fmt.Errorf("error while executing umount cmd: %v", err)
91 | }
92 |
93 | return extractionDir, nil
94 | }
95 | func extractWimImage(image *wim.Image, extractionDir string) error {
96 | rootDir, err := image.Open()
97 | if err != nil {
98 | return fmt.Errorf("error while opening wim file %s: %v", image.Name, err)
99 | }
100 |
101 | if err := extractWimFolder(rootDir, rootDir.Name, extractionDir); err != nil {
102 | return err
103 | }
104 |
105 | return nil
106 | }
107 |
108 | func extractWimFolder(wimFile *wim.File, path, extractionDir string) error {
109 | files, err := wimFile.Readdir()
110 | if err != nil {
111 | return fmt.Errorf("error while opening wim file %s: %v", wimFile.Name, err)
112 | }
113 | for _, file := range files {
114 | dstPath := filepath.Join(extractionDir, path, file.Name)
115 | if file.IsDir() {
116 | if err := os.MkdirAll(dstPath, 0755); err != nil {
117 | glog.Errorf("Could not create destination directory %s: %v", dstPath, err)
118 | continue
119 | }
120 | if err := extractWimFolder(file, filepath.Join(path, file.Name), extractionDir); err != nil {
121 | glog.Warningf("Failed to extract Wim folder %s: %v", file.Name, err)
122 | }
123 | } else {
124 | if err := copyFile(file, dstPath); err != nil {
125 | glog.Errorf("Could not copy to destination file %s: %v", dstPath, err)
126 | continue
127 | }
128 | }
129 | }
130 |
131 | return nil
132 | }
133 |
134 | func copyFile(file *wim.File, dstPath string) error {
135 | destFile, err := os.Create(dstPath)
136 | if err != nil {
137 | return fmt.Errorf("error while creating destination file: %v", err)
138 | }
139 |
140 | content, err := file.Open()
141 | if err != nil {
142 | return fmt.Errorf("error while opening wim %s file for reading: %v", file.Name, err)
143 | }
144 |
145 | _, err = io.Copy(destFile, content)
146 | if err != nil {
147 | return fmt.Errorf("error while copying destination file %s: %v", file.Name, err)
148 | }
149 |
150 | destFile.Close()
151 | content.Close()
152 |
153 | return nil
154 | }
155 |
// execute builds the *exec.Cmd for an external command and logs the
// invocation. It is a package-level var — presumably so tests can swap in
// a fake command builder; confirm against the test code before relying on
// that.
var execute = func(name string, args ...string) *exec.Cmd {
	glog.Infof("name: %v, args: %v", name, args)
	return exec.Command(name, args...)
}
160 |
161 | func shellCommand(binary string, args ...string) (string, error) {
162 | cmd := execute(binary, args...)
163 | var stdout, stderr bytes.Buffer
164 | cmd.Stdout = &stdout
165 | cmd.Stderr = &stderr
166 |
167 | err := cmd.Run()
168 | if err != nil {
169 | return "", fmt.Errorf("error while executing %s: %v\nStdout: %v\nStderr: %v", binary, err, stdout.String(), stderr.String())
170 | }
171 |
172 | return stdout.String(), nil
173 | }
174 |
// ID returns non-unique Windows ISO file ID.
// The ID is built from the wim image name and version during DiscoverRepo.
func (w *wimImage) ID() string {
	return w.id
}
179 |
// RepoName returns repository name, the constant "windows".
func (w *wimImage) RepoName() string {
	return RepoName
}
184 |
// RepoPath returns repository path, i.e. the repository root this image's
// ISO was discovered under.
func (w *wimImage) RepoPath() string {
	return w.repoPath
}
189 |
// LocalPath returns local path to a Windows ISO file.
// Before Preprocess runs this is the same as the remote path (set during
// DiscoverRepo); Preprocess replaces it with the local copy's path.
func (w *wimImage) LocalPath() string {
	return w.localPath
}
194 |
// RemotePath returns remote path to a Windows ISO file.
func (w *wimImage) RemotePath() string {
	return w.remotePath
}
199 |
// QuickSHA256Hash calculates sha256 hash of a Windows ISO file.
// The hash is precomputed from wim image metadata during DiscoverRepo and
// simply returned here; the error is always nil.
func (w *wimImage) QuickSHA256Hash() (string, error) {
	return w.quickHash, nil
}
204 |
// Description provides additional description for a Windows ISO file.
// No extra metadata is surfaced, so this is always empty.
func (w *wimImage) Description() string {
	return ""
}
209 |
// NewRepo returns new instance of a Windows ISO repository rooted at
// repositoryPath. The ctx parameter is currently unused and the returned
// error is always nil.
func NewRepo(ctx context.Context, repositoryPath string) (*Repo, error) {
	return &Repo{path: repositoryPath}, nil
}
214 |
// Repo holds data related to a Windows repository.
type Repo struct {
	// path is the root directory walked for .iso files.
	path string
	// files holds the .iso paths collected by DiscoverRepo's walk.
	files []string
	// wimImages holds one entry per wim image found inside the ISOs.
	wimImages []*wimImage
}
221 |
// wimImage describes a single image found inside an ISO's install.wim.
type wimImage struct {
	// id combines the image name with its Windows version numbers.
	id string
	// imageName is the image's name inside the wim file.
	imageName string
	// localPath points at the ISO file containing this image.
	localPath string
	// remotePath is the ISO's path in the repository.
	remotePath string
	// quickHash is precomputed from wim metadata in DiscoverRepo.
	quickHash string
	// repoPath is the repository root the ISO was discovered under.
	repoPath string
}
230 |
// RepoName returns repository name, the constant "windows".
func (r *Repo) RepoName() string {
	return RepoName
}
235 |
// RepoPath returns repository path, i.e. the root passed to NewRepo.
func (r *Repo) RepoPath() string {
	return r.path
}
240 |
241 | // DiscoverRepo traverses the repository and looks for .iso files.
242 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) {
243 |
244 | if err := filepath.Walk(r.path, walk(&r.files)); err != nil {
245 | return nil, err
246 | }
247 |
248 | for _, filePath := range r.files {
249 | tempDir, err := common.LocalTempDir(strings.ReplaceAll(strings.TrimPrefix(filePath, r.path+string(os.PathSeparator)), string(os.PathSeparator), "-"))
250 | if err != nil {
251 | return nil, fmt.Errorf("error while creating temp dir: %v", err)
252 | }
253 |
254 | mountDir := filepath.Join(tempDir, "mnt")
255 | if err := os.MkdirAll(mountDir, 0755); err != nil {
256 | return nil, fmt.Errorf("could not create mount directory: %v", err)
257 | }
258 |
259 | _, err = shellCommand("sudo", "mount", filePath, mountDir)
260 | if err != nil {
261 | return nil, fmt.Errorf("error while executing mount cmd: %v", err)
262 | }
263 |
264 | installWimPath := filepath.Join(mountDir, "/sources/install.wim")
265 |
266 | wimFile, err := os.Open(installWimPath)
267 | if err != nil {
268 | return nil, fmt.Errorf("error while opening %s: %v", installWimPath, err)
269 | }
270 |
271 | reader, err := wim.NewReader(wimFile)
272 | if err != nil {
273 | return nil, fmt.Errorf("error while creating wim reader %s: %v", installWimPath, err)
274 | }
275 |
276 | glog.Infof("Opened %s wim file", installWimPath)
277 |
278 | for _, image := range reader.Image {
279 | glog.Infof("Found %s image in %s", image.Name, installWimPath)
280 | r.wimImages = append(r.wimImages, &wimImage{
281 | imageName: image.Name,
282 | id: fmt.Sprintf("%s-%d.%d-%d-%dsp", strings.ReplaceAll(image.Name, " ", ""), image.Windows.Version.Major, image.Windows.Version.Minor, image.Windows.Version.Build, image.Windows.Version.SPBuild),
283 | localPath: filePath,
284 | remotePath: filePath,
285 | repoPath: r.path,
286 | quickHash: fmt.Sprintf("%x", sha256.Sum256([]byte(image.CreationTime.Time().String()+
287 | image.Name+
288 | image.Windows.ProductName+
289 | strconv.Itoa(image.Windows.Version.Build)+
290 | strconv.Itoa(image.Windows.Version.Major)+
291 | strconv.Itoa(image.Windows.Version.Minor)+
292 | strconv.Itoa(image.Windows.Version.SPBuild)))),
293 | })
294 | }
295 |
296 | wimFile.Close()
297 |
298 | time.Sleep(time.Second * 10)
299 | _, err = shellCommand("sudo", "umount", "-fl", mountDir)
300 | if err != nil {
301 | return nil, fmt.Errorf("error while executing umount cmd: %v", err)
302 | }
303 | }
304 |
305 | var sources []hashr.Source
306 | for _, wimImage := range r.wimImages {
307 | sources = append(sources, wimImage)
308 | }
309 |
310 | return sources, nil
311 | }
312 |
313 | func walk(files *[]string) filepath.WalkFunc {
314 | return func(path string, info os.FileInfo, err error) error {
315 | if err != nil {
316 | glog.Errorf("Could not open %s: %v", path, err)
317 | return nil
318 | }
319 | if info.IsDir() {
320 | return nil
321 | }
322 |
323 | if strings.EqualFold(filepath.Ext(info.Name()), ".iso") {
324 | *files = append(*files, path)
325 | }
326 |
327 | return nil
328 | }
329 | }
330 |
--------------------------------------------------------------------------------
/importers/wsus/testdata/._03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/._03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab
--------------------------------------------------------------------------------
/importers/wsus/testdata/._138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/._138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab
--------------------------------------------------------------------------------
/importers/wsus/testdata/._1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/._1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab
--------------------------------------------------------------------------------
/importers/wsus/testdata/._1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/._1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab
--------------------------------------------------------------------------------
/importers/wsus/testdata/03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/03E86F3A0947C8A5183AD0C66A48782FA216BEFF.cab
--------------------------------------------------------------------------------
/importers/wsus/testdata/138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/138ECA2DEB45E284DC0BB94CC8849D1933B072FF.cab
--------------------------------------------------------------------------------
/importers/wsus/testdata/1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/1BDBDA1C53B6C980DD440B93646D8021CC90F1FF.cab
--------------------------------------------------------------------------------
/importers/wsus/testdata/1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/wsus/testdata/1F35F72D34C16FF7D7270D60472D8AD9FF9D7EFF.cab
--------------------------------------------------------------------------------
/importers/zip/generate_tests.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Rebuilds zip test fixtures from tar.gz ones: every *.tar.gz below the
# current directory is unpacked, re-zipped as <name>.zip next to the
# original, and the tar.gz is removed.
#
# Fixes over the previous version:
#  - `find .` instead of bare `find` (bare form is a GNU-ism);
#  - paths are streamed through `while read` instead of word-splitting
#    `$(find ...)`, so names with spaces survive;
#  - the extension is swapped with `${filename%.tar.gz}.zip` instead of
#    `sed 's/.tar.gz/.zip/g'`, whose unescaped dots and global flag could
#    mangle unrelated parts of the name;
#  - `cd` happens in a subshell, so there is no fragile `cd -`.

find . -name '*.tar.gz' | while read -r tar; do
  echo "$tar"
  filename=$(basename "$tar")
  tardir=$(dirname "$tar")
  tempdir=$(mktemp -d)
  tar -C "$tempdir" -xf "$tar"

  # Zip from inside the temp dir so archive paths are relative.
  (cd "$tempdir" && zip -r data.zip .)

  cp "$tempdir/data.zip" "$tardir/${filename%.tar.gz}.zip"
  rm -r "$tempdir"
  rm "$tar"
done
17 |
--------------------------------------------------------------------------------
/importers/zip/testdata/20200106.00.00/ubuntu-desktop.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200106.00.00/ubuntu-desktop.jar
--------------------------------------------------------------------------------
/importers/zip/testdata/20200106.00.00/ubuntu-laptop.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200106.00.00/ubuntu-laptop.whl
--------------------------------------------------------------------------------
/importers/zip/testdata/20200106.00.00/ubuntu-server.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200106.00.00/ubuntu-server.egg
--------------------------------------------------------------------------------
/importers/zip/testdata/20200107.00.00/ubuntu-desktop.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.00.00/ubuntu-desktop.zip
--------------------------------------------------------------------------------
/importers/zip/testdata/20200107.00.00/ubuntu-laptop.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.00.00/ubuntu-laptop.zip
--------------------------------------------------------------------------------
/importers/zip/testdata/20200107.00.00/ubuntu-server.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.00.00/ubuntu-server.zip
--------------------------------------------------------------------------------
/importers/zip/testdata/20200107.01.00/ubuntu-desktop.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.01.00/ubuntu-desktop.zip
--------------------------------------------------------------------------------
/importers/zip/testdata/20200107.01.00/ubuntu-laptop.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.01.00/ubuntu-laptop.zip
--------------------------------------------------------------------------------
/importers/zip/testdata/20200107.01.00/ubuntu-server.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200107.01.00/ubuntu-server.zip
--------------------------------------------------------------------------------
/importers/zip/testdata/20200108.00.00/ubuntu-desktop.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200108.00.00/ubuntu-desktop.zip
--------------------------------------------------------------------------------
/importers/zip/testdata/20200108.00.00/ubuntu-laptop.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200108.00.00/ubuntu-laptop.zip
--------------------------------------------------------------------------------
/importers/zip/testdata/20200108.00.00/ubuntu-server.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/importers/zip/testdata/20200108.00.00/ubuntu-server.zip
--------------------------------------------------------------------------------
/importers/zip/zip.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package zip implements zip repository importer.
16 | package zip
17 |
18 | import (
19 | "archive/zip"
20 | "crypto/sha256"
21 | "fmt"
22 | "io"
23 | "os"
24 | "path/filepath"
25 | "strings"
26 |
27 | "github.com/golang/glog"
28 |
29 | "github.com/google/hashr/core/hashr"
30 | "github.com/google/hashr/importers/common"
31 | )
32 |
const (
	// RepoName contains the repository name.
	RepoName = "zip"
	// chunkSize is both the size of the header/footer chunks hashed by
	// QuickSHA256Hash and half of its whole-file-hash threshold (2*chunkSize).
	chunkSize = 1024 * 1024 * 10 // 10MB
)
38 |
// Archive holds data related to zip archive.
type Archive struct {
	// filename is the archive's base file name; it doubles as the ID.
	filename string
	// remotePath is the archive's path in the repository being imported.
	remotePath string
	// localPath is set by Preprocess to the local copy of the archive.
	localPath string
	// quickSha256hash caches the result of QuickSHA256Hash.
	quickSha256hash string
	// repoPath is the repository root this archive was discovered under.
	repoPath string
}
47 |
48 | // Preprocess extracts the contents of a .zip file.
49 | func (a *Archive) Preprocess() (string, error) {
50 | var err error
51 | a.localPath, err = common.CopyToLocal(a.remotePath, a.ID())
52 | if err != nil {
53 | return "", fmt.Errorf("error while copying %s to local file system: %v", a.remotePath, err)
54 | }
55 |
56 | baseDir, _ := filepath.Split(a.localPath)
57 | extractionDir := filepath.Join(baseDir, "extracted")
58 |
59 | if err := extractZip(a.localPath, extractionDir); err != nil {
60 | return "", err
61 | }
62 |
63 | return extractionDir, nil
64 | }
65 |
66 | func extractZip(zipPath, outputFolder string) error {
67 | if _, err := os.Stat(outputFolder); os.IsNotExist(err) {
68 | if err2 := os.MkdirAll(outputFolder, 0755); err2 != nil {
69 | return fmt.Errorf("error while creating target directory: %v", err2)
70 | }
71 | }
72 |
73 | // 1. Open the zip file
74 | zipReader, err := zip.OpenReader(zipPath)
75 | if err != nil {
76 | return fmt.Errorf("failed to open zip file: %v", err)
77 | }
78 | defer zipReader.Close()
79 |
80 | // 2. Get the absolute destination path
81 | outputFolder, err = filepath.Abs(outputFolder)
82 | if err != nil {
83 | return err
84 | }
85 |
86 | // 3. Iterate over zip files inside the archive and unzip each of them
87 | for _, f := range zipReader.File {
88 | err := unzipFile(f, outputFolder)
89 | if err != nil {
90 | return err
91 | }
92 | }
93 |
94 | return nil
95 | }
96 |
// unzipFile writes a single zip entry into its path under destination.
// Entries whose cleaned path would escape the destination directory
// (Zip Slip) are rejected with an error.
func unzipFile(f *zip.File, destination string) error {
	// Reject paths that would land outside the destination (Zip Slip).
	filePath := filepath.Join(destination, f.Name)
	if !strings.HasPrefix(filePath, filepath.Clean(destination)+string(os.PathSeparator)) {
		return fmt.Errorf("invalid file path: %s", filePath)
	}

	// Directory entries only need their tree created.
	if f.FileInfo().IsDir() {
		return os.MkdirAll(filePath, os.ModePerm)
	}

	// Regular entry: make sure the parent directory exists first.
	if err := os.MkdirAll(filepath.Dir(filePath), os.ModePerm); err != nil {
		return err
	}

	// Create/truncate the destination file with the entry's mode.
	dst, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
	if err != nil {
		return err
	}
	defer dst.Close()

	// Stream the decompressed entry contents into the destination.
	src, err := f.Open()
	if err != nil {
		return err
	}
	defer src.Close()

	_, err = io.Copy(dst, src)
	return err
}
135 |
// ID returns non-unique zip Archive ID: the base filename of the
// archive, as set by DiscoverRepo.
func (a *Archive) ID() string {
	return a.filename
}
140 |
// RepoName returns repository name (the package-level RepoName constant).
func (a *Archive) RepoName() string {
	return RepoName
}
145 |
// RepoPath returns repository path, i.e. the root location the archive
// was discovered under.
func (a *Archive) RepoPath() string {
	return a.repoPath
}
150 |
// LocalPath returns local path to a zip Archive .zip file. It is empty
// until Preprocess has copied the archive to the local filesystem.
func (a *Archive) LocalPath() string {
	return a.localPath
}
155 |
// RemotePath returns non-local path to a zip Archive .zip file, as
// discovered by DiscoverRepo.
func (a *Archive) RemotePath() string {
	return a.remotePath
}
160 |
// Description provides additional description for a .zip file.
// Currently there is no extra metadata, so it always returns "".
func (a *Archive) Description() string {
	return ""
}
165 |
166 | // QuickSHA256Hash calculates sha256 hash of .zip file.
167 | func (a *Archive) QuickSHA256Hash() (string, error) {
168 | // Check if the quick hash was already calculated.
169 | if a.quickSha256hash != "" {
170 | return a.quickSha256hash, nil
171 | }
172 |
173 | f, err := os.Open(a.remotePath)
174 | if err != nil {
175 | return "", err
176 | }
177 | defer f.Close()
178 |
179 | fileInfo, err := f.Stat()
180 | if err != nil {
181 | return "", err
182 | }
183 |
184 | // Check if the file is smaller than 20MB, if so hash the whole file.
185 | if fileInfo.Size() < int64(chunkSize*2) {
186 | h := sha256.New()
187 | if _, err := io.Copy(h, f); err != nil {
188 | return "", err
189 | }
190 | a.quickSha256hash = fmt.Sprintf("%x", h.Sum(nil))
191 | return a.quickSha256hash, nil
192 | }
193 |
194 | header := make([]byte, chunkSize)
195 | _, err = f.Read(header)
196 | if err != nil {
197 | return "", err
198 | }
199 |
200 | footer := make([]byte, chunkSize)
201 | _, err = f.ReadAt(footer, fileInfo.Size()-int64(chunkSize))
202 | if err != nil {
203 | return "", err
204 | }
205 |
206 | a.quickSha256hash = fmt.Sprintf("%x", sha256.Sum256(append(header, footer...)))
207 | return a.quickSha256hash, nil
208 | }
209 |
210 | // NewRepo returns new instance of zip repository.
211 | func NewRepo(path string, fileExtensions string) *Repo {
212 | exts := strings.Split(fileExtensions, ",")
213 | for i, ext := range exts {
214 | exts[i] = "." + ext
215 | }
216 |
217 | return &Repo{location: path, fileExtensions: exts}
218 | }
219 |
// Repo holds data related to a zip repository.
type Repo struct {
	location       string     // root directory scanned for archives
	fileExtensions []string   // matched file suffixes, each with a leading dot
	files          []string   // file paths collected by DiscoverRepo
	Archives       []*Archive // archives built from the discovered files
}
227 |
// RepoName returns repository name (the package-level RepoName constant).
func (r *Repo) RepoName() string {
	return RepoName
}
232 |
// RepoPath returns repository path, i.e. the root location this Repo
// was created with.
func (r *Repo) RepoPath() string {
	return r.location
}
237 |
238 | // DiscoverRepo traverses the repository and looks for files that are related to zip base Archives.
239 | func (r *Repo) DiscoverRepo() ([]hashr.Source, error) {
240 | if err := filepath.Walk(r.location, walk(&r.files, r.fileExtensions)); err != nil {
241 | return nil, err
242 | }
243 |
244 | for _, file := range r.files {
245 | _, filename := filepath.Split(file)
246 |
247 | r.Archives = append(r.Archives, &Archive{filename: filename, remotePath: file, repoPath: r.location})
248 | }
249 |
250 | var sources []hashr.Source
251 | for _, Archive := range r.Archives {
252 | sources = append(sources, Archive)
253 | }
254 |
255 | return sources, nil
256 | }
257 |
258 | func walk(files *[]string, extensions []string) filepath.WalkFunc {
259 | return func(path string, info os.FileInfo, err error) error {
260 | if err != nil {
261 | glog.Errorf("Could not open %s: %v", path, err)
262 | return nil
263 | }
264 | if info.IsDir() {
265 | return nil
266 | }
267 |
268 | for _, ext := range extensions {
269 | if strings.HasSuffix(info.Name(), ext) {
270 | *files = append(*files, path)
271 | break
272 | }
273 | }
274 |
275 | return nil
276 | }
277 | }
278 |
--------------------------------------------------------------------------------
/processors/local/local.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package local provides functions to process data locally.
16 | package local
17 |
18 | import (
19 | "bytes"
20 | "fmt"
21 | "os/exec"
22 | "path/filepath"
23 |
24 | "github.com/golang/glog"
25 | )
26 |
// execute wraps exec.Command so tests can substitute a fake
// implementation (see local_test.go). It logs each command before
// building it.
var execute = func(name string, args ...string) *exec.Cmd {
	glog.Infof("name: %v, args: %v", name, args)
	return exec.Command(name, args...)
}
31 |
// Processor is an instance of local processor. It runs Plaso's
// image_export either directly (when the binary is on PATH) or via a
// Docker container.
type Processor struct {
}

// New returns new local processor instance.
func New() *Processor {
	return &Processor{}
}
40 |
41 | func shellCommand(binary string, args ...string) (string, error) {
42 | cmd := execute(binary, args...)
43 | var stdout, stderr bytes.Buffer
44 | cmd.Stdout = &stdout
45 | cmd.Stderr = &stderr
46 |
47 | err := cmd.Run()
48 | if err != nil {
49 | return "", fmt.Errorf("error while executing %s: %v\nStdout: %v\nStderr: %v", binary, err, stdout.String(), stderr.String())
50 | }
51 |
52 | return stdout.String(), nil
53 | }
54 |
// ImageExport runs image_export.py binary locally.
//
// It extracts all files from the disk image at sourcePath into an
// "export" directory next to the image and returns that directory.
// When image_export.py is not on PATH, the log2timeline/plaso Docker
// image is used instead; in that case only /tmp is mounted into the
// container, so sourcePath is assumed to live under /tmp — TODO confirm
// callers guarantee this.
func (p *Processor) ImageExport(sourcePath string) (string, error) {
	// TODO(mlegin): check if image_export.py is present on the local machine.
	baseDir := filepath.Dir(sourcePath)
	exportDir := filepath.Join(baseDir, "export")
	logFile := filepath.Join(baseDir, "image_export.log")

	// Arguments for running image_export inside the plaso Docker image.
	dockerArgs := []string{"run", "--rm", "-v", "/tmp/:/tmp", "log2timeline/plaso", "image_export", "--logfile", logFile, "--partitions", "all", "--volumes", "all", "-w", exportDir, sourcePath}
	// Arguments for running image_export.py directly on the host.
	localArgs := []string{"--logfile", logFile, "--partitions", "all", "--volumes", "all", "-w", exportDir, sourcePath}
	var err error

	if inDockerContainer() {
		_, err = shellCommand("image_export.py", localArgs...)
	} else {
		_, err = shellCommand("docker", dockerArgs...)
	}

	if err != nil {
		return "", fmt.Errorf("error while running image_export: %v", err)
	}

	return exportDir, nil
}
78 |
// inDockerContainer reports whether image_export.py is available on
// PATH. NOTE(review): despite the name, this does not inspect the
// container environment — it presumes image_export.py is only installed
// inside the hashr Docker image; confirm before relying on it elsewhere.
func inDockerContainer() bool {
	_, err := shellCommand("which", "image_export.py")
	return err == nil
}
83 |
--------------------------------------------------------------------------------
/processors/local/local_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package local
16 |
17 | import (
18 | "fmt"
19 | "io"
20 | "io/ioutil"
21 | "os"
22 | "os/exec"
23 | "path/filepath"
24 | "testing"
25 | )
26 |
27 | func TestExecute(t *testing.T) {
28 | bytes, err := execute("echo", "test").Output()
29 | if err != nil {
30 | t.Fatalf("unexpected error while running test echo cmd: %v", err)
31 | }
32 | if got, want := string(bytes), "test\n"; got != want {
33 | t.Errorf("echo = %s; want = %s", got, want)
34 | }
35 | }
36 |
37 | func TestImageExport(t *testing.T) {
38 | execute = fakeExecute
39 | tempDir, err := ioutil.TempDir("", "hashr-test")
40 | if err != nil {
41 | t.Fatalf("error while creating temp directory: %v", err)
42 | }
43 | defer os.RemoveAll(tempDir)
44 |
45 | sourceFile, err := os.Open("testdata/disk_2_xfs_volumes.raw")
46 | if err != nil {
47 | t.Fatalf("unexpected error while opening test WIM file: %v", err)
48 | }
49 |
50 | xfsTempPath := filepath.Join(tempDir, "disk_2_xfs_volumes.raw")
51 | destFile, err := os.Create(xfsTempPath)
52 | if err != nil {
53 | t.Fatalf("unexpected error creating temp destination file: %v", err)
54 | }
55 |
56 | _, err = io.Copy(destFile, sourceFile)
57 | if err != nil {
58 | t.Fatalf("unexpected error while copying to temp destination file: %v", err)
59 | }
60 |
61 | processor := New()
62 | gotOut, err := processor.ImageExport(xfsTempPath)
63 | if err != nil {
64 | t.Fatalf("unexpected error while running ImageExport(): %v", err)
65 | }
66 |
67 | wantOut := filepath.Join(tempDir, "export")
68 |
69 | if gotOut != wantOut {
70 | t.Errorf("ImageExport() = %s; want = %s", gotOut, wantOut)
71 | }
72 |
73 | }
74 |
// fakeExecute builds an *exec.Cmd that re-runs the current test binary
// and invokes TestHelperProcess in place of the real command. The helper
// prints the contents of the STDOUT environment variable and exits.
func fakeExecute(command string, args ...string) *exec.Cmd {
	mockStdOut := ""

	helperArgs := append([]string{"-test.run=TestHelperProcess", "--", command}, args...)
	cmd := exec.Command(os.Args[0], helperArgs...)
	cmd.Env = []string{
		"GO_WANT_HELPER_PROCESS=1",
		"STDOUT=" + mockStdOut,
	}
	return cmd
}
85 |
// TestHelperProcess isn't a real test: it is re-invoked as a subprocess
// by the commands built in fakeExecute (guarded by the
// GO_WANT_HELPER_PROCESS env var). It prints the STDOUT env var and
// exits, emulating an external binary.
func TestHelperProcess(t *testing.T) {
	if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
		return
	}

	fmt.Fprint(os.Stdout, os.Getenv("STDOUT"))
	os.Exit(0)
}
95 |
--------------------------------------------------------------------------------
/processors/local/testdata/._disk_2_xfs_volumes.raw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/processors/local/testdata/._disk_2_xfs_volumes.raw
--------------------------------------------------------------------------------
/processors/local/testdata/disk_2_xfs_volumes.raw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/hashr/eb9c77b9fdbbf99ad52e16cb98d8468cd1add96a/processors/local/testdata/disk_2_xfs_volumes.raw
--------------------------------------------------------------------------------
/scripts/CreateCloudSpannerExporterTables.ddl:
--------------------------------------------------------------------------------
1 | -- Copyright 2022 Google LLC
2 | --
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 | --
-- https://www.apache.org/licenses/LICENSE-2.0
8 | --
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
-- Unique file samples discovered during extraction, keyed by content hash.
CREATE TABLE samples (
  sha256 STRING(100),
  mimetype STRING(MAX),
  file_output STRING(MAX),
  size INT64
) PRIMARY KEY(sha256);

-- Pointers to the sample payloads stored in GCS.
CREATE TABLE payloads (
  sha256 STRING(100),
  gcs_path STRING(200)
) PRIMARY KEY(sha256);

-- Sources (e.g. base images) that samples were extracted from.
-- Fix: Cloud Spanner requires an element type for ARRAY columns; a bare
-- "ARRAY" is invalid DDL and this statement would be rejected.
CREATE TABLE sources (
  sha256 STRING(100),
  source_id ARRAY<STRING(MAX)>,
  source_path STRING(MAX),
  source_description STRING(MAX),
  repo_name STRING(MAX),
  repo_path STRING(MAX),
) PRIMARY KEY(sha256);

-- Many-to-many mapping between samples and the sources containing them.
CREATE TABLE samples_sources (
  sample_sha256 STRING(100),
  source_sha256 STRING(100),
  sample_paths ARRAY<STRING(MAX)>,
  CONSTRAINT FK_Sample FOREIGN KEY (sample_sha256) REFERENCES samples (sha256),
  CONSTRAINT FK_Source FOREIGN KEY (source_sha256) REFERENCES sources (sha256),
) PRIMARY KEY (sample_sha256, source_sha256);
--------------------------------------------------------------------------------
/scripts/CreateJobsTable.ddl:
--------------------------------------------------------------------------------
-- Cloud Spanner schema for the HashR jobs table, tracking the processing
-- state of each imported source, keyed by its quick SHA-256.
CREATE TABLE jobs (
  imported_at TIMESTAMP NOT NULL,
  id STRING(500),
  repo STRING(200),
  repo_path STRING(500),
  quick_sha256 STRING(100) NOT NULL,
  location STRING(1000),
  sha256 STRING(100),
  status STRING(50),
  error STRING(10000),
  preprocessing_duration INT64,
  processing_duration INT64,
  export_duration INT64,
  files_extracted INT64,
  files_exported INT64,
) PRIMARY KEY(quick_sha256)
--------------------------------------------------------------------------------
/scripts/CreateJobsTable.sql:
--------------------------------------------------------------------------------
1 | -- Copyright 2022 Google LLC
2 | --
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 | --
-- https://www.apache.org/licenses/LICENSE-2.0
8 | --
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
-- PostgreSQL variant of the HashR jobs table.
-- NOTE(review): imported_at is an INT here (presumably epoch seconds)
-- while the Spanner variant uses TIMESTAMP — confirm consumers agree.
CREATE TABLE jobs (
  quick_sha256 VARCHAR(100) PRIMARY KEY,
  imported_at INT NOT NULL,
  id text,
  repo text,
  repo_path text,
  location text,
  sha256 VARCHAR(100),
  status VARCHAR(50),
  error text,
  preprocessing_duration INT,
  processing_duration INT,
  export_duration INT,
  files_extracted INT,
  files_exported INT
);
--------------------------------------------------------------------------------
/scripts/CreatePostgresExporterTables.sql:
--------------------------------------------------------------------------------
1 | -- Copyright 2022 Google LLC
2 | --
3 | -- Licensed under the Apache License, Version 2.0 (the "License");
4 | -- you may not use this file except in compliance with the License.
5 | -- You may obtain a copy of the License at
6 | --
-- https://www.apache.org/licenses/LICENSE-2.0
8 | --
9 | -- Unless required by applicable law or agreed to in writing, software
10 | -- distributed under the License is distributed on an "AS IS" BASIS,
11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | -- See the License for the specific language governing permissions and
13 | -- limitations under the License.
14 |
-- Unique file samples discovered during extraction, keyed by content hash.
CREATE TABLE samples (
  sha256 VARCHAR(100) PRIMARY KEY,
  mimetype text,
  file_output text,
  size INT
);

-- Raw sample payloads (stored inline, unlike the Spanner variant which
-- references GCS paths).
CREATE TABLE payloads (
  sha256 VARCHAR(100) PRIMARY KEY,
  payload bytea
);

-- Sources (e.g. base images) that samples were extracted from.
CREATE TABLE sources (
  sha256 VARCHAR(100) PRIMARY KEY,
  sourceID text[],
  sourcePath text,
  sourceDescription text,
  repoName text,
  repoPath text
);

-- Many-to-many mapping between samples and the sources containing them.
CREATE TABLE samples_sources (
  sample_sha256 VARCHAR(100) REFERENCES samples(sha256) NOT NULL,
  source_sha256 VARCHAR(100) REFERENCES sources(sha256) NOT NULL,
  sample_paths text[],
  PRIMARY KEY (sample_sha256, source_sha256)
);
--------------------------------------------------------------------------------
/scripts/aws/AwsHashrUploaderPolicy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "s3:PutObject",
9 | "s3:GetObject",
10 | "s3:ListBucket",
11 | "s3:PutObjectTagging",
12 | "s3:PutBucketObjectLockConfiguration"
13 | ],
14 | "Resource": [
15 | "arn:aws:s3:::hashr-bucket",
16 | "arn:aws:s3:::hashr-bucket/*"
17 | ]
18 | }
19 | ]
20 | }
21 |
--------------------------------------------------------------------------------
/scripts/aws/AwsHashrWorkerPolicy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "s3:PutObject",
9 | "s3:GetObject",
10 | "s3:DescribeJob",
11 | "s3:ListBucket",
12 | "s3:DeleteObject",
13 | "s3:GetBucketLocation",
14 | "s3:GetObjectVersion"
15 | ],
16 | "Resource": [
17 | "arn:aws:s3:::hashr-bucket",
18 | "arn:aws:s3:::hashr-bucket/*"
19 | ]
20 | },
21 | {
22 | "Sid": "VisualEditor1",
23 | "Effect": "Allow",
24 | "Action": [
25 | "ec2:DetachVolume",
26 | "ec2:AttachVolume",
27 | "ec2:CopySnapshot",
28 | "ec2:DeregisterImage",
29 | "ec2:DescribeInstances",
30 | "ec2:DescribeTags",
31 | "ec2:DescribeSnapshotAttribute",
32 | "ec2:DescribeInstanceAttribute",
33 | "s3:ListJobs",
34 | "ec2:CopyImage",
35 | "ec2:DescribeSnapshots",
36 | "ec2:DescribeVolumeAttribute",
37 | "ec2:CreateVolume",
38 | "ec2:DescribeImages",
39 | "ec2:DeleteVolume",
40 | "ec2:DescribeVolumeStatus",
41 | "ec2:CreateDefaultSubnet",
42 | "ec2:DescribeAvailabilityZones",
43 | "ec2:DescribeImageAttribute",
44 | "ec2:DescribeVolumes",
45 | "ec2:CreateSnapshot",
46 | "ec2:DescribeInstanceTypes",
47 | "ec2:DescribeInstanceStatus"
48 | ],
49 | "Resource": "*"
50 | }
51 | ]
52 | }
53 |
--------------------------------------------------------------------------------
/scripts/aws/hashr_aws_init.txt:
--------------------------------------------------------------------------------
#!/bin/bash
#
# EC2 user-data init script for HashR workers: installs the hashr-archive
# helper from GitHub and prepares the /data working directory.
#
# Download hashr-archive from github
HASHR_ARCHIVE_SRC=https://raw.githubusercontent.com/google/hashr/main/scripts/hashr-archive

wget -O /tmp/hashr-archive ${HASHR_ARCHIVE_SRC}

sudo mv /tmp/hashr-archive /usr/local/sbin/hashr-archive
sudo chmod +x /usr/local/sbin/hashr-archive

# Creating data directory
sudo mkdir -p /data
sudo chown -R $USER /data

--------------------------------------------------------------------------------
/scripts/aws/hashr_setup.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Script to create AWS EC2 instances as HashR workers.
#
# Usage: hashr_setup.sh {setup|create-key|create-sg|remove-key|remove-sg|remove-instance|remove-all}

# AWS configuration
AWS_PROFILE="default"
AWS_REGION="ap-southeast-2"

# AWS instance source
IMAGE_ID="ami-09eebd0b9bd845bf1"
INSTANCE_TYPE="t2.micro"
INSTANCE_COUNT=2
KEY_NAME="HashrAwsKey"
USER="ec2-user"
USER_DATA="file://hashr_aws_init.txt"
WORKER_TAG_INUSE_NAME="InUse"
WORKER_TAG_INUSE_VALUE="false"
WORKER_TAG_ROLE_NAME="role"
WORKER_TAG_ROLE_VALUE="hashr-worker"

SECURITY_GROUP_NAME="hashr-security-group"
SECURITY_GROUP_ID=""

# NOTE: You should change this to limit exposure.
SECURITY_SOURCE_CIDR="0.0.0.0/0"
WORKER_AWS_CONFIG_FILE="hashr.uploader.tar.gz"

# Shared log file for all AWS CLI output from this script.
SCRIPT_DIR=`dirname $0`
logfile=${SCRIPT_DIR}/hashr_aws_setup.log
touch $logfile
32 |
# create_key_pair creates the EC2 key pair ${KEY_NAME} if it does not
# already exist, saving the private key to ~/.ssh/${KEY_NAME}.
create_key_pair() {
  local keyPairId

  echo "Creating AWS key pair ${KEY_NAME}"

  keyPairId=`aws --profile ${AWS_PROFILE} ec2 describe-key-pairs --filters Name=key-name,Values=${KEY_NAME} | jq -r '.KeyPairs[0].KeyPairId'`
  if [ "${keyPairId}" == "null" ]; then
    aws --profile ${AWS_PROFILE} ec2 create-key-pair --key-name ${KEY_NAME} | jq -r '.KeyMaterial' > $HOME/.ssh/${KEY_NAME}
    chmod 600 ${HOME}/.ssh/${KEY_NAME}

    keyPairId=`aws --profile ${AWS_PROFILE} ec2 describe-key-pairs --filters Name=key-name,Values=${KEY_NAME} | jq -r '.KeyPairs[0].KeyPairId'`
    echo -e " - Created a new AWS key pair ${keyPairId}"
    return
  fi

  echo -e " Key pair ${KEY_NAME} exists with ID ${keyPairId}"
}
50 |
# create_security_group_id creates the HashR security group (allowing SSH
# from SECURITY_SOURCE_CIDR) if it does not exist, and stores its ID in
# the global SECURITY_GROUP_ID.
create_security_group_id() {
  local securityGroupId

  echo "Setting up security group ${SECURITY_GROUP_NAME}"

  SECURITY_GROUP_ID=`aws --profile ${AWS_PROFILE} ec2 describe-security-groups --filters Name=group-name,Values=${SECURITY_GROUP_NAME} | jq -r '.SecurityGroups[].GroupId'`
  if [ "${SECURITY_GROUP_ID}" == "" ]; then
    securityGroupId=`aws --profile ${AWS_PROFILE} ec2 create-security-group --group-name ${SECURITY_GROUP_NAME} --description "Security group for HashR AWS worker" | jq -r '.GroupId'`
    aws --profile ${AWS_PROFILE} ec2 authorize-security-group-ingress --group-id ${securityGroupId} --protocol tcp --port 22 --cidr "${SECURITY_SOURCE_CIDR}" > $logfile 2>&1

    SECURITY_GROUP_ID=${securityGroupId}
    sleep 5

    echo -e " - Created security group ${SECURITY_GROUP_NAME} (${securityGroupId})"
  else
    echo -e " - Security group ${SECURITY_GROUP_NAME} exists ${SECURITY_GROUP_ID}"
  fi
}
69 |
# check_instance_status polls the EC2 API until instance $1 reaches state
# $2 (e.g. "running" or "stopped"). Returns 0 on success, 1 after five
# attempts (roughly 50 seconds).
check_instance_status() {
  local instanceId="$1"
  local instanceState="$2"
  local instanceStateName=""

  local count=0
  while true
  do
    if [ $count -ge 5 ]; then
      echo "Something went wrong. The instance $instanceId should be up by now"
      return 1
    fi

    instanceStateName=`aws --profile ${AWS_PROFILE} ec2 describe-instances --instance-ids ${instanceId} | jq -r '.Reservations[].Instances[0].State.Name'`
    echo " Current state of ${instanceId} is ${instanceStateName}"
    if [ "${instanceStateName}" == "${instanceState}" ]; then
      return 0
    fi

    sleep 10
    count=$((count + 1))
  done
}
93 |
# copy_aws_config copies the AWS credential bundle to worker instance $1
# and unpacks it in the user's home directory. It first makes sure the
# instance is in the HashR security group so SSH is reachable.
copy_aws_config() {
  local instanceId="$1"
  local publicDnsName=""
  local sshOptions="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
  # Fix: was "securityGroupname" (wrong case), so the variable used below
  # was never actually declared local.
  local securityGroupName=""

  echo " - Copying AWS configuration to instance ${instanceId}"
  securityGroupName=`aws --profile ${AWS_PROFILE} ec2 describe-instance-attribute --instance-id ${instanceId} --attribute groupSet | jq -r '.Groups[].GroupName'`
  # Fix: the original compared "$securityGroupName}" (broken expansion with
  # a stray brace), which could never equal the group name.
  if [ "${securityGroupName}" != "${SECURITY_GROUP_NAME}" ]; then
    aws --profile ${AWS_PROFILE} ec2 modify-instance-attribute --instance-id ${instanceId} --groups ${SECURITY_GROUP_ID}
    sleep 5
  fi

  publicDnsName=`aws --profile ${AWS_PROFILE} ec2 describe-instances --instance-id ${instanceId} | jq -r '.Reservations[].Instances[0].PublicDnsName'`
  scp -i ~/.ssh/${KEY_NAME} ${sshOptions} ${SCRIPT_DIR}/${WORKER_AWS_CONFIG_FILE} ${USER}@${publicDnsName}:~/ > $logfile 2>&1
  ssh -i ~/.ssh/${KEY_NAME} ${sshOptions} ${USER}@${publicDnsName} "tar -zxf ~/${WORKER_AWS_CONFIG_FILE} -C ~/" > $logfile 2>&1
}
111 |
# run_ec2_instance launches INSTANCE_COUNT worker instances, grows each
# instance's root volume to 50 GB (a stop/start cycle is required for the
# resize to take effect) and copies the AWS configuration onto each one.
run_ec2_instance() {
  local volumeId=""

  echo "Running ${INSTANCE_COUNT} EC2 instances"

  for instanceId in `aws --profile ${AWS_PROFILE} ec2 run-instances --image-id ${IMAGE_ID} --count ${INSTANCE_COUNT} --instance-type ${INSTANCE_TYPE} --key-name ${KEY_NAME} --security-group-ids ${SECURITY_GROUP_ID} --associate-public-ip-address --tag-specifications 'ResourceType=instance,Tags=[{Key=role,Value=hashr-worker},{Key=InUse,Value=false},]' --user-data ${USER_DATA} | jq -r '.Instances[].InstanceId'`
  do
    # We want to make sure the instance is in the running state before we increase
    # the size of the disk.
    echo " - Checking if ${instanceId} is running"
    check_instance_status ${instanceId} "running"
    if [ $? -eq 1 ]; then
      exit 1
    fi

    # Increase the size of the disk.
    # Fix: look up the volume of the instance we just launched instead of
    # a hard-coded instance ID (i-094382052d8b0c550), which would resize
    # the wrong machine's disk (or none at all).
    volumeId=`aws --profile ${AWS_PROFILE} ec2 describe-volumes --filters Name=attachment.instance-id,Values=${instanceId} Name=attachment.device,Values=/dev/xvda | jq -r '.Volumes[0].VolumeId'`
    aws --profile ${AWS_PROFILE} ec2 modify-volume --volume-id ${volumeId} --size 50 > $logfile 2>&1

    # We need to restart the instance to take effect of the new disk size.
    aws --profile ${AWS_PROFILE} ec2 stop-instances --instance-id ${instanceId} > $logfile 2>&1
    echo " - Checking if ${instanceId} is stopped"
    check_instance_status ${instanceId} "stopped"
    if [ $? -eq 1 ]; then
      exit 1
    fi

    aws --profile ${AWS_PROFILE} ec2 start-instances --instance-id ${instanceId} > $logfile 2>&1
    echo " - Checking if ${instanceId} is running"
    check_instance_status ${instanceId} "running"
    if [ $? -eq 1 ]; then
      exit 1
    fi

    copy_aws_config ${instanceId}

    echo -e " - Created HashR worker ${instanceId}"
  done
}
152 |
# remove_key_pair deletes the EC2 key pair ${KEY_NAME}. The private key
# file under ~/.ssh is left in place.
remove_key_pair() {
  echo "Removing key pair ${KEY_NAME}"
  aws --profile ${AWS_PROFILE} ec2 delete-key-pair --key-name ${KEY_NAME}
}
157 |
# remove_security_group deletes the HashR security group, unless it is
# still attached to running instances (in which case they are listed and
# nothing is deleted).
remove_security_group() {
  local securityGroupId

  securityGroupId=`aws --profile ${AWS_PROFILE} ec2 describe-security-groups --filters Name=group-name,Values=${SECURITY_GROUP_NAME} | jq -r '.SecurityGroups[].GroupId'`
  echo "Security group ID ${securityGroupId} for ${SECURITY_GROUP_NAME}"

  if [ "${securityGroupId}" == "" ]; then
    echo " - No security group ID for security group ${SECURITY_GROUP_NAME}"
  else
    # Check if security-group-id is still in use
    instances=`aws --profile ${AWS_PROFILE} ec2 describe-instances --filters Name=instance.group-id,Values=${securityGroupId} | jq -r '.Reservations[].Instances[].InstanceId'`
    if [ "${instances}" != "" ]; then
      echo -e "Security group ${securityGroupId} (${SECURITY_GROUP_NAME}) is in use in the following instances:\n${instances}"
    else
      # Delete security group.
      echo "Removing security group ${SECURITY_GROUP_NAME} (${securityGroupId})"
      aws --profile ${AWS_PROFILE} ec2 delete-security-group --group-id ${securityGroupId}
    fi
  fi
}
178 |
# remove_instances terminates every EC2 instance tagged with the HashR
# worker role value (${WORKER_TAG_ROLE_VALUE}).
remove_instances() {
  echo "Removing EC2 worker instances"

  for instanceId in `aws --profile ${AWS_PROFILE} ec2 describe-instances --filters Name=tag-value,Values=${WORKER_TAG_ROLE_VALUE} | jq -r '.Reservations[].Instances[].InstanceId'`
  do
    echo " - Removing the worker instance ${instanceId}"
    aws --profile ${AWS_PROFILE} ec2 terminate-instances --instance-id ${instanceId} > $logfile 2>&1
  done
}
188 |
# Main: dispatch on the first command-line argument.
case "$1" in
  setup)
    # Full setup needs the worker AWS credential bundle next to the script.
    dirpath=`dirname $0`
    if [ ! -f ${dirpath}/${WORKER_AWS_CONFIG_FILE} ]; then
      echo "No AWS configuration file (${WORKER_AWS_CONFIG_FILE}) for worker"
      exit 1
    fi

    create_key_pair

    sleep 5
    create_security_group_id

    sleep 5
    run_ec2_instance
    ;;
  create-key)
    echo "Creating keypair ${KEY_NAME}"
    create_key_pair
    ;;
  create-sg)
    echo "Creating security group ${SECURITY_GROUP_NAME}"
    create_security_group_id
    ;;
  remove-key)
    echo "Removing key pair ${KEY_NAME}"
    remove_key_pair
    ;;
  remove-sg)
    echo "Removing security group ${SECURITY_GROUP_NAME}"
    remove_security_group
    ;;
  remove-instance)
    echo "Removing EC2 worker instances"
    remove_instances
    ;;
  remove-all)
    # Tear down in dependency order: instances, then security group, then key.
    echo "Removing HashR AWS instances, security group, and key pair"
    remove_instances

    sleep 5
    remove_security_group

    sleep 5
    remove_key_pair
    ;;
  *)
    echo "Usage: `basename $0` {setup|create-key|create-sg|remove-key|remove-sg|remove-instance|remove-all}" || true
    exit 1
esac

exit 0

242 |
--------------------------------------------------------------------------------
/scripts/hashr-archive:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright 2023 Google LLC
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # A HashR - AWS disk archival creation script.
18 |
name=`basename $0`
workingdir=/data
logfile=${workingdir}/hashr-archive.`hostname`.log

# Make the shared log file world-writable so later appends succeed.
if [ ! -f ${logfile} ]; then
  sudo touch ${logfile}
  sudo chmod 666 ${logfile}
fi

# The script requires three positional arguments: device, image ID, bucket.
# Fix: the message said "two arguments" while the check requires three,
# and the script exited with status 0 on misuse.
if [ $# -ne 3 ]; then
  echo "${name}: three arguments required"
  exit 1
fi

device="$1"
imageid="$2"
bucket="$3"

tarGzName="${imageid}.tar.gz"
tarGzPath=${workingdir}/${tarGzName}
39 |
# log_message appends a UTC-timestamped message to the log file.
function log_message() {
  echo -e "`date --utc +'%Y-%m-%dT%H:%M:%SZ'` ${name}: $1" >> $logfile
}
43 |
# check_execution_status logs the message in $2, right-padded to a fixed
# width, followed by [ COMPLETED ] or [ FAILED ] depending on the exit
# code passed in $1.
function check_execution_status() {
  local statusCode=$1
  local statusMessage="$2"
  local MAX_SIZE=90

  messageSize=${#statusMessage}
  paddingSize=`expr ${MAX_SIZE} - ${messageSize}`

  # Build the padding so the status column lines up across log lines.
  padding=""
  if [ ${paddingSize} -gt 0 ]; then
    for i in $(seq 0 ${paddingSize})
    do
      padding+=" "
    done
  fi

  if [ ${statusCode} -eq 0 ]; then
    log_message "${statusMessage} ${padding} [ COMPLETED ]"
  else
    log_message "${statusMessage} ${padding} [ FAILED ]"
  fi
}
66 |
# Parameter validation. With the $# check above, these only fire when a
# parameter is present but empty or malformed.
if [ "${device}" == "" ]; then
  echo "${name}: Device (param1) is required"
  # Fix: was "log_messsag", a typo that would fail with "command not found".
  log_message "Device (param1) is required"
  exit 1
fi

# The device must look like a whole-disk node, e.g. /dev/xvdf.
if [[ "${device}" =~ ^/dev/[a-z]{3,4}$ ]]; then
  log_message "Device ${device} is valid"
else
  echo "${name}: Device pattern does not match"
  log_message "${device} does not match required pattern"
  exit 1
fi

if [ "${imageid}" == "" ]; then
  echo "imageid (param2) is required"
  log_message "imageid (param2) is required"
  exit 1
fi

if [ "${bucket}" == "" ]; then
  echo "${name}: S3 bucket (param3) is required"
  log_message "S3 bucket (param3) is required"
  exit 1
fi
92 |
cd ${workingdir}

# Image the device to a raw file named after the image ID.
log_message "Creating raw disk image"
sudo dd if=${device} of=${imageid} bs=1M >> ${logfile} 2>&1
check_execution_status $? "Creating raw disk image ${workingdir}/${imageid} from ${device}"

# Compress the raw image into a .tar.gz archive.
log_message "Creating raw disk archive"
sudo tar -C ${workingdir} -zcf ${tarGzPath} ${imageid} >> ${logfile} 2>&1
check_execution_status $? "Creating raw disk archive ${tarGzName}"

log_message "Uploading disk image ${tarGzPath} to ${bucket}"
aws s3 cp ${tarGzPath} s3://${bucket} >> ${logfile} 2>&1
check_execution_status $? "Disk upload to ${bucket} completed"

# Clean up local copies to free space on the worker.
log_message "Removing disk image"
sudo rm -f ${imageid} >> ${logfile} 2>&1
check_execution_status $? "Removing disk ${imageid}"

log_message "Removing disk image ${tarGzPath}"
sudo rm -f ${tarGzPath} >> ${logfile} 2>&1
check_execution_status $? "Disk removal ${tarGzPath} completed"

# Marker file signalling that the archive has been fully processed.
log_message "Creation done file ${tarGzPath}.done"
sudo touch ${tarGzPath}.done
check_execution_status $? "Creation of ${tarGzPath}.done completed"
118 |
119 |
--------------------------------------------------------------------------------
/storage/cloudspanner/cloudspanner.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package cloudspanner implements cloud spanner as a hashR storage.
16 | package cloudspanner
17 |
18 | import (
19 | "context"
20 | "fmt"
21 | "time"
22 |
23 | "github.com/google/hashr/core/hashr"
24 |
25 | "cloud.google.com/go/spanner"
26 |
27 | "google.golang.org/api/iterator"
28 | )
29 |
// Storage allows to interact with cloud spanner.
type Storage struct {
	spannerClient *spanner.Client // client used for all reads and mutations against the hashR database
}
34 |
35 | // NewStorage creates new Storage struct that allows to interact with cloud spanner.
36 | func NewStorage(ctx context.Context, spannerClient *spanner.Client) (*Storage, error) {
37 | return &Storage{spannerClient: spannerClient}, nil
38 | }
39 |
40 | // UpdateJobs updates cloud spanner table.
41 | func (s *Storage) UpdateJobs(ctx context.Context, qHash string, p *hashr.ProcessingSource) error {
42 | _, err := s.spannerClient.Apply(ctx, []*spanner.Mutation{
43 | spanner.InsertOrUpdate("jobs",
44 | []string{
45 | "quick_sha256",
46 | "imported_at",
47 | "id",
48 | "repo",
49 | "repo_path",
50 | "location",
51 | "sha256",
52 | "status",
53 | "error",
54 | "preprocessing_duration",
55 | "processing_duration",
56 | "export_duration",
57 | "files_extracted",
58 | "files_exported"},
59 | []interface{}{
60 | qHash,
61 | time.Unix(p.ImportedAt, 0),
62 | p.ID,
63 | p.Repo,
64 | p.RepoPath,
65 | p.RemoteSourcePath,
66 | p.Sha256,
67 | p.Status,
68 | p.Error,
69 | int64(p.PreprocessingDuration.Seconds()),
70 | int64(p.ProcessingDuration.Seconds()),
71 | int64(p.ExportDuration.Seconds()),
72 | p.SampleCount,
73 | p.ExportCount,
74 | })})
75 | if err != nil {
76 | return fmt.Errorf("failed to insert data %v", err)
77 | }
78 |
79 | return nil
80 | }
81 |
82 | // FetchJobs fetches processing jobs from cloud spanner.
83 | func (s *Storage) FetchJobs(ctx context.Context) (map[string]string, error) {
84 | processed := make(map[string]string)
85 | iter := s.spannerClient.Single().Read(ctx, "jobs",
86 | spanner.AllKeys(), []string{"quick_sha256", "status"})
87 | defer iter.Stop()
88 | for {
89 | row, err := iter.Next()
90 | if err == iterator.Done {
91 | break
92 | }
93 | if err != nil {
94 | return nil, err
95 | }
96 | var quickSha256, status string
97 | err = row.ColumnByName("quick_sha256", &quickSha256)
98 | if err != nil {
99 | return nil, err
100 | }
101 | err = row.ColumnByName("status", &status)
102 | if err != nil {
103 | return nil, err
104 | }
105 | processed[quickSha256] = status
106 | }
107 | return processed, nil
108 | }
109 |
--------------------------------------------------------------------------------
/storage/postgres/postgres.go:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Package postgres implements PostgreSQL as a hashR storage.
16 | package postgres
17 |
18 | import (
19 | "context"
20 | "database/sql"
21 | "fmt"
22 |
23 | "github.com/google/hashr/core/hashr"
24 |
25 | // Blank import below is needed for the SQL driver.
26 | _ "github.com/lib/pq"
27 | )
28 |
// Storage allows to interact with PostgreSQL instance.
type Storage struct {
	sqlDB *sql.DB // pooled connection handle; all queries go through it
}
33 |
34 | // NewStorage creates new Storage struct that allows to interact with PostgreSQL instance and all the necessary tables, if they don't exist.
35 | func NewStorage(sqlDB *sql.DB) (*Storage, error) {
36 | // Check if the "jobs" table exists.
37 | exists, err := tableExists(sqlDB, "jobs")
38 | if err != nil {
39 | return nil, fmt.Errorf("error while checking if jobs table exists: %v", err)
40 | }
41 |
42 | if !exists {
43 | sql := `CREATE TABLE jobs (
44 | quick_sha256 VARCHAR(100) PRIMARY KEY,
45 | imported_at INT NOT NULL,
46 | id text,
47 | repo text,
48 | repo_path text,
49 | location text,
50 | sha256 VARCHAR(100),
51 | status VARCHAR(50),
52 | error text,
53 | preprocessing_duration INT,
54 | processing_duration INT,
55 | export_duration INT,
56 | files_extracted INT,
57 | files_exported INT
58 | )`
59 | _, err = sqlDB.Exec(sql)
60 | if err != nil {
61 | return nil, fmt.Errorf("error while creating jobs table: %v", err)
62 | }
63 | }
64 |
65 | return &Storage{sqlDB: sqlDB}, nil
66 | }
67 |
68 | func (s *Storage) rowExists(qHash string) (bool, error) {
69 | sqlStatement := `SELECT quick_sha256 FROM jobs WHERE quick_sha256=$1;`
70 | var quickSha256 string
71 | row := s.sqlDB.QueryRow(sqlStatement, qHash)
72 | switch err := row.Scan(&quickSha256); err {
73 | case sql.ErrNoRows:
74 | return false, nil
75 | case nil:
76 | return true, nil
77 | default:
78 | return false, err
79 | }
80 | }
81 |
82 | // UpdateJobs updates cloud spanner table.
83 | func (s *Storage) UpdateJobs(ctx context.Context, qHash string, p *hashr.ProcessingSource) error {
84 | exists, err := s.rowExists(qHash)
85 | if err != nil {
86 | return err
87 | }
88 |
89 | var sql string
90 | if exists {
91 | sql = `
92 | UPDATE jobs SET imported_at = $2, id = $3, repo = $4, repo_path = $5, location = $6, sha256 = $7, status = $8, error = $9, preprocessing_duration = $10, processing_duration = $11, export_duration = $12, files_extracted = $13, files_exported = $14
93 | WHERE quick_sha256 = $1`
94 | } else {
95 | sql = `
96 | INSERT INTO jobs (quick_sha256, imported_at, id, repo, repo_path, location, sha256, status, error, preprocessing_duration, processing_duration, export_duration, files_extracted, files_exported)
97 | VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)`
98 | }
99 |
100 | _, err = s.sqlDB.Exec(sql, qHash, p.ImportedAt, p.ID, p.Repo, p.RepoPath, p.RemoteSourcePath, p.Sha256, p.Status, p.Error, int(p.PreprocessingDuration.Seconds()), int(p.ProcessingDuration.Seconds()), int(p.ExportDuration.Seconds()), p.SampleCount, p.ExportCount)
101 | if err != nil {
102 | return err
103 | }
104 | return nil
105 | }
106 |
107 | // FetchJobs fetches processing jobs from cloud spanner.
108 | func (s *Storage) FetchJobs(ctx context.Context) (map[string]string, error) {
109 | processed := make(map[string]string)
110 |
111 | rows, err := s.sqlDB.Query("SELECT quick_sha256, status FROM jobs")
112 | if err != nil {
113 | return nil, err
114 | }
115 | defer rows.Close()
116 | for rows.Next() {
117 | var quickSha256, status string
118 | err = rows.Scan(&quickSha256, &status)
119 | if err != nil {
120 | return nil, err
121 | }
122 | processed[quickSha256] = status
123 | }
124 | err = rows.Err()
125 | if err != nil {
126 | return nil, err
127 | }
128 |
129 | return processed, nil
130 | }
131 |
// tableExists reports whether tableName exists in the connection's current
// schema of db.
func tableExists(db *sql.DB, tableName string) (bool, error) {
	// Restrict the lookup to the current schema; without the schema filter a
	// same-named table in ANY schema visible to the role (per
	// information_schema.tables) would produce a false positive.
	query := `
	SELECT EXISTS (
		SELECT 1
		FROM information_schema.tables
		WHERE table_schema = current_schema()
		AND table_name = $1
	)
	`

	var exists bool
	if err := db.QueryRow(query, tableName).Scan(&exists); err != nil {
		return false, err
	}

	return exists, nil
}
150 |
--------------------------------------------------------------------------------