├── .env.filplus ├── .env.repdao ├── .env.retrievalworker ├── .env.statemarketdeals ├── .github ├── ISSUE_TEMPLATE │ └── new-roadmap-node.md └── workflows │ └── build.yml ├── .gitignore ├── .golangci.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── aws.Dockerfile ├── filplus.md ├── go.mod ├── go.sum ├── integration ├── filplus │ ├── main.go │ ├── rand.go │ ├── rand_test.go │ └── util │ │ └── util.go ├── oneoff │ └── main.go ├── repdao │ └── main.go ├── repdao_dp │ └── main.go ├── spadev0 │ ├── main.go │ ├── main_test.go │ └── util.go ├── spcoverage │ └── main.go └── statemarketdeals │ └── main.go ├── pkg ├── cmd │ └── retrieval_worker │ │ └── main.go ├── convert │ └── multiaddr.go ├── env │ └── env.go ├── model │ ├── deal_state.go │ ├── protocol.go │ ├── protocol_cbor_gen.go │ ├── query_response.go │ ├── query_response_cbor_gen.go │ └── rpc │ │ └── types.go ├── net │ ├── bitswap.go │ ├── graphsync.go │ ├── host.go │ └── http.go ├── process │ └── process_manager.go ├── requesterror │ └── error.go ├── resolver │ ├── location_resolver.go │ ├── protocol_provider.go │ └── provider_resolver.go ├── resources │ ├── country-to-continent.json │ └── static.go ├── task │ ├── errors.go │ ├── errors_test.go │ ├── task.go │ ├── task_result.go │ └── task_worker.go └── tools │ └── tooling.go ├── result.zip └── worker ├── bitswap ├── cmd │ └── BitswapWorker.go └── worker.go ├── graphsync ├── cmd │ └── GraphsyncWorker.go └── worker.go ├── http ├── cmd │ └── HttpWorker.go └── worker.go └── stub ├── cmd └── StubWorker.go └── worker.go /.env.filplus: -------------------------------------------------------------------------------- 1 | LOTUS_API_URL=https://api.node.glif.io/rpc/v1 2 | LOTUS_API_TOKEN= 3 | QUEUE_MONGO_URI=mongodb+srv://user:pass@host/?retryWrites=true&w=majority 4 | QUEUE_MONGO_DATABASE=test 5 | RESULT_MONGO_URI=mongodb+srv://user:pass@host/?retryWrites=true&w=majority 6 | RESULT_MONGO_DATABASE=test 7 | 
STATEMARKETDEALS_MONGO_URI=mongodb+srv://user:pass@host/?retryWrites=true&w=majority 8 | STATEMARKETDEALS_MONGO_DATABASE=test 9 | PROVIDER_CACHE_TTL=24h 10 | LOCATION_CACHE_TTL=720h 11 | IPINFO_TOKEN= 12 | FILPLUS_INTEGRATION_BATCH_SIZE=1000 13 | GOLOG_LOG_FMT=json 14 | GOLOG_LOG_LEVEL=info 15 | FILPLUS_INTEGRATION_RANDOM_CONSTANT=4.0 16 | -------------------------------------------------------------------------------- /.env.repdao: -------------------------------------------------------------------------------- 1 | REPDAO_MONGO_URI= 2 | REPDAO_MONGO_DATABASE=reputation 3 | REPDAO_MONGO_COLLECTION= 4 | RESULT_MONGO_URI= 5 | RESULT_MONGO_DATABASE=prod 6 | -------------------------------------------------------------------------------- /.env.retrievalworker: -------------------------------------------------------------------------------- 1 | PROCESS_MODULES=./graphsync_worker,./http_worker,./bitswap_worker 2 | PROCESS_ERROR_INTERVAL=5s 3 | TASK_WORKER_POLL_INTERVAL=30s 4 | TASK_WORKER_TIMEOUT_BUFFER=10s 5 | QUEUE_MONGO_URI=mongodb+srv://user:pass@host/?retryWrites=true&w=majority 6 | QUEUE_MONGO_DATABASE=test 7 | RESULT_MONGO_URI=mongodb+srv://user:pass@host/?retryWrites=true&w=majority 8 | RESULT_MONGO_DATABASE=test 9 | ACCEPTED_CONTINENTS= 10 | ACCEPTED_COUNTRIES= 11 | CONCURRENCY_GRAPHSYNC_WORKER=10 12 | CONCURRENCY_BITSWAP_WORKER=10 13 | CONCURRENCY_HTTP_WORKER=10 14 | GOLOG_LOG_LEVEL=panic,convert=info,env=debug,bitswap_client=info,graphsync_client=info,http_client=info,process-manager=info,task-worker=info,bitswap_worker=info 15 | GOLOG_LOG_FMT=json 16 | -------------------------------------------------------------------------------- /.env.statemarketdeals: -------------------------------------------------------------------------------- 1 | LOTUS_API_URL=https://api.node.glif.io/rpc/v1 2 | LOTUS_API_TOKEN= 3 | STATEMARKETDEALS_MONGO_URI=mongodb+srv://user:pass@host/?retryWrites=true&w=majority 4 | STATEMARKETDEALS_MONGO_DATABASE=test 5 | 
STATEMARKETDEALS_BATCH_SIZE=1000 6 | STATEMARKETDEALS_INTERVAL=6h 7 | GOLOG_LOG_LEVEL=info 8 | GOLOG_LOG_FMT=json 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-roadmap-node.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: New Roadmap Node 3 | about: Generate a new starmap roadmap milestone or root 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Description: 11 | 12 | ETA: YYYY-MM-DD 13 | Maintainer: 14 | 15 | ```[tasklist] 16 | ### Milestones 17 | - [ ] 18 | ``` 19 | 20 | Known dependencies: 21 | - 22 | 23 | View this [starmap](https://starmap.site/) at: 24 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build, lint and dockerize 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | tags: [ "v*.*.*" ] 7 | pull_request: 8 | branches: [ "main" ] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v4 18 | with: 19 | go-version: '1.20.x' 20 | 21 | - name: Build 22 | run: make build 23 | 24 | - name: Test 25 | run: go test -v ./... 
26 | 27 | - name: Lint 28 | uses: golangci/golangci-lint-action@v3 29 | with: 30 | version: v1.53 31 | args: --timeout=10m 32 | 33 | - uses: dominikh/staticcheck-action@v1.3.0 34 | with: 35 | version: "2022.1.3" 36 | 37 | - name: Set up QEMU 38 | uses: docker/setup-qemu-action@v2 39 | 40 | - name: Set up Docker Buildx 41 | uses: docker/setup-buildx-action@v2 42 | 43 | - name: Login to Docker Hub 44 | if: github.event_name != 'pull_request' 45 | uses: docker/login-action@v2 46 | with: 47 | username: ${{ secrets.DOCKER_HUB_USERNAME }} 48 | password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} 49 | 50 | - name: Docker meta 51 | id: meta 52 | uses: docker/metadata-action@v4 53 | with: 54 | # list of Docker images to use as base name for tags 55 | images: | 56 | datapreservationprogram/retrievalbot 57 | # generate Docker tags based on the following events/attributes 58 | tags: | 59 | type=schedule 60 | type=ref,event=branch 61 | type=ref,event=pr 62 | type=semver,pattern={{version}} 63 | type=semver,pattern={{major}}.{{minor}} 64 | type=semver,pattern={{major}} 65 | type=raw,value=latest,enable={{is_default_branch}} 66 | 67 | - name: Build and push 68 | if: github.event_name != 'pull_request' 69 | uses: docker/build-push-action@v4 70 | with: 71 | context: . 
72 | platforms: linux/amd64 73 | file: ./Dockerfile 74 | push: true 75 | tags: ${{ steps.meta.outputs.tags }} 76 | labels: ${{ steps.meta.outputs.labels }} 77 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | .env 17 | .env.prod 18 | 19 | /bitswap_worker 20 | /filplus_integration 21 | /graphsync_worker 22 | /http_worker 23 | /oneoff_integration 24 | /retrieval_worker 25 | /statemarketdeals 26 | /stub_worker 27 | /repdao 28 | /repdao_dp 29 | /vendor 30 | /spcoverage 31 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | tests: false 3 | skip-dirs: 4 | - extern 5 | - integration/repdao 6 | - integration/repdao_dp 7 | 8 | linters: 9 | enable-all: true 10 | disable: 11 | - typecheck 12 | - interfacer 13 | - structcheck 14 | - golint 15 | - ifshort 16 | - scopelint 17 | - varcheck 18 | - varnamelen 19 | - maligned 20 | - deadcode 21 | - structcheck 22 | - gci 23 | - goimports 24 | - gofumpt 25 | - nolintlint 26 | - ireturn 27 | - nosnakecase 28 | - nlreturn 29 | - godox 30 | - gomoddirectives 31 | - rowserrcheck 32 | - sqlclosecheck 33 | - wastedassign 34 | - gocognit 35 | - wsl 36 | - musttag 37 | - exhaustivestruct 38 | - cyclop 39 | - gomnd 40 | - gochecknoglobals 41 | - funlen 42 | - gocyclo 43 | - exhaustruct 44 | - wrapcheck 45 | - nestif 46 | - containedctx 47 | - maintidx 48 | - nonamedreturns 49 | - nilnil 50 | - prealloc 51 | - gochecknoinits 52 | - dupl 53 | - 
forbidigo 54 | - godot 55 | - depguard 56 | - nakedret 57 | - govet 58 | 59 | linters-settings: 60 | revive: 61 | rules: 62 | - name: var-naming 63 | disabled: true 64 | lll: 65 | line-length: 120 66 | tagliatelle: 67 | case: 68 | rules: 69 | json: "snake" -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.20-alpine as builder 2 | WORKDIR /app 3 | COPY . . 4 | RUN go build -o build/retrieval_worker ./pkg/cmd/retrieval_worker 5 | RUN go build -o build/stub_worker ./worker/stub/cmd 6 | RUN go build -o build/graphsync_worker ./worker/graphsync/cmd 7 | RUN go build -o build/http_worker ./worker/http/cmd 8 | RUN go build -o build/bitswap_worker ./worker/bitswap/cmd 9 | RUN go build -o build/oneoff_integration ./integration/oneoff 10 | RUN go build -o build/statemarketdeals ./integration/statemarketdeals 11 | RUN go build -o build/filplus_integration ./integration/filplus 12 | RUN go build -o build/repdao ./integration/repdao 13 | RUN go build -o build/repdao_dp ./integration/repdao_dp 14 | RUN go build -o build/spcoverage ./integration/spcoverage 15 | 16 | FROM alpine:latest 17 | WORKDIR /app 18 | COPY --from=builder /app/build/ . 
19 | CMD ["/app/retrieval_worker"] 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The contents of this repository are Copyright (c) corresponding authors and 2 | contributors, licensed under the `Permissive License Stack` meaning either of: 3 | 4 | - Apache-2.0 Software License: https://www.apache.org/licenses/LICENSE-2.0 5 | ([...4tr2kfsq](https://dweb.link/ipfs/bafkreiankqxazcae4onkp436wag2lj3ccso4nawxqkkfckd6cg4tr2kfsq)) 6 | 7 | - MIT Software License: https://opensource.org/licenses/MIT 8 | ([...vljevcba](https://dweb.link/ipfs/bafkreiepofszg4gfe2gzuhojmksgemsub2h4uy2gewdnr35kswvljevcba)) 9 | 10 | You may not use the contents of this repository except in compliance 11 | with one of the listed Licenses. For an extended clarification of the 12 | intent behind the choice of Licensing please refer to 13 | https://protocol.ai/blog/announcing-the-permissive-license-stack/ 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the terms listed in this notice is distributed on 17 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 18 | either express or implied. See each License for the specific language 19 | governing permissions and limitations under that License. 20 | 21 | 22 | `SPDX-License-Identifier: Apache-2.0 OR MIT` 23 | 24 | Verbatim copies of both licenses are included below: 25 | 26 |
Apache-2.0 Software License 27 | 28 | ``` 29 | Apache License 30 | Version 2.0, January 2004 31 | http://www.apache.org/licenses/ 32 | 33 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 34 | 35 | 1. Definitions. 36 | 37 | "License" shall mean the terms and conditions for use, reproduction, 38 | and distribution as defined by Sections 1 through 9 of this document. 39 | 40 | "Licensor" shall mean the copyright owner or entity authorized by 41 | the copyright owner that is granting the License. 42 | 43 | "Legal Entity" shall mean the union of the acting entity and all 44 | other entities that control, are controlled by, or are under common 45 | control with that entity. For the purposes of this definition, 46 | "control" means (i) the power, direct or indirect, to cause the 47 | direction or management of such entity, whether by contract or 48 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 49 | outstanding shares, or (iii) beneficial ownership of such entity. 50 | 51 | "You" (or "Your") shall mean an individual or Legal Entity 52 | exercising permissions granted by this License. 53 | 54 | "Source" form shall mean the preferred form for making modifications, 55 | including but not limited to software source code, documentation 56 | source, and configuration files. 57 | 58 | "Object" form shall mean any form resulting from mechanical 59 | transformation or translation of a Source form, including but 60 | not limited to compiled object code, generated documentation, 61 | and conversions to other media types. 62 | 63 | "Work" shall mean the work of authorship, whether in Source or 64 | Object form, made available under the License, as indicated by a 65 | copyright notice that is included in or attached to the work 66 | (an example is provided in the Appendix below). 
67 | 68 | "Derivative Works" shall mean any work, whether in Source or Object 69 | form, that is based on (or derived from) the Work and for which the 70 | editorial revisions, annotations, elaborations, or other modifications 71 | represent, as a whole, an original work of authorship. For the purposes 72 | of this License, Derivative Works shall not include works that remain 73 | separable from, or merely link (or bind by name) to the interfaces of, 74 | the Work and Derivative Works thereof. 75 | 76 | "Contribution" shall mean any work of authorship, including 77 | the original version of the Work and any modifications or additions 78 | to that Work or Derivative Works thereof, that is intentionally 79 | submitted to Licensor for inclusion in the Work by the copyright owner 80 | or by an individual or Legal Entity authorized to submit on behalf of 81 | the copyright owner. For the purposes of this definition, "submitted" 82 | means any form of electronic, verbal, or written communication sent 83 | to the Licensor or its representatives, including but not limited to 84 | communication on electronic mailing lists, source code control systems, 85 | and issue tracking systems that are managed by, or on behalf of, the 86 | Licensor for the purpose of discussing and improving the Work, but 87 | excluding communication that is conspicuously marked or otherwise 88 | designated in writing by the copyright owner as "Not a Contribution." 89 | 90 | "Contributor" shall mean Licensor and any individual or Legal Entity 91 | on behalf of whom a Contribution has been received by Licensor and 92 | subsequently incorporated within the Work. 93 | 94 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 95 | this License, each Contributor hereby grants to You a perpetual, 96 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 97 | copyright license to reproduce, prepare Derivative Works of, 98 | publicly display, publicly perform, sublicense, and distribute the 99 | Work and such Derivative Works in Source or Object form. 100 | 101 | 3. Grant of Patent License. Subject to the terms and conditions of 102 | this License, each Contributor hereby grants to You a perpetual, 103 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 104 | (except as stated in this section) patent license to make, have made, 105 | use, offer to sell, sell, import, and otherwise transfer the Work, 106 | where such license applies only to those patent claims licensable 107 | by such Contributor that are necessarily infringed by their 108 | Contribution(s) alone or by combination of their Contribution(s) 109 | with the Work to which such Contribution(s) was submitted. If You 110 | institute patent litigation against any entity (including a 111 | cross-claim or counterclaim in a lawsuit) alleging that the Work 112 | or a Contribution incorporated within the Work constitutes direct 113 | or contributory patent infringement, then any patent licenses 114 | granted to You under this License for that Work shall terminate 115 | as of the date such litigation is filed. 116 | 117 | 4. Redistribution. 
You may reproduce and distribute copies of the 118 | Work or Derivative Works thereof in any medium, with or without 119 | modifications, and in Source or Object form, provided that You 120 | meet the following conditions: 121 | 122 | (a) You must give any other recipients of the Work or 123 | Derivative Works a copy of this License; and 124 | 125 | (b) You must cause any modified files to carry prominent notices 126 | stating that You changed the files; and 127 | 128 | (c) You must retain, in the Source form of any Derivative Works 129 | that You distribute, all copyright, patent, trademark, and 130 | attribution notices from the Source form of the Work, 131 | excluding those notices that do not pertain to any part of 132 | the Derivative Works; and 133 | 134 | (d) If the Work includes a "NOTICE" text file as part of its 135 | distribution, then any Derivative Works that You distribute must 136 | include a readable copy of the attribution notices contained 137 | within such NOTICE file, excluding those notices that do not 138 | pertain to any part of the Derivative Works, in at least one 139 | of the following places: within a NOTICE text file distributed 140 | as part of the Derivative Works; within the Source form or 141 | documentation, if provided along with the Derivative Works; or, 142 | within a display generated by the Derivative Works, if and 143 | wherever such third-party notices normally appear. The contents 144 | of the NOTICE file are for informational purposes only and 145 | do not modify the License. You may add Your own attribution 146 | notices within Derivative Works that You distribute, alongside 147 | or as an addendum to the NOTICE text from the Work, provided 148 | that such additional attribution notices cannot be construed 149 | as modifying the License. 
150 | 151 | You may add Your own copyright statement to Your modifications and 152 | may provide additional or different license terms and conditions 153 | for use, reproduction, or distribution of Your modifications, or 154 | for any such Derivative Works as a whole, provided Your use, 155 | reproduction, and distribution of the Work otherwise complies with 156 | the conditions stated in this License. 157 | 158 | 5. Submission of Contributions. Unless You explicitly state otherwise, 159 | any Contribution intentionally submitted for inclusion in the Work 160 | by You to the Licensor shall be under the terms and conditions of 161 | this License, without any additional terms or conditions. 162 | Notwithstanding the above, nothing herein shall supersede or modify 163 | the terms of any separate license agreement you may have executed 164 | with Licensor regarding such Contributions. 165 | 166 | 6. Trademarks. This License does not grant permission to use the trade 167 | names, trademarks, service marks, or product names of the Licensor, 168 | except as required for reasonable and customary use in describing the 169 | origin of the Work and reproducing the content of the NOTICE file. 170 | 171 | 7. Disclaimer of Warranty. Unless required by applicable law or 172 | agreed to in writing, Licensor provides the Work (and each 173 | Contributor provides its Contributions) on an "AS IS" BASIS, 174 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 175 | implied, including, without limitation, any warranties or conditions 176 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 177 | PARTICULAR PURPOSE. You are solely responsible for determining the 178 | appropriateness of using or redistributing the Work and assume any 179 | risks associated with Your exercise of permissions under this License. 180 | 181 | 8. Limitation of Liability. 
In no event and under no legal theory, 182 | whether in tort (including negligence), contract, or otherwise, 183 | unless required by applicable law (such as deliberate and grossly 184 | negligent acts) or agreed to in writing, shall any Contributor be 185 | liable to You for damages, including any direct, indirect, special, 186 | incidental, or consequential damages of any character arising as a 187 | result of this License or out of the use or inability to use the 188 | Work (including but not limited to damages for loss of goodwill, 189 | work stoppage, computer failure or malfunction, or any and all 190 | other commercial damages or losses), even if such Contributor 191 | has been advised of the possibility of such damages. 192 | 193 | 9. Accepting Warranty or Additional Liability. While redistributing 194 | the Work or Derivative Works thereof, You may choose to offer, 195 | and charge a fee for, acceptance of support, warranty, indemnity, 196 | or other liability obligations and/or rights consistent with this 197 | License. However, in accepting such obligations, You may act only 198 | on Your own behalf and on Your sole responsibility, not on behalf 199 | of any other Contributor, and only if You agree to indemnify, 200 | defend, and hold each Contributor harmless for any liability 201 | incurred by, or claims asserted against, such Contributor by reason 202 | of your accepting any such warranty or additional liability. 203 | 204 | END OF TERMS AND CONDITIONS 205 | ``` 206 |
207 | 208 |
MIT Software License 209 | 210 | ``` 211 | Permission is hereby granted, free of charge, to any person obtaining a copy 212 | of this software and associated documentation files (the "Software"), to deal 213 | in the Software without restriction, including without limitation the rights 214 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 215 | copies of the Software, and to permit persons to whom the Software is 216 | furnished to do so, subject to the following conditions: 217 | 218 | The above copyright notice and this permission notice shall be included in 219 | all copies or substantial portions of the Software. 220 | 221 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 222 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 223 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 224 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 225 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 226 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 227 | THE SOFTWARE. 228 | ``` 229 |
230 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | go build -o retrieval_worker ./pkg/cmd/retrieval_worker 3 | go build -o stub_worker ./worker/stub/cmd 4 | go build -o graphsync_worker ./worker/graphsync/cmd 5 | go build -o http_worker ./worker/http/cmd 6 | go build -o bitswap_worker ./worker/bitswap/cmd 7 | go build -o oneoff_integration ./integration/oneoff 8 | go build -o statemarketdeals ./integration/statemarketdeals 9 | go build -o filplus_integration ./integration/filplus 10 | go build -o repdao ./integration/repdao 11 | go build -o repdao_dp ./integration/repdao_dp 12 | go build -o spcoverage ./integration/spcoverage 13 | 14 | lint: 15 | gofmt -s -w . 16 | golangci-lint run --fix --timeout 10m 17 | staticcheck ./... 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RetrievalBot 2 | 3 | The goal of retrieval bot is to offer a scalable framework to perform retrieval testing over Filecoin network. 4 | 5 | There is no centralized orchestrator to manage retrieval queue or work. Instead, it uses MongoDB to manage work queue as well as saving retrieval results. 
6 | 7 | ## Result Snapshot 2024-04-16 8 | The retrieval success ratio and count for each SP per day per protocol has been exported into [result.zip](./result.zip) 9 | 10 | Query used to generate the data 11 | ```javascript 12 | db.task_result.aggregate([ 13 | { 14 | $group: { 15 | _id: { 16 | sp: "$task.provider.id", 17 | type: "$task.module", 18 | date: { 19 | $dateToString: { 20 | format: "%Y-%m-%d", 21 | date: "$created_at", 22 | timezone: "UTC", 23 | } 24 | }, 25 | }, 26 | count: { $sum: 1 }, 27 | success: { $sum: { $cond: [{ $eq: ["$result.success", true] }, 1, 0] } }, 28 | } 29 | }, 30 | { 31 | $project: { 32 | _id: 0, 33 | sp: "$_id.sp", 34 | type: "$_id.type", 35 | date: "$_id.date", 36 | success: "$success", 37 | total: "$count", 38 | ratio: { $divide: ["$success", "$count"] }, 39 | } 40 | } 41 | ]) 42 | ``` 43 | 44 | ## Workers 45 | Workers refer to the unit that consumes the worker queue. There are 4 basic types of workers as of now. 46 | 47 | ### Bitswap Worker 48 | This worker currently only support retrieving a single block from the storage provider: 49 | 1. Lookup the provider's libp2p protocols 50 | 2. If it is using boost market, then lookup the supported retrieval protocols 51 | 3. Find the bitswap protocol info and make a single block retrieval 52 | 53 | ### Graphsync Worker 54 | This worker currently only support retrieving the root block from the storage provider: 55 | 1. Make graphsync retrieval with selector that only matches root block from the storage provider 56 | 57 | ### HTTP Worker 58 | This worker currently only support retrieving the first few MiB of the pieces from the storage provider: 59 | 1. Lookup the provider's libp2p protocols 60 | 2. If it is using boost market, then lookup the supported retrieval protocols 61 | 3. Find the HTTP protocol info and make the retrieval for up to first few MiB 62 | 63 | ### Stub Worker 64 | This type of worker does nothing but saves random result to the database. 
It is used to test the database connection and the queue. 65 | 66 | ## Integrations 67 | Integrations refer to the unit that either pushes work item to the retrieval queue, or other long-running jobs that may interact with the database in different ways 68 | 69 | ### StateMarketDeals Integration 70 | This integration periodically pulls the statemarketdeals.json from GLIP API and saves it to the database. 71 | 72 | ### FILPLUS Integration 73 | This integration pulls random active deals from StateMarketDeals database and push Bitswap/Graphsync/HTTP retrieval workitems into the work queue. 74 | 75 | ## Get started 76 | 1. Setup a mongodb server 77 | 2. Setup a free ipinfo account and grab a token 78 | 3. `make build` 79 | 4. Run the software natively or via a docker with environment variables. You need to run three programs: 80 | 1. `statemarketdeals` that pulls statemarketdeals.json from GLIP API and saves it to the database. Check [.env.statemarketdeals](./.env.statemarketdeals) for environment variables. 81 | 2. `filplus_integration` that queues retrieval tasks into a task queue. Check [.env.filplus](./.env.filplus) for environment variables. 82 | 3. `retrieval_worker` that consumes the task queue and performs the retrieval. Check [.env.retrievalworker](./.env.retrievalworker) for environment variables. 83 | 5. All programs above will load `.env` file in the working directory so you will need to copy the relevant environment variable file to `.env` 84 | 6. When running `retrieval_worker`, you need to make sure `bitswap_worker`, `graphsync_worker`, `http_worker` are in the working directory as well. 85 | -------------------------------------------------------------------------------- /aws.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/docker/library/golang:1.19-alpine as builder 2 | WORKDIR /app 3 | COPY . . 
4 | RUN go build -o build/retrieval_worker ./pkg/cmd/retrieval_worker 5 | RUN go build -o build/stub_worker ./worker/stub/cmd 6 | RUN go build -o build/graphsync_worker ./worker/graphsync/cmd 7 | RUN go build -o build/http_worker ./worker/http/cmd 8 | RUN go build -o build/bitswap_worker ./worker/bitswap/cmd 9 | RUN go build -o build/oneoff_integration ./integration/oneoff 10 | RUN go build -o build/statemarketdeals ./integration/statemarketdeals 11 | RUN go build -o build/filplus_integration ./integration/filplus 12 | RUN go build -o build/repdao ./integration/repdao 13 | RUN go build -o build/repdao_dp ./integration/repdao_dp 14 | RUN go build -o build/spcoverage ./integration/spcoverage 15 | 16 | FROM public.ecr.aws/docker/library/alpine:latest 17 | WORKDIR /app 18 | COPY --from=builder /app/build/ . 19 | CMD ["/app/retrieval_worker"] 20 | -------------------------------------------------------------------------------- /filplus.md: -------------------------------------------------------------------------------- 1 | # Filplus Retrieval Sampling 2 | 3 | ## Background 4 | 5 | The goal of filplus retrieval sampling is to offer an aggregated view of how each storage provider performs in 6 | retrievability. 7 | The Filecoin community would like to make this retrieval sampling logic public and transparent, so that participants 8 | including storage providers, notaries, and data clients are aligned on best practices and how their reputation is 9 | evaluated. 10 | 11 | ## Sampling Logic 12 | 13 | All active verified deals in Filecoin network will be sampled randomly. 14 | 15 | Newer deals will have a higher chance to be sampled than older deals. The chance is 4x for each year the deal is newer 16 | determined by deal start date. This gives newer deal a higher chance to be sampled for retrieval testing. 17 | 18 | ## Deployment 19 | 20 | Currently deployed in AWS Oregon, Frankfurt and Singapore regions. 
Retrievals will be performed from closest region to 21 | the storage provider. 22 | 23 | A China mainland deployment is also planned (ETA 2023-06). 24 | 25 | ## Retrieval Testing 26 | 27 | ### Graphsync 28 | 29 | Graphsync is the default retrieval protocol for Filecoin. It is a libp2p protocol that is used to retrieve data from the 30 | storage provider. 31 | 32 | We look at the `label` field of the `PublishStorageDeals` message. If the `label` is a valid `CID` and it's 33 | not `pieceCID`, then we **assume** it is the root CID of the deal. 34 | 35 | We then make a graphsync retrieval request to retrieve only the root block. 36 | 37 | ### Bitswap 38 | 39 | Bitswap is the retrieval protocol that's also used in IPFS. It can be enabled by 40 | running [`booster-bitswap nodes`](https://boost.filecoin.io/bitswap-retrieval) 41 | 42 | Similar to Graphsync retrieval, we assume the label is the rootCID of the deal. We then first query the multiaddr of the 43 | storage provider to get the libp2p multiaddr that serves bitswap retrieval. 44 | Then we will attempt to make a single block retrieval request to the storage provider using bitswap protocol for just 45 | the root block. 46 | 47 | ### HTTP 48 | 49 | HTTP is the retrieval protocol that can serve piece, file and block retrieval. It can be enabled by 50 | running [`booster-http nodes`](https://boost.filecoin.io/http-retrieval) 51 | 52 | Piece retrieval is by default enabled so instead of assuming the deal proposal has the correct RootCID set in the label, 53 | we will use the `pieceCID` field of the deal proposal and make piece retrieval. 54 | We will first query the multiaddr of the storage provider to get the HTTP endpoint that serves HTTP retrieval and make a 55 | piece retrieval for only first MiB of the piece. 56 | 57 | ### HTTP V2 58 | 59 | As part of the new HTTP retrieval push, we are proposing below changes to the HTTP retrieval testing (not implemented): 60 | 61 | 1. 
Connect to libp2p multiaddr of the provider that's published on the chain 62 | 2. Get HTTP multiaddr using /fil/retrieval/transport/1.0.0 protocol - The SP needs to handle this protocol and return 63 | HTTP endpoint. SP can use boost or other implementation that produces the same behavior 64 | 65 | #### Piece range retrieval 66 | 67 | 1. Use the `pieceCID` field of the deal proposal and make piece retrieval with the HTTP endpoint 68 | 2. Make range retrieval for the first 100 bytes and verify it is a valid CAR V1/V2 header 69 | * If it is a [CAR V2 header](https://ipld.io/specs/transport/car/carv2/#header), then check the `data_size` in the 70 | header to calculate how much padding has been used. In the next step, we only need to perform range retrieval 71 | between `[data_offset, data_offset + data_length]` 72 | 3. Make ranges retrieval for a random offset of that piece, up to 8MiB length 73 | * We check if retrieved data is all zeroes. Overtime, we will get a ratio of how much datacap is under utilized by 74 | padding data with zeroes 75 | * Try to find `[varint, CID, block, varint, CID]`. This is a valid IPLD data block. A valid IPLD block size is <= 76 | 4MiB so we should expect to get at least 77 | one [IPLD data block](https://ipld.io/specs/transport/car/carv1/#format-description) within that range 78 | * Calculate the compression ratio of the block bytes using zstd compression 79 | * High compression ratio / low entropy means the data is highly repetitive (i.e. repeating "hello world") 80 | * Low compression ratio / high entropy means the data is noisy (i.e. random bytes, already compressed or 81 | encrypted) 82 | * Useful data usually does not have an extremely high or low entropy and the compression ratio can be compared 83 | to the original data source 84 | 4. The purpose of this retrieval type is to make sure the clients are not padding too much zeroes or are actually 85 | storing data that is not useful. 
Since the retrieval is lightweight, most of the retrieval testing will be using this 86 | kind 87 | 88 | #### Whole piece retrieval 89 | 90 | 1. Retrieve the whole piece and verify `PieceCID` and `PieceSize` match the deal proposal 91 | 2. The purpose of this retrieval is to make sure the HTTP endpoint is in fact serving the correct piece. This retrieval 92 | is more expensive and will be used very rarely 93 | 94 | #### File retrieval 95 | 96 | 1. If the client has provided a list of CIDs for files included in the dataset, we have the opportunity to retrieve the 97 | whole file 98 | 2. Retrieve the first 4k bytes of the file, check the file type 99 | using [libmagic](https://man7.org/linux/man-pages/man3/libmagic.3.html). Over time, this will give us an overall 100 | picture of what types of files this dataset is composed of 101 | 3. At a lower sampling rate, retrieve the whole file and store it in an online storage (i.e. web3.storage). The file can 102 | be downloaded by a notary to check the content 103 | 104 | ## Storage Provider Self-Testing 105 | It is possible to check how well you, as a storage provider, are performing in terms of retrievability. 106 | 107 | You may run a one-off test with your storage provider by following the instructions below: 108 | ``` 109 | go install github.com/data-preservation-programs/RetrievalBot/integration/oneoff@latest 110 | oneoff 111 | ``` 112 | 113 | This will fetch relevant information about your miner and the deal and run retrieval testing against all currently available protocols. 114 | 115 | The result will not be pushed to the reputation working group database. 
116 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/data-preservation-programs/RetrievalBot 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/bcicen/jstream v1.0.1 7 | github.com/filecoin-project/go-cbor-util v0.0.1 8 | github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc5 9 | github.com/filecoin-project/go-retrieval-types v1.2.0 10 | github.com/filecoin-project/go-state-types v0.10.0 11 | github.com/filecoin-project/lassie v0.8.1 12 | github.com/google/uuid v1.3.0 13 | github.com/hannahhoward/cbor-gen-for v0.0.0-20230214144701-5d17c9d5243c 14 | github.com/ipfs/go-cid v0.4.0 15 | github.com/ipfs/go-datastore v0.6.0 16 | github.com/ipfs/go-ipfs-blockstore v1.3.0 17 | github.com/ipfs/go-libipfs v0.6.1 18 | github.com/ipfs/go-log/v2 v2.5.1 19 | github.com/ipld/go-ipld-prime v0.20.1-0.20230329011551-5056175565b0 20 | github.com/jellydator/ttlcache/v3 v3.0.1 21 | github.com/joho/godotenv v1.5.1 22 | github.com/klauspost/compress v1.16.0 23 | github.com/libp2p/go-libp2p v0.26.4 24 | github.com/mitchellh/mapstructure v1.5.0 25 | github.com/multiformats/go-multiaddr v0.9.0 26 | github.com/multiformats/go-multistream v0.4.1 27 | github.com/pkg/errors v0.9.1 28 | github.com/rjNemo/underscore v0.6.1 29 | github.com/stretchr/testify v1.8.4 30 | github.com/urfave/cli/v2 v2.25.7 31 | github.com/whyrusleeping/cbor-gen v0.0.0-20230126041949-52956bd4c9aa 32 | github.com/ybbus/jsonrpc/v3 v3.1.4 33 | go.mongodb.org/mongo-driver v1.11.3 34 | golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 35 | golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 36 | ) 37 | 38 | require ( 39 | contrib.go.opencensus.io/exporter/prometheus v0.4.2 // indirect 40 | github.com/benbjohnson/clock v1.3.0 // indirect 41 | github.com/beorn7/perks v1.0.1 // indirect 42 | github.com/bep/debounce v1.2.1 // indirect 43 | github.com/cespare/xxhash/v2 v2.2.0 // 
indirect 44 | github.com/containerd/cgroups v1.1.0 // indirect 45 | github.com/coreos/go-systemd/v22 v22.5.0 // indirect 46 | github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect 47 | github.com/cskr/pubsub v1.0.2 // indirect 48 | github.com/davecgh/go-spew v1.1.1 // indirect 49 | github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect 50 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.1.0 // indirect 51 | github.com/docker/go-units v0.5.0 // indirect 52 | github.com/dustin/go-humanize v1.0.1 // indirect 53 | github.com/elastic/gosigar v0.14.2 // indirect 54 | github.com/filecoin-project/go-address v1.1.0 // indirect 55 | github.com/filecoin-project/go-amt-ipld/v4 v4.1.0 // indirect 56 | github.com/filecoin-project/go-crypto v0.0.1 // indirect 57 | github.com/filecoin-project/go-ds-versioning v0.1.2 // indirect 58 | github.com/filecoin-project/go-hamt-ipld/v3 v3.2.0 // indirect 59 | github.com/filecoin-project/go-statemachine v1.0.3 // indirect 60 | github.com/filecoin-project/go-statestore v0.2.0 // indirect 61 | github.com/flynn/noise v1.0.0 // indirect 62 | github.com/francoispqt/gojay v1.2.13 // indirect 63 | github.com/go-kit/log v0.2.1 // indirect 64 | github.com/go-logfmt/logfmt v0.6.0 // indirect 65 | github.com/go-logr/logr v1.2.3 // indirect 66 | github.com/go-logr/stdr v1.2.2 // indirect 67 | github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0 // indirect 68 | github.com/godbus/dbus/v5 v5.1.0 // indirect 69 | github.com/gogo/protobuf v1.3.2 // indirect 70 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 71 | github.com/golang/mock v1.6.0 // indirect 72 | github.com/golang/protobuf v1.5.2 // indirect 73 | github.com/golang/snappy v0.0.4 // indirect 74 | github.com/google/gopacket v1.1.19 // indirect 75 | github.com/google/pprof v0.0.0-20230222194610-99052d3372e7 // indirect 76 | github.com/hannahhoward/go-pubsub v1.0.0 // indirect 77 | github.com/hashicorp/errwrap v1.1.0 // indirect 78 | 
github.com/hashicorp/go-multierror v1.1.1 // indirect 79 | github.com/hashicorp/golang-lru v0.5.4 // indirect 80 | github.com/huin/goupnp v1.1.0 // indirect 81 | github.com/ipfs/bbloom v0.0.4 // indirect 82 | github.com/ipfs/go-bitfield v1.1.0 // indirect 83 | github.com/ipfs/go-block-format v0.1.1 // indirect 84 | github.com/ipfs/go-blockservice v0.5.0 // indirect 85 | github.com/ipfs/go-graphsync v0.14.4 // indirect 86 | github.com/ipfs/go-ipfs-delay v0.0.1 // indirect 87 | github.com/ipfs/go-ipfs-ds-help v1.1.0 // indirect 88 | github.com/ipfs/go-ipfs-exchange-interface v0.2.0 // indirect 89 | github.com/ipfs/go-ipfs-pq v0.0.3 // indirect 90 | github.com/ipfs/go-ipfs-util v0.0.2 // indirect 91 | github.com/ipfs/go-ipld-cbor v0.0.6 // indirect 92 | github.com/ipfs/go-ipld-format v0.4.0 // indirect 93 | github.com/ipfs/go-ipld-legacy v0.1.1 // indirect 94 | github.com/ipfs/go-log v1.0.5 // indirect 95 | github.com/ipfs/go-merkledag v0.10.0 // indirect 96 | github.com/ipfs/go-metrics-interface v0.0.1 // indirect 97 | github.com/ipfs/go-peertaskqueue v0.8.1 // indirect 98 | github.com/ipfs/go-unixfsnode v1.6.0 // indirect 99 | github.com/ipfs/go-verifcid v0.0.2 // indirect 100 | github.com/ipld/go-car/v2 v2.9.0 // indirect 101 | github.com/ipld/go-codec-dagpb v1.6.0 // indirect 102 | github.com/ipni/go-libipni v0.0.4 // indirect 103 | github.com/jackpal/go-nat-pmp v1.0.2 // indirect 104 | github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect 105 | github.com/jbenet/goprocess v0.1.4 // indirect 106 | github.com/jpillora/backoff v1.0.0 // indirect 107 | github.com/json-iterator/go v1.1.12 // indirect 108 | github.com/klauspost/cpuid/v2 v2.2.4 // indirect 109 | github.com/koron/go-ssdp v0.0.4 // indirect 110 | github.com/libp2p/go-buffer-pool v0.1.0 // indirect 111 | github.com/libp2p/go-cidranger v1.1.0 // indirect 112 | github.com/libp2p/go-flow-metrics v0.1.0 // indirect 113 | github.com/libp2p/go-libp2p-asn-util v0.3.0 // indirect 114 | 
github.com/libp2p/go-libp2p-record v0.2.0 // indirect 115 | github.com/libp2p/go-libp2p-routing-helpers v0.6.1 // indirect 116 | github.com/libp2p/go-mplex v0.7.0 // indirect 117 | github.com/libp2p/go-msgio v0.3.0 // indirect 118 | github.com/libp2p/go-nat v0.1.0 // indirect 119 | github.com/libp2p/go-netroute v0.2.1 // indirect 120 | github.com/libp2p/go-reuseport v0.2.0 // indirect 121 | github.com/libp2p/go-yamux/v4 v4.0.0 // indirect 122 | github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd // indirect 123 | github.com/mattn/go-isatty v0.0.17 // indirect 124 | github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect 125 | github.com/miekg/dns v1.1.51 // indirect 126 | github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect 127 | github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect 128 | github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 // indirect 129 | github.com/minio/sha256-simd v1.0.0 // indirect 130 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 131 | github.com/modern-go/reflect2 v1.0.2 // indirect 132 | github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe // indirect 133 | github.com/mr-tron/base58 v1.2.0 // indirect 134 | github.com/multiformats/go-base32 v0.1.0 // indirect 135 | github.com/multiformats/go-base36 v0.2.0 // indirect 136 | github.com/multiformats/go-multiaddr-dns v0.3.1 // indirect 137 | github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect 138 | github.com/multiformats/go-multibase v0.1.1 // indirect 139 | github.com/multiformats/go-multicodec v0.8.1 // indirect 140 | github.com/multiformats/go-multihash v0.2.1 // indirect 141 | github.com/multiformats/go-varint v0.0.7 // indirect 142 | github.com/onsi/ginkgo/v2 v2.8.4 // indirect 143 | github.com/opencontainers/runtime-spec v1.0.2 // indirect 144 | github.com/opentracing/opentracing-go v1.2.0 // indirect 145 | github.com/pbnjay/memory 
v0.0.0-20210728143218-7b4eea64cf58 // indirect 146 | github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect 147 | github.com/pmezard/go-difflib v1.0.0 // indirect 148 | github.com/polydawn/refmt v0.89.0 // indirect 149 | github.com/prometheus/client_golang v1.14.0 // indirect 150 | github.com/prometheus/client_model v0.3.0 // indirect 151 | github.com/prometheus/common v0.40.0 // indirect 152 | github.com/prometheus/procfs v0.9.0 // indirect 153 | github.com/prometheus/statsd_exporter v0.23.0 // indirect 154 | github.com/quic-go/qpack v0.4.0 // indirect 155 | github.com/quic-go/qtls-go1-19 v0.2.1 // indirect 156 | github.com/quic-go/qtls-go1-20 v0.1.1 // indirect 157 | github.com/quic-go/quic-go v0.33.0 // indirect 158 | github.com/quic-go/webtransport-go v0.5.2 // indirect 159 | github.com/raulk/go-watchdog v1.3.0 // indirect 160 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 161 | github.com/spaolacci/murmur3 v1.1.0 // indirect 162 | github.com/ugorji/go/codec v1.2.6 // indirect 163 | github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect 164 | github.com/xdg-go/pbkdf2 v1.0.0 // indirect 165 | github.com/xdg-go/scram v1.1.1 // indirect 166 | github.com/xdg-go/stringprep v1.0.3 // indirect 167 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect 168 | github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect 169 | go.opencensus.io v0.24.0 // indirect 170 | go.opentelemetry.io/otel v1.14.0 // indirect 171 | go.opentelemetry.io/otel/trace v1.14.0 // indirect 172 | go.uber.org/atomic v1.10.0 // indirect 173 | go.uber.org/dig v1.16.1 // indirect 174 | go.uber.org/fx v1.19.2 // indirect 175 | go.uber.org/multierr v1.9.0 // indirect 176 | go.uber.org/zap v1.24.0 // indirect 177 | golang.org/x/crypto v0.6.0 // indirect 178 | golang.org/x/mod v0.8.0 // indirect 179 | golang.org/x/net v0.7.0 // indirect 180 | golang.org/x/sync v0.1.0 // indirect 181 | golang.org/x/sys v0.5.0 // indirect 182 
| golang.org/x/text v0.7.0 // indirect 183 | golang.org/x/tools v0.6.0 // indirect 184 | google.golang.org/protobuf v1.28.1 // indirect 185 | gopkg.in/yaml.v2 v2.4.0 // indirect 186 | gopkg.in/yaml.v3 v3.0.1 // indirect 187 | lukechampine.com/blake3 v1.1.7 // indirect 188 | nhooyr.io/websocket v1.8.7 // indirect 189 | ) 190 | -------------------------------------------------------------------------------- /integration/filplus/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/data-preservation-programs/RetrievalBot/integration/filplus/util" 8 | "github.com/data-preservation-programs/RetrievalBot/pkg/env" 9 | "github.com/data-preservation-programs/RetrievalBot/pkg/model" 10 | "github.com/data-preservation-programs/RetrievalBot/pkg/resolver" 11 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 12 | logging "github.com/ipfs/go-log/v2" 13 | _ "github.com/joho/godotenv/autoload" 14 | "github.com/pkg/errors" 15 | "go.mongodb.org/mongo-driver/bson" 16 | "go.mongodb.org/mongo-driver/mongo" 17 | "go.mongodb.org/mongo-driver/mongo/options" 18 | ) 19 | 20 | var logger = logging.Logger("filplus-integration") 21 | 22 | func main() { 23 | filplus := NewFilPlusIntegration() 24 | for { 25 | err := filplus.RunOnce(context.TODO()) 26 | if err != nil { 27 | logger.Error(err) 28 | } 29 | 30 | time.Sleep(time.Minute) 31 | } 32 | } 33 | 34 | type TotalPerClient struct { 35 | Client string `bson:"_id"` 36 | Total int64 `bson:"total"` 37 | } 38 | 39 | type FilPlusIntegration struct { 40 | taskCollection *mongo.Collection 41 | marketDealsCollection *mongo.Collection 42 | resultCollection *mongo.Collection 43 | batchSize int 44 | requester string 45 | locationResolver resolver.LocationResolver 46 | providerResolver resolver.ProviderResolver 47 | ipInfo resolver.IPInfo 48 | randConst float64 49 | } 50 | 51 | func GetTotalPerClient(ctx context.Context, 
marketDealsCollection *mongo.Collection) (map[string]int64, error) { 52 | var result []TotalPerClient 53 | agg, err := marketDealsCollection.Aggregate(ctx, []bson.M{ 54 | {"$match": bson.M{ 55 | "sector_start": bson.M{"$gt": 0}, 56 | "end": bson.M{"$gt": model.TimeToEpoch(time.Now())}, 57 | "verified": true, 58 | "slashed": bson.M{"$lt": 0}, 59 | }}, 60 | { 61 | "$group": bson.M{ 62 | "_id": "$client", 63 | "total": bson.M{ 64 | "$sum": "$piece_size", 65 | }, 66 | }, 67 | }, 68 | }) 69 | if err != nil { 70 | return nil, errors.Wrap(err, "failed to aggregate market deals") 71 | } 72 | 73 | err = agg.All(ctx, &result) 74 | if err != nil { 75 | return nil, errors.Wrap(err, "failed to decode market deals") 76 | } 77 | 78 | totalPerClient := make(map[string]int64) 79 | for _, r := range result { 80 | totalPerClient[r.Client] = r.Total 81 | } 82 | 83 | return totalPerClient, nil 84 | } 85 | 86 | func NewFilPlusIntegration() *FilPlusIntegration { 87 | ctx := context.Background() 88 | taskClient, err := mongo. 89 | Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.QueueMongoURI))) 90 | if err != nil { 91 | panic(err) 92 | } 93 | taskCollection := taskClient. 94 | Database(env.GetRequiredString(env.QueueMongoDatabase)).Collection("task_queue") 95 | 96 | stateMarketDealsClient, err := mongo. 97 | Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.StatemarketdealsMongoURI))) 98 | if err != nil { 99 | panic(err) 100 | } 101 | marketDealsCollection := stateMarketDealsClient. 102 | Database(env.GetRequiredString(env.StatemarketdealsMongoDatabase)). 103 | Collection("state_market_deals") 104 | 105 | resultClient, err := mongo.Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.ResultMongoURI))) 106 | if err != nil { 107 | panic(err) 108 | } 109 | resultCollection := resultClient. 110 | Database(env.GetRequiredString(env.ResultMongoDatabase)). 
111 | Collection("task_result") 112 | 113 | batchSize := env.GetInt(env.FilplusIntegrationBatchSize, 100) 114 | providerCacheTTL := env.GetDuration(env.ProviderCacheTTL, 24*time.Hour) 115 | locationCacheTTL := env.GetDuration(env.LocationCacheTTL, 24*time.Hour) 116 | locationResolver := resolver.NewLocationResolver(env.GetRequiredString(env.IPInfoToken), locationCacheTTL) 117 | providerResolver, err := resolver.NewProviderResolver( 118 | env.GetString(env.LotusAPIUrl, "https://api.node.glif.io/rpc/v0"), 119 | env.GetString(env.LotusAPIToken, ""), 120 | providerCacheTTL) 121 | if err != nil { 122 | panic(err) 123 | } 124 | 125 | // Check public IP address 126 | ipInfo, err := resolver.GetPublicIPInfo(ctx, "", "") 127 | if err != nil { 128 | panic(err) 129 | } 130 | 131 | logger.With("ipinfo", ipInfo).Infof("Public IP info retrieved") 132 | 133 | return &FilPlusIntegration{ 134 | taskCollection: taskCollection, 135 | marketDealsCollection: marketDealsCollection, 136 | batchSize: batchSize, 137 | requester: "filplus", 138 | locationResolver: locationResolver, 139 | providerResolver: *providerResolver, 140 | resultCollection: resultCollection, 141 | ipInfo: ipInfo, 142 | randConst: env.GetFloat64(env.FilplusIntegrationRandConst, 4.0), 143 | } 144 | } 145 | 146 | func (f *FilPlusIntegration) RunOnce(ctx context.Context) error { 147 | logger.Info("start running filplus integration") 148 | 149 | // If the task queue already have batch size tasks, do nothing 150 | count, err := f.taskCollection.CountDocuments(ctx, bson.M{"requester": f.requester}) 151 | if err != nil { 152 | return errors.Wrap(err, "failed to count tasks") 153 | } 154 | 155 | logger.With("count", count).Info("Current number of tasks in the queue") 156 | 157 | if count > int64(f.batchSize) { 158 | logger.Infof("task queue still have %d tasks, do nothing", count) 159 | 160 | /* Remove old tasks that has stayed in the queue for too long 161 | _, err = f.taskCollection.DeleteMany(ctx, 162 | bson.M{"requester": 
f.requester, "created_at": bson.M{"$lt": time.Now().UTC().Add(-24 * time.Hour)}}) 163 | if err != nil { 164 | return errors.Wrap(err, "failed to remove old tasks") 165 | } 166 | */ 167 | return nil 168 | } 169 | 170 | totalPerClient, err := GetTotalPerClient(ctx, f.marketDealsCollection) 171 | if err != nil { 172 | return errors.Wrap(err, "failed to get total per client") 173 | } 174 | 175 | // Get random documents from state_market_deals that are still active and is verified 176 | aggregateResult, err := f.marketDealsCollection.Aggregate(ctx, bson.A{ 177 | bson.M{"$sample": bson.M{"size": f.batchSize}}, 178 | bson.M{"$match": bson.M{ 179 | "sector_start": bson.M{"$gt": 0}, 180 | "end": bson.M{"$gt": model.TimeToEpoch(time.Now())}, 181 | "verified": true, 182 | "slashed": bson.M{"$lt": 0}, 183 | }}, 184 | }) 185 | 186 | if err != nil { 187 | return errors.Wrap(err, "failed to get sample documents") 188 | } 189 | 190 | var documents []model.DealState 191 | err = aggregateResult.All(ctx, &documents) 192 | if err != nil { 193 | return errors.Wrap(err, "failed to decode documents") 194 | } 195 | 196 | documents = RandomObjects(documents, len(documents)/2, f.randConst, totalPerClient) 197 | tasks, results := util.AddTasks(ctx, f.requester, f.ipInfo, documents, f.locationResolver, f.providerResolver) 198 | 199 | if len(tasks) > 0 { 200 | _, err = f.taskCollection.InsertMany(ctx, tasks) 201 | if err != nil { 202 | return errors.Wrap(err, "failed to insert tasks") 203 | } 204 | } 205 | 206 | logger.With("count", len(tasks)).Info("inserted tasks") 207 | 208 | countPerCountry := make(map[string]int) 209 | countPerContinent := make(map[string]int) 210 | countPerModule := make(map[task.ModuleName]int) 211 | for _, t := range tasks { 212 | //nolint:forcetypeassert 213 | tsk := t.(task.Task) 214 | country := tsk.Provider.Country 215 | continent := tsk.Provider.Continent 216 | module := tsk.Module 217 | countPerCountry[country]++ 218 | countPerContinent[continent]++ 219 | 
countPerModule[module]++ 220 | } 221 | 222 | for country, count := range countPerCountry { 223 | logger.With("country", country, "count", count).Info("tasks per country") 224 | } 225 | 226 | for continent, count := range countPerContinent { 227 | logger.With("continent", continent, "count", count).Info("tasks per continent") 228 | } 229 | 230 | for module, count := range countPerModule { 231 | logger.With("module", module, "count", count).Info("tasks per module") 232 | } 233 | 234 | if len(results) > 0 { 235 | _, err = f.resultCollection.InsertMany(ctx, results) 236 | if err != nil { 237 | return errors.Wrap(err, "failed to insert results") 238 | } 239 | } 240 | 241 | logger.With("count", len(results)).Info("inserted results") 242 | 243 | return nil 244 | } 245 | -------------------------------------------------------------------------------- /integration/filplus/rand.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "math" 5 | "math/rand" 6 | 7 | "github.com/data-preservation-programs/RetrievalBot/pkg/model" 8 | ) 9 | 10 | func weight(obj model.DealState, c float64, totalPerClient map[string]int64) float64 { 11 | total, ok := totalPerClient[obj.Client] 12 | if !ok { 13 | return 0 14 | } 15 | return math.Pow(c, -obj.AgeInYears()) * float64(obj.PieceSize) / math.Sqrt(float64(total)) 16 | } 17 | 18 | // RandomObjects Select l random objects from x with probability c^(-x[i].AgeInYears()). 19 | func RandomObjects(x []model.DealState, l int, c float64, totalPerClient map[string]int64) []model.DealState { 20 | // Calculate the sum of C^age for all objects. 21 | var sum float64 22 | for _, obj := range x { 23 | sum += weight(obj, c, totalPerClient) 24 | } 25 | 26 | // Select Y random objects. 27 | selected := make(map[int32]bool) 28 | var results []model.DealState 29 | for i := 0; i < l; i++ { 30 | // Generate a random number between 0 and the sum. 
31 | // nolint:gosec 32 | randNum := rand.Float64() * sum 33 | 34 | // Iterate over the objects and subtract C^age from randNum. 35 | for _, obj := range x { 36 | if selected[obj.DealID] { 37 | // Skip objects that have already been selected. 38 | continue 39 | } 40 | randNum -= weight(obj, c, totalPerClient) 41 | if randNum <= 0 { 42 | // Add the current object to the selected list. 43 | results = append(results, obj) 44 | selected[obj.DealID] = true 45 | break 46 | } 47 | } 48 | } 49 | 50 | return results 51 | } 52 | -------------------------------------------------------------------------------- /integration/filplus/rand_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/data-preservation-programs/RetrievalBot/pkg/model" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestWeight(t *testing.T) { 12 | now := model.TimeToEpoch(time.Now()) 13 | objects := []model.DealState{ 14 | {DealID: 1, SectorStart: now, PieceSize: 100, Client: "a"}, 15 | {DealID: 2, SectorStart: now, PieceSize: 200, Client: "a"}, 16 | {DealID: 3, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 100, Client: "a"}, 17 | {DealID: 4, SectorStart: now, PieceSize: 100, Client: "b"}, 18 | {DealID: 5, SectorStart: now, PieceSize: 100, Client: "c"}, 19 | } 20 | clients := map[string]int64{ 21 | "a": 16, 22 | "b": 1600, 23 | "c": 160000, 24 | } 25 | assert.InDelta(t, 25, weight(objects[0], 2, clients), 0.1) 26 | assert.InDelta(t, 50, weight(objects[1], 2, clients), 0.1) 27 | assert.InDelta(t, 12.5, weight(objects[2], 2, clients), 0.1) 28 | assert.InDelta(t, 2.5, weight(objects[3], 2, clients), 0.1) 29 | assert.InDelta(t, 0.25, weight(objects[4], 2, clients), 0.1) 30 | } 31 | 32 | func TestRandomObjects(t *testing.T) { 33 | // Create a list of MyObject. 
34 | objects := []model.DealState{ 35 | {DealID: 1, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 36 | {DealID: 2, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 37 | {DealID: 3, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 38 | {DealID: 4, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 39 | {DealID: 5, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 40 | {DealID: 6, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 41 | {DealID: 7, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 42 | {DealID: 8, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 43 | {DealID: 9, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 44 | {DealID: 10, SectorStart: model.TimeToEpoch(time.Now()), PieceSize: 1, Client: "a"}, 45 | {DealID: 11, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 46 | {DealID: 12, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 47 | {DealID: 13, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 48 | {DealID: 14, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 49 | {DealID: 15, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 50 | {DealID: 16, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 51 | {DealID: 17, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 52 | {DealID: 18, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 53 | {DealID: 19, SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 54 | {DealID: 20, 
SectorStart: model.TimeToEpoch(time.Now().Add(-24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 55 | {DealID: 21, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 56 | {DealID: 22, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 57 | {DealID: 23, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 58 | {DealID: 24, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 59 | {DealID: 25, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 60 | {DealID: 26, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 61 | {DealID: 27, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 62 | {DealID: 28, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 63 | {DealID: 29, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 64 | {DealID: 30, SectorStart: model.TimeToEpoch(time.Now().Add(-2 * 24 * 365 * time.Hour)), PieceSize: 1, Client: "a"}, 65 | } 66 | 67 | // Select 5 random objects with C = 2. 68 | selected := RandomObjects(objects, 15, 2.0, map[string]int64{"a": 1}) 69 | 70 | // Check that the selected objects are distinct. 
71 | selectedMap := make(map[int32]bool) 72 | for _, obj := range selected { 73 | if selectedMap[obj.DealID] { 74 | t.Errorf("Selected duplicate object with deal id %d", obj.DealID) 75 | } 76 | selectedMap[obj.DealID] = true 77 | } 78 | 79 | // Print the objects 80 | for _, obj := range selected { 81 | t.Logf("Selected object with deal id %d", obj.DealID) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /integration/filplus/util/util.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "context" 5 | "strconv" 6 | "time" 7 | 8 | "github.com/data-preservation-programs/RetrievalBot/pkg/convert" 9 | "github.com/data-preservation-programs/RetrievalBot/pkg/env" 10 | "github.com/data-preservation-programs/RetrievalBot/pkg/model" 11 | "github.com/data-preservation-programs/RetrievalBot/pkg/requesterror" 12 | "github.com/data-preservation-programs/RetrievalBot/pkg/resolver" 13 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 14 | "github.com/ipfs/go-cid" 15 | logging "github.com/ipfs/go-log/v2" 16 | "github.com/libp2p/go-libp2p/core/peer" 17 | "github.com/pkg/errors" 18 | "golang.org/x/exp/slices" 19 | ) 20 | 21 | var logger = logging.Logger("addTasks") 22 | 23 | //nolint:nonamedreturns 24 | func AddTasks(ctx context.Context, 25 | requester string, 26 | ipInfo resolver.IPInfo, 27 | documents []model.DealState, 28 | locationResolver resolver.LocationResolver, 29 | providerResolver resolver.ProviderResolver) (tasks []interface{}, results []interface{}) { 30 | // Insert the documents into task queue 31 | for _, document := range documents { 32 | // If the label is a correct CID, assume it is the payload CID and try GraphSync and Bitswap retrieval 33 | labelCID, err := cid.Decode(document.Label) 34 | if err != nil { 35 | logger.With("label", document.Label, "deal_id", document.DealID). 
36 | Debug("failed to decode label as CID") 37 | continue 38 | } 39 | 40 | isPayloadCID := true 41 | // Skip graphsync and bitswap if the cid is not decodable, i.e. it is a pieceCID 42 | if !slices.Contains([]uint64{cid.Raw, cid.DagCBOR, cid.DagProtobuf, cid.DagJSON, cid.DagJOSE}, 43 | labelCID.Prefix().Codec) { 44 | logger.With("provider", document.Provider, "deal_id", document.DealID, 45 | "label", document.Label, "codec", labelCID.Prefix().Codec). 46 | Info("Skip Bitswap and Graphsync because the Label is likely not a payload CID") 47 | isPayloadCID = false 48 | } 49 | 50 | providerInfo, err := providerResolver.ResolveProvider(ctx, document.Provider) 51 | if err != nil { 52 | logger.With("provider", document.Provider, "deal_id", document.DealID). 53 | Error("failed to resolve provider") 54 | continue 55 | } 56 | 57 | location, err := locationResolver.ResolveMultiaddrsBytes(ctx, providerInfo.Multiaddrs) 58 | if err != nil { 59 | if errors.As(err, &requesterror.BogonIPError{}) || 60 | errors.As(err, &requesterror.InvalidIPError{}) || 61 | errors.As(err, &requesterror.HostLookupError{}) || 62 | errors.As(err, &requesterror.NoValidMultiAddrError{}) { 63 | results = addErrorResults(requester, ipInfo, results, document, providerInfo, location, 64 | task.NoValidMultiAddrs, err.Error()) 65 | } else { 66 | logger.With("provider", document.Provider, "deal_id", document.DealID, "err", err). 67 | Error("failed to resolve provider location") 68 | } 69 | continue 70 | } 71 | 72 | _, err = peer.Decode(providerInfo.PeerId) 73 | if err != nil { 74 | logger.With("provider", document.Provider, "deal_id", document.DealID, "peerID", providerInfo.PeerId, 75 | "err", err). 
76 | Info("failed to decode peerID") 77 | results = addErrorResults(requester, ipInfo, results, document, providerInfo, location, 78 | task.InvalidPeerID, err.Error()) 79 | continue 80 | } 81 | 82 | if isPayloadCID { 83 | for _, module := range []task.ModuleName{task.GraphSync, task.Bitswap} { 84 | tasks = append(tasks, task.Task{ 85 | Requester: requester, 86 | Module: module, 87 | Metadata: map[string]string{ 88 | "deal_id": strconv.Itoa(int(document.DealID)), 89 | "client": document.Client, 90 | "assume_label": "true", 91 | "retrieve_type": "root_block"}, 92 | Provider: task.Provider{ 93 | ID: document.Provider, 94 | PeerID: providerInfo.PeerId, 95 | Multiaddrs: convert.MultiaddrsBytesToStringArraySkippingError(providerInfo.Multiaddrs), 96 | City: location.City, 97 | Region: location.Region, 98 | Country: location.Country, 99 | Continent: location.Continent, 100 | }, 101 | Content: task.Content{ 102 | CID: document.Label, 103 | }, 104 | CreatedAt: time.Now().UTC(), 105 | Timeout: env.GetDuration(env.FilplusIntegrationTaskTimeout, 15*time.Second), 106 | }) 107 | } 108 | } 109 | 110 | tasks = append(tasks, task.Task{ 111 | Requester: requester, 112 | Module: task.HTTP, 113 | Metadata: map[string]string{ 114 | "deal_id": strconv.Itoa(int(document.DealID)), 115 | "client": document.Client, 116 | "retrieve_type": "piece", 117 | "retrieve_size": "1048576"}, 118 | Provider: task.Provider{ 119 | ID: document.Provider, 120 | PeerID: providerInfo.PeerId, 121 | Multiaddrs: convert.MultiaddrsBytesToStringArraySkippingError(providerInfo.Multiaddrs), 122 | City: location.City, 123 | Region: location.Region, 124 | Country: location.Country, 125 | Continent: location.Continent, 126 | }, 127 | Content: task.Content{ 128 | CID: document.PieceCID, 129 | }, 130 | CreatedAt: time.Now().UTC(), 131 | Timeout: env.GetDuration(env.FilplusIntegrationTaskTimeout, 15*time.Second), 132 | }) 133 | } 134 | logger.With("count", len(tasks)).Info("inserted tasks") 135 | //nolint:nakedret 136 | 
return 137 | } 138 | 139 | var moduleMetadataMap = map[task.ModuleName]map[string]string{ 140 | task.GraphSync: { 141 | "assume_label": "true", 142 | "retrieve_type": "root_block", 143 | }, 144 | task.Bitswap: { 145 | "assume_label": "true", 146 | "retrieve_type": "root_block", 147 | }, 148 | task.HTTP: { 149 | "retrieve_type": "piece", 150 | "retrieve_size": "1048576", 151 | }, 152 | } 153 | 154 | func addErrorResults( 155 | requester string, 156 | ipInfo resolver.IPInfo, 157 | results []interface{}, 158 | document model.DealState, 159 | providerInfo resolver.MinerInfo, 160 | location resolver.IPInfo, 161 | errorCode task.ErrorCode, 162 | errorMessage string, 163 | ) []interface{} { 164 | for module, metadata := range moduleMetadataMap { 165 | newMetadata := make(map[string]string) 166 | for k, v := range metadata { 167 | newMetadata[k] = v 168 | } 169 | newMetadata["deal_id"] = strconv.Itoa(int(document.DealID)) 170 | newMetadata["client"] = document.Client 171 | results = append(results, task.Result{ 172 | Task: task.Task{ 173 | Requester: requester, 174 | Module: module, 175 | Metadata: newMetadata, 176 | Provider: task.Provider{ 177 | ID: document.Provider, 178 | PeerID: providerInfo.PeerId, 179 | Multiaddrs: convert.MultiaddrsBytesToStringArraySkippingError(providerInfo.Multiaddrs), 180 | City: location.City, 181 | Region: location.Region, 182 | Country: location.Country, 183 | Continent: location.Continent, 184 | }, 185 | Content: task.Content{ 186 | CID: document.Label, 187 | }, 188 | CreatedAt: time.Now().UTC(), 189 | Timeout: env.GetDuration(env.FilplusIntegrationTaskTimeout, 15*time.Second)}, 190 | Retriever: task.Retriever{ 191 | PublicIP: ipInfo.IP, 192 | City: ipInfo.City, 193 | Region: ipInfo.Region, 194 | Country: ipInfo.Country, 195 | Continent: ipInfo.Continent, 196 | ASN: ipInfo.ASN, 197 | ISP: ipInfo.ISP, 198 | Latitude: ipInfo.Latitude, 199 | Longitude: ipInfo.Longitude, 200 | }, 201 | Result: task.RetrievalResult{ 202 | Success: false, 203 | 
ErrorCode: errorCode, 204 | ErrorMessage: errorMessage, 205 | TTFB: 0, 206 | Speed: 0, 207 | Duration: 0, 208 | Downloaded: 0, 209 | }, 210 | CreatedAt: time.Now().UTC(), 211 | }) 212 | } 213 | return results 214 | } 215 | -------------------------------------------------------------------------------- /integration/oneoff/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strconv" 7 | "time" 8 | 9 | "github.com/data-preservation-programs/RetrievalBot/integration/filplus/util" 10 | "github.com/data-preservation-programs/RetrievalBot/pkg/model" 11 | "github.com/data-preservation-programs/RetrievalBot/pkg/model/rpc" 12 | "github.com/data-preservation-programs/RetrievalBot/pkg/resolver" 13 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 14 | "github.com/data-preservation-programs/RetrievalBot/worker/bitswap" 15 | "github.com/data-preservation-programs/RetrievalBot/worker/graphsync" 16 | "github.com/data-preservation-programs/RetrievalBot/worker/http" 17 | _ "github.com/joho/godotenv/autoload" 18 | "github.com/pkg/errors" 19 | "github.com/urfave/cli/v2" 20 | "github.com/ybbus/jsonrpc/v3" 21 | ) 22 | 23 | //nolint:forbidigo,forcetypeassert,exhaustive 24 | func main() { 25 | app := &cli.App{ 26 | Name: "oneoff", 27 | Usage: "make a simple oneoff task that works with filplus tests", 28 | ArgsUsage: "providerID dealID", 29 | Action: func(cctx *cli.Context) error { 30 | ctx := cctx.Context 31 | providerID := cctx.Args().Get(0) 32 | dealIDStr := cctx.Args().Get(1) 33 | dealID, err := strconv.ParseUint(dealIDStr, 10, 32) 34 | if err != nil { 35 | return errors.Wrap(err, "failed to parse dealID") 36 | } 37 | providerResolver, err := resolver.NewProviderResolver( 38 | "https://api.node.glif.io/rpc/v0", 39 | "", 40 | time.Minute, 41 | ) 42 | if err != nil { 43 | return errors.Wrap(err, "failed to create provider resolver") 44 | } 45 | 46 | providerInfo, err := 
providerResolver.ResolveProvider(ctx, providerID) 47 | if err != nil { 48 | return errors.Wrap(err, "failed to resolve provider") 49 | } 50 | 51 | locationResolver := resolver.NewLocationResolver("", time.Minute) 52 | _, err = locationResolver.ResolveMultiaddrsBytes(ctx, providerInfo.Multiaddrs) 53 | if err != nil { 54 | return errors.Wrap(err, "failed to resolve location") 55 | } 56 | 57 | ipInfo, err := resolver.GetPublicIPInfo(ctx, "", "") 58 | if err != nil { 59 | panic(err) 60 | } 61 | 62 | lotusClient := jsonrpc.NewClient("https://api.node.glif.io") 63 | var deal rpc.Deal 64 | err = lotusClient.CallFor(ctx, &deal, "Filecoin.StateMarketStorageDeal", dealID, nil) 65 | if err != nil { 66 | return errors.Wrap(err, "failed to get deal") 67 | } 68 | 69 | dealStates := []model.DealState{ 70 | { 71 | DealID: int32(dealID), 72 | PieceCID: deal.Proposal.PieceCID.Root, 73 | PieceSize: deal.Proposal.PieceSize, 74 | Label: deal.Proposal.Label, 75 | Verified: deal.Proposal.VerifiedDeal, 76 | Client: deal.Proposal.Client, 77 | Provider: deal.Proposal.Provider, 78 | Start: deal.Proposal.StartEpoch, 79 | End: deal.Proposal.EndEpoch, 80 | SectorStart: deal.State.SectorStartEpoch, 81 | Slashed: deal.State.SlashEpoch, 82 | LastUpdated: deal.State.LastUpdatedEpoch, 83 | }, 84 | } 85 | tasks, results := util.AddTasks(ctx, "oneoff", ipInfo, dealStates, locationResolver, *providerResolver) 86 | if len(results) > 0 { 87 | fmt.Println("Errors encountered when creating tasks:") 88 | for _, result := range results { 89 | r := result.(task.Result) 90 | fmt.Println(r) 91 | } 92 | } 93 | if len(tasks) > 0 { 94 | fmt.Println("Retrieval Test Results:") 95 | for _, tsk := range tasks { 96 | t := tsk.(task.Task) 97 | var result *task.RetrievalResult 98 | fmt.Printf(" -- Test %s --\n", t.Module) 99 | switch t.Module { 100 | case "graphsync": 101 | result, err = graphsync.Worker{}.DoWork(t) 102 | case "http": 103 | result, err = http.Worker{}.DoWork(t) 104 | case "bitswap": 105 | result, err = 
bitswap.Worker{}.DoWork(t) 106 | } 107 | if err != nil { 108 | fmt.Printf("Error: %s\n", err) 109 | } else { 110 | fmt.Printf("Success: %v\n", result) 111 | } 112 | } 113 | } 114 | return nil 115 | }, 116 | } 117 | 118 | if err := app.Run(os.Args); err != nil { 119 | panic(err) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /integration/repdao/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "time" 8 | 9 | _ "github.com/joho/godotenv/autoload" 10 | "go.mongodb.org/mongo-driver/bson" 11 | "go.mongodb.org/mongo-driver/mongo" 12 | "go.mongodb.org/mongo-driver/mongo/options" 13 | ) 14 | 15 | type DailyStats struct { 16 | ProviderID string `bson:"provider_id"` 17 | Date string `bson:"date"` 18 | HTTPRetrievals int `bson:"http_retrievals"` 19 | HTTPRetrievalSuccess int `bson:"http_retrieval_success"` 20 | GraphSyncRetrievals int `bson:"graphsync_retrievals"` 21 | GraphSyncRetrievalSuccess int `bson:"graphsync_retrieval_success"` 22 | BitswapRetrievals int `bson:"bitswap_retrievals"` 23 | BitswapRetrievalSuccess int `bson:"bitswap_retrieval_success"` 24 | AvgTTFBMS float64 `bson:"avg_ttfb_ms"` 25 | AvgSpeedBPS float64 `bson:"avg_speed_bps"` 26 | } 27 | 28 | func main() { 29 | ctx := context.Background() 30 | repdaoMongo, err := mongo.Connect(ctx, options.Client().ApplyURI(os.Getenv("REPDAO_MONGO_URI"))) 31 | if err != nil { 32 | panic(err) 33 | } 34 | defer repdaoMongo.Disconnect(ctx) 35 | 36 | repdao := repdaoMongo.Database(os.Getenv("REPDAO_MONGO_DATABASE")).Collection(os.Getenv("REPDAO_MONGO_COLLECTION")) 37 | 38 | retbotMongo, err := mongo.Connect(ctx, options.Client().ApplyURI(os.Getenv("RESULT_MONGO_URI"))) 39 | if err != nil { 40 | panic(err) 41 | } 42 | defer retbotMongo.Disconnect(ctx) 43 | 44 | retbot := retbotMongo.Database(os.Getenv("RESULT_MONGO_DATABASE")).Collection("task_result") 45 | 46 | 
// Find the last saved date 47 | var lastStats DailyStats 48 | err = repdao.FindOne(ctx, bson.D{}, options.FindOne().SetSort(bson.D{{"date", -1}})).Decode(&lastStats) 49 | if err != nil && err != mongo.ErrNoDocuments { 50 | panic(err) 51 | } 52 | 53 | var startDate time.Time 54 | if err == mongo.ErrNoDocuments { 55 | startDate = time.Time{} 56 | } else { 57 | startDate, err = time.Parse("2006-01-02", lastStats.Date) 58 | if err != nil { 59 | panic(err) 60 | } 61 | startDate = startDate.AddDate(0, 0, 1) 62 | } 63 | 64 | // Get the current day part of yesterday 65 | now := time.Now().UTC() 66 | endDate := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location()) 67 | 68 | fmt.Printf("startDate: %s, endDate: %s\n", startDate, endDate) 69 | if startDate.After(endDate) || startDate.Equal(endDate) { 70 | fmt.Println("No new data to process") 71 | return 72 | } 73 | 74 | // Aggregate the results 75 | matchStage := bson.D{{"$match", bson.D{ 76 | {"created_at", bson.D{{"$gte", startDate}, {"$lt", endDate}}}, 77 | {"task.module", bson.D{{"$in", bson.A{"http", "bitswap", "graphsync"}}}}, 78 | }}} 79 | 80 | groupStage := bson.D{{"$group", bson.D{ 81 | {"_id", bson.D{ 82 | {"provider_id", "$task.provider.id"}, 83 | {"date", bson.D{{"$dateToString", bson.D{ 84 | {"format", "%Y-%m-%d"}, 85 | {"date", "$created_at"}, 86 | }}}}, 87 | {"module", "$task.module"}, 88 | {"success", "$result.success"}, 89 | }}, 90 | {"count", bson.D{{"$sum", 1}}}, 91 | {"ttfb_sum", bson.D{{"$sum", "$result.ttfb"}}}, 92 | {"speed_sum", bson.D{{"$sum", "$result.speed"}}}, 93 | }}} 94 | 95 | groupStage2 := bson.D{{"$group", bson.D{ 96 | {"_id", bson.D{ 97 | {"provider_id", "$_id.provider_id"}, 98 | {"date", "$_id.date"}, 99 | }}, 100 | {"http_retrievals", bson.D{{"$sum", bson.D{ 101 | {"$cond", bson.A{ 102 | bson.D{{"$eq", bson.A{"$_id.module", "http"}}}, 103 | "$count", 104 | 0, 105 | }}, 106 | }}}}, 107 | {"http_retrieval_success", bson.D{{"$sum", bson.D{ 108 | {"$cond", bson.A{ 109 | 
bson.D{{"$and", bson.A{ 110 | bson.D{{"$eq", bson.A{"$_id.module", "http"}}}, 111 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 112 | }}}, 113 | "$count", 114 | 0, 115 | }}}, 116 | }}}, 117 | {"bitswap_retrievals", bson.D{{"$sum", bson.D{ 118 | {"$cond", bson.A{ 119 | bson.D{{"$eq", bson.A{"$_id.module", "bitswap"}}}, 120 | "$count", 121 | 0, 122 | }}, 123 | }}}}, 124 | {"bitswap_retrieval_success", bson.D{{"$sum", bson.D{ 125 | {"$cond", bson.A{ 126 | bson.D{{"$and", bson.A{ 127 | bson.D{{"$eq", bson.A{"$_id.module", "bitswap"}}}, 128 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 129 | }}}, 130 | "$count", 131 | 0, 132 | }}}, 133 | }}}, 134 | {"graphsync_retrievals", bson.D{{"$sum", bson.D{ 135 | {"$cond", bson.A{ 136 | bson.D{{"$eq", bson.A{"$_id.module", "graphsync"}}}, 137 | "$count", 138 | 0, 139 | }}, 140 | }}}}, 141 | {"graphsync_retrieval_success", bson.D{{"$sum", bson.D{ 142 | {"$cond", bson.A{ 143 | bson.D{{"$and", bson.A{ 144 | bson.D{{"$eq", bson.A{"$_id.module", "graphsync"}}}, 145 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 146 | }}}, 147 | "$count", 148 | 0, 149 | }}}, 150 | }}}, 151 | {"ttfb_sum", bson.D{{"$sum", bson.D{ 152 | {"$cond", bson.A{ 153 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 154 | "$ttfb_sum", 155 | 0, 156 | }}, 157 | }}}}, 158 | {"speed_sum", bson.D{{"$sum", bson.D{ 159 | {"$cond", bson.A{ 160 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 161 | "$speed_sum", 162 | 0, 163 | }}, 164 | }}}}, 165 | {"success_count", bson.D{{"$sum", bson.D{ 166 | {"$cond", bson.A{ 167 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 168 | "$count", 169 | 0, 170 | }}, 171 | }}}}, 172 | }}} 173 | 174 | projectStage := bson.D{{"$project", bson.D{ 175 | {"provider_id", "$_id.provider_id"}, 176 | {"date", "$_id.date"}, 177 | {"http_retrievals", 1}, 178 | {"http_retrieval_success", 1}, 179 | {"bitswap_retrievals", 1}, 180 | {"bitswap_retrieval_success", 1}, 181 | {"graphsync_retrievals", 1}, 182 | {"graphsync_retrieval_success", 1}, 183 | 
{"avg_ttfb_ms", bson.D{{"$cond", bson.A{ 184 | bson.D{{"$eq", bson.A{"$success_count", 0}}}, 185 | 0, 186 | bson.D{{"$divide", bson.A{bson.D{{"$divide", bson.A{"$ttfb_sum", "$success_count"}}}, 1000000.0}}}, 187 | }}}}, 188 | {"avg_speed_bps", bson.D{{"$cond", bson.A{ 189 | bson.D{{"$eq", bson.A{"$success_count", 0}}}, 190 | 0, 191 | bson.D{{"$divide", bson.A{"$speed_sum", "$success_count"}}}, 192 | }}}}, 193 | }}} 194 | 195 | cursor, err := retbot.Aggregate(context.Background(), mongo.Pipeline{matchStage, groupStage, groupStage2, projectStage}) 196 | if err != nil { 197 | panic(err) 198 | } 199 | defer cursor.Close(context.Background()) 200 | var stats []interface{} 201 | 202 | // Insert the aggregated results into the new collection 203 | for cursor.Next(context.Background()) { 204 | var dailyStats DailyStats 205 | err := cursor.Decode(&dailyStats) 206 | if err != nil { 207 | panic(err) 208 | } 209 | stats = append(stats, dailyStats) 210 | fmt.Printf("Got daily stats: %+v\n", dailyStats) 211 | } 212 | 213 | // Insert the aggregated results into the new collection 214 | result, err := repdao.InsertMany(ctx, stats) 215 | if err != nil { 216 | panic(err) 217 | } 218 | fmt.Printf("Inserted %v documents into the new collection!\n", len(result.InsertedIDs)) 219 | } 220 | -------------------------------------------------------------------------------- /integration/repdao_dp/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "time" 8 | 9 | _ "github.com/joho/godotenv/autoload" 10 | "go.mongodb.org/mongo-driver/bson" 11 | "go.mongodb.org/mongo-driver/mongo" 12 | "go.mongodb.org/mongo-driver/mongo/options" 13 | ) 14 | 15 | type DailyStats struct { 16 | ProviderID string `bson:"provider_id"` 17 | Date string `bson:"date"` 18 | HTTPRetrievals int `bson:"http_retrievals"` 19 | HTTPRetrievalSuccess int `bson:"http_retrieval_success"` 20 | GraphSyncRetrievals int 
`bson:"graphsync_retrievals"` 21 | GraphSyncRetrievalSuccess int `bson:"graphsync_retrieval_success"` 22 | BitswapRetrievals int `bson:"bitswap_retrievals"` 23 | BitswapRetrievalSuccess int `bson:"bitswap_retrieval_success"` 24 | AvgTTFBMS float64 `bson:"avg_ttfb_ms"` 25 | AvgSpeedBPS float64 `bson:"avg_speed_bps"` 26 | } 27 | 28 | func main() { 29 | ctx := context.Background() 30 | retbotMongo, err := mongo.Connect(ctx, options.Client().ApplyURI(os.Getenv("RESULT_MONGO_URI"))) 31 | if err != nil { 32 | panic(err) 33 | } 34 | defer retbotMongo.Disconnect(ctx) 35 | 36 | retbot := retbotMongo.Database(os.Getenv("RESULT_MONGO_DATABASE")).Collection("task_result") 37 | repdaoMongo, err := mongo.Connect(ctx, options.Client().ApplyURI(os.Getenv("REPDAO_MONGO_URI"))) 38 | if err != nil { 39 | panic(err) 40 | } 41 | defer repdaoMongo.Disconnect(ctx) 42 | 43 | country := os.Getenv("RETRIEVER_COUNTRY") 44 | collectionName := os.Getenv("REPDAO_MONGO_COLLECTION") 45 | 46 | repdao := repdaoMongo.Database(os.Getenv("REPDAO_MONGO_DATABASE")).Collection(collectionName) 47 | 48 | // Find the last saved date 49 | var lastStats DailyStats 50 | err = repdao.FindOne(ctx, bson.D{}, options.FindOne().SetSort(bson.D{{"date", -1}})).Decode(&lastStats) 51 | if err != nil && err != mongo.ErrNoDocuments { 52 | panic(err) 53 | } 54 | 55 | var startDate time.Time 56 | if err == mongo.ErrNoDocuments { 57 | startDate = time.Time{} 58 | } else { 59 | startDate, err = time.Parse("2006-01-02", lastStats.Date) 60 | if err != nil { 61 | panic(err) 62 | } 63 | startDate = startDate.AddDate(0, 0, 1) 64 | } 65 | 66 | // Get the current day part of yesterday 67 | now := time.Now().UTC() 68 | endDate := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location()) 69 | 70 | fmt.Printf("startDate: %s, endDate: %s\n", startDate, endDate) 71 | if startDate.After(endDate) || startDate.Equal(endDate) { 72 | fmt.Println("No new data to process") 73 | return 74 | } 75 | 76 | // Aggregate the results 
77 | matchStage := bson.D{{"$match", bson.D{ 78 | {"retriever.country", country}, 79 | {"created_at", bson.D{{"$gte", startDate}, {"$lt", endDate}}}, 80 | {"task.module", bson.D{{"$in", bson.A{"http", "bitswap", "graphsync"}}}}, 81 | }}} 82 | 83 | groupStage := bson.D{{"$group", bson.D{ 84 | {"_id", bson.D{ 85 | {"provider_id", "$task.provider.id"}, 86 | {"date", bson.D{{"$dateToString", bson.D{ 87 | {"format", "%Y-%m-%d"}, 88 | {"date", "$created_at"}, 89 | }}}}, 90 | {"module", "$task.module"}, 91 | {"success", "$result.success"}, 92 | }}, 93 | {"count", bson.D{{"$sum", 1}}}, 94 | {"ttfb_sum", bson.D{{"$sum", "$result.ttfb"}}}, 95 | {"speed_sum", bson.D{{"$sum", "$result.speed"}}}, 96 | }}} 97 | 98 | groupStage2 := bson.D{{"$group", bson.D{ 99 | {"_id", bson.D{ 100 | {"provider_id", "$_id.provider_id"}, 101 | {"date", "$_id.date"}, 102 | }}, 103 | {"http_retrievals", bson.D{{"$sum", bson.D{ 104 | {"$cond", bson.A{ 105 | bson.D{{"$eq", bson.A{"$_id.module", "http"}}}, 106 | "$count", 107 | 0, 108 | }}, 109 | }}}}, 110 | {"http_retrieval_success", bson.D{{"$sum", bson.D{ 111 | {"$cond", bson.A{ 112 | bson.D{{"$and", bson.A{ 113 | bson.D{{"$eq", bson.A{"$_id.module", "http"}}}, 114 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 115 | }}}, 116 | "$count", 117 | 0, 118 | }}}, 119 | }}}, 120 | {"bitswap_retrievals", bson.D{{"$sum", bson.D{ 121 | {"$cond", bson.A{ 122 | bson.D{{"$eq", bson.A{"$_id.module", "bitswap"}}}, 123 | "$count", 124 | 0, 125 | }}, 126 | }}}}, 127 | {"bitswap_retrieval_success", bson.D{{"$sum", bson.D{ 128 | {"$cond", bson.A{ 129 | bson.D{{"$and", bson.A{ 130 | bson.D{{"$eq", bson.A{"$_id.module", "bitswap"}}}, 131 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 132 | }}}, 133 | "$count", 134 | 0, 135 | }}}, 136 | }}}, 137 | {"graphsync_retrievals", bson.D{{"$sum", bson.D{ 138 | {"$cond", bson.A{ 139 | bson.D{{"$eq", bson.A{"$_id.module", "graphsync"}}}, 140 | "$count", 141 | 0, 142 | }}, 143 | }}}}, 144 | {"graphsync_retrieval_success", 
bson.D{{"$sum", bson.D{ 145 | {"$cond", bson.A{ 146 | bson.D{{"$and", bson.A{ 147 | bson.D{{"$eq", bson.A{"$_id.module", "graphsync"}}}, 148 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 149 | }}}, 150 | "$count", 151 | 0, 152 | }}}, 153 | }}}, 154 | {"ttfb_sum", bson.D{{"$sum", bson.D{ 155 | {"$cond", bson.A{ 156 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 157 | "$ttfb_sum", 158 | 0, 159 | }}, 160 | }}}}, 161 | {"speed_sum", bson.D{{"$sum", bson.D{ 162 | {"$cond", bson.A{ 163 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 164 | "$speed_sum", 165 | 0, 166 | }}, 167 | }}}}, 168 | {"success_count", bson.D{{"$sum", bson.D{ 169 | {"$cond", bson.A{ 170 | bson.D{{"$eq", bson.A{"$_id.success", true}}}, 171 | "$count", 172 | 0, 173 | }}, 174 | }}}}, 175 | }}} 176 | 177 | projectStage := bson.D{{"$project", bson.D{ 178 | {"provider_id", "$_id.provider_id"}, 179 | {"date", "$_id.date"}, 180 | {"http_retrievals", 1}, 181 | {"http_retrieval_success", 1}, 182 | {"bitswap_retrievals", 1}, 183 | {"bitswap_retrieval_success", 1}, 184 | {"graphsync_retrievals", 1}, 185 | {"graphsync_retrieval_success", 1}, 186 | {"avg_ttfb_ms", bson.D{{"$cond", bson.A{ 187 | bson.D{{"$eq", bson.A{"$success_count", 0}}}, 188 | 0, 189 | bson.D{{"$divide", bson.A{bson.D{{"$divide", bson.A{"$ttfb_sum", "$success_count"}}}, 1000000.0}}}, 190 | }}}}, 191 | {"avg_speed_bps", bson.D{{"$cond", bson.A{ 192 | bson.D{{"$eq", bson.A{"$success_count", 0}}}, 193 | 0, 194 | bson.D{{"$divide", bson.A{"$speed_sum", "$success_count"}}}, 195 | }}}}, 196 | }}} 197 | 198 | cursor, err := retbot.Aggregate(context.Background(), mongo.Pipeline{matchStage, groupStage, groupStage2, projectStage}) 199 | if err != nil { 200 | panic(err) 201 | } 202 | defer cursor.Close(context.Background()) 203 | var stats []interface{} 204 | 205 | // Insert the aggregated results into the new collection 206 | for cursor.Next(context.Background()) { 207 | var dailyStats DailyStats 208 | err := cursor.Decode(&dailyStats) 209 | if err 
!= nil { 210 | panic(err) 211 | } 212 | stats = append(stats, dailyStats) 213 | fmt.Printf("Got daily stats: %+v\n", dailyStats) 214 | } 215 | 216 | // Insert the aggregated results into the new collection 217 | result, err := repdao.InsertMany(ctx, stats) 218 | if err != nil { 219 | panic(err) 220 | } 221 | fmt.Printf("Inserted %v documents into the new collection!\n", len(result.InsertedIDs)) 222 | 223 | } 224 | -------------------------------------------------------------------------------- /integration/spadev0/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "io/ioutil" 7 | "math" 8 | "math/rand" 9 | "net/http" 10 | "os" 11 | "time" 12 | 13 | logging "github.com/ipfs/go-log/v2" 14 | _ "github.com/joho/godotenv/autoload" 15 | "github.com/klauspost/compress/zstd" 16 | "github.com/pkg/errors" 17 | "github.com/urfave/cli/v2" 18 | ) 19 | 20 | var logger = logging.Logger("spade-v0-tasks") 21 | 22 | func main() { 23 | app := &cli.App{ 24 | Name: "spadev0", 25 | Usage: "run spade v0 replica task generation", 26 | Flags: []cli.Flag{ 27 | &cli.StringSliceFlag{ 28 | Name: "sources", 29 | DefaultText: "http://src-1/replicas.json.zst,http://src-2/replicas.json.zst", 30 | Usage: "comma-separated list of sources to fetch replica list from", 31 | Required: true, 32 | }, 33 | }, 34 | Action: func(cctx *cli.Context) error { 35 | ctx := cctx.Context 36 | // logging.SetLogLevel("spade-v0-tasks", "DEBUG") 37 | 38 | // Extract the sources from the flag 39 | sources := cctx.StringSlice("sources") 40 | 41 | for _, source := range sources { 42 | res, err := fetchActiveReplicas(ctx, source) 43 | 44 | if err != nil { 45 | return err 46 | } 47 | 48 | var perProvider = make(map[int]ProviderReplicas) 49 | 50 | for _, replica := range res.ActiveReplicas { 51 | for _, contract := range replica.Contracts { 52 | providerID := contract.ProviderID 53 | size := (1 << 
replica.PieceLog2Size) >> 30 // Convert to GiB 54 | perProvider[providerID] = ProviderReplicas{ 55 | size: perProvider[providerID].size + size, 56 | replicas: append(perProvider[providerID].replicas, Replica{ 57 | PieceCID: replica.PieceCID, 58 | PieceLog2Size: replica.PieceLog2Size, 59 | OptionalDagRoot: replica.OptionalDagRoot, 60 | }), 61 | } 62 | } 63 | } 64 | 65 | replicasToTest := selectReplicasToTest(perProvider) 66 | 67 | // Debug output - no functional purposes 68 | totalCids := 0 69 | totalSize := 0 70 | for prov, rps := range replicasToTest { 71 | provider := perProvider[prov] 72 | logger.Debugf("provider %d is storing %d GiB will have %d tests\n", prov, provider.size, len(rps)) 73 | totalCids += len(rps) 74 | totalSize += provider.size 75 | } 76 | logger.Debugf("total %d CIDs will be tested for %d providers\n", totalCids, len(replicasToTest)) 77 | 78 | err = AddSpadeTasks(ctx, "spadev0", replicasToTest) 79 | if err != nil { 80 | logger.Errorf("failed to add tasks: %s", err) 81 | } 82 | } 83 | return nil 84 | }, 85 | } 86 | 87 | if err := app.Run(os.Args); err != nil { 88 | panic(err) 89 | } 90 | } 91 | 92 | func fetchActiveReplicas(ctx context.Context, url string) (*ActiveReplicas, error) { 93 | logger.Debugf("fetching CIDs from %s", url) 94 | 95 | req, err := http.NewRequestWithContext(ctx, 96 | http.MethodGet, 97 | url, 98 | nil) 99 | if err != nil { 100 | return nil, errors.Wrap(err, "failed to create request") 101 | } 102 | 103 | resp, err := http.DefaultClient.Do(req) 104 | if err != nil { 105 | return nil, errors.Wrap(err, "failed to make request") 106 | } 107 | 108 | defer resp.Body.Close() 109 | 110 | if resp.StatusCode != http.StatusOK { 111 | return nil, errors.Errorf("failed to get spade CID list: %s", resp.Status) 112 | } 113 | 114 | decompressor, err := zstd.NewReader(resp.Body) 115 | if err != nil { 116 | return nil, errors.Wrap(err, "failed to create decompressor") 117 | } 118 | 119 | defer decompressor.Close() 120 | 121 | data, err := 
// numCidsToTest computes the number of CIDs to test for a provider based on
// the total size of data it stores (sizeGiB, in GiB).
//
// The result is floor(log2(size in TiB)), with a minimum of 1:
// ex:
// < 4 TiB          = 1 cid
// 4 TiB - 8 TiB    = 2 cids
// 8 TiB - 16 TiB   = 3 cids
// 16 TiB - 32 TiB  = 4 cids
// 32 TiB - 64 TiB  = 5 cids
// 64 TiB - 128 TiB = 6 cids
// etc...
// (The previous comment described different brackets than the code computes.)
func numCidsToTest(sizeGiB int) int {
	tib := sizeGiB / 1024
	// Below 4 TiB the log2 term is at most 1 anyway; this guard also keeps
	// zero/negative sizes from producing NaN (int(NaN) is platform-defined).
	if tib < 4 {
		return 1
	}
	return int(math.Log2(float64(tib)))
}
[]Replica 184 | } 185 | 186 | type ActiveReplicas struct { 187 | StateEpoch uint `json:"state_epoch"` 188 | ActiveReplicas []ActiveReplica `json:"active_replicas"` 189 | } 190 | 191 | type ActiveReplica struct { 192 | Contracts []Contract `json:"contracts"` 193 | Replica 194 | } 195 | 196 | type Replica struct { 197 | PieceCID string `json:"piece_cid"` 198 | PieceLog2Size int `json:"piece_log2_size"` 199 | OptionalDagRoot string `json:"optional_dag_root"` 200 | } 201 | 202 | type Contract struct { 203 | ProviderID int `json:"provider_id"` 204 | LegacyMarketID int `json:"legacy_market_id"` 205 | LegacyMarketEndEpoch int `json:"legacy_market_end_epoch"` 206 | } 207 | -------------------------------------------------------------------------------- /integration/spadev0/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestSelectCidsToTest(t *testing.T) { 10 | // Sample data for testing 11 | sampleData := map[int]ProviderReplicas{ 12 | 123: {size: 128, replicas: []Replica{ 13 | {OptionalDagRoot: "root1", PieceCID: "cid1"}, 14 | }}, 15 | 456: {size: 4096, replicas: []Replica{ 16 | {OptionalDagRoot: "root3", PieceCID: "cid3"}, 17 | {OptionalDagRoot: "root4", PieceCID: "cid4"}, 18 | {OptionalDagRoot: "root5", PieceCID: "cid5"}, 19 | }}, 20 | } 21 | 22 | toTest := selectReplicasToTest(sampleData) 23 | 24 | // Ensure at least one replica is selected for each provider 25 | for providerID, replicas := range toTest { 26 | if len(replicas) == 0 { 27 | t.Errorf("No replicas selected for Provider %d", providerID) 28 | } 29 | } 30 | 31 | assert.Equal(t, 1, len(toTest[123])) 32 | assert.Equal(t, 2, len(toTest[456])) 33 | } 34 | -------------------------------------------------------------------------------- /integration/spadev0/util.go: -------------------------------------------------------------------------------- 1 | package 
main 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/data-preservation-programs/RetrievalBot/pkg/convert" 8 | "github.com/data-preservation-programs/RetrievalBot/pkg/env" 9 | "github.com/data-preservation-programs/RetrievalBot/pkg/requesterror" 10 | "github.com/data-preservation-programs/RetrievalBot/pkg/resolver" 11 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 12 | "github.com/filecoin-project/go-address" 13 | "github.com/pkg/errors" 14 | "go.mongodb.org/mongo-driver/mongo" 15 | "go.mongodb.org/mongo-driver/mongo/options" 16 | ) 17 | 18 | func AddSpadeTasks(ctx context.Context, requester string, replicasToTest map[int][]Replica) error { 19 | var tasks []interface{} 20 | var results []interface{} 21 | 22 | // set up cache and resolvers 23 | providerCacheTTL := env.GetDuration(env.ProviderCacheTTL, 24*time.Hour) 24 | locationCacheTTL := env.GetDuration(env.LocationCacheTTL, 24*time.Hour) 25 | locationResolver := resolver.NewLocationResolver(env.GetRequiredString(env.IPInfoToken), locationCacheTTL) 26 | providerResolver, err := resolver.NewProviderResolver( 27 | env.GetString(env.LotusAPIUrl, "https://api.node.glif.io/rpc/v0"), 28 | env.GetString(env.LotusAPIToken, ""), 29 | providerCacheTTL) 30 | if err != nil { 31 | panic(err) 32 | } 33 | 34 | // Check public IP address 35 | ipInfo, err := resolver.GetPublicIPInfo(ctx, "", "") 36 | if err != nil { 37 | panic(err) 38 | } 39 | logger.With("ipinfo", ipInfo).Infof("Public IP info retrieved") 40 | 41 | address.CurrentNetwork = address.Mainnet 42 | 43 | // For each SPID, assemble retrieval tasks for it 44 | for spid, replicas := range replicasToTest { 45 | strSpid, err := address.NewIDAddress(uint64(spid)) 46 | if err != nil { 47 | logger.Errorf("failed to convert spid to address: %d : %v", spid, err) 48 | continue 49 | } 50 | t, r := prepareTasksForSP(ctx, requester, strSpid, ipInfo, replicas, locationResolver, *providerResolver) 51 | 52 | //nolint:asasalint 53 | tasks = append(tasks, t) 
54 | //nolint:asasalint 55 | results = append(results, r) 56 | } 57 | 58 | // Write resulting tasks and results to the DB 59 | taskClient, err := mongo. 60 | Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.QueueMongoURI))) 61 | if err != nil { 62 | panic(err) 63 | } 64 | taskCollection := taskClient. 65 | Database(env.GetRequiredString(env.QueueMongoDatabase)).Collection("task_queue") 66 | 67 | if len(tasks) > 0 { 68 | _, err = taskCollection.InsertMany(ctx, tasks) 69 | if err != nil { 70 | return errors.Wrap(err, "failed to insert tasks") 71 | } 72 | } 73 | 74 | resultClient, err := mongo.Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.ResultMongoURI))) 75 | if err != nil { 76 | panic(err) 77 | } 78 | resultCollection := resultClient. 79 | Database(env.GetRequiredString(env.ResultMongoDatabase)). 80 | Collection("task_result") 81 | 82 | if len(results) > 0 { 83 | _, err = resultCollection.InsertMany(ctx, results) 84 | if err != nil { 85 | return errors.Wrap(err, "failed to insert results") 86 | } 87 | } 88 | 89 | return nil 90 | } 91 | 92 | var spadev0Metadata map[string]string = map[string]string{ 93 | "retrieve_type": "spade", 94 | "retrieve_size": "1048576", 95 | // todo: specify # of cids to test per layer of the tree TBD 96 | } 97 | 98 | func prepareTasksForSP( 99 | ctx context.Context, 100 | requester string, 101 | spid address.Address, 102 | ipInfo resolver.IPInfo, 103 | replicas []Replica, 104 | locationResolver resolver.LocationResolver, 105 | providerResolver resolver.ProviderResolver, 106 | ) (tasks []interface{}, results []interface{}) { 107 | providerInfo, err := providerResolver.ResolveProvider(ctx, spid.String()) 108 | if err != nil { 109 | logger.With("provider", spid). 
// addErrorResults appends a single synthetic failed task.Result for the given
// SP, carrying the supplied error code and message, and returns the extended
// results slice. Unlike the per-module variant used by the filplus
// integration, this records one result with Module set to the literal
// "spadev0" (a requester marker rather than a retrieval module) and no
// Content CID.
//
// NOTE(review): Metadata reuses the shared spadev0Metadata map by reference —
// safe as long as no consumer mutates it; confirm before adding per-result
// keys.
func addErrorResults(
	requester string,
	ipInfo resolver.IPInfo,
	results []interface{},
	spid string,
	providerInfo resolver.MinerInfo,
	location resolver.IPInfo,
	errorCode task.ErrorCode,
	errorMessage string,
) []interface{} {
	results = append(results, task.Result{
		Task: task.Task{
			Requester: requester,
			Module:    "spadev0",
			Metadata:  spadev0Metadata,
			// Provider identity plus its resolved geolocation.
			Provider: task.Provider{
				ID:         spid,
				PeerID:     providerInfo.PeerId,
				Multiaddrs: convert.MultiaddrsBytesToStringArraySkippingError(providerInfo.Multiaddrs),
				City:       location.City,
				Region:     location.Region,
				Country:    location.Country,
				Continent:  location.Continent,
			},
			CreatedAt: time.Now().UTC(),
			Timeout:   env.GetDuration(env.FilplusIntegrationTaskTimeout, 15*time.Second)},
		// Where the retrieval test was run from (this bot's public IP info).
		Retriever: task.Retriever{
			PublicIP:  ipInfo.IP,
			City:      ipInfo.City,
			Region:    ipInfo.Region,
			Country:   ipInfo.Country,
			Continent: ipInfo.Continent,
			ASN:       ipInfo.ASN,
			ISP:       ipInfo.ISP,
			Latitude:  ipInfo.Latitude,
			Longitude: ipInfo.Longitude,
		},
		// A zeroed failure result carrying only the error details.
		Result: task.RetrievalResult{
			Success:      false,
			ErrorCode:    errorCode,
			ErrorMessage: errorMessage,
			TTFB:         0,
			Speed:        0,
			Duration:     0,
			Downloaded:   0,
		},
		CreatedAt: time.Now().UTC(),
	})
	return results
}
Document model.DealState `bson:"document"` 30 | } 31 | 32 | func main() { 33 | app := &cli.App{ 34 | Name: "spcoverage", 35 | Usage: "Send tasks to make sure all deals of a given SPs are covered", 36 | Action: run, 37 | Flags: []cli.Flag{ 38 | &cli.StringSliceFlag{ 39 | Name: "sp", 40 | Usage: "The SPs to be covered", 41 | Aliases: []string{"p"}, 42 | }, 43 | &cli.StringFlag{ 44 | Name: "requester", 45 | Usage: "Name of the requester to tag the test result", 46 | Aliases: []string{"r"}, 47 | Required: true, 48 | }, 49 | }, 50 | } 51 | err := app.Run(os.Args) 52 | if err != nil { 53 | logger.Fatal(err) 54 | } 55 | } 56 | 57 | func run(c *cli.Context) error { 58 | ctx := c.Context 59 | sp := c.StringSlice("sp") 60 | requester := c.String("requester") 61 | if len(sp) == 0 { 62 | logger.Fatal("Please specify the SPs to be covered") 63 | } 64 | if requester == "" { 65 | logger.Fatal("Please specify the requester") 66 | } 67 | 68 | // Connect to the database 69 | stateMarketDealsClient, err := mongo. 70 | Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.StatemarketdealsMongoURI))) 71 | if err != nil { 72 | panic(err) 73 | } 74 | marketDealsCollection := stateMarketDealsClient. 75 | Database(env.GetRequiredString(env.StatemarketdealsMongoDatabase)). 
76 | Collection("state_market_deals") 77 | 78 | providerCacheTTL := env.GetDuration(env.ProviderCacheTTL, 24*time.Hour) 79 | locationCacheTTL := env.GetDuration(env.LocationCacheTTL, 24*time.Hour) 80 | locationResolver := resolver.NewLocationResolver(env.GetRequiredString(env.IPInfoToken), locationCacheTTL) 81 | providerResolver, err := resolver.NewProviderResolver( 82 | env.GetString(env.LotusAPIUrl, "https://api.node.glif.io/rpc/v0"), 83 | env.GetString(env.LotusAPIToken, ""), 84 | providerCacheTTL) 85 | if err != nil { 86 | panic(err) 87 | } 88 | // Check public IP address 89 | ipInfo, err := resolver.GetPublicIPInfo(ctx, "", "") 90 | if err != nil { 91 | panic(err) 92 | } 93 | logger.With("ipinfo", ipInfo).Infof("Public IP info retrieved") 94 | 95 | // Get all CIDs for the given SPs 96 | //nolint:govet 97 | result, err := marketDealsCollection.Aggregate(ctx, mongo.Pipeline{ 98 | {{"$match", bson.M{ 99 | "sector_start": bson.M{"$gt": 0}, 100 | "end": bson.M{"$gt": model.TimeToEpoch(time.Now())}, 101 | "verified": true, 102 | "slashed": bson.M{"$lt": 0}, 103 | "provider": bson.M{"$in": sp}, 104 | }}}, 105 | {{"$group", bson.D{ 106 | {"_id", bson.D{{"provider", "$provider"}, {"piece_cid", "$piece_cid"}}}, 107 | {"document", bson.D{{"$first", "$$ROOT"}}}, 108 | }}}, 109 | }) 110 | if err != nil { 111 | return errors.Wrap(err, "failed to query market deals") 112 | } 113 | var rows []Row 114 | err = result.All(ctx, &rows) 115 | if err != nil { 116 | return errors.Wrap(err, "failed to decode market deals") 117 | } 118 | 119 | logger.Infow("Market deals retrieved", "count", len(rows)) 120 | documents := underscore.Map(rows, func(row Row) model.DealState { 121 | return row.Document 122 | }) 123 | tasks, results := util.AddTasks(ctx, requester, ipInfo, documents, locationResolver, *providerResolver) 124 | 125 | taskClient, err := mongo. 
126 | Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.QueueMongoURI))) 127 | if err != nil { 128 | panic(err) 129 | } 130 | taskCollection := taskClient. 131 | Database(env.GetRequiredString(env.QueueMongoDatabase)).Collection("task_queue") 132 | 133 | if len(tasks) > 0 { 134 | _, err = taskCollection.InsertMany(ctx, tasks) 135 | if err != nil { 136 | return errors.Wrap(err, "failed to insert tasks") 137 | } 138 | } 139 | 140 | resultClient, err := mongo.Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.ResultMongoURI))) 141 | if err != nil { 142 | panic(err) 143 | } 144 | resultCollection := resultClient. 145 | Database(env.GetRequiredString(env.ResultMongoDatabase)). 146 | Collection("task_result") 147 | 148 | if len(results) > 0 { 149 | _, err = resultCollection.InsertMany(ctx, results) 150 | if err != nil { 151 | return errors.Wrap(err, "failed to insert results") 152 | } 153 | } 154 | 155 | return nil 156 | } 157 | -------------------------------------------------------------------------------- /integration/statemarketdeals/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "strconv" 7 | 8 | "github.com/bcicen/jstream" 9 | "github.com/data-preservation-programs/RetrievalBot/pkg/env" 10 | "github.com/data-preservation-programs/RetrievalBot/pkg/model" 11 | "github.com/data-preservation-programs/RetrievalBot/pkg/model/rpc" 12 | logging "github.com/ipfs/go-log/v2" 13 | _ "github.com/joho/godotenv/autoload" 14 | "github.com/klauspost/compress/zstd" 15 | "github.com/mitchellh/mapstructure" 16 | "github.com/pkg/errors" 17 | "go.mongodb.org/mongo-driver/bson" 18 | "go.mongodb.org/mongo-driver/mongo" 19 | "go.mongodb.org/mongo-driver/mongo/options" 20 | ) 21 | 22 | var logger = logging.Logger("state-market-deals") 23 | 24 | func main() { 25 | ctx := context.Background() 26 | err := refresh(ctx) 27 | if err != nil { 28 | 
logger.Error(err) 29 | } 30 | } 31 | 32 | func refresh(ctx context.Context) error { 33 | batchSize := env.GetInt(env.StatemarketdealsBatchSize, 1000) 34 | client, err := mongo.Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.StatemarketdealsMongoURI))) 35 | if err != nil { 36 | return errors.Wrap(err, "failed to connect to mongo") 37 | } 38 | 39 | //nolint:errcheck 40 | defer client.Disconnect(ctx) 41 | collection := client.Database(env.GetRequiredString(env.StatemarketdealsMongoDatabase)). 42 | Collection("state_market_deals") 43 | 44 | logger.Info("getting deal ids from mongo") 45 | dealIDCursor, err := collection.Find(ctx, bson.D{}, options.Find(). 46 | SetProjection(bson.M{"deal_id": 1, "_id": 1, "last_updated": 1})) 47 | if err != nil { 48 | return errors.Wrap(err, "failed to get deal ids") 49 | } 50 | 51 | defer dealIDCursor.Close(ctx) 52 | 53 | dealIDSet := make(map[int32]model.DealIDLastUpdated) 54 | for dealIDCursor.Next(ctx) { 55 | var deal model.DealIDLastUpdated 56 | err = dealIDCursor.Decode(&deal) 57 | if err != nil { 58 | return errors.Wrap(err, "failed to decode deal id") 59 | } 60 | 61 | dealIDSet[deal.DealID] = deal 62 | } 63 | 64 | logger.Info("getting deals from state market deals") 65 | req, err := http.NewRequestWithContext(ctx, 66 | http.MethodGet, 67 | "https://marketdeals.s3.amazonaws.com/StateMarketDeals.json.zst", 68 | nil) 69 | if err != nil { 70 | return errors.Wrap(err, "failed to create request") 71 | } 72 | 73 | resp, err := http.DefaultClient.Do(req) 74 | if err != nil { 75 | return errors.Wrap(err, "failed to make request") 76 | } 77 | 78 | defer resp.Body.Close() 79 | 80 | if resp.StatusCode != http.StatusOK { 81 | return errors.Errorf("failed to get state market deals: %s", resp.Status) 82 | } 83 | 84 | decompressor, err := zstd.NewReader(resp.Body) 85 | if err != nil { 86 | return errors.Wrap(err, "failed to create decompressor") 87 | } 88 | 89 | defer decompressor.Close() 90 | 91 | jsonDecoder := 
jstream.NewDecoder(decompressor, 1).EmitKV() 92 | insertCount := 0 93 | updateCount := 0 94 | dealBatch := make([]interface{}, 0, batchSize) 95 | for stream := range jsonDecoder.Stream() { 96 | keyValuePair, ok := stream.Value.(jstream.KV) 97 | 98 | if !ok { 99 | return errors.New("failed to get key value pair") 100 | } 101 | 102 | var deal rpc.Deal 103 | err = mapstructure.Decode(keyValuePair.Value, &deal) 104 | if err != nil { 105 | return errors.Wrap(err, "failed to decode deal") 106 | } 107 | 108 | dealID, err := strconv.ParseUint(keyValuePair.Key, 10, 32) 109 | if err != nil { 110 | return errors.Wrap(err, "failed to convert deal id to int") 111 | } 112 | 113 | newDeal := model.DealState{ 114 | DealID: int32(dealID), 115 | PieceCID: deal.Proposal.PieceCID.Root, 116 | PieceSize: deal.Proposal.PieceSize, 117 | Label: deal.Proposal.Label, 118 | Verified: deal.Proposal.VerifiedDeal, 119 | Client: deal.Proposal.Client, 120 | Provider: deal.Proposal.Provider, 121 | Start: deal.Proposal.StartEpoch, 122 | End: deal.Proposal.EndEpoch, 123 | SectorStart: deal.State.SectorStartEpoch, 124 | Slashed: deal.State.SlashEpoch, 125 | LastUpdated: deal.State.LastUpdatedEpoch, 126 | } 127 | // If the deal exists but the last_updated has changed, update it 128 | existing, ok := dealIDSet[int32(dealID)] 129 | if ok { 130 | if deal.State.LastUpdatedEpoch > existing.LastUpdated { 131 | logger.With("deal_id", dealID). 
132 | Debugf("updating deal as lastUpdated Changed from %d to %d", existing.LastUpdated, deal.State.LastUpdatedEpoch) 133 | updateCount += 1 134 | result, err := collection.ReplaceOne(ctx, bson.D{{"_id", existing.ID}}, newDeal) 135 | if err != nil { 136 | return errors.Wrap(err, "failed to update deal") 137 | } 138 | if result.MatchedCount == 0 { 139 | return errors.Errorf("failed to update deal: %d", dealID) 140 | } 141 | } 142 | continue 143 | } 144 | 145 | // Insert into mongo as the deal is not in mongo 146 | dealBatch = append(dealBatch, newDeal) 147 | logger.With("deal_id", dealID). 148 | Debug("inserting deal state into mongo") 149 | 150 | if len(dealBatch) == batchSize { 151 | logger.With("last", dealID). 152 | Infof("inserting %d deal state into mongo", batchSize) 153 | _, err := collection.InsertMany(ctx, dealBatch) 154 | if err != nil { 155 | return errors.Wrap(err, "failed to insert deal into mongo") 156 | } 157 | 158 | insertCount += len(dealBatch) 159 | dealBatch = make([]interface{}, 0, batchSize) 160 | } 161 | } 162 | 163 | if len(dealBatch) > 0 { 164 | logger.Infof("inserting %d deal state into mongo", len(dealBatch)) 165 | _, err := collection.InsertMany(ctx, dealBatch) 166 | if err != nil { 167 | return errors.Wrap(err, "failed to insert deal into mongo") 168 | } 169 | 170 | insertCount += len(dealBatch) 171 | } 172 | 173 | logger.With("count", insertCount, "update", updateCount).Info("finished inserting deals into mongo") 174 | if jsonDecoder.Err() != nil { 175 | logger.With("position", jsonDecoder.Pos()).Warn("prematurely reached end of json stream") 176 | return errors.Wrap(jsonDecoder.Err(), "failed to decode json further") 177 | } 178 | return nil 179 | } 180 | -------------------------------------------------------------------------------- /pkg/cmd/retrieval_worker/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | 
"github.com/data-preservation-programs/RetrievalBot/pkg/process" 6 | _ "github.com/joho/godotenv/autoload" 7 | ) 8 | 9 | func main() { 10 | processManager, err := process.NewProcessManager() 11 | if err != nil { 12 | panic(err) 13 | } 14 | 15 | processManager.Run(context.Background()) 16 | } 17 | -------------------------------------------------------------------------------- /pkg/convert/multiaddr.go: -------------------------------------------------------------------------------- 1 | package convert 2 | 3 | import ( 4 | "github.com/filecoin-project/go-state-types/abi" 5 | logging "github.com/ipfs/go-log/v2" 6 | "github.com/multiformats/go-multiaddr" 7 | "github.com/pkg/errors" 8 | ) 9 | 10 | func MultiaddrToAbi(addr multiaddr.Multiaddr) abi.Multiaddrs { 11 | return addr.Bytes() 12 | } 13 | 14 | func AbiToMultiaddr(addr abi.Multiaddrs) (multiaddr.Multiaddr, error) { 15 | //nolint:wrapcheck 16 | return multiaddr.NewMultiaddrBytes(addr) 17 | } 18 | 19 | func MultiaddrsToAbi(addrs []multiaddr.Multiaddr) []abi.Multiaddrs { 20 | abiAddrs := make([]abi.Multiaddrs, len(addrs)) 21 | for i, addr := range addrs { 22 | abiAddrs[i] = MultiaddrToAbi(addr) 23 | } 24 | return abiAddrs 25 | } 26 | 27 | func AbiToMultiaddrs(addrs []abi.Multiaddrs) ([]multiaddr.Multiaddr, error) { 28 | multiAddrs := make([]multiaddr.Multiaddr, len(addrs)) 29 | for i, addr := range addrs { 30 | multiAddr, err := AbiToMultiaddr(addr) 31 | if err != nil { 32 | return nil, err 33 | } 34 | multiAddrs[i] = multiAddr 35 | } 36 | return multiAddrs, nil 37 | } 38 | 39 | func AbiToMultiaddrsSkippingError(addrs []abi.Multiaddrs) []multiaddr.Multiaddr { 40 | multiAddrs := make([]multiaddr.Multiaddr, 0, len(addrs)) 41 | for _, addr := range addrs { 42 | multiAddr, err := AbiToMultiaddr(addr) 43 | if err != nil { 44 | logging.Logger("convert").With("err", err, "addr", addr).Debug("Failed to decode multiaddr") 45 | continue 46 | } 47 | multiAddrs = append(multiAddrs, multiAddr) 48 | } 49 | return multiAddrs 50 | 
} 51 | 52 | func MultiaddrsBytesToStringArraySkippingError(addrs []abi.Multiaddrs) []string { 53 | maddrs := AbiToMultiaddrsSkippingError(addrs) 54 | strs := make([]string, len(maddrs)) 55 | for i, maddr := range maddrs { 56 | strs[i] = maddr.String() 57 | } 58 | return strs 59 | } 60 | 61 | func StringArrayToMultiaddrsSkippingError(addrs []string) []multiaddr.Multiaddr { 62 | maddrs := make([]multiaddr.Multiaddr, 0, len(addrs)) 63 | for _, addr := range addrs { 64 | maddr, err := multiaddr.NewMultiaddr(addr) 65 | if err != nil { 66 | logging.Logger("convert").With("err", err, "addr", addr).Debug("Failed to decode multiaddr") 67 | continue 68 | } 69 | maddrs = append(maddrs, maddr) 70 | } 71 | return maddrs 72 | } 73 | 74 | func StringArrayToMultiaddrs(addrs []string) ([]multiaddr.Multiaddr, error) { 75 | maddrs := make([]multiaddr.Multiaddr, 0, len(addrs)) 76 | for _, addr := range addrs { 77 | maddr, err := multiaddr.NewMultiaddr(addr) 78 | if err != nil { 79 | return nil, errors.Wrap(err, "failed to decode multiaddr") 80 | } 81 | maddrs = append(maddrs, maddr) 82 | } 83 | return maddrs, nil 84 | } 85 | -------------------------------------------------------------------------------- /pkg/env/env.go: -------------------------------------------------------------------------------- 1 | package env 2 | 3 | import ( 4 | "fmt" 5 | logging "github.com/ipfs/go-log/v2" 6 | "os" 7 | "strconv" 8 | "time" 9 | ) 10 | 11 | type Key string 12 | 13 | //nolint:gosec 14 | const ( 15 | ProcessModules Key = "PROCESS_MODULES" 16 | ProcessErrorInterval Key = "PROCESS_ERROR_INTERVAL" 17 | TaskWorkerPollInterval Key = "TASK_WORKER_POLL_INTERVAL" 18 | TaskWorkerTimeoutBuffer Key = "TASK_WORKER_TIMEOUT_BUFFER" 19 | LotusAPIUrl Key = "LOTUS_API_URL" 20 | LotusAPIToken Key = "LOTUS_API_TOKEN" 21 | QueueMongoURI Key = "QUEUE_MONGO_URI" 22 | QueueMongoDatabase Key = "QUEUE_MONGO_DATABASE" 23 | ResultMongoURI Key = "RESULT_MONGO_URI" 24 | ResultMongoDatabase Key = "RESULT_MONGO_DATABASE" 25 | 
FilplusIntegrationBatchSize Key = "FILPLUS_INTEGRATION_BATCH_SIZE" 26 | FilplusIntegrationTaskTimeout Key = "FILPLUS_INTEGRATION_TASK_TIMEOUT" 27 | FilplusIntegrationRandConst Key = "FILPLUS_INTEGRATION_RANDOM_CONSTANT" 28 | StatemarketdealsMongoURI Key = "STATEMARKETDEALS_MONGO_URI" 29 | StatemarketdealsMongoDatabase Key = "STATEMARKETDEALS_MONGO_DATABASE" 30 | StatemarketdealsBatchSize Key = "STATEMARKETDEALS_BATCH_SIZE" 31 | StatemarketdealsInterval Key = "STATEMARKETDEALS_INTERVAL" 32 | PublicIP Key = "_PUBLIC_IP" 33 | City Key = "_CITY" 34 | Region Key = "_REGION" 35 | Country Key = "_COUNTRY" 36 | Continent Key = "_CONTINENT" 37 | ASN Key = "_ASN" 38 | ISP Key = "_ISP" 39 | Latitude Key = "_LATITUDE" 40 | Longitude Key = "_LONGITUDE" 41 | ProviderCacheTTL Key = "PROVIDER_CACHE_TTL" 42 | LocationCacheTTL Key = "LOCATION_CACHE_TTL" 43 | AcceptedContinents Key = "ACCEPTED_CONTINENTS" 44 | AcceptedCountries Key = "ACCEPTED_COUNTRIES" 45 | IPInfoToken Key = "IPINFO_TOKEN" 46 | ) 47 | 48 | func GetString(key Key, defaultValue string) string { 49 | value := os.Getenv(string(key)) 50 | if value == "" { 51 | return defaultValue 52 | } 53 | 54 | return value 55 | } 56 | 57 | func GetInt(key Key, defaultValue int) int { 58 | value := os.Getenv(string(key)) 59 | if value == "" { 60 | return defaultValue 61 | } 62 | 63 | intValue, err := strconv.Atoi(value) 64 | if err != nil { 65 | logging.Logger("env").Debugf("failed to parse %s as int", key) 66 | return defaultValue 67 | } 68 | 69 | return intValue 70 | } 71 | 72 | func GetRequiredInt(key Key) int { 73 | value := os.Getenv(string(key)) 74 | if value == "" { 75 | logging.Logger("env").Panicf("%s not set", key) 76 | } 77 | 78 | intValue, err := strconv.Atoi(value) 79 | if err != nil { 80 | logging.Logger("env").Panicf("failed to parse %s as int", key) 81 | } 82 | 83 | return intValue 84 | } 85 | 86 | func GetRequiredString(key Key) string { 87 | value := os.Getenv(string(key)) 88 | if value == "" { 89 | 
logging.Logger("env").Panicf("%s not set", key) 90 | } 91 | 92 | return value 93 | } 94 | 95 | func GetRequiredFloat32(key Key) float32 { 96 | value := GetRequiredString(key) 97 | floatValue, err := strconv.ParseFloat(value, 32) 98 | if err != nil { 99 | logging.Logger("env").Panicf("failed to parse %s as float32", key) 100 | } 101 | 102 | return float32(floatValue) 103 | } 104 | 105 | func GetFloat64(key Key, defaultValue float64) float64 { 106 | value := os.Getenv(string(key)) 107 | floatValue, err := strconv.ParseFloat(value, 64) 108 | if err != nil { 109 | logging.Logger("env").Debugf("failed to parse %s as float", key) 110 | return defaultValue 111 | } 112 | 113 | return floatValue 114 | } 115 | 116 | func GetRequiredDuration(key Key) time.Duration { 117 | value := GetRequiredString(key) 118 | duration, err := time.ParseDuration(value) 119 | if err != nil { 120 | logging.Logger("env").Panicf("%s not set", key) 121 | } 122 | 123 | return duration 124 | } 125 | 126 | func GetDuration(key Key, defaultValue time.Duration) time.Duration { 127 | value := os.Getenv(string(key)) 128 | if value == "" { 129 | return defaultValue 130 | } 131 | 132 | return GetRequiredDuration(key) 133 | } 134 | 135 | func MustSet(key Key, value string) { 136 | err := os.Setenv(string(key), value) 137 | if err != nil { 138 | logging.Logger("env").Panicf("failed to set %s to %s", key, value) 139 | } 140 | } 141 | 142 | func MustSetAny(key Key, value interface{}) { 143 | str := fmt.Sprintf("%v", value) 144 | err := os.Setenv(string(key), str) 145 | if err != nil { 146 | logging.Logger("env").Panicf("failed to set %s to %s", key, value) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /pkg/model/deal_state.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "time" 5 | 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type DealState struct { 10 | DealID 
int32 `bson:"deal_id"` 11 | PieceCID string `bson:"piece_cid"` 12 | PieceSize uint64 `bson:"piece_size"` 13 | Label string `bson:"label"` 14 | Verified bool `bson:"verified"` 15 | Client string `bson:"client"` 16 | Provider string `bson:"provider"` 17 | Start int32 `bson:"start"` 18 | End int32 `bson:"end"` 19 | SectorStart int32 `bson:"sector_start"` 20 | Slashed int32 `bson:"slashed"` 21 | LastUpdated int32 `bson:"last_updated"` 22 | } 23 | 24 | type DealIDLastUpdated struct { 25 | ID primitive.ObjectID `bson:"_id"` 26 | DealID int32 `bson:"deal_id"` 27 | LastUpdated int32 `bson:"last_updated"` 28 | } 29 | 30 | func EpochToTime(epoch int32) time.Time { 31 | if epoch < 0 { 32 | return time.Time{} 33 | } 34 | //nolint:gomnd 35 | return time.Unix(int64(epoch*30+1598306400), 0).UTC() 36 | } 37 | 38 | func TimeToEpoch(t time.Time) int32 { 39 | if t.IsZero() { 40 | return -1 41 | } 42 | //nolint:gomnd 43 | return int32(t.Unix()-1598306400) / 30 44 | } 45 | 46 | func (s DealState) AgeInYears() float64 { 47 | return time.Since(EpochToTime(s.SectorStart)).Hours() / 24 / 365 48 | } 49 | -------------------------------------------------------------------------------- /pkg/model/protocol.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "github.com/filecoin-project/go-state-types/abi" 5 | ) 6 | 7 | //go:generate go run github.com/hannahhoward/cbor-gen-for --map-encoding Protocol 8 | type Protocol struct { 9 | // The name of the transport protocol eg "libp2p" or "http" 10 | Name string 11 | // The address of the endpoint in multiaddr format 12 | Addresses []abi.Multiaddrs 13 | } 14 | 15 | type ProtocolName string 16 | 17 | const ( 18 | GraphSync ProtocolName = "GraphSync" 19 | Bitswap ProtocolName = "bitswap" 20 | HTTP ProtocolName = "http" 21 | HTTPS ProtocolName = "https" 22 | Libp2p ProtocolName = "libp2p" 23 | WS ProtocolName = "ws" 24 | WSS ProtocolName = "wss" 25 | ) 26 | 
-------------------------------------------------------------------------------- /pkg/model/protocol_cbor_gen.go: -------------------------------------------------------------------------------- 1 | // Code generated by github.com/whyrusleeping/cbor-gen. DO NOT EDIT. 2 | 3 | package model 4 | 5 | import ( 6 | "fmt" 7 | "io" 8 | "math" 9 | "sort" 10 | 11 | cid "github.com/ipfs/go-cid" 12 | cbg "github.com/whyrusleeping/cbor-gen" 13 | xerrors "golang.org/x/xerrors" 14 | ) 15 | 16 | var _ = xerrors.Errorf 17 | var _ = cid.Undef 18 | var _ = math.E 19 | var _ = sort.Sort 20 | 21 | func (t *Protocol) MarshalCBOR(w io.Writer) error { 22 | if t == nil { 23 | _, err := w.Write(cbg.CborNull) 24 | return err 25 | } 26 | 27 | cw := cbg.NewCborWriter(w) 28 | 29 | if _, err := cw.Write([]byte{162}); err != nil { 30 | return err 31 | } 32 | 33 | // t.Name (string) (string) 34 | if len("Name") > cbg.MaxLength { 35 | return xerrors.Errorf("Value in field \"Name\" was too long") 36 | } 37 | 38 | if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("Name"))); err != nil { 39 | return err 40 | } 41 | if _, err := io.WriteString(w, string("Name")); err != nil { 42 | return err 43 | } 44 | 45 | if len(t.Name) > cbg.MaxLength { 46 | return xerrors.Errorf("Value in field t.Name was too long") 47 | } 48 | 49 | if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len(t.Name))); err != nil { 50 | return err 51 | } 52 | if _, err := io.WriteString(w, string(t.Name)); err != nil { 53 | return err 54 | } 55 | 56 | // t.Addresses ([][]uint8) (slice) 57 | if len("Addresses") > cbg.MaxLength { 58 | return xerrors.Errorf("Value in field \"Addresses\" was too long") 59 | } 60 | 61 | if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("Addresses"))); err != nil { 62 | return err 63 | } 64 | if _, err := io.WriteString(w, string("Addresses")); err != nil { 65 | return err 66 | } 67 | 68 | if len(t.Addresses) > cbg.MaxLength { 69 | return xerrors.Errorf("Slice value in 
field t.Addresses was too long") 70 | } 71 | 72 | if err := cw.WriteMajorTypeHeader(cbg.MajArray, uint64(len(t.Addresses))); err != nil { 73 | return err 74 | } 75 | for _, v := range t.Addresses { 76 | if len(v) > cbg.ByteArrayMaxLen { 77 | return xerrors.Errorf("Byte array in field v was too long") 78 | } 79 | 80 | if err := cw.WriteMajorTypeHeader(cbg.MajByteString, uint64(len(v))); err != nil { 81 | return err 82 | } 83 | 84 | if _, err := cw.Write(v[:]); err != nil { 85 | return err 86 | } 87 | } 88 | return nil 89 | } 90 | 91 | func (t *Protocol) UnmarshalCBOR(r io.Reader) (err error) { 92 | *t = Protocol{} 93 | 94 | cr := cbg.NewCborReader(r) 95 | 96 | maj, extra, err := cr.ReadHeader() 97 | if err != nil { 98 | return err 99 | } 100 | defer func() { 101 | if err == io.EOF { 102 | err = io.ErrUnexpectedEOF 103 | } 104 | }() 105 | 106 | if maj != cbg.MajMap { 107 | return fmt.Errorf("cbor input should be of type map") 108 | } 109 | 110 | if extra > cbg.MaxLength { 111 | return fmt.Errorf("Protocol: map struct too large (%d)", extra) 112 | } 113 | 114 | var name string 115 | n := extra 116 | 117 | for i := uint64(0); i < n; i++ { 118 | 119 | { 120 | sval, err := cbg.ReadString(cr) 121 | if err != nil { 122 | return err 123 | } 124 | 125 | name = string(sval) 126 | } 127 | 128 | switch name { 129 | // t.Name (string) (string) 130 | case "Name": 131 | 132 | { 133 | sval, err := cbg.ReadString(cr) 134 | if err != nil { 135 | return err 136 | } 137 | 138 | t.Name = string(sval) 139 | } 140 | // t.Addresses ([][]uint8) (slice) 141 | case "Addresses": 142 | 143 | maj, extra, err = cr.ReadHeader() 144 | if err != nil { 145 | return err 146 | } 147 | 148 | if extra > cbg.MaxLength { 149 | return fmt.Errorf("t.Addresses: array too large (%d)", extra) 150 | } 151 | 152 | if maj != cbg.MajArray { 153 | return fmt.Errorf("expected cbor array") 154 | } 155 | 156 | if extra > 0 { 157 | t.Addresses = make([][]uint8, extra) 158 | } 159 | 160 | for i := 0; i < int(extra); i++ 
{ 161 | { 162 | var maj byte 163 | var extra uint64 164 | var err error 165 | 166 | maj, extra, err = cr.ReadHeader() 167 | if err != nil { 168 | return err 169 | } 170 | 171 | if extra > cbg.ByteArrayMaxLen { 172 | return fmt.Errorf("t.Addresses[i]: byte array too large (%d)", extra) 173 | } 174 | if maj != cbg.MajByteString { 175 | return fmt.Errorf("expected byte array") 176 | } 177 | 178 | if extra > 0 { 179 | t.Addresses[i] = make([]uint8, extra) 180 | } 181 | 182 | if _, err := io.ReadFull(cr, t.Addresses[i][:]); err != nil { 183 | return err 184 | } 185 | } 186 | } 187 | 188 | default: 189 | // Field doesn't exist on this type, so ignore it 190 | cbg.ScanForLinks(r, func(cid.Cid) {}) 191 | } 192 | } 193 | 194 | return nil 195 | } 196 | -------------------------------------------------------------------------------- /pkg/model/query_response.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | //go:generate go run github.com/hannahhoward/cbor-gen-for --map-encoding QueryResponse 4 | type QueryResponse struct { 5 | Protocols []Protocol 6 | } 7 | -------------------------------------------------------------------------------- /pkg/model/query_response_cbor_gen.go: -------------------------------------------------------------------------------- 1 | // Code generated by github.com/whyrusleeping/cbor-gen. DO NOT EDIT. 
2 | 3 | package model 4 | 5 | import ( 6 | "fmt" 7 | "io" 8 | "math" 9 | "sort" 10 | 11 | cid "github.com/ipfs/go-cid" 12 | cbg "github.com/whyrusleeping/cbor-gen" 13 | xerrors "golang.org/x/xerrors" 14 | ) 15 | 16 | var _ = xerrors.Errorf 17 | var _ = cid.Undef 18 | var _ = math.E 19 | var _ = sort.Sort 20 | 21 | func (t *QueryResponse) MarshalCBOR(w io.Writer) error { 22 | if t == nil { 23 | _, err := w.Write(cbg.CborNull) 24 | return err 25 | } 26 | 27 | cw := cbg.NewCborWriter(w) 28 | 29 | if _, err := cw.Write([]byte{161}); err != nil { 30 | return err 31 | } 32 | 33 | // t.Protocols ([]model.Protocol) (slice) 34 | if len("Protocols") > cbg.MaxLength { 35 | return xerrors.Errorf("Value in field \"Protocols\" was too long") 36 | } 37 | 38 | if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("Protocols"))); err != nil { 39 | return err 40 | } 41 | if _, err := io.WriteString(w, string("Protocols")); err != nil { 42 | return err 43 | } 44 | 45 | if len(t.Protocols) > cbg.MaxLength { 46 | return xerrors.Errorf("Slice value in field t.Protocols was too long") 47 | } 48 | 49 | if err := cw.WriteMajorTypeHeader(cbg.MajArray, uint64(len(t.Protocols))); err != nil { 50 | return err 51 | } 52 | for _, v := range t.Protocols { 53 | if err := v.MarshalCBOR(cw); err != nil { 54 | return err 55 | } 56 | } 57 | return nil 58 | } 59 | 60 | func (t *QueryResponse) UnmarshalCBOR(r io.Reader) (err error) { 61 | *t = QueryResponse{} 62 | 63 | cr := cbg.NewCborReader(r) 64 | 65 | maj, extra, err := cr.ReadHeader() 66 | if err != nil { 67 | return err 68 | } 69 | defer func() { 70 | if err == io.EOF { 71 | err = io.ErrUnexpectedEOF 72 | } 73 | }() 74 | 75 | if maj != cbg.MajMap { 76 | return fmt.Errorf("cbor input should be of type map") 77 | } 78 | 79 | if extra > cbg.MaxLength { 80 | return fmt.Errorf("QueryResponse: map struct too large (%d)", extra) 81 | } 82 | 83 | var name string 84 | n := extra 85 | 86 | for i := uint64(0); i < n; i++ { 87 | 88 | { 89 | sval, 
err := cbg.ReadString(cr) 90 | if err != nil { 91 | return err 92 | } 93 | 94 | name = string(sval) 95 | } 96 | 97 | switch name { 98 | // t.Protocols ([]model.Protocol) (slice) 99 | case "Protocols": 100 | 101 | maj, extra, err = cr.ReadHeader() 102 | if err != nil { 103 | return err 104 | } 105 | 106 | if extra > cbg.MaxLength { 107 | return fmt.Errorf("t.Protocols: array too large (%d)", extra) 108 | } 109 | 110 | if maj != cbg.MajArray { 111 | return fmt.Errorf("expected cbor array") 112 | } 113 | 114 | if extra > 0 { 115 | t.Protocols = make([]Protocol, extra) 116 | } 117 | 118 | for i := 0; i < int(extra); i++ { 119 | 120 | var v Protocol 121 | if err := v.UnmarshalCBOR(cr); err != nil { 122 | return err 123 | } 124 | 125 | t.Protocols[i] = v 126 | } 127 | 128 | default: 129 | // Field doesn't exist on this type, so ignore it 130 | cbg.ScanForLinks(r, func(cid.Cid) {}) 131 | } 132 | } 133 | 134 | return nil 135 | } 136 | -------------------------------------------------------------------------------- /pkg/model/rpc/types.go: -------------------------------------------------------------------------------- 1 | package rpc 2 | 3 | type Deal struct { 4 | Proposal DealProposal 5 | State DealState 6 | } 7 | 8 | type Cid struct { 9 | Root string `json:"/" mapstructure:"/"` 10 | } 11 | 12 | type DealProposal struct { 13 | PieceCID Cid 14 | PieceSize uint64 15 | VerifiedDeal bool 16 | Client string 17 | Provider string 18 | Label string 19 | StartEpoch int32 20 | EndEpoch int32 21 | } 22 | 23 | type DealState struct { 24 | SectorStartEpoch int32 25 | LastUpdatedEpoch int32 26 | SlashEpoch int32 27 | } 28 | -------------------------------------------------------------------------------- /pkg/net/bitswap.go: -------------------------------------------------------------------------------- 1 | package net 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 6 | "github.com/ipfs/go-cid" 7 | "github.com/ipfs/go-datastore" 8 | 
	blockstore "github.com/ipfs/go-ipfs-blockstore"
	bsclient "github.com/ipfs/go-libipfs/bitswap/client"
	bsmsg "github.com/ipfs/go-libipfs/bitswap/message"
	bsnet "github.com/ipfs/go-libipfs/bitswap/network"
	"github.com/ipfs/go-libipfs/blocks"
	logging "github.com/ipfs/go-log/v2"
	"github.com/libp2p/go-libp2p/core/host"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/core/routing"
	"github.com/pkg/errors"
	"golang.org/x/exp/slices"
	"time"
)

// SingleContentRouter is a content router that always "finds" exactly one
// pre-configured peer, so the bitswap client only ever talks to that target
// and never consults a real routing system.
type SingleContentRouter struct {
	AddrInfo peer.AddrInfo
}

// PutValue is unsupported: this router only serves provider lookups.
func (s SingleContentRouter) PutValue(context.Context, string, []byte, ...routing.Option) error {
	return routing.ErrNotSupported
}

// GetValue is unsupported and always reports not-found.
func (s SingleContentRouter) GetValue(context.Context, string, ...routing.Option) ([]byte, error) {
	return nil, routing.ErrNotFound
}

// SearchValue is unsupported and always reports not-found.
func (s SingleContentRouter) SearchValue(ctx context.Context, key string, opts ...routing.Option) (
	<-chan []byte, error) {
	return nil, routing.ErrNotFound
}

// Provide is unsupported: this node never advertises content.
func (s SingleContentRouter) Provide(context.Context, cid.Cid, bool) error {
	return routing.ErrNotSupported
}

// FindProvidersAsync yields the configured peer exactly once, regardless of
// the requested CID, then closes the channel.
func (s SingleContentRouter) FindProvidersAsync(context.Context, cid.Cid, int) <-chan peer.AddrInfo {
	ch := make(chan peer.AddrInfo)
	go func() {
		ch <- s.AddrInfo
		close(ch)
	}()
	return ch
}

// FindPeer always reports not-found; peer addresses come from FindProvidersAsync.
func (s SingleContentRouter) FindPeer(context.Context, peer.ID) (peer.AddrInfo, error) {
	return peer.AddrInfo{}, routing.ErrNotFound
}

func (s SingleContentRouter) Bootstrap(context.Context) error {
	return nil
}

func (s SingleContentRouter) Close() error {
	return nil
}

// MessageReceiver tees every incoming bitswap network event to the real
// client and additionally to MessageHandler (used below to detect DONT_HAVE
// responses for the requested block).
type MessageReceiver struct {
	BSClient       *bsclient.Client
	MessageHandler func(ctx context.Context, sender peer.ID, incoming bsmsg.BitSwapMessage)
}

// ReceiveMessage forwards the message to the bitswap client first, then to
// the extra handler.
func (m MessageReceiver) ReceiveMessage(
	ctx context.Context,
	sender peer.ID,
	incoming bsmsg.BitSwapMessage) {
	m.BSClient.ReceiveMessage(ctx, sender, incoming)
	m.MessageHandler(ctx, sender, incoming)
}

func (m MessageReceiver) ReceiveError(err error) {
	m.BSClient.ReceiveError(err)
}

func (m MessageReceiver) PeerConnected(id peer.ID) {
	m.BSClient.PeerConnected(id)
}
func (m MessageReceiver) PeerDisconnected(id peer.ID) {
	m.BSClient.PeerDisconnected(id)
}

// BitswapClient retrieves a single block from one specific peer over bitswap.
type BitswapClient struct {
	host    host.Host
	timeout time.Duration // budget applied to the connect phase below
}

// NewBitswapClient builds a BitswapClient on top of an existing libp2p host.
func NewBitswapClient(host host.Host, timeout time.Duration) BitswapClient {
	return BitswapClient{
		host:    host,
		timeout: timeout,
	}
}

// Retrieve fetches the block identified by cid directly from the target peer,
// using a throwaway in-memory blockstore and the single-peer router above.
func (c BitswapClient) Retrieve(
	parent context.Context,
	target peer.AddrInfo,
	cid cid.Cid) (*task.RetrievalResult, error) {
	logger := logging.Logger("bitswap_client").With("cid", cid).With("target", target)
	network := bsnet.NewFromIpfsHost(c.host, SingleContentRouter{
		AddrInfo: target,
	})
	bswap := bsclient.New(parent, network, blockstore.NewBlockstore(datastore.NewMapDatastore()))
	// notFound is closed when the target explicitly answers DONT_HAVE for
	// our CID, i.e. the block is definitively absent.
	notFound := make(chan struct{})
	network.Start(MessageReceiver{BSClient: bswap, MessageHandler: func(
		ctx context.Context, sender peer.ID, incoming bsmsg.BitSwapMessage) {
		if sender == target.ID && slices.Contains(incoming.DontHaves(), cid) {
			logger.Info("Block not found")
			close(notFound)
		}
	}})
	defer bswap.Close()
	defer network.Stop()
	connectContext, cancel := context.WithTimeout(parent, c.timeout)
	defer cancel()
	logger.Info("Connecting to target peer...")
	err := c.host.Connect(connectContext, target)
	if err != nil {
		logger.With("err", err).Info("Failed to connect to target peer")
		// Connection failure is reported as a task result, not an error.
		return task.NewErrorRetrievalResultWithErrorResolution(task.CannotConnect, err), nil
	}

startTime := time.Now() 129 | resultChan := make(chan blocks.Block) 130 | errChan := make(chan error) 131 | go func() { 132 | logger.Info("Retrieving block...") 133 | blk, err := bswap.GetBlock(connectContext, cid) 134 | if err != nil { 135 | logger.Info(err) 136 | errChan <- err 137 | } else { 138 | resultChan <- blk 139 | } 140 | }() 141 | select { 142 | case <-notFound: 143 | return task.NewErrorRetrievalResult( 144 | task.NotFound, errors.New("DONT_HAVE received from the target peer")), nil 145 | case blk := <-resultChan: 146 | elapsed := time.Since(startTime) 147 | var size = int64(len(blk.RawData())) 148 | logger.With("size", size).With("elapsed", elapsed).Info("Retrieved block") 149 | return task.NewSuccessfulRetrievalResult(elapsed, size, elapsed), nil 150 | case err := <-errChan: 151 | return task.NewErrorRetrievalResultWithErrorResolution(task.RetrievalFailure, err), nil 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /pkg/net/graphsync.go: -------------------------------------------------------------------------------- 1 | package net 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 6 | datatransfer "github.com/filecoin-project/go-data-transfer/v2" 7 | retrievaltypes "github.com/filecoin-project/go-retrieval-types" 8 | "github.com/filecoin-project/go-state-types/big" 9 | "github.com/filecoin-project/lassie/pkg/net/client" 10 | "github.com/ipfs/go-cid" 11 | "github.com/ipfs/go-datastore" 12 | "github.com/ipfs/go-datastore/sync" 13 | logging "github.com/ipfs/go-log/v2" 14 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 15 | "github.com/ipld/go-ipld-prime/storage/memstore" 16 | selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" 17 | "github.com/libp2p/go-libp2p/core/host" 18 | "github.com/libp2p/go-libp2p/core/peer" 19 | "github.com/pkg/errors" 20 | "sync/atomic" 21 | "time" 22 | ) 23 | 24 | type GraphsyncClient struct { 25 | host 
host.Host 26 | timeout time.Duration 27 | counter *TimeCounter 28 | } 29 | 30 | func NewGraphsyncClient(host host.Host, timeout time.Duration) GraphsyncClient { 31 | return GraphsyncClient{ 32 | host: host, 33 | timeout: timeout, 34 | counter: NewTimeCounter(), 35 | } 36 | } 37 | 38 | type TimeCounter struct { 39 | counter uint64 40 | } 41 | 42 | func NewTimeCounter() *TimeCounter { 43 | return &TimeCounter{counter: uint64(time.Now().UnixNano())} 44 | } 45 | 46 | func (tc *TimeCounter) Next() uint64 { 47 | counter := atomic.AddUint64(&tc.counter, 1) 48 | return counter 49 | } 50 | 51 | func (c GraphsyncClient) Retrieve( 52 | parent context.Context, 53 | target peer.AddrInfo, 54 | cid cid.Cid) (*task.RetrievalResult, error) { 55 | logger := logging.Logger("graphsync_client").With("cid", cid, "target", target) 56 | ctx, cancel := context.WithTimeout(parent, c.timeout) 57 | defer cancel() 58 | datastore := sync.MutexWrap(datastore.NewMapDatastore()) 59 | retrievalClient, err := client.NewClient(ctx, datastore, c.host) 60 | if err != nil { 61 | return nil, errors.Wrap(err, "failed to create graphsync retrieval client") 62 | } 63 | if err := retrievalClient.AwaitReady(); err != nil { 64 | return nil, errors.Wrap(err, "failed to wait for graphsync retrieval client to be ready") 65 | } 66 | err = retrievalClient.Connect(ctx, target) 67 | if err != nil { 68 | return task.NewErrorRetrievalResultWithErrorResolution(task.CannotConnect, err), nil 69 | } 70 | 71 | shutDown := make(chan struct{}) 72 | go func() { 73 | time.Sleep(c.timeout) 74 | shutDown <- struct{}{} 75 | }() 76 | 77 | selector := selectorparse.CommonSelector_MatchPoint 78 | params, err := retrievaltypes.NewParamsV1(big.Zero(), 0, 0, selector, nil, big.Zero()) 79 | if err != nil { 80 | return nil, errors.Wrap(err, "failed to create retrieval params") 81 | } 82 | 83 | linkSystem := cidlink.DefaultLinkSystem() 84 | storage := &memstore.Store{} 85 | linkSystem.SetWriteStorage(storage) 86 | 
linkSystem.SetReadStorage(storage) 87 | 88 | stats, err := retrievalClient.RetrieveFromPeer( 89 | ctx, 90 | linkSystem, 91 | target.ID, 92 | &retrievaltypes.DealProposal{ 93 | PayloadCID: cid, 94 | ID: retrievaltypes.DealID(c.counter.Next()), 95 | Params: params, 96 | }, 97 | selector, 98 | func(event datatransfer.Event, channelState datatransfer.ChannelState) { 99 | logger.With("event", event, "channelState", channelState).Debug("received data transfer event") 100 | }, 101 | shutDown, 102 | ) 103 | 104 | if err != nil { 105 | logger.Info(err) 106 | return task.NewErrorRetrievalResultWithErrorResolution(task.RetrievalFailure, err), nil 107 | } 108 | 109 | return task.NewSuccessfulRetrievalResult(stats.TimeToFirstByte, int64(stats.Size), stats.Duration), nil 110 | } 111 | -------------------------------------------------------------------------------- /pkg/net/host.go: -------------------------------------------------------------------------------- 1 | package net 2 | 3 | import ( 4 | "context" 5 | "github.com/libp2p/go-libp2p" 6 | "github.com/libp2p/go-libp2p/core/host" 7 | "github.com/libp2p/go-libp2p/core/network" 8 | "github.com/libp2p/go-libp2p/p2p/muxer/mplex" 9 | "github.com/libp2p/go-libp2p/p2p/muxer/yamux" 10 | "github.com/libp2p/go-libp2p/p2p/security/noise" 11 | tls "github.com/libp2p/go-libp2p/p2p/security/tls" 12 | quic "github.com/libp2p/go-libp2p/p2p/transport/quic" 13 | "github.com/libp2p/go-libp2p/p2p/transport/tcp" 14 | "github.com/libp2p/go-libp2p/p2p/transport/websocket" 15 | webtransport "github.com/libp2p/go-libp2p/p2p/transport/webtransport" 16 | "github.com/multiformats/go-multiaddr" 17 | ) 18 | 19 | const yamuxID = "/yamux/1.0.0" 20 | const mplexID = "/mplex/6.7.0" 21 | 22 | func InitHost(ctx context.Context, opts []libp2p.Option, listenAddrs ...multiaddr.Multiaddr) (host.Host, error) { 23 | opts = append([]libp2p.Option{ 24 | libp2p.Identity(nil), 25 | libp2p.ResourceManager(&network.NullResourceManager{})}, 26 | opts...) 
27 | if len(listenAddrs) > 0 { 28 | opts = append([]libp2p.Option{libp2p.ListenAddrs(listenAddrs...)}, opts...) 29 | } 30 | // add transports 31 | opts = append([]libp2p.Option{ 32 | libp2p.Transport(tcp.NewTCPTransport, tcp.WithMetrics()), 33 | libp2p.Transport(websocket.New), 34 | libp2p.Transport(quic.NewTransport), 35 | libp2p.Transport(webtransport.New)}, 36 | opts...) 37 | // add security 38 | opts = append([]libp2p.Option{ 39 | libp2p.Security(tls.ID, tls.New), 40 | libp2p.Security(noise.ID, noise.New)}, 41 | opts...) 42 | 43 | // add muxers 44 | opts = append([]libp2p.Option{ 45 | libp2p.Muxer(yamuxID, yamuxTransport()), 46 | libp2p.Muxer(mplexID, mplex.DefaultTransport)}, 47 | opts...) 48 | 49 | //nolint:wrapcheck 50 | return libp2p.New(opts...) 51 | } 52 | 53 | func yamuxTransport() network.Multiplexer { 54 | tpt := *yamux.DefaultTransport 55 | tpt.AcceptBacklog = 512 56 | return &tpt 57 | } 58 | -------------------------------------------------------------------------------- /pkg/net/http.go: -------------------------------------------------------------------------------- 1 | package net 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 6 | "github.com/ipfs/go-cid" 7 | logging "github.com/ipfs/go-log/v2" 8 | "github.com/pkg/errors" 9 | "io" 10 | "net/http" 11 | "net/url" 12 | "time" 13 | ) 14 | 15 | type HTTPClient struct { 16 | timeout time.Duration 17 | } 18 | 19 | func NewHTTPClient(timeout time.Duration) HTTPClient { 20 | return HTTPClient{ 21 | timeout: timeout, 22 | } 23 | } 24 | 25 | func (c HTTPClient) RetrievePiece( 26 | parent context.Context, 27 | host string, 28 | cid cid.Cid, 29 | length int64) (*task.RetrievalResult, error) { 30 | logger := logging.Logger("http_client").With("cid", cid, "host", host) 31 | urlStr := host 32 | if urlStr[len(urlStr)-1] != '/' { 33 | urlStr += "/" 34 | } 35 | 36 | urlStr += "piece/" + cid.String() 37 | fileURL, err := url.Parse(urlStr) 38 | if err != nil { 39 | 
return nil, errors.Wrap(err, "failed to parse url") 40 | } 41 | 42 | client := &http.Client{ 43 | Timeout: c.timeout, 44 | } 45 | 46 | ctx, cancel := context.WithTimeout(parent, c.timeout) 47 | defer cancel() 48 | request, err := http.NewRequestWithContext(ctx, http.MethodGet, fileURL.String(), nil) 49 | 50 | if err != nil { 51 | return nil, errors.Wrap(err, "failed to create request") 52 | } 53 | 54 | startTime := time.Now() 55 | logger.With("URL", fileURL).Info("Sending request to host") 56 | resp, err := client.Do(request) 57 | if err != nil { 58 | return task.NewErrorRetrievalResultWithErrorResolution(task.CannotConnect, err), nil 59 | } 60 | 61 | fbTime := time.Since(startTime) 62 | 63 | defer resp.Body.Close() 64 | logger.With("status", resp.Status, "header", resp.Header).Info("Received response from host") 65 | if resp.StatusCode == http.StatusNotFound { 66 | return task.NewErrorRetrievalResultWithErrorResolution( 67 | task.NotFound, errors.Errorf("status code: %d", resp.StatusCode)), nil 68 | } 69 | 70 | if resp.StatusCode > 299 { 71 | return task.NewErrorRetrievalResultWithErrorResolution( 72 | task.RetrievalFailure, errors.Errorf("status code: %d", resp.StatusCode)), nil 73 | } 74 | 75 | downloaded, err := io.CopyN(io.Discard, resp.Body, length) 76 | if err != nil { 77 | logger.Info(err) 78 | return task.NewErrorRetrievalResultWithErrorResolution(task.RetrievalFailure, err), nil 79 | } 80 | 81 | elapsed := time.Since(startTime) 82 | return task.NewSuccessfulRetrievalResult(fbTime, downloaded, elapsed), nil 83 | } 84 | -------------------------------------------------------------------------------- /pkg/process/process_manager.go: -------------------------------------------------------------------------------- 1 | package process 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/env" 6 | "github.com/data-preservation-programs/RetrievalBot/pkg/resolver" 7 | "github.com/google/uuid" 8 | logging 
"github.com/ipfs/go-log/v2" 9 | "github.com/pkg/errors" 10 | "os" 11 | "os/exec" 12 | "path/filepath" 13 | "strings" 14 | "time" 15 | ) 16 | 17 | type ProcessManager struct { 18 | concurrency map[string]int 19 | errorInterval time.Duration 20 | } 21 | 22 | func (p ProcessManager) Run(ctx context.Context) { 23 | for module, concurrency := range p.concurrency { 24 | for i := 0; i < concurrency; i++ { 25 | module := module 26 | logger := logging.Logger("process-manager").With("module", module) 27 | go func() { 28 | for { 29 | cmd := exec.CommandContext(ctx, module) 30 | cmd.Env = os.Environ() 31 | label := "correlation_id=" + uuid.New().String() 32 | if os.Getenv("GOLOG_LOG_LABELS") != "" { 33 | label = label + os.Getenv("GOLOG_LOG_LABELS") + "," + label 34 | } 35 | cmd.Env = append(cmd.Env, "GOLOG_LOG_LABELS="+label) 36 | cmd.Stdout = os.Stdout 37 | cmd.Stderr = os.Stderr 38 | logger.Debug("Spawning new process") 39 | err := cmd.Run() 40 | if errors.Is(err, context.Canceled) { 41 | logger.With("err", err).Infof("Process %s canceled", module) 42 | return 43 | } 44 | if err != nil { 45 | logger.With("err", err). 46 | Errorf("Process %s failed. Waiting for %f Seconds", module, p.errorInterval.Seconds()) 47 | time.Sleep(p.errorInterval) 48 | } 49 | } 50 | }() 51 | } 52 | } 53 | 54 | <-ctx.Done() 55 | } 56 | 57 | func NewProcessManager() (*ProcessManager, error) { 58 | logger := logging.Logger("process-manager") 59 | // Setup all worker 60 | concurrency := make(map[string]int) 61 | modules := strings.Split(env.GetRequiredString(env.ProcessModules), ",") 62 | for _, module := range modules { 63 | path, err := exec.LookPath(module) 64 | if err != nil { 65 | return nil, errors.Wrapf(err, "failed to find module %s", module) 66 | } 67 | 68 | moduleName := strings.ToUpper(strings.Split(filepath.Base(path), ".")[0]) 69 | logger.Infof("Found module %s at %s. 
Looking for CONCURRENCY_%s now.", module, path, moduleName) 70 | 71 | concurrencyNumber := env.GetInt(env.Key("CONCURRENCY_"+moduleName), 1) 72 | concurrency[path] = concurrencyNumber 73 | } 74 | 75 | // Check public IP address 76 | ipInfo, err := resolver.GetPublicIPInfo(context.TODO(), "", "") 77 | if err != nil { 78 | return nil, errors.Wrap(err, "failed to get public IP info") 79 | } 80 | 81 | logger.With("ipinfo", ipInfo).Infof("Public IP info retrieved") 82 | 83 | env.MustSet(env.PublicIP, ipInfo.IP) 84 | env.MustSet(env.City, ipInfo.City) 85 | env.MustSet(env.Region, ipInfo.Region) 86 | env.MustSet(env.Country, ipInfo.Country) 87 | env.MustSet(env.Continent, ipInfo.Continent) 88 | env.MustSet(env.ASN, ipInfo.ASN) 89 | env.MustSet(env.ISP, ipInfo.ISP) 90 | env.MustSetAny(env.Latitude, ipInfo.Latitude) 91 | env.MustSetAny(env.Longitude, ipInfo.Longitude) 92 | errorInterval := env.GetDuration(env.ProcessErrorInterval, 5*time.Second) 93 | 94 | return &ProcessManager{ 95 | concurrency, 96 | errorInterval, 97 | }, nil 98 | } 99 | -------------------------------------------------------------------------------- /pkg/requesterror/error.go: -------------------------------------------------------------------------------- 1 | package requesterror 2 | 3 | import ( 4 | "fmt" 5 | "github.com/libp2p/go-libp2p/core/peer" 6 | ) 7 | 8 | type InvalidIPError struct { 9 | IP string 10 | } 11 | 12 | type BogonIPError struct { 13 | IP string 14 | } 15 | 16 | type HostLookupError struct { 17 | Host string 18 | Err error 19 | } 20 | 21 | type CannotConnectError struct { 22 | PeerID peer.ID 23 | Err error 24 | } 25 | 26 | type StreamError struct { 27 | Err error 28 | } 29 | 30 | type NoValidMultiAddrError struct { 31 | } 32 | 33 | func (e NoValidMultiAddrError) Error() string { 34 | return "no valid multiaddr" 35 | } 36 | 37 | func (e HostLookupError) Error() string { 38 | return fmt.Sprintf("failed to lookup host %s: %s", e.Host, e.Err) 39 | } 40 | 41 | func (e InvalidIPError) 
Error() string { 42 | return fmt.Sprintf("invalid IP: %s", e.IP) 43 | } 44 | 45 | func (e BogonIPError) Error() string { 46 | return fmt.Sprintf("bogon IP: %s", e.IP) 47 | } 48 | 49 | func (e CannotConnectError) Error() string { 50 | return fmt.Sprintf("failed to connect to peer %s: %s", e.PeerID.String(), e.Err) 51 | } 52 | 53 | func (e StreamError) Error() string { 54 | return fmt.Sprintf("failed to get supported protocols from peer: %s", e.Err) 55 | } 56 | -------------------------------------------------------------------------------- /pkg/resolver/location_resolver.go: -------------------------------------------------------------------------------- 1 | package resolver 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "net" 7 | "net/http" 8 | "os" 9 | "strconv" 10 | "strings" 11 | "time" 12 | 13 | "github.com/data-preservation-programs/RetrievalBot/pkg/convert" 14 | "github.com/data-preservation-programs/RetrievalBot/pkg/requesterror" 15 | "github.com/data-preservation-programs/RetrievalBot/pkg/resources" 16 | "github.com/filecoin-project/go-state-types/abi" 17 | logging "github.com/ipfs/go-log/v2" 18 | "github.com/jellydator/ttlcache/v3" 19 | "github.com/multiformats/go-multiaddr" 20 | "github.com/pkg/errors" 21 | "golang.org/x/exp/slices" 22 | ) 23 | 24 | //nolint:gochecknoglobals 25 | var countryMapping = make(map[string]string) 26 | var _ = json.Unmarshal(resources.CountryToContinentJSON, &countryMapping) 27 | 28 | type IPInfo struct { 29 | IP string `json:"ip"` 30 | City string `json:"city"` 31 | Region string `json:"region"` 32 | Country string `json:"country"` 33 | Continent string `json:"continent"` 34 | Loc string `json:"loc"` 35 | Org string `json:"org"` 36 | Postal string `json:"postal"` 37 | Timezone string `json:"timezone"` 38 | Bogon bool `json:"bogon"` 39 | Latitude float32 40 | Longitude float32 41 | ASN string 42 | ISP string 43 | } 44 | 45 | func (i *IPInfo) Resolve() { 46 | loc := strings.Split(i.Loc, ",") 47 | //nolint:gomnd 48 | if 
len(loc) == 2 { 49 | if lat, err := strconv.ParseFloat(loc[0], 32); err == nil { 50 | i.Latitude = float32(lat) 51 | } 52 | if long, err := strconv.ParseFloat(loc[1], 32); err == nil { 53 | i.Longitude = float32(long) 54 | } 55 | } 56 | 57 | //nolint:gomnd 58 | org := strings.SplitN(i.Org, " ", 2) 59 | if len(org) == 2 { 60 | i.ASN = org[0] 61 | i.ISP = org[1] 62 | } 63 | } 64 | 65 | func GetPublicIPInfo(ctx context.Context, ip string, token string) (IPInfo, error) { 66 | logger := logging.Logger("location_resolver") 67 | url, exists := os.LookupEnv("IPINFO_URL") 68 | if !exists { 69 | url = "https://ipinfo.io/" 70 | } 71 | if ip != "" { 72 | url = url + ip + "/json" 73 | } 74 | 75 | if token != "" { 76 | url = url + "?token=" + token 77 | } 78 | 79 | logger.Debugf("Getting IP info for %s", ip) 80 | request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) 81 | if err != nil { 82 | return IPInfo{}, errors.Wrap(err, "failed to create http request") 83 | } 84 | 85 | client := &http.Client{} 86 | resp, err := client.Do(request) 87 | if err != nil { 88 | return IPInfo{}, errors.Wrap(err, "failed to get IP info") 89 | } 90 | defer resp.Body.Close() 91 | 92 | if resp.StatusCode != http.StatusOK { 93 | return IPInfo{}, errors.New("failed to get IP info: " + resp.Status) 94 | } 95 | 96 | var ipInfo IPInfo 97 | err = json.NewDecoder(resp.Body).Decode(&ipInfo) 98 | if err != nil { 99 | return IPInfo{}, errors.Wrap(err, "failed to decode IP info") 100 | } 101 | 102 | if ipInfo.Bogon { 103 | return IPInfo{}, requesterror.BogonIPError{IP: ip} 104 | } 105 | 106 | ipInfo.Resolve() 107 | 108 | if continent, ok := countryMapping[ipInfo.Country]; ok { 109 | ipInfo.Continent = continent 110 | } else { 111 | logger.Error("Unknown country: " + ipInfo.Country) 112 | return IPInfo{}, errors.New("unknown country: " + ipInfo.Country) 113 | } 114 | 115 | logger.Debugf("Got IP info for %s: %+v", ip, ipInfo) 116 | return ipInfo, nil 117 | } 118 | 119 | type LocationResolver 
struct { 120 | cache *ttlcache.Cache[string, IPInfo] 121 | ipInfoToken string 122 | } 123 | 124 | func NewLocationResolver(ipInfoToken string, ttl time.Duration) LocationResolver { 125 | cache := ttlcache.New[string, IPInfo]( 126 | //nolint:gomnd 127 | ttlcache.WithTTL[string, IPInfo](ttl), 128 | ttlcache.WithDisableTouchOnHit[string, IPInfo]()) 129 | return LocationResolver{ 130 | cache, 131 | ipInfoToken, 132 | } 133 | } 134 | 135 | func (l LocationResolver) ResolveIP(ctx context.Context, ip net.IP) (IPInfo, error) { 136 | ipString := ip.String() 137 | if ipInfo := l.cache.Get(ipString); ipInfo != nil && !ipInfo.IsExpired() { 138 | return ipInfo.Value(), nil 139 | } 140 | 141 | ipInfo, err := GetPublicIPInfo(ctx, ipString, l.ipInfoToken) 142 | if err != nil { 143 | return IPInfo{}, errors.Wrap(err, "failed to get IP info") 144 | } 145 | 146 | l.cache.Set(ipString, ipInfo, ttlcache.DefaultTTL) 147 | return ipInfo, nil 148 | } 149 | 150 | func (l LocationResolver) ResolveIPStr(ctx context.Context, ip string) (IPInfo, error) { 151 | parsed := net.ParseIP(ip) 152 | if parsed == nil { 153 | return IPInfo{}, requesterror.InvalidIPError{IP: ip} 154 | } 155 | 156 | return l.ResolveIP(ctx, parsed) 157 | } 158 | 159 | func (l LocationResolver) ResolveMultiaddr(ctx context.Context, addr multiaddr.Multiaddr) (IPInfo, error) { 160 | host, isHostName, _, err := DecodeMultiaddr(addr) 161 | if err != nil { 162 | return IPInfo{}, errors.Wrap(err, "failed to decode multiaddr") 163 | } 164 | 165 | if isHostName { 166 | ips, err := net.LookupIP(host) 167 | if err != nil { 168 | return IPInfo{}, requesterror.HostLookupError{Host: host, Err: err} 169 | } 170 | 171 | host = ips[0].String() 172 | } 173 | 174 | return l.ResolveIPStr(ctx, host) 175 | } 176 | 177 | func (l LocationResolver) ResolveMultiaddrsBytes(ctx context.Context, bytesAddrs []abi.Multiaddrs) (IPInfo, error) { 178 | return l.ResolveMultiaddrs(ctx, convert.AbiToMultiaddrsSkippingError(bytesAddrs)) 179 | } 180 | 181 | 
func (l LocationResolver) ResolveMultiaddrs(ctx context.Context, addrs []multiaddr.Multiaddr) (IPInfo, error) { 182 | var lastErr error 183 | logger := logging.Logger("location_resolver") 184 | for _, addr := range addrs { 185 | ipInfo, err := l.ResolveMultiaddr(ctx, addr) 186 | if err != nil { 187 | lastErr = err 188 | logger.With("err", err).Debugf("Failed to resolve multiaddr %s", addr) 189 | continue 190 | } 191 | 192 | return ipInfo, nil 193 | } 194 | 195 | if lastErr != nil { 196 | return IPInfo{}, lastErr 197 | } 198 | 199 | return IPInfo{}, requesterror.NoValidMultiAddrError{} 200 | } 201 | 202 | type IsHostName = bool 203 | type PortNumber = int 204 | type IPOrHost = string 205 | 206 | func DecodeMultiaddr(addr multiaddr.Multiaddr) (IPOrHost, IsHostName, PortNumber, error) { 207 | protocols := addr.Protocols() 208 | isHostName := false 209 | const expectedProtocolCount = 2 210 | 211 | if len(protocols) != expectedProtocolCount { 212 | return "", false, 0, errors.New("multiaddr does not contain two protocols") 213 | } 214 | 215 | if !slices.Contains( 216 | []int{ 217 | multiaddr.P_IP4, multiaddr.P_IP6, 218 | multiaddr.P_DNS4, multiaddr.P_DNS6, 219 | multiaddr.P_DNS, multiaddr.P_DNSADDR, 220 | }, protocols[0].Code, 221 | ) { 222 | return "", false, 0, errors.New("multiaddr does not contain a valid ip or dns protocol") 223 | } 224 | 225 | if slices.Contains( 226 | []int{ 227 | multiaddr.P_DNS, multiaddr.P_DNSADDR, 228 | multiaddr.P_DNS4, multiaddr.P_DNS6, 229 | }, protocols[0].Code, 230 | ) { 231 | isHostName = true 232 | } 233 | 234 | if protocols[1].Code != multiaddr.P_TCP { 235 | return "", false, 0, errors.New("multiaddr does not contain a valid tcp protocol") 236 | } 237 | 238 | splitted := multiaddr.Split(addr) 239 | 240 | component0, ok := splitted[0].(*multiaddr.Component) 241 | if !ok { 242 | return "", false, 0, errors.New("failed to cast component") 243 | } 244 | 245 | host := component0.Value() 246 | 247 | component1, ok := 
splitted[1].(*multiaddr.Component) 248 | if !ok { 249 | return "", false, 0, errors.New("failed to cast component") 250 | } 251 | 252 | port, err := strconv.Atoi(component1.Value()) 253 | if err != nil { 254 | return "", false, 0, errors.Wrap(err, "failed to parse port") 255 | } 256 | 257 | return host, isHostName, port, nil 258 | } 259 | -------------------------------------------------------------------------------- /pkg/resolver/protocol_provider.go: -------------------------------------------------------------------------------- 1 | package resolver 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/model" 6 | "github.com/data-preservation-programs/RetrievalBot/pkg/requesterror" 7 | cborutil "github.com/filecoin-project/go-cbor-util" 8 | "github.com/filecoin-project/go-state-types/abi" 9 | "github.com/libp2p/go-libp2p/core/peerstore" 10 | "github.com/libp2p/go-libp2p/core/protocol" 11 | "github.com/multiformats/go-multistream" 12 | "golang.org/x/exp/slices" 13 | "time" 14 | 15 | "github.com/libp2p/go-libp2p/core/host" 16 | "github.com/libp2p/go-libp2p/core/peer" 17 | "github.com/pkg/errors" 18 | ) 19 | 20 | const RetrievalProtocolName = "/fil/retrieval/transports/1.0.0" 21 | 22 | type ProtocolProvider struct { 23 | host host.Host 24 | timeout time.Duration 25 | } 26 | 27 | func ProtocolResolver(host host.Host, timeout time.Duration) ProtocolProvider { 28 | return ProtocolProvider{ 29 | host: host, 30 | timeout: timeout, 31 | } 32 | } 33 | 34 | func (p ProtocolProvider) IsBoostProvider(ctx context.Context, minerInfo peer.AddrInfo) (bool, error) { 35 | protocols, err := p.getLibp2pProtocols(ctx, minerInfo) 36 | if err != nil { 37 | return false, errors.Wrap(err, "failed to get libp2p protocols") 38 | } 39 | 40 | return slices.Contains(protocols, RetrievalProtocolName), nil 41 | } 42 | 43 | func (p ProtocolProvider) getLibp2pProtocols( 44 | parent context.Context, 45 | minerInfo peer.AddrInfo) ([]protocol.ID, error) { 46 | 
ctx, cancel := context.WithTimeout(parent, p.timeout) 47 | defer cancel() 48 | p.host.Peerstore().AddAddrs(minerInfo.ID, minerInfo.Addrs, peerstore.PermanentAddrTTL) 49 | if err := p.host.Connect(ctx, minerInfo); err != nil { 50 | return nil, requesterror.CannotConnectError{ 51 | PeerID: minerInfo.ID, 52 | Err: err, 53 | } 54 | } 55 | 56 | protocols, err := p.host.Peerstore().GetProtocols(minerInfo.ID) 57 | if err != nil { 58 | return nil, requesterror.StreamError{ 59 | Err: err, 60 | } 61 | } 62 | 63 | return protocols, nil 64 | } 65 | 66 | func (p ProtocolProvider) GetRetrievalProtocols( 67 | parent context.Context, 68 | minerInfo peer.AddrInfo, 69 | ) ([]model.Protocol, error) { 70 | ctx, cancel := context.WithTimeout(parent, p.timeout) 71 | defer cancel() 72 | 73 | if err := p.host.Connect(ctx, minerInfo); err != nil { 74 | return nil, requesterror.CannotConnectError{ 75 | PeerID: minerInfo.ID, 76 | Err: err, 77 | } 78 | } 79 | 80 | stream, err := p.host.NewStream(ctx, minerInfo.ID, RetrievalProtocolName) 81 | if errors.Is(err, multistream.ErrNotSupported[protocol.ID]{}) { 82 | addrs := make([]abi.Multiaddrs, len(minerInfo.Addrs)) 83 | for i, addr := range minerInfo.Addrs { 84 | addrs[i] = addr.Bytes() 85 | } 86 | return []model.Protocol{ 87 | { 88 | Name: "libp2p", 89 | Addresses: addrs, 90 | }, 91 | }, nil 92 | } 93 | 94 | if err != nil { 95 | return nil, requesterror.StreamError{ 96 | Err: err, 97 | } 98 | } 99 | 100 | //nolint: errcheck 101 | defer stream.Close() 102 | 103 | _ = stream.SetReadDeadline(time.Now().Add(p.timeout)) 104 | //nolint: errcheck 105 | defer stream.SetReadDeadline(time.Time{}) 106 | 107 | queryResponse := new(model.QueryResponse) 108 | err = cborutil.ReadCborRPC(stream, queryResponse) 109 | if err != nil { 110 | return nil, requesterror.StreamError{ 111 | Err: err, 112 | } 113 | } 114 | 115 | return queryResponse.Protocols, nil 116 | } 117 | -------------------------------------------------------------------------------- 
/pkg/resolver/provider_resolver.go: -------------------------------------------------------------------------------- 1 | package resolver 2 | 3 | import ( 4 | "context" 5 | "encoding/base64" 6 | "time" 7 | 8 | "github.com/filecoin-project/go-state-types/abi" 9 | logging "github.com/ipfs/go-log/v2" 10 | "github.com/jellydator/ttlcache/v3" 11 | "github.com/pkg/errors" 12 | "github.com/ybbus/jsonrpc/v3" 13 | ) 14 | 15 | type ProviderResolver struct { 16 | cache *ttlcache.Cache[string, MinerInfo] 17 | lotusClient jsonrpc.RPCClient 18 | } 19 | 20 | type MinerInfo struct { 21 | //nolint:stylecheck 22 | PeerId string 23 | //nolint:tagliatelle 24 | MultiaddrsBase64Encoded []string `json:"Multiaddrs"` 25 | Multiaddrs []abi.Multiaddrs 26 | } 27 | 28 | func NewProviderResolver(url string, token string, ttl time.Duration) (*ProviderResolver, error) { 29 | cache := ttlcache.New[string, MinerInfo]( 30 | //nolint:gomnd 31 | ttlcache.WithTTL[string, MinerInfo](ttl), 32 | ttlcache.WithDisableTouchOnHit[string, MinerInfo]()) 33 | var lotusClient jsonrpc.RPCClient 34 | if token == "" { 35 | lotusClient = jsonrpc.NewClient(url) 36 | } else { 37 | lotusClient = jsonrpc.NewClientWithOpts(url, &jsonrpc.RPCClientOpts{ 38 | CustomHeaders: map[string]string{ 39 | "Authorization": "Bearer " + token, 40 | }, 41 | }) 42 | } 43 | return &ProviderResolver{ 44 | cache: cache, 45 | lotusClient: lotusClient, 46 | }, nil 47 | } 48 | 49 | func (p *ProviderResolver) ResolveProvider(ctx context.Context, provider string) (MinerInfo, error) { 50 | logger := logging.Logger("location_resolver") 51 | if minerInfo := p.cache.Get(provider); minerInfo != nil && !minerInfo.IsExpired() { 52 | return minerInfo.Value(), nil 53 | } 54 | 55 | logger.With("provider", provider).Debug("Getting miner info") 56 | minerInfo := new(MinerInfo) 57 | err := p.lotusClient.CallFor(ctx, minerInfo, "Filecoin.StateMinerInfo", provider, nil) 58 | if err != nil { 59 | return MinerInfo{}, errors.Wrap(err, "failed to get miner info") 
60 | } 61 | 62 | logger.With("provider", provider, "minerinfo", minerInfo).Debug("Got miner info") 63 | minerInfo.Multiaddrs = make([]abi.Multiaddrs, len(minerInfo.MultiaddrsBase64Encoded)) 64 | for i, multiaddr := range minerInfo.MultiaddrsBase64Encoded { 65 | decoded, err := base64.StdEncoding.DecodeString(multiaddr) 66 | if err != nil { 67 | return MinerInfo{}, errors.Wrap(err, "failed to decode multiaddr") 68 | } 69 | minerInfo.Multiaddrs[i] = decoded 70 | } 71 | p.cache.Set(provider, *minerInfo, ttlcache.DefaultTTL) 72 | 73 | return *minerInfo, nil 74 | } 75 | -------------------------------------------------------------------------------- /pkg/resources/country-to-continent.json: -------------------------------------------------------------------------------- 1 | {"BD": "AS", "BE": "EU", "BF": "AF", "BG": "EU", "BA": "EU", "BB": "NA", "WF": "OC", "BL": "NA", "BM": "NA", "BN": "AS", "BO": "SA", "BH": "AS", "BI": "AF", "BJ": "AF", "BT": "AS", "JM": "NA", "BV": "AN", "BW": "AF", "WS": "OC", "BQ": "NA", "BR": "SA", "BS": "NA", "JE": "EU", "BY": "EU", "BZ": "NA", "RU": "EU", "RW": "AF", "RS": "EU", "TL": "OC", "RE": "AF", "TM": "AS", "TJ": "AS", "RO": "EU", "TK": "OC", "GW": "AF", "GU": "OC", "GT": "NA", "GS": "AN", "GR": "EU", "GQ": "AF", "GP": "NA", "JP": "AS", "GY": "SA", "GG": "EU", "GF": "SA", "GE": "AS", "GD": "NA", "GB": "EU", "GA": "AF", "SV": "NA", "GN": "AF", "GM": "AF", "GL": "NA", "GI": "EU", "GH": "AF", "OM": "AS", "TN": "AF", "JO": "AS", "HR": "EU", "HT": "NA", "HU": "EU", "HK": "AS", "HN": "NA", "HM": "AN", "VE": "SA", "PR": "NA", "PS": "AS", "PW": "OC", "PT": "EU", "SJ": "EU", "PY": "SA", "IQ": "AS", "PA": "NA", "PF": "OC", "PG": "OC", "PE": "SA", "PK": "AS", "PH": "AS", "PN": "OC", "PL": "EU", "PM": "NA", "ZM": "AF", "EH": "AF", "EE": "EU", "EG": "AF", "ZA": "AF", "EC": "SA", "IT": "EU", "VN": "AS", "SB": "OC", "ET": "AF", "SO": "AF", "ZW": "AF", "SA": "AS", "ES": "EU", "ER": "AF", "ME": "EU", "MD": "EU", "MG": "AF", "MF": "NA", "MA": "AF", "MC": 
"EU", "UZ": "AS", "MM": "AS", "ML": "AF", "MO": "AS", "MN": "AS", "MH": "OC", "MK": "EU", "MU": "AF", "MT": "EU", "MW": "AF", "MV": "AS", "MQ": "NA", "MP": "OC", "MS": "NA", "MR": "AF", "IM": "EU", "UG": "AF", "TZ": "AF", "MY": "AS", "MX": "NA", "IL": "AS", "FR": "EU", "IO": "AS", "SH": "AF", "FI": "EU", "FJ": "OC", "FK": "SA", "FM": "OC", "FO": "EU", "NI": "NA", "NL": "EU", "NO": "EU", "NA": "AF", "VU": "OC", "NC": "OC", "NE": "AF", "NF": "OC", "NG": "AF", "NZ": "OC", "NP": "AS", "NR": "OC", "NU": "OC", "CK": "OC", "XK": "EU", "CI": "AF", "CH": "EU", "CO": "SA", "CN": "AS", "CM": "AF", "CL": "SA", "CC": "AS", "CA": "NA", "CG": "AF", "CF": "AF", "CD": "AF", "CZ": "EU", "CY": "EU", "CX": "AS", "CR": "NA", "CW": "NA", "CV": "AF", "CU": "NA", "SZ": "AF", "SY": "AS", "SX": "NA", "KG": "AS", "KE": "AF", "SS": "AF", "SR": "SA", "KI": "OC", "KH": "AS", "KN": "NA", "KM": "AF", "ST": "AF", "SK": "EU", "KR": "AS", "SI": "EU", "KP": "AS", "KW": "AS", "SN": "AF", "SM": "EU", "SL": "AF", "SC": "AF", "KZ": "AS", "KY": "NA", "SG": "AS", "SE": "EU", "SD": "AF", "DO": "NA", "DM": "NA", "DJ": "AF", "DK": "EU", "VG": "NA", "DE": "EU", "YE": "AS", "DZ": "AF", "US": "NA", "UY": "SA", "YT": "AF", "UM": "OC", "LB": "AS", "LC": "NA", "LA": "AS", "TV": "OC", "TW": "AS", "TT": "NA", "TR": "AS", "LK": "AS", "LI": "EU", "LV": "EU", "TO": "OC", "LT": "EU", "LU": "EU", "LR": "AF", "LS": "AF", "TH": "AS", "TF": "AN", "TG": "AF", "TD": "AF", "TC": "NA", "LY": "AF", "VA": "EU", "VC": "NA", "AE": "AS", "AD": "EU", "AG": "NA", "AF": "AS", "AI": "NA", "VI": "NA", "IS": "EU", "IR": "AS", "AM": "AS", "AL": "EU", "AO": "AF", "AQ": "AN", "AS": "OC", "AR": "SA", "AU": "OC", "AT": "EU", "AW": "NA", "IN": "AS", "AX": "EU", "AZ": "AS", "IE": "EU", "ID": "AS", "UA": "EU", "QA": "AS", "MZ": "AF"} 2 | -------------------------------------------------------------------------------- /pkg/resources/static.go: -------------------------------------------------------------------------------- 1 | package resources 2 | 
3 | import _ "embed" 4 | 5 | //go:embed country-to-continent.json 6 | var CountryToContinentJSON []byte 7 | -------------------------------------------------------------------------------- /pkg/task/errors.go: -------------------------------------------------------------------------------- 1 | package task 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "github.com/data-preservation-programs/RetrievalBot/pkg/requesterror" 7 | "strings" 8 | ) 9 | 10 | type ErrorCode string 11 | 12 | const ( 13 | ErrorCodeNone ErrorCode = "" 14 | InvalidPeerID ErrorCode = "invalid_peerid" 15 | NoValidMultiAddrs ErrorCode = "no_valid_multiaddrs" 16 | CannotConnect ErrorCode = "cannot_connect" 17 | NotFound ErrorCode = "not_found" 18 | RetrievalFailure ErrorCode = "retrieval_failure" 19 | ProtocolNotSupported ErrorCode = "protocol_not_supported" 20 | Timeout ErrorCode = "timeout" 21 | DealRejectedPricePerByteTooLow ErrorCode = "deal_rejected_price_per_byte_too_low" 22 | DealRejectedUnsealPriceTooLow ErrorCode = "deal_rejected_unseal_price_too_low" 23 | Throttled ErrorCode = "throttled" 24 | NoAccess ErrorCode = "no_access" 25 | UnderMaintenance ErrorCode = "under_maintenance" 26 | NotOnline ErrorCode = "not_online" 27 | UnconfirmedBlockTransfer ErrorCode = "unconfirmed_block_transfer" 28 | CIDCodecNotSupported ErrorCode = "cid_codec_not_supported" 29 | ResponseRejected ErrorCode = "response_rejected" 30 | DealStateMissing ErrorCode = "deal_state_missing" 31 | ) 32 | 33 | var errorStringMap = map[string]ErrorCode{ 34 | "Price per byte too low": DealRejectedPricePerByteTooLow, 35 | "Unseal price too small": DealRejectedUnsealPriceTooLow, 36 | "Too many retrieval deals received": Throttled, 37 | "Access Control": NoAccess, 38 | "Under maintenance, retry later": UnderMaintenance, 39 | "miner is not accepting online retrieval deals": NotOnline, 40 | "unconfirmed block transfer": UnconfirmedBlockTransfer, 41 | "no decoder registered for multicodec code": CIDCodecNotSupported, 42 | "not 
found": NotFound, 43 | "response rejected": ResponseRejected, 44 | "failed to fetch storage deal state": DealStateMissing, 45 | "there is no unsealed piece containing payload cid": NotFound, 46 | } 47 | 48 | func resolveError(err error) ErrorCode { 49 | if errors.Is(err, context.DeadlineExceeded) { 50 | return Timeout 51 | } 52 | 53 | if errors.As(err, &requesterror.CannotConnectError{}) { 54 | return CannotConnect 55 | } 56 | 57 | if errors.As(err, &requesterror.InvalidIPError{}) || 58 | errors.As(err, &requesterror.BogonIPError{}) || 59 | errors.As(err, &requesterror.NoValidMultiAddrError{}) || 60 | errors.As(err, &requesterror.HostLookupError{}) { 61 | return NoValidMultiAddrs 62 | } 63 | 64 | if errors.As(err, &requesterror.StreamError{}) { 65 | return RetrievalFailure 66 | } 67 | 68 | for s, code := range errorStringMap { 69 | if strings.Contains(err.Error(), s) { 70 | return code 71 | } 72 | } 73 | 74 | return ErrorCodeNone 75 | } 76 | 77 | func resolveErrorResult(err error) *RetrievalResult { 78 | code := resolveError(err) 79 | if code == ErrorCodeNone { 80 | return nil 81 | } 82 | 83 | return NewErrorRetrievalResult(code, err) 84 | } 85 | -------------------------------------------------------------------------------- /pkg/task/errors_test.go: -------------------------------------------------------------------------------- 1 | package task 2 | 3 | import ( 4 | "github.com/data-preservation-programs/RetrievalBot/pkg/requesterror" 5 | "github.com/pkg/errors" 6 | "github.com/stretchr/testify/assert" 7 | "testing" 8 | ) 9 | 10 | func TestResolveError(t *testing.T) { 11 | err := errors.New("cannot dial") 12 | err = requesterror.CannotConnectError{Err: err} 13 | err = errors.Wrap(err, "failed to check if provider is boost") 14 | result := resolveErrorResult(err) 15 | assert.NotNil(t, result) 16 | } 17 | -------------------------------------------------------------------------------- /pkg/task/task.go: 
-------------------------------------------------------------------------------- 1 | package task 2 | 3 | import ( 4 | "github.com/data-preservation-programs/RetrievalBot/pkg/convert" 5 | "github.com/libp2p/go-libp2p/core/peer" 6 | "github.com/pkg/errors" 7 | "time" 8 | ) 9 | 10 | type Provider struct { 11 | // In case of attempting retrieval from any miner, this field will be empty 12 | ID string `bson:"id"` 13 | PeerID string `bson:"peer_id,omitempty"` 14 | Multiaddrs []string `bson:"multiaddrs,omitempty"` 15 | City string `bson:"city,omitempty"` 16 | Region string `bson:"region,omitempty"` 17 | Country string `bson:"country,omitempty"` 18 | Continent string `bson:"continent,omitempty"` 19 | } 20 | 21 | func (p Provider) GetPeerAddr() (peer.AddrInfo, error) { 22 | peerID, err := peer.Decode(p.PeerID) 23 | if err != nil { 24 | return peer.AddrInfo{}, errors.Wrap(err, "failed to decode peer id") 25 | } 26 | 27 | addrs, err := convert.StringArrayToMultiaddrs(p.Multiaddrs) 28 | if err != nil { 29 | return peer.AddrInfo{}, errors.Wrap(err, "failed to convert multiaddrs") 30 | } 31 | 32 | return peer.AddrInfo{ 33 | ID: peerID, 34 | Addrs: addrs, 35 | }, nil 36 | } 37 | 38 | type ModuleName string 39 | 40 | const ( 41 | Stub ModuleName = "stub" 42 | GraphSync ModuleName = "graphsync" 43 | HTTP ModuleName = "http" 44 | Bitswap ModuleName = "bitswap" 45 | ) 46 | 47 | type Content struct { 48 | CID string `bson:"cid"` 49 | } 50 | 51 | type Task struct { 52 | Requester string `bson:"requester"` 53 | Module ModuleName `bson:"module"` 54 | Metadata map[string]string `bson:"metadata,omitempty"` 55 | Provider Provider `bson:"provider"` 56 | Content Content `bson:"content"` 57 | Timeout time.Duration `bson:"timeout,omitempty"` 58 | CreatedAt time.Time `bson:"created_at"` 59 | } 60 | -------------------------------------------------------------------------------- /pkg/task/task_result.go: -------------------------------------------------------------------------------- 1 | package 
task 2 | 3 | import "time" 4 | 5 | type Retriever struct { 6 | PublicIP string `bson:"ip"` 7 | City string `bson:"city"` 8 | Region string `bson:"region"` 9 | Country string `bson:"country"` 10 | Continent string `bson:"continent"` 11 | ASN string `bson:"asn"` 12 | ISP string `bson:"isp"` 13 | Latitude float32 `bson:"lat"` 14 | Longitude float32 `bson:"long"` 15 | } 16 | 17 | func NewErrorRetrievalResult(code ErrorCode, err error) *RetrievalResult { 18 | return &RetrievalResult{ 19 | Success: false, 20 | ErrorCode: code, 21 | ErrorMessage: err.Error(), 22 | TTFB: 0, 23 | Speed: 0, 24 | Duration: 0, 25 | Downloaded: 0, 26 | } 27 | } 28 | 29 | func NewErrorRetrievalResultWithErrorResolution(code ErrorCode, err error) *RetrievalResult { 30 | result := resolveErrorResult(err) 31 | if result != nil { 32 | return result 33 | } 34 | 35 | return &RetrievalResult{ 36 | Success: false, 37 | ErrorCode: code, 38 | ErrorMessage: err.Error(), 39 | TTFB: 0, 40 | Speed: 0, 41 | Duration: 0, 42 | Downloaded: 0, 43 | } 44 | } 45 | 46 | func NewSuccessfulRetrievalResult(ttfb time.Duration, downloaded int64, duration time.Duration) *RetrievalResult { 47 | return &RetrievalResult{ 48 | Success: true, 49 | ErrorCode: ErrorCodeNone, 50 | ErrorMessage: "", 51 | TTFB: ttfb, 52 | Speed: float64(downloaded) / duration.Seconds(), 53 | Duration: duration, 54 | Downloaded: downloaded, 55 | } 56 | } 57 | 58 | type RetrievalResult struct { 59 | Success bool `bson:"success"` 60 | ErrorCode ErrorCode `bson:"error_code,omitempty"` 61 | ErrorMessage string `bson:"error_message,omitempty"` 62 | TTFB time.Duration `bson:"ttfb,omitempty"` 63 | Speed float64 `bson:"speed,omitempty"` 64 | Duration time.Duration `bson:"duration,omitempty"` 65 | Downloaded int64 `bson:"downloaded,omitempty"` 66 | } 67 | 68 | type Result struct { 69 | Task 70 | Retriever Retriever `bson:"retriever"` 71 | Result RetrievalResult `bson:"result"` 72 | CreatedAt time.Time `bson:"created_at"` 73 | } 74 | 
-------------------------------------------------------------------------------- /pkg/task/task_worker.go: -------------------------------------------------------------------------------- 1 | package task 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/env" 6 | "github.com/google/uuid" 7 | logging "github.com/ipfs/go-log/v2" 8 | "github.com/pkg/errors" 9 | "go.mongodb.org/mongo-driver/bson" 10 | "go.mongodb.org/mongo-driver/mongo" 11 | "go.mongodb.org/mongo-driver/mongo/options" 12 | "strings" 13 | "time" 14 | ) 15 | 16 | type Worker interface { 17 | DoWork(task Task) (*RetrievalResult, error) 18 | } 19 | 20 | type WorkerProcess struct { 21 | id uuid.UUID 22 | taskCollection *mongo.Collection 23 | resultCollection *mongo.Collection 24 | worker Worker 25 | module ModuleName 26 | acceptedContinents string 27 | acceptedCountries string 28 | pollInterval time.Duration 29 | retrieverInfo Retriever 30 | timeoutBuffer time.Duration 31 | } 32 | 33 | func (t WorkerProcess) Close() { 34 | // nolint:errcheck 35 | t.taskCollection.Database().Client().Disconnect(context.Background()) 36 | // nolint:errcheck 37 | t.resultCollection.Database().Client().Disconnect(context.Background()) 38 | } 39 | 40 | func NewTaskWorkerProcess( 41 | ctx context.Context, 42 | module ModuleName, 43 | worker Worker) (*WorkerProcess, error) { 44 | taskClient, err := mongo.Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.QueueMongoURI))) 45 | if err != nil { 46 | return nil, errors.Wrap(err, "failed to connect to mongo queueDB") 47 | } 48 | 49 | taskCollection := taskClient.Database(env.GetRequiredString(env.QueueMongoDatabase)).Collection("task_queue") 50 | 51 | resultClient, err := mongo.Connect(ctx, options.Client().ApplyURI(env.GetRequiredString(env.ResultMongoURI))) 52 | if err != nil { 53 | return nil, errors.Wrap(err, "failed to connect to mongo resultDB") 54 | } 55 | 56 | resultCollection := 
resultClient.Database(env.GetRequiredString(env.ResultMongoDatabase)).Collection("task_result") 57 | 58 | retrieverInfo := Retriever{ 59 | PublicIP: env.GetRequiredString(env.PublicIP), 60 | City: env.GetRequiredString(env.City), 61 | Region: env.GetRequiredString(env.Region), 62 | Country: env.GetRequiredString(env.Country), 63 | Continent: env.GetRequiredString(env.Continent), 64 | ASN: env.GetRequiredString(env.ASN), 65 | ISP: env.GetRequiredString(env.ISP), 66 | Latitude: env.GetRequiredFloat32(env.Latitude), 67 | Longitude: env.GetRequiredFloat32(env.Longitude), 68 | } 69 | 70 | id := uuid.New() 71 | 72 | return &WorkerProcess{ 73 | id, 74 | taskCollection, 75 | resultCollection, 76 | worker, 77 | module, 78 | env.GetString(env.AcceptedContinents, ""), 79 | env.GetString(env.AcceptedCountries, ""), 80 | env.GetDuration(env.TaskWorkerPollInterval, 10*time.Second), 81 | retrieverInfo, 82 | env.GetDuration(env.TaskWorkerTimeoutBuffer, 10*time.Second), 83 | }, nil 84 | } 85 | 86 | func (t WorkerProcess) Poll(ctx context.Context) error { 87 | logger := logging.Logger("task-worker").With("protocol", t.module, "workerId", t.id) 88 | var singleResult *mongo.SingleResult 89 | for { 90 | logger.Debug("polling for task") 91 | 92 | //nolint:govet 93 | match := bson.D{ 94 | {"module", t.module}, 95 | } 96 | 97 | if len(t.acceptedCountries) > 0 { 98 | if strings.HasPrefix(t.acceptedCountries, "!") { 99 | match = append(match, bson.E{Key: "provider.country", 100 | Value: bson.D{{Key: "$nin", Value: strings.Split(t.acceptedCountries[1:], ",")}}}) 101 | } else { 102 | match = append(match, bson.E{Key: "provider.country", 103 | Value: bson.D{{Key: "$in", Value: strings.Split(t.acceptedCountries, ",")}}}) 104 | } 105 | } 106 | 107 | if len(t.acceptedContinents) > 0 { 108 | if strings.HasPrefix(t.acceptedContinents, "!") { 109 | match = append(match, bson.E{Key: "provider.continent", 110 | Value: bson.D{{Key: "$nin", Value: strings.Split(t.acceptedContinents[1:], ",")}}}) 111 | } 
else { 112 | match = append(match, bson.E{Key: "provider.continent", 113 | Value: bson.D{{Key: "$in", Value: strings.Split(t.acceptedContinents, ",")}}}) 114 | } 115 | } 116 | 117 | logger.With("filter", match).Debug("FindOneAndDelete") 118 | singleResult = t.taskCollection.FindOneAndDelete(ctx, match, 119 | options.FindOneAndDelete().SetSort(bson.D{{Key: "created_at", Value: 1}})) 120 | if errors.Is(singleResult.Err(), mongo.ErrNoDocuments) { 121 | logger.Debug("no task singleResult") 122 | time.Sleep(t.pollInterval) 123 | continue 124 | } 125 | 126 | if singleResult.Err() != nil { 127 | return errors.Wrap(singleResult.Err(), "failed to find task") 128 | } 129 | 130 | break 131 | } 132 | 133 | found := new(Task) 134 | err := singleResult.Decode(found) 135 | if err != nil { 136 | return errors.Wrap(err, "failed to decode task") 137 | } 138 | 139 | logger.With("task", found).Info("found new task") 140 | resultChan := make(chan RetrievalResult) 141 | errChan := make(chan error) 142 | go func() { 143 | result, err := t.worker.DoWork(*found) 144 | if err != nil { 145 | errResult := resolveErrorResult(err) 146 | if errResult != nil { 147 | resultChan <- *errResult 148 | } else { 149 | logger.With("error", err).Error("failed to do work") 150 | errChan <- err 151 | } 152 | } else { 153 | resultChan <- *result 154 | } 155 | }() 156 | 157 | var retrievalResult RetrievalResult 158 | select { 159 | case <-ctx.Done(): 160 | //nolint:wrapcheck 161 | return ctx.Err() 162 | case <-time.After(found.Timeout + t.timeoutBuffer): 163 | retrievalResult = *NewErrorRetrievalResult(Timeout, errors.Errorf("timed out after %s", found.Timeout)) 164 | case r := <-resultChan: 165 | retrievalResult = r 166 | case err = <-errChan: 167 | return err 168 | } 169 | 170 | taskResult := Result{ 171 | Task: *found, 172 | Result: retrievalResult, 173 | Retriever: t.retrieverInfo, 174 | CreatedAt: time.Now().UTC(), 175 | } 176 | 177 | insertResult, err := t.resultCollection.InsertOne(ctx, taskResult) 178 
| if err != nil { 179 | return errors.Wrap(err, "failed to insert result") 180 | } 181 | 182 | logger.With("result", retrievalResult, "InsertedID", insertResult.InsertedID).Info("inserted result") 183 | return nil 184 | } 185 | -------------------------------------------------------------------------------- /pkg/tools/tooling.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | // +build tools 3 | 4 | package tools 5 | 6 | import ( 7 | _ "github.com/hannahhoward/cbor-gen-for" 8 | ) 9 | -------------------------------------------------------------------------------- /result.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/data-preservation-programs/RetrievalBot/9a826d967b9341796e898d5ed85e92c9a8e3f1a6/result.zip -------------------------------------------------------------------------------- /worker/bitswap/cmd/BitswapWorker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 7 | "github.com/data-preservation-programs/RetrievalBot/worker/bitswap" 8 | logging "github.com/ipfs/go-log/v2" 9 | ) 10 | 11 | func main() { 12 | worker := bitswap.Worker{} 13 | process, err := task.NewTaskWorkerProcess(context.Background(), task.Bitswap, worker) 14 | if err != nil { 15 | panic(err) 16 | } 17 | 18 | defer process.Close() 19 | 20 | err = process.Poll(context.Background()) 21 | if err != nil { 22 | logging.Logger("task-worker").With("protocol", task.Bitswap).Error(err) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /worker/bitswap/worker.go: -------------------------------------------------------------------------------- 1 | package bitswap 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/convert" 6 | 
"github.com/data-preservation-programs/RetrievalBot/pkg/model" 7 | "github.com/data-preservation-programs/RetrievalBot/pkg/net" 8 | "github.com/data-preservation-programs/RetrievalBot/pkg/resolver" 9 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 10 | "github.com/ipfs/go-cid" 11 | logging "github.com/ipfs/go-log/v2" 12 | _ "github.com/joho/godotenv/autoload" 13 | "github.com/libp2p/go-libp2p/core/peer" 14 | "github.com/multiformats/go-multiaddr" 15 | "github.com/pkg/errors" 16 | ) 17 | 18 | type Worker struct{} 19 | 20 | var logger = logging.Logger("bitswap_worker") 21 | 22 | func (e Worker) DoWork(tsk task.Task) (*task.RetrievalResult, error) { 23 | ctx := context.Background() 24 | 25 | host, err := net.InitHost(ctx, nil) 26 | if err != nil { 27 | return nil, errors.Wrap(err, "failed to init host") 28 | } 29 | 30 | client := net.NewBitswapClient(host, tsk.Timeout) 31 | 32 | // First, check if the provider is using boost 33 | protocolProvider := resolver.ProtocolResolver(host, tsk.Timeout) 34 | addrInfo, err := tsk.Provider.GetPeerAddr() 35 | if err != nil { 36 | return nil, errors.Wrap(err, "failed to get peer addr") 37 | } 38 | contentCID := cid.MustParse(tsk.Content.CID) 39 | isBoost, err := protocolProvider.IsBoostProvider(context.Background(), addrInfo) 40 | if err != nil { 41 | return nil, errors.Wrap(err, "failed to check if provider is boost") 42 | } 43 | 44 | if !isBoost { 45 | return task.NewErrorRetrievalResult( 46 | task.ProtocolNotSupported, 47 | errors.New("Provider is not using boost")), nil 48 | } 49 | 50 | // If so, find the Bitswap endpoint 51 | protocols, err := protocolProvider.GetRetrievalProtocols(ctx, addrInfo) 52 | if err != nil { 53 | return nil, errors.Wrap(err, "failed to get retrieval protocols") 54 | } 55 | 56 | var peerID peer.ID 57 | addrs := make([]multiaddr.Multiaddr, 0) 58 | for _, protocol := range protocols { 59 | if protocol.Name != string(model.Bitswap) { 60 | continue 61 | } 62 | addr, err := 
convert.AbiToMultiaddr(protocol.Addresses[0]) 63 | if err != nil { 64 | return task.NewErrorRetrievalResult(task.ProtocolNotSupported, err), nil 65 | } 66 | 67 | remain, last := multiaddr.SplitLast(addr) 68 | if last.Protocol().Code == multiaddr.P_P2P { 69 | newPeerID, err := peer.IDFromBytes(last.RawValue()) 70 | if err != nil { 71 | return task.NewErrorRetrievalResult(task.ProtocolNotSupported, err), nil 72 | } 73 | if peerID == "" || peerID == newPeerID { 74 | peerID = newPeerID 75 | addrs = append(addrs, remain) 76 | } else { 77 | logger.With("name", protocol.Name, "addr", addr.String()).Warn("Found multiple peer IDs for Bitswap") 78 | } 79 | } 80 | } 81 | 82 | if peerID == "" || len(addrs) == 0 { 83 | return task.NewErrorRetrievalResult(task.ProtocolNotSupported, errors.New("No bitswap multiaddr available")), nil 84 | } 85 | 86 | //nolint:wrapcheck 87 | return client.Retrieve(ctx, peer.AddrInfo{ 88 | ID: peerID, 89 | Addrs: addrs, 90 | }, contentCID) 91 | } 92 | -------------------------------------------------------------------------------- /worker/graphsync/cmd/GraphsyncWorker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 7 | "github.com/data-preservation-programs/RetrievalBot/worker/graphsync" 8 | logging "github.com/ipfs/go-log/v2" 9 | ) 10 | 11 | func main() { 12 | worker := graphsync.Worker{} 13 | process, err := task.NewTaskWorkerProcess(context.Background(), task.GraphSync, worker) 14 | if err != nil { 15 | panic(err) 16 | } 17 | 18 | defer process.Close() 19 | 20 | err = process.Poll(context.Background()) 21 | if err != nil { 22 | logging.Logger("task-worker").With("protocol", task.GraphSync).Error(err) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /worker/graphsync/worker.go: 
-------------------------------------------------------------------------------- 1 | package graphsync 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/net" 6 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 7 | "github.com/ipfs/go-cid" 8 | _ "github.com/joho/godotenv/autoload" 9 | "github.com/pkg/errors" 10 | ) 11 | 12 | type Worker struct{} 13 | 14 | func (e Worker) DoWork(tsk task.Task) (*task.RetrievalResult, error) { 15 | ctx := context.Background() 16 | host, err := net.InitHost(ctx, nil) 17 | if err != nil { 18 | return nil, errors.Wrap(err, "failed to init host") 19 | } 20 | 21 | client := net.NewGraphsyncClient(host, tsk.Timeout) 22 | addrInfo, err := tsk.Provider.GetPeerAddr() 23 | if err != nil { 24 | return nil, errors.Wrap(err, "failed to get peer addr") 25 | } 26 | contentCID := cid.MustParse(tsk.Content.CID) 27 | //nolint:wrapcheck 28 | return client.Retrieve(ctx, addrInfo, contentCID) 29 | } 30 | -------------------------------------------------------------------------------- /worker/http/cmd/HttpWorker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 6 | "github.com/data-preservation-programs/RetrievalBot/worker/http" 7 | logging "github.com/ipfs/go-log/v2" 8 | ) 9 | 10 | func main() { 11 | worker := http.Worker{} 12 | process, err := task.NewTaskWorkerProcess(context.Background(), task.HTTP, worker) 13 | if err != nil { 14 | panic(err) 15 | } 16 | 17 | defer process.Close() 18 | 19 | err = process.Poll(context.Background()) 20 | if err != nil { 21 | logging.Logger("task-worker").With("protocol", task.HTTP).Error(err) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /worker/http/worker.go: -------------------------------------------------------------------------------- 1 | package http 2 | 3 | 
import ( 4 | "context" 5 | "fmt" 6 | "github.com/data-preservation-programs/RetrievalBot/pkg/convert" 7 | "github.com/data-preservation-programs/RetrievalBot/pkg/model" 8 | "github.com/data-preservation-programs/RetrievalBot/pkg/net" 9 | "github.com/data-preservation-programs/RetrievalBot/pkg/resolver" 10 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 11 | "github.com/ipfs/go-cid" 12 | _ "github.com/joho/godotenv/autoload" 13 | "github.com/multiformats/go-multiaddr" 14 | manet "github.com/multiformats/go-multiaddr/net" 15 | "github.com/pkg/errors" 16 | net2 "net" 17 | "net/url" 18 | "strconv" 19 | ) 20 | 21 | type Worker struct{} 22 | 23 | func ToURL(ma multiaddr.Multiaddr) (*url.URL, error) { 24 | // host should be either the dns name or the IP 25 | _, host, err := manet.DialArgs(ma) 26 | if err != nil { 27 | return nil, errors.Wrap(err, "failed to get dial args") 28 | } 29 | if ip := net2.ParseIP(host); ip != nil { 30 | if !ip.To4().Equal(ip) { 31 | // raw v6 IPs need `[ip]` encapsulation. 
32 | host = fmt.Sprintf("[%s]", host) 33 | } 34 | } 35 | 36 | protos := ma.Protocols() 37 | pm := make(map[int]string, len(protos)) 38 | for _, p := range protos { 39 | v, err := ma.ValueForProtocol(p.Code) 40 | if err == nil { 41 | pm[p.Code] = v 42 | } 43 | } 44 | 45 | scheme := model.HTTP 46 | //nolint:nestif 47 | if _, ok := pm[multiaddr.P_HTTPS]; ok { 48 | scheme = model.HTTPS 49 | } else if _, ok = pm[multiaddr.P_HTTP]; ok { 50 | // /tls/http == /https 51 | if _, ok = pm[multiaddr.P_TLS]; ok { 52 | scheme = model.HTTPS 53 | } 54 | } else if _, ok = pm[multiaddr.P_WSS]; ok { 55 | scheme = model.WSS 56 | } else if _, ok = pm[multiaddr.P_WS]; ok { 57 | scheme = model.WS 58 | // /tls/ws == /wss 59 | if _, ok = pm[multiaddr.P_TLS]; ok { 60 | scheme = model.WSS 61 | } 62 | } 63 | 64 | path := "" 65 | if pb, ok := pm[0x300200]; ok { 66 | path, err = url.PathUnescape(pb) 67 | if err != nil { 68 | path = "" 69 | } 70 | } 71 | 72 | //nolint:exhaustruct 73 | out := url.URL{ 74 | Scheme: string(scheme), 75 | Host: host, 76 | Path: path, 77 | } 78 | return &out, nil 79 | } 80 | 81 | func (e Worker) DoWork(tsk task.Task) (*task.RetrievalResult, error) { 82 | ctx := context.Background() 83 | client := net.NewHTTPClient(tsk.Timeout) 84 | 85 | host, err := net.InitHost(ctx, nil) 86 | if err != nil { 87 | return nil, errors.Wrap(err, "failed to init host") 88 | } 89 | 90 | // First, check if the provider is using boost 91 | protocolProvider := resolver.ProtocolResolver(host, tsk.Timeout) 92 | addrInfo, err := tsk.Provider.GetPeerAddr() 93 | if err != nil { 94 | return nil, errors.Wrap(err, "failed to get peer addr") 95 | } 96 | contentCID := cid.MustParse(tsk.Content.CID) 97 | isBoost, err := protocolProvider.IsBoostProvider(context.Background(), addrInfo) 98 | if err != nil { 99 | return nil, errors.Wrap(err, "failed to check if provider is boost") 100 | } 101 | 102 | if !isBoost { 103 | return task.NewErrorRetrievalResult( 104 | task.ProtocolNotSupported, 105 | 
errors.New("Provider is not using boost")), nil 106 | } 107 | 108 | // If so, find the HTTP endpoint 109 | protocols, err := protocolProvider.GetRetrievalProtocols(ctx, addrInfo) 110 | if err != nil { 111 | return nil, errors.Wrap(err, "failed to get retrieval protocols") 112 | } 113 | 114 | var urlString string 115 | for _, protocol := range protocols { 116 | if (protocol.Name == string(model.HTTP) || protocol.Name == string(model.HTTPS)) && len(protocol.Addresses) > 0 { 117 | addr, err := convert.AbiToMultiaddr(protocol.Addresses[0]) 118 | if err != nil { 119 | return task.NewErrorRetrievalResult(task.ProtocolNotSupported, err), nil 120 | } 121 | 122 | url, err := ToURL(addr) 123 | if err != nil { 124 | return task.NewErrorRetrievalResult( 125 | task.ProtocolNotSupported, 126 | errors.Wrap(err, "Cannot convert multiaddr to URL")), nil 127 | } 128 | 129 | urlString = url.String() 130 | } 131 | } 132 | 133 | if urlString == "" { 134 | return task.NewErrorRetrievalResult( 135 | task.ProtocolNotSupported, 136 | errors.New("No HTTP endpoint found")), nil 137 | } 138 | 139 | size := 1024 * 1024 140 | if sizeStr, ok := tsk.Metadata["retrieve_size"]; ok { 141 | size, err = strconv.Atoi(sizeStr) 142 | if err != nil { 143 | return nil, errors.Wrap(err, "failed to convert retrieve_size to int") 144 | } 145 | } 146 | 147 | // Finally, retrieve the file 148 | //nolint:wrapcheck 149 | return client.RetrievePiece(ctx, urlString, contentCID, int64(size)) 150 | } 151 | -------------------------------------------------------------------------------- /worker/stub/cmd/StubWorker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 6 | "github.com/data-preservation-programs/RetrievalBot/worker/stub" 7 | _ "github.com/joho/godotenv/autoload" 8 | ) 9 | 10 | func main() { 11 | worker := stub.Worker{} 12 | process, err := 
task.NewTaskWorkerProcess(context.Background(), task.Stub, worker) 13 | if err != nil { 14 | panic(err) 15 | } 16 | 17 | defer process.Close() 18 | 19 | err = process.Poll(context.Background()) 20 | if err != nil { 21 | panic(err) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /worker/stub/worker.go: -------------------------------------------------------------------------------- 1 | package stub 2 | 3 | import ( 4 | "github.com/data-preservation-programs/RetrievalBot/pkg/task" 5 | "math/rand" 6 | "time" 7 | ) 8 | 9 | type Worker struct{} 10 | 11 | func (e Worker) DoWork(_ task.Task) (*task.RetrievalResult, error) { 12 | //nolint: gosec 13 | return task.NewSuccessfulRetrievalResult( 14 | time.Duration(rand.Int31()), 15 | int64(rand.Int31()), 16 | time.Duration(rand.Int31())), nil 17 | } 18 | --------------------------------------------------------------------------------