├── .env-default ├── .env-latest ├── .env-nightlies ├── .gitattributes ├── .github └── dependabot.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── COMPLIANCE.yaml ├── CONTRIBUTING.md ├── LICENSE ├── MAINTAINERS.md ├── README.md ├── SECURITY.md ├── alertmanager └── config.yml ├── docker-compose.yml ├── docs └── README.md ├── fetch-validator-status ├── DidKey.py ├── Dockerfile ├── README.md ├── REST_API.md ├── fetch_status.py ├── gunicorn_conf.py ├── main.py ├── networks.json ├── networks.py ├── plugin_collection.py ├── plugins │ ├── Example │ │ ├── README.md │ │ └── example.py │ ├── README.md │ ├── alerts │ │ ├── README.md │ │ └── alerts.py │ ├── analysis.py │ ├── generate_upgrade_schedule │ │ ├── README.md │ │ └── generate_upgrade_schedule.py │ ├── metrics │ │ ├── README.md │ │ ├── google_sheets.py │ │ └── network_metrics.py │ └── status_only.py ├── pool.py ├── requirements.txt ├── rest_api.py ├── run.sh ├── singleton.py └── util.py ├── grafana ├── config.monitoring └── provisioning │ ├── alerting │ ├── Error Warning.json │ └── WriteFailureWarning.json │ ├── dashboards │ ├── AlertsDashboard.json │ ├── NetworkOverviewDashboard.json │ ├── NodeDetailDashboard.json │ ├── PublicDashboard.json │ ├── TestDashboard.json │ └── dashboard.yml │ └── datasources │ └── datasource.yml ├── images ├── grafana │ └── Dockerfile ├── influxdb │ ├── latest │ │ └── Dockerfile │ └── nightly │ │ └── Dockerfile └── telegraf │ ├── latest │ └── Dockerfile │ └── nightly │ └── Dockerfile ├── influxdb └── config │ └── influxdb.conf ├── manage ├── prometheus ├── alert.rules └── prometheus.yml └── telegraf ├── .env-telegraf └── telegraf.conf /.env-default: -------------------------------------------------------------------------------- 1 | INDY_NODE_MONIOTR_VERSION=1.1.0 2 | TELEGRAF_ENV=./telegraf/.env-telegraf 3 | GRAFANA_ENV=./grafana/config.monitoring 4 | 5 | # ========================================= 6 | # Default Credentials 7 | # ToDo: 8 | # - Secure these 9 | # ----------------------------------------- 10 | # Grafana - Admin Credentials 11 | GF_SECURITY_ADMIN_PASSWORD=foobar 12 | GF_USERS_ALLOW_SIGN_UP=false 13 | 14 | # InfluxDB - Flux datasource token 15 | INFLUX_DB_FLUX_TOKEN=My5uP3rS3cRetT0k3n 16 | # ========================================= 17 | -------------------------------------------------------------------------------- /.env-latest: -------------------------------------------------------------------------------- 1 | TYPE=latest 2 | TELEGRAF_TAG=latest 3 | INFLUXDB_TAG=1.8 4 | GRAFANA_TAG=latest -------------------------------------------------------------------------------- /.env-nightlies: -------------------------------------------------------------------------------- 1 | TYPE=nightly 2 | TELEGRAF_TAG=latest 3 | INFLUXDB_TAG=nightly 4 | GRAFANA_TAG=main -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Declare files that will always have LF line endings on checkout. 
5 | *.sh text eol=lf 6 | *.md text eol=lf 7 | *.json text eol=lf 8 | *.yaml text eol=lf 9 | *.yml text eol=lf 10 | *.conf text eol=lf 11 | *.monitoring text eol=lf 12 | *.rules text eol=lf 13 | .env* text eol=lf 14 | manage text eol=lf 15 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # For details on how this file works refer to: 2 | # - https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 3 | version: 2 4 | updates: 5 | # Maintain dependencies for Python Packages 6 | - package-ecosystem: "pip" 7 | directory: "/fetch-validator-status" 8 | schedule: 9 | interval: "weekly" 10 | day: "monday" 11 | time: "04:00" 12 | timezone: "Canada/Pacific" 13 | ignore: 14 | - dependency-name: "*" 15 | update-types: ["version-update:semver-major"] 16 | 17 | # Maintain dependencies for docker 18 | - package-ecosystem: "docker" 19 | directory: "/fetch-validator-status" 20 | schedule: 21 | interval: "weekly" 22 | day: "monday" 23 | time: "04:00" 24 | timezone: "Canada/Pacific" 25 | 26 | # Maintain dependencies for docker 27 | - package-ecosystem: "docker" 28 | directory: "/images/grafana" 29 | schedule: 30 | interval: "weekly" 31 | day: "monday" 32 | time: "04:00" 33 | timezone: "Canada/Pacific" 34 | 35 | # Maintain dependencies for docker 36 | - package-ecosystem: "docker" 37 | directory: "/images/influxdb/latest" 38 | schedule: 39 | interval: "weekly" 40 | day: "monday" 41 | time: "04:00" 42 | timezone: "Canada/Pacific" 43 | 44 | # Maintain dependencies for docker 45 | - package-ecosystem: "docker" 46 | directory: "/images/influxdb/nightly" 47 | schedule: 48 | interval: "weekly" 49 | day: "monday" 50 | time: "04:00" 51 | timezone: "Canada/Pacific" 52 | 53 | # Maintain dependencies for docker 54 | - package-ecosystem: "docker" 55 | directory: "/images/telegraf/latest" 56 | schedule: 57 | interval: "weekly" 58 | day: "monday" 59 | time: "04:00" 60 | timezone: "Canada/Pacific" 61 | 62 | # Maintain dependencies for docker 63 | - package-ecosystem: "docker" 64 | directory: "/images/telegraf/nightly" 65 | schedule: 66 | interval: "weekly" 67 | day: "monday" 68 | time: "04:00" 69 | timezone: "Canada/Pacific" 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | influxdb/data/ 2 | 3 | # Ignore environment files that could contains seeds (credentials) 4 | .env-seed* 5 | to.do 6 | 7 | **/conf/ 8 | **/log/ 9 | *.env 10 | 11 | ### 12 | ### Python 13 | ### 14 | 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | test-reports/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | .env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | Pipfile 108 | Pipfile.lock 109 | 110 | # Spyder project settings 111 | .spyderproject 112 | .spyproject 113 | 114 | # Rope project settings 115 | .ropeproject 116 | 117 | # mkdocs documentation 118 | /site 119 | 120 | # mypy 121 | .mypy_cache/ 122 | 123 | 124 | ### 125 | ### Visual Studio Code 126 | ### 127 | 128 | .vscode/ 129 | 130 | ### 131 | ### MacOS 132 | ### 133 | 134 | # General 135 | .DS_Store 136 | .AppleDouble 137 | .LSOverride 138 | 139 | # Icon must end with two \r 140 | Icon 141 | 142 | 143 | # Thumbnails 144 | ._* 145 | 146 | # Files that might appear in the root of a volume 147 | .DocumentRevisions-V100 148 | .fseventsd 149 | .Spotlight-V100 150 | .TemporaryItems 151 | .Trashes 152 | .VolumeIcon.icns 153 | .com.apple.timemachine.donotpresent 154 | 155 | # Directories potentially created on remote AFP share 156 | .AppleDB 157 | .AppleDesktop 158 | Network Trash Folder 159 | Temporary Items 160 | .apdisk 161 | 162 | ### 163 | ### IntelliJ IDEs 164 | ### 165 | 166 | .idea/* 167 | 168 | ### 169 | ### Windows 170 | ### 171 | 172 | # Windows thumbnail cache files 173 | Thumbs.db 174 | ehthumbs.db 175 | ehthumbs_vista.db 176 | 177 | # Dump file 178 | *.stackdump 179 | 180 | # Folder config file 181 | [Dd]esktop.ini 182 | 183 | # Recycle Bin used on file shares 184 | $RECYCLE.BIN/ 185 | 186 | # Windows Installer files 187 | *.cab 188 | *.msi 189 | *.msix 190 | *.msm 191 | *.msp 192 | 193 | # Windows shortcuts 194 | *.lnk 195 | 196 | # Docs build 197 | _build/ 198 | **/*.iml 199 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | - Using welcoming and inclusive language 18 | - Being respectful of differing viewpoints and experiences 19 | - Gracefully accepting constructive criticism 20 | - Focusing on what is best for the community 21 | - Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | - The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | - Trolling, insulting/derogatory comments, and personal or political attacks 28 | - Public or private harassment 29 | - Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | - Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at todd.wilson@gov.bc.ca. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [http://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: http://contributor-covenant.org 74 | [version]: http://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /COMPLIANCE.yaml: -------------------------------------------------------------------------------- 1 | name: compliance 2 | description: | 3 | This document is used to track a projects PIA and STRA 4 | compliance. 
5 | spec: 6 | - name: PIA 7 | status: not-required 8 | last-updated: '2020-07-06T20:58:58.055Z' 9 | - name: STRA 10 | status: not-required 11 | last-updated: '2020-07-06T20:58:58.055Z' 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## How to contribute 2 | 3 | You are encouraged to contribute to the repository by **forking and submitting a pull request**. This repo enforces DCO signing of all commits, as described [here](https://github.com/apps/dco). Please you sign **all** commits before submitting your PR. 4 | 5 | For significant changes, please open an issue first to discuss the proposed changes to avoid re-work. 6 | 7 | (If you are new to GitHub, you might start with a [basic tutorial](https://help.github.com/articles/set-up-git) and check out a more detailed guide to [pull requests](https://help.github.com/articles/using-pull-requests/).) 8 | 9 | Pull requests will be evaluated by the repository guardians on a schedule and if deemed beneficial will be committed to the master. Pull requests should have a descriptive name and include an summary of all changes made in the pull request description. 10 | 11 | All contributors retain the original copyright to their stuff, but by contributing to this project, you grant a world-wide, royalty-free, perpetual, irrevocable, non-exclusive, transferable license to all users **under the terms of the license under which this project is distributed.** 12 | 13 | All contributors are required to adhere to our [Code of Conduct](CODE_OF_CONDUCT.md) guidelines. 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2020 Province of British Columbia 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | # Maintainers 2 | 3 | ## Active Maintainers 4 | 5 | 6 | 7 | | Name | Github | LFID | 8 | | ---------------- | ---------------- | ---------------- | 9 | | Stephen Curran | swcurran | swcurran | 10 | | Wade Barnes | WadeBarnes | WadeBarnes | 11 | 12 | ## Emeritus Maintainers 13 | 14 | | Name | Github | LFID | 15 | |--------------|---------|---------| 16 | | | | | 17 | 18 | ## Becoming a Maintainer 19 | 20 | The Indy community welcomes contributions. Contributors may progress to become a 21 | maintainer. To become a maintainer the following steps occur, roughly in order. 22 | 23 | - 5 significant changes have been authored by the proposed maintainer and 24 | accepted. 25 | - The proposed maintainer has the sponsorship of at least one other maintainer. 26 | - This sponsoring maintainer will create a PR modifying the list of 27 | maintainers. 28 | - The proposed maintainer accepts the nomination and expresses a willingness 29 | to be a long-term (more than 6 month) committer. 30 | - This would be a comment in the above PR. 31 | - This PR will be communicated in all appropriate communication channels. It 32 | should be mentioned in any maintainer/community call. It should also be 33 | posted to the appropriate mailing list or chat channels if they exist. 34 | - Approval by at least 3 current maintainers within two weeks of the proposal or 35 | an absolute majority of current maintainers. 36 | - These votes will be recorded in the PR modifying the list of maintainers. 37 | - No veto by another maintainer within two weeks of proposal are recorded. 38 | - All vetoes must be accompanied by a public explanation as a comment in the 39 | PR for adding this maintainer 40 | - The explanation of the veto must be reasonable. 41 | - A veto can be retracted, in that case the approval/veto timeframe is reset. 42 | - It is bad form to veto, retract, and veto again. 43 | - The proposed maintainer becomes a maintainer 44 | - Either two weeks have passed since the third approval, 45 | - Or an absolute majority of maintainers approve. 46 | - In either case, no maintainer presents a veto. 47 | 48 | ## Removing Maintainers 49 | 50 | Being a maintainer is not a status symbol or a title to be maintained 51 | indefinitely. It will occasionally be necessary and appropriate to move a 52 | maintainer to emeritus status. This can occur in the following situations: 53 | 54 | - Resignation of a maintainer. 55 | - Violation of the Code of Conduct warranting removal. 56 | - Inactivity. 57 | - A general measure of inactivity will be no commits or code review comments 58 | for one reporting quarter, although this will not be strictly enforced if 59 | the maintainer expresses a reasonable intent to continue contributing. 60 | - Reasonable exceptions to inactivity will be granted for known long term 61 | leave such as parental leave and medical leave. 62 | - Other unspecified circumstances. 63 | 64 | Like adding a maintainer the record and governance process for moving a 65 | maintainer to emeritus status is recorded in the github PR making that change. 66 | 67 | Returning to active status from emeritus status uses the same steps as adding a 68 | new maintainer. Note that the emeritus maintainer already has the 5 required 69 | significant changes as there is no contribution time horizon for those. 
70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Indy Node Monitor 2 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) 3 | [![Lifecycle:Maturing](https://img.shields.io/badge/Lifecycle-Maturing-007EC6)](https://github.com/bcgov/repomountie/blob/master/doc/lifecycle-badges.md) 4 | 5 | Indy Node Monitor is a set of tools for monitoring the status of an Indy Ledger by querying the validator information of the nodes on the ledger. These tools are integrated into a full monitoring stack including the following components: 6 | 7 | - Indy Node Monitor (Fetch Validator Status) 8 | - Telegraf 9 | - Prometheus and/or InfluxDB 10 | - Alert Manager 11 | - Grafana 12 | 13 | This allows you to: 14 | - Visualize the node and network data on dashboards. 15 | - Track trends about the status of nodes and the overall ledger. 16 | - Track read and write uptimes. 17 | - Track ledger usage, such as the number of transactions on the ledger. 18 | - Drive notifications of node outages. 19 | 20 | The stack is easily managed and spun up in Docker using the included `./manage` script. Starting the stack is as easy as: 21 | ``` 22 | ./manage build 23 | ./manage start 24 | ``` 25 | 26 | A browser window to the Grafana instance should launch automatically. The login username is `admin` and the password is `foobar`. Once logged in, you can browse the various (auto-provisioned) dashboards, which will begin to populate after a few minutes. *Please note: In order to collect detailed data from the network to populate many of the graphs, you will require a privileged DID (NETWORK_MONITOR at minimum) on the ledger, and you will need to configure the monitoring stack with the seed(s).* 27 | 28 | For more information about the commands available in the `./manage` script, refer to the [command list](docs/README.md#command-list). 29 | 30 | For more information on how to set up and use the monitoring stack, refer to this [readme](docs/README.md#setting-up-the-monitoring-stack). 31 | 32 | ## Fetch Validator Status 33 | 34 | This simple tool is the heart of the Indy Node Monitor. It is used to retrieve "validator-info"—detailed status data about an Indy node (aka "validator")—from all the nodes in a network. The results are returned as a JSON array with a record per validator, and the data can be manipulated and formatted through the use of plugins. Fetch Validator Status can be used as a stand-alone command line tool or through the Indy Node Monitor REST API, which is used by the monitoring stack. The Indy Node Monitor REST API can be spun up easily by running `./manage start indy-node-monitor`, and a browser window will automatically launch to display the API documents and interface. 35 | 36 | For more details see the Fetch Validator Status [readme](fetch-validator-status/README.md). 37 | 38 | ## How to contribute to the monitoring stack 39 | 40 | For more details on how to contribute, follow the link to this [readme](docs/README.md). 41 | 42 | ## Contributions 43 | 44 | Pull requests are welcome! Please read our [contributions guide](CONTRIBUTING.md) and submit your PRs. We enforce developer certificate of origin (DCO) commit signing. See guidance [here](https://github.com/apps/dco). 45 | 46 | We also welcome issues submitted about problems you encounter in using the tools within this repo.
47 | 48 | ## Code of Conduct 49 | 50 | All contributors are required to adhere to our [Code of Conduct](CODE_OF_CONDUCT.md) guidelines. 51 | 52 | ## License 53 | 54 | [Apache License Version 2.0](LICENSE) -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Hyperledger Security Policy 2 | 3 | ## Reporting a Security Bug 4 | 5 | If you think you have discovered a security issue in any of the Hyperledger projects, we'd love to hear from you. We will take all security bugs seriously and if confirmed upon investigation we will patch it within a reasonable amount of time and release a public security bulletin discussing the impact and credit the discoverer. 6 | 7 | There are two ways to report a security bug. The easiest is to email a description of the flaw and any related information (e.g. reproduction steps, version) to [security at hyperledger dot org](mailto:security@hyperledger.org). 8 | 9 | The other way is to file a confidential security bug in our [JIRA bug tracking system](https://jira.hyperledger.org). Be sure to set the “Security Level” to “Security issue”. 10 | 11 | The process by which the Hyperledger Security Team handles security bugs is documented further in our [Defect Response page](https://wiki.hyperledger.org/display/HYP/Defect+Response) on our [wiki](https://wiki.hyperledger.org). 12 | 13 | -------------------------------------------------------------------------------- /alertmanager/config.yml: -------------------------------------------------------------------------------- 1 | route: 2 | receiver: 'slack' 3 | 4 | receivers: 5 | - name: 'slack' 6 | # slack_configs: 7 | # - send_resolved: true 8 | # username: '' 9 | # channel: '#' 10 | # api_url: '' -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | volumes: 4 | prometheus_data: {} 5 | grafana_data: {} 6 | 7 | networks: 8 | node-monitor-net: 9 | 10 | services: 11 | indy-node-monitor: 12 | build: 13 | context: ./fetch-validator-status 14 | dockerfile: ./Dockerfile 15 | image: indy-node-monitor 16 | volumes: 17 | # Mounts for indy-node-monitor configuration 18 | - ./fetch-validator-status/plugins/:/home/indy/plugins/ 19 | - ./fetch-validator-status/conf/:/home/indy/conf/ 20 | - ./fetch-validator-status/logs/:/home/indy/logs/ 21 | ports: 22 | - "8080:8080" 23 | networks: 24 | - node-monitor-net 25 | command: > 26 | --web --verbose 27 | 28 | telegraf: 29 | # Full tag list: https://hub.docker.com/r/library/telegraf/tags/ 30 | build: 31 | context: ./images/telegraf/ 32 | dockerfile: ./${TYPE}/Dockerfile 33 | args: 34 | TELEGRAF_TAG: ${TELEGRAF_TAG} 35 | image: telegraf 36 | env_file: 37 | - ${TELEGRAF_ENV} 38 | # Telegraf requires network access to: 39 | # - InfluxDB 40 | # - Indy-Node-Monitor 41 | links: 42 | - influxdb 43 | - indy-node-monitor 44 | volumes: 45 | # Mount for telegraf configuration 46 | - ./telegraf/:/etc/telegraf/ 47 | depends_on: 48 | - influxdb 49 | - indy-node-monitor 50 | ports: 51 | # The WebUI for Chronograf is served on port 8888 52 | - "9273:9273" 53 | networks: 54 | - node-monitor-net 55 | 56 | influxdb: 57 | # Full tag list: https://hub.docker.com/r/library/influxdb/tags/ 58 | build: 59 | context: ./images/influxdb/ 60 | dockerfile: ./${TYPE}/Dockerfile 61 | args: 62 | INFLUXDB_TAG: ${INFLUXDB_TAG} 63 | image: influxdb 
64 | volumes: 65 | # Mount for influxdb data directory 66 | - ./influxdb/data:/var/lib/influxdb 67 | # Mount for influxdb configuration 68 | - ./influxdb/config/:/etc/influxdb/ 69 | ports: 70 | # The API for InfluxDB is served on port 8086 71 | - "8086:8086" 72 | - "8082:8082" 73 | # UDP Port 74 | - "8089:8089/udp" 75 | networks: 76 | - node-monitor-net 77 | 78 | prometheus: 79 | image: prom/prometheus 80 | volumes: 81 | - ./prometheus/:/etc/prometheus/ 82 | - prometheus_data:/prometheus 83 | command: 84 | - '--config.file=/etc/prometheus/prometheus.yml' 85 | - '--storage.tsdb.path=/prometheus' 86 | - '--web.console.libraries=/usr/share/prometheus/console_libraries' 87 | - '--web.console.templates=/usr/share/prometheus/consoles' 88 | ports: 89 | - 9090:9090 90 | links: 91 | - telegraf 92 | - alertmanager 93 | depends_on: 94 | - telegraf 95 | networks: 96 | - node-monitor-net 97 | # deploy: 98 | # placement: 99 | # constraints: 100 | # - node.role==manager 101 | # restart_policy: 102 | # condition: on-failure 103 | 104 | alertmanager: 105 | image: prom/alertmanager 106 | ports: 107 | - 9093:9093 108 | volumes: 109 | - "./alertmanager/:/etc/alertmanager/" 110 | networks: 111 | - node-monitor-net 112 | command: 113 | - '--config.file=/etc/alertmanager/config.yml' 114 | - '--storage.path=/alertmanager' 115 | # deploy: 116 | # placement: 117 | # constraints: 118 | # - node.role==manager 119 | # restart_policy: 120 | # condition: on-failure 121 | 122 | grafana: 123 | build: 124 | context: ./images/grafana/ 125 | dockerfile: ./Dockerfile 126 | args: 127 | GRAFANA_TAG: ${GRAFANA_TAG} 128 | image: grafana 129 | depends_on: 130 | - prometheus 131 | - influxdb 132 | ports: 133 | - 3000:3000 134 | volumes: 135 | - grafana_data:/var/lib/grafana 136 | - ./grafana/provisioning/:/etc/grafana/provisioning/ 137 | env_file: 138 | - ${GRAFANA_ENV} 139 | networks: 140 | - node-monitor-net 141 | links: 142 | - influxdb 143 | - prometheus 144 | user: "472" 145 | # deploy: 146 | # placement: 147 | # constraints: 148 | # - node.role==manager 149 | # restart_policy: 150 | # condition: on-failure -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Command List 5 | 6 | 7 | ## Usage: 8 | ````bash 9 | ./manage [options] [command] [args] 10 | ```` 11 | --- 12 | 13 | 14 | ### List 15 | ````bash 16 | list 17 | ```` 18 | * Get a list of commands 19 | 20 | 21 | ### Start 22 | ````bash 23 | up [service] 24 | or 25 | start [service] 26 | ```` 27 | * Spin up the Indy Node Monitoring Stack environment. 28 | > Optionally specify the service to spin up (dependencies will also be spun up). 29 | 30 | 31 | ### Stop 32 | ````bash 33 | down [service] 34 | or 35 | stop [service] 36 | ```` 37 | * Tear down the Indy Node Monitoring Stack environment. 38 | > Optionally specify the service to tear down. 39 | 40 | 41 | ### Restart 42 | ````bash 43 | restart [service] 44 | ```` 45 | * Restart the Indy Node Monitoring Stack environment. 46 | > Optionally specify the service to restart. 47 | 48 | 49 | ### Services 50 | ````bash 51 | list-services 52 | ```` 53 | * Get a list of the services that make up the Indy Node Monitoring Stack. 54 | 55 | 56 | ### Logs 57 | ````bash 58 | logs 59 | ```` 60 | * Stream the logs from a given container. 61 | 62 | 63 | ### Shell 64 | ````bash 65 | shell 66 | ```` 67 | * Open a shell on a given container. 
68 | 69 | 70 | ### Plugins 71 | ````bash 72 | install-plugin 73 | ```` 74 | * Install a Grafana plug-in on the Grafana container. 75 | 76 | 77 | ### Clearing Data 78 | ````bash 79 | delete-data [service] 80 | ```` 81 | * Delete the data for a given service, or all services by default. 82 | > This is useful to clear data from Prometheus and/or InfluxDB when making changes to how the data is collected. 83 | 84 | 85 | ### Cleaning Environment 86 | ````bash 87 | clean 88 | ```` 89 | * Cleans up all environment resources. 90 | > Deletes data from all services 91 | 92 | > Deletes all containers images and prunes any dangling images. 93 | 94 | 95 | ### Influx CLI Shell 96 | ````bash 97 | influx-cli 98 | ```` 99 | * Open a shell to the Influx CLI on the influxDB container. 100 | 101 | 102 | ### Flux REPL Shell 103 | ````bash 104 | flux-repl 105 | ```` 106 | * Open a shell to the Flux REPL on the influxDB container. 107 | 108 | 109 | ### Builds 110 | ````bash 111 | build [service] 112 | ```` 113 | * Use for troubleshooting builds when making image configuration changes. 114 | > Builds container images based on the docker-compose configuration. 115 | 116 | 117 | ### Parsing Test 118 | ````bash 119 | Sed-test 120 | ```` 121 | * Run a 'sed' parsing test on your machine. 122 | > Helps diagnose issues with the version of 'sed' on some machines. 123 | 124 | --- 125 | 126 | 127 | ## Options: 128 | 129 | 130 | ````bash 131 | -h 132 | ```` 133 | * Print this help documentation. 134 | 135 | 136 | ````bash 137 | --nightly 138 | ```` 139 | * Build/Use 'nightly' Influx images, 'latest' images are built/used by default. 140 | > Works with the up|start and restart commands. 141 | 142 | 143 |
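Putting the commands and options above together, a typical working session might look like the following. The service names (`grafana`, `influxdb`, etc.) come from the docker-compose definitions; passing a service name to `logs` and `shell` is assumed to follow the same pattern as the other commands that accept a `[service]` argument:

````bash
# Build the images and start the whole stack
./manage build
./manage start

# Stream the logs of a single service while it starts up
./manage logs grafana

# Open a shell on the InfluxDB container to look around
./manage shell influxdb

# Tear everything down when finished
./manage stop
````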
144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | # Setting up the Monitoring Stack 152 | Once you have the Indy Node Monitoring Stack on your system, right click the folder and open it in VS Code. 153 | 154 | * Find .env-seeds 155 | * Insert your seeds for each SBN, SSN, and SMN 156 | * Make sure to save the file before closing 157 | 158 |
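As a rough illustration, the seeds file follows the usual `KEY=value` environment-file layout. The variable names below are placeholders only (confirm the exact names expected by the stack against `telegraf/.env-telegraf` and the `./manage` script); SBN, SSN, and SMN presumably refer to the Sovrin Builder, Staging, and Main networks:

````bash
# .env-seeds (example layout only; the variable names are placeholders)
SBN_SEED=<seed of your privileged DID on the Sovrin Builder Net>
SSN_SEED=<seed of your privileged DID on the Sovrin Staging Net>
SMN_SEED=<seed of your privileged DID on the Sovrin Main Net>
````

Never commit this file; `.env-seed*` is already listed in `.gitignore` for exactly that reason.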
159 | 160 | 161 | 162 | 163 | # Adding new dashboards to the stack 164 | * Create a new dashboard in the Grafana UI 165 | * Save the JSON file to your computer 166 | * Open your *"c:/indy-node-monitor"* folder 167 | * Navigate to: 168 | * *"grafana"* 169 | * *"provisioning"* 170 | * *"dashboards"* 171 | * Place the JSON file in this folder 172 | 173 | 174 |
175 | 176 | 177 | 178 | 179 | # Saving changes to existing dashboards 180 | **NOTE: The *"Copy JSON to clipboard"* button tends to NOT work!** 181 | 182 | * Click into the raw JSON, use *Ctrl+A* & *Ctrl+C* 183 | * Navigate to *"c:/indy-node-monitor"* 184 | * Right click the *"c:/indy-node-monitor"* folder and open in Visual Studio Code 185 | * In VS-Code, navigate to: 186 | * *"grafana"* 187 | * *"provisioning"* 188 | * *"dashboards"* 189 | * Find the dashboard you are working on 190 | 191 | **NOTE: Make sure you are changing the CORRECT dashboard!** 192 | 193 | * Select everything in the VS-Code file for the dashboard you are working on and paste the new changes 194 | * Make sure to save the file before closing VS-Code 195 | 196 | 197 |
198 | 199 | 200 | 201 | 202 | # Prometheus Query Example 203 | #### Example 204 | ````promQL 205 | node_response_result_data_Pool_info_reachable_node_count 206 | ```` 207 | This is looking at the *"reachable node count"* under *"pool info"*, under *"data"*, under *"result"*, etc. 208 | 209 | > If you don't know your metric names, the metrics browser button helps you see the names of all metrics Prometheus can see. This is assuming the data source is working correctly. 210 | 211 | 212 |
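To check a metric outside of Grafana, you can also query Prometheus' HTTP API directly. Port `9090` is published by the docker-compose file, and `/api/v1/query` is the standard Prometheus instant-query endpoint; the metric name is the example above:

````bash
curl 'http://localhost:9090/api/v1/query?query=node_response_result_data_Pool_info_reachable_node_count'
````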
213 | 214 | 215 | 216 | 217 | # InfluxDB (flux) Query Example 218 | ````flux 219 | from(bucket: v.defaultBucket) 220 | |> range(start: v.timeRangeStart, stop: v.timeRangeStop) 221 | |> filter(fn: (r) => 222 | r._measurement == "node" and 223 | r._field == "node-address" 224 | ) 225 | ```` 226 | 227 | **from()** defines your bucket 228 | \ 229 | \ 230 | **range()** limits the query data by time 231 | \ 232 | \ 233 | **filter()** limits what rows of data will be output 234 | > Filters have an *fn* parameter *(fn: (r))* that evaluates each row to true or false. If it evaluates a row to true, that row is included in the output data. 235 | 236 |
237 | 238 | **r._measurement** is what we are measuring by 239 | \ 240 | \ 241 | **r._field** is the field of data we want to output 242 | 243 | 244 |
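To experiment with a query like this outside of Grafana, you can open the Flux REPL on the InfluxDB container (see the command list above). `v.defaultBucket`, `v.timeRangeStart`, and `v.timeRangeStop` are variables Grafana injects into dashboard queries, so in the REPL you substitute literal values; the bucket name below is a placeholder, not necessarily the one configured for this stack:

````bash
./manage flux-repl
# Inside the REPL, paste the query with literals substituted, e.g.:
#   from(bucket: "telegraf")
#     |> range(start: -15m)
#     |> filter(fn: (r) => r._measurement == "node" and r._field == "node-address")
````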
245 | 246 | 247 | 254 | 255 | 256 | 257 | 258 | 259 | # Alerts 260 | Alerts trigger an action (*commonly an email or Slack message*) when certain requirements are met. 261 | \ 262 | \ 263 | Most commonly a separate dashboard is created as the "Alerts Dashboard". A separate dashboard is typically used because what is displayed to set triggers is often bare compared to the main visual dashboards. 264 | \ 265 | \ 266 | The Alert Dashboard only requires the metric you want to trigger the alert and the threshold at which the metric triggers the alert. 267 | 268 | 269 |
270 | 271 | 272 | 273 | 274 | # Drop-Down/Variable Filters 275 | To set variables you need to go to the settings icon located at the top right of the Grafana dashboard you are working on. Then select the variables tab on the left side. 276 | \ 277 | \ 278 | Variables call upon *key_tags* as values. 279 | 280 | * The _**name**_ of the variable you set will be used to filter your metrics 281 | 282 |
283 | 284 | * The _**label**_ is used as the name displayed in the drop-down list 285 | 286 | 287 | 288 | #### Example 1 289 | ```` 290 | label_values() 291 | ```` 292 | >This is your base query for making filters. A *key_tag* goes in the brackets to call the values as your list of choices. 293 | 294 | 295 | #### Example 2 296 | ```` 297 | label_values(name) 298 | ```` 299 | This creates a label value for *"name"*, which will make a list of every node name from our data source, since our *key_tag* for node names is *"name"*. 300 | 301 | >If your query is working, there should be a list of expected values in the preview of values section at the bottom. 302 | 303 | 304 |
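If you want to confirm which values a drop-down built this way will offer, you can also ask Prometheus for a label's values directly over its HTTP API (port `9090` is published by the docker-compose file; `/api/v1/label/<label name>/values` is a standard Prometheus endpoint):

````bash
curl 'http://localhost:9090/api/v1/label/name/values'
````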
305 | 306 | 307 | 308 | 309 | # Cascading Filters 310 | #### Example 311 | ```` 312 | label_values(node_response_result_data_Pool_info_Reachable_nodes_count{name="[[node]]"}, network) 313 | ```` 314 | 315 | **label_values()** is our base query to create the variable 316 | \ 317 | \ 318 | **node_response_result_data_pool_info_reachable_nodes_count** is our example metric which is calling ALL reachable nodes 319 | \ 320 | \ 321 | **{name="[[node]]"}** *"name"* is the metric output for the name of each node. Calling the variable we created in the section above using *"[[node]]"*, we can compare the metric output with our variable. 322 | 323 | >This format is used in the metric queries as well so you can pick and choose which metrics you want to change based upon the filters. 324 | 325 |
326 | 327 | **", network)"** is, in this case, a *key_tag* following the filtered metric query which we want to use as our variable for this cascading filter. Our *key_tag*, "network", lists all of the networks from our data source. 328 | \ 329 | \ 330 | The end result is that when we select a node from our first filter, the second filter will ONLY show networks that the node is on. So if a node is on multiple networks, we can show the data per network rather than the sum of multiple networks. 331 | 332 | 333 |
334 | 335 | 336 | 337 | 338 | # Filters in Prometheus queries 339 | #### Example 340 | ```` 341 | example_metric{job="tick"} 342 | ```` 343 | We filter our data by job, which in our case is *"tick"*. 344 | 345 | **NOTE: A "Job" can be set in the data source** 346 | 347 | 348 |
349 | 350 | 351 | 352 | 353 | 354 | # Implementing Grafana variables in Prometheus queries 355 | #### Example 356 | ```` 357 | example_metric{name="[[node]]"} 358 | ```` 359 | This example compares the *variable* *"[[node]]"* **(being the name of the variable, NOT the label title)** to the *key_tag* *"name"*. 360 | \ 361 | \ 362 | As a result, when filtered using the Grafana drop-down, only metric values associated with the node selected will be shown. 363 | 364 | 365 |
366 | 367 | 368 | 369 | 370 | # Implementing Grafana variables in InfluxDB (flux) queries 371 | #### Example 372 | ```` 373 | from(bucket: v.defaultBucket) 374 | |> range(start: v.timeRangeStart, stop: v.timeRangeStop) 375 | |> filter(fn: (r) => 376 | r._measurement == "node" and 377 | r._field == "node-address" and 378 | r.name == "${node}" and 379 | r.network == "${network}" 380 | ) 381 | ```` 382 | 383 | **from()** defines your bucket 384 | \ 385 | \ 386 | **range()** limits the query data by time 387 | \ 388 | \ 389 | **filter()** limits what rows of data will be output 390 | > Filters have an *fn* parameter *(fn: (r))* that evaluates each row to true or false. If it evaluates a row to true, that row is included in the output data. 391 | 392 |
393 | 394 | **r._measurement** is what we are measuring by 395 | \ 396 | \ 397 | **r._field** is the field of data we want to output 398 | \ 399 | \ 400 | **r.name** & **r.network** is referencing *key_tags* 401 | \ 402 | \ 403 | **\${node}** and **\${network}** is referencing Grafana variables 404 | 405 | 406 |
407 | 408 | 409 | 410 | # Troublshooting 411 | 412 | 413 | ### Dashboard showing no data 414 | > The monitoring stack doesn't always start up properly, as a result the dashboards might not be populated. Running the restart command in the monitoring stack should fix the issue. 415 | 416 | -------------------------------------------------------------------------------- /fetch-validator-status/DidKey.py: -------------------------------------------------------------------------------- 1 | import base58 2 | import base64 3 | import nacl.signing 4 | from indy_vdr.ledger import Request 5 | 6 | class DidKey: 7 | def __init__(self, seed): 8 | self.seed = seed 9 | self.seed = self.seed_as_bytes() 10 | self.sk = nacl.signing.SigningKey(self.seed) 11 | self.vk = bytes(self.sk.verify_key) 12 | self.did = base58.b58encode(self.vk[:16]).decode("ascii") 13 | self.verkey = base58.b58encode(self.vk).decode("ascii") 14 | 15 | def sign_request(self, req: Request): 16 | signed = self.sk.sign(req.signature_input) 17 | req.set_signature(signed.signature) 18 | 19 | def seed_as_bytes(self): 20 | if not self.seed or isinstance(self.seed, bytes): 21 | return self.seed 22 | if len(self.seed) == 64: 23 | return bytes.fromhex(self.seed) 24 | if len(self.seed) != 32: 25 | return base64.b64decode(self.seed) 26 | return self.seed.encode("ascii") -------------------------------------------------------------------------------- /fetch-validator-status/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim-bullseye 2 | 3 | ARG uid=1001 4 | ARG user=indy 5 | ARG tag_name=indy-node-monitor 6 | ARG tag_version= 7 | 8 | ENV HOME="/home/$user" \ 9 | APP_ROOT="$HOME" \ 10 | LC_ALL=C.UTF-8 \ 11 | LANG=C.UTF-8 \ 12 | PIP_NO_CACHE_DIR=off \ 13 | PYTHONUNBUFFERED=1 \ 14 | PYTHONIOENCODING=UTF-8 \ 15 | RUST_LOG=warning \ 16 | SHELL=/bin/bash \ 17 | SUMMARY="Indy Node Monitor" \ 18 | DESCRIPTION="A container for monitoring the nodes of an Indy Node network." 19 | 20 | LABEL summary="$SUMMARY" \ 21 | description="$DESCRIPTION" \ 22 | io.k8s.description="$DESCRIPTION" \ 23 | io.k8s.display-name="$tag_name $tag_version" \ 24 | name="$tag_name" \ 25 | version="$tag_version" \ 26 | maintainer="" 27 | 28 | # Add local binaries and aliases to path 29 | ENV PATH="$HOME/.local/bin:$PATH" 30 | 31 | # Make libraries resolvable by python 32 | ENV LD_LIBRARY_PATH="$HOME/.local/lib:$LD_LIBRARY_PATH" 33 | RUN echo "$HOME/.local/lib" > /etc/ld.so.conf.d/local.conf && ldconfig 34 | 35 | # Add indy user 36 | RUN useradd -U -ms /bin/bash -u $uid $user 37 | WORKDIR $HOME 38 | 39 | # - In order to drop the root user, we have to make some directories writable 40 | # to the root group as OpenShift default security model is to run the container 41 | # under random UID. 42 | RUN usermod -a -G root $user 43 | 44 | # Create standard directories to allow volume mounting and set permissions 45 | # Note: PIP_NO_CACHE_DIR environment variable should be cleared to allow caching 46 | RUN mkdir -p \ 47 | $HOME/log \ 48 | $HOME/cache \ 49 | $(python -m site --user-site) 50 | 51 | # The root group needs access the directories under $HOME for the container to function in OpenShift. 52 | # Also ensure the permissions on the python 'site-packages' folder are set correctly. 53 | RUN chown -R $user:root $HOME && \ 54 | chmod -R ug+rw $HOME $HOME/log $HOME/cache && \ 55 | chmod +rx $(python -m site --user-site) 56 | 57 | USER $user 58 | 59 | ADD requirements.txt . 
60 | RUN pip3 install --upgrade pip 61 | RUN pip install --no-cache-dir -r requirements.txt 62 | 63 | ADD networks.json . 64 | ADD *.py ./ 65 | ADD plugins ./plugins 66 | 67 | ENTRYPOINT ["bash", "-c", "python main.py $@", "--"] 68 | -------------------------------------------------------------------------------- /fetch-validator-status/README.md: -------------------------------------------------------------------------------- 1 | # Fetch Validator Status 2 | 3 | This folder contains a simple Python script that uses [indy-vdr](https://github.com/hyperledger/indy-vdr) to execute a "validator-info" call to an Indy network. The validator info transaction script returns a great deal of information about the accessed ledger. An example of the JSON data returned by the call for an individual node is provided [below](#example-validator-info). 4 | 5 | The call can only be made by an entity with a suitably authorized DID on the ledger. For example, on the Sovrin MainNet, only Stewards and some within the Sovrin Foundation has that access. 6 | 7 | The easiest way to use this now is to use the `./run` script and the Docker build process provide in this folder. Work is in progress to add a CI/CD capability to `indy-vdr` so that the artifacts are published to PyPi and native Python apps can be used. In the meantime, we recommend building your own [plug-in](plugins/README.md). 8 | 9 | ## How To Run 10 | 11 | Here is guidance of how you can run the script to get validator info about any accessible Indy network. We'll start with a test on local network (using [von-network](https://github.com/bcgov/von-network)) and provide how this can be run on any Indy network, including Sovrin networks. 12 | 13 | ### Prerequisites 14 | 15 | If you are running locally, you must have `git`, `docker` and a bash terminal. On Windows, when you install `git`, the `git-bash` terminal is installed and you can use that. 16 | 17 | To try this in a browser, go to [Play With Docker](https://labs.play-with-docker.com/), login (requires a Docker Hub ID) and click the "+ ADD NEW INSTANCE` link (left side). That opens a terminal session that you can use to run these steps. 18 | 19 | The rest of the steps assume you are in your bash terminal in a folder where GitHub repos can be cloned. 20 | 21 | ### Start VON Network 22 | 23 | To start a local Indy network to test with, we'll clone a VON Network, build it and start it using the following commands run in a bash terminal: 24 | 25 | ``` bash 26 | git clone https://github.com/bcgov/von-network 27 | cd von-network 28 | ./manage build 29 | ./manage start 30 | cd .. 31 | 32 | ``` 33 | 34 | The build step will take a while as 1/3 of the Internet is downloaded. Eventually, the `start` step will execute and a four-node Indy ledger will start. Wait about 30 seconds and then go to the web interface to view the network. 35 | 36 | - If you are running locally, go to [http://localhost:9000](http://localhost:9000). 37 | - If you are on Play with Docker, click the `9000` link above the terminal session window. 38 | 39 | Note the last command above puts you back up to the folder in which you started. If you want to explore `von-network` you'll have to change back into the `von-network` folder. 40 | 41 | When you are finished your running the validator tool (covered in the steps below) and want to stop your local indy-network, change to the von-network folder and run: 42 | 43 | ```bash 44 | ./manage down 45 | 46 | ``` 47 | 48 | We'll remind you of that later in these instructions. 
49 | 50 | ### Clone the indy-node-monitor repo 51 | 52 | Run these commands to clone this repo so that you can run the fetch validator info command. 53 | 54 | ```bash 55 | git clone https://github.com/bcgov/indy-node-monitor 56 | cd indy-node-monitor/fetch-validator-status 57 | 58 | ``` 59 | 60 | ### Run the Validator Info Script 61 | 62 | For a full list of script options run: 63 | ``` bash 64 | ./run.sh -h 65 | ``` 66 | 67 | To get the details for the known networks available for use with the `--net` option, run: 68 | ``` bash 69 | ./run.sh --list-nets 70 | ``` 71 | 72 | To run the validator script, run the following command in your bash terminal from the `fetch-validator-status` folder in the `indy-node-monitor` clone: 73 | 74 | ``` bash 75 | ./run.sh --net= --seed= 76 | ``` 77 | or 78 | ``` bash 79 | ./run.sh --genesis-url= --seed= 80 | ``` 81 | 82 | To just get a status summary for the nodes, run: 83 | ``` bash 84 | ./run.sh --net= --seed= --status 85 | ``` 86 | or 87 | ``` bash 88 | ./run.sh --genesis-url= --seed= --status 89 | ``` 90 | 91 | To fetch data for a single node, or a particular set of nodes use the `--nodes` argument and provide a comma delimited list of node names (aliases); 92 | ``` bash 93 | ./run.sh --net= --seed= --status --nodes node1,node2 94 | ``` 95 | 96 | For the first test run using von-network: 97 | 98 | - the `` is the Indy test network Trustee seed: `000000000000000000000000Trustee1`. 99 | - the URL is retrieved by clicking on the `Genesis Transaction` link in the VON-Network web interface and copying the URL from the browser address bar. 100 | 101 | If you are running locally, the full command is: 102 | 103 | ``` bash 104 | ./run.sh --net=vn --seed=000000000000000000000000Trustee1 105 | ``` 106 | or 107 | ``` bash 108 | ./run.sh --genesis-url=http://localhost:9000/genesis --seed=000000000000000000000000Trustee1 109 | ``` 110 | 111 | To perform an anonymous connection test when a privileged DID seed is not available, omit the `SEED` (`-a` is no longer needed to perform an anonymous connection): 112 | 113 | ``` bash 114 | ./run.sh --net= 115 | ``` 116 | or 117 | ``` bash 118 | ./run.sh --genesis-url= 119 | ``` 120 | 121 | If running in the browser, you will have to get the URL for the Genesis file (as described above) and replace the `localhost` URL above. 122 | 123 | You should see a very long JSON structure printed to the terminal. You can redirect the output to a file by adding something like `> good.json` at the end of the command. 124 | 125 | If you use the Seed of a DID that does not have permission to see validator info, you will get a much shorter JSON structure containing access denied messages. 126 | 127 | ### Damage the Indy Network 128 | 129 | To see what happens when a node is terminated, or inaccessible, terminate one of the von-network nodes and then re-run the validator info script. To terminate a von-network node run: 130 | 131 | ``` bash 132 | docker kill von_node1_1 133 | ``` 134 | 135 | When you repeat the run, you'll see that: 136 | 137 | - the command takes a while to run as it waits for a timeout from the terminated node 138 | - the entry from the terminate node is empty (but still present) 139 | - the entries of the other nodes indicate that the terminated node is not accessible 140 | 141 | Try redirecting the output to `>bad.json` and then use `diff good.json bad.json` to see all the differences. Better yet a visual diff tool. 
142 | 143 | If you are finished trying this out with a local Indy network, don't forget to go back and shut down the instance of von-network using the commands: 144 | 145 | ``` bash 146 | cd ../.. 147 | cd von-network 148 | ./manage down 149 | 150 | ``` 151 | 152 | ### Extracting Useful Information 153 | 154 | Once you have the script running, you can write a plug-in that takes the JSON input and produces a more useful monitoring output file—probably still in JSON. Here is some information that would be useful to extract from the JSON output: 155 | 156 | - Detect when a node is inaccessible (as with Node 1 above) and produce standard output for that situation. 157 | - Detect any nodes that are accessible (populated JSON data) but that are "unreachable" to some or all of the other Indy nodes. 158 | - That indicates that the internal port to the node is not accessible, even though the public port is accessible. 159 | - The number of transactions per Indy ledger, especially the domain ledger. 160 | - The average read and write times for the node. 161 | - The average throughput time for the node. 162 | - The uptime of the node (time since the last restart). 163 | - The time since the last freshness check (should be less than 5 minutes). 164 | 165 | The suggestions above are only ideas. The precise meanings of the values should be investigated, particularly for "ledger" type data (e.g. the number of transactions) that is reported on a per-node basis. 166 | 167 | Note that there are three different formats for the timestamps in the data structure, and all appear to be UTC. Make sure to convert times into a single format during collection. 168 | 169 | 170 | ## Plug-ins 171 | 172 | For info on plug-ins, see the plug-ins [readme](plugins/README.md). 173 | 174 | ## Rest API 175 | 176 | For info on the REST API, see [REST API](REST_API.md). 177 | 178 | ### Running against other Indy Networks 179 | 180 | To see the validator info for any other Indy network, you need a URL for the Genesis file for the network, and the seed for a suitably authorized DID. The pool Genesis file URLs are easy to obtain, since that is published data needed by agents connecting to Indy networks. Sovrin genesis URLs can be found [here](https://github.com/sovrin-foundation/sovrin/tree/master/sovrin). You need the URL for the raw version of the pool transaction files. For example, the URL you need for the Sovrin MainNet is: 181 | 182 | - [`https://raw.githubusercontent.com/sovrin-foundation/sovrin/master/sovrin/pool_transactions_live_genesis`](https://raw.githubusercontent.com/sovrin-foundation/sovrin/master/sovrin/pool_transactions_live_genesis) 183 | 184 | For the other Sovrin networks, replace `live` with `sandbox` (Sovrin Staging Net) or `builder` (Sovrin Builder Net). 185 | 186 | Getting a Seed for a DID with sufficient authorization on a specific ledger is an exercise for the user. **DO NOT SHARE DID SEEDS**. Those are to be kept secret. 187 | 188 | Do not write the Seeds down in any public form. The use of environment variables for these parameters is very deliberate so that no one accidentally leaks an authorized DID; a usage sketch follows below. 189 | 190 | Did I mention: **DO NOT SHARE DID SEEDS**?
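One way to keep a seed off your command line (and out of your shell history) is to pass it through the `SEED` environment variable, which the script reads when `--seed` is omitted; the same applies to `GENESIS_URL`. A minimal sketch, assuming a bash shell and that your `run.sh` invocation passes these variables through to the container:

``` bash
# Prompt for the seed without echoing it, so it never appears on the command line or in history.
read -s -p "DID seed: " SEED && export SEED && echo
export GENESIS_URL=https://raw.githubusercontent.com/sovrin-foundation/sovrin/master/sovrin/pool_transactions_live_genesis
./run.sh --status
unset SEED GENESIS_URL
```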
191 | 192 | ## Example Validator info 193 | 194 | The following is an example of the data for a single node from a VON-Network instance: 195 | 196 | ```JSONC 197 | [ 198 | { 199 | "name": "Node4", 200 | "response": { 201 | "response-version": "0.0.1", 202 | "timestamp": 1594079906, 203 | "Hardware": { 204 | "HDD_used_by_node": "2 MBs" 205 | }, 206 | "Pool_info": { 207 | "Read_only": false, 208 | "Total_nodes_count": 4, 209 | "f_value": 1, 210 | "Quorums": "{'n': 4, 'f': 1, 'weak': Quorum(2), 'strong': Quorum(3), 'propagate': Quorum(2), 'prepare': Quorum(2), 'commit': Quorum(3), 'reply': Quorum(2), 'view_change': Quorum(3), 'election': Quorum(3), 'view_change_ack': Quorum(2), 'view_change_done': Quorum(3), 'same_consistency_proof': Quorum(2), 'consistency_proof': Quorum(2), 'ledger_status': Quorum(2), 'ledger_status_last_3PC': Quorum(2), 'checkpoint': Quorum(2), 'timestamp': Quorum(2), 'bls_signatures': Quorum(3), 'observer_data': Quorum(2), 'backup_instance_faulty': Quorum(2)}", 211 | "Reachable_nodes": [ 212 | [ 213 | "Node1", 214 | 0 215 | ], 216 | [ 217 | "Node3", 218 | null 219 | ], 220 | [ 221 | "Node4", 222 | null 223 | ] 224 | ], 225 | "Unreachable_nodes": [ 226 | [ 227 | "Node2", 228 | null 229 | ] 230 | ], 231 | "Reachable_nodes_count": 3, 232 | "Unreachable_nodes_count": 1, 233 | "Blacklisted_nodes": [], 234 | "Suspicious_nodes": "" 235 | }, 236 | "Protocol": {}, 237 | "Node_info": { 238 | "Name": "Node4", 239 | "Mode": "participating", 240 | "Client_port": 9708, 241 | "Client_ip": "0.0.0.0", 242 | "Client_protocol": "tcp", 243 | "Node_port": 9707, 244 | "Node_ip": "0.0.0.0", 245 | "Node_protocol": "tcp", 246 | "did": "4PS3EDQ3dW1tci1Bp6543CfuuebjFrg36kLAUcskGfaA", 247 | "verkey": "68yVKe5AeXynD5A8K91aTZFjCQEoKV4hKPtauqjHa9phgitWEGkS5TR", 248 | "BLS_key": "2zN3bHM1m4rLz54MJHYSwvqzPchYp8jkHswveCLAEJVcX6Mm1wHQD1SkPYMzUDTZvWvhuE6VNAkK3KxVeEmsanSmvjVkReDeBEMxeDaayjcZjFGPydyey1qxBHmTvAnBKoPydvuTAqx5f7YNNRAdeLmUi99gERUU7TD8KfAa6MpQ9bw", 249 | "Metrics": { 250 | "Delta": 0.1, 251 | "Lambda": 240, 252 | "Omega": 20, 253 | "instances started": { 254 | "0": 252658.103411107 255 | }, 256 | "ordered request counts": { 257 | "0": 16 258 | }, 259 | "ordered request durations": { 260 | "0": 11.218815947 261 | }, 262 | "max master request latencies": 0, 263 | "client avg request latencies": { 264 | "0": null 265 | }, 266 | "throughput": { 267 | "0": 0.0017547843 268 | }, 269 | "master throughput": 0.0017547843, 270 | "total requests": 16, 271 | "avg backup throughput": null, 272 | "master throughput ratio": null, 273 | "average-per-second": { 274 | "read-transactions": 0.0338584473, 275 | "write-transactions": 0.0001539895 276 | }, 277 | "transaction-count": { 278 | "ledger": 21, 279 | "pool": 4, 280 | "config": 0, 281 | "audit": 1042 282 | }, 283 | "uptime": 103903 284 | }, 285 | "Committed_ledger_root_hashes": { 286 | "3": "b'C3ofBGxtL6xAWXtSFcGPamSnHqT1hB2MckzPXYpy7q7e'", 287 | "0": "b'EE9Dr84v87WGqiDFhHYHC9eVF1f1q3E8GnkXNbuZ7D8y'", 288 | "2": "b'GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn'", 289 | "1": "b'45YwJVR1Lzb5u5zGAetkbBXY2YdLixvc5j2eSz2vyQgn'" 290 | }, 291 | "Committed_state_root_hashes": { 292 | "2": "b'DfNLmH4DAHTKv63YPFJzuRdeEtVwF5RtVnvKYHd8iLEA'", 293 | "0": "b'8xwEHCkVcEA9qfBJaESpWKvtHVxUkvzJctAHfBiVhAAJ'", 294 | "1": "b'EozN1ekxkmMRGMeBNoBWTYi47PCZTP2vTgNN5GKgto1H'" 295 | }, 296 | "Uncommitted_ledger_root_hashes": {}, 297 | "Uncommitted_ledger_txns": { 298 | "3": { 299 | "Count": 0 300 | }, 301 | "0": { 302 | "Count": 0 303 | }, 304 | "2": { 305 | "Count": 0 306 | }, 307 | 
"1": { 308 | "Count": 0 309 | } 310 | }, 311 | "Uncommitted_state_root_hashes": { 312 | "2": "b'DfNLmH4DAHTKv63YPFJzuRdeEtVwF5RtVnvKYHd8iLEA'", 313 | "0": "b'8xwEHCkVcEA9qfBJaESpWKvtHVxUkvzJctAHfBiVhAAJ'", 314 | "1": "b'EozN1ekxkmMRGMeBNoBWTYi47PCZTP2vTgNN5GKgto1H'" 315 | }, 316 | "View_change_status": { 317 | "View_No": 0, 318 | "VC_in_progress": false, 319 | "Last_view_change_started_at": "1970-01-01 00:00:00", 320 | "Last_complete_view_no": 0, 321 | "IC_queue": {}, 322 | "VCDone_queue": {} 323 | }, 324 | "Catchup_status": { 325 | "Ledger_statuses": { 326 | "3": "synced", 327 | "0": "synced", 328 | "2": "synced", 329 | "1": "synced" 330 | }, 331 | "Received_LedgerStatus": "", 332 | "Waiting_consistency_proof_msgs": { 333 | "3": null, 334 | "0": null, 335 | "2": null, 336 | "1": null 337 | }, 338 | "Number_txns_in_catchup": { 339 | "3": 0, 340 | "0": 0, 341 | "2": 0, 342 | "1": 0 343 | }, 344 | "Last_txn_3PC_keys": { 345 | "3": { 346 | "Node2": [ 347 | null, 348 | null 349 | ], 350 | "Node3": [ 351 | null, 352 | null 353 | ] 354 | }, 355 | "0": { 356 | "Node3": [ 357 | null, 358 | null 359 | ], 360 | "Node2": [ 361 | null, 362 | null 363 | ] 364 | }, 365 | "2": { 366 | "Node2": [ 367 | null, 368 | null 369 | ], 370 | "Node3": [ 371 | null, 372 | null 373 | ] 374 | }, 375 | "1": { 376 | "Node3": [ 377 | null, 378 | null 379 | ], 380 | "Node2": [ 381 | null, 382 | null 383 | ] 384 | } 385 | } 386 | }, 387 | "Freshness_status": { 388 | "1": { 389 | "Last_updated_time": "2020-07-06 23:55:07+00:00", 390 | "Has_write_consensus": true 391 | }, 392 | "0": { 393 | "Last_updated_time": "2020-07-06 23:57:33+00:00", 394 | "Has_write_consensus": true 395 | }, 396 | "2": { 397 | "Last_updated_time": "2020-07-06 23:57:33+00:00", 398 | "Has_write_consensus": true 399 | } 400 | }, 401 | "Requests_timeouts": { 402 | "Propagates_phase_req_timeouts": 0, 403 | "Ordering_phase_req_timeouts": 0 404 | }, 405 | "Count_of_replicas": 1, 406 | "Replicas_status": { 407 | "Node4:0": { 408 | "Primary": "Node1:0", 409 | "Watermarks": "1000:1300", 410 | "Last_ordered_3PC": [ 411 | 0, 412 | 1042 413 | ], 414 | "Stashed_txns": { 415 | "Stashed_checkpoints": 0, 416 | "Stashed_PrePrepare": 0 417 | } 418 | } 419 | } 420 | }, 421 | "Software": { 422 | "OS_version": "Linux-4.15.0-109-generic-x86_64-with-debian-buster-sid", 423 | "Installed_packages": [ 424 | "zipp 2.2.0", 425 | "yarl 1.4.2", 426 | "wcwidth 0.1.8", 427 | "ujson 1.33", 428 | "typing 3.7.4.1", 429 | "typing-extensions 3.7.4.1", 430 | "timeout-decorator 0.4.0", 431 | "supervisor 4.0.4", 432 | "sortedcontainers 1.5.7", 433 | "six 1.11.0", 434 | "sha3 0.2.1", 435 | "setuptools 40.6.2", 436 | "semver 2.7.9", 437 | "rlp 0.6.0", 438 | "pyzmq 18.1.0", 439 | "PyYAML 5.1.2", 440 | "python3-indy 1.14.1", 441 | "python-rocksdb 0.6.9", 442 | "python-dateutil 2.6.1", 443 | "pytest 5.3.5", 444 | "pyparsing 2.4.6", 445 | "Pympler 0.5", 446 | "Pygments 2.2.0", 447 | "pycparser 2.19", 448 | "pycares 3.1.1", 449 | "py 1.8.1", 450 | "psutil 5.4.3", 451 | "prompt-toolkit 0.57", 452 | "portalocker 0.5.7", 453 | "pluggy 0.13.1", 454 | "pip 9.0.3", 455 | "packaging 19.0", 456 | "orderedset 2.0", 457 | "multidict 4.7.4", 458 | "msgpack-python 0.4.6", 459 | "more-itertools 8.2.0", 460 | "meld3 2.0.1", 461 | "MarkupSafe 1.1.1", 462 | "libnacl 1.6.1", 463 | "leveldb 0.201", 464 | "jsonpickle 0.9.6", 465 | "Jinja2 2.11.2", 466 | "ioflo 1.5.4", 467 | "intervaltree 2.1.0", 468 | "indy-plenum 1.12.2", 469 | "indy-node 1.12.2", 470 | "importlib-metadata 1.5.0", 471 | "idna 2.8", 472 | "idna-ssl 
1.1.0", 473 | "distro 1.3.0", 474 | "chardet 3.0.4", 475 | "cffi 1.14.0", 476 | "cchardet 2.1.5", 477 | "base58 1.0.3", 478 | "attrs 19.3.0", 479 | "async-timeout 3.0.1", 480 | "aiosqlite 0.10.0", 481 | "aiohttp 3.5.4", 482 | "aiohttp-jinja2 1.1.2", 483 | "aiodns 2.0.0", 484 | "indy-crypto 0.5.1" 485 | ], 486 | "Indy_packages": [ 487 | "" 488 | ], 489 | "indy-node": "1.12.2", 490 | "sovrin": "unknown" 491 | }, 492 | "Update_time": "Monday, July 6, 2020 11:58:26 PM +0000", 493 | "Memory_profiler": [], 494 | "Extractions": { 495 | "journalctl_exceptions": [ 496 | "" 497 | ], 498 | "indy-node_status": [ 499 | "" 500 | ], 501 | "node-control status": [ 502 | "" 503 | ], 504 | "upgrade_log": "", 505 | "stops_stat": null 506 | } 507 | } 508 | } 509 | ] 510 | ``` 511 | -------------------------------------------------------------------------------- /fetch-validator-status/REST_API.md: -------------------------------------------------------------------------------- 1 | # Rest API 2 | 3 | To run [fetch validator](README.md) as a webAPI `cd fetch-validator-status` and `IM=1 ./run.sh --web -v` to start the server. 4 | To run in debug mode add `--debug`. 5 | 6 | ## How To Use 7 | 8 | After running the command above. Go to http://localhost:8080/ in your browser. Then click on one of the colored drop downs and click the 'Try it out' button. Fill out any required fields then click 'execute'. This will give you a response with a, curl command, request url, and response body. -------------------------------------------------------------------------------- /fetch-validator-status/fetch_status.py: -------------------------------------------------------------------------------- 1 | from indy_vdr.ledger import ( 2 | build_get_validator_info_request, 3 | build_get_txn_request, 4 | ) 5 | from util import log 6 | from plugin_collection import PluginCollection 7 | from DidKey import DidKey 8 | from pool import PoolCollection 9 | from singleton import Singleton 10 | 11 | class NodeNotFound(Exception): 12 | pass 13 | 14 | class FetchStatus(object, metaclass=Singleton): 15 | def __init__(self, verbose, pool_collection: PoolCollection): 16 | self.verbose = verbose 17 | self.pool_collection = pool_collection 18 | 19 | async def fetch(self, network_id: str, monitor_plugins: PluginCollection, nodes: str = None, ident: DidKey = None): 20 | result = [] 21 | verifiers = {} 22 | 23 | pool, network_name = await self.pool_collection.get_pool(network_id) 24 | if ident: 25 | log(f"Building request with did: {ident.did} ...") 26 | request = build_get_validator_info_request(ident.did) 27 | ident.sign_request(request) 28 | else: 29 | log("Building an anonymous request ...") 30 | request = build_get_txn_request(None, 1, 1) 31 | 32 | from_nodes = [] 33 | if nodes: 34 | from_nodes = nodes.split(",") 35 | 36 | try: 37 | # Introduced in https://github.com/hyperledger/indy-vdr/commit/ce0e7c42491904e0d563f104eddc2386a52282f7 38 | log("Getting list of verifiers ...") 39 | verifiers = await pool.get_verifiers() 40 | except AttributeError: 41 | log("Unable to get list of verifiers. 
Please make sure you have the latest version of indy-vdr.") 42 | pass 43 | 44 | if verifiers and from_nodes: 45 | for node in from_nodes: 46 | if not node in verifiers: 47 | raise NodeNotFound(f'{node} is not a member of {network_name}.') 48 | 49 | log("Submitting request ...") 50 | response = await pool.submit_action(request, node_aliases = from_nodes) 51 | 52 | log("Passing results to plugins for processing ...") 53 | result = await monitor_plugins.apply_all_plugins_on_value(result, network_name, response, verifiers) 54 | log("Processing complete.") 55 | return result -------------------------------------------------------------------------------- /fetch-validator-status/gunicorn_conf.py: -------------------------------------------------------------------------------- 1 | """ 2 | File created by tiangolo. 3 | https://github.com/tiangolo/uvicorn-gunicorn-docker/blob/master/docker-images/gunicorn_conf.py 4 | """ 5 | 6 | import json 7 | import multiprocessing 8 | import os 9 | 10 | workers_per_core_str = os.getenv("WORKERS_PER_CORE", "1") 11 | max_workers_str = os.getenv("MAX_WORKERS") 12 | use_max_workers = None 13 | if max_workers_str: 14 | use_max_workers = int(max_workers_str) 15 | web_concurrency_str = os.getenv("WEB_CONCURRENCY", None) 16 | 17 | host = os.getenv("HOST", "0.0.0.0") 18 | port = os.getenv("PORT", "8080") 19 | bind_env = os.getenv("BIND", None) 20 | use_loglevel = os.getenv("LOG_LEVEL", "info") 21 | if bind_env: 22 | use_bind = bind_env 23 | else: 24 | use_bind = f"{host}:{port}" 25 | 26 | cores = multiprocessing.cpu_count() 27 | workers_per_core = float(workers_per_core_str) 28 | default_web_concurrency = workers_per_core * cores 29 | if web_concurrency_str: 30 | web_concurrency = int(web_concurrency_str) 31 | assert web_concurrency > 0 32 | else: 33 | web_concurrency = max(int(default_web_concurrency), 2) 34 | if use_max_workers: 35 | web_concurrency = min(web_concurrency, use_max_workers) 36 | accesslog_var = os.getenv("ACCESS_LOG", "-") 37 | use_accesslog = accesslog_var or None 38 | errorlog_var = os.getenv("ERROR_LOG", "-") 39 | use_errorlog = errorlog_var or None 40 | graceful_timeout_str = os.getenv("GRACEFUL_TIMEOUT", "120") 41 | timeout_str = os.getenv("TIMEOUT", "120") 42 | keepalive_str = os.getenv("KEEP_ALIVE", "5") 43 | 44 | # Gunicorn config variables 45 | loglevel = use_loglevel 46 | workers = web_concurrency 47 | bind = use_bind 48 | errorlog = use_errorlog 49 | worker_tmp_dir = "/dev/shm" 50 | accesslog = use_accesslog 51 | graceful_timeout = int(graceful_timeout_str) 52 | timeout = int(timeout_str) 53 | keepalive = int(keepalive_str) 54 | 55 | 56 | # For debugging and testing 57 | log_data = { 58 | "loglevel": loglevel, 59 | "workers": workers, 60 | "bind": bind, 61 | "graceful_timeout": graceful_timeout, 62 | "timeout": timeout, 63 | "keepalive": keepalive, 64 | "errorlog": errorlog, 65 | "accesslog": accesslog, 66 | # Additional, non-gunicorn variables 67 | "workers_per_core": workers_per_core, 68 | "use_max_workers": use_max_workers, 69 | "host": host, 70 | "port": port, 71 | } 72 | print('gunicorn config:') 73 | print(json.dumps(log_data, indent=2)) -------------------------------------------------------------------------------- /fetch-validator-status/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import json 4 | import os 5 | import indy_vdr 6 | from util import ( 7 | enable_verbose, 8 | log, 9 | create_did 10 | ) 11 | from fetch_status import FetchStatus 12 | 
from pool import PoolCollection 13 | from networks import Networks 14 | from plugin_collection import PluginCollection 15 | 16 | if __name__ == "__main__": 17 | parser = argparse.ArgumentParser(description="Fetch the status of all the indy-nodes within a given pool.") 18 | parser.add_argument("--net", choices=Networks.get_ids(), help="Connect to a known network using an ID.") 19 | parser.add_argument("--list-nets", action="store_true", help="List known networks.") 20 | parser.add_argument("--genesis-url", default=os.environ.get('GENESIS_URL') , help="The url to the genesis file describing the ledger pool. Can be specified using the 'GENESIS_URL' environment variable.") 21 | parser.add_argument("--genesis-path", default=os.getenv("GENESIS_PATH"), help="The path to the genesis file describing the ledger pool. Can be specified using the 'GENESIS_PATH' environment variable.") 22 | parser.add_argument("-s", "--seed", default=os.environ.get('SEED') , help="The privileged DID seed to use for the ledger requests. Can be specified using the 'SEED' environment variable. If DID seed is not given the request will run anonymously.") 23 | parser.add_argument("--nodes", help="The comma delimited list of the nodes from which to collect the status. The default is all of the nodes in the pool.") 24 | parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose logging.") 25 | parser.add_argument("--web", action="store_true", help="Start API server.") 26 | parser.add_argument("--debug", action="store_true", help="Run in debug mode.") 27 | parser.add_argument("--raw", action="store_true", help="Output the result as raw unformatted json with no whitespace.") 28 | 29 | monitor_plugins = PluginCollection('plugins') 30 | monitor_plugins.get_parse_args(parser) 31 | args, unknown = parser.parse_known_args() 32 | monitor_plugins.load_all_parse_args(args) 33 | 34 | enable_verbose(args.verbose) 35 | 36 | if args.web: 37 | if args.seed: 38 | print("WARNING: You are trying to run the REST API with a SEED.") 39 | print("Please remove your SEED and try again.") 40 | print("Exiting...") 41 | exit() 42 | 43 | # Pass verbose to rest api through env var 44 | os.environ['VERBOSE'] = str(args.verbose) 45 | 46 | MODULE_NAME = os.environ.get('MODULE_NAME', "rest_api") 47 | VARIABLE_NAME = os.environ.get('VARIABLE_NAME', "app") 48 | APP_MODULE = os.environ.get('APP_MODULE', f"{MODULE_NAME}:{VARIABLE_NAME}") 49 | 50 | if args.debug: 51 | HOST = os.environ.get('HOST', '0.0.0.0') 52 | PORT = os.environ.get('PORT', '8080') 53 | LOG_LEVEL = os.environ.get('LOG_LEVEL', 'info') 54 | 55 | log("Starting web server in debug mode ...") 56 | os.system(f'uvicorn --reload --host {HOST} --port {PORT} --log-level {LOG_LEVEL} "{APP_MODULE}"') 57 | else: 58 | GUNICORN_CONF = os.environ.get('GUNICORN_CONF', 'gunicorn_conf.py') 59 | WORKER_CLASS = os.environ.get('WORKER_CLASS', "uvicorn.workers.UvicornWorker") 60 | 61 | log("Starting web server ...") 62 | os.system(f'gunicorn -k "{WORKER_CLASS}" -c "{GUNICORN_CONF}" "{APP_MODULE}"') 63 | else: 64 | log("Starting from the command line ...") 65 | 66 | if args.list_nets: 67 | print(json.dumps(Networks.get_networks(), indent=2)) 68 | exit() 69 | 70 | log("indy-vdr version:", indy_vdr.version()) 71 | did_seed = None if not args.seed else args.seed 72 | ident = create_did(did_seed) 73 | networks = Networks() 74 | pool_collection = PoolCollection(args.verbose, networks) 75 | network = networks.resolve(args.net, args.genesis_url, args.genesis_path) 76 | node_info = 
FetchStatus(args.verbose, pool_collection) 77 | result = asyncio.get_event_loop().run_until_complete(node_info.fetch(network.id, monitor_plugins, args.nodes, ident)) 78 | if args.raw: 79 | print(json.dumps(result, separators=(',', ':'))) 80 | else: 81 | print(json.dumps(result, indent=2)) 82 | -------------------------------------------------------------------------------- /fetch-validator-status/networks.json: -------------------------------------------------------------------------------- 1 | { 2 | "sbn": { 3 | "name": "Sovrin Builder Net", 4 | "indyNamespace": "sovrin:builder", 5 | "genesisUrl": "https://raw.githubusercontent.com/sovrin-foundation/sovrin/stable/sovrin/pool_transactions_builder_genesis" 6 | }, 7 | "ssn": { 8 | "name": "Sovrin Staging Net", 9 | "indyNamespace": "sovrin:test", 10 | "genesisUrl": "https://raw.githubusercontent.com/sovrin-foundation/sovrin/stable/sovrin/pool_transactions_sandbox_genesis" 11 | }, 12 | "smn": { 13 | "name": "Sovrin Main Net", 14 | "indyNamespace": "sovrin", 15 | "genesisUrl": "https://raw.githubusercontent.com/sovrin-foundation/sovrin/stable/sovrin/pool_transactions_live_genesis" 16 | }, 17 | "vn": { 18 | "name": "Local von-network", 19 | "indyNamespace": "local:dev-docker", 20 | "genesisUrl": "http://host.docker.internal:9000/genesis" 21 | }, 22 | "vn-linux": { 23 | "name": "Linux Local von-network", 24 | "indyNamespace": "local:dev-linux", 25 | "genesisUrl": "http://localhost:9000/genesis" 26 | }, 27 | "bcd": { 28 | "name": "BCovrin Dev", 29 | "indyNamespace": "bcovrin:dev", 30 | "genesisUrl": "http://dev.bcovrin.vonx.io/genesis" 31 | }, 32 | "bct": { 33 | "name": "BCovrin Test", 34 | "indyNamespace": "bcovrin:test", 35 | "genesisUrl": "http://test.bcovrin.vonx.io/genesis" 36 | }, 37 | "bcp": { 38 | "name": "BCovrin", 39 | "indyNamespace": "bcovrin", 40 | "genesisUrl": "http://prod.bcovrin.vonx.io/genesis" 41 | }, 42 | "gld": { 43 | "name": "GreenLight Dev Ledger", 44 | "indyNamespace": "bcovrin:dev.greenlight", 45 | "genesisUrl": "http://dev.greenlight.bcovrin.vonx.io/genesis" 46 | }, 47 | "gl": { 48 | "name": "GreenLight Ledger", 49 | "indyNamespace": "bcovrin:greenlight", 50 | "genesisUrl": "http://greenlight.bcovrin.vonx.io/genesis" 51 | }, 52 | "imn": { 53 | "name": "Indicio MainNet", 54 | "indyNamespace": "indicio", 55 | "genesisUrl": "https://raw.githubusercontent.com/Indicio-tech/indicio-network/main/genesis_files/pool_transactions_mainnet_genesis" 56 | }, 57 | "idn": { 58 | "name": "Indicio DemoNet", 59 | "indyNamespace": "indicio:demo", 60 | "genesisUrl": "https://raw.githubusercontent.com/Indicio-tech/indicio-network/main/genesis_files/pool_transactions_demonet_genesis" 61 | }, 62 | "itn": { 63 | "name": "Indicio TestNet", 64 | "indyNamespace": "indicio:test", 65 | "genesisUrl": "https://raw.githubusercontent.com/Indicio-tech/indicio-network/main/genesis_files/pool_transactions_testnet_genesis" 66 | }, 67 | "cdn": { 68 | "name": "CANdy Dev Network (CANdy-dev)", 69 | "indyNamespace": "candy:dev", 70 | "genesisUrl": "https://raw.githubusercontent.com/ICCS-ISAC/dtrust-reconu/main/CANdy/dev/pool_transactions_genesis" 71 | }, 72 | "ctn": { 73 | "name": "CANdy Test Network (CANdy-test)", 74 | "indyNamespace": "candy:test", 75 | "genesisUrl": "https://raw.githubusercontent.com/ICCS-ISAC/dtrust-reconu/main/CANdy/test/pool_transactions_genesis" 76 | }, 77 | "cpn": { 78 | "name": "CANdy Production Network (CANdy-prod)", 79 | "indyNamespace": "candy", 80 | "genesisUrl": 
"https://raw.githubusercontent.com/ICCS-ISAC/dtrust-reconu/main/CANdy/prod/pool_transactions_genesis" 81 | } 82 | } -------------------------------------------------------------------------------- /fetch-validator-status/networks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import urllib.request 4 | import sys 5 | import re 6 | from enum import Enum 7 | from collections import namedtuple 8 | from util import log 9 | from singleton import Singleton 10 | 11 | Network = namedtuple('Network', ['id', 'name', 'genesis_url', 'genesis_path']) 12 | 13 | class NetworkEnum(Enum): 14 | def _generate_next_value_(name, start, count, last_values): 15 | return name 16 | 17 | class Networks(object, metaclass=Singleton): 18 | def __init__(self): 19 | self._networks = self.__load_network_list() 20 | 21 | def __get_script_dir(self): 22 | return os.path.dirname(os.path.realpath(__file__)) 23 | 24 | def __load_network_list(self): 25 | log("Loading known network list ...") 26 | with open(f"{self.__get_script_dir()}/networks.json") as json_file: 27 | networks = json.load(json_file) 28 | return networks 29 | 30 | @property 31 | def ids(self): 32 | return self._networks.keys() 33 | 34 | @property 35 | def networks(self): 36 | return self._networks 37 | 38 | @staticmethod 39 | def get_ids(): 40 | networks = Networks() 41 | return networks.ids 42 | 43 | @staticmethod 44 | def get_networks(): 45 | networks = Networks() 46 | return networks.networks 47 | 48 | @staticmethod 49 | def __download_genesis_file(genesis_url: str, destination_path: str): 50 | log("Fetching genesis file ...") 51 | urllib.request.urlretrieve(genesis_url, destination_path) 52 | 53 | @staticmethod 54 | def get_NetworkEnum() -> NetworkEnum: 55 | """Dynamically generates a NetworkEnum that can be used to select the available Networks. 56 | """ 57 | networks = Networks() 58 | return NetworkEnum('Network', list(networks.ids)) 59 | 60 | def resolve(self, network_id: str = None, genesis_url: str = None, genesis_path: str = None): 61 | network_name = None 62 | genesis_path_base = f"{self.__get_script_dir()}/cache/" 63 | 64 | if network_id and network_id in self.ids: 65 | log("Connecting to '{0}' ...".format(self.networks[network_id]["name"])) 66 | network_name = self.networks[network_id]["name"] 67 | genesis_url = self.networks[network_id]["genesisUrl"] 68 | if 'genesisPath' in self.networks[network_id]: 69 | genesis_path = self.networks[network_id]['genesisPath'] 70 | 71 | if genesis_url: 72 | if not network_name: 73 | network_name = genesis_url 74 | network_id = network_name 75 | log(f"Setting network name = {network_name} ...") 76 | 77 | if not genesis_path: 78 | # Remove and replace parts of the string to make a valid path based on the network name. 
79 | sub_path = network_name.replace("https://", "") 80 | sub_path = re.sub('[ /.]', '_', sub_path) 81 | genesis_path = f"{genesis_path_base}{sub_path}/" 82 | if not os.path.exists(genesis_path): 83 | os.makedirs(genesis_path) 84 | genesis_path = f"{genesis_path}genesis.txn" 85 | Networks.__download_genesis_file(genesis_url, genesis_path) 86 | self._networks[network_id] = {'name': network_name, 'genesisUrl': genesis_url, 'genesisPath': genesis_path} 87 | 88 | if not os.path.exists(genesis_path): 89 | print("Set the GENESIS_URL or GENESIS_PATH environment variable or argument.\n", file=sys.stderr) 90 | exit() 91 | 92 | network = Network(network_id, network_name, genesis_url, genesis_path) 93 | return network -------------------------------------------------------------------------------- /fetch-validator-status/plugin_collection.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2020 Guido Diepen 5 | Git Hub Repo: https://github.com/gdiepen/python_plugin_example 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | 25 | Change History: 26 | - 15/01/2021 Modified from orginal. 27 | """ 28 | 29 | import inspect 30 | import os 31 | import sys 32 | import pkgutil 33 | 34 | 35 | class Plugin(object): 36 | """Base class that each plugin must inherit from. within this class 37 | you must define the methods that all of your plugins must implement 38 | """ 39 | 40 | def __init__(self): 41 | self.index = None 42 | self.name = 'UNKNOWN' 43 | self.description = 'UNKNOWN' 44 | self.type = 'UNKNOWN' 45 | self.enabled = False 46 | 47 | def parse_args(self, argument): 48 | raise NotImplementedError 49 | 50 | def load_parse_args(self, argument): 51 | raise NotImplementedError 52 | 53 | def perform_operation(self, argument): 54 | """The method that we expect all plugins to implement. 
This is the 55 | method that our framework will call 56 | """ 57 | raise NotImplementedError 58 | 59 | class PluginCollection(object): 60 | """Upon creation, this class will read the plugins package for modules 61 | that contain a class definition that is inheriting from the Plugin class 62 | """ 63 | 64 | def __init__(self, plugin_package): 65 | """Constructor that initiates the reading of all available plugins 66 | when an instance of the PluginCollection object is created 67 | """ 68 | self.plugin_package = plugin_package 69 | self.reload_plugins() 70 | 71 | def reload_plugins(self): 72 | """Reset the list of all plugins and initiate the walk over the main 73 | provided plugin package to load all available plugins 74 | """ 75 | self.plugins = [] 76 | self.seen_paths = [] 77 | # print(f'\nLooking for plugins under package {self.plugin_package}') 78 | self.walk_package(self.plugin_package) 79 | self.sort() 80 | 81 | async def apply_all_plugins_on_value(self, result, network_name, response, verifiers): 82 | """Apply all of the plugins with the argument supplied to this function 83 | """ 84 | self.log(f'\033[38;5;37mRunning plugins ...\033[0m\n') 85 | for plugin in self.plugins: 86 | if plugin.enabled: 87 | self.log(f'\033[38;5;37mRunning {plugin.name} ...\033[0m\n') 88 | result = await plugin.perform_operation(result, network_name, response, verifiers) 89 | else: 90 | self.log(f"\033[38;5;3m{plugin.name} disabled.\033[0m\n") 91 | return result 92 | 93 | def walk_package(self, package): 94 | """Recursively walk the supplied package to retrieve all plugins 95 | """ 96 | imported_package = __import__(package, fromlist=['blah']) 97 | 98 | for _, pluginname, ispkg in pkgutil.iter_modules(imported_package.__path__, imported_package.__name__ + '.'): 99 | if not ispkg: 100 | plugin_module = __import__(pluginname, fromlist=['blah']) 101 | clsmembers = inspect.getmembers(plugin_module, inspect.isclass) 102 | for (_, c) in clsmembers: 103 | # Only add classes that are a sub class of Plugin, but NOT Plugin itself 104 | if issubclass(c, Plugin) & (c is not Plugin): 105 | # print(f' Found plugin class: {c.__module__}.{c.__name__}') 106 | self.plugins.append(c()) 107 | 108 | 109 | # Now that we have looked at all the modules in the current package, start looking 110 | # recursively for additional modules in sub packages 111 | all_current_paths = [] 112 | if isinstance(imported_package.__path__, str): 113 | all_current_paths.append(imported_package.__path__) 114 | else: 115 | all_current_paths.extend([x for x in imported_package.__path__]) 116 | 117 | for pkg_path in all_current_paths: 118 | if pkg_path not in self.seen_paths: 119 | self.seen_paths.append(pkg_path) 120 | 121 | # Get all sub directory of the current package path directory 122 | child_pkgs = [p for p in os.listdir(pkg_path) if os.path.isdir(os.path.join(pkg_path, p))] 123 | 124 | # For each sub directory, apply the walk_package method recursively 125 | for child_pkg in child_pkgs: 126 | self.walk_package(package + '.' 
+ child_pkg) 127 | 128 | def sort(self): 129 | self.plugins.sort(key=lambda x: x.index, reverse=False) 130 | 131 | def get_parse_args(self, parser): 132 | for plugin in self.plugins: 133 | plugin.parse_args(parser) 134 | 135 | def load_all_parse_args(self, args): 136 | global verbose 137 | verbose = args.verbose 138 | if verbose: self.plugin_list() 139 | for plugin in self.plugins: 140 | plugin.load_parse_args(args) 141 | 142 | def log(self, *args): 143 | if verbose: 144 | print(*args, file=sys.stderr) 145 | 146 | def plugin_list(self): 147 | self.log("\033[38;5;37m===========================================================\033[0m") 148 | self.log("\033[38;5;37m| Plug-ins |\033[0m") 149 | self.log("\033[38;5;37m-----------------------------------------------------------\033[0m") 150 | if len(self.plugins) > 0: 151 | for plugin in self.plugins: 152 | self.log(f"\033[38;5;37m - {plugin.name}: {plugin.__class__.__module__}.{plugin.__class__.__name__}\033[0m") 153 | else: 154 | self.log(f"\033[38;5;37m - No plug-ins found ...\033[0m") 155 | self.log("\033[38;5;37m===========================================================\n\033[0m") 156 | -------------------------------------------------------------------------------- /fetch-validator-status/plugins/Example/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | 3 | The [Example Plug-in](example.py) is to be used as a template to build your own plug-ins. All is does is add `"examplePlugin": "Hello World"` to every response. This example is built as a package, but you can build plug-ins as a module like the Status Only Plug-in. 4 | 5 | Be sure to make a copy before you start building. 6 | 7 | ## How To Use 8 | `./run.sh --net ssn --status --example` 9 | 10 | --example: enables the plug-in 11 | 12 | ## Example Print Out 13 | ``` 14 | [ 15 | { 16 | "name": "test", 17 | "client-address": "tcp://00.000.000.000:0000", 18 | "node-address": "tcp://00.00.000.000:0000", 19 | "status": { 20 | "ok": true, 21 | "timestamp": "1615838114" 22 | }, 23 | "examplePlugin": "Hello World" 24 | } 25 | ] 26 | ``` -------------------------------------------------------------------------------- /fetch-validator-status/plugins/Example/example.py: -------------------------------------------------------------------------------- 1 | # This example shows the basic structure required in order for a plugin to be run. 2 | 3 | # Required 4 | import plugin_collection 5 | 6 | # Add your imports here 7 | import json 8 | 9 | # The plug-in collection parent class will find classes in the file. 10 | class main(plugin_collection.Plugin): 11 | # __init__ is required and has the necessary items in order to differentiate between plug-ins. 12 | def __init__(self): 13 | super().__init__() 14 | self.index = 3 15 | self.name = 'Example Plug-in' 16 | self.description = '' 17 | self.type = '' 18 | 19 | def parse_args(self, parser): 20 | # Declear your parser arguments here. This will add them to the main.py parser arguments. 21 | parser.add_argument("--example", action="store_true", help="Example Plug-in: Runs expample plug-in") 22 | 23 | # Here you set your variables with the arguments from the parser 24 | def load_parse_args(self, args): 25 | # Important for maintaining global verbose 26 | global verbose 27 | verbose = args.verbose 28 | 29 | # Required to enable your plug-in. This should match your parser argument. i.e. 
--example -> args.example 30 | self.enabled = args.example 31 | 32 | # Set your variables here 33 | 34 | # This is where your main code goes; it is kicked off after the information has been retrieved from the network. 35 | # It is passed the results from the network and the name of the network. 36 | async def perform_operation(self, result, network_name, response, verifiers): 37 | # Main code here 38 | for node in result: 39 | node["examplePlugin"] = "Hello World" 40 | 41 | # Required for chaining plugins whether or not you are using result. 42 | return result -------------------------------------------------------------------------------- /fetch-validator-status/plugins/README.md: -------------------------------------------------------------------------------- 1 | # Plug-ins 2 | 3 | ## Building Plug-ins 4 | 5 | Build your own class-based plug-ins to extract the information you want. Have a look at the included [example plug-in](Example/example.py) to see how to build your own. 6 | 7 | ## About Plug-ins 8 | 9 | Plug-in modules and packages are collected from the plug-ins folder and sorted based on the order you specify by setting the index property in the given plug-in. Once the plug-ins are loaded, the command line arguments are collected from the `parse_args()` function of each of the plug-ins. They are then passed into the `load_parse_args()` function, where each plug-in collects its class variables. 10 | 11 | The data collected from the network is passed in sequence to each of the plugins, giving each the opportunity to parse and manipulate the data before passing the result on to subsequent plugins. 12 | 13 | Note: plug-ins are only enabled when their flag is given; i.e. the [Alerts Plug-in](alerts/alerts.py) will only run if the `--alerts` flag is given. If you have a plug-in that requires more than one argument, the first flag enables the plug-in and the following flags contain your additional arguments. See the [Network Metrics Plug-in](metrics/network_metrics.py) as an example. 14 | 15 | Once the plug-ins are initialized, the monitor engine will collect the `validator-info` data from the nodes in the specified network. The engine will then pass the response to the [Analysis Plug-in](analysis.py) before passing the analyzed result to all of the subsequent plug-ins for processing. 16 | 17 | Have a look at the included plug-ins to get an idea of how to build your own! 18 | 19 | ## Analysis 20 | 21 | The [Analysis Plug-in](analysis.py) performs an analysis of the response returned from the network and returns the result. 22 | 23 | *WARNING: this plug-in has to run first in order for the other plug-ins to work. The plug-in index should be set to ZERO inside the plug-in class's `__init__` method, i.e. `self.index = 0`.* 24 | *This plug-in is required in order to run this monitor and will automatically run without a command line argument.* 25 | 26 | ## Status Only Plug-in 27 | 28 | The [Status Only Plug-in](status_only.py) removes the full response from the result, returning only the status.
29 | 30 | ### How To Use 31 | `./run.sh --net ssn --status` or `./run.sh --net ssn --status --alerts` 32 | 33 | --status: enables the plug-in 34 | 35 | ### Example Print Out 36 | ``` 37 | [ 38 | { 39 | "name": "Test", 40 | "client-address": "tcp://00.00.00.00:0000", 41 | "node-address": "tcp://00.00.00.00:0000", 42 | "status": { 43 | "ok": true, 44 | "timestamp": "1615837991" 45 | } 46 | }, 47 | { 48 | "name": "ect", 49 | "client-address": "tcp://00.00.00.00:0000", 50 | "node-address": "tcp://00.00.00.00:0000", 51 | "status": { 52 | "ok": true, 53 | "timestamp": "1615837991" 54 | } 55 | } 56 | ] 57 | ``` 58 | 59 | ## Network Metrics 60 | 61 | See [readme](metrics/README.md) 62 | 63 | ## Alerts 64 | 65 | See [readme](alerts/README.md) 66 | 67 | ## Generate Network Upgrade Schedule 68 | 69 | See [readme](generate_upgrade_schedule/README.md) 70 | 71 | ## Example 72 | 73 | See [readme](Example/README.md) 74 | -------------------------------------------------------------------------------- /fetch-validator-status/plugins/alerts/README.md: -------------------------------------------------------------------------------- 1 | # Alerts 2 | 3 | The [Alerts Plug-in](alerts.py) filters the results down to only the nodes that contain "info", "warnings", or "errors" in the "status" field. Can be used in conjunction with the Status Only Plug-in. 4 | 5 | ## How To Use 6 | `./run.sh --net ssn --alerts` or `./run.sh --net ssn --alerts --status` 7 | 8 | --alerts: enables the plug-in 9 | 10 | ## Example Print Out 11 | ``` 12 | [ 13 | { 14 | "name": "test", 15 | "client-address": "tcp://00.000.000.000:0000", 16 | "node-address": "tcp://00.00.000.000:0000", 17 | "status": { 18 | "ok": false, 19 | "timestamp": "1615838114", 20 | "errors": 1 21 | }, 22 | "errors": ["timeout"] 23 | } 24 | ] 25 | ``` -------------------------------------------------------------------------------- /fetch-validator-status/plugins/alerts/alerts.py: -------------------------------------------------------------------------------- 1 | import plugin_collection 2 | import argparse 3 | import json 4 | 5 | class main(plugin_collection.Plugin): 6 | 7 | def __init__(self): 8 | super().__init__() 9 | self.index = 1 10 | self.name = 'Alerts' 11 | self.description = '' 12 | self.type = '' 13 | 14 | # def description(self) 15 | # return self.description 16 | 17 | def parse_args(self, parser): 18 | parser.add_argument("--alerts", action="store_true", help="Alert Plug-in: Filter results based on alerts.
Only return data for nodes containing detected 'info', 'warnings', or 'errors'.") 19 | 20 | def load_parse_args(self, args): 21 | global verbose 22 | verbose = args.verbose 23 | 24 | self.enabled = args.alerts 25 | 26 | async def perform_operation(self, result, network_name, response, verifiers): 27 | # Filter on alerts 28 | filtered_result = [] 29 | for item in result: 30 | if ("info" in item["status"] and item["status"]["info"] > 0) \ 31 | or ("warnings" in item["status"] and item["status"]["warnings"] > 0) \ 32 | or ("errors" in item["status"] and item["status"]["errors"] > 0): 33 | filtered_result.append(item) 34 | result = filtered_result 35 | return result 36 | 37 | -------------------------------------------------------------------------------- /fetch-validator-status/plugins/analysis.py: -------------------------------------------------------------------------------- 1 | import plugin_collection 2 | import json 3 | import datetime 4 | from DidKey import DidKey 5 | from typing import Tuple 6 | 7 | class main(plugin_collection.Plugin): 8 | 9 | def __init__(self): 10 | super().__init__() 11 | self.index = 0 12 | self.name = 'Analysis' 13 | self.description = '' 14 | self.type = '' 15 | self.enabled = True 16 | 17 | def parse_args(self, parser): 18 | pass 19 | 20 | def load_parse_args(self, args): 21 | global verbose 22 | verbose = args.verbose 23 | 24 | async def perform_operation(self, result, network_name, response, verifiers): 25 | primary = "" 26 | packages = {} 27 | for node, val in response.items(): 28 | jsval = [] 29 | status = {} 30 | errors = [] 31 | warnings = [] 32 | info = [] 33 | entry = {"name": node} 34 | entry["network"] = network_name 35 | try: 36 | await self.get_node_addresses(entry, verifiers) 37 | jsval = json.loads(val) 38 | if not primary: 39 | primary = await self.get_primary_name(jsval, node) 40 | errors, warnings = await self.detect_issues(jsval, node, primary) 41 | info = await self.get_info(jsval) 42 | packages[node] = await self.get_package_info(jsval) 43 | except json.JSONDecodeError: 44 | errors = [val] # likely "timeout" 45 | 46 | # Status Summary 47 | entry["status"] = await self.get_status_summary(jsval, errors) 48 | # Info 49 | if len(info) > 0: 50 | entry["status"]["info"] = len(info) 51 | entry["info"] = info 52 | # Errors / Warnings 53 | entry["status"]["errors"] = len(errors) 54 | if len(errors) > 0: 55 | entry["errors"] = errors 56 | entry["status"]["warnings"] = len(warnings) 57 | if len(warnings) > 0: 58 | entry["warnings"] = warnings 59 | # Full Response 60 | if jsval: 61 | entry["response"] = jsval # put into status plugin minus response 62 | 63 | result.append(entry) 64 | 65 | # Package Mismatches 66 | if packages: 67 | await self.merge_package_mismatch_info(result, packages) 68 | 69 | # Connection Issues 70 | await self.detect_connection_issues(result) 71 | 72 | return result 73 | 74 | async def get_node_addresses(self, entry: any, verifiers: any) -> any: 75 | if verifiers: 76 | node_name = entry["name"] 77 | if "client_addr" in verifiers[node_name]: 78 | entry["client-address"] = verifiers[node_name]["client_addr"] 79 | if "node_addr" in verifiers[node_name]: 80 | entry["node-address"] = verifiers[node_name]["node_addr"] 81 | 82 | async def get_primary_name(self, jsval: any, node: str) -> str: 83 | primary = "" 84 | if "REPLY" in jsval["op"]: 85 | if "Node_info" in jsval["result"]["data"]: 86 | primary = jsval["result"]["data"]["Node_info"]["Replicas_status"][node+":0"]["Primary"] 87 | return primary 88 | 89 | async def 
get_status_summary(self, jsval: any, errors: list) -> any: 90 | status = {} 91 | status["ok"] = (len(errors) <= 0) 92 | # Ensure there is always a consistent timestamp 93 | # Note: We are not using the timestamp from the node itself for this; result.data.timestamp 94 | # - There could be clock skew on the node which would affect the time series data when 95 | # recorded and graphed. This would adversely affect the comparison of events across nodes. 96 | # - In the case of a node that is not responding, we would not receive the timestamp 97 | # from the node. 98 | # - The solution is to add a consistent timestamp marking the time the data was collected 99 | # by the monitor. 100 | status["timestamp"] = datetime.datetime.now(datetime.timezone.utc).strftime('%s') 101 | if jsval and ("REPLY" in jsval["op"]): 102 | if "timestamp" in jsval["result"]["data"]: 103 | status["node_timestamp"] = jsval["result"]["data"]["timestamp"] 104 | if "Node_info" in jsval["result"]["data"]: 105 | status["uptime"] = str(datetime.timedelta(seconds = jsval["result"]["data"]["Node_info"]["Metrics"]["uptime"])) 106 | if "Software" in jsval["result"]["data"]: 107 | status["software"] = {} 108 | status["software"]["indy-node"] = jsval["result"]["data"]["Software"]["indy-node"] 109 | status["software"]["sovrin"] = jsval["result"]["data"]["Software"]["sovrin"] 110 | 111 | return status 112 | 113 | async def get_package_info(self, jsval: any) -> any: 114 | packages = {} 115 | if jsval and ("REPLY" in jsval["op"]): 116 | if "Software" in jsval["result"]["data"]: 117 | for installed_package in jsval["result"]["data"]["Software"]["Installed_packages"]: 118 | package, version = installed_package.split() 119 | packages[package] = version 120 | 121 | return packages 122 | 123 | async def get_info(self, jsval: any) -> any: 124 | info = [] 125 | if ("REPLY" in jsval["op"]) and ("Extractions" in jsval["result"]["data"]): 126 | # Pending Upgrade 127 | if jsval["result"]["data"]["Extractions"]["upgrade_log"]: 128 | current_upgrade_status = jsval["result"]["data"]["Extractions"]["upgrade_log"][-1] 129 | if "succeeded" not in current_upgrade_status: 130 | info.append("Pending Upgrade: {0}".format(current_upgrade_status.replace('\t', ' ').replace('\n', ''))) 131 | 132 | return info 133 | 134 | async def merge_package_mismatch_info(self, result: any, packages: any): 135 | package_warnings = await self.check_package_versions(packages) 136 | if package_warnings: 137 | for node_name in package_warnings: 138 | entry_to_update = [t for t in result if t["name"] == node_name][0] 139 | if "warnings" in entry_to_update: 140 | for item in package_warnings[node_name]: 141 | entry_to_update["warnings"].append(item) 142 | else: 143 | entry_to_update["warnings"] = package_warnings[node_name] 144 | entry_to_update["status"]["warnings"] = len(entry_to_update["warnings"]) 145 | 146 | async def check_package_versions(self, packages: any) -> any: 147 | warnings = {} 148 | for node, package_list in packages.items(): 149 | mismatches = [] 150 | for package, version in package_list.items(): 151 | total = 0 152 | same = 0 153 | other_version = "" 154 | for comp_node, comp_package_list in packages.items(): 155 | if package in comp_package_list: 156 | total +=1 157 | comp_version = comp_package_list[package] 158 | if comp_version == version: 159 | same +=1 160 | else: 161 | other_version = comp_version 162 | if (same/total) < .5: 163 | mismatches.append("Package mismatch: '{0}' has '{1}' {2}, while most other nodes have '{1}' {3}".format(node, package, 
version, other_version)) 164 | if mismatches: 165 | warnings[node] = mismatches 166 | return warnings 167 | 168 | async def detect_issues(self, jsval: any, node: str, primary: str) -> Tuple[any, any]: 169 | errors = [] 170 | warnings = [] 171 | ledger_sync_status={} 172 | if "REPLY" in jsval["op"]: 173 | if "Node_info" in jsval["result"]["data"]: 174 | # Ledger Write Consensus Issues 175 | if not jsval["result"]["data"]["Node_info"]["Freshness_status"]["0"]["Has_write_consensus"]: 176 | errors.append("Config Ledger Has_write_consensus: {0}".format(jsval["result"]["data"]["Node_info"]["Freshness_status"]["0"]["Has_write_consensus"])) 177 | if not jsval["result"]["data"]["Node_info"]["Freshness_status"]["1"]["Has_write_consensus"]: 178 | errors.append("Main Ledger Has_write_consensus: {0}".format(jsval["result"]["data"]["Node_info"]["Freshness_status"]["1"]["Has_write_consensus"])) 179 | if not jsval["result"]["data"]["Node_info"]["Freshness_status"]["2"]["Has_write_consensus"]: 180 | errors.append("Pool Ledger Has_write_consensus: {0}".format(jsval["result"]["data"]["Node_info"]["Freshness_status"]["2"]["Has_write_consensus"])) 181 | if "1001" in jsval["result"]["data"]["Node_info"]["Freshness_status"]: 182 | if not jsval["result"]["data"]["Node_info"]["Freshness_status"]["1001"]["Has_write_consensus"]: 183 | errors.append("Token Ledger Has_write_consensus: {0}".format(jsval["result"]["data"]["Node_info"]["Freshness_status"]["1001"]["Has_write_consensus"])) 184 | 185 | # Ledger Status 186 | for ledger, status in jsval["result"]["data"]["Node_info"]["Catchup_status"]["Ledger_statuses"].items(): 187 | if status != "synced": 188 | ledger_sync_status[ledger] = status 189 | if ledger_sync_status: 190 | ledger_status = {} 191 | ledger_status["ledger_status"] = ledger_sync_status 192 | ledger_status["ledger_status"]["transaction-count"] = jsval["result"]["data"]["Node_info"]["Metrics"]["transaction-count"] 193 | warnings.append(ledger_status) 194 | 195 | # Mode 196 | if jsval["result"]["data"]["Node_info"]["Mode"] != "participating": 197 | warnings.append("Mode: {0}".format(jsval["result"]["data"]["Node_info"]["Mode"])) 198 | 199 | # Primary Node Mismatch 200 | if jsval["result"]["data"]["Node_info"]["Replicas_status"][node+":0"]["Primary"] != primary: 201 | warnings.append("Primary Mismatch! 
This Nodes Primary: {0} (Expected: {1})".format(jsval["result"]["data"]["Node_info"]["Replicas_status"][node+":0"]["Primary"], primary)) 202 | 203 | # Unreachable Nodes 204 | if jsval["result"]["data"]["Pool_info"]["Unreachable_nodes_count"] > 0: 205 | unreachable_node_list = [] 206 | unreachable_nodes = {"unreachable_nodes":{}} 207 | unreachable_nodes["unreachable_nodes"]["count"] = jsval["result"]["data"]["Pool_info"]["Unreachable_nodes_count"] 208 | for unreachable_node in jsval["result"]["data"]["Pool_info"]["Unreachable_nodes"]: 209 | unreachable_node_list.append(unreachable_node[0]) 210 | unreachable_nodes["unreachable_nodes"]["nodes"] = ', '.join(unreachable_node_list) 211 | warnings.append(unreachable_nodes) 212 | 213 | # Denylisted Nodes 214 | if len(jsval["result"]["data"]["Pool_info"]["Blacklisted_nodes"]) > 0: 215 | warnings.append("Denylisted Nodes: {0}".format(jsval["result"]["data"]["Pool_info"]["Blacklisted_nodes"])) 216 | else: 217 | if "reason" in jsval: 218 | errors.append(jsval["reason"]) 219 | else: 220 | errors.append("unknown error") 221 | 222 | return errors, warnings 223 | 224 | async def detect_connection_issues(self, result: any) -> any: 225 | for node in result: 226 | connection_errors = [] 227 | node_name = node["name"] 228 | if "warnings" in node: 229 | for warning in node["warnings"]: 230 | if "unreachable_nodes" in warning : 231 | for item in warning["unreachable_nodes"]["nodes"].split(', '): 232 | # This is the name of the unreachable node. Now we need to determine whether that node can't see the current one. 233 | # If the nodes can't see each other, upgrade to an error condition. 234 | unreachable_node_name = item 235 | unreachable_node_query_result = [t for t in result if t["name"] == unreachable_node_name] 236 | if unreachable_node_query_result: 237 | unreachable_node = unreachable_node_query_result[0] 238 | if "warnings" in unreachable_node: 239 | for unreachable_node_warning in unreachable_node["warnings"]: 240 | if "unreachable_nodes" in unreachable_node_warning : 241 | for unreachable_node_item in unreachable_node_warning["unreachable_nodes"]["nodes"].split(', '): 242 | if unreachable_node_item == node_name: 243 | connection_errors.append(node_name + " and " + unreachable_node_name + " can't reach each other.") 244 | 245 | # Merge errors and update status 246 | if connection_errors: 247 | if "errors" in node: 248 | for item in connection_errors: 249 | node["errors"].append(item) 250 | else: 251 | node["errors"] = connection_errors 252 | node["status"]["errors"] = len(node["errors"]) 253 | node["status"]["ok"] = (len(node["errors"]) <= 0) -------------------------------------------------------------------------------- /fetch-validator-status/plugins/generate_upgrade_schedule/README.md: -------------------------------------------------------------------------------- 1 | # Generate Network Upgrade Schedule 2 | 3 | The [Generate Network Upgrade Schedule Plug-in](generate_upgrade_schedule.py) queries a given network and generates a network update schedule for the nodes. The output can be formatted and used with the indy-cli `ledger pool-upgrade` command 4 | 5 | ## How To Use 6 | `./run.sh --net sbn --seed --raw --upgrade-schedule --upgrade-start "2022-08-13T05:00:00-0700"` 7 | 8 | `--raw`: 9 | - Outputs the result as raw unformatted json with no whitespace. 10 | 11 | `--upgrade-schedule`: 12 | - enables the plug-in. 13 | 14 | `--upgrade-start`: 15 | - defines the start day and time for the upgrade. 
The time must be specified in the form yyyy-mm-ddTHH:MM:SSz; for example 2022-08-13T05:00:00-0700 to schedule the first upgrade for August 13, 2022 @ 05:00 PDT (12:00 UTC) 16 | 17 | `--upgrade-interval`: 18 | - Optional. The time in minutes between each node's upgrade schedule. Default is 5 minutes. 19 | 20 | ## Example Print Out 21 | ``` 22 | {"GrHM6eSURUvJAGAuAKDrmRod74KeTpqtqUJCctajWsWr"2022-08-13T05:00:00-0700","D1z9xZXntfphcCP3wjQTjw869ZuEhtucdSFojogb328E"2022-08-13T05:05:00-0700","7cC4Uo4UxfddPndhx5Qs76PUc999m1de3EEtb9QSFvhA"2022-08-13T05:10:00-0700","3xRW7MtVMHRKcrTPyJwAFCrynoKH7JbV5B7sTZzmq7mD"2022-08-13T05:15:00-0700","GVvdyd7Y6hsBEy5yDDHjqkXgH8zW34K74RsxUiUCZDCE"2022-08-13T05:20:00-0700","5aNBs6DToRDNuXamiswdvPhvoGxoLbdEL5XTLdZrv6Xf"2022-08-13T05:25:00-0700","t2LeEE7c4BkdwpzqT1z3sBssrzSrFVKhe13v3Mtuirw"2022-08-13T05:30:00-0700","8kwxd1DwUFr2v27nSiEC7gexa1bjkUuAm7JsfJ49bzTE"2022-08-13T05:35:00-0700","3f37va9HbQVxBGqt6U7227Cnh4WezkNGKqYZrbLEWpUp"2022-08-13T05:40:00-0700","52muwfE7EjTGDKxiQCYWr58D8BcrgyKVjhHgRQdaLiMw"2022-08-13T05:45:00-0700","CLMKi7oBYH2HzTosvdTPGiM8UXBAE2PQQuC2y97LZWgf"2022-08-13T05:50:00-0700","GnuKuvbdcY9ZU3GwvUYzEo3z5nmh1BhJ8BrrsASQM1Fi"2022-08-13T05:55:00-0700","FCLZXHPFAbARuu1vSp26bhFaNQz9sveL1QWvo2KDZjwb"2022-08-13T06:00:00-0700","5QDFnybgDHeQyBuaiKBsJ1o1Pxf83FNanaUPfRQp7N2d"2022-08-13T06:05:00-0700","5YwvqQySsNSPPM2RRQWGJeuiGgcCG5uD9NvQRR7ASJac"2022-08-13T06:10:00-0700","DXn8PUYKZZkq8gC7CZ2PqwECzUs2bpxYiA5TWgoYARa7"2022-08-13T06:15:00-0700"} 23 | ``` -------------------------------------------------------------------------------- /fetch-validator-status/plugins/generate_upgrade_schedule/generate_upgrade_schedule.py: -------------------------------------------------------------------------------- 1 | import plugin_collection 2 | from datetime import datetime, timezone 3 | from datetime import timedelta 4 | 5 | class main(plugin_collection.Plugin): 6 | 7 | def __init__(self): 8 | super().__init__() 9 | self.index = 1 10 | self.name = 'Generate Network Upgrade Schedule' 11 | self.description = 'Generates a network upgrade schedule for a given network.' 12 | self.type = '' 13 | 14 | def parse_args(self, parser): 15 | parser.add_argument("--upgrade-schedule", action="store_true", help="Enables the Generate Network Upgrade Schedule plugin.") 16 | parser.add_argument("--upgrade-start", default=datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S%z'), help="The start date and time for the upgrade schedule, in the form yyyy-mm-ddTHH:MM:SSz; for example 2022-08-13T05:00:00-0700 to schedule the first upgrade for August 13, 2022 @ 05:00 PDT (12:00 UTC)") 17 | parser.add_argument("--upgrade-interval", default=5, help="The time interval in minutes between node upgrades. 
Defaults to 5 minutes.") 18 | 19 | def load_parse_args(self, args): 20 | global verbose 21 | verbose = args.verbose 22 | 23 | self.enabled = args.upgrade_schedule 24 | self.start_date_time = datetime.strptime(args.upgrade_start, '%Y-%m-%dT%H:%M:%S%z') 25 | self.interval = int(args.upgrade_interval) 26 | 27 | async def perform_operation(self, result, network_name, response, verifiers): 28 | filtered_result = {} 29 | counter = 0 30 | for item in result: 31 | if "Node_info" in item["response"]["result"]["data"]: 32 | schedule = self.start_date_time + timedelta(minutes = (self.interval * counter)) 33 | filtered_result[item["response"]["result"]["data"]["Node_info"]["did"]] = schedule.strftime('%Y-%m-%dT%H:%M:%S%z') 34 | counter += 1 35 | result = filtered_result 36 | return result -------------------------------------------------------------------------------- /fetch-validator-status/plugins/metrics/README.md: -------------------------------------------------------------------------------- 1 | # Network Metrics 2 | 3 | The [Network Metrics Plug-in](network_metrics.py) is used to create the resilience graph for the [metric dashboard](https://sovrin.org/ssi-metrics-dashboards/) on the [sovrin website](https://sovrin.org/) and could be used to log any other data to Google Sheets. This will append a row inside Google Sheets, allowing you to log and create graphs from the monitor. Pair this with a cron job to run every 15 minutes or so and you have your own resilience log (see the example cron entry at the end of this README). 4 | 5 | ## Setup 6 | In order to use the Network Metrics Plug-in yourself, you will need to create a folder named "conf" inside the network metrics folder. That folder will need to have a Google API credentials json file in it. Follow this [tutorial](https://www.youtube.com/watch?v=cnPlKLEGR7E&t=33s) on how to create the json file and how to set it up in Google Sheets. Make sure you watch the video to 3m 56s in order to set things up fully. 7 | 8 | Your metrics plug-in folder should look something like this: 9 | 10 | metrics\ 11 | ----> conf\ 12 | --------> *GoogleAPI.json (Name it whatever you want)*\ 13 | ----> google_sheets.py\ 14 | ----> network_metrics.py\ 15 | ----> README.md 16 | 17 | ## How To Use 18 | Once that is set up, the command to run the plug-in will look something like this:\ 19 | `./run.sh --net ssn -v --mlog --json [Json File Name] --file [Google Sheet File Name] --worksheet [Worksheet name]` 20 | 21 | --mlog: enables the plug-in.\ 22 | --json: specifies which Google API json file you would like to use inside the conf folder.\ 23 | --file: specifies which Google Sheet you would like to work in.\ 24 | --worksheet: specifies which worksheet you would like to work in, within the given Google Sheet file. 25 | 26 | And you're done!
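## Example Cron Entry
The following is only a sketch of the cron job mentioned above. The clone path (`/opt/indy-node-monitor`), the network code (`ssn`), the seed value, the log file location, and the `GoogleAPI.json`, sheet, and worksheet names are placeholders for your own values; it also assumes the user running cron can run Docker and has `jq` installed.

```
# Hypothetical crontab entry; replace the path, network code, seed, and sheet/worksheet names with your own.
*/15 * * * * cd /opt/indy-node-monitor/fetch-validator-status && SEED=<your_monitor_seed> ./run.sh --net ssn --mlog --json GoogleAPI.json --file MetricsLog --worksheet SSN >> /var/log/indy-network-metrics.log 2>&1
```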
27 | 28 | -------------------------------------------------------------------------------- /fetch-validator-status/plugins/metrics/google_sheets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fnmatch 3 | import gspread 4 | from oauth2client.service_account import ServiceAccountCredentials 5 | 6 | def find_file(file_name): 7 | dir_path = os.path.dirname(os.path.realpath(__file__)) 8 | for root, dirs, files in os.walk(dir_path): 9 | for file in files: 10 | if fnmatch.fnmatch(file, file_name): 11 | return(root + '/' + str(file)) 12 | 13 | def gspread_authZ(gauth_json): 14 | # Google drive and Google sheets API setup 15 | scope = ["https://spreadsheets.google.com/feeds",'https://www.googleapis.com/auth/spreadsheets',"https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"] 16 | auth_file = find_file(gauth_json) 17 | if not auth_file: 18 | print("\033[1;31;40mUnable to find the Google API Credentials json file! Make sure the file is in the './conf' folder and the name you specified is correct.") 19 | print("Json name entered: " + gauth_json + ".\033[m") 20 | exit() 21 | 22 | creds = ServiceAccountCredentials.from_json_keyfile_name(auth_file, scope) # Set credentials using json file 23 | authD_client = gspread.authorize(creds) # Authorize json file 24 | return(authD_client) 25 | 26 | # Insert data in sheet 27 | def gspread_append_sheet(authD_client, file_name, worksheet_name, row): 28 | try: 29 | sheet = authD_client.open(file_name).worksheet(worksheet_name) # Open sheet 30 | sheet.append_row(row, value_input_option='USER_ENTERED') # Append sheet 31 | except: 32 | print("\033[1;31;40mUnable to upload data to sheet! Please check file and worksheet name and try again.") 33 | print("File name entered: " + file_name + ". Worksheet name entered: " + worksheet_name + ".\033[m") 34 | exit() -------------------------------------------------------------------------------- /fetch-validator-status/plugins/metrics/network_metrics.py: -------------------------------------------------------------------------------- 1 | import plugin_collection 2 | from .google_sheets import gspread_authZ, gspread_append_sheet 3 | import datetime 4 | import argparse 5 | import os 6 | 7 | class main(plugin_collection.Plugin): 8 | 9 | def __init__(self): 10 | super().__init__() 11 | self.index = 4 12 | self.name = 'Network Metrics' 13 | self.description = '' 14 | self.type = '' 15 | self.gauth_json = None 16 | self.file_name = None 17 | self.worksheet_name = None 18 | 19 | 20 | def parse_args(self, parser): 21 | parser.add_argument("--mlog", action="store_true", help="Network Metrics Plug-in: The metrics log argument uses the Google Sheets API and requires a Google API Credentials json file name (file must be in root folder), a Google Sheet file name, and a worksheet name. ex: --mlog --json [Json File Name] --file [Google Sheet File Name] --worksheet [Worksheet name]") 22 | parser.add_argument("--json", default=os.environ.get('JSON') , help="Google API Credentials json file name (file must be in root folder). Can be specified using the 'JSON' environment variable.", nargs='*') 23 | parser.add_argument("--file", default=os.environ.get('FILE') , help="Specify which Google Sheets file you want to log to. Can be specified using the 'FILE' environment variable.", nargs='*') 24 | parser.add_argument("--worksheet", default=os.environ.get('WORKSHEET') , help="Specify which worksheet you want to log to.
Can be specified using the 'WORKSHEET' environment variable.", nargs='*') 25 | 26 | def load_parse_args(self, args): 27 | global verbose 28 | verbose = args.verbose 29 | # Support names and paths containing spaces. 30 | # Other workarounds, including the standard of putting '"'s around values containing spaces, do not always work. 31 | if args.json: 32 | args.json = ' '.join(args.json) 33 | if args.file: 34 | args.file = ' '.join(args.file) 35 | if args.worksheet: 36 | args.worksheet = ' '.join(args.worksheet) 37 | 38 | if args.mlog: 39 | if args.json and args.file and args.worksheet: 40 | self.enabled = args.mlog 41 | self.gauth_json = args.json 42 | self.file_name = args.file 43 | self.worksheet_name = args.worksheet 44 | else: 45 | print('The metrics log argument uses the Google Sheets API and requires a Google API Credentials json file name (file must be in root folder), a Google Sheet file name, and a worksheet name.') 46 | print('ex: --mlog --json [Json File Name] --file [Google Sheet File Name] --worksheet [Worksheet name]') 47 | exit() 48 | 49 | async def perform_operation(self, result, network_name, response, verifiers): 50 | 51 | authD_client = gspread_authZ(self.gauth_json) 52 | message = "" 53 | num_of_nodes = 0 54 | nodes_offline = 0 55 | time = datetime.datetime.now().strftime('%m/%d/%Y %H:%M:%S') # formatted as 12/3/2020 21:27:49 56 | 57 | for node in result: 58 | num_of_nodes += 1 59 | if node["status"]["ok"] == False: 60 | nodes_offline += 1 61 | 62 | networkResilience = num_of_nodes - round((num_of_nodes - 1 ) / 3) 63 | 64 | # Could have a stepped warning system 65 | if nodes_offline >= networkResilience: 66 | message = "Network Resilience Danger!" 67 | 68 | active_nodes = num_of_nodes - nodes_offline 69 | 70 | row = [time, network_name, num_of_nodes, nodes_offline, networkResilience, active_nodes, message] 71 | print(row) 72 | gspread_append_sheet(authD_client, self.file_name, self.worksheet_name, row) 73 | print(f"\033[92mPosted to {self.file_name} in sheet {self.worksheet_name}.\033[m") 74 | return result -------------------------------------------------------------------------------- /fetch-validator-status/plugins/status_only.py: -------------------------------------------------------------------------------- 1 | import plugin_collection 2 | import json 3 | 4 | class main(plugin_collection.Plugin): 5 | 6 | def __init__(self): 7 | super().__init__() 8 | self.index = 2 9 | self.name = 'Status Only' 10 | self.description = '' 11 | self.type = '' 12 | 13 | def parse_args(self, parser): 14 | parser.add_argument("--status", action="store_true", help="Status Only Plug-in: Get status only.
Suppresses detailed results.") 15 | 16 | def load_parse_args(self, args): 17 | global verbose 18 | verbose = args.verbose 19 | 20 | self.enabled = args.status 21 | 22 | async def perform_operation(self, result, network_name, response, verifiers): 23 | for node in result: 24 | if "response" in node: 25 | node.pop("response") 26 | return result 27 | -------------------------------------------------------------------------------- /fetch-validator-status/pool.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from util import log 3 | from indy_vdr.pool import open_pool 4 | from singleton import Singleton 5 | from networks import Networks 6 | 7 | class PoolCollection(object, metaclass=Singleton): 8 | def __init__(self, verbose, networks: Networks): 9 | self.verbose = verbose 10 | self.networks = networks 11 | self.pool_cache = {} 12 | self.lock = asyncio.Lock() 13 | 14 | async def __fetch_pool_connection(self, genesis_path): 15 | attempt = 3 16 | while attempt: 17 | try: 18 | log("Connecting to Pool ...") 19 | pool = await open_pool(transactions_path=genesis_path) 20 | except: 21 | log("Pool Timed Out! Trying again ...") 22 | attempt -= 1 23 | if not attempt: 24 | print("Unable to get response from pool! 3 attempts were made. Exiting ...") 25 | exit() 26 | continue 27 | else: 28 | log("Connected to Pool ...") 29 | break 30 | return pool 31 | 32 | async def get_pool(self, network_id): 33 | network = self.networks.resolve(network_id) 34 | # Network pool connection cache with async thread lock for REST API. 35 | async with self.lock: 36 | if network.id in self.pool_cache: 37 | # Cache hit ... 38 | log(f"The pool for {network.name} was found in the cache ...") 39 | pool = self.pool_cache[network.id]['pool'] 40 | else: 41 | # Cache miss ...
42 | log(f"A pool for {network.name} was not found in the cache, creating new connection ...") 43 | pool = await self.__fetch_pool_connection(network.genesis_path) 44 | self.pool_cache[network.id] = {} 45 | self.pool_cache[network.id]['pool'] = pool 46 | log(f"Cached the pool for {network.name} ...") 47 | return pool, network.name -------------------------------------------------------------------------------- /fetch-validator-status/requirements.txt: -------------------------------------------------------------------------------- 1 | asyncio 2 | pynacl 3 | gspread 4 | oauth2client 5 | fastapi 6 | uvicorn 7 | gunicorn 8 | 9 | base58 10 | indy-vdr -------------------------------------------------------------------------------- /fetch-validator-status/rest_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from typing import Optional 4 | from fastapi import FastAPI, Header, HTTPException, Path, Query 5 | from fastapi.responses import PlainTextResponse 6 | from starlette.responses import RedirectResponse 7 | from util import ( 8 | enable_verbose, 9 | # log, 10 | create_did 11 | ) 12 | from pool import PoolCollection 13 | from networks import Networks, NetworkEnum 14 | from fetch_status import FetchStatus, NodeNotFound 15 | from plugin_collection import PluginCollection 16 | 17 | APP_NAME='Hyperledger Indy Node Monitor REST API' 18 | APP_DESCRIPTION='https://github.com/hyperledger/indy-node-monitor' 19 | APP_VERSION='0.0.0' 20 | 21 | # https://fastapi.tiangolo.com/tutorial/metadata/ 22 | app = FastAPI( 23 | title = APP_NAME, 24 | description = APP_DESCRIPTION, 25 | version = APP_VERSION 26 | ) 27 | 28 | # global variables 29 | default_args = None 30 | monitor_plugins = None 31 | pool_collection = None 32 | node_info = None 33 | 34 | Network: NetworkEnum = Networks.get_NetworkEnum() 35 | example_network_enum = list(Network)[0] 36 | example_network_name = str(list(Network)[0]).split(".")[-1] 37 | 38 | def set_plugin_parameters(status: bool = False, alerts: bool = False): 39 | # Store args and monitor_plugins for lazy loading. 40 | global default_args, pool_collection, node_info 41 | 42 | if not default_args: 43 | # Create plugin instance and set default args 44 | default_monitor_plugins = PluginCollection('plugins') 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument("-v", "--verbose", default=(os.environ.get('VERBOSE', 'False').lower() == 'true'), action="store_true") 47 | default_monitor_plugins.get_parse_args(parser) 48 | default_args, unknown = parser.parse_known_args() 49 | enable_verbose(default_args.verbose) 50 | pool_collection = PoolCollection(default_args.verbose, Networks()) 51 | node_info = FetchStatus(default_args.verbose, pool_collection) 52 | 53 | # Create namespace with default args and load them into api_args 54 | api_args = argparse.Namespace() 55 | for name, value in default_args._get_kwargs(): 56 | setattr(api_args, name, value) 57 | 58 | # Set api_args with the values from the parameters 59 | setattr(api_args, 'status', status) 60 | setattr(api_args, 'alerts', alerts) 61 | 62 | # Create and load plugins with api_args 63 | monitor_plugins = PluginCollection('plugins') 64 | monitor_plugins.load_all_parse_args(api_args) 65 | 66 | return monitor_plugins 67 | 68 | # Redirect users to the '/docs' page but don't include this endpoint in the docs. 
69 | @app.get("/", include_in_schema=False) 70 | async def redirect(): 71 | response = RedirectResponse(url='/docs') 72 | return response 73 | 74 | @app.get("/networks") 75 | async def networks(): 76 | data = Networks.get_networks() 77 | return data 78 | 79 | @app.get("/networks/{network}") 80 | async def network(network: Network = Path(path=example_network_enum, example=example_network_name, description="The network code."), 81 | status: bool = Query(False, description="Filter results to status only."), 82 | alerts: bool = Query(False, description="Filter results to alerts only."), 83 | seed: Optional[str] = Header(None, description="Your network monitor seed.")): 84 | monitor_plugins = set_plugin_parameters(status, alerts) 85 | ident = create_did(seed) 86 | result = await node_info.fetch(network_id=network.value, monitor_plugins=monitor_plugins, ident=ident) 87 | return result 88 | 89 | @app.get("/networks/{network}/pool/transactions", response_class=PlainTextResponse) 90 | async def network(network: Network = Path(path=example_network_enum, example=example_network_name, description="The network code.")): 91 | set_plugin_parameters() 92 | pool, _ = await pool_collection.get_pool(network.value) 93 | result = await pool.get_transactions() 94 | return result 95 | 96 | @app.get("/networks/{network}/pool/verifiers") 97 | async def network(network: Network = Path(path=example_network_enum, example=example_network_name, description="The network code.")): 98 | set_plugin_parameters() 99 | pool, _ = await pool_collection.get_pool(network.value) 100 | await pool.refresh() 101 | result = await pool.get_verifiers() 102 | return result 103 | 104 | @app.get("/networks/{network}/{node}") 105 | async def node(network: Network = Path(path=example_network_enum, example=example_network_name, description="The network code."), 106 | node: str = Path(..., example="FoundationBuilder", description="The node name."), 107 | status: bool = Query(False, description="Filter results to status only."), 108 | alerts: bool = Query(False, description="Filter results to alerts only."), 109 | seed: Optional[str] = Header(None, description="Your network monitor seed.")): 110 | monitor_plugins = set_plugin_parameters(status, alerts) 111 | ident = create_did(seed) 112 | try: 113 | result = await node_info.fetch(network_id=network.value, monitor_plugins=monitor_plugins, nodes=node, ident=ident) 114 | except NodeNotFound as error: 115 | print(error) 116 | raise HTTPException(status_code=400, detail=str(error)) 117 | 118 | return result 119 | -------------------------------------------------------------------------------- /fetch-validator-status/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # set -x 3 | 4 | export MSYS_NO_PATHCONV=1 5 | 6 | # --- Set program name here --- 7 | program_name="fetch_status" 8 | 9 | function getVolumeMount() { 10 | path=${1} 11 | path=$(realpath ${path}) 12 | path=${path%%+(/)} 13 | mountPoint=${path##*/} 14 | if [[ "$OSTYPE" == "msys" ]]; then 15 | # When running on Windows, you need to prefix the path with an extra '/' 16 | path="/${path}" 17 | fi 18 | echo " --volume='${path}:/home/indy/${mountPoint}:Z' " 19 | } 20 | 21 | function runCmd() { 22 | _cmd=${1} 23 | if [ ! 
-z ${LOG} ]; then 24 | _cmd+=" > ${LOG%.*}_`date +\%Y-\%m-\%d_%H-%M-%S`.json" 25 | fi 26 | 27 | eval ${_cmd} 28 | # echo 29 | # echo ${_cmd} 30 | } 31 | 32 | function isInstalled() { 33 | ( 34 | if [ -x "$(command -v ${@})" ]; then 35 | return 0 36 | else 37 | return 1 38 | fi 39 | ) 40 | } 41 | 42 | function echoYellow (){ 43 | ( 44 | _msg=${1} 45 | _yellow='\e[33m' 46 | _nc='\e[0m' # No Color 47 | echo -e "${_yellow}${_msg}${_nc}" >&2 48 | ) 49 | } 50 | 51 | JQ_EXE=jq 52 | if ! isInstalled ${JQ_EXE}; then 53 | echoYellow "The ${JQ_EXE} executable is required and was not found on your path." 54 | 55 | cat <<-EOF 56 | The recommended approach to installing the required package(s) is to use either [Homebrew](https://brew.sh/) (MAC) 57 | or [Chocolatey](https://chocolatey.org/) (Windows). For more information visit https://stedolan.github.io/jq/ 58 | 59 | Windows: 60 | - chocolatey install ${JQ_EXE} 61 | MAC: 62 | - brew install ${JQ_EXE} 63 | Debian/Ubuntu: 64 | - sudo apt-get install ${JQ_EXE} 65 | EOF 66 | exit 1 67 | fi 68 | 69 | # fetch_status can have long running commands. 70 | # Detect any existing containers running the same command and exit. 71 | runningContainers=$(docker ps | grep ${program_name} | awk '{print $1}') 72 | if [ ! -z "${runningContainers}" ]; then 73 | for runningContainer in ${runningContainers}; do 74 | runningContainerCmd=$(docker inspect ${runningContainer} | ${JQ_EXE} -r '.[0]["Config"]["Cmd"][0]') 75 | if [[ "${runningContainerCmd}" == "${@}" ]]; then 76 | echoYellow "There is an instance of $program_name already running the same command. Please wait for it to complete ..." 77 | exit 0 78 | fi 79 | done 80 | fi 81 | 82 | # IM is for "interactive mode" so Docker is run with the "-it" parameter. Probably never needed 83 | # but it is there. Use "IM=1 run.sh ..." to run the Docker container in interactive mode 84 | if [ -z "${IM+x}" ]; then 85 | export DOCKER_INTERACTIVE="" 86 | else 87 | export DOCKER_INTERACTIVE="-it" 88 | 89 | # Running interactively on Windows? 90 | if [[ "$OSTYPE" == "msys" ]]; then 91 | # Prefix interactive terminal commands ... 92 | export terminalEmu="winpty" 93 | fi 94 | fi 95 | 96 | if [[ "$@" == *"--web"* ]]; then 97 | export DOCKER_PUBLISH="--publish 8080:8080" 98 | else 99 | export DOCKER_PUBLISH="" 100 | fi 101 | 102 | docker build -t $program_name . > /dev/null 2>&1 103 | 104 | cmd="${terminalEmu} docker run --rm ${DOCKER_INTERACTIVE} \ 105 | -e "GENESIS_PATH=${GENESIS_PATH}" \ 106 | -e "GENESIS_URL=${GENESIS_URL}" \ 107 | -e "SEED=${SEED}" \ 108 | ${DOCKER_PUBLISH}" 109 | 110 | # Dynamically mount teh 'conf' directory if it exists. 111 | if [ -d "./conf" ]; then 112 | cmd+=$(getVolumeMount "./conf") 113 | fi 114 | 115 | if [ -d "./plugins" ]; then 116 | cmd+=$(getVolumeMount "./plugins") 117 | fi 118 | 119 | cmd+="$program_name \"$@\"" 120 | 121 | counter=${SAMPLES:-1} 122 | while [[ ${counter} > 0 ]] 123 | do 124 | runCmd "${cmd}" 125 | counter=$(( ${counter} - 1 )) 126 | if [[ ${counter} > 0 ]]; then 127 | # Nodes update their validator info every minute. 128 | # Therefore calling more than once per minute is not productive. 
129 | sleep 60 130 | fi 131 | done -------------------------------------------------------------------------------- /fetch-validator-status/singleton.py: -------------------------------------------------------------------------------- 1 | # Meta Class 2 | # https://stackoverflow.com/questions/6760685/creating-a-singleton-in-python 3 | class Singleton(type): 4 | _instances = {} 5 | def __call__(cls, *args, **kwargs): 6 | if cls not in cls._instances: 7 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 8 | return cls._instances[cls] -------------------------------------------------------------------------------- /fetch-validator-status/util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from DidKey import DidKey 3 | 4 | verbose = False 5 | 6 | def enable_verbose(enable): 7 | global verbose 8 | verbose = enable 9 | 10 | def log(*args): 11 | if verbose: 12 | print(*args, "\n", file=sys.stderr) 13 | 14 | def create_did(seed): 15 | ident = None 16 | if seed: 17 | try: 18 | ident = DidKey(seed) 19 | log("DID:", ident.did, " Verkey:", ident.verkey) 20 | except: 21 | log("Invalid seed. Continuing anonymously ...") 22 | return ident -------------------------------------------------------------------------------- /grafana/config.monitoring: -------------------------------------------------------------------------------- 1 | GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD} 2 | GF_USERS_ALLOW_SIGN_UP=${GF_USERS_ALLOW_SIGN_UP} 3 | INFLUX_DB_FLUX_TOKEN=${INFLUX_DB_FLUX_TOKEN} -------------------------------------------------------------------------------- /grafana/provisioning/alerting/Error Warning.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": 1, 3 | "groups": [ 4 | { 5 | "orgId": 1, 6 | "name": "Alerting", 7 | "folder": "Alerts", 8 | "interval": "60s", 9 | "rules": [ 10 | { 11 | "id": 5, 12 | "uid": "wHYADzJ4z", 13 | "orgID": 1, 14 | "folderUID": "tMVPxmv4k", 15 | "ruleGroup": "Alerting", 16 | "title": "Error Warning", 17 | "condition": "C", 18 | "data": [ 19 | { 20 | "refId": "A", 21 | "queryType": "", 22 | "relativeTimeRange": { 23 | "from": 300, 24 | "to": 0 25 | }, 26 | "datasourceUid": "PBFA97CFB590B2093", 27 | "model": { 28 | "datasource": { 29 | "type": "prometheus", 30 | "uid": "PBFA97CFB590B2093" 31 | }, 32 | "editorMode": "code", 33 | "expr": "node_status_errors{job=\"tick\", network=\"Sovrin Builder Net\"}", 34 | "interval": "", 35 | "intervalMs": 15000, 36 | "legendFormat": "{{label_name}}", 37 | "maxDataPoints": 43200, 38 | "range": true, 39 | "refId": "A" 40 | } 41 | }, 42 | { 43 | "refId": "C", 44 | "queryType": "", 45 | "relativeTimeRange": { 46 | "from": 300, 47 | "to": 0 48 | }, 49 | "datasourceUid": "-100", 50 | "model": { 51 | "conditions": [ 52 | { 53 | "evaluator": { 54 | "params": [ 55 | 0 56 | ], 57 | "type": "gt" 58 | }, 59 | "operator": { 60 | "type": "and" 61 | }, 62 | "query": { 63 | "params": [ 64 | "C" 65 | ] 66 | }, 67 | "reducer": { 68 | "params": [], 69 | "type": "last" 70 | }, 71 | "type": "query" 72 | } 73 | ], 74 | "datasource": { 75 | "type": "__expr__", 76 | "uid": "-100" 77 | }, 78 | "expression": "B", 79 | "hide": false, 80 | "intervalMs": 1000, 81 | "maxDataPoints": 43200, 82 | "refId": "C", 83 | "type": "threshold" 84 | } 85 | }, 86 | { 87 | "refId": "B", 88 | "queryType": "", 89 | "relativeTimeRange": { 90 | "from": 0, 91 | "to": 0 92 | }, 93 | "datasourceUid": "-100", 94 | "model": { 95 | "conditions": [ 96 | 
{ 97 | "evaluator": { 98 | "params": [ 99 | 0, 100 | 0 101 | ], 102 | "type": "gt" 103 | }, 104 | "operator": { 105 | "type": "and" 106 | }, 107 | "query": { 108 | "params": [] 109 | }, 110 | "reducer": { 111 | "params": [], 112 | "type": "avg" 113 | }, 114 | "type": "query" 115 | } 116 | ], 117 | "datasource": { 118 | "name": "Expression", 119 | "type": "__expr__", 120 | "uid": "__expr__" 121 | }, 122 | "expression": "A", 123 | "intervalMs": 1000, 124 | "maxDataPoints": 43200, 125 | "reducer": "last", 126 | "refId": "B", 127 | "type": "reduce" 128 | } 129 | } 130 | ], 131 | "updated": "2023-02-10T20:47:12Z", 132 | "noDataState": "NoData", 133 | "execErrState": "Error", 134 | "for": "15m", 135 | "annotations": { 136 | "__dashboardUid__": "pPcqoSfnk", 137 | "__panelId__": "4" 138 | }, 139 | "labels": { 140 | "severity": "warning" 141 | } 142 | } 143 | ] 144 | } 145 | ] 146 | } -------------------------------------------------------------------------------- /grafana/provisioning/alerting/WriteFailureWarning.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": 1, 3 | "groups": [ 4 | { 5 | "orgId": 1, 6 | "name": "Alerting", 7 | "folder": "Alerts", 8 | "interval": "60s", 9 | "rules": [ 10 | { 11 | "id": 3, 12 | "uid": "0dkOBI0Vz", 13 | "orgID": 1, 14 | "folderUID": "tMVPxmv4k", 15 | "ruleGroup": "Alerting", 16 | "title": "Write failure warning", 17 | "condition": "E", 18 | "data": [ 19 | { 20 | "refId": "A", 21 | "queryType": "", 22 | "relativeTimeRange": { 23 | "from": 300, 24 | "to": 0 25 | }, 26 | "datasourceUid": "PBFA97CFB590B2093", 27 | "model": { 28 | "datasource": { 29 | "type": "prometheus", 30 | "uid": "PBFA97CFB590B2093" 31 | }, 32 | "editorMode": "code", 33 | "exemplar": true, 34 | "expr": "max(node_response_result_data_Pool_info_Total_nodes_count{job=\"tick\", network=\"Sovrin Builder Net\"} - node_response_result_data_Pool_info_f_value{job=\"tick\", network=\"Sovrin Builder Net\"})", 35 | "interval": "", 36 | "intervalMs": 15000, 37 | "legendFormat": "Write Failure", 38 | "maxDataPoints": 43200, 39 | "range": true, 40 | "refId": "A" 41 | } 42 | }, 43 | { 44 | "refId": "B", 45 | "queryType": "", 46 | "relativeTimeRange": { 47 | "from": 300, 48 | "to": 0 49 | }, 50 | "datasourceUid": "PBFA97CFB590B2093", 51 | "model": { 52 | "datasource": { 53 | "type": "prometheus", 54 | "uid": "PBFA97CFB590B2093" 55 | }, 56 | "editorMode": "code", 57 | "exemplar": true, 58 | "expr": "min(node_response_result_data_Pool_info_Reachable_nodes_count{job=\"tick\", network=\"Sovrin Builder Net\"})", 59 | "hide": false, 60 | "interval": "", 61 | "intervalMs": 15000, 62 | "legendFormat": "Reachable", 63 | "maxDataPoints": 43200, 64 | "range": true, 65 | "refId": "B" 66 | } 67 | }, 68 | { 69 | "refId": "C", 70 | "queryType": "", 71 | "relativeTimeRange": { 72 | "from": 300, 73 | "to": 0 74 | }, 75 | "datasourceUid": "PBFA97CFB590B2093", 76 | "model": { 77 | "datasource": { 78 | "type": "prometheus", 79 | "uid": "PBFA97CFB590B2093" 80 | }, 81 | "editorMode": "code", 82 | "expr": "min(node_response_result_data_Pool_info_Reachable_nodes_count{job=\"tick\", network=\"Sovrin Builder Net\"}) - max(node_response_result_data_Pool_info_Total_nodes_count{job=\"tick\", network=\"Sovrin Builder Net\"} - node_response_result_data_Pool_info_f_value{job=\"tick\", network=\"Sovrin Builder Net\"})", 83 | "hide": false, 84 | "interval": "", 85 | "intervalMs": 15000, 86 | "legendFormat": "Warning Trigger", 87 | "maxDataPoints": 43200, 88 | "range": true, 89 | "refId": 
"C" 90 | } 91 | }, 92 | { 93 | "refId": "D", 94 | "queryType": "", 95 | "relativeTimeRange": { 96 | "from": 0, 97 | "to": 0 98 | }, 99 | "datasourceUid": "-100", 100 | "model": { 101 | "conditions": [ 102 | { 103 | "evaluator": { 104 | "params": [], 105 | "type": "gt" 106 | }, 107 | "operator": { 108 | "type": "and" 109 | }, 110 | "query": { 111 | "params": [ 112 | "D" 113 | ] 114 | }, 115 | "reducer": { 116 | "params": [], 117 | "type": "last" 118 | }, 119 | "type": "query" 120 | } 121 | ], 122 | "datasource": { 123 | "type": "__expr__", 124 | "uid": "-100" 125 | }, 126 | "expression": "C", 127 | "hide": false, 128 | "intervalMs": 1000, 129 | "maxDataPoints": 43200, 130 | "reducer": "last", 131 | "refId": "D", 132 | "type": "reduce" 133 | } 134 | }, 135 | { 136 | "refId": "E", 137 | "queryType": "", 138 | "relativeTimeRange": { 139 | "from": 0, 140 | "to": 0 141 | }, 142 | "datasourceUid": "-100", 143 | "model": { 144 | "conditions": [ 145 | { 146 | "evaluator": { 147 | "params": [ 148 | 1 149 | ], 150 | "type": "lt" 151 | }, 152 | "operator": { 153 | "type": "and" 154 | }, 155 | "query": { 156 | "params": [ 157 | "E" 158 | ] 159 | }, 160 | "reducer": { 161 | "params": [], 162 | "type": "last" 163 | }, 164 | "type": "query" 165 | } 166 | ], 167 | "datasource": { 168 | "type": "__expr__", 169 | "uid": "-100" 170 | }, 171 | "expression": "D", 172 | "hide": false, 173 | "intervalMs": 1000, 174 | "maxDataPoints": 43200, 175 | "refId": "E", 176 | "type": "threshold" 177 | } 178 | } 179 | ], 180 | "updated": "2023-01-31T21:34:40Z", 181 | "noDataState": "NoData", 182 | "execErrState": "Error", 183 | "for": "15m", 184 | "annotations": { 185 | "__dashboardUid__": "pPcqoSfnk", 186 | "__panelId__": "2" 187 | }, 188 | "labels": { 189 | "severity": "warning" 190 | } 191 | } 192 | ] 193 | } 194 | ] 195 | } -------------------------------------------------------------------------------- /grafana/provisioning/dashboards/AlertsDashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "datasource", 8 | "uid": "grafana" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "editable": true, 25 | "fiscalYearStartMonth": 0, 26 | "graphTooltip": 0, 27 | "links": [], 28 | "liveNow": false, 29 | "panels": [ 30 | { 31 | "datasource": { 32 | "type": "prometheus", 33 | "uid": "PBFA97CFB590B2093" 34 | }, 35 | "fieldConfig": { 36 | "defaults": { 37 | "color": { 38 | "mode": "palette-classic" 39 | }, 40 | "custom": { 41 | "axisCenteredZero": false, 42 | "axisColorMode": "text", 43 | "axisLabel": "", 44 | "axisPlacement": "auto", 45 | "barAlignment": 0, 46 | "drawStyle": "line", 47 | "fillOpacity": 10, 48 | "gradientMode": "opacity", 49 | "hideFrom": { 50 | "legend": false, 51 | "tooltip": false, 52 | "viz": false 53 | }, 54 | "lineInterpolation": "linear", 55 | "lineWidth": 1, 56 | "pointSize": 5, 57 | "scaleDistribution": { 58 | "type": "linear" 59 | }, 60 | "showPoints": "never", 61 | "spanNulls": true, 62 | "stacking": { 63 | "group": "A", 64 | "mode": "none" 65 | }, 66 | "thresholdsStyle": { 67 | "mode": "off" 68 | } 69 | }, 70 | "mappings": [], 71 | "min": 0, 72 | "thresholds": { 73 | "mode": "absolute", 74 | "steps": [ 75 | { 76 | "color": "green", 
77 | "value": null 78 | }, 79 | { 80 | "color": "red", 81 | "value": 80 82 | } 83 | ] 84 | }, 85 | "unit": "short" 86 | }, 87 | "overrides": [] 88 | }, 89 | "gridPos": { 90 | "h": 9, 91 | "w": 12, 92 | "x": 0, 93 | "y": 0 94 | }, 95 | "id": 2, 96 | "options": { 97 | "legend": { 98 | "calcs": [], 99 | "displayMode": "list", 100 | "placement": "bottom", 101 | "showLegend": true 102 | }, 103 | "tooltip": { 104 | "mode": "single", 105 | "sort": "none" 106 | } 107 | }, 108 | "targets": [ 109 | { 110 | "datasource": { 111 | "type": "prometheus", 112 | "uid": "PBFA97CFB590B2093" 113 | }, 114 | "editorMode": "code", 115 | "exemplar": true, 116 | "expr": "max(node_response_result_data_Pool_info_Total_nodes_count{job=\"tick\", network=\"[[network]]\"} - node_response_result_data_Pool_info_f_value{job=\"tick\", network=\"[[network]]\"})", 117 | "interval": "", 118 | "legendFormat": "Write Failure", 119 | "range": true, 120 | "refId": "A" 121 | }, 122 | { 123 | "datasource": { 124 | "type": "prometheus", 125 | "uid": "PBFA97CFB590B2093" 126 | }, 127 | "editorMode": "code", 128 | "exemplar": true, 129 | "expr": "min(node_response_result_data_Pool_info_Reachable_nodes_count{job=\"tick\", network=\"[[network]]\"})", 130 | "hide": false, 131 | "interval": "", 132 | "legendFormat": "Reachable", 133 | "range": true, 134 | "refId": "B" 135 | }, 136 | { 137 | "datasource": { 138 | "type": "prometheus", 139 | "uid": "PBFA97CFB590B2093" 140 | }, 141 | "editorMode": "code", 142 | "expr": "min(node_response_result_data_Pool_info_Reachable_nodes_count{job=\"tick\", network=\"[[network]]\"}) - max(node_response_result_data_Pool_info_Total_nodes_count{job=\"tick\", network=\"[[network]]\"} - node_response_result_data_Pool_info_f_value{job=\"tick\", network=\"[[network]]\"})", 143 | "hide": false, 144 | "legendFormat": "Warning Trigger", 145 | "range": true, 146 | "refId": "C" 147 | } 148 | ], 149 | "title": "Node Connectivity", 150 | "type": "timeseries" 151 | }, 152 | { 153 | "datasource": { 154 | "type": "prometheus", 155 | "uid": "PBFA97CFB590B2093" 156 | }, 157 | "fieldConfig": { 158 | "defaults": { 159 | "color": { 160 | "mode": "palette-classic" 161 | }, 162 | "custom": { 163 | "axisCenteredZero": false, 164 | "axisColorMode": "text", 165 | "axisLabel": "", 166 | "axisPlacement": "auto", 167 | "barAlignment": 0, 168 | "drawStyle": "line", 169 | "fillOpacity": 0, 170 | "gradientMode": "none", 171 | "hideFrom": { 172 | "legend": false, 173 | "tooltip": false, 174 | "viz": false 175 | }, 176 | "lineInterpolation": "linear", 177 | "lineWidth": 1, 178 | "pointSize": 5, 179 | "scaleDistribution": { 180 | "type": "linear" 181 | }, 182 | "showPoints": "auto", 183 | "spanNulls": false, 184 | "stacking": { 185 | "group": "A", 186 | "mode": "none" 187 | }, 188 | "thresholdsStyle": { 189 | "mode": "off" 190 | } 191 | }, 192 | "mappings": [], 193 | "thresholds": { 194 | "mode": "absolute", 195 | "steps": [ 196 | { 197 | "color": "green", 198 | "value": null 199 | }, 200 | { 201 | "color": "red", 202 | "value": 80 203 | } 204 | ] 205 | } 206 | }, 207 | "overrides": [] 208 | }, 209 | "gridPos": { 210 | "h": 8, 211 | "w": 12, 212 | "x": 12, 213 | "y": 0 214 | }, 215 | "id": 4, 216 | "options": { 217 | "legend": { 218 | "calcs": [], 219 | "displayMode": "list", 220 | "placement": "bottom", 221 | "showLegend": true 222 | }, 223 | "tooltip": { 224 | "mode": "single", 225 | "sort": "none" 226 | } 227 | }, 228 | "pluginVersion": "9.3.6", 229 | "targets": [ 230 | { 231 | "datasource": { 232 | "type": "prometheus", 233 | "uid": 
"PBFA97CFB590B2093" 234 | }, 235 | "editorMode": "code", 236 | "expr": "node_status_errors{job=\"tick\", network=\"[[network]]\"}", 237 | "legendFormat": "{{label_name}}", 238 | "range": true, 239 | "refId": "A" 240 | } 241 | ], 242 | "title": "WIP", 243 | "transformations": [ 244 | { 245 | "id": "labelsToFields", 246 | "options": { 247 | "keepLabels": [ 248 | "name", 249 | "network" 250 | ], 251 | "valueLabel": "name" 252 | } 253 | } 254 | ], 255 | "type": "timeseries" 256 | } 257 | ], 258 | "schemaVersion": 37, 259 | "style": "dark", 260 | "tags": [], 261 | "templating": { 262 | "list": [ 263 | { 264 | "current": { 265 | "selected": false, 266 | "text": "Sovrin Builder Net", 267 | "value": "Sovrin Builder Net" 268 | }, 269 | "datasource": { 270 | "type": "prometheus", 271 | "uid": "PBFA97CFB590B2093" 272 | }, 273 | "definition": "label_values(network)", 274 | "description": "Select a Network.", 275 | "hide": 0, 276 | "includeAll": false, 277 | "label": "Network", 278 | "multi": false, 279 | "name": "network", 280 | "options": [], 281 | "query": { 282 | "query": "label_values(network)", 283 | "refId": "StandardVariableQuery" 284 | }, 285 | "refresh": 1, 286 | "regex": "", 287 | "skipUrlSync": false, 288 | "sort": 0, 289 | "type": "query" 290 | } 291 | ] 292 | }, 293 | "time": { 294 | "from": "now-5m", 295 | "to": "now" 296 | }, 297 | "timepicker": {}, 298 | "timezone": "", 299 | "title": "Alerts Dashboard", 300 | "uid": "pPcqoSfnk", 301 | "version": 1, 302 | "weekStart": "" 303 | } -------------------------------------------------------------------------------- /grafana/provisioning/dashboards/TestDashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "datasource", 8 | "uid": "grafana" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "editable": true, 25 | "fiscalYearStartMonth": 0, 26 | "graphTooltip": 1, 27 | "links": [], 28 | "liveNow": false, 29 | "panels": [ 30 | { 31 | "collapsed": false, 32 | "datasource": { 33 | "type": "prometheus", 34 | "uid": "PBFA97CFB590B2093" 35 | }, 36 | "gridPos": { 37 | "h": 1, 38 | "w": 24, 39 | "x": 0, 40 | "y": 0 41 | }, 42 | "id": 4, 43 | "panels": [], 44 | "targets": [ 45 | { 46 | "datasource": { 47 | "type": "prometheus", 48 | "uid": "PBFA97CFB590B2093" 49 | }, 50 | "refId": "A" 51 | } 52 | ], 53 | "title": "Row title", 54 | "type": "row" 55 | }, 56 | { 57 | "aliasColors": {}, 58 | "bars": false, 59 | "dashLength": 10, 60 | "dashes": false, 61 | "datasource": { 62 | "type": "influxdb", 63 | "uid": "P951FEA4DE68E13C5" 64 | }, 65 | "fieldConfig": { 66 | "defaults": { 67 | "unit": "short" 68 | }, 69 | "overrides": [] 70 | }, 71 | "fill": 1, 72 | "fillGradient": 1, 73 | "gridPos": { 74 | "h": 8, 75 | "w": 17, 76 | "x": 0, 77 | "y": 1 78 | }, 79 | "hiddenSeries": false, 80 | "id": 6, 81 | "legend": { 82 | "avg": false, 83 | "current": false, 84 | "max": false, 85 | "min": false, 86 | "show": true, 87 | "total": false, 88 | "values": false 89 | }, 90 | "lines": true, 91 | "linewidth": 1, 92 | "nullPointMode": "null", 93 | "options": { 94 | "alertThreshold": false 95 | }, 96 | "percentage": false, 97 | "pluginVersion": "9.2.3", 98 | "pointradius": 2, 99 | "points": false, 100 | 
"renderer": "flot", 101 | "seriesOverrides": [], 102 | "spaceLength": 10, 103 | "stack": false, 104 | "steppedLine": false, 105 | "targets": [ 106 | { 107 | "alias": "Total Nodes", 108 | "datasource": { 109 | "type": "influxdb", 110 | "uid": "P951FEA4DE68E13C5" 111 | }, 112 | "groupBy": [ 113 | { 114 | "params": [ 115 | "$__interval" 116 | ], 117 | "type": "time" 118 | }, 119 | { 120 | "params": [ 121 | "none" 122 | ], 123 | "type": "fill" 124 | } 125 | ], 126 | "measurement": "node", 127 | "orderByTime": "ASC", 128 | "policy": "default", 129 | "refId": "A", 130 | "resultFormat": "time_series", 131 | "select": [ 132 | [ 133 | { 134 | "params": [ 135 | "response_result_data_Pool_info_Total_nodes_count" 136 | ], 137 | "type": "field" 138 | }, 139 | { 140 | "params": [], 141 | "type": "mean" 142 | } 143 | ] 144 | ], 145 | "tags": [ 146 | { 147 | "key": "name", 148 | "operator": "=~", 149 | "value": "/^$node$/" 150 | } 151 | ] 152 | }, 153 | { 154 | "alias": "Reachable Nodes", 155 | "datasource": { 156 | "type": "influxdb", 157 | "uid": "P951FEA4DE68E13C5" 158 | }, 159 | "groupBy": [ 160 | { 161 | "params": [ 162 | "$__interval" 163 | ], 164 | "type": "time" 165 | }, 166 | { 167 | "params": [ 168 | "none" 169 | ], 170 | "type": "fill" 171 | } 172 | ], 173 | "hide": false, 174 | "measurement": "node", 175 | "orderByTime": "ASC", 176 | "policy": "default", 177 | "refId": "B", 178 | "resultFormat": "time_series", 179 | "select": [ 180 | [ 181 | { 182 | "params": [ 183 | "response_result_data_Pool_info_Reachable_nodes_count" 184 | ], 185 | "type": "field" 186 | }, 187 | { 188 | "params": [], 189 | "type": "mean" 190 | } 191 | ] 192 | ], 193 | "tags": [ 194 | { 195 | "key": "name", 196 | "operator": "=~", 197 | "value": "/^$node$/" 198 | } 199 | ] 200 | }, 201 | { 202 | "alias": "f_value", 203 | "datasource": { 204 | "type": "influxdb", 205 | "uid": "P951FEA4DE68E13C5" 206 | }, 207 | "groupBy": [ 208 | { 209 | "params": [ 210 | "$__interval" 211 | ], 212 | "type": "time" 213 | }, 214 | { 215 | "params": [ 216 | "none" 217 | ], 218 | "type": "fill" 219 | } 220 | ], 221 | "hide": false, 222 | "measurement": "node", 223 | "orderByTime": "ASC", 224 | "policy": "default", 225 | "refId": "C", 226 | "resultFormat": "time_series", 227 | "select": [ 228 | [ 229 | { 230 | "params": [ 231 | "response_result_data_Pool_info_f_value" 232 | ], 233 | "type": "field" 234 | }, 235 | { 236 | "params": [], 237 | "type": "mean" 238 | } 239 | ] 240 | ], 241 | "tags": [ 242 | { 243 | "key": "name", 244 | "operator": "=~", 245 | "value": "/^$node$/" 246 | } 247 | ] 248 | }, 249 | { 250 | "alias": "Unreachable Nodes", 251 | "datasource": { 252 | "type": "influxdb", 253 | "uid": "P951FEA4DE68E13C5" 254 | }, 255 | "groupBy": [ 256 | { 257 | "params": [ 258 | "$__interval" 259 | ], 260 | "type": "time" 261 | }, 262 | { 263 | "params": [ 264 | "none" 265 | ], 266 | "type": "fill" 267 | } 268 | ], 269 | "hide": false, 270 | "measurement": "node", 271 | "orderByTime": "ASC", 272 | "policy": "default", 273 | "refId": "D", 274 | "resultFormat": "time_series", 275 | "select": [ 276 | [ 277 | { 278 | "params": [ 279 | "response_result_data_Pool_info_Unreachable_nodes_count" 280 | ], 281 | "type": "field" 282 | }, 283 | { 284 | "params": [], 285 | "type": "mean" 286 | } 287 | ] 288 | ], 289 | "tags": [ 290 | { 291 | "key": "name", 292 | "operator": "=~", 293 | "value": "/^$node$/" 294 | } 295 | ] 296 | } 297 | ], 298 | "thresholds": [], 299 | "timeRegions": [], 300 | "title": "Node Connectivity", 301 | "tooltip": { 302 | "shared": 
true, 303 | "sort": 0, 304 | "value_type": "individual" 305 | }, 306 | "transparent": true, 307 | "type": "graph", 308 | "xaxis": { 309 | "mode": "time", 310 | "show": true, 311 | "values": [] 312 | }, 313 | "yaxes": [ 314 | { 315 | "format": "short", 316 | "logBase": 1, 317 | "show": true 318 | }, 319 | { 320 | "format": "short", 321 | "logBase": 1, 322 | "show": true 323 | } 324 | ], 325 | "yaxis": { 326 | "align": false 327 | } 328 | }, 329 | { 330 | "circleMaxSize": 30, 331 | "circleMinSize": 2, 332 | "colors": [ 333 | "rgba(245, 54, 54, 0.9)", 334 | "rgba(237, 129, 40, 0.89)", 335 | "rgba(50, 172, 45, 0.97)" 336 | ], 337 | "datasource": { 338 | "type": "prometheus", 339 | "uid": "PBFA97CFB590B2093" 340 | }, 341 | "decimals": 0, 342 | "esMetric": "Count", 343 | "gridPos": { 344 | "h": 7, 345 | "w": 7, 346 | "x": 17, 347 | "y": 1 348 | }, 349 | "hideEmpty": false, 350 | "hideZero": false, 351 | "id": 8, 352 | "initialZoom": 1, 353 | "locationData": "countries", 354 | "mapCenter": "(0°, 0°)", 355 | "mapCenterLatitude": 0, 356 | "mapCenterLongitude": 0, 357 | "maxDataPoints": 1, 358 | "mouseWheelZoom": false, 359 | "pluginVersion": "7.5.7", 360 | "showLegend": true, 361 | "stickyLabels": false, 362 | "tableQueryOptions": { 363 | "geohashField": "geohash", 364 | "latitudeField": "latitude", 365 | "longitudeField": "longitude", 366 | "metricField": "metric", 367 | "queryType": "geohash" 368 | }, 369 | "targets": [ 370 | { 371 | "datasource": { 372 | "type": "prometheus", 373 | "uid": "PBFA97CFB590B2093" 374 | }, 375 | "groupBy": [ 376 | { 377 | "params": [ 378 | "$__interval" 379 | ], 380 | "type": "time" 381 | }, 382 | { 383 | "params": [ 384 | "null" 385 | ], 386 | "type": "fill" 387 | } 388 | ], 389 | "orderByTime": "ASC", 390 | "policy": "default", 391 | "refId": "A", 392 | "resultFormat": "time_series", 393 | "select": [ 394 | [ 395 | { 396 | "params": [ 397 | "value" 398 | ], 399 | "type": "field" 400 | }, 401 | { 402 | "params": [], 403 | "type": "mean" 404 | } 405 | ] 406 | ], 407 | "tags": [] 408 | } 409 | ], 410 | "thresholds": "0,10", 411 | "title": "Panel Title", 412 | "type": "grafana-worldmap-panel", 413 | "unitPlural": "", 414 | "unitSingle": "", 415 | "valueName": "total" 416 | }, 417 | { 418 | "aliasColors": {}, 419 | "bars": false, 420 | "dashLength": 10, 421 | "dashes": false, 422 | "datasource": { 423 | "type": "influxdb", 424 | "uid": "P951FEA4DE68E13C5" 425 | }, 426 | "fill": 1, 427 | "fillGradient": 5, 428 | "gridPos": { 429 | "h": 7, 430 | "w": 5, 431 | "x": 0, 432 | "y": 9 433 | }, 434 | "hiddenSeries": false, 435 | "id": 2, 436 | "legend": { 437 | "avg": false, 438 | "current": false, 439 | "max": false, 440 | "min": false, 441 | "show": true, 442 | "total": false, 443 | "values": false 444 | }, 445 | "lines": true, 446 | "linewidth": 1, 447 | "nullPointMode": "null", 448 | "options": { 449 | "alertThreshold": true 450 | }, 451 | "percentage": false, 452 | "pluginVersion": "9.2.3", 453 | "pointradius": 2, 454 | "points": false, 455 | "renderer": "flot", 456 | "seriesOverrides": [], 457 | "spaceLength": 10, 458 | "stack": false, 459 | "steppedLine": false, 460 | "targets": [ 461 | { 462 | "alias": "Txns per Second", 463 | "datasource": { 464 | "type": "influxdb", 465 | "uid": "P951FEA4DE68E13C5" 466 | }, 467 | "groupBy": [ 468 | { 469 | "params": [ 470 | "$__interval" 471 | ], 472 | "type": "time" 473 | }, 474 | { 475 | "params": [ 476 | "none" 477 | ], 478 | "type": "fill" 479 | } 480 | ], 481 | "measurement": "node", 482 | "orderByTime": "ASC", 483 | "policy": 
"default", 484 | "refId": "A", 485 | "resultFormat": "time_series", 486 | "select": [ 487 | [ 488 | { 489 | "params": [ 490 | "response_result_data_Node_info_Metrics_average-per-second_read-transactions" 491 | ], 492 | "type": "field" 493 | }, 494 | { 495 | "params": [], 496 | "type": "mean" 497 | } 498 | ] 499 | ], 500 | "tags": [ 501 | { 502 | "key": "name", 503 | "operator": "=~", 504 | "value": "/^$node$/" 505 | } 506 | ] 507 | } 508 | ], 509 | "thresholds": [], 510 | "timeRegions": [], 511 | "title": "Read Txns per Second (Avg)", 512 | "tooltip": { 513 | "shared": true, 514 | "sort": 0, 515 | "value_type": "individual" 516 | }, 517 | "type": "graph", 518 | "xaxis": { 519 | "mode": "time", 520 | "show": true, 521 | "values": [] 522 | }, 523 | "yaxes": [ 524 | { 525 | "$$hashKey": "object:418", 526 | "format": "short", 527 | "logBase": 1, 528 | "show": true 529 | }, 530 | { 531 | "$$hashKey": "object:419", 532 | "format": "short", 533 | "logBase": 1, 534 | "show": true 535 | } 536 | ], 537 | "yaxis": { 538 | "align": false 539 | } 540 | }, 541 | { 542 | "datasource": { 543 | "type": "influxdb", 544 | "uid": "P35862FFA6F3BFED8" 545 | }, 546 | "fieldConfig": { 547 | "defaults": { 548 | "color": { 549 | "mode": "palette-classic" 550 | }, 551 | "custom": { 552 | "axisCenteredZero": false, 553 | "axisColorMode": "text", 554 | "axisLabel": "", 555 | "axisPlacement": "auto", 556 | "barAlignment": 0, 557 | "drawStyle": "line", 558 | "fillOpacity": 50, 559 | "gradientMode": "opacity", 560 | "hideFrom": { 561 | "legend": false, 562 | "tooltip": false, 563 | "viz": false 564 | }, 565 | "lineInterpolation": "linear", 566 | "lineWidth": 1, 567 | "pointSize": 5, 568 | "scaleDistribution": { 569 | "type": "linear" 570 | }, 571 | "showPoints": "never", 572 | "spanNulls": true, 573 | "stacking": { 574 | "group": "A", 575 | "mode": "none" 576 | }, 577 | "thresholdsStyle": { 578 | "mode": "off" 579 | } 580 | }, 581 | "mappings": [], 582 | "thresholds": { 583 | "mode": "absolute", 584 | "steps": [ 585 | { 586 | "color": "green", 587 | "value": null 588 | }, 589 | { 590 | "color": "red", 591 | "value": 80 592 | } 593 | ] 594 | }, 595 | "unit": "short" 596 | }, 597 | "overrides": [] 598 | }, 599 | "gridPos": { 600 | "h": 7, 601 | "w": 5, 602 | "x": 5, 603 | "y": 9 604 | }, 605 | "id": 9, 606 | "options": { 607 | "legend": { 608 | "calcs": [], 609 | "displayMode": "list", 610 | "placement": "bottom", 611 | "showLegend": true 612 | }, 613 | "tooltip": { 614 | "mode": "single", 615 | "sort": "none" 616 | } 617 | }, 618 | "pluginVersion": "8.0.1", 619 | "targets": [ 620 | { 621 | "alias": "Txns per Second", 622 | "datasource": { 623 | "type": "influxdb", 624 | "uid": "P35862FFA6F3BFED8" 625 | }, 626 | "groupBy": [ 627 | { 628 | "params": [ 629 | "$__interval" 630 | ], 631 | "type": "time" 632 | }, 633 | { 634 | "params": [ 635 | "none" 636 | ], 637 | "type": "fill" 638 | } 639 | ], 640 | "measurement": "node", 641 | "orderByTime": "ASC", 642 | "policy": "default", 643 | "query": "from(bucket: v.defaultBucket)\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn:(r) =>\r\n r._measurement == \"node\" and \r\n r._field == \"response_result_data_Node_info_Metrics_average-per-second_write-transactions\" and\r\n r.name == \"${node}\"\r\n )\r\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\r\n ", 644 | "refId": "A", 645 | "resultFormat": "time_series", 646 | "select": [ 647 | [ 648 | { 649 | "params": [ 650 | "response_result_data_Node_info_Metrics_average-per-second_read-transactions" 
651 | ], 652 | "type": "field" 653 | }, 654 | { 655 | "params": [], 656 | "type": "mean" 657 | } 658 | ] 659 | ], 660 | "tags": [ 661 | { 662 | "key": "name", 663 | "operator": "=~", 664 | "value": "/^$node$/" 665 | } 666 | ] 667 | } 668 | ], 669 | "title": "Write Txns per Second (Avg)", 670 | "type": "timeseries" 671 | } 672 | ], 673 | "refresh": false, 674 | "schemaVersion": 37, 675 | "style": "dark", 676 | "tags": [], 677 | "templating": { 678 | "list": [ 679 | { 680 | "current": { 681 | "selected": false, 682 | "text": "Absa", 683 | "value": "Absa" 684 | }, 685 | "datasource": { 686 | "type": "influxdb", 687 | "uid": "P951FEA4DE68E13C5" 688 | }, 689 | "definition": "show tag values on telegraf with key = \"name\"", 690 | "description": "Select a node.", 691 | "hide": 0, 692 | "includeAll": false, 693 | "label": "Node", 694 | "multi": false, 695 | "name": "node", 696 | "options": [], 697 | "query": "show tag values on telegraf with key = \"name\"", 698 | "refresh": 2, 699 | "regex": "", 700 | "skipUrlSync": false, 701 | "sort": 0, 702 | "tagValuesQuery": "", 703 | "tagsQuery": "", 704 | "type": "query", 705 | "useTags": false 706 | } 707 | ] 708 | }, 709 | "time": { 710 | "from": "now-30m", 711 | "to": "now" 712 | }, 713 | "timepicker": {}, 714 | "timezone": "", 715 | "title": "Test Dashboard", 716 | "uid": "wuu-CJqGz", 717 | "version": 1, 718 | "weekStart": "" 719 | } -------------------------------------------------------------------------------- /grafana/provisioning/dashboards/dashboard.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'Prometheus' 5 | orgId: 1 6 | folder: '' 7 | type: file 8 | disableDeletion: false 9 | editable: true 10 | options: 11 | path: /etc/grafana/provisioning/dashboards 12 | -------------------------------------------------------------------------------- /grafana/provisioning/datasources/datasource.yml: -------------------------------------------------------------------------------- 1 | # Datasource settings can be queried from Grafana: 2 | # http://localhost:3000/api/datasources 3 | 4 | # config file version 5 | apiVersion: 1 6 | 7 | # list of datasources that should be deleted from the database 8 | deleteDatasources: 9 | - name: Prometheus 10 | orgId: 1 11 | - name: InfluxDB 12 | orgId: 1 13 | 14 | # List of datasources to insert/update depending whats available in the database 15 | datasources: 16 | # name of the datasource. Required 17 | - name: Prometheus 18 | # datasource type. Required 19 | type: prometheus 20 | # access mode. direct or proxy. Required 21 | access: proxy 22 | # org id. will default to orgId 1 if not specified 23 | orgId: 1 24 | # url 25 | url: http://prometheus:9090 26 | # database password, if used 27 | password: 28 | # database user, if used 29 | user: 30 | # database name, if used 31 | database: 32 | # enable/disable basic auth 33 | basicAuth: false 34 | # basic auth username, if used 35 | basicAuthUser: 36 | # basic auth password, if used 37 | basicAuthPassword: 38 | # enable/disable with credentials headers 39 | withCredentials: 40 | # mark as default datasource. Max one per org 41 | isDefault: true 42 | # fields that will be converted to json and stored in json_data 43 | jsonData: 44 | graphiteVersion: "1.1" 45 | tlsAuth: false 46 | tlsAuthWithCACert: false 47 | # json object of data that will be encrypted. 48 | secureJsonData: 49 | tlsCACert: "..." 50 | tlsClientCert: "..." 51 | tlsClientKey: "..." 
52 | version: 1 53 | # allow users to edit datasources from the UI. 54 | editable: true 55 | 56 | - name: InfluxDB 57 | type: influxdb 58 | typeName: InfluxDB 59 | typeLogoUrl: public/app/plugins/datasource/influxdb/img/influxdb_logo.svg 60 | access: proxy 61 | orgId: 1 62 | url: http://influxdb:8086 63 | password: '' 64 | user: '' 65 | database: telegraf 66 | basicAuth: false 67 | isDefault: false 68 | # jsonData: 69 | # defaultBucket: telegraf 70 | # httpMode: POST 71 | # organization: InfluxDB 72 | # timeInterval: '' 73 | # tlsSkipVerify: false 74 | # version: InfluxQL 75 | readOnly: false 76 | 77 | # https://docs.influxdata.com/influxdb/v2.0/tools/repl/ 78 | # https://grafana.com/docs/grafana/latest/administration/provisioning/ 79 | - name: InfluxDB-Flux 80 | type: influxdb 81 | typeName: InfluxDB 82 | typeLogoUrl: public/app/plugins/datasource/influxdb/img/influxdb_logo.svg 83 | access: proxy 84 | orgId: 1 85 | url: http://influxdb:8086 86 | password: '' 87 | user: '' 88 | database: 89 | basicAuth: false 90 | isDefault: false 91 | jsonData: 92 | defaultBucket: telegraf/autogen 93 | httpMode: POST 94 | organization: example-org 95 | version: Flux 96 | secureJsonData: 97 | token: ${INFLUX_DB_FLUX_TOKEN} 98 | readOnly: false 99 | -------------------------------------------------------------------------------- /images/grafana/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GRAFANA_TAG 2 | FROM grafana/grafana:$GRAFANA_TAG 3 | 4 | RUN grafana-cli plugins install grafana-worldmap-panel -------------------------------------------------------------------------------- /images/influxdb/latest/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG INFLUXDB_TAG 2 | FROM influxdb:$INFLUXDB_TAG 3 | 4 | -------------------------------------------------------------------------------- /images/influxdb/nightly/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG INFLUXDB_TAG 2 | FROM quay.io/influxdb/influxdb:$INFLUXDB_TAG 3 | -------------------------------------------------------------------------------- /images/telegraf/latest/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TELEGRAF_TAG 2 | FROM telegraf:$TELEGRAF_TAG 3 | 4 | -------------------------------------------------------------------------------- /images/telegraf/nightly/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TELEGRAF_TAG 2 | FROM telegraf:$TELEGRAF_TAG 3 | 4 | -------------------------------------------------------------------------------- /influxdb/config/influxdb.conf: -------------------------------------------------------------------------------- 1 | reporting-disabled = false 2 | bind-address = ":8088" 3 | 4 | [meta] 5 | dir = "/var/lib/influxdb/meta" 6 | retention-autocreate = true 7 | logging-enabled = true 8 | 9 | [data] 10 | dir = "/var/lib/influxdb/data" 11 | wal-dir = "/var/lib/influxdb/wal" 12 | query-log-enabled = true 13 | cache-max-memory-size = 1073741824 14 | cache-snapshot-memory-size = 26214400 15 | cache-snapshot-write-cold-duration = "10m0s" 16 | compact-full-write-cold-duration = "4h0m0s" 17 | max-series-per-database = 1000000 18 | max-values-per-tag = 100000 19 | index-version = "tsi1" 20 | trace-logging-enabled = false 21 | 22 | [coordinator] 23 | write-timeout = "10s" 24 | max-concurrent-queries = 0 25 | query-timeout = "0s" 26 | log-queries-after = "0s" 27 
| max-select-point = 0 28 | max-select-series = 0 29 | max-select-buckets = 0 30 | 31 | [retention] 32 | enabled = true 33 | check-interval = "30m0s" 34 | 35 | [shard-precreation] 36 | enabled = true 37 | check-interval = "10m0s" 38 | advance-period = "30m0s" 39 | 40 | [monitor] 41 | store-enabled = true 42 | store-database = "_internal" 43 | store-interval = "10s" 44 | 45 | [subscriber] 46 | enabled = true 47 | http-timeout = "30s" 48 | insecure-skip-verify = false 49 | ca-certs = "" 50 | write-concurrency = 40 51 | write-buffer-size = 1000 52 | 53 | [http] 54 | enabled = true 55 | flux-enabled = true 56 | bind-address = ":8086" 57 | auth-enabled = false 58 | log-enabled = true 59 | write-tracing = false 60 | pprof-enabled = true 61 | https-enabled = false 62 | https-certificate = "/etc/ssl/influxdb.pem" 63 | https-private-key = "" 64 | max-row-limit = 0 65 | max-connection-limit = 0 66 | shared-secret = "" 67 | realm = "InfluxDB" 68 | unix-socket-enabled = false 69 | bind-socket = "/var/run/influxdb.sock" 70 | 71 | [[graphite]] 72 | enabled = false 73 | bind-address = ":2003" 74 | database = "graphite" 75 | retention-policy = "" 76 | protocol = "tcp" 77 | batch-size = 5000 78 | batch-pending = 10 79 | batch-timeout = "1s" 80 | consistency-level = "one" 81 | separator = "." 82 | udp-read-buffer = 0 83 | 84 | [[collectd]] 85 | enabled = false 86 | bind-address = ":25826" 87 | database = "collectd" 88 | retention-policy = "" 89 | batch-size = 5000 90 | batch-pending = 10 91 | batch-timeout = "10s" 92 | read-buffer = 0 93 | typesdb = "/usr/share/collectd/types.db" 94 | security-level = "none" 95 | auth-file = "/etc/collectd/auth_file" 96 | 97 | [[opentsdb]] 98 | enabled = false 99 | bind-address = ":4242" 100 | database = "opentsdb" 101 | retention-policy = "" 102 | consistency-level = "one" 103 | tls-enabled = false 104 | certificate = "/etc/ssl/influxdb.pem" 105 | batch-size = 1000 106 | batch-pending = 5 107 | batch-timeout = "1s" 108 | log-point-errors = true 109 | 110 | [[udp]] 111 | enabled = true 112 | bind-address = ":8089" 113 | database = "udp" 114 | retention-policy = "" 115 | batch-size = 5000 116 | batch-pending = 10 117 | read-buffer = 0 118 | batch-timeout = "1s" 119 | precision = "" 120 | 121 | [continuous_queries] 122 | log-enabled = true 123 | enabled = true 124 | run-interval = "1s" 125 | 126 | -------------------------------------------------------------------------------- /prometheus/alert.rules: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: example 3 | rules: 4 | 5 | # Alert for any instance that is unreachable for >2 minutes. 6 | - alert: service_down 7 | expr: up == 0 8 | for: 2m 9 | labels: 10 | severity: page 11 | annotations: 12 | summary: "Instance {{ $labels.instance }} down" 13 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes." 14 | 15 | - alert: high_load 16 | expr: node_load1 > 0.5 17 | for: 2m 18 | labels: 19 | severity: page 20 | annotations: 21 | summary: "Instance {{ $labels.instance }} under high load" 22 | description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load." 23 | -------------------------------------------------------------------------------- /prometheus/prometheus.yml: -------------------------------------------------------------------------------- 1 | # my global config 2 | global: 3 | scrape_interval: 15s # By default, scrape targets every 15 seconds. 
4 | evaluation_interval: 15s # Evaluate rules every 15 seconds. 5 | # scrape_timeout is set to the global default (10s). 6 | 7 | # Attach these labels to any time series or alerts when communicating with 8 | # external systems (federation, remote storage, Alertmanager). 9 | external_labels: 10 | monitor: 'my-project' 11 | 12 | # Load and evaluate rules in this file every 'evaluation_interval' seconds. 13 | rule_files: 14 | - 'alert.rules' 15 | # - "first.rules" 16 | # - "second.rules" 17 | 18 | # alert 19 | alerting: 20 | alertmanagers: 21 | - scheme: http 22 | static_configs: 23 | - targets: 24 | - "alertmanager:9093" 25 | 26 | # A scrape configuration containing exactly one endpoint to scrape: 27 | # Here it's Prometheus itself. 28 | scrape_configs: 29 | # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. 30 | 31 | - job_name: 'tick' 32 | scrape_interval: 1m 33 | static_configs: 34 | - targets: ['telegraf:9273'] 35 | 36 | - job_name: 'prometheus' 37 | 38 | # Override the global default and scrape targets from this job every 5 seconds. 39 | scrape_interval: 5s 40 | 41 | static_configs: 42 | - targets: ['localhost:9090'] -------------------------------------------------------------------------------- /telegraf/.env-telegraf: -------------------------------------------------------------------------------- 1 | HOSTNAME=telegraf 2 | SBN_SEED=${SBN_SEED} 3 | SSN_SEED=${SSN_SEED} 4 | SMN_SEED=${SMN_SEED} -------------------------------------------------------------------------------- /telegraf/telegraf.conf: -------------------------------------------------------------------------------- 1 | [agent] 2 | interval = "1m" 3 | round_interval = true 4 | metric_batch_size = 1000 5 | metric_buffer_limit = 10000 6 | collection_jitter = "0s" 7 | flush_interval = "5s" 8 | flush_jitter = "0s" 9 | precision = "" 10 | debug = false 11 | quiet = false 12 | logfile = "" 13 | hostname = "$HOSTNAME" 14 | omit_hostname = false 15 | 16 | [[outputs.influxdb]] 17 | urls = ["http://influxdb:8086"] 18 | database = "telegraf" 19 | username = "" 20 | password = "" 21 | retention_policy = "" 22 | write_consistency = "any" 23 | timeout = "5s" 24 | 25 | [[outputs.prometheus_client]] 26 | ## Address to listen on. 27 | listen = ":9273" 28 | 29 | ## Metric version controls the mapping from Telegraf metrics into 30 | ## Prometheus format. When using the prometheus input, use the same value in 31 | ## both plugins to ensure metrics are round-tripped without modification. 32 | ## 33 | ## example: metric_version = 1; 34 | ## metric_version = 2; recommended version 35 | metric_version = 2 36 | 37 | ## Use HTTP Basic Authentication. 38 | # basic_username = "Foo" 39 | # basic_password = "Bar" 40 | 41 | ## If set, the IP Ranges which are allowed to access metrics. 42 | ## ex: ip_range = ["192.168.0.0/24", "192.168.1.0/30"] 43 | # ip_range = [] 44 | 45 | ## Path to publish the metrics on. 46 | # path = "/metrics" 47 | 48 | ## Expiration interval for each metric. 0 == no expiration 49 | # expiration_interval = "60s" 50 | 51 | ## Collectors to enable, valid entries are "gocollector" and "process". 52 | ## If unset, both are enabled. 53 | # collectors_exclude = ["gocollector", "process"] 54 | 55 | ## Send string metrics as Prometheus labels. 56 | ## Unless set to false all string metrics will be sent as labels. 57 | # string_as_label = true 58 | 59 | ## If set, enable TLS with the given certificate.
60 | # tls_cert = "/etc/ssl/telegraf.crt" 61 | # tls_key = "/etc/ssl/telegraf.key" 62 | 63 | ## Set one or more allowed client CA certificate file names to 64 | ## enable mutually authenticated TLS connections 65 | # tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] 66 | 67 | ## Export metric collection time. 68 | # export_timestamp = false 69 | 70 | [[inputs.http]] 71 | ## One or more URLs from which to read formatted metrics 72 | urls = [ 73 | "http://indy-node-monitor:8080/networks/sbn" 74 | ] 75 | name_override = "node" 76 | 77 | ## HTTP method 78 | method = "GET" 79 | 80 | ## Optional HTTP headers 81 | headers = {"seed" = "${SBN_SEED}"} 82 | 83 | ## HTTP entity-body to send with POST/PUT requests. 84 | # body = "" 85 | 86 | ## HTTP Content-Encoding for write request body, can be set to "gzip" to 87 | ## compress body or "identity" to apply no encoding. 88 | # content_encoding = "identity" 89 | 90 | ## Optional file with Bearer token 91 | ## file content is added as an Authorization header 92 | # bearer_token = "/path/to/file" 93 | 94 | ## Optional HTTP Basic Auth Credentials 95 | # username = "username" 96 | # password = "pa$$word" 97 | 98 | ## OAuth2 Client Credentials. The options 'client_id', 'client_secret', and 'token_url' are required to use OAuth2. 99 | # client_id = "clientid" 100 | # client_secret = "secret" 101 | # token_url = "https://indentityprovider/oauth2/v1/token" 102 | # scopes = ["urn:opc:idm:__myscopes__"] 103 | 104 | ## HTTP Proxy support 105 | # http_proxy_url = "" 106 | 107 | ## Optional TLS Config 108 | # tls_ca = "/etc/telegraf/ca.pem" 109 | # tls_cert = "/etc/telegraf/cert.pem" 110 | # tls_key = "/etc/telegraf/key.pem" 111 | ## Use TLS but skip chain & host verification 112 | # insecure_skip_verify = false 113 | 114 | ## Amount of time allowed to complete the HTTP request 115 | timeout = "2m" 116 | 117 | ## List of success status codes 118 | success_status_codes = [200] 119 | 120 | ## Data format to consume. 121 | ## Each data format has its own unique set of configuration options, read 122 | ## more about them here: 123 | ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md 124 | data_format = "json" 125 | 126 | ## When strict is true and a JSON array is being parsed, all objects within the 127 | ## array must be valid 128 | json_strict = true 129 | 130 | ## Query is a GJSON path that specifies a specific chunk of JSON to be 131 | ## parsed, if not specified the whole document will be parsed. 132 | ## 133 | ## GJSON query paths are described here: 134 | ## https://github.com/tidwall/gjson/tree/v1.3.0#path-syntax 135 | json_query = "" 136 | 137 | ## Tag keys is an array of keys that should be added as tags. Matching keys 138 | ## are no longer saved as fields. Supports wildcard glob matching. 139 | tag_keys = [ 140 | "name", 141 | "network" 142 | ] 143 | 144 | ## Array of glob pattern strings or booleans keys that should be added as string fields. 
145 | json_string_fields = [ 146 | "client-address", 147 | "node-address", 148 | "status_software_sovrin", 149 | "status_software_indy-node", 150 | ##Internal Node Ports and IPs 151 | "response_result_data_Node_info_Node_ip", 152 | "response_result_data_Node_info_Node_port", 153 | "response_result_data_Node_info_Client_ip", 154 | "response_result_data_Node_info_Client_port", 155 | "response_result_data_Node_info_Node_protocol", 156 | 157 | "status_ok", 158 | "response_result_data_Node_info_Mode", 159 | "response_result_data_Node_info_BLS_key", 160 | "response_result_data_Hardware_HDD_used_by_node", 161 | "response_result_data_Pool_info_Read_only", 162 | "response_result_data_Pool_info_Suspicious_nodes", 163 | "response_result_data_Pool_info_Quorums", 164 | "response_result_data_Software_OS_version", 165 | 166 | ##Doesn't Display 167 | "response_result_data_Node_info_verkey", 168 | "response_result_data_Node_info_Catchup_status_Ledger_statuses", 169 | "response_result_data_Node_info_Catchup_status_Waiting_consistency_proof_msgs", 170 | "response_result_data_Node_info_Catchup_status_Last_txn_3PC_keys", 171 | "response_result_data_Node_info_Committed_ledger_root_hashes", 172 | "response_result_data_Node_info_Committed_state_root_hashes", 173 | "response_result_data_Node_info_Uncommitted_state_root_hashes", 174 | "response_result_data_Node_info_did", 175 | "response_result_data_Extractions_indy-node-status", 176 | "response_result_data_Extractions_node-control-status", 177 | "response_result_data_Extractions_upgrade_log", 178 | "response_result_data_Extractions_journalctl_exceptions", 179 | ##Not sure if Reachable Nodes is a string or not 180 | "response_result_data_Pool_info_Reachable_nodes", 181 | "response_result_data_Pool_info_Unreachable_nodes", 182 | "response_result_data_Software_Installed_packages", 183 | "response_result_data_Software_Indy_packages" 184 | ] 185 | 186 | ## Name key is the key to use as the measurement name. 187 | # json_name_key = "name" 188 | 189 | ## Time key is the key containing the time that should be used to create the 190 | ## metric. 191 | json_time_key = "status_timestamp" 192 | 193 | ## Time format is the time layout that should be used to interpret the json_time_key. 194 | ## The time must be `unix`, `unix_ms`, `unix_us`, `unix_ns`, or a time in the 195 | ## "reference time". To define a different format, arrange the values from 196 | ## the "reference time" in the example to match the format you will be 197 | ## using. For more information on the "reference time", visit 198 | ## https://golang.org/pkg/time/#Time.Format 199 | ## ex: json_time_format = "Mon Jan 2 15:04:05 -0700 MST 2006" 200 | ## json_time_format = "2006-01-02T15:04:05Z07:00" 201 | ## json_time_format = "01/02/2006 15:04:05" 202 | ## json_time_format = "unix" 203 | ## json_time_format = "unix_ms" 204 | json_time_format = "unix" 205 | 206 | ## Timezone allows you to provide an override for timestamps that 207 | ## don't already include an offset 208 | ## e.g. 04/06/2016 12:41:45 209 | ## 210 | ## Default: "" which renders UTC 211 | ## Options are as follows: 212 | ## 1. Local -- interpret based on machine localtime 213 | ## 2. "America/New_York" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 214 | ## 3. 
UTC -- or blank/unspecified, will return timestamp in UTC 215 | json_timezone = "" 216 | 217 | 218 | [[inputs.http]] 219 | ## One or more URLs from which to read formatted metrics 220 | urls = [ 221 | "http://indy-node-monitor:8080/networks/ssn" 222 | ] 223 | name_override = "node" 224 | 225 | ## HTTP method 226 | method = "GET" 227 | 228 | ## Optional HTTP headers 229 | headers = {"seed" = "${SSN_SEED}"} 230 | 231 | ## HTTP entity-body to send with POST/PUT requests. 232 | # body = "" 233 | 234 | ## HTTP Content-Encoding for write request body, can be set to "gzip" to 235 | ## compress body or "identity" to apply no encoding. 236 | # content_encoding = "identity" 237 | 238 | ## Optional file with Bearer token 239 | ## file content is added as an Authorization header 240 | # bearer_token = "/path/to/file" 241 | 242 | ## Optional HTTP Basic Auth Credentials 243 | # username = "username" 244 | # password = "pa$$word" 245 | 246 | ## OAuth2 Client Credentials. The options 'client_id', 'client_secret', and 'token_url' are required to use OAuth2. 247 | # client_id = "clientid" 248 | # client_secret = "secret" 249 | # token_url = "https://indentityprovider/oauth2/v1/token" 250 | # scopes = ["urn:opc:idm:__myscopes__"] 251 | 252 | ## HTTP Proxy support 253 | # http_proxy_url = "" 254 | 255 | ## Optional TLS Config 256 | # tls_ca = "/etc/telegraf/ca.pem" 257 | # tls_cert = "/etc/telegraf/cert.pem" 258 | # tls_key = "/etc/telegraf/key.pem" 259 | ## Use TLS but skip chain & host verification 260 | # insecure_skip_verify = false 261 | 262 | ## Amount of time allowed to complete the HTTP request 263 | timeout = "2m" 264 | 265 | ## List of success status codes 266 | success_status_codes = [200] 267 | 268 | ## Data format to consume. 269 | ## Each data format has its own unique set of configuration options, read 270 | ## more about them here: 271 | ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md 272 | data_format = "json" 273 | 274 | ## When strict is true and a JSON array is being parsed, all objects within the 275 | ## array must be valid 276 | json_strict = true 277 | 278 | ## Query is a GJSON path that specifies a specific chunk of JSON to be 279 | ## parsed, if not specified the whole document will be parsed. 280 | ## 281 | ## GJSON query paths are described here: 282 | ## https://github.com/tidwall/gjson/tree/v1.3.0#path-syntax 283 | json_query = "" 284 | 285 | ## Tag keys is an array of keys that should be added as tags. Matching keys 286 | ## are no longer saved as fields. Supports wildcard glob matching. 287 | tag_keys = [ 288 | "name", 289 | "network" 290 | ] 291 | 292 | ## Array of glob pattern strings or booleans keys that should be added as string fields. 
293 | json_string_fields = [ 294 | "client-address", 295 | "node-address", 296 | "status_software_sovrin", 297 | "status_software_indy-node", 298 | ##Internal Node Ports and IPs 299 | "response_result_data_Node_info_Node_ip", 300 | "response_result_data_Node_info_Node_port", 301 | "response_result_data_Node_info_Client_ip", 302 | "response_result_data_Node_info_Client_port", 303 | "response_result_data_Node_info_Node_protocol", 304 | 305 | "status_ok", 306 | "response_result_data_Node_info_Mode", 307 | "response_result_data_Node_info_BLS_key", 308 | "response_result_data_Hardware_HDD_used_by_node", 309 | "response_result_data_Pool_info_Read_only", 310 | "response_result_data_Pool_info_Suspicious_nodes", 311 | "response_result_data_Pool_info_Quorums", 312 | "response_result_data_Software_OS_version", 313 | "response_result_data_Node_info_verkey", 314 | "response_result_data_Node_info_did", 315 | 316 | ##Doesn't Display 317 | "response_result_data_Node_info_Catchup_status_Ledger_statuses", 318 | "response_result_data_Node_info_Catchup_status_Waiting_consistency_proof_msgs", 319 | "response_result_data_Node_info_Catchup_status_Last_txn_3PC_keys", 320 | "response_result_data_Node_info_Committed_ledger_root_hashes", 321 | "response_result_data_Node_info_Committed_state_root_hashes", 322 | "response_result_data_Node_info_Uncommitted_state_root_hashes", 323 | "response_result_data_Extractions_indy-node-status", 324 | "response_result_data_Extractions_node-control-status", 325 | "response_result_data_Extractions_upgrade_log", 326 | "response_result_data_Extractions_journalctl_exceptions", 327 | ##Not sure if Reachable Nodes is a string or not 328 | "response_result_data_Pool_info_Reachable_nodes", 329 | "response_result_data_Pool_info_Unreachable_nodes", 330 | "response_result_data_Software_Installed_packages", 331 | "response_result_data_Software_Indy_packages" 332 | ] 333 | 334 | ## Name key is the key to use as the measurement name. 335 | # json_name_key = "name" 336 | 337 | ## Time key is the key containing the time that should be used to create the 338 | ## metric. 339 | json_time_key = "status_timestamp" 340 | 341 | ## Time format is the time layout that should be used to interpret the json_time_key. 342 | ## The time must be `unix`, `unix_ms`, `unix_us`, `unix_ns`, or a time in the 343 | ## "reference time". To define a different format, arrange the values from 344 | ## the "reference time" in the example to match the format you will be 345 | ## using. For more information on the "reference time", visit 346 | ## https://golang.org/pkg/time/#Time.Format 347 | ## ex: json_time_format = "Mon Jan 2 15:04:05 -0700 MST 2006" 348 | ## json_time_format = "2006-01-02T15:04:05Z07:00" 349 | ## json_time_format = "01/02/2006 15:04:05" 350 | ## json_time_format = "unix" 351 | ## json_time_format = "unix_ms" 352 | json_time_format = "unix" 353 | 354 | ## Timezone allows you to provide an override for timestamps that 355 | ## don't already include an offset 356 | ## e.g. 04/06/2016 12:41:45 357 | ## 358 | ## Default: "" which renders UTC 359 | ## Options are as follows: 360 | ## 1. Local -- interpret based on machine localtime 361 | ## 2. "America/New_York" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 362 | ## 3. 
UTC -- or blank/unspecified, will return timestamp in UTC 363 | json_timezone = "" 364 | 365 | [[inputs.http]] 366 | ## One or more URLs from which to read formatted metrics 367 | urls = [ 368 | "http://indy-node-monitor:8080/networks/smn" 369 | ] 370 | name_override = "node" 371 | 372 | ## HTTP method 373 | method = "GET" 374 | 375 | ## Optional HTTP headers 376 | headers = {"seed" = "${SMN_SEED}"} 377 | 378 | ## HTTP entity-body to send with POST/PUT requests. 379 | # body = "" 380 | 381 | ## HTTP Content-Encoding for write request body, can be set to "gzip" to 382 | ## compress body or "identity" to apply no encoding. 383 | # content_encoding = "identity" 384 | 385 | ## Optional file with Bearer token 386 | ## file content is added as an Authorization header 387 | # bearer_token = "/path/to/file" 388 | 389 | ## Optional HTTP Basic Auth Credentials 390 | # username = "username" 391 | # password = "pa$$word" 392 | 393 | ## OAuth2 Client Credentials. The options 'client_id', 'client_secret', and 'token_url' are required to use OAuth2. 394 | # client_id = "clientid" 395 | # client_secret = "secret" 396 | # token_url = "https://indentityprovider/oauth2/v1/token" 397 | # scopes = ["urn:opc:idm:__myscopes__"] 398 | 399 | ## HTTP Proxy support 400 | # http_proxy_url = "" 401 | 402 | ## Optional TLS Config 403 | # tls_ca = "/etc/telegraf/ca.pem" 404 | # tls_cert = "/etc/telegraf/cert.pem" 405 | # tls_key = "/etc/telegraf/key.pem" 406 | ## Use TLS but skip chain & host verification 407 | # insecure_skip_verify = false 408 | 409 | ## Amount of time allowed to complete the HTTP request 410 | timeout = "2m" 411 | 412 | ## List of success status codes 413 | success_status_codes = [200] 414 | 415 | ## Data format to consume. 416 | ## Each data format has its own unique set of configuration options, read 417 | ## more about them here: 418 | ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md 419 | data_format = "json" 420 | 421 | ## When strict is true and a JSON array is being parsed, all objects within the 422 | ## array must be valid 423 | json_strict = true 424 | 425 | ## Query is a GJSON path that specifies a specific chunk of JSON to be 426 | ## parsed, if not specified the whole document will be parsed. 427 | ## 428 | ## GJSON query paths are described here: 429 | ## https://github.com/tidwall/gjson/tree/v1.3.0#path-syntax 430 | json_query = "" 431 | 432 | ## Tag keys is an array of keys that should be added as tags. Matching keys 433 | ## are no longer saved as fields. Supports wildcard glob matching. 434 | tag_keys = [ 435 | "name", 436 | "network" 437 | ] 438 | 439 | ## Array of glob pattern strings or booleans keys that should be added as string fields. 
440 | json_string_fields = [ 441 | "client-address", 442 | "node-address", 443 | "status_software_sovrin", 444 | "status_software_indy-node", 445 | ##Internal Node Ports and IPs 446 | "response_result_data_Node_info_Node_ip", 447 | "response_result_data_Node_info_Node_port", 448 | "response_result_data_Node_info_Client_ip", 449 | "response_result_data_Node_info_Client_port", 450 | "response_result_data_Node_info_Node_protocol", 451 | 452 | "status_ok", 453 | "response_result_data_Node_info_Mode", 454 | "response_result_data_Node_info_BLS_key", 455 | "response_result_data_Hardware_HDD_used_by_node", 456 | "response_result_data_Pool_info_Read_only", 457 | "response_result_data_Pool_info_Suspicious_nodes", 458 | "response_result_data_Pool_info_Quorums", 459 | "response_result_data_Software_OS_version", 460 | 461 | ##Doesn't Display 462 | "response_result_data_Node_info_verkey", 463 | "response_result_data_Node_info_Catchup_status_Ledger_statuses", 464 | "response_result_data_Node_info_Catchup_status_Waiting_consistency_proof_msgs", 465 | "response_result_data_Node_info_Catchup_status_Last_txn_3PC_keys", 466 | "response_result_data_Node_info_Committed_ledger_root_hashes", 467 | "response_result_data_Node_info_Committed_state_root_hashes", 468 | "response_result_data_Node_info_Uncommitted_state_root_hashes", 469 | "response_result_data_Node_info_did", 470 | "response_result_data_Extractions_indy-node-status", 471 | "response_result_data_Extractions_node-control-status", 472 | "response_result_data_Extractions_upgrade_log", 473 | "response_result_data_Extractions_journalctl_exceptions", 474 | ##Not sure if Reachable Nodes is a string or not 475 | "response_result_data_Pool_info_Reachable_nodes", 476 | "response_result_data_Pool_info_Unreachable_nodes", 477 | "response_result_data_Software_Installed_packages", 478 | "response_result_data_Software_Indy_packages" 479 | ] 480 | 481 | ## Name key is the key to use as the measurement name. 482 | # json_name_key = "name" 483 | 484 | ## Time key is the key containing the time that should be used to create the 485 | ## metric. 486 | json_time_key = "status_timestamp" 487 | 488 | ## Time format is the time layout that should be used to interpret the json_time_key. 489 | ## The time must be `unix`, `unix_ms`, `unix_us`, `unix_ns`, or a time in the 490 | ## "reference time". To define a different format, arrange the values from 491 | ## the "reference time" in the example to match the format you will be 492 | ## using. For more information on the "reference time", visit 493 | ## https://golang.org/pkg/time/#Time.Format 494 | ## ex: json_time_format = "Mon Jan 2 15:04:05 -0700 MST 2006" 495 | ## json_time_format = "2006-01-02T15:04:05Z07:00" 496 | ## json_time_format = "01/02/2006 15:04:05" 497 | ## json_time_format = "unix" 498 | ## json_time_format = "unix_ms" 499 | json_time_format = "unix" 500 | 501 | ## Timezone allows you to provide an override for timestamps that 502 | ## don't already include an offset 503 | ## e.g. 04/06/2016 12:41:45 504 | ## 505 | ## Default: "" which renders UTC 506 | ## Options are as follows: 507 | ## 1. Local -- interpret based on machine localtime 508 | ## 2. "America/New_York" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 509 | ## 3. UTC -- or blank/unspecified, will return timestamp in UTC 510 | json_timezone = "" 511 | --------------------------------------------------------------------------------
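Note: the three [[inputs.http]] blocks in telegraf/telegraf.conf above differ only in the network path of the URL (sbn, ssn, smn) and in the seed variable sent in the "seed" header. Below is a minimal sketch of what a block for an additional network could look like; the network id "xyz" and the XYZ_SEED variable are hypothetical placeholders, the id would have to be one that indy-node-monitor actually serves, and the seed would be added to telegraf/.env-telegraf alongside SBN_SEED, SSN_SEED and SMN_SEED. The json_string_fields list is omitted for brevity; it would be copied verbatim from the existing blocks.

# Sketch only -- "xyz" and XYZ_SEED are placeholders, not part of this repository.
[[inputs.http]]
  ## Node status endpoint served by indy-node-monitor for the hypothetical "xyz" network.
  urls = [
    "http://indy-node-monitor:8080/networks/xyz"
  ]
  name_override = "node"
  method = "GET"
  ## The network seed is passed as an HTTP header, exactly as in the sbn/ssn/smn blocks.
  headers = {"seed" = "${XYZ_SEED}"}
  timeout = "2m"
  success_status_codes = [200]
  data_format = "json"
  json_strict = true
  tag_keys = [
    "name",
    "network"
  ]
  ## json_string_fields would be copied verbatim from the existing blocks above.
  json_time_key = "status_timestamp"
  json_time_format = "unix"
  json_timezone = ""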