├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── release.yml │ └── test_pr.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE.md ├── MANIFEST.in ├── Makefile ├── README.md ├── assets └── images │ ├── Cygives-darkmode.svg │ ├── Cygives-lightmode.svg │ ├── infrastructure.png │ ├── raven-dark-mode.png │ └── raven-light-mode.png ├── deployment ├── docker-compose.yml ├── test.docker-compose.yml └── test.dockerfile ├── dev-requirements.in ├── dev-requirements.txt ├── docs ├── Codesee Injections │ └── README.md ├── Issue Injections │ ├── README.md │ └── issue_injection.png ├── Multi Prerequisite Exploits │ ├── Exploiting Download Action in Workflows with Missing path Parameter.md │ └── README.md ├── Pull Request Injections │ └── README.md ├── README.md └── templates │ └── issues.md ├── library ├── query_body_context_injection.yml ├── query_build_artifact_leaks_the_github_token.yml ├── query_checkout_on_issue.yml ├── query_codesee_injection.yml ├── query_email_context_injection.yml ├── query_enterprise_github_server.yml ├── query_injectable_context_composite_action.yml ├── query_injectable_input_composite_action.yml ├── query_label_context_injection.yml ├── query_message_context_injection.yml ├── query_priv_esc_workflow_run.yml ├── query_pull_request_target_injection.yml ├── query_ref_context_injection.yml ├── query_self_hosted_workflow.yml ├── query_title_context_injection.yml ├── query_unpinnable_action.yml └── query_usage_of_outdated_node.yml ├── main.py ├── requirements.in ├── requirements.txt ├── setup.py ├── src ├── __init__.py ├── cmdline.py ├── common │ ├── __init__.py │ ├── ignore_warnings.py │ └── utils.py ├── config │ ├── __init__.py │ └── config.py ├── downloader │ ├── __init__.py │ ├── download.py │ ├── gh_api.py │ └── utils.py ├── indexer │ ├── __init__.py │ └── index.py ├── logger │ ├── __init__.py │ └── log.py ├── queries │ └── __init__.py ├── reporter │ ├── __init__.py │ ├── report.py │ └── slack_reporter.py ├── storage │ ├── __init__.py │ ├── neo4j_graph.py │ ├── neo4j_utils.py │ ├── redis_connection.py │ └── redis_utils.py └── workflow_components │ ├── __init__.py │ ├── composite_action.py │ ├── dependency.py │ ├── parsing_utils.py │ └── workflow.py └── tests ├── __init__.py ├── integration ├── __init__.py ├── integration_consts.py ├── structures_json │ ├── demo-index.json │ ├── integration-1.json │ └── reusable-workflows.json └── test_graph_structures.py ├── tests_init.py ├── unit ├── __init__.py ├── test_composite_action.py ├── test_dependency.py ├── test_parsing_utils.py ├── test_report.py ├── test_utils.py └── test_workflow.py └── utils.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help RAVEN improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Version [e.g. 22] 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 
32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | test_release: 9 | permissions: 10 | contents: read 11 | name: Test Release 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: cycodelabs/cimon-action@v0 15 | with: 16 | # Turn on prevent once the policy is verified 17 | # prevent: true 18 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 19 | secret: ${{ secrets.CIMON_SECRET }} 20 | fail-on-error: true 21 | 22 | - name: Checkout 23 | uses: actions/checkout@v4 24 | 25 | - name: Test Organization 26 | env: 27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 28 | run: | 29 | make test-build 30 | 31 | publish: 32 | name: Release to PyPI 33 | runs-on: ubuntu-latest 34 | needs: test_release 35 | permissions: 36 | id-token: write 37 | if: ${{ startsWith(github.ref_name, 'v') }} 38 | steps: 39 | - uses: cycodelabs/cimon-action@v0 40 | with: 41 | # Turn on prevent once the policy is verified 42 | # prevent: true 43 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 44 | secret: ${{ secrets.CIMON_SECRET }} 45 | fail-on-error: true 46 | 47 | - name: Checkout 48 | uses: actions/checkout@v4 49 | with: 50 | ref: ${{ github.ref }} 51 | 52 | - name: Set up Python 53 | uses: actions/setup-python@v5 54 | with: 55 | python-version: '3.12' 56 | 57 | - name: Install dependencies 58 | run: | 59 | python -m pip install --upgrade pip 60 | pip install build 61 | 62 | - name: Build package 63 | env: 64 | RAVEN_VERSION: ${{ github.ref_name }} 65 | run: python -m build 66 | 67 | - name: Publish package 68 | uses: pypa/gh-action-pypi-publish@release/v1 -------------------------------------------------------------------------------- /.github/workflows/test_pr.yml: -------------------------------------------------------------------------------- 1 | name: Test PR 2 | 3 | on: [pull_request] 4 | 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | python-style: 10 | name: Style-check and lint Python files 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: cycodelabs/cimon-action@v0 14 | with: 15 | prevent: true 16 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 17 | secret: ${{ secrets.CIMON_SECRET }} 18 | fail-on-error: true 19 | allowed-hosts: > 20 | files.pythonhosted.org 21 | pypi.org 22 | 23 | - name: Checkout 24 | uses: actions/checkout@v4 25 | 26 | - name: Install dependencies 27 | run: python -m pip install black flake8 28 | 29 | - name: Black 30 | run: python -m black --diff --check . 
31 | 32 | # A job that runs integration tests in an isolated environment against 33 | # a predefined organization: RavenIntegrationTests 34 | test_raven: 35 | runs-on: ubuntu-latest 36 | steps: 37 | - uses: cycodelabs/cimon-action@v0 38 | with: 39 | # Turn on prevent once the cimon docker compose bug is fixed 40 | # prevent: true 41 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 42 | secret: ${{ secrets.CIMON_SECRET }} 43 | fail-on-error: true 44 | allowed-hosts: > 45 | auth.docker.io 46 | pypi.org 47 | 48 | - name: Checkout 49 | uses: actions/checkout@v4 50 | 51 | - name: Test Organization 52 | env: 53 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 54 | run: | 55 | make test-build 56 | 57 | # A job for testing the setup process and unit tests of RAVEN against 58 | # different versions of Python 59 | test_raven_package: 60 | runs-on: ubuntu-latest 61 | 62 | strategy: 63 | matrix: 64 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 65 | 66 | steps: 67 | - uses: cycodelabs/cimon-action@v0 68 | with: 69 | # Turn on prevent once the cimon docker compose bug is fixed 70 | # prevent: true 71 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 72 | secret: ${{ secrets.CIMON_SECRET }} 73 | fail-on-error: true 74 | allowed-hosts: > 75 | raw.githubusercontent.com 76 | files.pythonhosted.org 77 | pypi.org 78 | 79 | - name: Checkout 80 | uses: actions/checkout@v4 81 | 82 | - name: Set up Python ${{ matrix.python-version }} 83 | uses: actions/setup-python@v5 84 | with: 85 | python-version: ${{ matrix.python-version }} 86 | 87 | - name: Build Package 88 | run: python -m pip install -r dev-requirements.txt . 89 | 90 | - name: Setup environment 91 | run: make setup 92 | 93 | - name: Test Raven 94 | env: 95 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 96 | run: | 97 | raven download account --token $GITHUB_TOKEN --account-name RavenIntegrationTests 98 | raven index 99 | raven report --format json | jq > /dev/null 100 | 101 | - name: Run Unit Tests 102 | run: pytest -v tests/unit -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /data 2 | /data_backup 3 | .vscode/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # Local debugging - Run tests outside containers 89 | temp_test_raven.py 90 | temp/ 91 | 92 | # pyenv 93 | .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # Mac 132 | .DS_Store 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | We encourage contributions from the community to help improve our tooling and research. We manage contributions primarily through GitHub Issues and Pull Requests. 4 | 5 | If you have a feature request, bug report, or any improvement suggestions, please create an issue to discuss it. To start contributing, you may check the [good first issue](https://github.com/CycodeLabs/Raven/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) label to get into the code base quickly. 6 | 7 | To contribute code changes, fork our repository, make your modifications, and then submit a pull request. 
8 | 9 | ## Development 10 | 11 | To prepare a development environment, follow these instructions: 12 | 13 | **Step 1**: Clone the project 14 | 15 | ```bash 16 | git clone https://github.com/CycodeLabs/raven.git 17 | cd raven 18 | ``` 19 | 20 | **Step 2**: Create a virtual environment and install requirements 21 | 22 | ```bash 23 | python3 -m venv .venv 24 | source .venv/bin/activate 25 | pip3 install -r requirements.txt 26 | ``` 27 | 28 | **Step 3**: Make code modifications 29 | 30 | **Step 4**: Setup the Redis server and the Neo4j database 31 | 32 | ```bash 33 | make setup 34 | ``` 35 | 36 | **Step 5**: Run Raven 37 | 38 | ```bash 39 | python3 main.py -h 40 | ``` 41 | 42 | **Step 6**: Test Raven 43 | 44 | ```bash 45 | make test-build 46 | ``` 47 | 48 | Feel free to reach out to the development team through research@cycode.com. We appreciate your collaboration and look forward to your valuable contributions! 49 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE requirements.txt main.py 2 | recursive-include tests *.py 3 | recursive-include src * -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | setup: 2 | @echo "Building Services..." 
3 | docker compose -p raven --file deployment/docker-compose.yml up -d 4 | 5 | clean-setup: 6 | @echo "Stopping Services..." 7 | docker compose -p raven --file deployment/docker-compose.yml down 8 | 9 | stop: 10 | @echo "Stopping Services..." 11 | docker compose -p raven --file deployment/docker-compose.yml down 12 | 13 | test-build: 14 | @echo "Running Tests in isolated environment..." 15 | docker compose -p test-raven --file deployment/test.docker-compose.yml up --force-recreate --build --abort-on-container-exit 16 | 17 | test-run: 18 | @echo "DO NOT USE DIRECTLY; PLEASE USE: make test-build" 19 | @echo "Running Tests..." 20 | @pytest -v tests -------------------------------------------------------------------------------- /assets/images/infrastructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/assets/images/infrastructure.png -------------------------------------------------------------------------------- /assets/images/raven-dark-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/assets/images/raven-dark-mode.png -------------------------------------------------------------------------------- /assets/images/raven-light-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/assets/images/raven-light-mode.png -------------------------------------------------------------------------------- /deployment/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | neo4j: 5 | image: neo4j:5.26.2 6 | container_name: raven-neo4j 7 | environment: 8 | NEO4J_AUTH: neo4j/123456789 # Replace '123456789' with your desired Neo4j password 9 | ports: 10 | - "7474:7474" 11 | - "7687:7687" 12 | healthcheck: 13 | test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:7474"] 14 | interval: 10s 15 | timeout: 5s 16 | retries: 3 17 | volumes: 18 | - raven-neo4j:/data 19 | 20 | redis: 21 | image: redis:7.4.2 22 | container_name: raven-redis 23 | depends_on: 24 | neo4j: 25 | condition: service_healthy 26 | ports: 27 | - "6379:6379" 28 | healthcheck: 29 | test: ["CMD", "redis-cli", "ping"] 30 | interval: 10s 31 | timeout: 5s 32 | retries: 3 33 | volumes: 34 | - raven-redis:/data 35 | 36 | volumes: 37 | raven-redis: 38 | raven-neo4j: -------------------------------------------------------------------------------- /deployment/test.docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | neo4j-test: 5 | image: neo4j:5.26.2 6 | container_name: raven-neo4j-test 7 | environment: 8 | NEO4J_AUTH: neo4j/123456789 # Replace '123456789' with your desired Neo4j password 9 | ports: 10 | - "7474:7474" 11 | - "7687:7687" 12 | healthcheck: 13 | test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:7474"] 14 | interval: 10s 15 | timeout: 5s 16 | retries: 3 17 | 18 | redis-test: 19 | image: redis:7.4.2 20 | container_name: raven-redis-test 21 | ports: 22 | - "6379:6379" 23 | healthcheck: 24 | test: ["CMD", "redis-cli", "ping"] 25 | interval: 10s 26 | timeout: 5s 27 | retries: 3 28 | 29 | raven-test: 30 | build: 31 | dockerfile: 
./deployment/test.dockerfile 32 | context: .. 33 | depends_on: 34 | neo4j-test: 35 | condition: service_healthy 36 | redis-test: 37 | condition: service_healthy 38 | environment: 39 | - GITHUB_TOKEN=$GITHUB_TOKEN 40 | container_name: raven-engine-test 41 | -------------------------------------------------------------------------------- /deployment/test.dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.21 2 | 3 | # Set the working directory 4 | RUN mkdir -p /raven 5 | RUN mkdir -p /raven/src 6 | RUN mkdir -p /raven/tests 7 | 8 | # Copy the current directory contents into the container at /raven 9 | WORKDIR /raven 10 | COPY Makefile dev-requirements.txt /raven/ 11 | COPY src /raven/src 12 | COPY library /raven/library 13 | COPY tests /raven/tests 14 | 15 | # Install any needed packages specified in dev-requirements.txt 16 | RUN pip3 install -r dev-requirements.txt 17 | 18 | # Run RAVEN tests 19 | CMD ["make", "test-run"] -------------------------------------------------------------------------------- /dev-requirements.in: -------------------------------------------------------------------------------- 1 | -r requirements.in 2 | pytest -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.13 3 | # by the following command: 4 | # 5 | # pip-compile --no-annotate --output-file=dev-requirements.txt dev-requirements.in 6 | # 7 | certifi==2025.1.31 8 | charset-normalizer==3.4.1 9 | colorama==0.4.6 10 | idna==3.10 11 | iniconfig==2.0.0 12 | interchange==2021.0.4 13 | loguru==0.7.3 14 | monotonic==1.6 15 | packaging==24.2 16 | pansi==2024.11.0 17 | pillow==11.1.0 18 | pluggy==1.5.0 19 | py2neo==2021.2.4 20 | pygments==2.19.1 21 | pytest==8.3.4 22 | pytz==2025.1 23 | pyyaml==6.0.2 24 | redis==5.2.1 25 | requests==2.32.3 26 | six==1.17.0 27 | slack-sdk==3.34.0 28 | tqdm==4.67.1 29 | urllib3==2.3.0 30 | -------------------------------------------------------------------------------- /docs/Codesee Injections/README.md: -------------------------------------------------------------------------------- 1 | # CodeSee Injections 2 | 3 | ## Overview 4 | CodeSee is a startup that provides codebase visualization and effective tooling for code review and collaboration. Similar to other developer-centric products (e.g., Codecov), the CodeSee integration creates a new GitHub Actions workflow that embeds its capabilities for every pull request, allowing developers to efficiently review the added code. Cycode discovered a branch name injection vulnerability in `codesee-map-action` that may allow Remote Code Execution (RCE) on the pipeline. 5 | 6 | ## Description 7 | Calling the `Codesee-io/codesee-map-action` action in vulnerable versions with a branch name containing a code injection payload such as `a";ls;"` allowed injecting code into the CodeSee NPM package. A threat actor could craft a malicious script file that would be fetched together with a forked pull request and executed inside the pipeline. 8 | 9 | ## Remediation 10 | This issue was fixed in version `0.376.0`. CodeSee’s internal review found that this injection vulnerability entered their system as a result of a logic bug in the code used to escape the user-supplied branch name. 
In addition to repairing that logic directly, they changed all command executions in their code analysis to run without a shell, ensuring no subtle escaping logic was required. As the CLI is written in Node, this involved replacing calls to child_process.exec with child_process.execFile. 11 | 12 | To further mitigate future vulnerabilities, CodeSee introduced the following permissions in the workflow: 13 | ``` yaml 14 | permissions: read-all 15 | ``` 16 | Even if such a vulnerability is found again, the GITHUB_TOKEN won’t have sufficient permissions to perform any malicious activity. 17 | 18 | ## References 19 | - [Cycode Collaborates with CodeSee to Secure the Pipelines of Thousands of Open-Source Projects](https://cycode.com/blog/cycode-secures-thousands-of-open-source-projects/) 20 | 21 | ## Real-World Examples 22 | ### freeCodeCamp/freeCodeCamp - 374K ⭐️ 23 | 24 | * **Description**: This workflow used the latest version of `Codesee-io/codesee-map-action` with the `mapUpload` parameter. 25 | * **Commit Link**: [0871341c9cbf96ab455bc3e0bce636e2ef2a2be2](https://github.com/freeCodeCamp/freeCodeCamp/commit/0871341c9cbf96ab455bc3e0bce636e2ef2a2be2) 26 | * **Remediation**: Removed usage of the CodeSee map action. 27 | 28 | ### slimtoolkit/slim - 17.3K ⭐️ 29 | 30 | * **Description**: This workflow used the latest version of `Codesee-io/codesee-map-action` with the `mapUpload` parameter. 31 | * **Commit Link**: [bb846649cb3dfaad83c3b2ccbee552786c7dc635](https://github.com/slimtoolkit/slim/commit/bb846649cb3dfaad83c3b2ccbee552786c7dc635) 32 | * **Remediation**: Removed usage of the CodeSee map action. 33 | 34 | ### statelyai/xstate - 24.8K ⭐️ 35 | 36 | * **Description**: This workflow used the latest version of `Codesee-io/codesee-map-action` with the `mapUpload` parameter. 37 | * **Commit Link**: N/A 38 | * **Remediation**: Updated through the CodeSee package fix. 39 | 40 | ## Detections 41 | 42 | ### CodeSee Usage 43 | First, verify that the workflow hasn't altered the default workflow permissions and that it uses the `Codesee-io/codesee-map-action` action. Then, manually verify that the workflow is using one of the vulnerable versions. 44 | 45 | ``` cypher 46 | MATCH (w:Workflow) 47 | WHERE 48 | w.permissions is null AND 49 | EXISTS { 50 | (w)-[*]->(ca:CompositeAction) 51 | WHERE ( 52 | ca.path = "Codesee-io/codesee-map-action" 53 | ) 54 | } 55 | RETURN DISTINCT w.path, w.url; 56 | ``` -------------------------------------------------------------------------------- /docs/Issue Injections/README.md: -------------------------------------------------------------------------------- 1 | # Issue Injections 2 | 3 | 4 | ## Overview 5 | GitHub Actions workflows can be triggered by issue events. The workflow can access and use the information in the issue's title or body, for example printing it or acting on it within the workflow. Malicious actors may exploit this functionality by inserting harmful payloads into the issue's title or body, which can lead to the execution of malicious code within the workflow. 6 | 7 | 8 | ## Description 9 | This issue arises when GitHub Actions workflows, triggered by issue events, process the issue details, such as the title or body, without proper input validation or sanitization. 
10 | 11 | Let’s take the following workflow as an example: 12 | ``` yaml 13 | name: Issues Injections 14 | 15 | on: 16 | issues: 17 | types: [opened] 18 | 19 | jobs: 20 | print_issue_title: 21 | runs-on: ubuntu-latest 22 | 23 | name: Print issue title 24 | steps: 25 | - run: echo "${{github.event.issue.title}}" 26 | ``` 27 | 28 | Threat actors can exploit this by injecting malicious payloads into the issue title. By injecting malicious code into the workflow, an attacker can exfiltrate the pipeline secrets or, with the proper permissions, use the `GITHUB_TOKEN` environment variable to push new code to the repository. 29 | 30 | ## Remediation 31 | * Avoid directly executing or interpreting user-supplied data as code or command arguments. 32 | * Always load user input into environment variables first. 33 | 34 | ## References 35 | - [Cycode Discovers Vulnerabilities in CI/CD Pipelines of Popular Open-Source Projects](https://cycode.com/blog/github-actions-vulnerabilities/) 36 | 37 | ## Real-World Examples 38 | 39 | 40 | ### fauna/faunadb-js - 694 ⭐️ 41 | * **Description**: This workflow runs when an issue is being opened. Lines 26 and 27 use the issue body and title in an insecure manner, at `create-jira-tickets.yml`. 42 | * **Fix Commit Link**: [ee6f53f9c985bde41976743530e3846dee058587](https://github.com/fauna/faunadb-js/commit/ee6f53f9c985bde41976743530e3846dee058587) 43 | * **Remediation**: Removed the workflow. 44 | 45 | ### wireapp/wire-ios - 3.2K ⭐️ 46 | * **Description**: This workflow runs when an issue is being opened. Line 15 uses the issue title at `issue.yml`. 47 | * **Fix Commit Link**: [9d39d6c93b5a58a0bc8c1aba10e0d67756359630](https://github.com/wireapp/wire-ios/commit/9d39d6c93b5a58a0bc8c1aba10e0d67756359630) 48 | * **Remediation**: Removed the direct call to `${{ github.event.issue.title }}`. 49 | 50 | ### withastro/astro - 35.2K ⭐️ 51 | * **Description**: This workflow runs when an issue is being opened. Line 26 uses the issue title at `reviewBot.yml`. 52 | * **Fix Commit Link**: [650fb1aa51a1c843c10bc89a11732b45a6345b00](https://github.com/withastro/astro/commit/650fb1aa51a1c843c10bc89a11732b45a6345b00) 53 | * **Remediation**: Replaced the direct call to `${{ github.event.issue.title }}` with the environment variable `ISSUE_TITLE: ${{ github.event.issue.title }}`. 54 | 55 | ### kiegroup/kogito-runtimes - 458 ⭐️ 56 | * **Description**: This workflow runs when an issue is being opened. Line 11 uses the issue title at `issues.yml`. 57 | * **Fix Commit Link**: [53c18e5372e5306e0aa580f201f820b80359ad11](https://github.com/kiegroup/kogito-runtimes/commit/53c18e5372e5306e0aa580f201f820b80359ad11) 58 | * **Remediation**: Removed the direct call to `${{ github.event.issue.title }}`. 59 | 60 | 61 | ### Ombi-app/Ombi - 3.4k ⭐️ 62 | * **Description**: This workflow runs when an issue is being opened. Line 13 uses the issue body at `issue-check.yml`. 63 | * **Fix Commit Link**: [5cc0d7727d72fe1fee8a3f6c3874d44a5b785de4](https://github.com/Ombi-app/Ombi/commit/5cc0d7727d72fe1fee8a3f6c3874d44a5b785de4) 64 | * **Remediation**: Removed the direct call to `${{ github.event.issue.title }}`. 65 | 66 | 67 | ## Detections 68 | 69 | ### Issue + Command Injection 70 | This detection identifies workflows triggered by issue-related events (issues, issue comments) whose steps depend on attacker-controlled issue data such as the title or body. 
71 | ``` cypher 72 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 73 | WHERE 74 | ( 75 | "issue_comment" in w.trigger OR 76 | "issues" in w.trigger 77 | ) AND 78 | ( 79 | d.param IN ["github.event.issue.title", "github.event.issue.body"] 80 | ) 81 | RETURN DISTINCT w.path, w.url; 82 | ``` 83 | 84 | ![Issue injection example](issue_injection.png) 85 | 86 | 87 | ### Issue Comment + Checkout 88 | This detection identifies workflows triggered by issue events where a job checks out code from a repository ("actions/checkout") using a ref derived from the event. 89 | 90 | ``` cypher 91 | MATCH (w:Workflow)-[*]->(j:Job) 92 | WHERE 93 | ( 94 | "issue_comment" in w.trigger OR 95 | "issues" in w.trigger 96 | ) AND 97 | EXISTS { 98 | (j)-->(s:Step)-->(ca:CompositeAction) 99 | WHERE ( 100 | ca.path = "actions/checkout" AND 101 | ANY(param IN s.with WHERE 102 | ( 103 | param STARTS WITH "ref" and 104 | ( 105 | param contains "head.sha" OR 106 | param contains "head.ref" 107 | ) 108 | ) 109 | ) 110 | ) 111 | } 112 | RETURN w.path, w.url 113 | ``` -------------------------------------------------------------------------------- /docs/Issue Injections/issue_injection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/docs/Issue Injections/issue_injection.png -------------------------------------------------------------------------------- /docs/Multi Prerequisite Exploits/Exploiting Download Action in Workflows with Missing path Parameter.md: -------------------------------------------------------------------------------- 1 | # Exploiting Download Action in Workflows with Missing "path" Parameter 2 | 3 | > **Note**: Read more about this vulnerability in the [Cycode Blog Post](https://cycode.com/blog/analyzing-the-vulnerability-that-could-have-compromised-microsoft-365-users/). 4 | 5 | ## The Problem 6 | 7 | Due to the limitations of native GitHub Actions for sharing artifacts across different workflows, developers often resort to custom actions and APIs. One such widely used custom action is `dawidd6/action-download-artifact`, which currently has over 12,000 dependent repositories. 8 | 9 | This custom action and the underlying GitHub API for downloading artifacts didn't discriminate between artifacts created by the base repository and those from a forked repository. This oversight could lead a workflow to download and process poisoned artifacts, introducing vulnerabilities into the software supply chain. 10 | 11 | Without the `path` parameter specifying where the action should extract the artifact, the action defaults to extracting it into the current working directory. This can overwrite original files in the repository and may lead to malicious activity on the build system, posing a risk of a significant supply chain attack. 12 | 13 | GitHub updated its API for the GetArtifact and ListArtifacts endpoints to provide more information to help developers differentiate between trusted and untrusted artifacts. 14 | 15 | 16 | ### In Simple Terms 17 | 18 | An attacker can fork a repository, create a malicious artifact, and then inject this artifact into the original repository's workflow. The workflow that downloads the artifact, in turn, would unknowingly use the tainted artifact. 
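To make the pattern concrete, here is a minimal, hypothetical sketch of the consumer side. The workflow, file, and artifact names (`CI`, `ci.yml`, `pr-artifact`, `scripts/comment.sh`) are illustrative and not taken from a real repository:

``` yaml
# Hypothetical consumer workflow, triggered after a fork's "CI" run completes.
name: Comment on PR

on:
  workflow_run:
    workflows: ["CI"]
    types: [completed]

jobs:
  process-artifact:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Downloads the artifact produced by the (possibly forked) CI run.
      # Because no `path:` is set, the archive is extracted into the current
      # working directory and can overwrite the files checked out above.
      - uses: dawidd6/action-download-artifact@v2
        with:
          workflow: ci.yml
          run_id: ${{ github.event.workflow_run.id }}
          name: pr-artifact

      # Any script run from the working tree may now be attacker-controlled.
      - run: ./scripts/comment.sh
```

Setting `path:` to a dedicated directory (as fastapi and sqlmodel did with `site`) keeps the extracted files away from the checked-out sources.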
19 | 20 | ## References 21 | - [From Default to Secure: Analyzing the Vulnerability that Could Have Compromised Microsoft 365 Users](https://cycode.com/blog/analyzing-the-vulnerability-that-could-have-compromised-microsoft-365-users/). 22 | - [Novel Pipeline Vulnerability Discovered; Rust Found Vulnerable](https://www.legitsecurity.com/blog/artifact-poisoning-vulnerability-discovered-in-rust). 23 | 24 | ## Real-World Examples 25 | 26 | ### tiangolo/fastapi - 64k ⭐️ 27 | * **Description**: Unsafe handling of downloaded artifacts; the workflow extracted the artifact into the current working directory. 28 | * **Fix Commit Link**: [9efab1bd96ef061edf1753626573a0a2be1eef09](https://github.com/tiangolo/fastapi/commit/9efab1bd96ef061edf1753626573a0a2be1eef09) 29 | * **Remediation**: Created a specific directory, `site`, for uploading and extracting artifacts. 30 | 31 | ### microsoft/fluentui - 16k ⭐️ 32 | * **Description**: Unsafe handling of artifacts within the build process, leading to code execution on the build system and a potentially significant supply chain attack that would deliver malware to all Microsoft 365 users. 33 | * **Fix Commit Link**: [2ea6195152131766641311ee5604e746b578d8e7](https://github.com/microsoft/fluentui/commit/2ea6195152131766641311ee5604e746b578d8e7) 34 | * **Remediation**: Removed the workflow. 35 | 36 | 37 | ### tiangolo/sqlmodel - 11k ⭐️ 38 | * **Description**: Unsafe handling of downloaded artifacts; the workflow extracted the artifact into the current working directory. 39 | * **Fix Commit Link**: [cf36b2d9baccf527bc61071850f102e2cd8bf6bf](https://github.com/tiangolo/sqlmodel/commit/cf36b2d9baccf527bc61071850f102e2cd8bf6bf) 40 | * **Remediation**: Created a specific directory, `site`, for uploading and extracting artifacts. 41 | 42 | ## Query 43 | 44 | ``` cypher 45 | MATCH p=(w1:Workflow)-->(w2:Workflow)-[*]->(s:Step)-->(ca:CompositeAction) 46 | WHERE ( 47 | "pull_request" in w1.trigger OR 48 | "pull_request_target" in w1.trigger OR 49 | "issue_comment" in w1.trigger OR 50 | "issues" in w1.trigger 51 | ) AND ( 52 | ca.path = "dawidd6/action-download-artifact" 53 | ) AND ( 54 | not ANY(param IN s.with WHERE 55 | ( 56 | param contains "path" 57 | ) 58 | ) 59 | ) AND 60 | EXISTS { 61 | (w2)-[*]->(caTmp:CompositeAction) 62 | WHERE caTmp.path = "actions/checkout" 63 | } 64 | RETURN DISTINCT w1.url, w2.url; 65 | ``` 66 | 67 | > **Note**: According to the release documentation, since version 2.28.0 of `dawidd6/action-download-artifact`, the action ignores forks when downloading artifacts. -------------------------------------------------------------------------------- /docs/Multi Prerequisite Exploits/README.md: -------------------------------------------------------------------------------- 1 | # Multi-Prerequisite Exploits 2 | 3 | ## Overview 4 | 5 | This section is dedicated to exploring and documenting exploits that require multiple conditions to be met for the vulnerability to be triggered. These are complex vulnerabilities that may involve a series of specific events, configurations, or sequences of actions to exploit. 6 | 7 | Understanding these types of exploits can be essential for comprehensive security assessments, as they can often slip through the cracks of simpler vulnerability scans. 
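To illustrate what "multiple prerequisites" means in practice, here is a hedged sketch that chains two individually benign pieces: an unprivileged, fork-triggered producer workflow and a privileged consumer triggered by `workflow_run`. Neither is exploitable alone; together they let untrusted artifact content reach a privileged context. All names are hypothetical.

``` yaml
# File 1: .github/workflows/build.yml (hypothetical producer)
# Runs on forked PRs without secrets; benign on its own.
name: build
on: [pull_request]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/upload-artifact@v4
        with:
          name: build-output
          path: out/
```

``` yaml
# File 2: .github/workflows/publish.yml (hypothetical privileged consumer)
# Also benign on its own; it becomes exploitable only when combined with the
# untrusted artifact above and an unsafe download/extraction step.
name: publish
on:
  workflow_run:
    workflows: [build]
    types: [completed]
```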
8 | 9 | ## Exploits Documented Here 10 | 11 | 12 | 13 | Stay tuned for more in-depth content and real-world examples. 14 | -------------------------------------------------------------------------------- /docs/Pull Request Injections/README.md: -------------------------------------------------------------------------------- 1 | # Pull Request Injections 2 | 3 | ## Overview 4 | Pull Requests (PRs) are a cornerstone of collaborative coding but can become a security loophole when integrated with automated workflows like GitHub Actions. Without proper input validation or sanitization, attackers can exploit this by injecting malicious code into PR titles, descriptions, or file changes. These injections can compromise the integrity of the entire codebase by executing unauthorized commands, code, or even exfiltrating sensitive information. This documentation aims to explore the vulnerabilities, real-world examples, remediation strategies, and detection techniques associated with pull request injections. 5 | 6 | 7 | ## Description 8 | We will present two scenarios of pull request injections in vulnerable workflows: 9 | 10 | ### 1. pull_request + Pull Request Title: 11 | In this scenario, workflows trigger on pull request events and execute jobs that depend on the pull request title (github.event.pull_request.title) without any permissions checks or input sanitization. 12 | ```yaml 13 | on: 14 | pull_request: 15 | types: [opened, synchronize] 16 | 17 | jobs: 18 | use_pr_title: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Print PR Title 22 | run: echo "Pull Request Title is ${{ github.event.pull_request.title }}" 23 | ``` 24 | 25 | ### 2. pull_request_target + Checkout 26 | Using the `pull_request_target` event in a GitHub Actions workflow is risky because it runs in the context of the base repository, not the fork. This means it has access to secrets and write permissions to the repository. The real danger arises when such a workflow is combined with the `checkout` action, which checks out code from an incoming, potentially untrusted pull request and then executes scripts or runs commands based on that code. Without proper permissions checks, this could allow a malicious actor to run untrusted code in a privileged environment, potentially leading to unauthorized access or data leaks. 27 | 28 | ```yaml 29 | on: 30 | pull_request_target: 31 | types: [opened, synchronize] 32 | 33 | jobs: 34 | checkout_code: 35 | runs-on: ubuntu-latest 36 | steps: 37 | # Checks out code from the incoming pull request 38 | - name: Checkout code 39 | uses: actions/checkout@v2 40 | with: 41 | ref: ${{ github.event.pull_request.head.sha }} 42 | 43 | # Executes scripts or runs commands based on the checked out code 44 | - name: Build and deploy 45 | run: make deploy 46 | ``` 47 | ## Remediation 48 | * **Input Validation**: Sanitize and validate data from pull request titles or other user-generated fields before using them in your workflows (a hardened sketch of scenario 1 follows this list). 49 | 50 | * **Limited Permissions**: Minimize the permissions granted to GitHub Actions. Use read-only permissions where possible. 51 | 52 | * **Workflow Segregation**: Consider using separate workflows for trusted and untrusted events to minimize risk. 53 | 54 | * **Manual Approval**: Require manual approval for workflow runs from outside contributors. 
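As referenced in the remediation list, here is a minimal hardened sketch of scenario 1. It assumes nothing beyond the example above: the title is passed to the step through an environment variable, so the runner expands it as data rather than interpolating it into the shell script, and permissions are pinned to read-only.

``` yaml
on:
  pull_request:
    types: [opened, synchronize]

# Explicit, minimal permissions for the whole workflow
permissions:
  contents: read

jobs:
  use_pr_title:
    runs-on: ubuntu-latest
    steps:
      - name: Print PR Title
        env:
          # Loaded as an environment variable, never spliced into the script
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: echo "Pull Request Title is $PR_TITLE"
```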
55 | 56 | ## References 57 | - [Cycode Discovers Vulnerabilities in CI/CD Pipelines of Popular Open-Source Projects](https://cycode.com/blog/github-actions-vulnerabilities/) 58 | 59 | ## Real-World Examples 60 | ### fauna/faunadb-js - 694 ⭐️ 61 | * **Description**: This workflow runs when a pull_request is created. Lines 32 and 33 use the pull request's body and title in an insecure manner, at `create-jira-tickets.yml`. 62 | * **Fix Commit Link**: [ee6f53f9c985bde41976743530e3846dee058587](https://github.com/fauna/faunadb-js/commit/ee6f53f9c985bde41976743530e3846dee058587) 63 | * **Remediation**: Removed the workflow. 64 | 65 | ## Queries 66 | ### 1. Pull Request + Pull Request Title 67 | 68 | This query looks for GitHub Actions workflows that are triggered by pull requests, focusing on those that don't have defined permissions. It then identifies any jobs and steps within those workflows that use the pull request title (github.event.pull_request.title). The goal is to find potential security risks arising from the use of unsanitized pull request titles. 69 | 70 | ``` cypher 71 | MATCH (w:Workflow)-[*]->(j:Job)-->(s:Step)-->(dep:StepCodeDependency) 72 | WHERE 73 | w.permissions IS NULL AND 74 | "pull_request" IN w.trigger AND 75 | s.run IS NOT NULL AND 76 | dep.param = "github.event.pull_request.title" 77 | RETURN DISTINCT w, j, s, dep; 78 | ``` 79 | 80 | ### 2. Pull Request Target + Checkout 81 | 82 | This query aims to identify workflows that are triggered by the `pull_request_target` event and don't have specified permissions. It then looks for jobs within those workflows that use the actions/checkout action to check out code based on pull request data. The query focuses on parameters that start with "ref" and contain either head.sha or head.ref. Due to its broad nature, this query might produce many false positives, but it's designed to flag potentially risky configurations involving `pull_request_target` and code checkout. 83 | 84 | ``` cypher 85 | MATCH (w:Workflow)-[*]->(j:Job) 86 | WHERE 87 | w.permissions is null AND 88 | "pull_request_target" in w.trigger AND 89 | EXISTS { 90 | (j)-->(s:Step)-->(ca:CompositeAction) 91 | WHERE ( 92 | ANY(param IN s.with WHERE 93 | ( 94 | param STARTS WITH "ref" and 95 | ( 96 | param contains "head.sha" OR 97 | param contains "head.ref" 98 | ) 99 | ) 100 | ) 101 | ) 102 | } 103 | RETURN DISTINCT w.path, w.url; 104 | ``` 105 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Exploit Documentation 📚 2 | 3 | Welcome to the GitHub Actions Exploit Documentation directory! This is where you'll find in-depth write-ups on security vulnerabilities specific to GitHub Actions workflows and the relevant queries to find them in Raven's ecosystem. 4 | 5 | ## What's Inside? 6 | 7 | - Analyses of GitHub Actions vulnerabilities and examples of them. 8 | - Cypher queries to identify vulnerable repositories on Neo4j. 9 | - Mitigation suggestions and preventive best practices. 10 | - Source references, such as research articles and GitHub issues. 
11 | 12 | ## Quick Links 13 | 14 | - [Issue Injections](/docs/Issue%20Injections/README.md) 15 | - [Pull Request Injections](/docs/Pull%20Request%20Injections/README.md) 16 | - [Workflow Run Injections](/docs/Multi%20Prerequisite%20Exploits/README.md) 17 | - [CodeSee Injections](/docs/Codesee%20Injections/README.md) 18 | 19 | ## Notes 20 | 21 | - The information presented here is based on rigorous research and real-world examples. 22 | - Always act responsibly and ethically when using this information. 23 | 24 | ## Contributions 25 | 26 | We encourage you to contribute by submitting more GitHub Actions exploit documentation and queries, or by improving the existing write-ups. Contributions are always welcome! -------------------------------------------------------------------------------- /docs/templates/issues.md: -------------------------------------------------------------------------------- 1 | # Vulnerability Name 2 | 3 | ## Overview 4 | Provide a brief introduction to the specific vulnerability type. 5 | 6 | ## Description 7 | Include a detailed description of the vulnerability, explaining what it is, how it can be exploited, and why it's important to detect and remediate it. 8 | 9 | ## Remediation 10 | Provide guidance on how to remediate the vulnerability once it's detected. This may include steps to update GitHub Actions configurations, change specific workflow files, or apply best practices. 11 | 12 | ## References 13 | Include links to external resources, documentation, or security advisories related to this vulnerability type. This can help users understand the issue better and find additional information. 14 | 15 | ## Real-World Examples 16 | 17 | ### Repository Name 18 | 19 | * **Description**: Briefly describe the vulnerability that was present in this repository's GitHub Actions workflow. 20 | * **Commit Link**: Provide links to the specific commits in the repository where the vulnerability existed. 21 | * **Remediation**: Explain how the vulnerability was fixed in this repository. Include links to relevant code changes or pull requests. 22 | 23 | 24 | ## Detections 25 | Include sample Cypher queries that users can run against their indexed GitHub Actions workflows in the Neo4j database to detect instances of this vulnerability. Make sure to explain the purpose of each query and any parameters that need to be configured. 26 | 27 | ### Example-1 28 | This Cypher query identifies workflows triggered by events like issue comments, issues, or pull request targets that depend on specific GitHub event-related data. 
29 | ``` cypher 30 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 31 | WHERE 32 | ( 33 | "issue_comment" in w.trigger OR 34 | "issues" in w.trigger OR 35 | "pull_request_target" in w.trigger 36 | ) AND 37 | ( 38 | d.param IN ["github.event.issue.title", "github.event.issue.body", "github.event.pull_request.title", "github.event.pull_request.body", "github.event.comment.body", "github.event.review.body", "github.event.review_comment.body", "github.event.pages.*.page_name", "github.event.commits.*.message", "github.event.head_commit.message", "github.event.head_commit.author.email", "github.event.head_commit.author.name", "github.event.commits.*.author.email", "github.event.commits.*.author.name", "github.event.pull_request.head.ref", "github.event.pull_request.head.label", "github.event.pull_request.head.repo.default_branch", "github.head_ref"] 39 | ) 40 | RETURN DISTINCT w.path, w.url; 41 | ``` -------------------------------------------------------------------------------- /library/query_body_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-1 2 | 3 | info: 4 | name: Body Context Injection 5 | severity: critical 6 | description: Body Injection is caused by using body variables in inline scripts 7 | full-description: | 8 | Issues, comments, discussions and PR bodies can contain any text and special characters. 9 | By using a body variable in an inline script, an attacker can inject arbitrary code 10 | into the build process. 11 | references: 12 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 13 | - https://cycode.com/blog/github-actions-vulnerabilities/ 14 | - https://github.com/CycodeLabs/raven/blob/main/docs/issue_injections/README.md 15 | tags: 16 | - injection 17 | - unauthenticated 18 | 19 | query: | 20 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 21 | WHERE ( 22 | "issues" in w.trigger OR 23 | "issue_comment" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND 26 | ( 27 | d.param IN [ 28 | "github.event.comment.body", 29 | "github.event.issue.body", 30 | "github.event.discussion.body", 31 | "github.event.pull_request.body" 32 | ] 33 | ) 34 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_build_artifact_leaks_the_github_token.yml: -------------------------------------------------------------------------------- 1 | id: RQ-17 2 | 3 | info: 4 | name: Build Artifact Leaks the GitHub Token 5 | severity: critical 6 | description: Including `actions/checkout` and `actions/upload-artifact` in a workflow can expose the `GITHUB_TOKEN` in the build artifact if the root directory is uploaded. 7 | full-description: | 8 | When you use the `actions/checkout` action, the `GITHUB_TOKEN` is automatically added to the 9 | `.git/config` file. If you subsequently use the `actions/upload-artifact` action with the path 10 | set to the root directory, the `.git/config` file will be included in the build artifact. 11 | This can expose the `GITHUB_TOKEN` within the artifact. 
12 | references: 13 | - https://unit42.paloaltonetworks.com/github-repo-artifacts-leak-tokens/ 14 | tags: 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(j:Job) 19 | WHERE 20 | EXISTS { 21 | MATCH (j)-->(s:Step)-->(ca:CompositeAction) 22 | WHERE ca.path = "actions/checkout" 23 | } 24 | AND 25 | EXISTS { 26 | MATCH (j)-->(s:Step)-->(ca:CompositeAction) 27 | WHERE ca.path = "actions/upload-artifact" 28 | AND "path:." IN s.with 29 | } 30 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_checkout_on_issue.yml: -------------------------------------------------------------------------------- 1 | id: RQ-2 2 | 3 | info: 4 | name: Checkout On New Issue 5 | severity: critical 6 | description: Workflows triggered by issue events, where a job checks out code from a repository ("actions/checkout"). 7 | full-description: 8 | references: 9 | - https://github.com/CycodeLabs/raven/tree/main/docs/issue_injections 10 | - https://cycode.com/blog/github-actions-vulnerabilities/ 11 | tags: 12 | - unauthenticated 13 | 14 | query: | 15 | MATCH (w:Workflow)-[*]->(j:Job) 16 | WHERE 17 | ( 18 | "issue_comment" in w.trigger OR 19 | "issues" in w.trigger 20 | ) AND 21 | EXISTS { 22 | (j)-->(s:Step)-->(ca:CompositeAction) 23 | WHERE ( 24 | ca.path = "actions/checkout" AND 25 | ANY(param IN s.with WHERE 26 | ( 27 | param STARTS WITH "ref" and 28 | ( 29 | param contains "head.sha" OR 30 | param contains "head.ref" 31 | ) 32 | ) 33 | ) 34 | ) 35 | } 36 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_codesee_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-3 2 | 3 | info: 4 | name: CodeSee Injection 5 | severity: info 6 | description: The CodeSee NPM package before v0.376.0 was vulnerable to code injection. 7 | full-description: 8 | references: 9 | - https://github.com/CycodeLabs/raven/tree/main/docs/codesee_injections 10 | - https://cycode.com/blog/cycode-secures-thousands-of-open-source-projects/ 11 | tags: 12 | - unauthenticated 13 | - injection 14 | - fixed 15 | 16 | query: | 17 | MATCH (w:Workflow) 18 | WHERE 19 | w.permissions is null AND 20 | EXISTS { 21 | (w)-[*]->(ca:CompositeAction) 22 | WHERE ( 23 | ca.path = "Codesee-io/codesee-map-action" 24 | ) 25 | } 26 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_email_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-4 2 | 3 | info: 4 | name: Email Context Injection 5 | severity: high 6 | description: Email Injection is caused by using email variables in inline scripts 7 | full-description: | 8 | GitHub allows creating accounts with email addresses that contain special characters, 9 | such as `+`, `@` and `"`. By using an email variable in an inline script, an attacker 10 | can inject arbitrary code into the build process. 
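# Illustrative only, not part of the original rule: an inline script that this
# query is meant to flag, assuming a hypothetical workflow step:
#
#   - run: echo "Committed by ${{ github.event.head_commit.committer.email }}"
#
# Because the expression is expanded into the script text before it runs, an
# email such as `a";id;echo"@example.com` would execute `id` in the runner's shell.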
11 | references: 12 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 13 | tags: 14 | - injection 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 19 | WHERE ( 20 | "issues" in w.trigger OR 21 | "issue_comment" in w.trigger OR 22 | "pull_request_target" in w.trigger 23 | ) AND 24 | ( 25 | d.param IN [ 26 | "github.event.comment.author.email", 27 | "github.event.head_commit.committer.email" 28 | ] 29 | ) 30 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_enterprise_github_server.yml: -------------------------------------------------------------------------------- 1 | id: RQ-16 2 | 3 | info: 4 | name: Enterprise GitHub Server 5 | severity: info 6 | description: Checking out code from a GitHub Enterprise repository. 7 | full-description: | 8 | GitHub Enterprise Server is the on-premises version of GitHub, which you can deploy and manage in your own secure environment. 9 | Checking out code from GitHub Enterprise Server - combined with a command injection vulnerability - 10 | may lead to data exfiltration from private repositories hosted on GitHub Enterprise Server. 11 | references: 12 | - https://github.com/actions/checkout 13 | tags: 14 | - reconnaissance 15 | 16 | query: | 17 | MATCH (w:Workflow)-[*]->(s:Step)-[*]->(ca:CompositeAction) 18 | WHERE ( 19 | ca.path = "actions/checkout" AND 20 | any ( server IN s.with WHERE ( 21 | server CONTAINS "github-server-url" AND 22 | NOT server ENDS WITH "github.com" 23 | ) 24 | ) 25 | ) 26 | 27 | RETURN DISTINCT w.url AS url 28 | -------------------------------------------------------------------------------- /library/query_injectable_context_composite_action.yml: -------------------------------------------------------------------------------- 1 | id: RQ-15 2 | 3 | info: 4 | name: Injectable Composite Action (github context) 5 | severity: high 6 | description: Composite Actions that use injectable github context parameters in inline scripts can be used to inject arbitrary code. 7 | full-description: | 8 | Composite Actions can access the github context parameters. 9 | Some of these parameters can be controlled by the user, such as the commit message, the issue title, etc. 10 | If these parameters are used in inline scripts, an attacker can inject arbitrary code into the build process. 
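# Illustrative only, not part of the original rule: a composite action step that
# this query is meant to flag, assuming a hypothetical action.yml:
#
#   runs:
#     using: composite
#     steps:
#       - shell: bash
#         run: echo "Handling issue ${{ github.event.issue.title }}"
#
# An issue title such as `$(id)` is substituted into the script text and executed.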
11 | references: 12 | - https://docs.github.com/en/actions/creating-actions/creating-a-composite-action 13 | - https://cycode.com/blog/cycode-secures-thousands-of-open-source-projects/ 14 | tags: 15 | - injection 16 | - unauthenticated 17 | 18 | query: | 19 | MATCH (w:Workflow)-[*]->(ca:CompositeAction)-->(cas:CompositeActionStep) 20 | WHERE ( 21 | ( 22 | "issues" in w.trigger OR 23 | "issue_comment" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND 26 | ANY(input IN [ 27 | "github.event.issue.title", 28 | "github.event.issue.body", 29 | "github.event.pull_request.title", 30 | "github.event.pull_request.body", 31 | "github.event.comment.body", 32 | "github.event.review.body", 33 | "github.event.review_comment.body", 34 | "github.event.pages.*.page_name", 35 | "github.event.commits.*.message", 36 | "github.event.head_commit.message", 37 | "github.event.head_commit.author.email", 38 | "github.event.head_commit.author.name", 39 | "github.event.commits.*.author.email", 40 | "github.event.commits.*.author.name", 41 | "github.event.pull_request.head.ref", 42 | "github.event.pull_request.head.label", 43 | "github.event.pull_request.head.repo.default_branch", 44 | "github.head_ref" 45 | ] WHERE cas.run CONTAINS input ) 46 | ) 47 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_injectable_input_composite_action.yml: -------------------------------------------------------------------------------- 1 | id: RQ-13 2 | 3 | info: 4 | name: Injectable Composite Action (input variable) 5 | severity: high 6 | description: Composite Actions that use input parameters in inline scripts can be used to inject arbitrary code. 7 | full-description: | 8 | Composite Actions can get input parameters from the workflow file. 9 | If these input parameters are used in inline scripts, an attacker can 10 | inject arbitrary code into the build process. 11 | references: 12 | - https://docs.github.com/en/actions/creating-actions/creating-a-composite-action 13 | - https://cycode.com/blog/cycode-secures-thousands-of-open-source-projects/ 14 | tags: 15 | - injection 16 | - unauthenticated 17 | 18 | query: | 19 | MATCH (w:Workflow)-[*]->(s:Step)-->(ca:CompositeAction)-->(cas:CompositeActionStep)-->(d:StepCodeDependency) 20 | WHERE ( 21 | ( 22 | "issues" in w.trigger OR 23 | "issue_comment" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND ( 26 | ca.using = "composite" AND 27 | NOT cas.run is null AND 28 | d.param STARTS WITH "inputs." 
29 | ) AND ( 30 | ANY(input IN s.with WHERE 31 | ANY ( 32 | pattern IN [ 33 | "github.event.issue.title", 34 | "github.event.issue.body", 35 | "github.event.pull_request.title", 36 | "github.event.pull_request.body", 37 | "github.event.comment.body", 38 | "github.event.review.body", 39 | "github.event.review_comment.body", 40 | "github.event.pages.*.page_name", 41 | "github.event.commits.*.message", 42 | "github.event.head_commit.message", 43 | "github.event.head_commit.author.email", 44 | "github.event.head_commit.author.name", 45 | "github.event.commits.*.author.email", 46 | "github.event.commits.*.author.name", 47 | "github.event.pull_request.head.ref", 48 | "github.event.pull_request.head.label", 49 | "github.event.pull_request.head.repo.default_branch", 50 | "github.head_ref" 51 | ] WHERE input CONTAINS pattern 52 | ) 53 | ) 54 | ) 55 | ) 56 | RETURN DISTINCT s.url AS url; -------------------------------------------------------------------------------- /library/query_label_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-5 2 | 3 | info: 4 | name: Label Context Injection 5 | severity: high 6 | description: Label Injection is caused by using label variables in inline scripts 7 | full-description: | 8 | A new pull request could be submitted with a label that contains special characters. 9 | By using a label variable in an inline script, an attacker can inject arbitrary code into the build process. 10 | references: 11 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 12 | tags: 13 | - injection 14 | 15 | query: | 16 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 17 | WHERE ( 18 | "issues" in w.trigger OR 19 | "issue_comment" in w.trigger OR 20 | "pull_request_target" in w.trigger 21 | ) AND 22 | ( 23 | d.param IN [ 24 | "github.event.pull_request.head.label" 25 | ] 26 | ) 27 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_message_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-6 2 | 3 | info: 4 | name: Message Context Injection 5 | severity: high 6 | description: Commit Injection is caused by using commit message variables in inline scripts 7 | full-description: | 8 | Commit messages can contain any text and special characters. 9 | By using a commit message variable in an inline script, an attacker can inject arbitrary code 10 | into the build process. 11 | references: 12 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 13 | tags: 14 | - injection 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 19 | WHERE ( 20 | "issues" in w.trigger OR 21 | "issue_comment" in w.trigger OR 22 | "pull_request_target" in w.trigger 23 | ) AND 24 | ( 25 | d.param IN [ 26 | "github.event.head_commit.message", 27 | "github.event.merge_group.head_commit.message" 28 | ] 29 | ) 30 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_priv_esc_workflow_run.yml: -------------------------------------------------------------------------------- 1 | id: RQ-7 2 | 3 | info: 4 | name: Privilege Escalation Workflow Run 5 | severity: critical 6 | description: Injecting malicious code into a workflow that triggers a workflow_run pipeline can lead to privilege escalation. 
7 | full-description: | 8 | The pull request pipeline runs without access to secrets. 9 | However, if a pull request triggers a workflow run, the workflow run will then have access to secrets. 10 | This means that if an attacker can inject malicious code into the pull request workflow and then pass the malicious code to the workflow run, 11 | the attacker can gain access to secrets even though the original workflow did not have access to secrets. 12 | references: 13 | - https://www.legitsecurity.com/blog/github-privilege-escalation-vulnerability 14 | tags: 15 | - unauthenticated 16 | - injection 17 | - priv-esc 18 | 19 | query: | 20 | MATCH (w:Workflow)-[*]->(w2:Workflow) 21 | WHERE ( 22 | ( 23 | "pull_request" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND 26 | ( 27 | "workflow_run" in w2.trigger 28 | ) 29 | ) AND EXISTS { 30 | (w)-[*]->(d:StepCodeDependency) 31 | WHERE ( 32 | d.param IN [ 33 | "github.event.pull_request.title", 34 | "github.event.pull_request.body", 35 | "github.event.pull_request.head.ref", 36 | "github.event.pull_request.head.label", 37 | "github.event.pull_request.head.repo.default_branch" 38 | ] 39 | ) 40 | } 41 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_pull_request_target_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-8 2 | 3 | info: 4 | name: Pull Request Target Injection 5 | severity: critical 6 | description: The pull_request_target runs in the context of the base repository, not the fork. 7 | full-description: | 8 | A pull_request_target operates within the context of the base repository of the pull request, 9 | which means that any tampering with the build process can potentially lead 10 | to the unauthorized extraction of sensitive information, such as secrets. 11 | references: 12 | - https://github.com/CycodeLabs/raven/tree/main/docs/pull_request_injections 13 | - https://cycode.com/blog/github-actions-vulnerabilities/ 14 | tags: 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(j:Job) 19 | WHERE 20 | w.permissions is null AND 21 | "pull_request_target" in w.trigger AND 22 | EXISTS { 23 | (j)-->(s:Step)-->(ca:CompositeAction) 24 | WHERE ( 25 | ca.path = "actions/checkout" AND 26 | ANY(param IN s.with WHERE 27 | ( 28 | param STARTS WITH "ref" and 29 | ( 30 | param contains "head.sha" OR 31 | param contains "head.ref" 32 | ) 33 | ) 34 | ) 35 | ) 36 | } 37 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_ref_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-9 2 | 3 | info: 4 | name: Branch Context Injection 5 | severity: high 6 | description: Branch Injection is caused by using ref/default_branch variables in inline scripts 7 | full-description: | 8 | A new pull request could be submitted with a branch name that contains special characters. 9 | By using a ref/default_branch variable in an inline script, an attacker can inject arbitrary code into the build process. 
10 | references: 11 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 12 | - https://cycode.com/blog/ci-story-how-we-found-critical-vulnerabilities-in-storybook-project/ 13 | tags: 14 | - injection 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 19 | WHERE ( 20 | "issues" in w.trigger OR 21 | "issue_comment" in w.trigger OR 22 | "pull_request_target" in w.trigger 23 | ) AND 24 | ( 25 | d.param IN [ 26 | "github.event.pull_request.head.ref", 27 | "github.head_ref", 28 | "github.event.pull_request.head.repo.default_branch" 29 | ] 30 | ) 31 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_self_hosted_workflow.yml: -------------------------------------------------------------------------------- 1 | id: RQ-10 2 | 3 | info: 4 | name: Self Hosted Runner 5 | severity: medium 6 | description: Self Hosted runners should not be used for public repositories. 7 | full-description: | 8 | Self Hosted runners do not have the same security controls as GitHub Hosted runners and 9 | do not have the guarantees of a clean ephemeral environment. Self Hosted runners should 10 | only be used for private repositories. 11 | references: 12 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#hardening-for-self-hosted-runners 13 | tags: 14 | - unauthenticated 15 | - best-practice 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(j:Job) 19 | WHERE ( 20 | w.is_public = TRUE AND 21 | "self-hosted" in j.machine 22 | ) 23 | RETURN DISTINCT w.url AS url; 24 | -------------------------------------------------------------------------------- /library/query_title_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-11 2 | 3 | info: 4 | name: Title Context Injection 5 | severity: critical 6 | description: Title Injection is caused by using title variables in inline scripts 7 | full-description: | 8 | Issues, comments, discussions and PR titles can contain any text and special characters. 9 | By using a title variable in an inline script, an attacker can inject arbitrary code 10 | into the build process. 11 | references: 12 | - https://github.com/CycodeLabs/raven/blob/main/docs/issue_injections/README.md 13 | - https://cycode.com/blog/github-actions-vulnerabilities/ 14 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 15 | tags: 16 | - injection 17 | - unauthenticated 18 | 19 | query: | 20 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 21 | WHERE ( 22 | "issues" in w.trigger OR 23 | "issue_comment" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND 26 | ( 27 | d.param IN [ 28 | "github.event.issue.title", 29 | "github.event.pull_request.title", 30 | "github.event.pull_request.milestone.title" 31 | ] 32 | ) 33 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_unpinnable_action.yml: -------------------------------------------------------------------------------- 1 | id: RQ-12 2 | 3 | info: 4 | name: Unpinnable Action 5 | severity: low 6 | description: Unpinnable actions can lead to software supply chain attacks. 7 | full-description: | 8 | Actions can be pinned to a specific version to ensure that the same version is used 9 | every time the workflow is run. 
Even if the action is pinned, if that action itself 10 | uses an unpinned Docker Image or GitHub Action, the action can be updated without 11 | the workflow being updated. This can lead to software supply chain attacks. 12 | references: 13 | - https://www.paloaltonetworks.com/blog/prisma-cloud/unpinnable-actions-github-security/ 14 | tags: 15 | - supply-chain 16 | - best-practice 17 | 18 | query: | 19 | MATCH (ca:CompositeAction) 20 | WHERE ( 21 | ca.using = "docker" AND ( 22 | NOT ca.image CONTAINS "@sha256:" 23 | ) 24 | ) 25 | RETURN DISTINCT ca.url AS url; -------------------------------------------------------------------------------- /library/query_usage_of_outdated_node.yml: -------------------------------------------------------------------------------- 1 | id: RQ-14 2 | 3 | info: 4 | name: Usage of Outdated Node Version 5 | severity: low 6 | description: Using a composite action that relies on an outdated Node version. 7 | full-description: | 8 | Node 12 active support ended on 20 Oct 2020, and security support ended on 30 Apr 2022. 9 | references: 10 | - https://endoflife.date/nodejs 11 | tags: 12 | - endoflife 13 | 14 | query: | 15 | MATCH (w:Workflow)-[*]->(ca:CompositeAction) 16 | WHERE ca.using = "node12" 17 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from src.cmdline import execute 2 | 3 | 4 | def main(): 5 | execute() 6 | 7 | 8 | if __name__ == "__main__": 9 | main() 10 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | colorama 2 | loguru 3 | py2neo 4 | PyYAML 5 | redis 6 | requests 7 | slack_sdk 8 | tqdm -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.13 3 | # by the following command: 4 | # 5 | # pip-compile 6 | # 7 | certifi==2025.1.31 8 | # via 9 | # py2neo 10 | # requests 11 | charset-normalizer==3.4.1 12 | # via requests 13 | colorama==0.4.6 14 | # via -r requirements.in 15 | idna==3.10 16 | # via requests 17 | interchange==2021.0.4 18 | # via py2neo 19 | loguru==0.7.3 20 | # via -r requirements.in 21 | monotonic==1.6 22 | # via py2neo 23 | packaging==24.2 24 | # via py2neo 25 | pansi==2024.11.0 26 | # via py2neo 27 | pillow==11.1.0 28 | # via pansi 29 | py2neo==2021.2.4 30 | # via -r requirements.in 31 | pygments==2.19.1 32 | # via py2neo 33 | pytz==2025.1 34 | # via interchange 35 | pyyaml==6.0.2 36 | # via -r requirements.in 37 | redis==5.2.1 38 | # via -r requirements.in 39 | requests==2.32.3 40 | # via -r requirements.in 41 | six==1.17.0 42 | # via 43 | # interchange 44 | # py2neo 45 | slack-sdk==3.34.0 46 | # via -r requirements.in 47 | tqdm==4.67.1 48 | # via -r requirements.in 49 | urllib3==2.3.0 50 | # via 51 | # py2neo 52 | # requests 53 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os import getenv 2 | from setuptools import find_packages 3 | from setuptools import setup 4 | from distutils import log 5 | import pathlib 6 | import sys 7 | 8 | 9 | __version__ = getenv("RAVEN_VERSION", "0.0.0") 10 | 11 | HERE = pathlib.Path(__file__).parent 12 
| README = (HERE / "README.md").read_text() 13 | REQUIREMENTS = (HERE / "requirements.txt").read_text().splitlines() 14 | CURRENT_PYTHON = sys.version_info[:2] 15 | REQUIRED_PYTHON = (3, 9) 16 | if CURRENT_PYTHON < REQUIRED_PYTHON: 17 | log.fatal("Raven requires Python 3.9 or greater.") 18 | sys.exit(1) 19 | 20 | 21 | setup( 22 | name="raven-cycode", 23 | version=__version__, 24 | description="RAVEN (Risk Analysis and Vulnerability Enumeration for CI/CD)", 25 | long_description=README, 26 | long_description_content_type="text/markdown", 27 | url="https://github.com/CycodeLabs/raven", 28 | project_urls={"Source": "https://github.com/CycodeLabs/raven"}, 29 | author=["Cycode "], 30 | keywords=["cycode", "raven", "security", "ci/cd"], 31 | license="Apache License 2.0", 32 | python_requires=">=3.9", 33 | classifiers=[ 34 | "Programming Language :: Python :: 3", 35 | "Programming Language :: Python :: 3.9", 36 | "Programming Language :: Python :: 3.10", 37 | "Programming Language :: Python :: 3.11", 38 | "Programming Language :: Python :: 3.12", 39 | "Programming Language :: Python :: 3 :: Only", 40 | "Operating System :: Unix", 41 | "Operating System :: MacOS", 42 | "Intended Audience :: Science/Research", 43 | "Topic :: Security", 44 | ], 45 | install_requires=REQUIREMENTS, 46 | packages=find_packages(exclude=("tests", "tests.*")), 47 | entry_points={"console_scripts": ["raven = src.cmdline:execute"]}, 48 | ) 49 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | from src.common.ignore_warnings import ignore_warnings 2 | 3 | ignore_warnings() 4 | -------------------------------------------------------------------------------- /src/cmdline.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import src.logger.log as log 3 | from src.common.utils import validate_query_ids 4 | from src.downloader.download import ( 5 | download_all_workflows_and_actions, 6 | download_account_workflows_and_actions, 7 | ) 8 | from src.indexer.index import index_downloaded_workflows_and_actions 9 | from src.reporter.report import generate 10 | from src.config.config import ( 11 | load_downloader_config, 12 | load_indexer_config, 13 | load_reporter_config, 14 | ) 15 | from src.config.config import ( 16 | DEBUG_DEFAULT, 17 | MIN_STARS_DEFAULT, 18 | NEO4J_CLEAN_DEFAULT, 19 | NEO4J_URI_DEFAULT, 20 | NEO4J_USERNAME_DEFAULT, 21 | NEO4J_PASSWORD_DEFAULT, 22 | REDIS_HOST_DEFAULT, 23 | REDIS_PORT_DEFAULT, 24 | REDIS_CLEAN_DEFAULT, 25 | DOWNLOAD_COMMAND, 26 | DOWNLOAD_ACCOUNT_COMMAND, 27 | DOWNLOAD_CRAWL_COMMAND, 28 | INDEX_COMMAND, 29 | REPORT_COMMAND, 30 | QUERIES_PATH_DEFAULT, 31 | REPORT_RAW_FORMAT, 32 | REPORT_JSON_FORMAT, 33 | SEVERITY_LEVELS, 34 | QUERY_TAGS, 35 | QUERY_IDS, 36 | ) 37 | 38 | COMMAND_FUNCTIONS = { 39 | DOWNLOAD_COMMAND: { 40 | DOWNLOAD_CRAWL_COMMAND: download_all_workflows_and_actions, 41 | DOWNLOAD_ACCOUNT_COMMAND: download_account_workflows_and_actions, 42 | }, 43 | INDEX_COMMAND: index_downloaded_workflows_and_actions, 44 | REPORT_COMMAND: generate, 45 | } 46 | 47 | 48 | def execute() -> None: 49 | try: 50 | raven() 51 | log.catch_exit() 52 | except KeyboardInterrupt: 53 | log.catch_exit() 54 | except Exception as e: 55 | log.error(e) 56 | log.fail_exit() 57 | 58 | 59 | def raven() -> None: 60 | parser = argparse.ArgumentParser( 61 | description="GitHub Actions downloader and indexer" 62 | ) 63 | 64 | subparsers = 
parser.add_subparsers(dest="command", help="sub-command help") 65 | 66 | redis_parser = argparse.ArgumentParser(add_help=False) 67 | 68 | # Add redis arguments 69 | redis_parser.add_argument( 70 | "--redis-host", 71 | help=f"Redis host, default: {REDIS_HOST_DEFAULT}", 72 | default=REDIS_HOST_DEFAULT, 73 | ) 74 | redis_parser.add_argument( 75 | "--redis-port", 76 | type=int, 77 | help=f"Redis port, default: {REDIS_PORT_DEFAULT}", 78 | default=REDIS_PORT_DEFAULT, 79 | ) 80 | redis_parser.add_argument( 81 | "--clean-redis", 82 | "-cr", 83 | action="store_const", 84 | default=REDIS_CLEAN_DEFAULT, 85 | const=True, 86 | help=f"Whether to clean the Redis cache, default: {REDIS_CLEAN_DEFAULT}", 87 | ) 88 | 89 | neo4j_parser = argparse.ArgumentParser(add_help=False) 90 | neo4j_parser.add_argument( 91 | "--neo4j-uri", 92 | default=NEO4J_URI_DEFAULT, 93 | help=f"Neo4j URI endpoint, default: {NEO4J_URI_DEFAULT}", 94 | ) 95 | neo4j_parser.add_argument( 96 | "--neo4j-user", 97 | default=NEO4J_USERNAME_DEFAULT, 98 | help=f"Neo4j username, default: {NEO4J_USERNAME_DEFAULT}", 99 | ) 100 | neo4j_parser.add_argument( 101 | "--neo4j-pass", 102 | default=NEO4J_PASSWORD_DEFAULT, 103 | help=f"Neo4j password, default: {NEO4J_PASSWORD_DEFAULT}", 104 | ) 105 | neo4j_parser.add_argument( 106 | "--clean-neo4j", 107 | "-cn", 108 | action="store_const", 109 | default=NEO4J_CLEAN_DEFAULT, 110 | const=True, 111 | help=f"Whether to clean the Neo4j cache and index from scratch, default: {NEO4J_CLEAN_DEFAULT}", 112 | ) 113 | 114 | download_parser_options = argparse.ArgumentParser(add_help=False) 115 | download_parser_options.add_argument( 116 | "--token", 117 | required=True, 118 | help="GITHUB_TOKEN to download data from the GitHub API (needed for effective rate-limiting)", 119 | ) 120 | download_parser_options.add_argument( 121 | "--debug", 122 | action="store_const", 123 | default=DEBUG_DEFAULT, 124 | const=True, 125 | help=f"Whether to print debug statements, default: {DEBUG_DEFAULT}", 126 | ) 127 | 128 | download_parser = subparsers.add_parser( 129 | "download", help="Download workflows into Redis database" 130 | ) 131 | 132 | download_sub_parser = download_parser.add_subparsers( 133 | dest="download_command", 134 | ) 135 | 136 | crawl_download_parser = download_sub_parser.add_parser( 137 | "crawl", 138 | help="Crawl public GitHub repositories", 139 | parents=[download_parser_options, redis_parser], 140 | ) 141 | 142 | account_download_parser = download_sub_parser.add_parser( 143 | "account", 144 | help="Scan a specific GitHub account (user or organization)", 145 | parents=[download_parser_options, redis_parser], 146 | ) 147 | 148 | account_download_group = account_download_parser.add_mutually_exclusive_group( 149 | required=True 150 | ) 151 | 152 | account_download_group.add_argument( 153 | "--account-name", 154 | required=False, 155 | action="append", 156 | type=str, 157 | help="Account name for downloading the workflows, can be used multiple times", 158 | ) 159 | 160 | account_download_group.add_argument( 161 | "--personal", 162 | required=False, 163 | action="store_const", 164 | const=True, 165 | help="Download repositories owned by the authenticated user", 166 | ) 167 | 168 | crawl_download_parser.add_argument( 169 | "--max-stars", type=int, help="Maximum number of stars for a repository" 170 | ) 171 | crawl_download_parser.add_argument( 172 | "--min-stars", 173 | type=int, 174 | default=MIN_STARS_DEFAULT, 175 | help=f"Minimum number of stars for a repository, default: {MIN_STARS_DEFAULT}", 176 | ) 177 | 178 | # Index 
action 179 | index_parser = subparsers.add_parser( 180 | "index", 181 | parents=[redis_parser, neo4j_parser], 182 | help="Index the downloaded workflows into the Neo4j database", 183 | ) 184 | index_parser.add_argument( 185 | "--debug", 186 | action="store_const", 187 | default=DEBUG_DEFAULT, 188 | const=True, 189 | help=f"Whether to print debug statements, default: {DEBUG_DEFAULT}", 190 | ) 191 | 192 | report_parser = subparsers.add_parser( 193 | "report", 194 | parents=[redis_parser, neo4j_parser], 195 | help="Generate report from indexed Actions - Beta Version", 196 | ) 197 | 198 | report_parser.add_argument( 199 | "--tag", 200 | "-t", 201 | action="append", 202 | type=str, 203 | default=[], 204 | choices=QUERY_TAGS, 205 | help="Filter queries with specific tag", 206 | ) 207 | report_parser.add_argument( 208 | "--severity", 209 | "-s", 210 | type=str, 211 | default="info", 212 | choices=SEVERITY_LEVELS.keys(), 213 | help="Filter queries by severity level (default: info)", 214 | ) 215 | report_parser.add_argument( 216 | "--query_ids", 217 | "-id", 218 | type=validate_query_ids, 219 | default="", 220 | metavar=f"RQ-1,..,{QUERY_IDS[-1]}", 221 | help="Filter queries by query ids (example: RQ-2,RQ-8)", 222 | ) 223 | report_parser.add_argument( 224 | "--queries-path", 225 | "-dp", 226 | default=QUERIES_PATH_DEFAULT, 227 | help="Queries folder (default: library)", 228 | ) 229 | report_parser.add_argument( 230 | "--format", 231 | "-f", 232 | default=REPORT_RAW_FORMAT, 233 | choices=[REPORT_RAW_FORMAT, REPORT_JSON_FORMAT], 234 | help="Report format (default: raw)", 235 | ) 236 | 237 | format_sub_parser = report_parser.add_subparsers( 238 | dest="report_command", 239 | ) 240 | 241 | slack_parser = format_sub_parser.add_parser( 242 | "slack", 243 | help="Send report to slack channel", 244 | ) 245 | slack_parser.add_argument( 246 | "--slack-token", 247 | "-st", 248 | required=True, 249 | help="Slack API token used to send the report", 250 | ) 251 | slack_parser.add_argument( 252 | "--channel-id", 253 | "-ci", 254 | required=True, 255 | help="Slack channel ID to send the report to", 256 | ) 257 | 258 | args = parser.parse_args() 259 | 260 | if args.command in COMMAND_FUNCTIONS: 261 | if args.command == DOWNLOAD_COMMAND: 262 | if args.download_command: 263 | load_downloader_config(vars(args)) 264 | COMMAND_FUNCTIONS[args.command][args.download_command]() 265 | return 266 | else: 267 | download_parser.print_help() 268 | elif args.command == INDEX_COMMAND: 269 | load_indexer_config(vars(args)) 270 | COMMAND_FUNCTIONS[args.command]() 271 | elif args.command == REPORT_COMMAND: 272 | load_reporter_config(vars(args)) 273 | COMMAND_FUNCTIONS[args.command]() 274 | else: 275 | parser.print_help() 276 | -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/common/__init__.py -------------------------------------------------------------------------------- /src/common/ignore_warnings.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def ignore_warnings(): 5 | # Ignore urllib3 warning about OpenSSL version 6 | warnings.filterwarnings( 7 | "ignore", 8 | module="urllib3", 9 | message="urllib3 v2.0 only supports OpenSSL 1.1.1+.*", 10 | ) 11 | -------------------------------------------------------------------------------- /src/common/utils.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import re 3 | import io 4 | from typing import List, Dict, Union, Optional 5 | 6 | import yaml 7 | from py2neo.data import Node 8 | 9 | from src.storage.redis_connection import RedisConnection 10 | from src.config.config import Config, QUERY_IDS 11 | import src.logger.log as log 12 | from urllib.parse import urlparse, parse_qs 13 | 14 | 15 | def get_dependencies_in_code(code: str) -> List[str]: 16 | re_fmt = r"\$\{\{\s*([a-zA-Z0-9\-\._]*)\s*\}\}" 17 | return [match.group(1) for match in re.finditer(re_fmt, code)] 18 | 19 | 20 | def convert_dict_to_list(d: Union[Dict, str]) -> List: 21 | if isinstance(d, dict): 22 | return [f"{key}:{value}" for key, value in d.items()] 23 | else: 24 | return [d] 25 | 26 | 27 | def convert_workflow_to_unix_path(repo: str, workflow_name: str) -> str: 28 | return f"{repo}/.github/workflows/{workflow_name}" 29 | 30 | 31 | def convert_raw_github_url_to_github_com_url(raw_url: str): 32 | """ 33 | Convert a GitHub raw URL to its corresponding tree URL. 34 | convert_raw_github_url_to_github_com_url("https://raw.githubusercontent.com/myorg/myrepo/master/.github/workflows/android.yml") 35 | >> "https://github.com/myorg/myrepo/tree/master/.github/workflows/android.yml" 36 | """ 37 | 38 | tree_url = raw_url.replace("raw.githubusercontent.com", "github.com") 39 | if is_url_contains_a_token(tree_url): 40 | tree_url = tree_url.split("?")[0] 41 | 42 | parts = tree_url.split("/") 43 | parts.insert(5, "tree") 44 | return "/".join(parts) 45 | 46 | 47 | def find_workflow_by_name(repo: str, workflow_name: str) -> str: 48 | """Tries to find a workflow in the specified repo 49 | with the given workflow name 50 | 51 | Used to create a connection based on the "workflow_run" trigger (which gives the workflow name) 52 | """ 53 | with RedisConnection(Config.redis_workflows_db) as workflows_db: 54 | for workflow in workflows_db.get_all_keys(): 55 | workflow = workflow.decode() 56 | 57 | if workflow.startswith(repo): 58 | data = workflows_db.get_value_from_hash( 59 | workflow, Config.redis_data_hash_field_name 60 | ).decode() 61 | 62 | # PyYAML has issues with tabs. 63 | data = data.replace("\t", "  ") 64 | 65 | with io.StringIO() as f: 66 | f.write(data) 67 | f.seek(0) 68 | try: 69 | obj = yaml.load(f, yaml.loader.Loader) 70 | except yaml.scanner.ScannerError as e: 71 | log.error( 72 | f"[-] Failed loading: {workflow}. Exception: {e}. Skipping..." 73 | ) 74 | return 75 | 76 | # Could happen if the YAML is empty. 77 | if not obj: 78 | return 79 | 80 | if isinstance(obj, str): 81 | # Could happen in rare cases. 82 | return 83 | 84 | if "name" in obj and obj["name"] == workflow_name: 85 | return workflow 86 | 87 | 88 | def get_repo_name_from_path(path: str) -> str: 89 | """ 90 | edgedb/edgedb-pkg/integration/linux/test/ubuntu-jammy/action.yml -> 91 | edgedb/edgedb-pkg 92 | 93 | slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml -> 94 | slsa-framework/slsa-github-generator 95 | """ 96 | return "/".join(path.split("/")[:2]) 97 | 98 | 99 | def find_uses_strings(workflow_content: str) -> List[str]: 100 | """Find usage patterns of composite actions inside the workflow. 101 | E.g. if it uses "actions/checkout", then "actions/checkout" 102 | will be part of the returned list. 103 | """ 104 | re_fmt = r"[ \t]uses:\s*[\'\"]?([0-9a-zA-Z_\:\-/@\.]*)[\'\"]?" 
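# Matches references such as `uses: actions/checkout@v4`, capturing the optionally-quoted reference after "uses:".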
105 | return [match.group(1) for match in re.finditer(re_fmt, workflow_content)] 106 | 107 | 108 | def is_url_contains_a_token(url) -> bool: 109 | """ 110 | Checks whether the URL contains a token query parameter. 111 | E.g.: 112 | is_url_contains_a_token("https://raw.githubusercontent.com/RavenIntegrationTests/astro/main/.github/workflows/ci.yml?token=AAABBBCCC") 113 | >> True 114 | is_url_contains_a_token("https://raw.githubusercontent.com/RavenIntegrationTests/astro/main/.github/workflows/ci.yml") 115 | >> False 116 | """ 117 | parsed_url = urlparse(url) 118 | query_parameters = parse_qs(parsed_url.query) 119 | 120 | return "token" in query_parameters 121 | 122 | 123 | def str_to_bool(s: str) -> bool: 124 | return bool(int(s)) 125 | 126 | 127 | def raw_str_to_bool(s: str) -> bool: 128 | return s == "true" 129 | 130 | 131 | def validate_query_ids(ids_arg: str) -> list: 132 | """Check that the ids argument (e.g. "RQ-1,RQ-3") only contains IDs from config.QUERY_IDS. 133 | Return the parsed list.""" 134 | if not ids_arg: 135 | return [] 136 | 137 | ids_list = ids_arg.split(",") 138 | if not set(ids_list).issubset(QUERY_IDS): 139 | raise argparse.ArgumentTypeError( 140 | f"Invalid choice: {ids_arg}. Choose from {','.join(QUERY_IDS)}" 141 | ) 142 | return ids_list 143 | -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/config/__init__.py -------------------------------------------------------------------------------- /src/config/config.py: -------------------------------------------------------------------------------- 1 | from src.storage.neo4j_graph import GraphDb 2 | 3 | # Default Values 4 | DEBUG_DEFAULT = False 5 | MIN_STARS_DEFAULT = 1000 6 | REDIS_CLEAN_DEFAULT = False 7 | NEO4J_CLEAN_DEFAULT = False 8 | QUERIES_PATH_DEFAULT = "library" 9 | REPORT_RAW_FORMAT = "raw" 10 | REPORT_JSON_FORMAT = "json" 11 | SLACK_REPORTER = "slack" 12 | 13 | NEO4J_URI_DEFAULT = "neo4j://localhost:7687" 14 | NEO4J_USERNAME_DEFAULT = "neo4j" 15 | NEO4J_PASSWORD_DEFAULT = "123456789" 16 | 17 | REDIS_HOST_DEFAULT = "localhost" 18 | REDIS_PORT_DEFAULT = 6379 19 | 20 | # Constants 21 | REDIS_WORKFLOW_DOWNLOAD_HISTORY_SET = "workflow_download_history" 22 | REDIS_ACTION_DOWNLOAD_HISTORY_SET = "action_download_history" 23 | REDIS_WORKFLOW_INDEX_HISTORY_SET = "workflow_index_history" 24 | REDIS_ACTION_INDEX_HISTORY_SET = "action_index_history" 25 | REDIS_REF_POINTERS_HASH = "ref_pointers" 26 | # Field names to use in the hash of Actions and Workflows in the DB 27 | REDIS_DATA_HASH_FIELD_NAME = "data" 28 | REDIS_URL_HASH_FIELD_NAME = "url" 29 | REDIS_IS_PUBLIC_HASH_FIELD_NAME = "is_public" 30 | 31 | # The DB which contains the objects operations history (downloaded, indexed, etc.) 
and the ref pointers 32 | REDIS_OBJECTS_OPS_DB = 0 33 | # The DB which contains the downloaded workflows 34 | REDIS_WORKFLOWS_DB = 1 35 | # The DB which contains the downloaded actions 36 | REDIS_ACTIONS_DB = 2 37 | 38 | # CLI commands 39 | DOWNLOAD_COMMAND = "download" 40 | DOWNLOAD_ACCOUNT_COMMAND = "account" 41 | DOWNLOAD_CRAWL_COMMAND = "crawl" 42 | INDEX_COMMAND = "index" 43 | REPORT_COMMAND = "report" 44 | SEVERITY_LEVELS = { 45 | "info": 0, 46 | "low": 1, 47 | "medium": 2, 48 | "high": 3, 49 | "critical": 4, 50 | } 51 | QUERY_TAGS = [ 52 | "injection", 53 | "unauthenticated", 54 | "fixed", 55 | "priv-esc", 56 | "supply-chain", 57 | "best-practice", 58 | "endoflife", 59 | "reconnaissance", 60 | ] 61 | LAST_QUERY_ID = 17 62 | QUERY_IDS = [f"RQ-{num}" for num in range(1, LAST_QUERY_ID + 1)] 63 | 64 | 65 | def load_downloader_config(args) -> None: 66 | """Loading downloader subcommand config. 67 | Includes redis config. 68 | """ 69 | Config.debug = args.get("debug", DEBUG_DEFAULT) 70 | Config.github_token = args.get("token") 71 | Config.min_stars = args.get("min_stars", MIN_STARS_DEFAULT) 72 | Config.max_stars = args.get("max_stars") 73 | Config.account_name = args.get("account_name") 74 | Config.personal = args.get("personal") 75 | Config.clean_redis = args.get("clean_redis", REDIS_CLEAN_DEFAULT) 76 | 77 | load_redis_config(args) 78 | 79 | if Config.clean_redis: 80 | from src.storage.redis_utils import clean_redis_db 81 | 82 | clean_redis_db() 83 | 84 | 85 | def load_indexer_config(args) -> None: 86 | """Loading indexer subcommand config. 87 | Includes redis and neo4j config. 88 | """ 89 | Config.debug = args.get("debug", DEBUG_DEFAULT) 90 | Config.clean_neo4j = args.get("clean_neo4j", NEO4J_CLEAN_DEFAULT) 91 | Config.clean_redis = args.get("clean_redis", REDIS_CLEAN_DEFAULT) 92 | 93 | load_redis_config(args) 94 | load_neo4j_config(args) 95 | load_reporter_config(args) 96 | 97 | if Config.clean_neo4j or Config.graph.is_graph_empty(): 98 | from src.storage.redis_utils import clean_index 99 | from src.storage.neo4j_utils import clean_graph 100 | 101 | clean_graph() 102 | clean_index() 103 | 104 | 105 | def load_redis_config(args) -> None: 106 | Config.redis_host = args.get("redis_host", REDIS_HOST_DEFAULT) 107 | Config.redis_port = args.get("redis_port", REDIS_PORT_DEFAULT) 108 | 109 | 110 | def load_neo4j_config(args) -> None: 111 | Config.neo4j_uri = args.get("neo4j_uri", NEO4J_URI_DEFAULT) 112 | Config.neo4j_username = args.get("neo4j_user", NEO4J_USERNAME_DEFAULT) 113 | Config.neo4j_password = args.get("neo4j_pass", NEO4J_PASSWORD_DEFAULT) 114 | 115 | # Initializing the neo4j graph connection 116 | Config.graph = GraphDb( 117 | uri=Config.neo4j_uri, 118 | user=Config.neo4j_username, 119 | password=Config.neo4j_password, 120 | ) 121 | 122 | 123 | def load_reporter_config(args): 124 | Config.tags = args.get("tag") 125 | Config.severity = args.get("severity") 126 | Config.query_ids = args.get("query_ids") 127 | Config.queries_path = args.get("queries_path") 128 | Config.format = args.get("format") 129 | Config.reporter = args.get("report_command") 130 | Config.slack_token = args.get("slack_token") 131 | Config.channel_id = args.get("channel_id") 132 | 133 | load_redis_config(args) 134 | load_neo4j_config(args) 135 | 136 | 137 | class Config: 138 | # Global Config 139 | debug: bool = None 140 | 141 | # Downloader Config 142 | github_token: str = None 143 | min_stars: int = None 144 | max_stars: int = None 145 | account_name: list[str] = [] 146 | personal: bool = None 147 | 148 | # 
Indexer Configs 149 | clean_neo4j: bool = None 150 | 151 | # Redis Config 152 | redis_host: str = None 153 | redis_port: int = None 154 | clean_redis: bool = None 155 | 156 | # Redis Config Constants 157 | redis_objects_ops_db: int = REDIS_OBJECTS_OPS_DB 158 | redis_workflows_db: int = REDIS_WORKFLOWS_DB 159 | redis_actions_db: int = REDIS_ACTIONS_DB 160 | redis_data_hash_field_name: str = REDIS_DATA_HASH_FIELD_NAME 161 | redis_url_hash_field_name: str = REDIS_URL_HASH_FIELD_NAME 162 | redis_is_public_hash_field_name: str = REDIS_IS_PUBLIC_HASH_FIELD_NAME 163 | workflow_download_history_set: str = REDIS_WORKFLOW_DOWNLOAD_HISTORY_SET 164 | action_download_history_set: str = REDIS_ACTION_DOWNLOAD_HISTORY_SET 165 | workflow_index_history_set: str = REDIS_WORKFLOW_INDEX_HISTORY_SET 166 | action_index_history_set: str = REDIS_ACTION_INDEX_HISTORY_SET 167 | ref_pointers_hash: str = REDIS_REF_POINTERS_HASH 168 | 169 | # Report Config Constants 170 | tags: list = [] 171 | severity: str = None 172 | query_ids: list = [] 173 | format: str = None 174 | queries_path: str = QUERIES_PATH_DEFAULT 175 | reporter: str = None 176 | slack_token: str = None 177 | channel_id: str = None 178 | 179 | # Neo4j Config 180 | neo4j_uri: str = None 181 | neo4j_username: str = None 182 | neo4j_password: str = None 183 | graph: GraphDb = None 184 | -------------------------------------------------------------------------------- /src/downloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/downloader/__init__.py -------------------------------------------------------------------------------- /src/downloader/download.py: -------------------------------------------------------------------------------- 1 | from requests import get 2 | 3 | from src.config.config import Config 4 | from src.storage.redis_connection import RedisConnection 5 | from src.downloader.utils import ( 6 | insert_workflow_or_action_to_redis, 7 | add_ref_pointer_to_redis, 8 | ) 9 | from src.downloader.gh_api import ( 10 | get_account_generator, 11 | get_personal_account_generator, 12 | get_repository_generator, 13 | get_repository_workflows, 14 | get_repository_composite_action, 15 | get_repository_reusable_workflow, 16 | ) 17 | from src.common.utils import ( 18 | find_uses_strings, 19 | convert_workflow_to_unix_path, 20 | get_repo_name_from_path, 21 | convert_raw_github_url_to_github_com_url, 22 | is_url_contains_a_token, 23 | ) 24 | from src.workflow_components.dependency import UsesString, UsesStringType 25 | import src.logger.log as log 26 | 27 | 28 | def download_account_workflows_and_actions() -> None: 29 | """First, we determine whether the account is an organization or a user account. 30 | In both cases, we iterate over all the repositories and download their workflows and actions. 31 | 32 | For each repository we enumerate the .github/workflows directory 33 | and download all the workflows. 34 | In addition, if the repository contains an action.yml file, it is a composite action, 35 | so we download it as well. 36 | 37 | For each such workflow we also scan if it uses additional external actions. 38 | If so, we download these as well. 39 | 40 | We are trying to cache the downloads as much as we can to reduce redundant download attempts. 
41 | """ 42 | if Config.account_name: 43 | for account in Config.account_name: 44 | generator = get_account_generator(account) 45 | 46 | for repo in generator: 47 | download_workflows_and_actions(repo) 48 | 49 | elif Config.personal: 50 | generator = get_personal_account_generator() 51 | 52 | for repo in generator: 53 | download_workflows_and_actions(repo) 54 | 55 | else: 56 | raise Exception("Account name or personal flag must be provided.") 57 | 58 | 59 | def download_all_workflows_and_actions() -> None: 60 | """Iterating all repositories through Github search API. 61 | 62 | For each repository we enumerating the .github/workflows directory, 63 | and downloading all the workflows. 64 | In addition if the repository contains action.yml file, it means it is a composite action, 65 | so we download it as well. 66 | 67 | For each such workflow we also scan if it uses additional external actions. 68 | If so, we download these as well. 69 | 70 | We are trying to cache the downloads as much as we can to reduce redundant download attempts. 71 | """ 72 | 73 | log.info("[+] Starting repository iterator") 74 | generator = get_repository_generator(Config.min_stars, Config.max_stars) 75 | 76 | for repo in generator: 77 | download_workflows_and_actions(repo) 78 | 79 | 80 | def download_workflows_and_actions(repo: str) -> None: 81 | """The flow is the following: 82 | 83 | - First we enumerate .github/workflows directory for workflows 84 | - For each such workflow we download it 85 | - If that workflow contains uses:..., we analyze the string, and download the action or the reusable workflow. 86 | """ 87 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 88 | if ops_db.exists_in_set(Config.workflow_download_history_set, repo): 89 | log.debug(f"[!] Repo {repo} already scanned, skipping.") 90 | return 91 | 92 | workflows = get_repository_workflows(repo) 93 | is_public = 1 94 | 95 | log.debug(f"[+] Found {len(workflows)} workflows for {repo}") 96 | for name, url in workflows.items(): 97 | if is_url_contains_a_token(url): 98 | """ 99 | If the URL contains a token, it means it is a private repository. 100 | """ 101 | log.debug(f"[+] URL contains token argument - private repository") 102 | is_public = 0 103 | 104 | log.debug(f"[+] Fetching {name}") 105 | resp = get(url, timeout=10) 106 | 107 | if resp.status_code != 200: 108 | raise Exception( 109 | f"status code: {resp.status_code}. Response: {resp.text}" 110 | ) 111 | 112 | # We look for dependant external actions. 113 | uses_strings = find_uses_strings(resp.text) 114 | for uses_string in uses_strings: 115 | download_action_or_reusable_workflow(uses_string=uses_string, repo=repo) 116 | 117 | # Save workflow to redis 118 | workflow_unix_path = convert_workflow_to_unix_path(repo, name) 119 | github_url = convert_raw_github_url_to_github_com_url(url) 120 | insert_workflow_or_action_to_redis( 121 | db=Config.redis_workflows_db, 122 | object_path=workflow_unix_path, 123 | data=resp.text, 124 | github_url=github_url, 125 | is_public=is_public, 126 | ) 127 | 128 | # In the future, ref will be with commit sha 129 | add_ref_pointer_to_redis(workflow_unix_path, workflow_unix_path) 130 | 131 | ops_db.insert_to_set(Config.workflow_download_history_set, repo) 132 | 133 | 134 | def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None: 135 | """Whenever we find that workflow is using a "uses:" string, 136 | it means we are referencing a composite action or reusable workflow, we try to fetch it. 
137 | 138 | We use our utility tooling to parse the uses string, because it can be quite complex. 139 | """ 140 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 141 | uses_string_obj = UsesString.analyze(uses_string=uses_string) 142 | full_path = uses_string_obj.get_full_path(repo) 143 | is_public = 1 144 | 145 | # If already scanned action 146 | if ops_db.exists_in_set(Config.action_download_history_set, full_path): 147 | return 148 | # If already scanned workflow - Have to check workflow db because only it contains the full workflow path. 149 | with RedisConnection(Config.redis_workflows_db) as workflows_db: 150 | if ( 151 | workflows_db.get_value_from_hash( 152 | full_path, Config.redis_data_hash_field_name 153 | ) 154 | is not None 155 | ): 156 | return 157 | 158 | if uses_string_obj.type == UsesStringType.REUSABLE_WORKFLOW: 159 | url = get_repository_reusable_workflow(full_path) 160 | elif uses_string_obj.type == UsesStringType.ACTION: 161 | url = get_repository_composite_action(full_path) 162 | else: 163 | # Can happen with docker references. 164 | return 165 | 166 | if url is None: 167 | # This action might be a local action or a docker action. 168 | 169 | if uses_string.startswith("./"): 170 | log.warning( 171 | f"[-] Local action '{uses_string}' not found in '{repo}', skipping." 172 | ) 173 | elif uses_string_obj.type == UsesStringType.ACTION: 174 | log.warning( 175 | f"[-] Action '{uses_string}' could not be found while scanning repo '{repo}', skipping." 176 | ) 177 | elif uses_string_obj.type == UsesStringType.REUSABLE_WORKFLOW: 178 | log.warning( 179 | f"[-] Reusable workflow '{uses_string}' could not be found while scanning repo '{repo}', skipping." 180 | ) 181 | else: 182 | log.warning( 183 | f"[-] Docker Action '{uses_string}' could not be found while scanning repo '{repo}', skipping." 184 | ) 185 | return 186 | 187 | if is_url_contains_a_token(url): 188 | log.debug(f"[+] URL contains token argument - private repository") 189 | is_public = 0 190 | 191 | resp = get(url, timeout=10) 192 | if resp.status_code != 200: 193 | raise Exception(f"status code: {resp.status_code}. Response: {resp.text}") 194 | 195 | # We look for dependent external actions. 
196 | uses_strings = find_uses_strings(resp.text) 197 | new_repo = get_repo_name_from_path(full_path) 198 | 199 | for new_uses_string in uses_strings: 200 | # Guard against self-references, which caused infinite loops in several repositories 201 | new_full_path = UsesString.analyze(new_uses_string).get_full_path(new_repo) 202 | if new_full_path == full_path: 203 | continue 204 | 205 | download_action_or_reusable_workflow( 206 | uses_string=new_uses_string, repo=new_repo 207 | ) 208 | 209 | if uses_string_obj.type == UsesStringType.REUSABLE_WORKFLOW: 210 | ops_db.insert_to_set(Config.workflow_download_history_set, full_path) 211 | 212 | insert_workflow_or_action_to_redis( 213 | db=Config.redis_workflows_db, 214 | object_path=full_path, 215 | data=resp.text, 216 | github_url=convert_raw_github_url_to_github_com_url(url), 217 | is_public=is_public, 218 | ) 219 | # In the future, ref will be with commit sha 220 | add_ref_pointer_to_redis(full_path, full_path) 221 | else: # UsesStringType.ACTION 222 | ops_db.insert_to_set(Config.action_download_history_set, full_path) 223 | insert_workflow_or_action_to_redis( 224 | db=Config.redis_actions_db, 225 | object_path=full_path, 226 | data=resp.text, 227 | github_url=convert_raw_github_url_to_github_com_url(url), 228 | is_public=is_public, 229 | ) 230 | # In the future, ref will be with commit sha 231 | add_ref_pointer_to_redis(full_path, full_path) 232 | -------------------------------------------------------------------------------- /src/downloader/gh_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | from requests import get 4 | from typing import Dict, Any, Optional, Iterator 5 | from http import HTTPStatus 6 | from src.config.config import Config 7 | import src.logger.log as log 8 | 9 | """ 10 | Current rate limiting: 11 | 12 | Search API: 13 | - No token: 10 per minute 14 | - With token: 30 per minute 15 | 16 | Other standard API (contents): 17 | - With token: 5000 per hour 18 | githubusercontent API - None 19 | """ 20 | 21 | BASE_URL = "https://api.github.com" 22 | REPOSITORY_SEARCH_URL = ( 23 | BASE_URL 24 | + "/search/repositories?q={query}&sort=stars&order=desc&per_page=100&page={page}" 25 | ) 26 | 27 | 28 | ACCOUNT_INFO_URL = BASE_URL + "/users/{account_name}" 29 | USER_REPOSITORY_URL = BASE_URL + "/users/{user_name}/repos?per_page=100&page={page}" 30 | 31 | PERSONAL_USER_REPOSITORY_URL = ( 32 | BASE_URL + "/user/repos?type=owner&per_page=100&page={page}" 33 | ) 34 | 35 | ORGANIZATION_REPOSITORY_URL = ( 36 | BASE_URL + "/orgs/{organization_name}/repos?per_page=100&page={page}" 37 | ) 38 | CONTENTS_URL = BASE_URL + "/repos/{repo_path}/contents/{file_path}" 39 | 40 | REPOSITORY_QUERY_MIN = "stars:>={min_stars}" 41 | REPOSITORY_QUERY_MIN_MAX = "stars:{min_stars}..{max_stars}" 42 | 43 | headers = { 44 | "Accept": "application/vnd.github+json", 45 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.42", 46 | } 47 | 48 | 49 | def get_personal_account_generator() -> Iterator[str]: 50 | return get_user_repository_generator(user_name=None, is_personal_account=True) 51 | 52 | 53 | def get_account_generator(account_name: str) -> Iterator[str]: 54 | account_info = get_account_info(account_name=account_name) 55 | account_type = account_info.get("type") 56 | 57 | if account_type == "User": 58 | log.info(f"[+] Scanning user: {account_name}") 59 | return get_user_repository_generator( 60 | user_name=account_name, 
is_personal_account=False 61 | ) 62 | 63 | elif account_type == "Organization": 64 | log.info(f"[+] Scanning organization: {account_name}") 65 | return get_organization_repository_generator(account_name) 66 | 67 | else: 68 | log.error(f"[-] Failed to get account type for {account_name}") 69 | return None 70 | 71 | 72 | def get_user_repository_generator( 73 | user_name: str, is_personal_account: bool 74 | ) -> Iterator[str]: 75 | # Querying user repositories is not limited. We loop over each page, 76 | # and look for more repos. If there are no more repos, we break 77 | page = 1 78 | while True: 79 | log.info(f"[*] Querying page: {page}") 80 | repos = get_user_repositories( 81 | user_name=user_name, page=page, is_personal_account=is_personal_account 82 | ) 83 | if repos: 84 | for repo in repos: 85 | repo_star_count = int(repo["stargazers_count"]) 86 | log.debug( 87 | f"[+] About to download repository: {repo['full_name']}, Stars: {repo_star_count}" 88 | ) 89 | yield repo["full_name"] 90 | else: 91 | break 92 | 93 | page += 1 94 | 95 | 96 | def get_organization_repository_generator(organization_name: str) -> Iterator[str]: 97 | # Querying organization repositories is not limited. We loop over each page, 98 | # and look for more repos. If there are no more repos, we break 99 | page = 1 100 | while True: 101 | log.info(f"[*] Querying page: {page}") 102 | repos = get_organization_repositories( 103 | organization_name=organization_name, page=page 104 | ) 105 | if repos: 106 | for repo in repos: 107 | repo_star_count = int(repo["stargazers_count"]) 108 | log.debug( 109 | f"[+] About to download repository: {repo['full_name']}, Stars: {repo_star_count}" 110 | ) 111 | yield repo["full_name"] 112 | else: 113 | break 114 | 115 | page += 1 116 | 117 | 118 | def get_repository_generator( 119 | min_stars: int, 120 | max_stars: Optional[int] = 0, 121 | ) -> Iterator[str]: 122 | # GitHub allows querying only up to 1000 results, meaning 10 pages. 123 | 124 | # In addition, to make wider queries, we are going to change the query after every 10 pages. 125 | # Because our query only filters by star count, we can just narrow the star range and keep querying. 126 | last_star_count = 0 127 | while True: 128 | more_results = False 129 | for page in range(1, 11): 130 | log.info(f"[*] Querying page: {page}") 131 | if not max_stars: 132 | query = REPOSITORY_QUERY_MIN.format(min_stars=min_stars) 133 | else: 134 | query = REPOSITORY_QUERY_MIN_MAX.format( 135 | min_stars=min_stars, max_stars=max_stars 136 | ) 137 | 138 | repos = get_repository_search( 139 | query=query, 140 | page=page, 141 | ) 142 | 143 | if repos: 144 | more_results = True 145 | for repo in repos: 146 | last_star_count = int(repo["stargazers_count"]) 147 | log.debug( 148 | f"[+] About to download repository: {repo['full_name']}, Stars: {last_star_count}" 149 | ) 150 | yield repo["full_name"] 151 | else: 152 | more_results = False 153 | break 154 | 155 | page += 1 156 | 157 | if not more_results: 158 | # Received no results, we can quit. 159 | break 160 | else: 161 | max_stars = last_star_count + 1 162 | 163 | 164 | def get_account_info(account_name: str) -> Dict[str, Any]: 165 | """ 166 | Returns a dictionary with the account information. 167 | The objects look like this: 168 | { 169 | "login": "CycodeLabs", 170 | "type": "Organization", 171 | ... 
172 |     }
173 |     """
174 |     headers["Authorization"] = f"Token {Config.github_token}"
175 |     r = get(ACCOUNT_INFO_URL.format(account_name=account_name), headers=headers)
176 | 
177 |     if r.status_code != HTTPStatus.OK:
178 |         log.error(f"[-] Failed fetching repositories for {account_name}")
179 |         raise Exception(f"status code: {r.status_code}. Response: {r.text}")
180 | 
181 |     return r.json()
182 | 
183 | 
184 | def get_user_repositories(
185 |     user_name: str, page: int, is_personal_account: bool
186 | ) -> list[dict]:
187 |     """
188 |     Returns a list of all repositories for the specified user.
189 |     The objects look like this:
190 |     {
191 |         "id": 000000000,
192 |         "node_id": "R_...",
193 |         "name": "example",
194 |         "full_name": "example/example",
195 |         "private": true,
196 |         ...
197 |     }
198 |     """
199 |     headers["Authorization"] = f"Token {Config.github_token}"
200 | 
201 |     repo_endpoint = (
202 |         PERSONAL_USER_REPOSITORY_URL if is_personal_account else USER_REPOSITORY_URL
203 |     )
204 |     r = get(
205 |         repo_endpoint.format(user_name=user_name, page=page),
206 |         headers=headers,
207 |     )
208 | 
209 |     if r.status_code != HTTPStatus.OK:
210 |         log.error("[-] Failed fetching repositories")
211 |         raise Exception(f"status code: {r.status_code}. Response: {r.text}")
212 | 
213 |     return r.json()
214 | 
215 | 
216 | def get_organization_repositories(organization_name: str, page: int) -> list[dict]:
217 |     """
218 |     Returns a list of all repositories for the specified organization.
219 |     The objects look like this:
220 |     {
221 |         "id": 000000000,
222 |         "node_id": "R_...",
223 |         "name": "example",
224 |         "full_name": "example/example",
225 |         "private": true,
226 |         ...
227 |     }
228 |     """
229 |     headers["Authorization"] = f"Token {Config.github_token}"
230 | 
231 |     r = get(
232 |         ORGANIZATION_REPOSITORY_URL.format(
233 |             organization_name=organization_name, page=page
234 |         ),
235 |         headers=headers,
236 |     )
237 |     if r.status_code != HTTPStatus.OK:
238 |         log.error(f"[-] Failed fetching repositories for {organization_name}")
239 |         raise Exception(f"status code: {r.status_code}. Response: {r.text}")
240 | 
241 |     return r.json()
242 | 
243 | 
244 | def get_repository_search(query: str, page: int = 1) -> list[dict]:
245 |     headers["Authorization"] = f"Token {Config.github_token}"
246 | 
247 |     r = get(
248 |         REPOSITORY_SEARCH_URL.format(query=urllib.parse.quote_plus(query), page=page),
249 |         headers=headers,
250 |     )
251 |     if r.status_code != 200:
252 |         log.error(f"status code: {r.status_code}. Response: {r.text}")
253 |         return []
254 | 
255 |     return r.json()["items"]
256 | 
257 | 
258 | def get_repository_workflows(repo: str) -> Dict[str, str]:
259 |     """Returns the list of workflows for the specified repository.
260 |     Returns a dictionary that maps each workflow file name to its downloadable URL.
261 | 
262 |     e.g.: crowdin-upload.curriculum.yml ->
263 |     https://raw.githubusercontent.com/freeCodeCamp/freeCodeCamp/main/
264 |     .github/workflows/crowdin-upload.curriculum.yml
265 |     """
266 | 
267 |     headers["Authorization"] = f"Token {Config.github_token}"
268 | 
269 |     file_path = ".github/workflows"
270 |     r = get(CONTENTS_URL.format(repo_path=repo, file_path=file_path), headers=headers)
271 |     if r.status_code == 404:
272 |         return {}
273 |     if r.status_code == 403 and int(r.headers["X-RateLimit-Remaining"]) == 0:
274 |         import time
275 | 
276 |         time_to_sleep = int(r.headers["X-RateLimit-Reset"]) - time.time() + 1
277 |         log.error(
278 |             f"[*] Rate limit for the contents API depleted. Sleeping {time_to_sleep} seconds"
Sleeping {time_to_sleep} seconds" 279 | ) 280 | time.sleep(time_to_sleep) 281 | return get_repository_workflows(repo) 282 | if r.status_code != 200: 283 | log.error(f"status code: {r.status_code}. Response: {r.text}") 284 | return {} 285 | 286 | # When we have a single entry, the contents API returns dict instead of list. 287 | entries = None 288 | if isinstance(r.json(), list): 289 | entries = r.json() 290 | else: 291 | entries = [r.json()] 292 | 293 | workflows = {} 294 | for entry in entries: 295 | if entry["name"].endswith((".yml", ".yaml")): 296 | workflows[entry["name"]] = entry["download_url"] 297 | 298 | return workflows 299 | 300 | 301 | def get_repository_composite_action(path: str) -> str: 302 | """Returns downloadble URL for a composite action in the specific path. 303 | 304 | receives 'path_in_repo' relative path to the repository root to where search the action.yml. 305 | It should be a directory and not a file. (if file this is a reusable workflow) 306 | 307 | Raises exception if network error occured. 308 | """ 309 | path_splitted = path.split("/") 310 | repo = "/".join(path_splitted[:2]) 311 | relative_path = "/".join(path_splitted[2:]) 312 | 313 | headers["Authorization"] = f"Token {Config.github_token}" 314 | 315 | for suffix in ["action.yml", "action.yaml"]: 316 | file_path = os.path.join(relative_path, suffix) 317 | r = get( 318 | CONTENTS_URL.format(repo_path=repo, file_path=file_path), 319 | headers=headers, 320 | ) 321 | if r.status_code == 404: 322 | # can be both yml and yaml 323 | continue 324 | 325 | if r.status_code != 200: 326 | log.error(f"status code: {r.status_code}. Response: {r.text}") 327 | continue 328 | 329 | return r.json()["download_url"] 330 | 331 | 332 | def get_repository_reusable_workflow(path: str) -> str: 333 | """Returns downlodable URL for a reusable workflows in the specific path. 334 | 335 | Raises exception if network error occured. 336 | """ 337 | path_splitted = path.split("/") 338 | repo = "/".join(path_splitted[:2]) 339 | relative_path = "/".join(path_splitted[2:]) 340 | 341 | headers["Authorization"] = f"Token {Config.github_token}" 342 | 343 | r = get( 344 | CONTENTS_URL.format(repo_path=repo, file_path=relative_path), 345 | headers=headers, 346 | ) 347 | if r.status_code == 404: 348 | return 349 | if r.status_code != 200: 350 | log.error(f"status code: {r.status_code}. Response: {r.text}") 351 | return 352 | 353 | return r.json()["download_url"] 354 | -------------------------------------------------------------------------------- /src/downloader/utils.py: -------------------------------------------------------------------------------- 1 | from src.config.config import Config 2 | from src.storage.redis_connection import RedisConnection 3 | 4 | 5 | def insert_workflow_or_action_to_redis( 6 | db: str, object_path: str, data: str, github_url: str, is_public: bool 7 | ) -> None: 8 | """ 9 | Inserts Workflow or Composite Action data and metadata to Redis as a new hash. 10 | db (str): The Redis database to use. 11 | object_path (str): The path of the object to insert. 12 | data (str): Data of the object. 13 | github_url (str): The GitHub URL associated with the object. 14 | is_public (bool): Whether the object is public or not. 
15 |     """
16 |     with RedisConnection(db) as redis_db:
17 |         redis_db.insert_to_hash(object_path, Config.redis_data_hash_field_name, data)
18 |         redis_db.insert_to_hash(
19 |             object_path,
20 |             Config.redis_url_hash_field_name,
21 |             github_url,
22 |         )
23 |         redis_db.insert_to_hash(
24 |             object_path,
25 |             Config.redis_is_public_hash_field_name,
26 |             is_public,
27 |         )
28 | 
29 | 
30 | def add_ref_pointer_to_redis(uses_path: str, processed_path: str):
31 |     """
32 |     Adds a reference pointer to Redis that maps a raw path to its path including the commit SHA of the ref.
33 |     For example:
34 |     actions/checkout@v4 -> actions/checkout@c533a0a4cfc4962971818edcfac47a2899e69799
35 |     repo/some/workflow.yml@master -> repo/some/workflow.yml@c533a0a4cfc4962971818edcfac47a2899e69799
36 | 
37 |     Args:
38 |         uses_path (str): The raw path to be added as a key in the Redis hash; it is the output of dependency analysis.
39 |         processed_path (str): The path of the object including the commit SHA of the ref.
40 |     """
41 |     with RedisConnection(Config.redis_objects_ops_db) as ops_db:
42 |         ops_db.insert_to_hash(Config.ref_pointers_hash, uses_path, processed_path)
43 | 
--------------------------------------------------------------------------------
/src/indexer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/indexer/__init__.py
--------------------------------------------------------------------------------
/src/indexer/index.py:
--------------------------------------------------------------------------------
1 | import io
2 | 
3 | import yaml
4 | from yaml.constructor import Constructor
5 | 
6 | from src.storage.redis_connection import RedisConnection
7 | from src.config.config import Config
8 | from src.workflow_components.workflow import Workflow
9 | from src.workflow_components.composite_action import CompositeAction
10 | from tqdm import tqdm
11 | import src.logger.log as log
12 | from src.common.utils import str_to_bool
13 | 
14 | 
15 | # A hack to prevent PyYAML from converting "on" tags into Python boolean values.
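# Without this override, YAML 1.1 rules make yaml.load('on: push') return
# {True: "push"}, because "on"/"off"/"yes"/"no" parse as booleans; with the
# override, the workflow trigger key survives as the string "on".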
16 | def add_bool(self, node): 17 | return self.construct_scalar(node) 18 | 19 | 20 | Constructor.add_constructor("tag:yaml.org,2002:bool", add_bool) 21 | 22 | 23 | def index_downloaded_workflows_and_actions() -> None: 24 | index_downloaded_actions() 25 | index_downloaded_workflows() 26 | 27 | 28 | def index_downloaded_actions() -> None: 29 | with RedisConnection(Config.redis_actions_db) as actions_db: 30 | actions = [a.decode() for a in actions_db.get_all_keys()] 31 | log.info(f"[*] Indexing actions...") 32 | for action in tqdm(actions, desc="Indexing actions"): 33 | index_action_file(action) 34 | 35 | 36 | def index_downloaded_workflows() -> None: 37 | with RedisConnection(Config.redis_workflows_db) as workflows_db: 38 | workflows = [w.decode() for w in workflows_db.get_all_keys()] 39 | log.info(f"[*] Indexing workflows...") 40 | for workflow in tqdm(workflows, desc="Indexing workflows"): 41 | index_workflow_file(workflow) 42 | 43 | 44 | def index_action_file(action: str) -> None: 45 | try: 46 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 47 | if ops_db.exists_in_set(Config.action_index_history_set, action): 48 | return 49 | 50 | action_full_name = ops_db.get_value_from_hash( 51 | Config.ref_pointers_hash, action 52 | ).decode() 53 | with RedisConnection(Config.redis_actions_db) as actions_db: 54 | content = actions_db.get_value_from_hash( 55 | action_full_name, Config.redis_data_hash_field_name 56 | ).decode() 57 | url = actions_db.get_value_from_hash( 58 | action_full_name, Config.redis_url_hash_field_name 59 | ).decode() 60 | is_public = str_to_bool( 61 | actions_db.get_value_from_hash( 62 | action_full_name, Config.redis_is_public_hash_field_name 63 | ).decode() 64 | ) 65 | 66 | # PyYAML has issues with tabs. 67 | content = content.replace("\t", " ") 68 | 69 | with io.StringIO() as f: 70 | f.write(content) 71 | f.seek(0) 72 | try: 73 | obj = yaml.load(f, yaml.loader.Loader) 74 | except yaml.scanner.ScannerError as e: 75 | log.error( 76 | f"[-] Failed loading: {action_full_name}. Exception: {e}. Skipping..." 77 | ) 78 | return 79 | 80 | # Could happen if the YAML is empty. 81 | if not obj: 82 | return 83 | 84 | if isinstance(obj, str): 85 | # TODO: This is a symlink. We should handle it. 86 | # Only examples at the moment are for https://github.com/edgedb/edgedb-pkg 87 | # E.g., https://github.com/edgedb/edgedb-pkg/blob/master/integration/linux/build/centos-8/action.yml 88 | log.debug(f"[-] Symlink detected: {content}. Skipping...") 89 | return 90 | 91 | obj["path"] = action_full_name 92 | obj["url"] = url 93 | obj["is_public"] = is_public 94 | 95 | Config.graph.push_object(CompositeAction.from_dict(obj)) 96 | ops_db.insert_to_set(Config.action_index_history_set, action_full_name) 97 | except Exception as e: 98 | log.error(f"[-] Error while indexing {action}. 
{e}") 99 | 100 | 101 | def index_workflow_file(workflow: str) -> None: 102 | try: 103 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 104 | if ops_db.exists_in_set(Config.workflow_index_history_set, workflow): 105 | return 106 | 107 | workflow_full_name = ops_db.get_value_from_hash( 108 | Config.ref_pointers_hash, workflow 109 | ).decode() 110 | 111 | with RedisConnection(Config.redis_workflows_db) as workflows_db: 112 | content = workflows_db.get_value_from_hash( 113 | workflow_full_name, Config.redis_data_hash_field_name 114 | ).decode() 115 | url = workflows_db.get_value_from_hash( 116 | workflow_full_name, Config.redis_url_hash_field_name 117 | ).decode() 118 | is_public = str_to_bool( 119 | workflows_db.get_value_from_hash( 120 | workflow_full_name, Config.redis_is_public_hash_field_name 121 | ).decode() 122 | ) 123 | 124 | # PyYAML has issues with tabs. 125 | content = content.replace("\t", " ") 126 | 127 | with io.StringIO() as f: 128 | f.write(content) 129 | f.seek(0) 130 | try: 131 | obj = yaml.load(f, yaml.loader.Loader) 132 | except yaml.scanner.ScannerError as e: 133 | log.error( 134 | f"[-] Failed loading: {workflow_full_name}. Exception: {e}. Skipping..." 135 | ) 136 | return 137 | 138 | # Could happen if the YAML is empty. 139 | if not obj: 140 | return 141 | 142 | if isinstance(obj, str): 143 | # TODO: This is a symlink. We should handle it. 144 | # Only examples at the moment are for https://github.com/edgedb/edgedb-pkg 145 | # E.g., https://github.com/edgedb/edgedb-pkg/blob/master/integration/linux/build/centos-8/action.yml 146 | log.debug(f"[-] Symlink detected: {content}. Skipping...") 147 | return 148 | 149 | obj["path"] = workflow_full_name 150 | obj["url"] = url 151 | obj["is_public"] = is_public 152 | 153 | Config.graph.push_object(Workflow.from_dict(obj)) 154 | ops_db.insert_to_set(Config.workflow_index_history_set, workflow_full_name) 155 | 156 | except Exception as e: 157 | log.error(f"[-] Error while indexing {workflow}. 
{e}") 158 | -------------------------------------------------------------------------------- /src/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/logger/__init__.py -------------------------------------------------------------------------------- /src/logger/log.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Any 3 | from loguru import logger 4 | 5 | logger.remove() 6 | logger.add( 7 | sys.stdout, 8 | format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {message}", 9 | colorize=True, 10 | ) 11 | 12 | 13 | def info(msg: str) -> None: 14 | logger.info(msg) 15 | 16 | 17 | def debug(msg: str) -> None: 18 | from src.config.config import Config 19 | 20 | if Config.debug: 21 | logger.debug(msg) 22 | 23 | 24 | def error(msg: str) -> None: 25 | logger.error(msg) 26 | 27 | 28 | def warning(msg: str) -> None: 29 | logger.warning(msg) 30 | 31 | 32 | def catch_exit() -> None: 33 | from src.config.config import Config 34 | 35 | if Config.github_token: 36 | print("""\n[x] Index results with: raven index""") 37 | 38 | elif Config.neo4j_uri: 39 | neo4j_server = Config.neo4j_uri.split("//")[1].split(":")[0] 40 | print(f"""\n[x] View results at: http://{neo4j_server}:7474""") 41 | 42 | sys.exit(0) 43 | 44 | 45 | def fail_exit() -> None: 46 | sys.exit(1) 47 | 48 | 49 | def success_exit() -> None: 50 | sys.exit(0) 51 | -------------------------------------------------------------------------------- /src/queries/__init__.py: -------------------------------------------------------------------------------- 1 | from src.config.config import Config, SEVERITY_LEVELS 2 | import json 3 | from colorama import Fore, Style, init 4 | import textwrap 5 | 6 | init() 7 | 8 | 9 | class Query(object): 10 | def __init__( 11 | self, 12 | id: str, 13 | name: str, 14 | description: str, 15 | tags: list, 16 | severity: str, 17 | query: list, 18 | ) -> None: 19 | self.id = id 20 | self.name = name 21 | self.description = description 22 | self.tags = tags 23 | self.severity = severity 24 | self.query = query 25 | self.result = None 26 | 27 | def filter(self) -> bool: 28 | return ( 29 | self.filter_queries_by_tags() 30 | and self.filter_queries_by_severity() 31 | and self.filter_queries_by_query_id() 32 | ) 33 | 34 | def filter_queries_by_severity(self): 35 | severity_level = SEVERITY_LEVELS.get(Config.severity, 0) 36 | severity_levels = [ 37 | severity 38 | for severity, level in SEVERITY_LEVELS.items() 39 | if level >= severity_level 40 | ] 41 | 42 | return self.severity in severity_levels 43 | 44 | def filter_queries_by_tags(self): 45 | if not Config.tags: 46 | # If no tags has been given, return all detections 47 | return True 48 | 49 | for tag in self.tags: 50 | # If this detection tag is matching the 51 | # supplied tags 52 | if tag in Config.tags: 53 | return True 54 | 55 | # If no detections found with the input tags 56 | # skip this detection 57 | return False 58 | 59 | def filter_queries_by_query_id(self): 60 | if not Config.query_ids: 61 | return True 62 | 63 | if self.id in Config.query_ids: 64 | return True 65 | 66 | return False 67 | 68 | def run(self) -> list: 69 | """ 70 | Will run the cypher code with the given query. 
71 | and will return the matching workflow paths 72 | """ 73 | result = Config.graph.run_query(self.query) 74 | self.result = [dict(record).get("url") for record in result] 75 | 76 | def to_raw(self) -> str: 77 | report = "" 78 | description_length = 80 79 | 80 | report += f"{Fore.CYAN}Name:{Style.RESET_ALL} {self.name}\n" 81 | report += f"{Fore.CYAN}Severity:{Style.RESET_ALL} {self.severity}\n" 82 | 83 | wrapped_description = textwrap.fill(self.description, width=description_length) 84 | report += f"{Fore.CYAN}Description:{Style.RESET_ALL} {wrapped_description}\n" 85 | report += f"{Fore.CYAN}Tags:{Style.RESET_ALL} {self.tags}\n" 86 | 87 | report += f"{Fore.CYAN}Workflow URLS:{Style.RESET_ALL}\n" 88 | for url in self.result: 89 | report += f"- {url}\n" 90 | 91 | return report 92 | 93 | def to_json(self) -> str: 94 | return self._to_dict() 95 | 96 | def _to_dict(self) -> dict: 97 | return { 98 | "id": self.id, 99 | "name": self.name, 100 | "description": self.description, 101 | "tags": self.tags, 102 | "severity": self.severity, 103 | "result": self.result, 104 | } 105 | -------------------------------------------------------------------------------- /src/reporter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/reporter/__init__.py -------------------------------------------------------------------------------- /src/reporter/report.py: -------------------------------------------------------------------------------- 1 | from src.config.config import ( 2 | Config, 3 | REPORT_RAW_FORMAT, 4 | REPORT_JSON_FORMAT, 5 | SLACK_REPORTER, 6 | ) 7 | from src.reporter import slack_reporter 8 | from src.logger.log import success_exit 9 | from os import listdir 10 | from os.path import join 11 | import yaml 12 | import json 13 | from src.queries import Query 14 | from typing import List 15 | 16 | 17 | def raw_reporter(queries: List[Query]) -> str: 18 | report = "\n" 19 | 20 | for query in queries: 21 | report += f"{query.to_raw()}\n" 22 | 23 | return report 24 | 25 | 26 | def json_reporter(queries: List[Query]) -> str: 27 | return json.dumps([query.to_json() for query in queries], indent=4) 28 | 29 | 30 | def get_queries() -> List[Query]: 31 | queries = [] 32 | for query_file in listdir(Config.queries_path): 33 | with open(join(Config.queries_path, query_file), "r") as raw_query: 34 | yml_query = yaml.safe_load(raw_query) 35 | detection_info = yml_query.get("info") 36 | 37 | query = Query( 38 | id=yml_query.get("id"), 39 | name=detection_info.get("name"), 40 | description=detection_info.get("description"), 41 | tags=detection_info.get("tags"), 42 | severity=detection_info.get("severity"), 43 | query=yml_query.get("query"), 44 | ) 45 | 46 | if query.filter(): 47 | queries.append(query) 48 | 49 | return queries 50 | 51 | 52 | def generate() -> None: 53 | queries = get_queries() 54 | for query in queries: 55 | query.run() 56 | 57 | filtered_queries = [query for query in queries if query.result] 58 | report = "" 59 | if Config.format == REPORT_RAW_FORMAT: 60 | report = raw_reporter(filtered_queries) 61 | elif Config.format == REPORT_JSON_FORMAT: 62 | report = json_reporter(filtered_queries) 63 | 64 | if Config.reporter == SLACK_REPORTER: 65 | if Config.slack_token and Config.channel_id: 66 | client = slack_reporter.Client(Config.slack_token) 67 | message = f"\n{report}\n" 68 | client.send_report(Config.channel_id, message) 69 | 70 | else: 71 | print( 72 | "[x] Please 
provide a Slack token and channel ID to send the report to Slack."
73 |             )
74 | 
75 |     else:
76 |         print(report)
77 | 
78 |     success_exit()
79 | 
--------------------------------------------------------------------------------
/src/reporter/slack_reporter.py:
--------------------------------------------------------------------------------
1 | from slack_sdk import WebClient
2 | from slack_sdk.errors import SlackApiError
3 | 
4 | 
5 | class Client(object):
6 |     def __init__(self, token) -> None:
7 |         self.client = WebClient(token=token)
8 | 
9 |     def send_report(self, channel_id, message):
10 |         try:
11 |             self.client.files_upload_v2(
12 |                 channel=channel_id,
13 |                 filename="raven_report",
14 |                 content=message,
15 |                 initial_comment="RAVEN Security Report",
16 |             )
17 |             print("[x] Report sent successfully")
18 | 
19 |         except SlackApiError as e:
20 |             print(f"[x] Failed to send report: {e.response['error']}")
21 | 
--------------------------------------------------------------------------------
/src/storage/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/storage/__init__.py
--------------------------------------------------------------------------------
/src/storage/neo4j_graph.py:
--------------------------------------------------------------------------------
1 | from py2neo import Graph
2 | from py2neo.ogm import GraphObject
3 | from py2neo.data import Node
4 | from typing import List, Tuple, Optional
5 | import src.logger.log as log
6 | 
7 | 
8 | class GraphDb(object):
9 |     def __init__(self, uri, user, password):
10 |         self.graph = Graph(uri, auth=(user, password))
11 | 
12 |     def is_graph_empty(self) -> bool:
13 |         query = "MATCH (n) RETURN COUNT(n) as count"
14 |         return self.graph.run(query).data()[0]["count"] == 0
15 | 
16 |     def push_object(self, obj: GraphObject):
17 |         self.graph.merge(obj)
18 | 
19 |     def get_object(self, obj: GraphObject) -> Optional[GraphObject]:
20 |         """Tries to find an object in the graph.
21 |         Returns None if it wasn't found.
22 |         """
23 |         matched_obj = obj.__class__.match(self.graph, obj._id)
24 |         if not matched_obj.exists():
25 |             return None
26 |         else:
27 |             return matched_obj.first()
28 | 
29 |     def get_or_create(self, obj: GraphObject) -> Tuple[GraphObject, bool]:
30 |         """Tries to find a similar object using the given object's _id.
31 |         If one is found, returns it together with True.
32 |         If not found, inserts the given object and returns it with False.
33 |         """
34 |         matched_obj = obj.__class__.match(self.graph, obj._id)
35 |         if not matched_obj.exists():
36 |             log.warning(
37 |                 f"WARNING: We didn't find object {obj._id} of type {obj.__class__.__name__}, so we created it."
38 |             )
39 |             self.graph.push(obj)
40 |             return obj, False
41 |         else:
42 |             return matched_obj.first(), True
43 | 
44 |     def get_all_nodes(self, node_type: str) -> List[Node]:
45 |         """
46 |         Returns all nodes of the given node type in the graph.
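        Example (illustrative): get_all_nodes("Workflow") returns every indexed
        Workflow node as a py2neo Node object.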
47 | """ 48 | return list(self.graph.nodes.match(node_type)) 49 | 50 | def clean_graph(self): 51 | self.graph.delete_all() 52 | 53 | def run_query(self, query: str) -> List[Node]: 54 | return list(self.graph.run(query)) 55 | -------------------------------------------------------------------------------- /src/storage/neo4j_utils.py: -------------------------------------------------------------------------------- 1 | from src.config.config import Config 2 | 3 | 4 | def clean_graph(): 5 | Config.graph.clean_graph() 6 | -------------------------------------------------------------------------------- /src/storage/redis_connection.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import redis 4 | from src.config.config import Config 5 | import src.logger.log as log 6 | 7 | 8 | class RedisConnection: 9 | def __init__(self, redis_db): 10 | self.redis_client = None 11 | self.redis_host = Config.redis_host 12 | self.redis_port = Config.redis_port 13 | self.redis_db = redis_db 14 | 15 | def __enter__(self) -> RedisConnection: 16 | try: 17 | self.redis_client = redis.Redis( 18 | host=self.redis_host, port=self.redis_port, db=self.redis_db 19 | ) 20 | except Exception as err: 21 | log.error(f"Failed to connect to Redis: {err}") 22 | 23 | return self 24 | 25 | def __exit__(self, exc_type, exc_value, traceback): 26 | if self.redis_client: 27 | self.redis_client.close() 28 | 29 | ## Hash functions 30 | def insert_to_hash(self, hash: str, field: str, value: str) -> None: 31 | try: 32 | self.redis_client.hset(hash, field, value) 33 | except redis.exceptions.ResponseError as e: 34 | log.error(f"Failed to set value: {e}") 35 | 36 | def get_value_from_hash(self, key: str, field: str) -> str: 37 | return self.redis_client.hget(key, field) 38 | 39 | ## String functions 40 | def insert_to_string(self, key: str, value: str) -> None: 41 | try: 42 | self.redis_client.set(key, value) 43 | except redis.exceptions.ResponseError as e: 44 | log.error(f"Failed to set value: {e}") 45 | 46 | def get_string(self, key: str) -> str: 47 | return self.redis_client.get(key) 48 | 49 | ## Set functions 50 | def insert_to_set(self, set: str, value: str) -> str: 51 | try: 52 | self.redis_client.sadd(set, value) 53 | except redis.exceptions.ResponseError as e: 54 | log.error(f"Failed to set value: {e}") 55 | 56 | def exists_in_set(self, set: str, value: str) -> bool: 57 | return bool(self.redis_client.sismember(set, value)) 58 | 59 | def get_set_length(self, set: str) -> int: 60 | return self.redis_client.scard(set) 61 | 62 | def get_set_values(self, set: str) -> set: 63 | return self.redis_client.smembers(set) 64 | 65 | ## General DB functions 66 | def delete_key(self, key: str) -> None: 67 | self.redis_client.delete(key) 68 | 69 | def flush_db(self) -> None: 70 | self.redis_client.flushdb() 71 | 72 | def get_all_keys(self) -> list: 73 | return self.redis_client.keys() 74 | -------------------------------------------------------------------------------- /src/storage/redis_utils.py: -------------------------------------------------------------------------------- 1 | from src.storage.redis_connection import RedisConnection 2 | from src.config.config import Config 3 | 4 | 5 | def clean_redis_db() -> None: 6 | # Flush all databases 7 | flush_db(Config.redis_objects_ops_db) 8 | flush_db(Config.redis_actions_db) 9 | flush_db(Config.redis_workflows_db) 10 | 11 | 12 | def clean_index() -> None: 13 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 
14 | ops_db.delete_key(Config.workflow_index_history_set) 15 | ops_db.delete_key(Config.action_index_history_set) 16 | 17 | 18 | def flush_db(db_number) -> None: 19 | with RedisConnection(db_number) as db: 20 | db.flush_db() 21 | -------------------------------------------------------------------------------- /src/workflow_components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/workflow_components/__init__.py -------------------------------------------------------------------------------- /src/workflow_components/composite_action.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from hashlib import md5 3 | 4 | from py2neo.ogm import GraphObject, RelatedTo, Property 5 | 6 | import src.workflow_components.workflow as workflow 7 | from src.config.config import Config 8 | from src.common.utils import ( 9 | get_dependencies_in_code, 10 | convert_dict_to_list, 11 | raw_str_to_bool, 12 | ) 13 | from src.workflow_components.dependency import UsesString, UsesStringType 14 | 15 | 16 | def get_or_create_composite_action(path: str) -> "CompositeAction": 17 | """Used when need to create relations with another action. 18 | If action wasn't indexed yet, we create a stub node, 19 | that will be enriched eventually. 20 | """ 21 | ca = CompositeAction(None, path) 22 | obj = Config.graph.get_object(ca) 23 | if not obj: 24 | # This is a legitimate behavior. 25 | # Once the action will be indexed, the node will be enriched. 26 | Config.graph.push_object(ca) 27 | obj = ca 28 | return obj 29 | 30 | 31 | class CompositeActionInput(GraphObject): 32 | __primarykey__ = "_id" 33 | 34 | _id = Property() 35 | name = Property() 36 | default = Property() 37 | description = Property() 38 | required = Property() 39 | url = Property() 40 | path = Property() 41 | 42 | def __init__(self, _id: str, path: str): 43 | self._id = _id 44 | self.path = path 45 | 46 | @staticmethod 47 | def from_dict(obj_dict) -> "CompositeActionInput": 48 | i = CompositeActionInput( 49 | _id=obj_dict["_id"], 50 | path=obj_dict["path"], 51 | ) 52 | 53 | i.name = obj_dict["name"] 54 | i.url = obj_dict["url"] 55 | 56 | if "default" in obj_dict: 57 | i.default = obj_dict["default"] 58 | 59 | if "description" in obj_dict: 60 | i.description = obj_dict["description"] 61 | 62 | i.required = raw_str_to_bool(obj_dict.get("required", "false")) 63 | 64 | return i 65 | 66 | 67 | class CompositeActionStep(GraphObject): 68 | __primarykey__ = "_id" 69 | 70 | _id = Property() 71 | name = Property() 72 | path = Property() 73 | run = Property() 74 | uses = Property() 75 | ref = Property() 76 | shell = Property() 77 | url = Property() 78 | with_prop = Property("with") 79 | 80 | action = RelatedTo("CompositeAction") 81 | using_param = RelatedTo(workflow.StepCodeDependency) 82 | 83 | def __init__(self, _id: str, path: str): 84 | self._id = _id 85 | self.path = path 86 | 87 | @staticmethod 88 | def from_dict(obj_dict) -> "CompositeActionStep": 89 | s = CompositeActionStep(_id=obj_dict["_id"], path=obj_dict["path"]) 90 | s.url = obj_dict["url"] 91 | if "id" in obj_dict: 92 | s.name = obj_dict["id"] 93 | if "run" in obj_dict: 94 | s.run = obj_dict["run"] 95 | 96 | # Adding ${{...}} dependencies as an entity. 
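            # e.g., run: echo "${{ inputs.param1 }}" yields the code dependency
            # "inputs.param1", which is attached as a StepCodeDependency node.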
97 | for code_dependency in get_dependencies_in_code(s.run): 98 | param = workflow.StepCodeDependency(code_dependency, s.path) 99 | param.url = s.url 100 | s.using_param.add(param) 101 | 102 | if "shell" in obj_dict: 103 | s.shell = obj_dict["shell"] 104 | elif "uses" in obj_dict: 105 | s.uses = obj_dict["uses"] 106 | # Uses string is quite complex, and may reference to several types of nodes. 107 | # In the case of action steps, it may only reference actions (and not reusable workflows). 108 | uses_string_obj = UsesString.analyze(uses_string=s.uses) 109 | if uses_string_obj.type == UsesStringType.ACTION: 110 | obj = get_or_create_composite_action( 111 | uses_string_obj.get_full_path(s.path) 112 | ) 113 | s.action.add(obj) 114 | 115 | if "with" in obj_dict: 116 | s.with_prop = convert_dict_to_list(obj_dict["with"]) 117 | 118 | if len(s.uses.split("@")) > 1: 119 | s.ref = s.uses.split("@")[1] 120 | 121 | return s 122 | 123 | 124 | class CompositeAction(GraphObject): 125 | __primarykey__ = "_id" 126 | 127 | _id = Property() 128 | name = Property() 129 | path = Property() 130 | using = Property() 131 | image = Property() 132 | url = Property() 133 | is_public = Property() 134 | 135 | composite_action_input = RelatedTo(CompositeActionInput) 136 | steps = RelatedTo(CompositeActionStep) 137 | 138 | def __init__(self, name: Optional[str], path: str): 139 | self.name = name 140 | self.path = path 141 | self._id = md5(path.encode()).hexdigest() 142 | 143 | @staticmethod 144 | def from_dict(obj_dict) -> "CompositeAction": 145 | ca = CompositeAction(name=obj_dict.get("name"), path=obj_dict["path"]) 146 | 147 | ca.url = obj_dict["url"] 148 | ca.is_public = obj_dict["is_public"] 149 | if "inputs" in obj_dict: 150 | for name, input in obj_dict["inputs"].items(): 151 | input["_id"] = md5(f"{ca._id}_{name}".encode()).hexdigest() 152 | input["name"] = name 153 | input["url"] = ca.url 154 | input["path"] = ca.path 155 | ca.composite_action_input.add(CompositeActionInput.from_dict(input)) 156 | 157 | if "runs" in obj_dict: 158 | d_runs = obj_dict["runs"] 159 | 160 | if "using" in d_runs: 161 | ca.using = d_runs["using"] 162 | 163 | if "image" in d_runs: 164 | ca.image = d_runs["image"] 165 | 166 | if "steps" in d_runs: 167 | for i, step in enumerate(d_runs["steps"]): 168 | step["_id"] = md5(f"{ca._id}_{i}".encode()).hexdigest() 169 | step["path"] = ca.path 170 | step["url"] = ca.url 171 | ca.steps.add(CompositeActionStep.from_dict(step)) 172 | 173 | return ca 174 | -------------------------------------------------------------------------------- /src/workflow_components/dependency.py: -------------------------------------------------------------------------------- 1 | import os 2 | from enum import Enum 3 | 4 | from src.common.utils import get_repo_name_from_path 5 | 6 | 7 | class UsesStringType(Enum): 8 | ACTION = 1 9 | REUSABLE_WORKFLOW = 2 10 | DOCKER = 3 11 | 12 | 13 | class UsesString(object): 14 | type: UsesStringType 15 | path: str # E.g., actions/checkout, ./.github/actions/action-setup 16 | ref: str # E.g., v3. Can be a branch name, tag name, or commit SHA 17 | is_relative: bool 18 | 19 | @staticmethod 20 | def analyze(uses_string: str) -> "UsesString": 21 | """Parses the uses string, and extract relevant information: 22 | - Whether path is relative or absolute 23 | - Reference type (reusable workflow/action/docker) 24 | - path and ref 25 | 26 | If analyzed path is relative, the full path should be fetched using `get_full_path`. 
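        Illustrative results (derived from the parsing rules implemented below):
            analyze("actions/checkout@v3")        -> type=ACTION, path="actions/checkout", ref="v3"
            analyze("./.github/workflows/ci.yml") -> type=REUSABLE_WORKFLOW, is_relative=True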
27 | 
28 |         The uses string could point to:
29 |         - uses: actions/checkout@v3 (normal usage of external action)
30 |         - uses: github/codeql-action/analyze@v1 (external action in a directory)
31 |         - uses: ./.github/actions/action-setup (local external action pointing to action.yml)
32 |         - uses: ./.github/actions/action-install (local external action pointing to a Dockerfile)
33 |         - uses: ./.github/actions/build.yml (reusable workflow in local directory)
34 |         - uses: octo-org/this-repo/.github/workflows/workflow-1.yml@latest (reusable workflow in other directory)
35 |         - uses: docker://docker.io/library/golang:1.17.1-alpine@sha256:... (nothing to download)
36 |         """
37 |         uses_string_obj = UsesString()
38 |         uses_string_obj.is_relative = False
39 | 
40 |         uses_string_splitted = uses_string.split("@")
41 |         uses_string_obj.path = uses_string_splitted[0]
42 |         if len(uses_string_splitted) > 1:
43 |             uses_string_obj.ref = uses_string_splitted[1]
44 | 
45 |         # Get rid of the irrelevant cases
46 |         if uses_string_obj.path.startswith("docker://"):
47 |             uses_string_obj.type = UsesStringType.DOCKER
48 |             return uses_string_obj
49 | 
50 |         if uses_string_obj.path.endswith(".yml") or uses_string_obj.path.endswith(
51 |             ".yaml"
52 |         ):
53 |             uses_string_obj.type = UsesStringType.REUSABLE_WORKFLOW
54 |         else:
55 |             uses_string_obj.type = UsesStringType.ACTION
56 | 
57 |         if uses_string_obj.path.startswith("./"):
58 |             # local action or local reusable workflow
59 |             uses_string_obj.is_relative = True
60 |             return uses_string_obj
61 | 
62 |         # remote action or remote reusable workflow
63 |         return uses_string_obj
64 | 
65 |     def get_full_path(self, file_path: str) -> str:
66 |         """If the action or reusable workflow path is a relative path,
67 |         to calculate the full path we need the current repository where it was found.
68 |         """
69 |         if not self.is_relative:
70 |             return self.path
71 |         # We care only for the repository path, so we take the first two elements.
72 | 
73 |         repo = get_repo_name_from_path(file_path)
74 |         # This is a trick to evaluate the path (e.g., "..", "./", etc.)
75 |         return os.path.relpath(os.path.abspath(os.path.join(repo, self.path)))
76 | 
--------------------------------------------------------------------------------
/src/workflow_components/parsing_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, List, Union, Optional
2 | 
3 | 
4 | def parse_workflow_trigger(
5 |     trigger_obj: Union[str, List[str], Dict[str, Any]],
6 | ) -> List[str]:
7 |     """Parse and normalize the trigger field of a workflow.
8 |     Returns list of triggers.
9 |     Examples for input and output:
10 |     push -> ["push"]
11 |     ["push"] -> ["push"]
12 |     ["push", "pull_request"] -> ["push", "pull_request"]
13 |     {
14 |         "push": {
15 |             "branches": [
16 |                 "master"
17 |             ]
18 |         }
19 |     } -> ["push"]
20 |     """
21 |     if isinstance(trigger_obj, str):
22 |         trigger_list = [trigger_obj]
23 |     elif isinstance(trigger_obj, list):
24 |         trigger_list = []
25 |         for elem in trigger_obj:
26 |             if isinstance(elem, dict):
27 |                 trigger_list.extend(elem.keys())
28 |             else:
29 |                 trigger_list.append(elem)
30 |     elif isinstance(trigger_obj, dict):
31 |         trigger_list = list(trigger_obj.keys())
32 |     else:
33 |         # Shouldn't happen.
34 |         trigger_list = []
35 | 
36 |     return trigger_list
37 | 
38 | 
39 | def parse_job_machine(
40 |     runs_on_obj: Optional[Union[str, List[str], Dict[str, Any]]],
41 | ) -> Optional[List[str]]:
42 |     """Parse the runs-on field of a job.
43 | Examples for input and output: 44 | ubuntu-latest -> ["ubuntu-latest"] 45 | ["ubuntu-latest"] -> ["ubuntu-latest"] 46 | { 47 | "labels": [ 48 | "ubuntu-latest" 49 | ] 50 | } -> ["ubuntu-latest"] 51 | """ 52 | if isinstance(runs_on_obj, str): 53 | return [runs_on_obj] 54 | elif isinstance(runs_on_obj, list): 55 | return runs_on_obj 56 | elif isinstance(runs_on_obj, dict): 57 | return runs_on_obj["labels"] 58 | 59 | return None 60 | -------------------------------------------------------------------------------- /src/workflow_components/workflow.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict, Any 2 | from hashlib import md5 3 | 4 | from py2neo.ogm import GraphObject, RelatedTo, RelatedFrom, Property 5 | from src.config.config import Config 6 | from src.common.utils import ( 7 | get_dependencies_in_code, 8 | get_repo_name_from_path, 9 | convert_dict_to_list, 10 | find_workflow_by_name, 11 | raw_str_to_bool, 12 | ) 13 | from src.workflow_components.parsing_utils import ( 14 | parse_workflow_trigger, 15 | parse_job_machine, 16 | ) 17 | from src.workflow_components.dependency import UsesString, UsesStringType 18 | import src.logger.log as log 19 | 20 | 21 | def get_or_create_workflow(path: str) -> "Workflow": 22 | """Used when need to create relations with another workflow. 23 | If workflow wasn't indexed yet, we create a stub node, 24 | that will be enriched eventually. 25 | """ 26 | w = Workflow(None, path) 27 | obj = Config.graph.get_object(w) 28 | if not obj: 29 | # This is a legitimate behavior. 30 | # Once the workflow will be indexed, the node will be enriched. 31 | Config.graph.push_object(w) 32 | obj = w 33 | return obj 34 | 35 | 36 | class StepCodeDependency(GraphObject): 37 | __primarykey__ = "_id" 38 | 39 | _id = Property() 40 | param = Property() 41 | url = Property() 42 | path = Property() 43 | 44 | def __init__(self, param: str, path: str): 45 | self.param = param 46 | self.path = path 47 | self._id = md5(f"{param}_{path}".encode()).hexdigest() 48 | 49 | 50 | class Step(GraphObject): 51 | __primarykey__ = "_id" 52 | 53 | _id = Property() 54 | name = Property() 55 | path = Property() 56 | run = Property() 57 | uses = Property() 58 | ref = Property() 59 | with_prop = Property("with") 60 | url = Property() 61 | 62 | action = RelatedTo("src.workflow_components.composite_action.CompositeAction") 63 | reusable_workflow = RelatedTo("Workflow") 64 | using_param = RelatedTo("StepCodeDependency") 65 | 66 | def __init__(self, _id: str, name: Optional[str], path: str): 67 | self._id = _id 68 | self.name = name 69 | self.path = path 70 | 71 | @staticmethod 72 | def from_dict(obj_dict) -> "Step": 73 | s = Step(_id=obj_dict["_id"], name=obj_dict.get("name"), path=obj_dict["path"]) 74 | s.url = obj_dict["url"] 75 | if "run" in obj_dict: 76 | s.run = obj_dict["run"] 77 | 78 | # Adding ${{...}} dependencies as an entity. 79 | for code_dependency in get_dependencies_in_code(s.run): 80 | param = StepCodeDependency(code_dependency, s.path) 81 | param.url = s.url 82 | s.using_param.add(param) 83 | elif "uses" in obj_dict: 84 | s.uses = obj_dict["uses"] 85 | # Uses string is quite complex, and may reference to several types of nodes. 86 | # In the case of steps, it may only reference actions (and not reusable workflows). 87 | uses_string_obj = UsesString.analyze(uses_string=s.uses) 88 | if uses_string_obj.type == UsesStringType.ACTION: 89 | # Avoiding circular imports. 
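                # (composite_action imports this module at its top level, so a
                # module-level import here would create an import cycle; importing
                # lazily inside the method breaks it.)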
90 |                 import src.workflow_components.composite_action as composite_action
91 | 
92 |                 obj = composite_action.get_or_create_composite_action(
93 |                     uses_string_obj.get_full_path(s.path)
94 |                 )
95 |                 s.action.add(obj)
96 | 
97 |             if "with" in obj_dict:
98 |                 s.with_prop = convert_dict_to_list(obj_dict["with"])
99 | 
100 |             if len(s.uses.split("@")) > 1:
101 |                 s.ref = s.uses.split("@")[1]
102 |         return s
103 | 
104 | 
105 | class Job(GraphObject):
106 |     __primarykey__ = "_id"
107 | 
108 |     _id = Property()
109 |     name = Property()
110 |     path = Property()
111 |     machine = Property()
112 |     uses = Property()
113 |     ref = Property()
114 |     url = Property()
115 |     with_prop = Property("with")
116 | 
117 |     steps = RelatedTo(Step)
118 |     reusable_workflow = RelatedTo("Workflow")
119 | 
120 |     def __init__(self, _id: str, name: str, path: str):
121 |         self._id = _id
122 |         self.name = name
123 |         self.path = path
124 | 
125 |     @staticmethod
126 |     def from_dict(obj_dict) -> "Job":
127 |         j = Job(_id=obj_dict["_id"], name=obj_dict["name"], path=obj_dict["path"])
128 |         if "uses" in obj_dict:
129 |             j.uses = obj_dict["uses"]
130 |             # The uses string is quite complex and may reference several types of nodes.
131 |             # In the case of jobs, it may only reference reusable workflows.
132 |             uses_string_obj = UsesString.analyze(uses_string=j.uses)
133 |             if uses_string_obj.type == UsesStringType.REUSABLE_WORKFLOW:
134 |                 obj = get_or_create_workflow(uses_string_obj.get_full_path(j.path))
135 |                 j.reusable_workflow.add(obj)
136 | 
137 |             if "with" in obj_dict:
138 |                 j.with_prop = convert_dict_to_list(obj_dict["with"])
139 | 
140 |             if len(j.uses.split("@")) > 1:
141 |                 j.ref = j.uses.split("@")[1]
142 | 
143 |         j.url = obj_dict["url"]
144 |         if "steps" in obj_dict:
145 |             j.machine = parse_job_machine(obj_dict.get("runs-on"))
146 | 
147 |             for i, step in enumerate(obj_dict["steps"]):
148 |                 step["_id"] = md5(f"{j._id}_{i}".encode()).hexdigest()
149 |                 step["path"] = j.path
150 |                 step["url"] = j.url
151 |                 j.steps.add(Step.from_dict(step))
152 | 
153 |         return j
154 | 
155 | 
156 | class Workflow(GraphObject):
157 |     __primarykey__ = "_id"
158 | 
159 |     _id = Property()
160 |     name = Property()
161 |     path = Property()
162 |     trigger = Property()
163 |     permissions = Property()
164 |     url = Property()
165 |     is_public = Property()
166 | 
167 |     jobs = RelatedTo(Job)
168 |     triggered_by = RelatedTo("Workflow")
169 |     reusable_workflow_input = RelatedTo("ReusableWorkflowInput")
170 | 
171 |     def __init__(self, name: Optional[str], path: str):
172 |         self.name = name
173 |         self.path = path
174 |         self._id = md5(path.encode()).hexdigest()
175 | 
176 |     @staticmethod
177 |     def from_dict(obj_dict: Dict[str, Any]) -> "Workflow":
178 |         w = Workflow(name=obj_dict.get("name"), path=obj_dict["path"])
179 | 
180 |         w.trigger = parse_workflow_trigger(obj_dict["on"])
181 | 
182 |         w.url = obj_dict["url"]
183 |         w.is_public = obj_dict["is_public"]
184 | 
185 |         # Handling the special case of workflow_run.
186 |         # When we meet it, we want to create a special relation from the triggering workflow
187 |         # to the triggered one.
188 |         # There are cases where the triggering workflow wasn't loaded yet.
189 |         # In that case we create a stub node for it,
190 |         # and once we meet it, we'll enrich it.
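        # Illustrative shape of the trigger this branch handles (the workflow
        # name is hypothetical):
        #   on:
        #     workflow_run:
        #       workflows: ["CI"]
        #       types: [completed]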
191 | if "workflow_run" in w.trigger: 192 | workflow_run = obj_dict["on"]["workflow_run"] 193 | triggering_workflows = workflow_run["workflows"] 194 | types = workflow_run["types"] 195 | for workflow_name in triggering_workflows: 196 | repo = get_repo_name_from_path(w.path) 197 | w_path = find_workflow_by_name(repo, workflow_name) 198 | if w_path is None: 199 | log.debug( 200 | f"[-] Couldn't find the triggering workflow '{workflow_name}' in repository '{repo}'" 201 | ) 202 | else: 203 | w_triggering = get_or_create_workflow(w_path) 204 | w.triggered_by.add(w_triggering, types=types) 205 | 206 | # Handling special case of workflow_call 207 | # When we meet it, we want to create a special relation to inputs of the reusable workflow. 208 | # We continue to treat the workflow as a regular workflow, and not as a reusable workflow. 209 | # But the difference is that we connected the different inputs to the workflow. 210 | if "workflow_call" in w.trigger: 211 | wokrflow_call = obj_dict["on"]["workflow_call"] 212 | inputs = wokrflow_call["inputs"] 213 | for input_name, input in inputs.items(): 214 | input["_id"] = md5(f"{w._id}_{input_name}".encode()).hexdigest() 215 | input["name"] = input_name 216 | input["url"] = w.url 217 | input["path"] = w.path 218 | w.reusable_workflow_input.add(ReusableWorkflowInput.from_dict(input)) 219 | 220 | if "permissions" in obj_dict: 221 | w.permissions = convert_dict_to_list(obj_dict["permissions"]) 222 | 223 | for job_name, job in obj_dict["jobs"].items(): 224 | if not isinstance(job, dict): 225 | log.error("[-] Invalid job structure") 226 | raise Exception("Invalid job structure.") 227 | job["_id"] = md5(f"{w._id}_{job_name}".encode()).hexdigest() 228 | job["path"] = w.path 229 | job["name"] = job_name 230 | job["url"] = w.url 231 | w.jobs.add(Job.from_dict(job)) 232 | 233 | return w 234 | 235 | 236 | class ReusableWorkflowInput(GraphObject): 237 | __primarykey__ = "_id" 238 | 239 | _id = Property() 240 | name = Property() 241 | default = Property() 242 | description = Property() 243 | required = Property() 244 | path = Property() 245 | url = Property() 246 | 247 | def __init__(self, _id: str, path: str): 248 | self._id = _id 249 | self.path = path 250 | 251 | @staticmethod 252 | def from_dict(obj_dict) -> "ReusableWorkflowInput": 253 | i = ReusableWorkflowInput(_id=obj_dict["_id"], path=obj_dict["path"]) 254 | i.name = obj_dict["name"] 255 | i.url = obj_dict["url"] 256 | 257 | if "default" in obj_dict: 258 | i.default = obj_dict.get("default") 259 | 260 | if "description" in obj_dict: 261 | i.description = obj_dict.get("description") 262 | 263 | i.required = raw_str_to_bool(obj_dict.get("required", "false")) 264 | 265 | return i 266 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/tests/__init__.py -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/tests/integration/__init__.py -------------------------------------------------------------------------------- /tests/integration/integration_consts.py: -------------------------------------------------------------------------------- 1 | ## Queries ## 2 | 3 | 
GET_RELATIONSHIPS_BY_PATH_QUERY = """ 4 | MATCH (s)-[r]->(e) 5 | where s.path in {paths_list} 6 | RETURN s, r, e 7 | """ 8 | 9 | GET_NODES_BY_PATH_QUERY = """ 10 | MATCH (s) 11 | where s.path in {paths_list} 12 | RETURN s 13 | """ 14 | 15 | START_NODE_INDEX = 0 16 | DEST_NODE_INDEX = 2 17 | 18 | ## Tests Configs ## 19 | TESTS_CONFIGS = [ 20 | { 21 | "test_name": "test_integration_1", 22 | "json_path": "tests/integration/structures_json/integration-1.json", 23 | "description": "Tests Integration 1's graph structure. This is a repository with a single workflow. The workflow has Jobs, Steps, and StepCodeDependency. It uses a composite action which is also in the organization. The Composite Action has Steps and StepCodeDependency. These are all the node types that we currently support.", 24 | "queries": { 25 | "nodes_query": GET_NODES_BY_PATH_QUERY, 26 | "relationships_query": GET_RELATIONSHIPS_BY_PATH_QUERY, 27 | "to_format": { 28 | "paths_list": [ 29 | "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 30 | "RavenIntegrationTests/CompositeAction-Mock", 31 | ] 32 | }, 33 | }, 34 | }, 35 | { 36 | "test_name": "test_demo_index_repos", 37 | "json_path": "tests/integration/structures_json/demo-index.json", 38 | "description": "Tests Demo-[1-4]'s graph structures combined. These are four different repositories that have similar workflows. They all have a workflow that uses the checkout action.", 39 | "queries": { 40 | "nodes_query": GET_NODES_BY_PATH_QUERY, 41 | "relationships_query": GET_RELATIONSHIPS_BY_PATH_QUERY, 42 | "to_format": { 43 | "paths_list": [ 44 | "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", 45 | "RavenIntegrationTests/Demo-2/.github/workflows/demo-workflow.yml", 46 | "RavenIntegrationTests/Demo-3/.github/workflows/demo-workflow.yml", 47 | "RavenIntegrationTests/Demo-4/.github/workflows/demo-workflow.yml", 48 | "actions/checkout", 49 | ] 50 | }, 51 | }, 52 | }, 53 | { 54 | "test_name": "test_reusable_workflows", 55 | "json_path": "tests/integration/structures_json/reusable-workflows.json", 56 | "description": "Tests ReusableWorkflows-Mock's graph structure. This is a repository with two workflows. 
One of them uses the other as a reusable workflow.", 57 | "queries": { 58 | "nodes_query": GET_NODES_BY_PATH_QUERY, 59 | "relationships_query": GET_RELATIONSHIPS_BY_PATH_QUERY, 60 | "to_format": { 61 | "paths_list": [ 62 | "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 63 | "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/test.yml", 64 | ] 65 | }, 66 | }, 67 | }, 68 | ] 69 | -------------------------------------------------------------------------------- /tests/integration/structures_json/integration-1.json: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": [ 3 | { 4 | "path": "RavenIntegrationTests/CompositeAction-Mock", 5 | "using": "composite", 6 | "name": "Example composite GitHub action", 7 | "is_public": true, 8 | "_id": "0cedc4763c9b12640b59748858f52ecb", 9 | "url": "https://github.com/RavenIntegrationTests/CompositeAction-Mock/tree/main/action.yml", 10 | "labels": [ 11 | "CompositeAction" 12 | ] 13 | }, 14 | { 15 | "path": "RavenIntegrationTests/CompositeAction-Mock", 16 | "default": "true", 17 | "name": "param1", 18 | "description": "Input parameter placeholder", 19 | "_id": "e0e8b6ca1b9aab6e0dd0e1fe70c88a08", 20 | "required": true, 21 | "url": "https://github.com/RavenIntegrationTests/CompositeAction-Mock/tree/main/action.yml", 22 | "labels": [ 23 | "CompositeActionInput" 24 | ] 25 | }, 26 | { 27 | "path": "RavenIntegrationTests/CompositeAction-Mock", 28 | "shell": "bash", 29 | "name": "context", 30 | "run": "echo \"action-result=${{ inputs.param1 }}\" >> $GITHUB_OUTPUT\n", 31 | "_id": "db50f26195dcd8ca600a046df26f0a3a", 32 | "url": "https://github.com/RavenIntegrationTests/CompositeAction-Mock/tree/main/action.yml", 33 | "labels": [ 34 | "CompositeActionStep" 35 | ] 36 | }, 37 | { 38 | "path": "RavenIntegrationTests/CompositeAction-Mock", 39 | "param": "inputs.param1", 40 | "_id": "004b8c21e56d8fbd8c089dfc0de3f70a", 41 | "url": "https://github.com/RavenIntegrationTests/CompositeAction-Mock/tree/main/action.yml", 42 | "labels": [ 43 | "StepCodeDependency" 44 | ] 45 | }, 46 | { 47 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 48 | "name": "integration_workflow", 49 | "is_public": true, 50 | "_id": "d65c066b4fe60e52c419b3e7043d297e", 51 | "trigger": [ 52 | "pull_request_target" 53 | ], 54 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 55 | "labels": [ 56 | "Workflow" 57 | ] 58 | }, 59 | { 60 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 61 | "machine": [ 62 | "ubuntu-latest" 63 | ], 64 | "name": "first_job", 65 | "_id": "2007449e2ba101423871ac669de5b750", 66 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 67 | "labels": [ 68 | "Job" 69 | ] 70 | }, 71 | { 72 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 73 | "ref": "v1", 74 | "uses": "RavenIntegrationTests/CompositeAction-Mock@v1", 75 | "_id": "88e6517ba8d71f0851e6f3b33ae2e51b", 76 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 77 | "labels": [ 78 | "Step" 79 | ] 80 | }, 81 | { 82 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 83 | "ref": "main", 84 | "uses": "RavenIntegrationTests/CompositeAction-Mock@main", 85 | "_id": 
"a7957c48867f1f675ab6c9e4f1828c14", 86 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 87 | "labels": [ 88 | "Step" 89 | ] 90 | }, 91 | { 92 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 93 | "name": "print_env", 94 | "run": "print_env", 95 | "_id": "2a12d8215584fab339b14da4d6a904ff", 96 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 97 | "labels": [ 98 | "Step" 99 | ] 100 | }, 101 | { 102 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 103 | "machine": [ 104 | "ubuntu-latest" 105 | ], 106 | "name": "second_job", 107 | "_id": "57e4ebfad3aa1f852f256d59d7c7e982", 108 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 109 | "labels": [ 110 | "Job" 111 | ] 112 | }, 113 | { 114 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 115 | "name": "print_pull_request_title", 116 | "run": "echo \"Pull request title is ${{ github.event.pull_request.title }}\"", 117 | "_id": "813206f991310b30c1405955aeefb00e", 118 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 119 | "labels": [ 120 | "Step" 121 | ] 122 | }, 123 | { 124 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 125 | "param": "github.event.pull_request.title", 126 | "_id": "62cded2b531643f4d784c4e2e5c614d1", 127 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 128 | "labels": [ 129 | "StepCodeDependency" 130 | ] 131 | } 132 | ], 133 | "relationships": [ 134 | { 135 | "start_node": "0cedc4763c9b12640b59748858f52ecb", 136 | "type": "COMPOSITE_ACTION_INPUT", 137 | "end_node": "e0e8b6ca1b9aab6e0dd0e1fe70c88a08" 138 | }, 139 | { 140 | "start_node": "db50f26195dcd8ca600a046df26f0a3a", 141 | "type": "USING_PARAM", 142 | "end_node": "004b8c21e56d8fbd8c089dfc0de3f70a" 143 | }, 144 | { 145 | "start_node": "0cedc4763c9b12640b59748858f52ecb", 146 | "type": "STEPS", 147 | "end_node": "db50f26195dcd8ca600a046df26f0a3a" 148 | }, 149 | { 150 | "start_node": "88e6517ba8d71f0851e6f3b33ae2e51b", 151 | "type": "ACTION", 152 | "end_node": "0cedc4763c9b12640b59748858f52ecb" 153 | }, 154 | { 155 | "start_node": "a7957c48867f1f675ab6c9e4f1828c14", 156 | "type": "ACTION", 157 | "end_node": "0cedc4763c9b12640b59748858f52ecb" 158 | }, 159 | { 160 | "start_node": "2007449e2ba101423871ac669de5b750", 161 | "type": "STEPS", 162 | "end_node": "88e6517ba8d71f0851e6f3b33ae2e51b" 163 | }, 164 | { 165 | "start_node": "2007449e2ba101423871ac669de5b750", 166 | "type": "STEPS", 167 | "end_node": "a7957c48867f1f675ab6c9e4f1828c14" 168 | }, 169 | { 170 | "start_node": "2007449e2ba101423871ac669de5b750", 171 | "type": "STEPS", 172 | "end_node": "2a12d8215584fab339b14da4d6a904ff" 173 | }, 174 | { 175 | "start_node": "813206f991310b30c1405955aeefb00e", 176 | "type": "USING_PARAM", 177 | "end_node": "62cded2b531643f4d784c4e2e5c614d1" 178 | }, 179 | { 180 | "start_node": "57e4ebfad3aa1f852f256d59d7c7e982", 181 | "type": "STEPS", 182 | "end_node": "813206f991310b30c1405955aeefb00e" 183 | }, 184 | { 185 | "start_node": "d65c066b4fe60e52c419b3e7043d297e", 186 | "type": "JOBS", 187 | "end_node": "2007449e2ba101423871ac669de5b750" 188 | }, 189 | { 190 | "start_node": 
"d65c066b4fe60e52c419b3e7043d297e", 191 | "type": "JOBS", 192 | "end_node": "57e4ebfad3aa1f852f256d59d7c7e982" 193 | } 194 | ] 195 | } -------------------------------------------------------------------------------- /tests/integration/structures_json/reusable-workflows.json: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": [ 3 | { 4 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 5 | "is_public": true, 6 | "name": "reusable_workflow", 7 | "trigger": [ 8 | "workflow_call" 9 | ], 10 | "_id": "ff1b0c2b61a25d227707be99c3901303", 11 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 12 | "labels": [ 13 | "Workflow" 14 | ] 15 | }, 16 | { 17 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 18 | "default": "input_1_defult", 19 | "name": "input_1", 20 | "_id": "aaaaf3ef437b55388bec93f26c8f9c44", 21 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 22 | "required": true, 23 | "labels": [ 24 | "ReusableWorkflowInput" 25 | ] 26 | }, 27 | { 28 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 29 | "default": 1, 30 | "name": "input_2", 31 | "_id": "3f81ef5510e05f61bfa26a950ebd2c3d", 32 | "required": false, 33 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 34 | "labels": [ 35 | "ReusableWorkflowInput" 36 | ] 37 | }, 38 | { 39 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 40 | "default": "true", 41 | "name": "input_3", 42 | "_id": "2fe112d8d392c700041b353c90db4edf", 43 | "required": false, 44 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 45 | "labels": [ 46 | "ReusableWorkflowInput" 47 | ] 48 | }, 49 | { 50 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 51 | "machine": [ 52 | "ubuntu-latest" 53 | ], 54 | "name": "test", 55 | "_id": "292255a5b241802f89614333a7a13539", 56 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 57 | "labels": [ 58 | "Job" 59 | ] 60 | }, 61 | { 62 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 63 | "ref": "v4", 64 | "name": "Checkout", 65 | "uses": "actions/checkout@v4", 66 | "_id": "3cae77d1d794cf1a5dd88b4e2e38bd22", 67 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 68 | "labels": [ 69 | "Step" 70 | ] 71 | }, 72 | { 73 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 74 | "name": "Print Input", 75 | "run": "echo \"${{ inputs.input_1 }}\"", 76 | "_id": "7f294647090543c69b742cb12d98bd00", 77 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 78 | "labels": [ 79 | "Step" 80 | ] 81 | }, 82 | { 83 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 84 | "param": "inputs.input_1", 85 | "_id": "92b692254230795df97f94ad6e6243a7", 86 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 87 | 
"labels": [ 88 | "StepCodeDependency" 89 | ] 90 | } 91 | ], 92 | "relationships": [ 93 | { 94 | "start_node": "ff1b0c2b61a25d227707be99c3901303", 95 | "type": "REUSABLE_WORKFLOW_INPUT", 96 | "end_node": "aaaaf3ef437b55388bec93f26c8f9c44" 97 | }, 98 | { 99 | "start_node": "ff1b0c2b61a25d227707be99c3901303", 100 | "type": "REUSABLE_WORKFLOW_INPUT", 101 | "end_node": "3f81ef5510e05f61bfa26a950ebd2c3d" 102 | }, 103 | { 104 | "start_node": "ff1b0c2b61a25d227707be99c3901303", 105 | "type": "REUSABLE_WORKFLOW_INPUT", 106 | "end_node": "2fe112d8d392c700041b353c90db4edf" 107 | }, 108 | { 109 | "start_node": "3cae77d1d794cf1a5dd88b4e2e38bd22", 110 | "type": "ACTION", 111 | "end_node": "d35e7df441120da9624b8c11e36151be" 112 | }, 113 | { 114 | "start_node": "7f294647090543c69b742cb12d98bd00", 115 | "type": "USING_PARAM", 116 | "end_node": "92b692254230795df97f94ad6e6243a7" 117 | }, 118 | { 119 | "start_node": "292255a5b241802f89614333a7a13539", 120 | "type": "STEPS", 121 | "end_node": "3cae77d1d794cf1a5dd88b4e2e38bd22" 122 | }, 123 | { 124 | "start_node": "292255a5b241802f89614333a7a13539", 125 | "type": "STEPS", 126 | "end_node": "7f294647090543c69b742cb12d98bd00" 127 | }, 128 | { 129 | "start_node": "ff1b0c2b61a25d227707be99c3901303", 130 | "type": "JOBS", 131 | "end_node": "292255a5b241802f89614333a7a13539" 132 | } 133 | ] 134 | } -------------------------------------------------------------------------------- /tests/integration/test_graph_structures.py: -------------------------------------------------------------------------------- 1 | from colorama import Fore, Style 2 | from tests.utils import ( 3 | get_graph_structure, 4 | assert_graph_structures, 5 | ) 6 | from tests.integration.integration_consts import TESTS_CONFIGS 7 | from tests.tests_init import init_integration_env 8 | 9 | 10 | def test_graph_structure() -> None: 11 | """ 12 | Tests the graph structure of the integration tests. 13 | It will loop over each test config dictionary on TESTS_CONFIGS list and assert the graph structure is as expected. 
14 | """ 15 | init_integration_env() 16 | for test_config in TESTS_CONFIGS: 17 | print( 18 | f"{Fore.CYAN}Running integration test: {test_config['test_name']}.{Style.RESET_ALL}" 19 | ) 20 | 21 | # Get the queries from the test config 22 | query_config = test_config["queries"] 23 | nodes_query = query_config["nodes_query"].format(**query_config["to_format"]) 24 | relationships_query = query_config["relationships_query"].format( 25 | **query_config["to_format"] 26 | ) 27 | 28 | # Get the graph structure from the queries and assert it 29 | graph_structure = get_graph_structure(nodes_query, relationships_query) 30 | assert_graph_structures(graph_structure, test_config["json_path"]) 31 | -------------------------------------------------------------------------------- /tests/tests_init.py: -------------------------------------------------------------------------------- 1 | from os import getenv 2 | from src.config.config import load_downloader_config, load_indexer_config 3 | from src.downloader.download import download_account_workflows_and_actions 4 | from src.indexer.index import index_downloaded_workflows_and_actions 5 | 6 | 7 | def init_integration_env(): 8 | load_integration_tests_config() 9 | download_account_workflows_and_actions() 10 | index_downloaded_workflows_and_actions() 11 | 12 | 13 | def load_integration_tests_config() -> None: 14 | load_downloader_config( 15 | { 16 | "debug": False, 17 | "token": getenv("GITHUB_TOKEN"), 18 | "account_name": ["RavenIntegrationTests"], 19 | "redis_host": "raven-redis-test", 20 | "redis_port": 6379, 21 | "clean_redis": True, 22 | } 23 | ) 24 | 25 | load_indexer_config( 26 | { 27 | "debug": False, 28 | "redis_host": "raven-redis-test", 29 | "redis_port": 6379, 30 | "clean_redis": True, 31 | "neo4j_uri": "neo4j://raven-neo4j-test:7687", 32 | "neo4j_user": "neo4j", 33 | "neo4j_pass": "123456789", 34 | "threads": 1, 35 | "clean_neo4j": True, 36 | } 37 | ) 38 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_composite_action.py: -------------------------------------------------------------------------------- 1 | import src.workflow_components.composite_action as composite_action 2 | from tests.utils import load_test_config, assert_action_inputs 3 | 4 | load_test_config() 5 | 6 | 7 | def test_composite_action_from_dict_node(): 8 | ca_d = { 9 | "name": "Create Issue From File", 10 | "description": "An action to create an issue using content from a file", 11 | "inputs": { 12 | "token": { 13 | "description": "The GitHub authentication token", 14 | "default": "${{ github.token }}", 15 | "required": True, 16 | }, 17 | "repository": { 18 | "description": "The target GitHub repository", 19 | "default": "${{ github.repository }}", 20 | }, 21 | "issue-number": { 22 | "description": "The issue number of an existing issue to update" 23 | }, 24 | "title": {"description": "The title of the issue", "required": "true"}, 25 | "content-filepath": {"description": "The file path to the issue content"}, 26 | "labels": {"description": "A comma or newline-separated list of labels"}, 27 | "assignees": { 28 | "description": "A comma or newline-separated list of assignees (GitHub usernames)" 29 | }, 30 | }, 31 | "outputs":
{"issue-number": {"description": "The number of the created issue"}}, 32 | "runs": {"using": "node16", "main": "dist/index.js"}, 33 | "branding": {"icon": "alert-circle", "color": "orange"}, 34 | "path": "data/actions/peter-evans|create-issue-from-file|action.yml", 35 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 36 | "is_public": True, 37 | } 38 | 39 | ca = composite_action.CompositeAction.from_dict(ca_d) 40 | 41 | assert ca.name == ca_d["name"] 42 | assert ca.path == ca_d["path"] 43 | assert ca.using == "node16" 44 | assert ca.url == ca_d["url"] 45 | assert ca.is_public == ca_d["is_public"] 46 | assert ca.image is None 47 | assert len(ca.steps) == 0 48 | 49 | assert_action_inputs(ca, ca_d) 50 | 51 | 52 | def test_composite_action_from_dict_dockerfile(): 53 | ca_d = { 54 | "name": "Automatic Rebase", 55 | "description": "Automatically rebases PR on '/rebase' comment", 56 | "maintainer": "Cirrus Labs", 57 | "runs": {"using": "docker", "image": "Dockerfile"}, 58 | "inputs": { 59 | "autosquash": { 60 | "description": "Should the rebase autosquash fixup and squash commits", 61 | "required": "false", 62 | "default": "false", 63 | } 64 | }, 65 | "branding": {"icon": "git-pull-request", "color": "purple"}, 66 | "path": "data/actions/cirrus-actions|rebase|action.yml", 67 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 68 | "is_public": True, 69 | } 70 | 71 | ca = composite_action.CompositeAction.from_dict(ca_d) 72 | 73 | assert ca.name == ca_d["name"] 74 | assert ca.path == ca_d["path"] 75 | assert ca.using == "docker" 76 | assert ca.image == "Dockerfile" 77 | assert ca.url == ca_d["url"] 78 | assert ca.is_public == ca_d["is_public"] 79 | assert len(ca.steps) == 0 80 | 81 | assert_action_inputs(ca, ca_d) 82 | 83 | 84 | def test_composite_action_from_dict_image(): 85 | ca_d = { 86 | "name": "Image Actions", 87 | "author": "Calibre", 88 | "description": "Compresses Images for the Web", 89 | "inputs": { 90 | "githubToken": {"description": "GitHub Token", "required": "true"}, 91 | }, 92 | "outputs": { 93 | "markdown": { 94 | "description": "Output param used to store the Markdown summary for subsequent actions to use" 95 | } 96 | }, 97 | "runs": { 98 | "using": "docker", 99 | "image": "docker://ghcr.io/calibreapp/image-actions/image-actions:main", 100 | }, 101 | "branding": {"icon": "image", "color": "green"}, 102 | "path": "data/actions/calibreapp|image-actions|action.yml", 103 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 104 | "is_public": True, 105 | } 106 | 107 | ca = composite_action.CompositeAction.from_dict(ca_d) 108 | 109 | assert ca.name == ca_d["name"] 110 | assert ca.path == ca_d["path"] 111 | assert ca.using == "docker" 112 | assert ca.url == ca_d["url"] 113 | assert ca.is_public == ca_d["is_public"] 114 | assert ca.image == "docker://ghcr.io/calibreapp/image-actions/image-actions:main" 115 | assert len(ca.steps) == 0 116 | 117 | assert_action_inputs(ca, ca_d) 118 | 119 | 120 | def test_composite_action_from_dict_steps(): 121 | ca_d = { 122 | "name": "Install development tools", 123 | "description": "GitHub Action for installing development tools", 124 | "inputs": { 125 | "tool": { 126 | "description": "Tools to install (comma-separated list)", 127 | "required": "true", 128 | }, 129 | "checksum": { 130 | "description": "Whether to enable checksums", 131 | "required": "false", 132 | "default": "true", 133 | }, 134 | }, 135 | "runs": { 136 | "using": "composite", 137 | "steps": [ 138 | { 139 | "run": 'bash --noprofile --norc "${GITHUB_ACTION_PATH:?}/main.sh"', 140 
| "shell": "bash", 141 | "env": { 142 | "INPUT_TOOL": "${{ inputs.tool }}", 143 | "INPUT_CHECKSUM": "${{ inputs.checksum }}", 144 | }, 145 | } 146 | ], 147 | }, 148 | "path": "data/actions/taiki-e|install-action|action.yml", 149 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 150 | "is_public": True, 151 | } 152 | 153 | ca = composite_action.CompositeAction.from_dict(ca_d) 154 | 155 | assert ca.name == ca_d["name"] 156 | assert ca.path == ca_d["path"] 157 | assert ca.using == "composite" 158 | assert ca.url == ca_d["url"] 159 | assert ca.is_public == ca_d["is_public"] 160 | assert ca.image is None 161 | assert len(ca.steps) == 1 162 | 163 | assert_action_inputs(ca, ca_d) 164 | 165 | 166 | def test_composite_action_step_from_dict_run(): 167 | step_d = { 168 | "run": ': install rustup if needed\nif ! command -v rustup &>/dev/null; then\n curl --proto \'=https\' --tlsv1.2 --retry 10 --retry-connrefused --location --silent --show-error --fail "https://sh.rustup.rs" | sh -s -- --default-toolchain none -y\n echo "${CARGO_HOME:-$HOME/.cargo}/bin" >> $GITHUB_PATH\nfi\n', 169 | "if": "runner.os != 'Windows'", 170 | "shell": "bash", 171 | "_id": "4eba12855ade10f6e8dda0456946ffa1", 172 | "path": "data/actions/dtolnay|rust-toolchain|action.yml", 173 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 174 | } 175 | 176 | step = composite_action.CompositeActionStep.from_dict(step_d) 177 | 178 | assert step._id == step_d["_id"] 179 | assert step.name is None 180 | assert step.path == step_d["path"] 181 | assert step.run == step_d["run"] 182 | assert step.uses is None 183 | assert step.ref is None 184 | assert step.shell == step_d["shell"] 185 | assert step.with_prop is None 186 | assert step.url == step_d["url"] 187 | assert len(step.action) == 0 188 | assert len(step.using_param) == 0 189 | 190 | 191 | def test_composite_action_step_from_dict_run_dependency(): 192 | step_d = { 193 | "run": "${{ github.action_path }}/setup_pip.ps1", 194 | "shell": "pwsh", 195 | "env": { 196 | "PYTHON_VERSION": "${{ steps.setup.outputs.python-version }}", 197 | "SETUP_PYTHON_PATH": "${{ steps.setup.outputs.python-path }}", 198 | }, 199 | "_id": "f85b9778e35a1273d88c7dabdb210eaf", 200 | "path": "data/actions/ytdl-org|setup-python|action.yml", 201 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 202 | } 203 | 204 | step = composite_action.CompositeActionStep.from_dict(step_d) 205 | 206 | assert step._id == step_d["_id"] 207 | assert step.name is None 208 | assert step.path == step_d["path"] 209 | assert step.run == step_d["run"] 210 | assert step.uses is None 211 | assert step.ref is None 212 | assert step.shell == step_d["shell"] 213 | assert step.url == step_d["url"] 214 | assert step.with_prop is None 215 | assert len(step.action) == 0 216 | assert len(step.using_param) == 1 217 | 218 | 219 | def test_composite_action_step_from_dict_using(): 220 | step_d = { 221 | "uses": "actions/setup-python@bd6b4b6205c4dbad673328db7b31b7fab9e241c0", 222 | "id": "setup", 223 | "with": { 224 | "python-version": "${{ steps.build.outputs.python-version }}", 225 | "cache": "${{ inputs.cache }}", 226 | "architecture": "${{ steps.build.outputs.architecture }}", 227 | "check-latest": "${{ inputs.check-latest }}", 228 | "token": "${{ inputs.token }}", 229 | "cache-dependency-path": "${{ inputs.cache-dependency-path }}", 230 | "update-environment": "${{ inputs.update-environment }}", 231 | }, 232 | "_id": "11e15e6b7424478c2e32fd22ed477c21", 233 | "path": "data/actions/ytdl-org|setup-python|action.yml", 234 | "url": 
"https://github.com/CycodeLabs/Raven/pull/1", 235 | } 236 | 237 | step = composite_action.CompositeActionStep.from_dict(step_d) 238 | assert step._id == step_d["_id"] 239 | assert step.name == step_d["id"] 240 | assert step.path == step_d["path"] 241 | assert step.run is None 242 | assert step.uses == step_d["uses"] 243 | assert step.ref == "bd6b4b6205c4dbad673328db7b31b7fab9e241c0" 244 | assert step.shell is None 245 | assert step.url == step_d["url"] 246 | assert step.with_prop == [ 247 | "python-version:${{ steps.build.outputs.python-version }}", 248 | "cache:${{ inputs.cache }}", 249 | "architecture:${{ steps.build.outputs.architecture }}", 250 | "check-latest:${{ inputs.check-latest }}", 251 | "token:${{ inputs.token }}", 252 | "cache-dependency-path:${{ inputs.cache-dependency-path }}", 253 | "update-environment:${{ inputs.update-environment }}", 254 | ] 255 | assert len(step.using_param) == 0 256 | -------------------------------------------------------------------------------- /tests/unit/test_dependency.py: -------------------------------------------------------------------------------- 1 | from tests.utils import load_test_config 2 | import src.workflow_components.dependency as dependency 3 | 4 | load_test_config() 5 | 6 | 7 | def test_uses_string_analyze(): 8 | test_cases = [ 9 | ( 10 | "actions/checkout@v2", 11 | False, 12 | "actions/checkout", 13 | ), 14 | ( 15 | "github/codeql-action/analyze@v1", 16 | False, 17 | "github/codeql-action/analyze", 18 | ), 19 | ( 20 | "./.github/actions/action-setup", 21 | True, 22 | "./.github/actions/action-setup", 23 | ), 24 | ( 25 | "./.github/actions/build.yml", 26 | True, 27 | "./.github/actions/build.yml", 28 | ), 29 | ( 30 | "octo-org/this-repo/.github/workflows/workflow-1.yml@latest", 31 | False, 32 | "octo-org/this-repo/.github/workflows/workflow-1.yml", 33 | ), 34 | ( 35 | "docker://docker.io/library/golang:1.17.1-alpine@sha256:abcd", 36 | False, 37 | "docker://docker.io/library/golang:1.17.1-alpine", 38 | ), 39 | ] 40 | 41 | for test_case in test_cases: 42 | uses_string_obj = dependency.UsesString.analyze(test_case[0]) 43 | assert ( 44 | uses_string_obj.is_relative == test_case[1] 45 | and uses_string_obj.path == test_case[2] 46 | ) 47 | -------------------------------------------------------------------------------- /tests/unit/test_parsing_utils.py: -------------------------------------------------------------------------------- 1 | from src.workflow_components.parsing_utils import ( 2 | parse_workflow_trigger, 3 | parse_job_machine, 4 | ) 5 | 6 | 7 | def test_parse_workflow_trigger(): 8 | test_cases = [ 9 | ("push", ["push"]), 10 | (["push"], ["push"]), 11 | (["push", "pull_request"], ["push", "pull_request"]), 12 | ( 13 | {"push": {"branches": ["master"]}}, 14 | ["push"], 15 | ), 16 | (None, []), 17 | ] 18 | 19 | for test_case in test_cases: 20 | assert parse_workflow_trigger(test_case[0]) == test_case[1] 21 | 22 | 23 | def test_parse_job_machine(): 24 | test_cases = [ 25 | ("ubuntu-latest", ["ubuntu-latest"]), 26 | ( 27 | {"labels": ["ubuntu-latest", "self-hosted"]}, 28 | ["ubuntu-latest", "self-hosted"], 29 | ), 30 | (["ubuntu-latest", "self-hosted"], ["ubuntu-latest", "self-hosted"]), 31 | (None, None), 32 | ] 33 | 34 | for test_case in test_cases: 35 | assert parse_job_machine(test_case[0]) == test_case[1] 36 | -------------------------------------------------------------------------------- /tests/unit/test_report.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 
2 | from src.config.config import LAST_QUERY_ID, QUERIES_PATH_DEFAULT 3 | from yaml import safe_load 4 | 5 | query_dir = Path(__file__).parent.parent.parent / QUERIES_PATH_DEFAULT 6 | 7 | RQ_PREFIX = "RQ-" 8 | 9 | 10 | def test_report(): 11 | assert query_dir.exists(), f"Directory {query_dir} doesn't exist" 12 | query_files = list(query_dir.glob("query*.yml")) 13 | assert ( 14 | len(query_files) > 0 15 | ), f"Directory {query_dir} doesn't contain any query*.yml files" 16 | 17 | # get query ids from files in query_dir 18 | query_ids = [] 19 | for query_file in query_files: 20 | with open(query_file, "r") as query: 21 | parsed_query = safe_load(query) 22 | if not parsed_query: 23 | raise ValueError(f"{query_file} is not a valid query file") 24 | 25 | query_id = parsed_query.get("id") 26 | try: 27 | int(query_id.split(RQ_PREFIX)[1]) 28 | except (AttributeError, IndexError, ValueError):  # id missing, missing the RQ- prefix, or non-numeric suffix 29 | raise ValueError(f"Query {query_file} has invalid id") 30 | 31 | query_info = parsed_query.get("info") 32 | 33 | assert parsed_query["query"], f"Query in {query_file} is empty" 34 | assert query_info["name"], f"Query in {query_file} has no name" 35 | assert query_info["severity"], f"Query in {query_file} has no severity" 36 | assert query_info[ 37 | "description" 38 | ], f"Query in {query_file} has no description" 39 | assert query_info["tags"], f"Query in {query_file} has no tags" 40 | 41 | query_ids.append(parsed_query.get("id")) 42 | 43 | try: 44 | max_id_num = max([int(query_id.split(RQ_PREFIX)[1]) for query_id in query_ids]) 45 | except ValueError: 46 | raise ValueError("Added query has invalid id") 47 | 48 | # sequence 49 | assert set(query_ids) == set( 50 | [f"{RQ_PREFIX}{num}" for num in range(1, max_id_num + 1)] 51 | ), f"Query ids in {query_dir} are not continuous from 1 to {max_id_num}: {query_ids}" 52 | 53 | # last id in files == config.LAST_QUERY_ID 54 | assert ( 55 | LAST_QUERY_ID == max_id_num 56 | ), f"LAST_QUERY_ID in config ({LAST_QUERY_ID}) != max id in query files ({max_id_num})" 57 | -------------------------------------------------------------------------------- /tests/unit/test_utils.py: -------------------------------------------------------------------------------- 1 | from tests.utils import load_test_config 2 | import src.common.utils as utils 3 | 4 | load_test_config() 5 | 6 | 7 | def test_get_dependencies_in_code(): 8 | test_cases = [ 9 | ("this is ${{github.event.issue.title}}", "github.event.issue.title"), 10 | ("this is ${{ github.event.issue.title}}", "github.event.issue.title"), 11 | ("this is ${{github.event.issue-title}}", "github.event.issue-title"), 12 | ("this is ${{github.event.issue_title}}", "github.event.issue_title"), 13 | ("this is\n\n${{github.event.issue.title}}\n", "github.event.issue.title"), 14 | ] 15 | 16 | for test_case in test_cases: 17 | assert utils.get_dependencies_in_code(test_case[0]) == [test_case[1]] 18 | 19 | 20 | def test_convert_dict_to_list(): 21 | test_cases = [ 22 | ({"a": "b"}, ["a:b"]), 23 | ({"a": "b", "c": "d"}, ["a:b", "c:d"]), 24 | ("a:b", ["a:b"]), 25 | ] 26 | 27 | for test_case in test_cases: 28 | assert utils.convert_dict_to_list(test_case[0]) == test_case[1] 29 | 30 | 31 | def test_get_repo_full_name_from_path(): 32 | assert ( 33 | utils.get_repo_name_from_path( 34 | "edgedb/edgedb-pkg/integration/linux/test/ubuntu-jammy/action.yml" 35 | ) 36 | == "edgedb/edgedb-pkg" 37 | ) 38 | assert ( 39 | utils.get_repo_name_from_path( 40 | "slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml" 41 | ) 42 | == "slsa-framework/slsa-github-generator" 43 | ) 44
| 45 | 46 | def test_find_uses_strings(): 47 | test_cases = [ 48 | (" uses: actions/checkout@v2", ["actions/checkout@v2"]), 49 | (" uses: actions/checkout@abcd", ["actions/checkout@abcd"]), 50 | (" uses: actions/checkout@side-branch", ["actions/checkout@side-branch"]), 51 | ( 52 | " uses: .github/workflows/my-workflow.yml@main", 53 | [".github/workflows/my-workflow.yml@main"], 54 | ), 55 | ( 56 | " uses: actions/checkout@v2\n uses: actions/checkout@v1", 57 | ["actions/checkout@v2", "actions/checkout@v1"], 58 | ), 59 | ] 60 | 61 | for test_case in test_cases: 62 | assert utils.find_uses_strings(test_case[0]) == test_case[1] 63 | -------------------------------------------------------------------------------- /tests/unit/test_workflow.py: -------------------------------------------------------------------------------- 1 | import src.workflow_components.workflow as workflow 2 | from tests.utils import load_test_config, assert_reusable_workflow_inputs 3 | 4 | load_test_config() 5 | 6 | 7 | def test_job_from_dict_steps(): 8 | job_d = { 9 | "name": "issue-commented", 10 | "runs-on": "ubuntu-latest", 11 | "steps": [ 12 | { 13 | "name": "Generate GitHub App token", 14 | "uses": "electron/github-app-auth-action@cc6751b3b5e4edc5b9a4ad0a021ac455653b6dc8", 15 | "id": "generate-token", 16 | "with": {"creds": "${{ secrets.ISSUE_TRIAGE_GH_APP_CREDS }}"}, 17 | }, 18 | ], 19 | "_id": "6347a06af34cc01c884c110fd9db8964", 20 | "path": "electron/electron/.github/workflows/issue-commented.yml", 21 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 22 | } 23 | 24 | job = workflow.Job.from_dict(job_d) 25 | 26 | assert job._id == job_d["_id"] 27 | assert job.name == job_d["name"] 28 | assert job.path == job_d["path"] 29 | assert job.machine == [job_d["runs-on"]] 30 | assert job.uses is None 31 | assert job.ref is None 32 | assert job.with_prop is None 33 | assert job.url == job_d["url"] 34 | assert len(job.steps) == 1 35 | assert len(job.reusable_workflow) == 0 36 | 37 | 38 | def test_workflow_from_dict(): 39 | workflow_d = { 40 | "name": "Release notes", 41 | "on": {"push": {"branches": ["main"]}, "workflow_dispatch": None}, 42 | "permissions": {"contents": "read"}, 43 | "jobs": { 44 | "update_release_draft": { 45 | "permissions": {"contents": "write", "pull-requests": "write"}, 46 | "runs-on": "ubuntu-latest", 47 | "if": "github.repository == 'twbs/bootstrap'", 48 | "steps": [ 49 | { 50 | "uses": "release-drafter/release-drafter@v5", 51 | "env": {"GITHUB_TOKEN": "${{ secrets.GITHUB_TOKEN }}"}, 52 | } 53 | ], 54 | } 55 | }, 56 | "path": "twbs/bootstrap/.github/workflows/release-notes.yml", 57 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 58 | "is_public": True, 59 | } 60 | 61 | wf = workflow.Workflow.from_dict(workflow_d) 62 | 63 | assert wf.name == workflow_d["name"] 64 | assert wf.path == workflow_d["path"] 65 | assert wf.trigger == ["push", "workflow_dispatch"] 66 | assert wf.permissions == ["contents:read"] 67 | assert wf.url == workflow_d["url"] 68 | assert len(wf.jobs) == 1 69 | 70 | 71 | def test_job_from_dict_uses(): 72 | job_d = { 73 | "name": "test-firefox-safari", 74 | "uses": "./.github/workflows/build_reusable.yml", 75 | "with": { 76 | "skipForDocsOnly": "yes", 77 | }, 78 | "secrets": "inherit", 79 | "_id": "f796b4c01ecb6021e6a30ec7466ab11a", 80 | "path": "vercel/next.js/.github/workflows/build_and_test.yml", 81 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 82 | } 83 | 84 | job = workflow.Job.from_dict(job_d) 85 | 86 | assert job._id == job_d["_id"] 87 | assert job.name == 
job_d["name"] 88 | assert job.path == job_d["path"] 89 | assert job.machine is None 90 | assert job.uses == job_d["uses"] 91 | assert job.ref is None 92 | assert job.url == job_d["url"] 93 | assert job.with_prop == ["skipForDocsOnly:yes"] 94 | assert len(job.steps) == 0 95 | 96 | 97 | def test_step_from_dict_uses(): 98 | step_d = { 99 | "name": "Generate GitHub App token", 100 | "uses": "electron/github-app-auth-action@cc6751b3b5e4edc5b9a4ad0a021ac455653b6dc8", 101 | "with": {"creds": "${{ secrets.ISSUE_TRIAGE_GH_APP_CREDS }}"}, 102 | "_id": "9a42f7bb6c8e5be00c1d36d54ac7bdb6", 103 | "path": "electron/electron/.github/workflows/issue-commented.yml", 104 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 105 | } 106 | 107 | step = workflow.Step.from_dict(step_d) 108 | 109 | assert step._id == step_d["_id"] 110 | assert step.name == step_d["name"] 111 | assert step.path == step_d["path"] 112 | assert step.run is None 113 | assert step.uses == step_d["uses"] 114 | assert step.url == step_d["url"] 115 | assert step.ref == "cc6751b3b5e4edc5b9a4ad0a021ac455653b6dc8" 116 | assert step.with_prop == ["creds:${{ secrets.ISSUE_TRIAGE_GH_APP_CREDS }}"] 117 | 118 | 119 | def test_step_from_dict_run(): 120 | step_d = { 121 | "name": "Autolabel based on affected areas", 122 | "run": "echo ${{ github.event.issue.body }}", 123 | "_id": "1386cfbaf5513e27c090133287e01fe", 124 | "path": "vercel/next.js/.github/workflows/issue_validator.yml", 125 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 126 | } 127 | 128 | step = workflow.Step.from_dict(step_d) 129 | 130 | assert step._id == step_d["_id"] 131 | assert step.name == step_d["name"] 132 | assert step.path == step_d["path"] 133 | assert step.uses is None 134 | assert step.run == step_d["run"] 135 | assert step.ref is None 136 | assert step.url == step_d["url"] 137 | assert step.with_prop is None 138 | assert len(step.using_param) == 1 139 | 140 | 141 | def test_reusable_workflow_from_dict(): 142 | workflow_d = { 143 | "name": "Release notes", 144 | "on": { 145 | "workflow_call": { 146 | "inputs": { 147 | "input_1": { 148 | "required": True, 149 | "default": "default_value_1", 150 | "description": "description_1", 151 | }, 152 | "input_2": { 153 | "required": False, 154 | "default": "default_value_2", 155 | "description": "description_2", 156 | }, 157 | } 158 | } 159 | }, 160 | "permissions": {"contents": "read"}, 161 | "jobs": { 162 | "update_release_draft": { 163 | "permissions": {"contents": "write", "pull-requests": "write"}, 164 | "runs-on": "ubuntu-latest", 165 | "if": "github.repository == 'twbs/bootstrap'", 166 | "steps": [ 167 | { 168 | "uses": "release-drafter/release-drafter@v5", 169 | "env": {"GITHUB_TOKEN": "${{ secrets.GITHUB_TOKEN }}"}, 170 | } 171 | ], 172 | } 173 | }, 174 | "path": "twbs/bootstrap/.github/workflows/release-notes.yml", 175 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 176 | "is_public": True, 177 | } 178 | 179 | wf = workflow.Workflow.from_dict(workflow_d) 180 | 181 | assert wf.name == workflow_d["name"] 182 | assert wf.path == workflow_d["path"] 183 | assert wf.trigger == ["workflow_call"] 184 | assert wf.permissions == ["contents:read"] 185 | assert wf.url == workflow_d["url"] 186 | assert len(wf.jobs) == 1 187 | 188 | assert_reusable_workflow_inputs(wf, workflow_d) 189 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from py2neo.ogm import GraphObject 2 | import json 3 |
from src.config.config import Config 4 | from src.workflow_components.composite_action import CompositeAction 5 | from src.workflow_components.workflow import Workflow 6 | from typing import Tuple, List, Dict, Optional 7 | from tests.integration.integration_consts import START_NODE_INDEX, DEST_NODE_INDEX 8 | from src.common.utils import raw_str_to_bool 9 | from hashlib import md5 10 | 11 | 12 | class GraphDbMock(object): 13 | def __init__(self): 14 | pass 15 | 16 | def push_object(self, obj: GraphObject): 17 | pass 18 | 19 | def get_object(self, obj: GraphObject) -> Optional[GraphObject]: 20 | return None 21 | 22 | def get_or_create(self, obj: GraphObject) -> Tuple[GraphObject, bool]: 23 | return None, True 24 | 25 | 26 | def load_test_config() -> None: 27 | Config.graph = GraphDbMock() 28 | 29 | 30 | def get_nodes_as_dicts(node_type: str, paths: Optional[List[str]] = None) -> List[Dict]: 31 | """ 32 | - node_type (str): The type of the node to filter by. 33 | - paths (List[str], optional): List of paths to filter nodes by. 34 | 35 | Returns a list of nodes as dictionaries that match the given type and paths. 36 | """ 37 | nodes = Config.graph.get_all_nodes(node_type) 38 | if paths: 39 | return [dict(node) for node in nodes if node.get("path") in paths] 40 | else: 41 | return [dict(node) for node in nodes] 42 | 43 | 44 | def query_graph_for_nodes(query: str) -> List[Dict]: 45 | """ 46 | Returns dictionary representations of the nodes returned by the query. 47 | """ 48 | nodes_query = Config.graph.run_query(query) 49 | nodes = [] 50 | for node in nodes_query: 51 | node_obj = node.values()[0] 52 | extracted_node = dict(node_obj) 53 | extracted_node["labels"] = list(node_obj._labels) 54 | nodes.append(extracted_node) 55 | return nodes 56 | 57 | 58 | def query_graph_for_relationships(query: str) -> List[Dict]: 59 | """ 60 | Returns dictionary representations of the relationships returned by the query. 61 | """ 62 | relationships_query = Config.graph.run_query(query) 63 | relationships = [] 64 | for rq in relationships_query: 65 | r_dict = { 66 | "start_node": rq[0].get("_id"), 67 | "type": rq[1].__class__.__name__, 68 | "end_node": rq[2].get("_id"), 69 | } 70 | relationships.append(r_dict) 71 | return relationships 72 | 73 | 74 | def get_graph_structure(nodes_query: str, relationships_query: str) -> Dict: 75 | """ 76 | Receives a query for nodes and a query for relationships. 77 | Returns a dictionary representation of the graph structure. 78 | """ 79 | nodes = query_graph_for_nodes(nodes_query) 80 | relationships = query_graph_for_relationships(relationships_query) 81 | return {"nodes": nodes, "relationships": relationships} 82 | 83 | 84 | def get_sorted_lists_of_nodes_and_relationships( 85 | graph_structure: Dict, 86 | ) -> Tuple[List, List]: 87 | """ 88 | Receives a graph structure and returns sorted lists of nodes and relationships. 89 | """ 90 | nodes = graph_structure.get("nodes") 91 | relationships = graph_structure.get("relationships") 92 | 93 | nodes.sort(key=lambda x: x.get("_id")) 94 | relationships.sort(key=lambda x: (x.get("start_node"), x.get("end_node"))) 95 | 96 | return nodes, relationships 97 | 98 | 99 | def get_dicts_differences(dict1: Dict, dict2: Dict) -> Dict: 100 | """ 101 | Receives two dictionaries and returns the differences between them.
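For example (illustrative values), get_dicts_differences({"a": 1, "b": 2}, {"a": 1, "b": 3}) returns {"b": [2, 3]}, i.e. each differing key maps to [value_in_dict1, value_in_dict2].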
102 | """ 103 | keys = set(dict1.keys()).union(set(dict2.keys())) 104 | differences = {} 105 | for key in keys: 106 | if dict1.get(key) != dict2.get(key): 107 | differences[key] = [dict1.get(key), dict2.get(key)] 108 | 109 | return differences 110 | 111 | 112 | def assert_graph_structures(graph_structure: Dict, snapshot_path: str) -> None: 113 | """ 114 | Receives a graph structure and a path to a JSON file containing a graph structure snapshot, and asserts that they match. 115 | """ 116 | with open(snapshot_path, "r") as f: 117 | snapshot_structure = json.load(f) 118 | 119 | snapshot_nodes, snapshot_relations = get_sorted_lists_of_nodes_and_relationships( 120 | snapshot_structure 121 | ) 122 | graph_nodes, graph_relations = get_sorted_lists_of_nodes_and_relationships( 123 | graph_structure 124 | ) 125 | 126 | # Asserting nodes 127 | for node in snapshot_nodes: 128 | assert ( 129 | node == graph_nodes[snapshot_nodes.index(node)] 130 | ), f"Properties of nodes at the same index are not equal\n{get_dicts_differences(node, graph_nodes[snapshot_nodes.index(node)])}\n\nIn snapshot:\n{node}\nIn graph:\n{graph_nodes[snapshot_nodes.index(node)]}" 131 | 132 | # Asserting relationships 133 | for relationship in snapshot_relations: 134 | assert ( 135 | relationship == graph_relations[snapshot_relations.index(relationship)] 136 | ), f"Properties of relationships at the same index of the graph and the snapshot are not equal\n\n{get_dicts_differences(relationship, graph_relations[snapshot_relations.index(relationship)])}\nIn snapshot:\n{relationship}\nIn graph:\n{graph_relations[snapshot_relations.index(relationship)]}" 137 | 138 | 139 | def assert_action_inputs(ca: CompositeAction, ca_d: Dict): 140 | """ 141 | This function asserts that the action inputs are equal to those in the JSON file. 142 | Each composite action is connected to multiple action inputs. 143 | Each input contains different properties such as name, default, description, and required. 144 | 145 | Using `ca.composite_action_input.triples()`, we iterate over all the inputs of the composite action. 146 | For each input, we check the following: 147 | 1) The source node ID matches the composite action ID, and the input's name and URL match those in the JSON file. 148 | 2) The id of the composite action input is the md5 hash of the composite action id and the input name. 149 | 3) The default, description, and required properties are equal to those in the JSON file. 150 | 151 | Each input is a tuple containing a source node (in this case, always the composite action identifier), 152 | the relationship type, and the destination node (the identifier of the input itself).
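For example (illustrative), an input named "token" on a composite action whose _id is "abc" is expected to get the _id md5(b"abc_token").hexdigest().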
153 | """ 154 | for input in ca.composite_action_input.triples(): 155 | ca_d_input = ca_d["inputs"][input[DEST_NODE_INDEX].name] 156 | 157 | assert input[START_NODE_INDEX]._id == ca._id 158 | assert input[DEST_NODE_INDEX].name == ca_d_input["name"] 159 | assert input[DEST_NODE_INDEX].url == ca_d["url"] 160 | assert ( 161 | input[DEST_NODE_INDEX]._id 162 | == md5(f"{ca._id}_{ca_d_input.get('name')}".encode()).hexdigest() 163 | ) 164 | 165 | if "required" in ca_d_input: 166 | assert input[DEST_NODE_INDEX].required == raw_str_to_bool( 167 | ca_d_input["required"] 168 | ) 169 | 170 | if "default" in ca_d_input: 171 | assert input[DEST_NODE_INDEX].default == ca_d_input["default"] 172 | 173 | if "description" in ca_d_input: 174 | assert input[DEST_NODE_INDEX].description == ca_d_input["description"] 175 | 176 | 177 | def assert_reusable_workflow_inputs(w: Workflow, workflow_d: Dict): 178 | """ 179 | This function asserts that the reusable workflow inputs are equal to those in the JSON file. 180 | Each reusable workflow is connected to multiple reusable workflow inputs. 181 | Each input contains different properties such as name, default, description, and required. 182 | 183 | Using `w.reusable_workflow_input.triples()`, we iterate over all the inputs of the reusable workflow. 184 | For each input, we check the following: 185 | 1) The source node ID matches the workflow ID, and the input's name and URL match those in the JSON file. 186 | 2) The id of the reusable workflow input is the md5 hash of the workflow id and the input name. 187 | 3) The default, description, and required properties are equal to those in the JSON file. 188 | 189 | Each input is a tuple containing a source node (in this case, always the reusable workflow identifier), 190 | the relationship type, and the destination node (the identifier of the input itself). 191 | """ 192 | for input in w.reusable_workflow_input.triples(): 193 | workflow_d_input = workflow_d["on"]["workflow_call"]["inputs"][ 194 | input[DEST_NODE_INDEX].name 195 | ] 196 | 197 | assert input[START_NODE_INDEX]._id == w._id 198 | assert input[DEST_NODE_INDEX].name == workflow_d_input["name"] 199 | assert input[DEST_NODE_INDEX].url == workflow_d["url"] 200 | assert ( 201 | input[DEST_NODE_INDEX]._id 202 | == md5(f"{w._id}_{workflow_d_input.get('name')}".encode()).hexdigest() 203 | ) 204 | 205 | if "required" in workflow_d_input: 206 | assert input[DEST_NODE_INDEX].required == raw_str_to_bool( 207 | workflow_d_input["required"] 208 | ) 209 | 210 | if "default" in workflow_d_input: 211 | assert input[DEST_NODE_INDEX].default == workflow_d_input["default"] 212 | 213 | if "description" in workflow_d_input: 214 | assert input[DEST_NODE_INDEX].description == workflow_d_input["description"] 215 | --------------------------------------------------------------------------------