├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── release.yml │ └── test_pr.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE.md ├── MANIFEST.in ├── Makefile ├── README.md ├── assets └── images │ ├── Cygives-darkmode.svg │ ├── Cygives-lightmode.svg │ ├── infrastructure.png │ ├── raven-dark-mode.png │ └── raven-light-mode.png ├── deployment ├── docker-compose.yml ├── test.docker-compose.yml └── test.dockerfile ├── dev-requirements.in ├── dev-requirements.txt ├── docs ├── Codesee Injections │ └── README.md ├── Issue Injections │ ├── README.md │ └── issue_injection.png ├── Multi Prerequisite Exploits │ ├── Exploiting Download Action in Workflows with Missing path Parameter.md │ └── README.md ├── Pull Request Injections │ └── README.md ├── README.md └── templates │ └── issues.md ├── library ├── query_body_context_injection.yml ├── query_build_artifact_leaks_the_github_token.yml ├── query_checkout_on_issue.yml ├── query_codesee_injection.yml ├── query_email_context_injection.yml ├── query_enterprise_github_server.yml ├── query_injectable_context_composite_action.yml ├── query_injectable_input_composite_action.yml ├── query_label_context_injection.yml ├── query_message_context_injection.yml ├── query_priv_esc_workflow_run.yml ├── query_pull_request_target_injection.yml ├── query_ref_context_injection.yml ├── query_self_hosted_workflow.yml ├── query_title_context_injection.yml ├── query_unpinnable_action.yml └── query_usage_of_outdated_node.yml ├── main.py ├── requirements.in ├── requirements.txt ├── setup.py ├── src ├── __init__.py ├── cmdline.py ├── common │ ├── __init__.py │ ├── ignore_warnings.py │ └── utils.py ├── config │ ├── __init__.py │ └── config.py ├── downloader │ ├── __init__.py │ ├── download.py │ ├── gh_api.py │ └── utils.py ├── indexer │ ├── __init__.py │ └── index.py ├── logger │ ├── __init__.py │ └── log.py ├── queries │ └── __init__.py ├── reporter │ ├── __init__.py │ ├── report.py │ └── slack_reporter.py ├── storage │ ├── __init__.py │ ├── neo4j_graph.py │ ├── neo4j_utils.py │ ├── redis_connection.py │ └── redis_utils.py └── workflow_components │ ├── __init__.py │ ├── composite_action.py │ ├── dependency.py │ ├── parsing_utils.py │ └── workflow.py └── tests ├── __init__.py ├── integration ├── __init__.py ├── integration_consts.py ├── structures_json │ ├── demo-index.json │ ├── integration-1.json │ └── reusable-workflows.json └── test_graph_structures.py ├── tests_init.py ├── unit ├── __init__.py ├── test_composite_action.py ├── test_dependency.py ├── test_parsing_utils.py ├── test_report.py ├── test_utils.py └── test_workflow.py └── utils.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help RAVEN improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Version [e.g. 22] 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 
32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | test_release: 9 | permissions: 10 | contents: read 11 | name: Test Release 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: cycodelabs/cimon-action@v0 15 | with: 16 | # Turn on prevent once the policy is verified 17 | # prevent: true 18 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 19 | secret: ${{ secrets.CIMON_SECRET }} 20 | fail-on-error: true 21 | 22 | - name: Checkout 23 | uses: actions/checkout@v4 24 | 25 | - name: Test Organization 26 | env: 27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 28 | run: | 29 | make test-build 30 | 31 | publish: 32 | name: Release to PyPI 33 | runs-on: ubuntu-latest 34 | needs: test_release 35 | permissions: 36 | id-token: write 37 | if: ${{ startsWith(github.ref_name, 'v') }} 38 | steps: 39 | - uses: cycodelabs/cimon-action@v0 40 | with: 41 | # Turn on prevent once the policy is verified 42 | # prevent: true 43 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 44 | secret: ${{ secrets.CIMON_SECRET }} 45 | fail-on-error: true 46 | 47 | - name: Checkout 48 | uses: actions/checkout@v4 49 | with: 50 | ref: ${{ github.ref }} 51 | 52 | - name: Set up Python 53 | uses: actions/setup-python@v5 54 | with: 55 | python-version: '3.12' 56 | 57 | - name: Install dependencies 58 | run: | 59 | python -m pip install --upgrade pip 60 | pip install build 61 | 62 | - name: Build package 63 | env: 64 | RAVEN_VERSION: ${{ github.ref_name }} 65 | run: python -m build 66 | 67 | - name: Publish package 68 | uses: pypa/gh-action-pypi-publish@release/v1 -------------------------------------------------------------------------------- /.github/workflows/test_pr.yml: -------------------------------------------------------------------------------- 1 | name: Test PR 2 | 3 | on: [pull_request] 4 | 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | python-style: 10 | name: Style-check and lint Python files 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: cycodelabs/cimon-action@v0 14 | with: 15 | prevent: true 16 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 17 | secret: ${{ secrets.CIMON_SECRET }} 18 | fail-on-error: true 19 | allowed-hosts: > 20 | files.pythonhosted.org 21 | pypi.org 22 | 23 | - name: Checkout 24 | uses: actions/checkout@v4 25 | 26 | - name: Install dependencies 27 | run: python -m pip install black flake8 28 | 29 | - name: Black 30 | run: python -m black --diff --check . 
31 | 32 | # A job that runs integration tests in an isolated environment against 33 | # a predefined organization: RavenIntegrationTests 34 | test_raven: 35 | runs-on: ubuntu-latest 36 | steps: 37 | - uses: cycodelabs/cimon-action@v0 38 | with: 39 | # Turn on prevent once the cimon docker compose bug is fixed 40 | # prevent: true 41 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 42 | secret: ${{ secrets.CIMON_SECRET }} 43 | fail-on-error: true 44 | allowed-hosts: > 45 | auth.docker.io 46 | pypi.org 47 | 48 | - name: Checkout 49 | uses: actions/checkout@v4 50 | 51 | - name: Test Organization 52 | env: 53 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 54 | run: | 55 | make test-build 56 | 57 | # A job for testing the setup process and unit tests of RAVEN against 58 | # different versions of Python 59 | test_raven_package: 60 | runs-on: ubuntu-latest 61 | 62 | strategy: 63 | matrix: 64 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 65 | 66 | steps: 67 | - uses: cycodelabs/cimon-action@v0 68 | with: 69 | # Turn on prevent once the cimon docker compose bug is fixed 70 | # prevent: true 71 | client-id: ${{ secrets.CIMON_CLIENT_ID }} 72 | secret: ${{ secrets.CIMON_SECRET }} 73 | fail-on-error: true 74 | allowed-hosts: > 75 | raw.githubusercontent.com 76 | files.pythonhosted.org 77 | pypi.org 78 | 79 | - name: Checkout 80 | uses: actions/checkout@v4 81 | 82 | - name: Set up Python ${{ matrix.python-version }} 83 | uses: actions/setup-python@v5 84 | with: 85 | python-version: ${{ matrix.python-version }} 86 | 87 | - name: Build Package 88 | run: python -m pip install -r dev-requirements.txt . 89 | 90 | - name: Setup environment 91 | run: make setup 92 | 93 | - name: Test Raven 94 | env: 95 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 96 | run: | 97 | raven download account --token $GITHUB_TOKEN --account-name RavenIntegrationTests 98 | raven index 99 | raven report --format json | jq > /dev/null 100 | 101 | - name: Run Unit Tests 102 | run: pytest -v tests/unit -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /data 2 | /data_backup 3 | .vscode/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # Local debugging - Run tests outside containers 89 | temp_test_raven.py 90 | temp/ 91 | 92 | # pyenv 93 | .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # Mac 132 | .DS_Store 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | We encourage contributions from the community to help improve our tooling and research. We manage contributions primarily through GitHub Issues and Pull Requests. 4 | 5 | If you have a feature request, bug report, or any improvement suggestions, please create an issue to discuss it. To start contributing, you may check the [good first issue](https://github.com/CycodeLabs/Raven/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) label to get into the code base quickly. 6 | 7 | To contribute code changes, fork our repository, make your modifications, and then submit a pull request. 
8 | 9 | ## Development 10 | 11 | To prepare a development environment, follow these instructions: 12 | 13 | **Step 1**: Clone the project 14 | 15 | ```bash 16 | git clone https://github.com/CycodeLabs/raven.git 17 | cd raven 18 | ``` 19 | 20 | **Step 2**: Create a virtual environment and install requirements 21 | 22 | ```bash 23 | python3 -m venv .venv 24 | source .venv/bin/activate 25 | pip3 install -r requirements.txt 26 | ``` 27 | 28 | **Step 3**: Make code modifications 29 | 30 | **Step 4**: Setup the Redis server and the Neo4j database 31 | 32 | ```bash 33 | make setup 34 | ``` 35 | 36 | **Step 5**: Run Raven 37 | 38 | ```bash 39 | python3 main.py -h 40 | ``` 41 | 42 | **Step 6**: Test Raven 43 | 44 | ```bash 45 | make test-build 46 | ``` 47 | 48 | Feel free to reach out to the development team through research@cycode.com. We appreciate your collaboration and look forward to your valuable contributions! 49 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE requirements.txt main.py 2 | recursive-include tests *.py 3 | recursive-include src * -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | setup: 2 | @echo "Building Services..." 
3 | docker compose -p raven --file deployment/docker-compose.yml up -d 4 | 5 | clean-setup: 6 | @echo "Stopping Services..." 7 | docker compose -p raven --file deployment/docker-compose.yml down 8 | 9 | stop: 10 | @echo "Stopping Services..." 11 | docker compose -p raven --file deployment/docker-compose.yml down 12 | 13 | test-build: 14 | @echo "Running Tests in isolated environment..." 15 | docker compose -p test-raven --file deployment/test.docker-compose.yml up --force-recreate --build --abort-on-container-exit 16 | 17 | test-run: 18 | @echo "DO NOT USE DIRECTLY; PLEASE USE: make test-build" 19 | @echo "Running Tests..." 20 | @pytest -v tests -------------------------------------------------------------------------------- /assets/images/infrastructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/assets/images/infrastructure.png -------------------------------------------------------------------------------- /assets/images/raven-dark-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/assets/images/raven-dark-mode.png -------------------------------------------------------------------------------- /assets/images/raven-light-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/assets/images/raven-light-mode.png -------------------------------------------------------------------------------- /deployment/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | neo4j: 5 | image: neo4j:5.26.2 6 | container_name: raven-neo4j 7 | environment: 8 | NEO4J_AUTH: neo4j/123456789 # Replace '123456789' with your desired Neo4j password 9 | ports: 10 | - "7474:7474" 11 | - "7687:7687" 12 | healthcheck: 13 | test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:7474"] 14 | interval: 10s 15 | timeout: 5s 16 | retries: 3 17 | volumes: 18 | - raven-neo4j:/data 19 | 20 | redis: 21 | image: redis:7.4.2 22 | container_name: raven-redis 23 | depends_on: 24 | neo4j: 25 | condition: service_healthy 26 | ports: 27 | - "6379:6379" 28 | healthcheck: 29 | test: ["CMD", "redis-cli", "ping"] 30 | interval: 10s 31 | timeout: 5s 32 | retries: 3 33 | volumes: 34 | - raven-redis:/data 35 | 36 | volumes: 37 | raven-redis: 38 | raven-neo4j: -------------------------------------------------------------------------------- /deployment/test.docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | neo4j-test: 5 | image: neo4j:5.26.2 6 | container_name: raven-neo4j-test 7 | environment: 8 | NEO4J_AUTH: neo4j/123456789 # Replace '123456789' with your desired Neo4j password 9 | ports: 10 | - "7474:7474" 11 | - "7687:7687" 12 | healthcheck: 13 | test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:7474"] 14 | interval: 10s 15 | timeout: 5s 16 | retries: 3 17 | 18 | redis-test: 19 | image: redis:7.4.2 20 | container_name: raven-redis-test 21 | ports: 22 | - "6379:6379" 23 | healthcheck: 24 | test: ["CMD", "redis-cli", "ping"] 25 | interval: 10s 26 | timeout: 5s 27 | retries: 3 28 | 29 | raven-test: 30 | build: 31 | dockerfile: 
./deployment/test.dockerfile 32 | context: .. 33 | depends_on: 34 | neo4j-test: 35 | condition: service_healthy 36 | redis-test: 37 | condition: service_healthy 38 | environment: 39 | - GITHUB_TOKEN=$GITHUB_TOKEN 40 | container_name: raven-engine-test 41 | -------------------------------------------------------------------------------- /deployment/test.dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.21 2 | 3 | # Set the working directory 4 | RUN mkdir -p /raven 5 | RUN mkdir -p /raven/src 6 | RUN mkdir -p /raven/tests 7 | 8 | # Copy the current directory contents into the container at /raven 9 | WORKDIR /raven 10 | COPY Makefile dev-requirements.txt /raven/ 11 | COPY src /raven/src 12 | COPY library /raven/library 13 | COPY tests /raven/tests 14 | 15 | # Install any needed packages specified in dev-requirements.txt 16 | RUN pip3 install -r dev-requirements.txt 17 | 18 | # Run RAVEN tests 19 | CMD ["make", "test-run"] -------------------------------------------------------------------------------- /dev-requirements.in: -------------------------------------------------------------------------------- 1 | -r requirements.in 2 | pytest -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.13 3 | # by the following command: 4 | # 5 | # pip-compile --no-annotate --output-file=dev-requirements.txt dev-requirements.in 6 | # 7 | certifi==2025.1.31 8 | charset-normalizer==3.4.1 9 | colorama==0.4.6 10 | idna==3.10 11 | iniconfig==2.0.0 12 | interchange==2021.0.4 13 | loguru==0.7.3 14 | monotonic==1.6 15 | packaging==24.2 16 | pansi==2024.11.0 17 | pillow==11.1.0 18 | pluggy==1.5.0 19 | py2neo==2021.2.4 20 | pygments==2.19.1 21 | pytest==8.3.4 22 | pytz==2025.1 23 | pyyaml==6.0.2 24 | redis==5.2.1 25 | requests==2.32.3 26 | six==1.17.0 27 | slack-sdk==3.34.0 28 | tqdm==4.67.1 29 | urllib3==2.3.0 30 | -------------------------------------------------------------------------------- /docs/Codesee Injections/README.md: -------------------------------------------------------------------------------- 1 | # CodeSee Injections 2 | 3 | ## Overview 4 | CodeSee is a startup that provides codebase visualization and effective tooling for code review and collaboration. Similar to other developer-centric products (e.g., Codecov), the CodeSee integration creates a new GitHub Actions workflow that embeds its capabilities for every pull request, allowing developers to efficiently review the added code. Cycode discovered a branch name injection vulnerability in `codesee-map-action` that may allow Remote Code Execution (RCE) on the pipeline. 5 | 6 | ## Description 7 | Calling the `Codesee-io/codesee-map-action` action in vulnerable versions with a branch name containing a code injection payload such as `a";ls;"` allowed injecting code into the CodeSee NPM package. A threat actor could craft a malicious script file that would be fetched together with a forked pull request and executed inside the pipeline. 8 | 9 | ## Remediation 10 | This issue was fixed in version `0.376.0`. CodeSee’s internal review found that this injection vulnerability entered their system as a result of a logic bug in the code used to escape the user-supplied branch name. 
In addition to repairing that logic directly, they changed all command executions in their code analysis to run without a shell, ensuring no subtle escaping logic was required. As the CLI is written in Node, this involved replacing calls to child_process.exec with child_process.execFile. 11 | 12 | To further mitigate future vulnerabilities, CodeSee introduced the following permissions in the workflow: 13 | ``` yaml 14 | permissions: read-all 15 | ``` 16 | Even if such a vulnerability is found again, the GITHUB_TOKEN won’t have sufficient permissions to perform any malicious activity. 17 | 18 | ## References 19 | - [Cycode Collaborates with CodeSee to Secure the Pipelines of Thousands of Open-Source Projects](https://cycode.com/blog/cycode-secures-thousands-of-open-source-projects/) 20 | 21 | ## Real-World Examples 22 | ### freeCodeCamp/freeCodeCamp - 374K ⭐️ 23 | 24 | * **Description**: This workflow used the latest version of `Codesee-io/codesee-map-action` with the `mapUpload` parameter. 25 | * **Commit Link**: [0871341c9cbf96ab455bc3e0bce636e2ef2a2be2](https://github.com/freeCodeCamp/freeCodeCamp/commit/0871341c9cbf96ab455bc3e0bce636e2ef2a2be2) 26 | * **Remediation**: Removed usage of the CodeSee map action. 27 | 28 | ### slimtoolkit/slim - 17.3K ⭐️ 29 | 30 | * **Description**: This workflow used the latest version of `Codesee-io/codesee-map-action` with the `mapUpload` parameter. 31 | * **Commit Link**: [bb846649cb3dfaad83c3b2ccbee552786c7dc635](https://github.com/slimtoolkit/slim/commit/bb846649cb3dfaad83c3b2ccbee552786c7dc635) 32 | * **Remediation**: Removed usage of the CodeSee map action. 33 | 34 | ### statelyai/xstate - 24.8K ⭐️ 35 | 36 | * **Description**: This workflow used the latest version of `Codesee-io/codesee-map-action` with the `mapUpload` parameter. 37 | * **Commit Link**: N/A 38 | * **Remediation**: Updated through the CodeSee package fix. 39 | 40 | ## Detections 41 | 42 | ### CodeSee Usage 43 | First, verify that the workflow hasn't altered the default workflow permissions and that it uses the `Codesee-io/codesee-map-action` action. Then, manually verify that the workflow is using one of the vulnerable versions. 44 | 45 | ``` cypher 46 | MATCH (w:Workflow) 47 | WHERE 48 | w.permissions is null AND 49 | EXISTS { 50 | (w)-[*]->(ca:CompositeAction) 51 | WHERE ( 52 | ca.path = "Codesee-io/codesee-map-action" 53 | ) 54 | } 55 | RETURN DISTINCT w.path, w.url; 56 | ``` -------------------------------------------------------------------------------- /docs/Issue Injections/README.md: -------------------------------------------------------------------------------- 1 | # Issue Injections 2 | 3 | 4 | ## Overview 5 | GitHub Actions workflows can be triggered by issue events. The workflow can access and use the information in the issue's title or body, for example printing it or acting on it within the workflow. Malicious actors may exploit this functionality by inserting harmful payloads into the issue's title or body, which can lead to the execution of malicious code within the workflow. 6 | 7 | 8 | ## Description 9 | This issue arises when GitHub Actions workflows, triggered by issue events, process the issue details, such as the title or body, without proper input validation or sanitization. 
10 | 11 | Let’s take the following workflow as an example: 12 | ``` yaml 13 | name: Issues Injections 14 | 15 | on: 16 | issues: 17 | types: [opened] 18 | 19 | jobs: 20 | print_issue_title: 21 | runs-on: ubuntu-latest 22 | 23 | name: Print issue title 24 | steps: 25 | - run: echo "${{github.event.issue.title}}" 26 | ``` 27 | 28 | Threat actors can exploit this by injecting malicious payloads into the issue title. By injecting malicious code into the workflow, an attacker can exfiltrate the pipeline secrets or, with the proper permissions, use the `GITHUB_TOKEN` environment variable to push new code to the repository. 29 | 30 | ## Remediation 31 | * Avoid directly executing or interpreting user-supplied data as code or command arguments. 32 | * Always load user input into environment variables first. 33 | 34 | ## References 35 | - [Cycode Discovers Vulnerabilities in CI/CD Pipelines of Popular Open-Source Projects](https://cycode.com/blog/github-actions-vulnerabilities/) 36 | 37 | ## Real-World Examples 38 | 39 | 40 | ### fauna/faunadb-js - 694 ⭐️ 41 | * **Description**: This workflow runs when an issue is being opened. Lines 26 and 27 use the issue body and title in an insecure manner, at `create-jira-tickets.yml`. 42 | * **Fix Commit Link**: [ee6f53f9c985bde41976743530e3846dee058587](https://github.com/fauna/faunadb-js/commit/ee6f53f9c985bde41976743530e3846dee058587) 43 | * **Remediation**: Removed the workflow. 44 | 45 | ### wireapp/wire-ios - 3.2K ⭐️ 46 | * **Description**: This workflow runs when an issue is being opened. Line 15 uses the issue title at `issue.yml`. 47 | * **Fix Commit Link**: [9d39d6c93b5a58a0bc8c1aba10e0d67756359630](https://github.com/wireapp/wire-ios/commit/9d39d6c93b5a58a0bc8c1aba10e0d67756359630) 48 | * **Remediation**: Removed the direct call to `${{ github.event.issue.title }}`. 49 | 50 | ### withastro/astro - 35.2K ⭐️ 51 | * **Description**: This workflow runs when an issue is being opened. Line 26 uses the issue title at `reviewBot.yml`. 52 | * **Fix Commit Link**: [650fb1aa51a1c843c10bc89a11732b45a6345b00](https://github.com/withastro/astro/commit/650fb1aa51a1c843c10bc89a11732b45a6345b00) 53 | * **Remediation**: Replaced the direct call to `${{ github.event.issue.title }}` with the environment variable `ISSUE_TITLE: ${{ github.event.issue.title }}`. 54 | 55 | ### kiegroup/kogito-runtimes - 458 ⭐️ 56 | * **Description**: This workflow runs when an issue is being opened. Line 11 uses the issue title at `issues.yml`. 57 | * **Fix Commit Link**: [53c18e5372e5306e0aa580f201f820b80359ad11](https://github.com/kiegroup/kogito-runtimes/commit/53c18e5372e5306e0aa580f201f820b80359ad11) 58 | * **Remediation**: Removed the direct call to `${{ github.event.issue.title }}`. 59 | 60 | 61 | ### Ombi-app/Ombi - 3.4k ⭐️ 62 | * **Description**: This workflow runs when an issue is being opened. Line 13 uses the issue body at `issue-check.yml`. 63 | * **Fix Commit Link**: [5cc0d7727d72fe1fee8a3f6c3874d44a5b785de4](https://github.com/Ombi-app/Ombi/commit/5cc0d7727d72fe1fee8a3f6c3874d44a5b785de4) 64 | * **Remediation**: Removed the direct call to `${{ github.event.issue.title }}`. 65 | 66 | 67 | ## Detections 68 | 69 | ### Issue + Command Injection 70 | This detection identifies workflows triggered by issue-related events (issues, issue comments) whose steps depend on attacker-controlled issue data such as the title or body. 
71 | ``` cypher 72 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 73 | WHERE 74 | ( 75 | "issue_comment" in w.trigger OR 76 | "issues" in w.trigger 77 | ) AND 78 | ( 79 | d.param IN ["github.event.issue.title", "github.event.issue.body"] 80 | ) 81 | RETURN DISTINCT w.path, w.url; 82 | ``` 83 | 84 | ![Issue injection example](issue_injection.png) 85 | 86 | 87 | ### Issue Comment + Checkout 88 | This detection identifies workflows triggered by issue events where a job checks out code from a repository ("actions/checkout") using a ref derived from the event. 89 | 90 | ``` cypher 91 | MATCH (w:Workflow)-[*]->(j:Job) 92 | WHERE 93 | ( 94 | "issue_comment" in w.trigger OR 95 | "issues" in w.trigger 96 | ) AND 97 | EXISTS { 98 | (j)-->(s:Step)-->(ca:CompositeAction) 99 | WHERE ( 100 | ca.path = "actions/checkout" AND 101 | ANY(param IN s.with WHERE 102 | ( 103 | param STARTS WITH "ref" and 104 | ( 105 | param contains "head.sha" OR 106 | param contains "head.ref" 107 | ) 108 | ) 109 | ) 110 | ) 111 | } 112 | RETURN w.path, w.url 113 | ``` -------------------------------------------------------------------------------- /docs/Issue Injections/issue_injection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/docs/Issue Injections/issue_injection.png -------------------------------------------------------------------------------- /docs/Multi Prerequisite Exploits/Exploiting Download Action in Workflows with Missing path Parameter.md: -------------------------------------------------------------------------------- 1 | # Exploiting Download Action in Workflows with Missing "path" Parameter 2 | 3 | > **Note**: Read more about this vulnerability in the [Cycode Blog Post](https://cycode.com/blog/analyzing-the-vulnerability-that-could-have-compromised-microsoft-365-users/). 4 | 5 | ## The Problem 6 | 7 | Due to the limitations of native GitHub Actions for sharing artifacts across different workflows, developers often resort to custom actions and APIs. One such widely used custom action is `dawidd6/action-download-artifact`, which currently has over 12,000 dependent repositories. 8 | 9 | This custom action and the underlying GitHub API for downloading artifacts didn't discriminate between artifacts created by the base repository and those from a forked repository. This oversight could lead a workflow to download and process poisoned artifacts, introducing vulnerabilities into the software supply chain. 10 | 11 | Without the `path` parameter specifying where the action should extract the artifact, the action defaults to extracting it into the current working directory. This can overwrite original files in the repository and may lead to malicious activity on the build system, posing a risk of a significant supply chain attack. 12 | 13 | GitHub updated its API for the GetArtifact and ListArtifacts endpoints to provide more information to help developers differentiate between trusted and untrusted artifacts. 14 | 15 | 16 | ### In Simple Terms 17 | 18 | An attacker can fork a repository, create a malicious artifact, and then inject this artifact into the original repository's workflow. The workflow that downloads the artifact, in turn, would unknowingly use the tainted artifact. 
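To make the pattern concrete, here is a minimal, hypothetical sketch of the consumer side. The workflow, file, and artifact names (`CI`, `ci.yml`, `pr-artifact`, `scripts/comment.sh`) are illustrative and not taken from a real repository:

``` yaml
# Hypothetical consumer workflow, triggered after a fork's "CI" run completes.
name: Comment on PR

on:
  workflow_run:
    workflows: ["CI"]
    types: [completed]

jobs:
  process-artifact:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Downloads the artifact produced by the (possibly forked) CI run.
      # Because no `path:` is set, the archive is extracted into the current
      # working directory and can overwrite the files checked out above.
      - uses: dawidd6/action-download-artifact@v2
        with:
          workflow: ci.yml
          run_id: ${{ github.event.workflow_run.id }}
          name: pr-artifact

      # Any script run from the working tree may now be attacker-controlled.
      - run: ./scripts/comment.sh
```

Setting `path:` to a dedicated directory (as fastapi and sqlmodel did with `site`) keeps the extracted files away from the checked-out sources.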
19 | 20 | ## References 21 | - [From Default to Secure: Analyzing the Vulnerability that Could Have Compromised Microsoft 365 Users](https://cycode.com/blog/analyzing-the-vulnerability-that-could-have-compromised-microsoft-365-users/). 22 | - [Novel Pipeline Vulnerability Discovered; Rust Found Vulnerable](https://www.legitsecurity.com/blog/artifact-poisoning-vulnerability-discovered-in-rust). 23 | 24 | ## Real-World Examples 25 | 26 | ### tiangolo/fastapi - 64k ⭐️ 27 | * **Description**: Unsafe handling of downloaded artifacts; the workflow extracted the artifact into the current working directory. 28 | * **Fix Commit Link**: [9efab1bd96ef061edf1753626573a0a2be1eef09](https://github.com/tiangolo/fastapi/commit/9efab1bd96ef061edf1753626573a0a2be1eef09) 29 | * **Remediation**: Created a specific directory, `site`, for uploading and extracting artifacts. 30 | 31 | ### microsoft/fluentui - 16k ⭐️ 32 | * **Description**: Unsafe handling of artifacts within the build process, leading to code execution on the build system and a potentially significant supply chain attack that would deliver malware to all Microsoft 365 users. 33 | * **Fix Commit Link**: [2ea6195152131766641311ee5604e746b578d8e7](https://github.com/microsoft/fluentui/commit/2ea6195152131766641311ee5604e746b578d8e7) 34 | * **Remediation**: Removed the workflow. 35 | 36 | 37 | ### tiangolo/sqlmodel - 11k ⭐️ 38 | * **Description**: Unsafe handling of downloaded artifacts; the workflow extracted the artifact into the current working directory. 39 | * **Fix Commit Link**: [cf36b2d9baccf527bc61071850f102e2cd8bf6bf](https://github.com/tiangolo/sqlmodel/commit/cf36b2d9baccf527bc61071850f102e2cd8bf6bf) 40 | * **Remediation**: Created a specific directory, `site`, for uploading and extracting artifacts. 41 | 42 | ## Query 43 | 44 | ``` cypher 45 | MATCH p=(w1:Workflow)-->(w2:Workflow)-[*]->(s:Step)-->(ca:CompositeAction) 46 | WHERE ( 47 | "pull_request" in w1.trigger OR 48 | "pull_request_target" in w1.trigger OR 49 | "issue_comment" in w1.trigger OR 50 | "issues" in w1.trigger 51 | ) AND ( 52 | ca.path = "dawidd6/action-download-artifact" 53 | ) AND ( 54 | not ANY(param IN s.with WHERE 55 | ( 56 | param contains "path" 57 | ) 58 | ) 59 | ) AND 60 | EXISTS { 61 | (w2)-[*]->(caTmp:CompositeAction) 62 | WHERE caTmp.path = "actions/checkout" 63 | } 64 | RETURN DISTINCT w1.url, w2.url; 65 | ``` 66 | 67 | > **Note**: According to the release documentation, since version 2.28.0 of `dawidd6/action-download-artifact`, the action ignores forks when downloading artifacts. -------------------------------------------------------------------------------- /docs/Multi Prerequisite Exploits/README.md: -------------------------------------------------------------------------------- 1 | # Multi-Prerequisite Exploits 2 | 3 | ## Overview 4 | 5 | This section is dedicated to exploring and documenting exploits that require multiple conditions to be met for the vulnerability to be triggered. These are complex vulnerabilities that may involve a series of specific events, configurations, or sequences of actions to exploit. 6 | 7 | Understanding these types of exploits can be essential for comprehensive security assessments, as they can often slip through the cracks of simpler vulnerability scans. 
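To illustrate what "multiple prerequisites" means in practice, here is a hedged sketch that chains two individually benign pieces: an unprivileged, fork-triggered producer workflow and a privileged consumer triggered by `workflow_run`. Neither is exploitable alone; together they let untrusted artifact content reach a privileged context. All names are hypothetical.

``` yaml
# File 1: .github/workflows/build.yml (hypothetical producer)
# Runs on forked PRs without secrets; benign on its own.
name: build
on: [pull_request]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/upload-artifact@v4
        with:
          name: build-output
          path: out/
```

``` yaml
# File 2: .github/workflows/publish.yml (hypothetical privileged consumer)
# Also benign on its own; it becomes exploitable only when combined with the
# untrusted artifact above and an unsafe download/extraction step.
name: publish
on:
  workflow_run:
    workflows: [build]
    types: [completed]
```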
8 | 9 | ## Exploits Documented Here 10 | 11 | 12 | 13 | Stay tuned for more in-depth content and real-world examples. 14 | -------------------------------------------------------------------------------- /docs/Pull Request Injections/README.md: -------------------------------------------------------------------------------- 1 | # Pull Request Injections 2 | 3 | ## Overview 4 | Pull Requests (PRs) are a cornerstone of collaborative coding but can become a security loophole when integrated with automated workflows like GitHub Actions. Without proper input validation or sanitization, attackers can exploit this by injecting malicious code into PR titles, descriptions, or file changes. These injections can compromise the integrity of the entire codebase by executing unauthorized commands, code, or even exfiltrating sensitive information. This documentation aims to explore the vulnerabilities, real-world examples, remediation strategies, and detection techniques associated with pull request injections. 5 | 6 | 7 | ## Description 8 | We will present two scenarios of pull request injections in vulnerable workflows: 9 | 10 | ### 1. pull_request + Pull Request Title: 11 | In this scenario, workflows trigger on pull request events and execute jobs that depend on the pull request title (github.event.pull_request.title) without any permissions checks or input sanitization. 12 | ```yaml 13 | on: 14 | pull_request: 15 | types: [opened, synchronize] 16 | 17 | jobs: 18 | use_pr_title: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Print PR Title 22 | run: echo "Pull Request Title is ${{ github.event.pull_request.title }}" 23 | ``` 24 | 25 | ### 2. pull_request_target + Checkout 26 | Using the `pull_request_target` event in a GitHub Actions workflow is risky because it runs in the context of the base repository, not the fork. This means it has access to secrets and write permissions to the repository. The real danger arises when such a workflow is combined with the `checkout` action, which checks out code from an incoming, potentially untrusted pull request and then executes scripts or runs commands based on that code. Without proper permissions checks, this could allow a malicious actor to run untrusted code in a privileged environment, potentially leading to unauthorized access or data leaks. 27 | 28 | ```yaml 29 | on: 30 | pull_request_target: 31 | types: [opened, synchronize] 32 | 33 | jobs: 34 | checkout_code: 35 | runs-on: ubuntu-latest 36 | steps: 37 | # Checks out code from the incoming pull request 38 | - name: Checkout code 39 | uses: actions/checkout@v2 40 | with: 41 | ref: ${{ github.event.pull_request.head.sha }} 42 | 43 | # Executes scripts or runs commands based on the checked out code 44 | - name: Build and deploy 45 | run: make deploy 46 | ``` 47 | ## Remediation 48 | * **Input Validation**: Sanitize and validate data from pull request titles or other user-generated fields before using them in your workflows (a hardened sketch of scenario 1 follows this list). 49 | 50 | * **Limited Permissions**: Minimize the permissions granted to GitHub Actions. Use read-only permissions where possible. 51 | 52 | * **Workflow Segregation**: Consider using separate workflows for trusted and untrusted events to minimize risk. 53 | 54 | * **Manual Approval**: Require manual approval for workflow runs from outside contributors. 
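As referenced in the remediation list, here is a minimal hardened sketch of scenario 1. It assumes nothing beyond the example above: the title is passed to the step through an environment variable, so the runner expands it as data rather than interpolating it into the shell script, and permissions are pinned to read-only.

``` yaml
on:
  pull_request:
    types: [opened, synchronize]

# Explicit, minimal permissions for the whole workflow
permissions:
  contents: read

jobs:
  use_pr_title:
    runs-on: ubuntu-latest
    steps:
      - name: Print PR Title
        env:
          # Loaded as an environment variable, never spliced into the script
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: echo "Pull Request Title is $PR_TITLE"
```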
55 | 56 | ## References 57 | - [Cycode Discovers Vulnerabilities in CI/CD Pipelines of Popular Open-Source Projects](https://cycode.com/blog/github-actions-vulnerabilities/) 58 | 59 | ## Real-World Examples 60 | ### fauna/faunadb-js - 694 ⭐️ 61 | * **Description**: This workflow runs when a pull_request is created. Lines 32 and 33 use the pull request's body and title in an insecure manner, at `create-jira-tickets.yml`. 62 | * **Fix Commit Link**: [ee6f53f9c985bde41976743530e3846dee058587](https://github.com/fauna/faunadb-js/commit/ee6f53f9c985bde41976743530e3846dee058587) 63 | * **Remediation**: Removed the workflow. 64 | 65 | ## Queries 66 | ### 1. Pull Request + Pull Request Title 67 | 68 | This query looks for GitHub Actions workflows that are triggered by pull requests, focusing on those that don't have defined permissions. It then identifies any jobs and steps within those workflows that use the pull request title (github.event.pull_request.title). The goal is to find potential security risks arising from the use of unsanitized pull request titles. 69 | 70 | ``` cypher 71 | MATCH (w:Workflow)-[*]->(j:Job)-->(s:Step)-->(dep:StepCodeDependency) 72 | WHERE 73 | w.permissions IS NULL AND 74 | "pull_request" IN w.trigger AND 75 | s.run IS NOT NULL AND 76 | dep.param = "github.event.pull_request.title" 77 | RETURN DISTINCT w, j, s, dep; 78 | ``` 79 | 80 | ### 2. Pull Request Target + Checkout 81 | 82 | This query aims to identify workflows that are triggered by the `pull_request_target` event and don't have specified permissions. It then looks for jobs within those workflows that use the actions/checkout action to check out code based on pull request data. The query focuses on parameters that start with "ref" and contain either head.sha or head.ref. Due to its broad nature, this query might produce many false positives, but it's designed to flag potentially risky configurations involving `pull_request_target` and code checkout. 83 | 84 | ``` cypher 85 | MATCH (w:Workflow)-[*]->(j:Job) 86 | WHERE 87 | w.permissions is null AND 88 | "pull_request_target" in w.trigger AND 89 | EXISTS { 90 | (j)-->(s:Step)-->(ca:CompositeAction) 91 | WHERE ( 92 | ANY(param IN s.with WHERE 93 | ( 94 | param STARTS WITH "ref" and 95 | ( 96 | param contains "head.sha" OR 97 | param contains "head.ref" 98 | ) 99 | ) 100 | ) 101 | ) 102 | } 103 | RETURN DISTINCT w.path, w.url; 104 | ``` 105 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Exploit Documentation 📚 2 | 3 | Welcome to the GitHub Actions Exploit Documentation directory! This is where you'll find in-depth write-ups on security vulnerabilities specific to GitHub Actions workflows and the relevant queries to find them in Raven's ecosystem. 4 | 5 | ## What's Inside? 6 | 7 | - Analyses of GitHub Actions vulnerabilities and examples of them. 8 | - Cypher queries to identify vulnerable repositories on Neo4j. 9 | - Mitigation suggestions and preventive best practices. 10 | - Source references, such as research articles and GitHub issues. 
11 | 12 | ## Quick Links 13 | 14 | - [Issue Injections](/docs/Issue%20Injections/README.md) 15 | - [Pull Request Injections](/docs/Pull%20Request%20Injections/README.md) 16 | - [Workflow Run Injections](/docs/Multi%20Prerequisite%20Exploits/README.md) 17 | - [CodeSee Injections](/docs/Codesee%20Injections/README.md) 18 | 19 | ## Notes 20 | 21 | - The information presented here is based on rigorous research and real-world examples. 22 | - Always act responsibly and ethically when using this information. 23 | 24 | ## Contributions 25 | 26 | We encourage you to contribute by submitting more GitHub Actions exploit documentation and queries, or by improving the existing write-ups. Contributions are always welcome! -------------------------------------------------------------------------------- /docs/templates/issues.md: -------------------------------------------------------------------------------- 1 | # Vulnerability Name 2 | 3 | ## Overview 4 | Provide a brief introduction to the specific vulnerability type. 5 | 6 | ## Description 7 | Include a detailed description of the vulnerability, explaining what it is, how it can be exploited, and why it's important to detect and remediate it. 8 | 9 | ## Remediation 10 | Provide guidance on how to remediate the vulnerability once it's detected. This may include steps to update GitHub Actions configurations, change specific workflow files, or apply best practices. 11 | 12 | ## References 13 | Include links to external resources, documentation, or security advisories related to this vulnerability type. This can help users understand the issue better and find additional information. 14 | 15 | ## Real-World Examples 16 | 17 | ### Repository Name 18 | 19 | * **Description**: Briefly describe the vulnerability that was present in this repository's GitHub Actions workflow. 20 | * **Commit Link**: Provide links to the specific commits in the repository where the vulnerability existed. 21 | * **Remediation**: Explain how the vulnerability was fixed in this repository. Include links to relevant code changes or pull requests. 22 | 23 | 24 | ## Detections 25 | Include sample Cypher queries that users can run against their indexed GitHub Actions workflows in the Neo4j database to detect instances of this vulnerability. Make sure to explain the purpose of each query and any parameters that need to be configured. 26 | 27 | ### Example-1 28 | This Cypher query identifies workflows triggered by events like issue comments, issues, or pull request targets that depend on specific GitHub event-related data. 
29 | ``` cypher 30 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 31 | WHERE 32 | ( 33 | "issue_comment" in w.trigger OR 34 | "issues" in w.trigger OR 35 | "pull_request_target" in w.trigger 36 | ) AND 37 | ( 38 | d.param IN ["github.event.issue.title", "github.event.issue.body", "github.event.pull_request.title", "github.event.pull_request.body", "github.event.comment.body", "github.event.review.body", "github.event.review_comment.body", "github.event.pages.*.page_name", "github.event.commits.*.message", "github.event.head_commit.message", "github.event.head_commit.author.email", "github.event.head_commit.author.name", "github.event.commits.*.author.email", "github.event.commits.*.author.name", "github.event.pull_request.head.ref", "github.event.pull_request.head.label", "github.event.pull_request.head.repo.default_branch", "github.head_ref"] 39 | ) 40 | RETURN DISTINCT w.path, w.url; 41 | ``` -------------------------------------------------------------------------------- /library/query_body_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-1 2 | 3 | info: 4 | name: Body Context Injection 5 | severity: critical 6 | description: Body Injection is caused by using body variables in inline scripts 7 | full-description: | 8 | Issues, comments, discussions and PR bodies can contain any text and special characters. 9 | By using a body variable in an inline script, an attacker can inject arbitrary code 10 | into the build process. 11 | references: 12 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 13 | - https://cycode.com/blog/github-actions-vulnerabilities/ 14 | - https://github.com/CycodeLabs/raven/blob/main/docs/issue_injections/README.md 15 | tags: 16 | - injection 17 | - unauthenticated 18 | 19 | query: | 20 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 21 | WHERE ( 22 | "issues" in w.trigger OR 23 | "issue_comment" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND 26 | ( 27 | d.param IN [ 28 | "github.event.comment.body", 29 | "github.event.issue.body", 30 | "github.event.discussion.body", 31 | "github.event.pull_request.body" 32 | ] 33 | ) 34 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_build_artifact_leaks_the_github_token.yml: -------------------------------------------------------------------------------- 1 | id: RQ-17 2 | 3 | info: 4 | name: Build Artifact Leaks the GitHub Token 5 | severity: critical 6 | description: Including `actions/checkout` and `actions/upload-artifact` in a workflow can expose the `GITHUB_TOKEN` in the build artifact if the root directory is uploaded. 7 | full-description: | 8 | When you use the `actions/checkout` action, the `GITHUB_TOKEN` is automatically added to the 9 | `.git/config` file. If you subsequently use the `actions/upload-artifact` action with the path 10 | set to the root directory, the `.git/config` file will be included in the build artifact. 11 | This can expose the `GITHUB_TOKEN` within the artifact. 
12 | references: 13 | - https://unit42.paloaltonetworks.com/github-repo-artifacts-leak-tokens/ 14 | tags: 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(j:Job) 19 | WHERE 20 | EXISTS { 21 | MATCH (j)-->(s:Step)-->(ca:CompositeAction) 22 | WHERE ca.path = "actions/checkout" 23 | } 24 | AND 25 | EXISTS { 26 | MATCH (j)-->(s:Step)-->(ca:CompositeAction) 27 | WHERE ca.path = "actions/upload-artifact" 28 | AND "path:." IN s.with 29 | } 30 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_checkout_on_issue.yml: -------------------------------------------------------------------------------- 1 | id: RQ-2 2 | 3 | info: 4 | name: Checkout On New Issue 5 | severity: critical 6 | description: Workflows triggered by issue events, where a job checks out code from a repository ("actions/checkout"). 7 | full-description: 8 | references: 9 | - https://github.com/CycodeLabs/raven/tree/main/docs/issue_injections 10 | - https://cycode.com/blog/github-actions-vulnerabilities/ 11 | tags: 12 | - unauthenticated 13 | 14 | query: | 15 | MATCH (w:Workflow)-[*]->(j:Job) 16 | WHERE 17 | ( 18 | "issue_comment" in w.trigger OR 19 | "issues" in w.trigger 20 | ) AND 21 | EXISTS { 22 | (j)-->(s:Step)-->(ca:CompositeAction) 23 | WHERE ( 24 | ca.path = "actions/checkout" AND 25 | ANY(param IN s.with WHERE 26 | ( 27 | param STARTS WITH "ref" and 28 | ( 29 | param contains "head.sha" OR 30 | param contains "head.ref" 31 | ) 32 | ) 33 | ) 34 | ) 35 | } 36 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_codesee_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-3 2 | 3 | info: 4 | name: CodeSee Injection 5 | severity: info 6 | description: The CodeSee NPM package before v0.376.0 was vulnerable to code injection. 7 | full-description: 8 | references: 9 | - https://github.com/CycodeLabs/raven/tree/main/docs/codesee_injections 10 | - https://cycode.com/blog/cycode-secures-thousands-of-open-source-projects/ 11 | tags: 12 | - unauthenticated 13 | - injection 14 | - fixed 15 | 16 | query: | 17 | MATCH (w:Workflow) 18 | WHERE 19 | w.permissions is null AND 20 | EXISTS { 21 | (w)-[*]->(ca:CompositeAction) 22 | WHERE ( 23 | ca.path = "Codesee-io/codesee-map-action" 24 | ) 25 | } 26 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_email_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-4 2 | 3 | info: 4 | name: Email Context Injection 5 | severity: high 6 | description: Email Injection is caused by using email variables in inline scripts 7 | full-description: | 8 | GitHub allows creating accounts with email addresses that contain special characters, 9 | such as `+`, `@` and `"`. By using an email variable in an inline script, an attacker 10 | can inject arbitrary code into the build process. 
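# Illustrative only, not part of the original rule: an inline script that this
# query is meant to flag, assuming a hypothetical workflow step:
#
#   - run: echo "Committed by ${{ github.event.head_commit.committer.email }}"
#
# Because the expression is expanded into the script text before it runs, an
# email such as `a";id;echo"@example.com` would execute `id` in the runner's shell.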
11 | references: 12 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 13 | tags: 14 | - injection 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 19 | WHERE ( 20 | "issues" in w.trigger OR 21 | "issue_comment" in w.trigger OR 22 | "pull_request_target" in w.trigger 23 | ) AND 24 | ( 25 | d.param IN [ 26 | "github.event.comment.author.email", 27 | "github.event.head_commit.committer.email" 28 | ] 29 | ) 30 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_enterprise_github_server.yml: -------------------------------------------------------------------------------- 1 | id: RQ-16 2 | 3 | info: 4 | name: Enterprise GitHub Server 5 | severity: info 6 | description: Checking out code from a GitHub Enterprise repository. 7 | full-description: | 8 | GitHub Enterprise Server is the on-premises version of GitHub, which you can deploy and manage in your own secure environment. 9 | Checking out code from GitHub Enterprise Server - combined with a command injection vulnerability - 10 | may lead to data exfiltration from private repositories hosted on GitHub Enterprise Server. 11 | references: 12 | - https://github.com/actions/checkout 13 | tags: 14 | - reconnaissance 15 | 16 | query: | 17 | MATCH (w:Workflow)-[*]->(s:Step)-[*]->(ca:CompositeAction) 18 | WHERE ( 19 | ca.path = "actions/checkout" AND 20 | any ( server IN s.with WHERE ( 21 | server CONTAINS "github-server-url" AND 22 | NOT server ENDS WITH "github.com" 23 | ) 24 | ) 25 | ) 26 | 27 | RETURN DISTINCT w.url AS url 28 | -------------------------------------------------------------------------------- /library/query_injectable_context_composite_action.yml: -------------------------------------------------------------------------------- 1 | id: RQ-15 2 | 3 | info: 4 | name: Injectable Composite Action (github context) 5 | severity: high 6 | description: Composite Actions that use injectable github context parameters in inline scripts can be used to inject arbitrary code. 7 | full-description: | 8 | Composite Actions can access the github context parameters. 9 | Some of these parameters can be controlled by the user, such as the commit message, the issue title, etc. 10 | If these parameters are used in inline scripts, an attacker can inject arbitrary code into the build process. 
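# Illustrative only, not part of the original rule: a composite action step that
# this query is meant to flag, assuming a hypothetical action.yml:
#
#   runs:
#     using: composite
#     steps:
#       - shell: bash
#         run: echo "Handling issue ${{ github.event.issue.title }}"
#
# An issue title such as `$(id)` is substituted into the script text and executed.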
11 | references: 12 | - https://docs.github.com/en/actions/creating-actions/creating-a-composite-action 13 | - https://cycode.com/blog/cycode-secures-thousands-of-open-source-projects/ 14 | tags: 15 | - injection 16 | - unauthenticated 17 | 18 | query: | 19 | MATCH (w:Workflow)-[*]->(ca:CompositeAction)-->(cas:CompositeActionStep) 20 | WHERE ( 21 | ( 22 | "issues" in w.trigger OR 23 | "issue_comment" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND 26 | ANY(input IN [ 27 | "github.event.issue.title", 28 | "github.event.issue.body", 29 | "github.event.pull_request.title", 30 | "github.event.pull_request.body", 31 | "github.event.comment.body", 32 | "github.event.review.body", 33 | "github.event.review_comment.body", 34 | "github.event.pages.*.page_name", 35 | "github.event.commits.*.message", 36 | "github.event.head_commit.message", 37 | "github.event.head_commit.author.email", 38 | "github.event.head_commit.author.name", 39 | "github.event.commits.*.author.email", 40 | "github.event.commits.*.author.name", 41 | "github.event.pull_request.head.ref", 42 | "github.event.pull_request.head.label", 43 | "github.event.pull_request.head.repo.default_branch", 44 | "github.head_ref" 45 | ] WHERE cas.run CONTAINS input ) 46 | ) 47 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_injectable_input_composite_action.yml: -------------------------------------------------------------------------------- 1 | id: RQ-13 2 | 3 | info: 4 | name: Injectable Composite Action (input variable) 5 | severity: high 6 | description: Composite Actions that use input parameters in inline scripts can be used to inject arbitrary code. 7 | full-description: | 8 | Composite Actions can get input parameters from the workflow file. 9 | If these input parameters are used in inline scripts, an attacker can 10 | inject arbitrary code into the build process. 11 | references: 12 | - https://docs.github.com/en/actions/creating-actions/creating-a-composite-action 13 | - https://cycode.com/blog/cycode-secures-thousands-of-open-source-projects/ 14 | tags: 15 | - injection 16 | - unauthenticated 17 | 18 | query: | 19 | MATCH (w:Workflow)-[*]->(s:Step)-->(ca:CompositeAction)-->(cas:CompositeActionStep)-->(d:StepCodeDependency) 20 | WHERE ( 21 | ( 22 | "issues" in w.trigger OR 23 | "issue_comment" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND ( 26 | ca.using = "composite" AND 27 | NOT cas.run is null AND 28 | d.param STARTS WITH "inputs." 
29 | ) AND ( 30 | ANY(input IN s.with WHERE 31 | ANY ( 32 | pattern IN [ 33 | "github.event.issue.title", 34 | "github.event.issue.body", 35 | "github.event.pull_request.title", 36 | "github.event.pull_request.body", 37 | "github.event.comment.body", 38 | "github.event.review.body", 39 | "github.event.review_comment.body", 40 | "github.event.pages.*.page_name", 41 | "github.event.commits.*.message", 42 | "github.event.head_commit.message", 43 | "github.event.head_commit.author.email", 44 | "github.event.head_commit.author.name", 45 | "github.event.commits.*.author.email", 46 | "github.event.commits.*.author.name", 47 | "github.event.pull_request.head.ref", 48 | "github.event.pull_request.head.label", 49 | "github.event.pull_request.head.repo.default_branch", 50 | "github.head_ref" 51 | ] WHERE input CONTAINS pattern 52 | ) 53 | ) 54 | ) 55 | ) 56 | RETURN DISTINCT s.url AS url; -------------------------------------------------------------------------------- /library/query_label_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-5 2 | 3 | info: 4 | name: Label Context Injection 5 | severity: high 6 | description: Label Injection is caused by using label variables in inline scripts 7 | full-description: | 8 | A new pull request could be submitted with a label that contains special characters. 9 | By using a label variable in an inline script, an attacker can inject arbitrary code into the build process. 10 | references: 11 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 12 | tags: 13 | - injection 14 | 15 | query: | 16 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 17 | WHERE ( 18 | "issues" in w.trigger OR 19 | "issue_comment" in w.trigger OR 20 | "pull_request_target" in w.trigger 21 | ) AND 22 | ( 23 | d.param IN [ 24 | "github.event.pull_request.head.label" 25 | ] 26 | ) 27 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_message_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-6 2 | 3 | info: 4 | name: Message Context Injection 5 | severity: high 6 | description: Commit Injection is caused by using commit message variables in inline scripts 7 | full-description: | 8 | Commit messages can contain any text and special characters. 9 | By using a commit message variable in an inline script, an attacker can inject arbitrary code 10 | into the build process. 11 | references: 12 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 13 | tags: 14 | - injection 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 19 | WHERE ( 20 | "issues" in w.trigger OR 21 | "issue_comment" in w.trigger OR 22 | "pull_request_target" in w.trigger 23 | ) AND 24 | ( 25 | d.param IN [ 26 | "github.event.head_commit.message", 27 | "github.event.merge_group.head_commit.message" 28 | ] 29 | ) 30 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_priv_esc_workflow_run.yml: -------------------------------------------------------------------------------- 1 | id: RQ-7 2 | 3 | info: 4 | name: Privilege Escalation Workflow Run 5 | severity: critical 6 | description: Injecting malicious code into a workflow that triggers a workflow_run pipeline can lead to privilege escalation. 
7 | full-description: | 8 | The pull request pipeline runs without access to secrets. 9 | However, if a pull request triggers a workflow run, the workflow run will then have access to secrets. 10 | This means that if an attacker can inject malicious code into the pull request workflow and then pass the malicious code to the workflow run, 11 | the attacker can gain access to secrets even though the original workflow did not have access to secrets. 12 | references: 13 | - https://www.legitsecurity.com/blog/github-privilege-escalation-vulnerability 14 | tags: 15 | - unauthenticated 16 | - injection 17 | - priv-esc 18 | 19 | query: | 20 | MATCH (w:Workflow)-[*]->(w2:Workflow) 21 | WHERE ( 22 | ( 23 | "pull_request" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND 26 | ( 27 | "workflow_run" in w2.trigger 28 | ) 29 | ) AND EXISTS { 30 | (w)-[*]->(d:StepCodeDependency) 31 | WHERE ( 32 | d.param IN [ 33 | "github.event.pull_request.title", 34 | "github.event.pull_request.body", 35 | "github.event.pull_request.head.ref", 36 | "github.event.pull_request.head.label", 37 | "github.event.pull_request.head.repo.default_branch" 38 | ] 39 | ) 40 | } 41 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_pull_request_target_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-8 2 | 3 | info: 4 | name: Pull Request Target Injection 5 | severity: critical 6 | description: The pull_request_target runs in the context of the base repository, not the fork. 7 | full-description: | 8 | A pull_request_target operates within the context of the base repository of the pull request, 9 | which means that any tampering with the build process can potentially lead 10 | to the unauthorized extraction of sensitive information, such as secrets. 11 | references: 12 | - https://github.com/CycodeLabs/raven/tree/main/docs/pull_request_injections 13 | - https://cycode.com/blog/github-actions-vulnerabilities/ 14 | tags: 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(j:Job) 19 | WHERE 20 | w.permissions is null AND 21 | "pull_request_target" in w.trigger AND 22 | EXISTS { 23 | (j)-->(s:Step)-->(ca:CompositeAction) 24 | WHERE ( 25 | ca.path = "actions/checkout" AND 26 | ANY(param IN s.with WHERE 27 | ( 28 | param STARTS WITH "ref" and 29 | ( 30 | param contains "head.sha" OR 31 | param contains "head.ref" 32 | ) 33 | ) 34 | ) 35 | ) 36 | } 37 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_ref_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-9 2 | 3 | info: 4 | name: Branch Context Injection 5 | severity: high 6 | description: Branch Injection is caused by using ref/default_branch variables in inline scripts 7 | full-description: | 8 | A new pull request could be submitted with a branch name that contains special characters. 9 | By using a ref/default_branch variable in an inline script, an attacker can inject arbitrary code into the build process. 
10 | references: 11 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 12 | - https://cycode.com/blog/ci-story-how-we-found-critical-vulnerabilities-in-storybook-project/ 13 | tags: 14 | - injection 15 | - unauthenticated 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 19 | WHERE ( 20 | "issues" in w.trigger OR 21 | "issue_comment" in w.trigger OR 22 | "pull_request_target" in w.trigger 23 | ) AND 24 | ( 25 | d.param IN [ 26 | "github.event.pull_request.head.ref", 27 | "github.head_ref", 28 | "github.event.pull_request.head.repo.default_branch" 29 | ] 30 | ) 31 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_self_hosted_workflow.yml: -------------------------------------------------------------------------------- 1 | id: RQ-10 2 | 3 | info: 4 | name: Self Hosted Runner 5 | severity: medium 6 | description: Self Hosted runners should not be used for public repositories. 7 | full-description: | 8 | Self Hosted runners do not have the same security controls as GitHub Hosted runners and 9 | do not have the guarantees of a clean ephemeral environment. Self Hosted runners should 10 | only be used for private repositories. 11 | references: 12 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#hardening-for-self-hosted-runners 13 | tags: 14 | - unauthenticated 15 | - best-practice 16 | 17 | query: | 18 | MATCH (w:Workflow)-[*]->(j:Job) 19 | WHERE ( 20 | w.is_public = TRUE AND 21 | "self-hosted" in j.machine 22 | ) 23 | RETURN DISTINCT w.url AS url; 24 | -------------------------------------------------------------------------------- /library/query_title_context_injection.yml: -------------------------------------------------------------------------------- 1 | id: RQ-11 2 | 3 | info: 4 | name: Title Context Injection 5 | severity: critical 6 | description: Title Injection is caused by using title variables in inline scripts 7 | full-description: | 8 | Issues, comments, discussions and PR titles can contain any text and special characters. 9 | By using a title variable in an inline script, an attacker can inject arbitrary code 10 | into the build process. 11 | references: 12 | - https://github.com/CycodeLabs/raven/blob/main/docs/issue_injections/README.md 13 | - https://cycode.com/blog/github-actions-vulnerabilities/ 14 | - https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions 15 | tags: 16 | - injection 17 | - unauthenticated 18 | 19 | query: | 20 | MATCH (w:Workflow)-[*]->(d:StepCodeDependency) 21 | WHERE ( 22 | "issues" in w.trigger OR 23 | "issue_comment" in w.trigger OR 24 | "pull_request_target" in w.trigger 25 | ) AND 26 | ( 27 | d.param IN [ 28 | "github.event.issue.title", 29 | "github.event.pull_request.title", 30 | "github.event.pull_request.milestone.title" 31 | ] 32 | ) 33 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /library/query_unpinnable_action.yml: -------------------------------------------------------------------------------- 1 | id: RQ-12 2 | 3 | info: 4 | name: Unpinnable Action 5 | severity: low 6 | description: Unpinnable actions can lead to software supply chain attacks. 7 | full-description: | 8 | Actions can be pinned to a specific version to ensure that the same version is used 9 | every time the workflow is run. 
Even if the action is pinned, if that action itself 10 | uses an unpinned Docker Image or GitHub Action, the action can be updated without 11 | the workflow being updated. This can lead to software supply chain attacks. 12 | references: 13 | - https://www.paloaltonetworks.com/blog/prisma-cloud/unpinnable-actions-github-security/ 14 | tags: 15 | - supply-chain 16 | - best-practice 17 | 18 | query: | 19 | MATCH (ca:CompositeAction) 20 | WHERE ( 21 | ca.using = "docker" AND ( 22 | NOT ca.image CONTAINS "@sha256:" 23 | ) 24 | ) 25 | RETURN DISTINCT ca.url AS url; -------------------------------------------------------------------------------- /library/query_usage_of_outdated_node.yml: -------------------------------------------------------------------------------- 1 | id: RQ-14 2 | 3 | info: 4 | name: Usage of Outdated Node Version 5 | severity: low 6 | description: Using a composite action that relies on an outdated Node version. 7 | full-description: | 8 | Node 12 active support ended on 20 Oct 2020, and security support ended on 30 Apr 2022. 9 | references: 10 | - https://endoflife.date/nodejs 11 | tags: 12 | - endoflife 13 | 14 | query: | 15 | MATCH (w:Workflow)-[*]->(ca:CompositeAction) 16 | WHERE ca.using = "node12" 17 | RETURN DISTINCT w.url AS url; -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from src.cmdline import execute 2 | 3 | 4 | def main(): 5 | execute() 6 | 7 | 8 | if __name__ == "__main__": 9 | main() 10 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | colorama 2 | loguru 3 | py2neo 4 | PyYAML 5 | redis 6 | requests 7 | slack_sdk 8 | tqdm -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.13 3 | # by the following command: 4 | # 5 | # pip-compile 6 | # 7 | certifi==2025.1.31 8 | # via 9 | # py2neo 10 | # requests 11 | charset-normalizer==3.4.1 12 | # via requests 13 | colorama==0.4.6 14 | # via -r requirements.in 15 | idna==3.10 16 | # via requests 17 | interchange==2021.0.4 18 | # via py2neo 19 | loguru==0.7.3 20 | # via -r requirements.in 21 | monotonic==1.6 22 | # via py2neo 23 | packaging==24.2 24 | # via py2neo 25 | pansi==2024.11.0 26 | # via py2neo 27 | pillow==11.1.0 28 | # via pansi 29 | py2neo==2021.2.4 30 | # via -r requirements.in 31 | pygments==2.19.1 32 | # via py2neo 33 | pytz==2025.1 34 | # via interchange 35 | pyyaml==6.0.2 36 | # via -r requirements.in 37 | redis==5.2.1 38 | # via -r requirements.in 39 | requests==2.32.3 40 | # via -r requirements.in 41 | six==1.17.0 42 | # via 43 | # interchange 44 | # py2neo 45 | slack-sdk==3.34.0 46 | # via -r requirements.in 47 | tqdm==4.67.1 48 | # via -r requirements.in 49 | urllib3==2.3.0 50 | # via 51 | # py2neo 52 | # requests 53 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os import getenv 2 | from setuptools import find_packages 3 | from setuptools import setup 4 | from distutils import log 5 | import pathlib 6 | import sys 7 | 8 | 9 | __version__ = getenv("RAVEN_VERSION", "0.0.0") 10 | 11 | HERE = pathlib.Path(__file__).parent 12 
| README = (HERE / "README.md").read_text() 13 | REQUIREMENTS = (HERE / "requirements.txt").read_text().splitlines() 14 | CURRENT_PYTHON = sys.version_info[:2] 15 | REQUIRED_PYTHON = (3, 9) 16 | if CURRENT_PYTHON < REQUIRED_PYTHON: 17 | log.fatal("Raven requires Python 3.9 or greater.") 18 | sys.exit(1) 19 | 20 | 21 | setup( 22 | name="raven-cycode", 23 | version=__version__, 24 | description="RAVEN (Risk Analysis and Vulnerability Enumeration for CI/CD)", 25 | long_description=README, 26 | long_description_content_type="text/markdown", 27 | url="https://github.com/CycodeLabs/raven", 28 | project_urls={"Source": "https://github.com/CycodeLabs/raven"}, 29 | author=["Cycode "], 30 | keywords=["cycode", "raven", "security", "ci/cd"], 31 | license="Apache License 2.0", 32 | python_requires=">=3.9", 33 | classifiers=[ 34 | "Programming Language :: Python :: 3", 35 | "Programming Language :: Python :: 3.9", 36 | "Programming Language :: Python :: 3.10", 37 | "Programming Language :: Python :: 3.11", 38 | "Programming Language :: Python :: 3.12", 39 | "Programming Language :: Python :: 3 :: Only", 40 | "Operating System :: Unix", 41 | "Operating System :: MacOS", 42 | "Intended Audience :: Science/Research", 43 | "Topic :: Security", 44 | ], 45 | install_requires=REQUIREMENTS, 46 | packages=find_packages(exclude=("tests", "tests.*")), 47 | entry_points={"console_scripts": ["raven = src.cmdline:execute"]}, 48 | ) 49 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | from src.common.ignore_warnings import ignore_warnings 2 | 3 | ignore_warnings() 4 | -------------------------------------------------------------------------------- /src/cmdline.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import src.logger.log as log 3 | from src.common.utils import validate_query_ids 4 | from src.downloader.download import ( 5 | download_all_workflows_and_actions, 6 | download_account_workflows_and_actions, 7 | ) 8 | from src.indexer.index import index_downloaded_workflows_and_actions 9 | from src.reporter.report import generate 10 | from src.config.config import ( 11 | load_downloader_config, 12 | load_indexer_config, 13 | load_reporter_config, 14 | ) 15 | from src.config.config import ( 16 | DEBUG_DEFAULT, 17 | MIN_STARS_DEFAULT, 18 | NEO4J_CLEAN_DEFAULT, 19 | NEO4J_URI_DEFAULT, 20 | NEO4J_USERNAME_DEFAULT, 21 | NEO4J_PASSWORD_DEFAULT, 22 | REDIS_HOST_DEFAULT, 23 | REDIS_PORT_DEFAULT, 24 | REDIS_CLEAN_DEFAULT, 25 | DOWNLOAD_COMMAND, 26 | DOWNLOAD_ACCOUNT_COMMAND, 27 | DOWNLOAD_CRAWL_COMMAND, 28 | INDEX_COMMAND, 29 | REPORT_COMMAND, 30 | QUERIES_PATH_DEFAULT, 31 | REPORT_RAW_FORMAT, 32 | REPORT_JSON_FORMAT, 33 | SEVERITY_LEVELS, 34 | QUERY_TAGS, 35 | QUERY_IDS, 36 | ) 37 | 38 | COMMAND_FUNCTIONS = { 39 | DOWNLOAD_COMMAND: { 40 | DOWNLOAD_CRAWL_COMMAND: download_all_workflows_and_actions, 41 | DOWNLOAD_ACCOUNT_COMMAND: download_account_workflows_and_actions, 42 | }, 43 | INDEX_COMMAND: index_downloaded_workflows_and_actions, 44 | REPORT_COMMAND: generate, 45 | } 46 | 47 | 48 | def execute() -> None: 49 | try: 50 | raven() 51 | log.catch_exit() 52 | except KeyboardInterrupt: 53 | log.catch_exit() 54 | except Exception as e: 55 | log.error(e) 56 | log.fail_exit() 57 | 58 | 59 | def raven() -> None: 60 | parser = argparse.ArgumentParser( 61 | description="GitHub Actions downloader and indexer" 62 | ) 63 | 64 | subparsers = 
parser.add_subparsers(dest="command", help="sub-command help") 65 | 66 | redis_parser = argparse.ArgumentParser(add_help=False) 67 | 68 | # Add redis arguments 69 | redis_parser.add_argument( 70 | "--redis-host", 71 | help=f"Redis host, default: {REDIS_HOST_DEFAULT}", 72 | default=REDIS_HOST_DEFAULT, 73 | ) 74 | redis_parser.add_argument( 75 | "--redis-port", 76 | type=int, 77 | help=f"Redis port, default: {REDIS_PORT_DEFAULT}", 78 | default=REDIS_PORT_DEFAULT, 79 | ) 80 | redis_parser.add_argument( 81 | "--clean-redis", 82 | "-cr", 83 | action="store_const", 84 | default=REDIS_CLEAN_DEFAULT, 85 | const=True, 86 | help=f"Whether to clean the Redis cache, default: {REDIS_CLEAN_DEFAULT}", 87 | ) 88 | 89 | neo4j_parser = argparse.ArgumentParser(add_help=False) 90 | neo4j_parser.add_argument( 91 | "--neo4j-uri", 92 | default=NEO4J_URI_DEFAULT, 93 | help=f"Neo4j URI endpoint, default: {NEO4J_URI_DEFAULT}", 94 | ) 95 | neo4j_parser.add_argument( 96 | "--neo4j-user", 97 | default=NEO4J_USERNAME_DEFAULT, 98 | help=f"Neo4j username, default: {NEO4J_USERNAME_DEFAULT}", 99 | ) 100 | neo4j_parser.add_argument( 101 | "--neo4j-pass", 102 | default=NEO4J_PASSWORD_DEFAULT, 103 | help=f"Neo4j password, default: {NEO4J_PASSWORD_DEFAULT}", 104 | ) 105 | neo4j_parser.add_argument( 106 | "--clean-neo4j", 107 | "-cn", 108 | action="store_const", 109 | default=NEO4J_CLEAN_DEFAULT, 110 | const=True, 111 | help=f"Whether to clean the Neo4j cache and index from scratch, default: {NEO4J_CLEAN_DEFAULT}", 112 | ) 113 | 114 | download_parser_options = argparse.ArgumentParser(add_help=False) 115 | download_parser_options.add_argument( 116 | "--token", 117 | required=True, 118 | help="GITHUB_TOKEN to download data from the GitHub API (needed for effective rate-limiting)", 119 | ) 120 | download_parser_options.add_argument( 121 | "--debug", 122 | action="store_const", 123 | default=DEBUG_DEFAULT, 124 | const=True, 125 | help=f"Whether to print debug statements, default: {DEBUG_DEFAULT}", 126 | ) 127 | 128 | download_parser = subparsers.add_parser( 129 | "download", help="Download workflows into Redis database" 130 | ) 131 | 132 | download_sub_parser = download_parser.add_subparsers( 133 | dest="download_command", 134 | ) 135 | 136 | crawl_download_parser = download_sub_parser.add_parser( 137 | "crawl", 138 | help="Crawl public GitHub repositories", 139 | parents=[download_parser_options, redis_parser], 140 | ) 141 | 142 | account_download_parser = download_sub_parser.add_parser( 143 | "account", 144 | help="Scan a specific GitHub account (user or organization)", 145 | parents=[download_parser_options, redis_parser], 146 | ) 147 | 148 | account_download_group = account_download_parser.add_mutually_exclusive_group( 149 | required=True 150 | ) 151 | 152 | account_download_group.add_argument( 153 | "--account-name", 154 | required=False, 155 | action="append", 156 | type=str, 157 | help="Account name for downloading the workflows, can be used multiple times", 158 | ) 159 | 160 | account_download_group.add_argument( 161 | "--personal", 162 | required=False, 163 | action="store_const", 164 | const=True, 165 | help="Download repositories owned by the authenticated user", 166 | ) 167 | 168 | crawl_download_parser.add_argument( 169 | "--max-stars", type=int, help="Maximum number of stars for a repository" 170 | ) 171 | crawl_download_parser.add_argument( 172 | "--min-stars", 173 | type=int, 174 | default=MIN_STARS_DEFAULT, 175 | help=f"Minimum number of stars for a repository, default: {MIN_STARS_DEFAULT}", 176 | ) 177 | 178 | # Index 
action 179 | index_parser = subparsers.add_parser( 180 | "index", 181 | parents=[redis_parser, neo4j_parser], 182 | help="Index the downloaded workflows into the Neo4j database", 183 | ) 184 | index_parser.add_argument( 185 | "--debug", 186 | action="store_const", 187 | default=DEBUG_DEFAULT, 188 | const=True, 189 | help=f"Whether to print debug statements, default: {DEBUG_DEFAULT}", 190 | ) 191 | 192 | report_parser = subparsers.add_parser( 193 | "report", 194 | parents=[redis_parser, neo4j_parser], 195 | help="Generate report from indexed Actions - Beta Version", 196 | ) 197 | 198 | report_parser.add_argument( 199 | "--tag", 200 | "-t", 201 | action="append", 202 | type=str, 203 | default=[], 204 | choices=QUERY_TAGS, 205 | help="Filter queries with specific tag", 206 | ) 207 | report_parser.add_argument( 208 | "--severity", 209 | "-s", 210 | type=str, 211 | default="info", 212 | choices=SEVERITY_LEVELS.keys(), 213 | help="Filter queries by severity level (default: info)", 214 | ) 215 | report_parser.add_argument( 216 | "--query_ids", 217 | "-id", 218 | type=validate_query_ids, 219 | default="", 220 | metavar=f"RQ-1,..,{QUERY_IDS[-1]}", 221 | help="Filter queries by query ids (example: RQ-2,RQ-8)", 222 | ) 223 | report_parser.add_argument( 224 | "--queries-path", 225 | "-dp", 226 | default=QUERIES_PATH_DEFAULT, 227 | help="Queries folder (default: library)", 228 | ) 229 | report_parser.add_argument( 230 | "--format", 231 | "-f", 232 | default=REPORT_RAW_FORMAT, 233 | choices=[REPORT_RAW_FORMAT, REPORT_JSON_FORMAT], 234 | help="Report format (default: raw)", 235 | ) 236 | 237 | format_sub_parser = report_parser.add_subparsers( 238 | dest="report_command", 239 | ) 240 | 241 | slack_parser = format_sub_parser.add_parser( 242 | "slack", 243 | help="Send report to slack channel", 244 | ) 245 | slack_parser.add_argument( 246 | "--slack-token", 247 | "-st", 248 | required=True, 249 | help="Slack API token used to send the report", 250 | ) 251 | slack_parser.add_argument( 252 | "--channel-id", 253 | "-ci", 254 | required=True, 255 | help="Slack channel ID to send the report to", 256 | ) 257 | 258 | args = parser.parse_args() 259 | 260 | if args.command in COMMAND_FUNCTIONS: 261 | if args.command == DOWNLOAD_COMMAND: 262 | if args.download_command: 263 | load_downloader_config(vars(args)) 264 | COMMAND_FUNCTIONS[args.command][args.download_command]() 265 | return 266 | else: 267 | download_parser.print_help() 268 | elif args.command == INDEX_COMMAND: 269 | load_indexer_config(vars(args)) 270 | COMMAND_FUNCTIONS[args.command]() 271 | elif args.command == REPORT_COMMAND: 272 | load_reporter_config(vars(args)) 273 | COMMAND_FUNCTIONS[args.command]() 274 | else: 275 | parser.print_help() 276 | -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/common/__init__.py -------------------------------------------------------------------------------- /src/common/ignore_warnings.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def ignore_warnings(): 5 | # Ignore urllib3 warning about OpenSSL version 6 | warnings.filterwarnings( 7 | "ignore", 8 | module="urllib3", 9 | message="urllib3 v2.0 only supports OpenSSL 1.1.1+.*", 10 | ) 11 | -------------------------------------------------------------------------------- /src/common/utils.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import re 3 | import io 4 | from typing import List, Dict, Union, Optional 5 | 6 | import yaml 7 | from py2neo.data import Node 8 | 9 | from src.storage.redis_connection import RedisConnection 10 | from src.config.config import Config, QUERY_IDS 11 | import src.logger.log as log 12 | from urllib.parse import urlparse, parse_qs 13 | 14 | 15 | def get_dependencies_in_code(code: str) -> List[str]: 16 | re_fmt = r"\$\{\{\s*([a-zA-Z0-9\-\._]*)\s*\}\}" 17 | return [match.group(1) for match in re.finditer(re_fmt, code)] 18 | 19 | 20 | def convert_dict_to_list(d: Union[Dict, str]) -> List: 21 | if isinstance(d, dict): 22 | return [f"{key}:{value}" for key, value in d.items()] 23 | else: 24 | return [d] 25 | 26 | 27 | def convert_workflow_to_unix_path(repo: str, workflow_name: str) -> str: 28 | return f"{repo}/.github/workflows/{workflow_name}" 29 | 30 | 31 | def convert_raw_github_url_to_github_com_url(raw_url: str): 32 | """ 33 | Convert a GitHub raw URL to its corresponding tree URL. 34 | convert_raw_github_url_to_github_com_url("https://raw.githubusercontent.com/myorg/myrepo/master/.github/workflows/android.yml") 35 | >> "https://github.com/myorg/myrepo/tree/master/.github/workflows/android.yml" 36 | """ 37 | 38 | tree_url = raw_url.replace("raw.githubusercontent.com", "github.com") 39 | if is_url_contains_a_token(tree_url): 40 | tree_url = tree_url.split("?")[0] 41 | 42 | parts = tree_url.split("/") 43 | parts.insert(5, "tree") 44 | return "/".join(parts) 45 | 46 | 47 | def find_workflow_by_name(repo: str, workflow_name: str) -> str: 48 | """Tries to find a workflow in the specified repo 49 | with the given workflow name 50 | 51 | Used to create a connection based on the "workflow_run" trigger (which gives the workflow name) 52 | """ 53 | with RedisConnection(Config.redis_workflows_db) as workflows_db: 54 | for workflow in workflows_db.get_all_keys(): 55 | workflow = workflow.decode() 56 | 57 | if workflow.startswith(repo): 58 | data = workflows_db.get_value_from_hash( 59 | workflow, Config.redis_data_hash_field_name 60 | ).decode() 61 | 62 | # PyYAML has issues with tabs. 63 | data = data.replace("\t", "  ") 64 | 65 | with io.StringIO() as f: 66 | f.write(data) 67 | f.seek(0) 68 | try: 69 | obj = yaml.load(f, yaml.loader.Loader) 70 | except yaml.scanner.ScannerError as e: 71 | log.error( 72 | f"[-] Failed loading: {workflow}. Exception: {e}. Skipping..." 73 | ) 74 | return 75 | 76 | # Could happen if the YAML is empty. 77 | if not obj: 78 | return 79 | 80 | if isinstance(obj, str): 81 | # Could happen in rare cases. 82 | return 83 | 84 | if "name" in obj and obj["name"] == workflow_name: 85 | return workflow 86 | 87 | 88 | def get_repo_name_from_path(path: str) -> str: 89 | """ 90 | edgedb/edgedb-pkg/integration/linux/test/ubuntu-jammy/action.yml -> 91 | edgedb/edgedb-pkg 92 | 93 | slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml -> 94 | slsa-framework/slsa-github-generator 95 | """ 96 | return "/".join(path.split("/")[:2]) 97 | 98 | 99 | def find_uses_strings(workflow_content: str) -> List[str]: 100 | """Find usage patterns of composite actions inside the workflow. 101 | E.g. if it uses "actions/checkout", then "actions/checkout" 102 | will be part of the returned list. 103 | """ 104 | re_fmt = r"[ \t]uses:\s*[\'\"]?([0-9a-zA-Z_\:\-/@\.]*)[\'\"]?" 
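# Matches references such as `uses: actions/checkout@v4`, capturing the optionally-quoted reference after "uses:".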
105 | return [match.group(1) for match in re.finditer(re_fmt, workflow_content)] 106 | 107 | 108 | def is_url_contains_a_token(url) -> bool: 109 | """ 110 | Checks whether the URL contains a token query parameter. 111 | E.g.: 112 | is_url_contains_a_token("https://raw.githubusercontent.com/RavenIntegrationTests/astro/main/.github/workflows/ci.yml?token=AAABBBCCC") 113 | >> True 114 | is_url_contains_a_token("https://raw.githubusercontent.com/RavenIntegrationTests/astro/main/.github/workflows/ci.yml") 115 | >> False 116 | """ 117 | parsed_url = urlparse(url) 118 | query_parameters = parse_qs(parsed_url.query) 119 | 120 | return "token" in query_parameters 121 | 122 | 123 | def str_to_bool(s: str) -> bool: 124 | return bool(int(s)) 125 | 126 | 127 | def raw_str_to_bool(s: str) -> bool: 128 | return s == "true" 129 | 130 | 131 | def validate_query_ids(ids_arg: str) -> list: 132 | """Check that the ids argument (e.g. "RQ-1,RQ-3") only contains IDs from config.QUERY_IDS. 133 | Return the parsed list.""" 134 | if not ids_arg: 135 | return [] 136 | 137 | ids_list = ids_arg.split(",") 138 | if not set(ids_list).issubset(QUERY_IDS): 139 | raise argparse.ArgumentTypeError( 140 | f"Invalid choice: {ids_arg}. Choose from {','.join(QUERY_IDS)}" 141 | ) 142 | return ids_list 143 | -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/config/__init__.py -------------------------------------------------------------------------------- /src/config/config.py: -------------------------------------------------------------------------------- 1 | from src.storage.neo4j_graph import GraphDb 2 | 3 | # Default Values 4 | DEBUG_DEFAULT = False 5 | MIN_STARS_DEFAULT = 1000 6 | REDIS_CLEAN_DEFAULT = False 7 | NEO4J_CLEAN_DEFAULT = False 8 | QUERIES_PATH_DEFAULT = "library" 9 | REPORT_RAW_FORMAT = "raw" 10 | REPORT_JSON_FORMAT = "json" 11 | SLACK_REPORTER = "slack" 12 | 13 | NEO4J_URI_DEFAULT = "neo4j://localhost:7687" 14 | NEO4J_USERNAME_DEFAULT = "neo4j" 15 | NEO4J_PASSWORD_DEFAULT = "123456789" 16 | 17 | REDIS_HOST_DEFAULT = "localhost" 18 | REDIS_PORT_DEFAULT = 6379 19 | 20 | # Constants 21 | REDIS_WORKFLOW_DOWNLOAD_HISTORY_SET = "workflow_download_history" 22 | REDIS_ACTION_DOWNLOAD_HISTORY_SET = "action_download_history" 23 | REDIS_WORKFLOW_INDEX_HISTORY_SET = "workflow_index_history" 24 | REDIS_ACTION_INDEX_HISTORY_SET = "action_index_history" 25 | REDIS_REF_POINTERS_HASH = "ref_pointers" 26 | # Field names to use in the hash of Actions and Workflows in the DB 27 | REDIS_DATA_HASH_FIELD_NAME = "data" 28 | REDIS_URL_HASH_FIELD_NAME = "url" 29 | REDIS_IS_PUBLIC_HASH_FIELD_NAME = "is_public" 30 | 31 | # The DB which contains the objects operations history (downloaded, indexed, etc.) 
and the ref pointers 32 | REDIS_OBJECTS_OPS_DB = 0 33 | # The DB which contains the downloaded workflows 34 | REDIS_WORKFLOWS_DB = 1 35 | # The DB which contains the downloaded actions 36 | REDIS_ACTIONS_DB = 2 37 | 38 | # CLI commands 39 | DOWNLOAD_COMMAND = "download" 40 | DOWNLOAD_ACCOUNT_COMMAND = "account" 41 | DOWNLOAD_CRAWL_COMMAND = "crawl" 42 | INDEX_COMMAND = "index" 43 | REPORT_COMMAND = "report" 44 | SEVERITY_LEVELS = { 45 | "info": 0, 46 | "low": 1, 47 | "medium": 2, 48 | "high": 3, 49 | "critical": 4, 50 | } 51 | QUERY_TAGS = [ 52 | "injection", 53 | "unauthenticated", 54 | "fixed", 55 | "priv-esc", 56 | "supply-chain", 57 | "best-practice", 58 | "endoflife", 59 | "reconnaissance", 60 | ] 61 | LAST_QUERY_ID = 17 62 | QUERY_IDS = [f"RQ-{num}" for num in range(1, LAST_QUERY_ID + 1)] 63 | 64 | 65 | def load_downloader_config(args) -> None: 66 | """Loading downloader subcommand config. 67 | Includes redis config. 68 | """ 69 | Config.debug = args.get("debug", DEBUG_DEFAULT) 70 | Config.github_token = args.get("token") 71 | Config.min_stars = args.get("min_stars", MIN_STARS_DEFAULT) 72 | Config.max_stars = args.get("max_stars") 73 | Config.account_name = args.get("account_name") 74 | Config.personal = args.get("personal") 75 | Config.clean_redis = args.get("clean_redis", REDIS_CLEAN_DEFAULT) 76 | 77 | load_redis_config(args) 78 | 79 | if Config.clean_redis: 80 | from src.storage.redis_utils import clean_redis_db 81 | 82 | clean_redis_db() 83 | 84 | 85 | def load_indexer_config(args) -> None: 86 | """Loading indexer subcommand config. 87 | Includes redis and neo4j config. 88 | """ 89 | Config.debug = args.get("debug", DEBUG_DEFAULT) 90 | Config.clean_neo4j = args.get("clean_neo4j", NEO4J_CLEAN_DEFAULT) 91 | Config.clean_redis = args.get("clean_redis", REDIS_CLEAN_DEFAULT) 92 | 93 | load_redis_config(args) 94 | load_neo4j_config(args) 95 | load_reporter_config(args) 96 | 97 | if Config.clean_neo4j or Config.graph.is_graph_empty(): 98 | from src.storage.redis_utils import clean_index 99 | from src.storage.neo4j_utils import clean_graph 100 | 101 | clean_graph() 102 | clean_index() 103 | 104 | 105 | def load_redis_config(args) -> None: 106 | Config.redis_host = args.get("redis_host", REDIS_HOST_DEFAULT) 107 | Config.redis_port = args.get("redis_port", REDIS_PORT_DEFAULT) 108 | 109 | 110 | def load_neo4j_config(args) -> None: 111 | Config.neo4j_uri = args.get("neo4j_uri", NEO4J_URI_DEFAULT) 112 | Config.neo4j_username = args.get("neo4j_user", NEO4J_USERNAME_DEFAULT) 113 | Config.neo4j_password = args.get("neo4j_pass", NEO4J_PASSWORD_DEFAULT) 114 | 115 | # Initializing the neo4j graph connection 116 | Config.graph = GraphDb( 117 | uri=Config.neo4j_uri, 118 | user=Config.neo4j_username, 119 | password=Config.neo4j_password, 120 | ) 121 | 122 | 123 | def load_reporter_config(args): 124 | Config.tags = args.get("tag") 125 | Config.severity = args.get("severity") 126 | Config.query_ids = args.get("query_ids") 127 | Config.queries_path = args.get("queries_path") 128 | Config.format = args.get("format") 129 | Config.reporter = args.get("report_command") 130 | Config.slack_token = args.get("slack_token") 131 | Config.channel_id = args.get("channel_id") 132 | 133 | load_redis_config(args) 134 | load_neo4j_config(args) 135 | 136 | 137 | class Config: 138 | # Global Config 139 | debug: bool = None 140 | 141 | # Downloader Config 142 | github_token: str = None 143 | min_stars: int = None 144 | max_stars: int = None 145 | account_name: list[str] = [] 146 | personal: bool = None 147 | 148 | # 
Indexer Configs 149 | clean_neo4j: bool = None 150 | 151 | # Redis Config 152 | redis_host: str = None 153 | redis_port: int = None 154 | clean_redis: bool = None 155 | 156 | # Redis Config Constants 157 | redis_objects_ops_db: int = REDIS_OBJECTS_OPS_DB 158 | redis_workflows_db: int = REDIS_WORKFLOWS_DB 159 | redis_actions_db: int = REDIS_ACTIONS_DB 160 | redis_data_hash_field_name: str = REDIS_DATA_HASH_FIELD_NAME 161 | redis_url_hash_field_name: str = REDIS_URL_HASH_FIELD_NAME 162 | redis_is_public_hash_field_name: str = REDIS_IS_PUBLIC_HASH_FIELD_NAME 163 | workflow_download_history_set: str = REDIS_WORKFLOW_DOWNLOAD_HISTORY_SET 164 | action_download_history_set: str = REDIS_ACTION_DOWNLOAD_HISTORY_SET 165 | workflow_index_history_set: str = REDIS_WORKFLOW_INDEX_HISTORY_SET 166 | action_index_history_set: str = REDIS_ACTION_INDEX_HISTORY_SET 167 | ref_pointers_hash: str = REDIS_REF_POINTERS_HASH 168 | 169 | # Report Config Constants 170 | tags: list = [] 171 | severity: str = None 172 | query_ids: list = [] 173 | format: str = None 174 | queries_path: str = QUERIES_PATH_DEFAULT 175 | reporter: str = None 176 | slack_token: str = None 177 | channel_id: str = None 178 | 179 | # Neo4j Config 180 | neo4j_uri: str = None 181 | neo4j_username: str = None 182 | neo4j_password: str = None 183 | graph: GraphDb = None 184 | -------------------------------------------------------------------------------- /src/downloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/downloader/__init__.py -------------------------------------------------------------------------------- /src/downloader/download.py: -------------------------------------------------------------------------------- 1 | from requests import get 2 | 3 | from src.config.config import Config 4 | from src.storage.redis_connection import RedisConnection 5 | from src.downloader.utils import ( 6 | insert_workflow_or_action_to_redis, 7 | add_ref_pointer_to_redis, 8 | ) 9 | from src.downloader.gh_api import ( 10 | get_account_generator, 11 | get_personal_account_generator, 12 | get_repository_generator, 13 | get_repository_workflows, 14 | get_repository_composite_action, 15 | get_repository_reusable_workflow, 16 | ) 17 | from src.common.utils import ( 18 | find_uses_strings, 19 | convert_workflow_to_unix_path, 20 | get_repo_name_from_path, 21 | convert_raw_github_url_to_github_com_url, 22 | is_url_contains_a_token, 23 | ) 24 | from src.workflow_components.dependency import UsesString, UsesStringType 25 | import src.logger.log as log 26 | 27 | 28 | def download_account_workflows_and_actions() -> None: 29 | """First, we determine whether the account is an organization or a user account. 30 | In both cases, we iterate over all the repositories and download their workflows and actions. 31 | 32 | For each repository we enumerate the .github/workflows directory 33 | and download all the workflows. 34 | In addition, if the repository contains an action.yml file, it is a composite action, 35 | so we download it as well. 36 | 37 | For each such workflow we also scan if it uses additional external actions. 38 | If so, we download these as well. 39 | 40 | We are trying to cache the downloads as much as we can to reduce redundant download attempts. 
41 | """ 42 | if Config.account_name: 43 | for account in Config.account_name: 44 | generator = get_account_generator(account) 45 | 46 | for repo in generator: 47 | download_workflows_and_actions(repo) 48 | 49 | elif Config.personal: 50 | generator = get_personal_account_generator() 51 | 52 | for repo in generator: 53 | download_workflows_and_actions(repo) 54 | 55 | else: 56 | raise Exception("Account name or personal flag must be provided.") 57 | 58 | 59 | def download_all_workflows_and_actions() -> None: 60 | """Iterating all repositories through Github search API. 61 | 62 | For each repository we enumerating the .github/workflows directory, 63 | and downloading all the workflows. 64 | In addition if the repository contains action.yml file, it means it is a composite action, 65 | so we download it as well. 66 | 67 | For each such workflow we also scan if it uses additional external actions. 68 | If so, we download these as well. 69 | 70 | We are trying to cache the downloads as much as we can to reduce redundant download attempts. 71 | """ 72 | 73 | log.info("[+] Starting repository iterator") 74 | generator = get_repository_generator(Config.min_stars, Config.max_stars) 75 | 76 | for repo in generator: 77 | download_workflows_and_actions(repo) 78 | 79 | 80 | def download_workflows_and_actions(repo: str) -> None: 81 | """The flow is the following: 82 | 83 | - First we enumerate .github/workflows directory for workflows 84 | - For each such workflow we download it 85 | - If that workflow contains uses:..., we analyze the string, and download the action or the reusable workflow. 86 | """ 87 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 88 | if ops_db.exists_in_set(Config.workflow_download_history_set, repo): 89 | log.debug(f"[!] Repo {repo} already scanned, skipping.") 90 | return 91 | 92 | workflows = get_repository_workflows(repo) 93 | is_public = 1 94 | 95 | log.debug(f"[+] Found {len(workflows)} workflows for {repo}") 96 | for name, url in workflows.items(): 97 | if is_url_contains_a_token(url): 98 | """ 99 | If the URL contains a token, it means it is a private repository. 100 | """ 101 | log.debug(f"[+] URL contains token argument - private repository") 102 | is_public = 0 103 | 104 | log.debug(f"[+] Fetching {name}") 105 | resp = get(url, timeout=10) 106 | 107 | if resp.status_code != 200: 108 | raise Exception( 109 | f"status code: {resp.status_code}. Response: {resp.text}" 110 | ) 111 | 112 | # We look for dependant external actions. 113 | uses_strings = find_uses_strings(resp.text) 114 | for uses_string in uses_strings: 115 | download_action_or_reusable_workflow(uses_string=uses_string, repo=repo) 116 | 117 | # Save workflow to redis 118 | workflow_unix_path = convert_workflow_to_unix_path(repo, name) 119 | github_url = convert_raw_github_url_to_github_com_url(url) 120 | insert_workflow_or_action_to_redis( 121 | db=Config.redis_workflows_db, 122 | object_path=workflow_unix_path, 123 | data=resp.text, 124 | github_url=github_url, 125 | is_public=is_public, 126 | ) 127 | 128 | # In the future, ref will be with commit sha 129 | add_ref_pointer_to_redis(workflow_unix_path, workflow_unix_path) 130 | 131 | ops_db.insert_to_set(Config.workflow_download_history_set, repo) 132 | 133 | 134 | def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None: 135 | """Whenever we find that workflow is using a "uses:" string, 136 | it means we are referencing a composite action or reusable workflow, we try to fetch it. 
137 | 138 | We use our utility tooling to parse the uses string, because it can be quite complex. 139 | """ 140 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 141 | uses_string_obj = UsesString.analyze(uses_string=uses_string) 142 | full_path = uses_string_obj.get_full_path(repo) 143 | is_public = 1 144 | 145 | # If already scanned action 146 | if ops_db.exists_in_set(Config.action_download_history_set, full_path): 147 | return 148 | # If already scanned workflow - Have to check workflow db because only it contains the full workflow path. 149 | with RedisConnection(Config.redis_workflows_db) as workflows_db: 150 | if ( 151 | workflows_db.get_value_from_hash( 152 | full_path, Config.redis_data_hash_field_name 153 | ) 154 | is not None 155 | ): 156 | return 157 | 158 | if uses_string_obj.type == UsesStringType.REUSABLE_WORKFLOW: 159 | url = get_repository_reusable_workflow(full_path) 160 | elif uses_string_obj.type == UsesStringType.ACTION: 161 | url = get_repository_composite_action(full_path) 162 | else: 163 | # Can happen with docker references. 164 | return 165 | 166 | if url is None: 167 | # This action might be a local action or a docker action. 168 | 169 | if uses_string.startswith("./"): 170 | log.warning( 171 | f"[-] Local action '{uses_string}' not found in '{repo}', skipping." 172 | ) 173 | elif uses_string_obj.type == UsesStringType.ACTION: 174 | log.warning( 175 | f"[-] Action '{uses_string}' could not be found while scanning repo '{repo}', skipping." 176 | ) 177 | elif uses_string_obj.type == UsesStringType.REUSABLE_WORKFLOW: 178 | log.warning( 179 | f"[-] Reusable workflow '{uses_string}' could not be found while scanning repo '{repo}', skipping." 180 | ) 181 | else: 182 | log.warning( 183 | f"[-] Docker Action '{uses_string}' could not be found while scanning repo '{repo}', skipping." 184 | ) 185 | return 186 | 187 | if is_url_contains_a_token(url): 188 | log.debug(f"[+] URL contains token argument - private repository") 189 | is_public = 0 190 | 191 | resp = get(url, timeout=10) 192 | if resp.status_code != 200: 193 | raise Exception(f"status code: {resp.status_code}. Response: {resp.text}") 194 | 195 | # We look for dependent external actions. 
196 | uses_strings = find_uses_strings(resp.text) 197 | new_repo = get_repo_name_from_path(full_path) 198 | 199 | for new_uses_string in uses_strings: 200 | # Guard against self-references, which caused infinite loops in several repositories 201 | new_full_path = UsesString.analyze(new_uses_string).get_full_path(new_repo) 202 | if new_full_path == full_path: 203 | continue 204 | 205 | download_action_or_reusable_workflow( 206 | uses_string=new_uses_string, repo=new_repo 207 | ) 208 | 209 | if uses_string_obj.type == UsesStringType.REUSABLE_WORKFLOW: 210 | ops_db.insert_to_set(Config.workflow_download_history_set, full_path) 211 | 212 | insert_workflow_or_action_to_redis( 213 | db=Config.redis_workflows_db, 214 | object_path=full_path, 215 | data=resp.text, 216 | github_url=convert_raw_github_url_to_github_com_url(url), 217 | is_public=is_public, 218 | ) 219 | # In the future, ref will be with commit sha 220 | add_ref_pointer_to_redis(full_path, full_path) 221 | else: # UsesStringType.ACTION 222 | ops_db.insert_to_set(Config.action_download_history_set, full_path) 223 | insert_workflow_or_action_to_redis( 224 | db=Config.redis_actions_db, 225 | object_path=full_path, 226 | data=resp.text, 227 | github_url=convert_raw_github_url_to_github_com_url(url), 228 | is_public=is_public, 229 | ) 230 | # In the future, ref will be with commit sha 231 | add_ref_pointer_to_redis(full_path, full_path) 232 | -------------------------------------------------------------------------------- /src/downloader/gh_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | from requests import get 4 | from typing import Dict, Any, Optional, Iterator 5 | from http import HTTPStatus 6 | from src.config.config import Config 7 | import src.logger.log as log 8 | 9 | """ 10 | Current rate limiting: 11 | 12 | Search API: 13 | - No token: 10 per minute 14 | - With token: 30 per minute 15 | 16 | Other standard API (contents): 17 | - With token: 5000 per hour 18 | githubusercontent API - None 19 | """ 20 | 21 | BASE_URL = "https://api.github.com" 22 | REPOSITORY_SEARCH_URL = ( 23 | BASE_URL 24 | + "/search/repositories?q={query}&sort=stars&order=desc&per_page=100&page={page}" 25 | ) 26 | 27 | 28 | ACCOUNT_INFO_URL = BASE_URL + "/users/{account_name}" 29 | USER_REPOSITORY_URL = BASE_URL + "/users/{user_name}/repos?per_page=100&page={page}" 30 | 31 | PERSONAL_USER_REPOSITORY_URL = ( 32 | BASE_URL + "/user/repos?type=owner&per_page=100&page={page}" 33 | ) 34 | 35 | ORGANIZATION_REPOSITORY_URL = ( 36 | BASE_URL + "/orgs/{organization_name}/repos?per_page=100&page={page}" 37 | ) 38 | CONTENTS_URL = BASE_URL + "/repos/{repo_path}/contents/{file_path}" 39 | 40 | REPOSITORY_QUERY_MIN = "stars:>={min_stars}" 41 | REPOSITORY_QUERY_MIN_MAX = "stars:{min_stars}..{max_stars}" 42 | 43 | headers = { 44 | "Accept": "application/vnd.github+json", 45 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.42", 46 | } 47 | 48 | 49 | def get_personal_account_generator() -> Iterator[str]: 50 | return get_user_repository_generator(user_name=None, is_personal_account=True) 51 | 52 | 53 | def get_account_generator(account_name: str) -> Iterator[str]: 54 | account_info = get_account_info(account_name=account_name) 55 | account_type = account_info.get("type") 56 | 57 | if account_type == "User": 58 | log.info(f"[+] Scanning user: {account_name}") 59 | return get_user_repository_generator( 60 | user_name=account_name, 
is_personal_account=False 61 | ) 62 | 63 | elif account_type == "Organization": 64 | log.info(f"[+] Scanning organization: {account_name}") 65 | return get_organization_repository_generator(account_name) 66 | 67 | else: 68 | log.error(f"[-] Failed to get account type for {account_name}") 69 | return None 70 | 71 | 72 | def get_user_repository_generator( 73 | user_name: str, is_personal_account: bool 74 | ) -> Iterator[str]: 75 | # Querying user repositories is not limited. We loop over each page, 76 | # and look for more repos. If there are no more repos, we break 77 | page = 1 78 | while True: 79 | log.info(f"[*] Querying page: {page}") 80 | repos = get_user_repositories( 81 | user_name=user_name, page=page, is_personal_account=is_personal_account 82 | ) 83 | if repos: 84 | for repo in repos: 85 | repo_star_count = int(repo["stargazers_count"]) 86 | log.debug( 87 | f"[+] About to download repository: {repo['full_name']}, Stars: {repo_star_count}" 88 | ) 89 | yield repo["full_name"] 90 | else: 91 | break 92 | 93 | page += 1 94 | 95 | 96 | def get_organization_repository_generator(organization_name: str) -> Iterator[str]: 97 | # Querying organization repositories is not limited. We loop over each page, 98 | # and look for more repos. If there are no more repos, we break 99 | page = 1 100 | while True: 101 | log.info(f"[*] Querying page: {page}") 102 | repos = get_organization_repositories( 103 | organization_name=organization_name, page=page 104 | ) 105 | if repos: 106 | for repo in repos: 107 | repo_star_count = int(repo["stargazers_count"]) 108 | log.debug( 109 | f"[+] About to download repository: {repo['full_name']}, Stars: {repo_star_count}" 110 | ) 111 | yield repo["full_name"] 112 | else: 113 | break 114 | 115 | page += 1 116 | 117 | 118 | def get_repository_generator( 119 | min_stars: int, 120 | max_stars: Optional[int] = 0, 121 | ) -> Iterator[str]: 122 | # GitHub allows querying only up to 1000 results, meaning 10 pages. 123 | 124 | # In addition, to make wider queries, we are going to change the query after every 10 pages. 125 | # Because our query only filters by star count, we can just narrow the star range and keep querying. 126 | last_star_count = 0 127 | while True: 128 | more_results = False 129 | for page in range(1, 11): 130 | log.info(f"[*] Querying page: {page}") 131 | if not max_stars: 132 | query = REPOSITORY_QUERY_MIN.format(min_stars=min_stars) 133 | else: 134 | query = REPOSITORY_QUERY_MIN_MAX.format( 135 | min_stars=min_stars, max_stars=max_stars 136 | ) 137 | 138 | repos = get_repository_search( 139 | query=query, 140 | page=page, 141 | ) 142 | 143 | if repos: 144 | more_results = True 145 | for repo in repos: 146 | last_star_count = int(repo["stargazers_count"]) 147 | log.debug( 148 | f"[+] About to download repository: {repo['full_name']}, Stars: {last_star_count}" 149 | ) 150 | yield repo["full_name"] 151 | else: 152 | more_results = False 153 | break 154 | 155 | page += 1 156 | 157 | if not more_results: 158 | # Received no results, we can quit. 159 | break 160 | else: 161 | max_stars = last_star_count + 1 162 | 163 | 164 | def get_account_info(account_name: str) -> Dict[str, Any]: 165 | """ 166 | Returns a dictionary with the account information. 167 | The objects look like this: 168 | { 169 | "login": "CycodeLabs", 170 | "type": "Organization", 171 | ... 
172 |     }
173 |     """
174 |     headers["Authorization"] = f"Token {Config.github_token}"
175 |     r = get(ACCOUNT_INFO_URL.format(account_name=account_name), headers=headers)
176 | 
177 |     if r.status_code != HTTPStatus.OK:
178 |         log.error(f"[-] Failed fetching repositories for {account_name}")
179 |         raise Exception(f"status code: {r.status_code}. Response: {r.text}")
180 | 
181 |     return r.json()
182 | 
183 | 
184 | def get_user_repositories(
185 |     user_name: str, page: int, is_personal_account: bool
186 | ) -> list[dict]:
187 |     """
188 |     Returns a list of all repositories for the specified user.
189 |     The objects look like this:
190 |     {
191 |         "id": 000000000,
192 |         "node_id": "R_...",
193 |         "name": "example",
194 |         "full_name": "example/example",
195 |         "private": true,
196 |         ...
197 |     }
198 |     """
199 |     headers["Authorization"] = f"Token {Config.github_token}"
200 | 
201 |     repo_endpoint = (
202 |         PERSONAL_USER_REPOSITORY_URL if is_personal_account else USER_REPOSITORY_URL
203 |     )
204 |     r = get(
205 |         repo_endpoint.format(user_name=user_name, page=page),
206 |         headers=headers,
207 |     )
208 | 
209 |     if r.status_code != HTTPStatus.OK:
210 |         log.error("[-] Failed fetching repositories")
211 |         raise Exception(f"status code: {r.status_code}. Response: {r.text}")
212 | 
213 |     return r.json()
214 | 
215 | 
216 | def get_organization_repositories(organization_name: str, page: int) -> list[dict]:
217 |     """
218 |     Returns a list of all repositories for the specified organization.
219 |     The objects look like this:
220 |     {
221 |         "id": 000000000,
222 |         "node_id": "R_...",
223 |         "name": "example",
224 |         "full_name": "example/example",
225 |         "private": true,
226 |         ...
227 |     }
228 |     """
229 |     headers["Authorization"] = f"Token {Config.github_token}"
230 | 
231 |     r = get(
232 |         ORGANIZATION_REPOSITORY_URL.format(
233 |             organization_name=organization_name, page=page
234 |         ),
235 |         headers=headers,
236 |     )
237 |     if r.status_code != HTTPStatus.OK:
238 |         log.error(f"[-] Failed fetching repositories for {organization_name}")
239 |         raise Exception(f"status code: {r.status_code}. Response: {r.text}")
240 | 
241 |     return r.json()
242 | 
243 | 
244 | def get_repository_search(query: str, page: int = 1) -> list[dict]:
245 |     headers["Authorization"] = f"Token {Config.github_token}"
246 | 
247 |     r = get(
248 |         REPOSITORY_SEARCH_URL.format(query=urllib.parse.quote_plus(query), page=page),
249 |         headers=headers,
250 |     )
251 |     if r.status_code != 200:
252 |         log.error(f"status code: {r.status_code}. Response: {r.text}")
253 |         return []
254 | 
255 |     return r.json()["items"]
256 | 
257 | 
258 | def get_repository_workflows(repo: str) -> Dict[str, str]:
259 |     """Returns the list of workflows for the specified repository.
260 |     Returns a dictionary that maps each workflow file name to its downloadable URL.
261 | 
262 |     e.g.: crowdin-upload.curriculum.yml ->
263 |     https://raw.githubusercontent.com/freeCodeCamp/freeCodeCamp/main/
264 |     .github/workflows/crowdin-upload.curriculum.yml
265 |     """
266 | 
267 |     headers["Authorization"] = f"Token {Config.github_token}"
268 | 
269 |     file_path = ".github/workflows"
270 |     r = get(CONTENTS_URL.format(repo_path=repo, file_path=file_path), headers=headers)
271 |     if r.status_code == 404:
272 |         return {}
273 |     if r.status_code == 403 and int(r.headers["X-RateLimit-Remaining"]) == 0:
274 |         import time
275 | 
276 |         time_to_sleep = int(r.headers["X-RateLimit-Reset"]) - time.time() + 1
277 |         log.error(
278 |             f"[*] Rate limit for the contents API depleted. Sleeping {time_to_sleep} seconds"
Sleeping {time_to_sleep} seconds" 279 | ) 280 | time.sleep(time_to_sleep) 281 | return get_repository_workflows(repo) 282 | if r.status_code != 200: 283 | log.error(f"status code: {r.status_code}. Response: {r.text}") 284 | return {} 285 | 286 | # When we have a single entry, the contents API returns dict instead of list. 287 | entries = None 288 | if isinstance(r.json(), list): 289 | entries = r.json() 290 | else: 291 | entries = [r.json()] 292 | 293 | workflows = {} 294 | for entry in entries: 295 | if entry["name"].endswith((".yml", ".yaml")): 296 | workflows[entry["name"]] = entry["download_url"] 297 | 298 | return workflows 299 | 300 | 301 | def get_repository_composite_action(path: str) -> str: 302 | """Returns downloadble URL for a composite action in the specific path. 303 | 304 | receives 'path_in_repo' relative path to the repository root to where search the action.yml. 305 | It should be a directory and not a file. (if file this is a reusable workflow) 306 | 307 | Raises exception if network error occured. 308 | """ 309 | path_splitted = path.split("/") 310 | repo = "/".join(path_splitted[:2]) 311 | relative_path = "/".join(path_splitted[2:]) 312 | 313 | headers["Authorization"] = f"Token {Config.github_token}" 314 | 315 | for suffix in ["action.yml", "action.yaml"]: 316 | file_path = os.path.join(relative_path, suffix) 317 | r = get( 318 | CONTENTS_URL.format(repo_path=repo, file_path=file_path), 319 | headers=headers, 320 | ) 321 | if r.status_code == 404: 322 | # can be both yml and yaml 323 | continue 324 | 325 | if r.status_code != 200: 326 | log.error(f"status code: {r.status_code}. Response: {r.text}") 327 | continue 328 | 329 | return r.json()["download_url"] 330 | 331 | 332 | def get_repository_reusable_workflow(path: str) -> str: 333 | """Returns downlodable URL for a reusable workflows in the specific path. 334 | 335 | Raises exception if network error occured. 336 | """ 337 | path_splitted = path.split("/") 338 | repo = "/".join(path_splitted[:2]) 339 | relative_path = "/".join(path_splitted[2:]) 340 | 341 | headers["Authorization"] = f"Token {Config.github_token}" 342 | 343 | r = get( 344 | CONTENTS_URL.format(repo_path=repo, file_path=relative_path), 345 | headers=headers, 346 | ) 347 | if r.status_code == 404: 348 | return 349 | if r.status_code != 200: 350 | log.error(f"status code: {r.status_code}. Response: {r.text}") 351 | return 352 | 353 | return r.json()["download_url"] 354 | -------------------------------------------------------------------------------- /src/downloader/utils.py: -------------------------------------------------------------------------------- 1 | from src.config.config import Config 2 | from src.storage.redis_connection import RedisConnection 3 | 4 | 5 | def insert_workflow_or_action_to_redis( 6 | db: str, object_path: str, data: str, github_url: str, is_public: bool 7 | ) -> None: 8 | """ 9 | Inserts Workflow or Composite Action data and metadata to Redis as a new hash. 10 | db (str): The Redis database to use. 11 | object_path (str): The path of the object to insert. 12 | data (str): Data of the object. 13 | github_url (str): The GitHub URL associated with the object. 14 | is_public (bool): Whether the object is public or not. 
15 |     """
16 |     with RedisConnection(db) as redis_db:
17 |         redis_db.insert_to_hash(object_path, Config.redis_data_hash_field_name, data)
18 |         redis_db.insert_to_hash(
19 |             object_path,
20 |             Config.redis_url_hash_field_name,
21 |             github_url,
22 |         )
23 |         redis_db.insert_to_hash(
24 |             object_path,
25 |             Config.redis_is_public_hash_field_name,
26 |             is_public,
27 |         )
28 | 
29 | 
30 | def add_ref_pointer_to_redis(uses_path: str, processed_path: str):
31 |     """
32 |     Adds a reference pointer to Redis that maps a raw path to its path including the commit SHA of the ref.
33 |     For example:
34 |     actions/checkout@v4 -> actions/checkout@c533a0a4cfc4962971818edcfac47a2899e69799
35 |     repo/some/workflow.yml@master -> repo/some/workflow.yml@c533a0a4cfc4962971818edcfac47a2899e69799
36 | 
37 |     Args:
38 |         uses_path (str): The raw path to be added as a key in the Redis hash; it is the output of dependency analysis.
39 |         processed_path (str): The path of the object including the commit SHA of the ref.
40 |     """
41 |     with RedisConnection(Config.redis_objects_ops_db) as ops_db:
42 |         ops_db.insert_to_hash(Config.ref_pointers_hash, uses_path, processed_path)
43 | 
--------------------------------------------------------------------------------
/src/indexer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/indexer/__init__.py
--------------------------------------------------------------------------------
/src/indexer/index.py:
--------------------------------------------------------------------------------
1 | import io
2 | 
3 | import yaml
4 | from yaml.constructor import Constructor
5 | 
6 | from src.storage.redis_connection import RedisConnection
7 | from src.config.config import Config
8 | from src.workflow_components.workflow import Workflow
9 | from src.workflow_components.composite_action import CompositeAction
10 | from tqdm import tqdm
11 | import src.logger.log as log
12 | from src.common.utils import str_to_bool
13 | 
14 | 
15 | # A hack to prevent PyYAML from converting "on" tags into Python boolean values.
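# Without this override, YAML 1.1 rules make yaml.load('on: push') return
# {True: "push"}, because "on"/"off"/"yes"/"no" parse as booleans; with the
# override, the workflow trigger key survives as the string "on".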
16 | def add_bool(self, node): 17 | return self.construct_scalar(node) 18 | 19 | 20 | Constructor.add_constructor("tag:yaml.org,2002:bool", add_bool) 21 | 22 | 23 | def index_downloaded_workflows_and_actions() -> None: 24 | index_downloaded_actions() 25 | index_downloaded_workflows() 26 | 27 | 28 | def index_downloaded_actions() -> None: 29 | with RedisConnection(Config.redis_actions_db) as actions_db: 30 | actions = [a.decode() for a in actions_db.get_all_keys()] 31 | log.info(f"[*] Indexing actions...") 32 | for action in tqdm(actions, desc="Indexing actions"): 33 | index_action_file(action) 34 | 35 | 36 | def index_downloaded_workflows() -> None: 37 | with RedisConnection(Config.redis_workflows_db) as workflows_db: 38 | workflows = [w.decode() for w in workflows_db.get_all_keys()] 39 | log.info(f"[*] Indexing workflows...") 40 | for workflow in tqdm(workflows, desc="Indexing workflows"): 41 | index_workflow_file(workflow) 42 | 43 | 44 | def index_action_file(action: str) -> None: 45 | try: 46 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 47 | if ops_db.exists_in_set(Config.action_index_history_set, action): 48 | return 49 | 50 | action_full_name = ops_db.get_value_from_hash( 51 | Config.ref_pointers_hash, action 52 | ).decode() 53 | with RedisConnection(Config.redis_actions_db) as actions_db: 54 | content = actions_db.get_value_from_hash( 55 | action_full_name, Config.redis_data_hash_field_name 56 | ).decode() 57 | url = actions_db.get_value_from_hash( 58 | action_full_name, Config.redis_url_hash_field_name 59 | ).decode() 60 | is_public = str_to_bool( 61 | actions_db.get_value_from_hash( 62 | action_full_name, Config.redis_is_public_hash_field_name 63 | ).decode() 64 | ) 65 | 66 | # PyYAML has issues with tabs. 67 | content = content.replace("\t", " ") 68 | 69 | with io.StringIO() as f: 70 | f.write(content) 71 | f.seek(0) 72 | try: 73 | obj = yaml.load(f, yaml.loader.Loader) 74 | except yaml.scanner.ScannerError as e: 75 | log.error( 76 | f"[-] Failed loading: {action_full_name}. Exception: {e}. Skipping..." 77 | ) 78 | return 79 | 80 | # Could happen if the YAML is empty. 81 | if not obj: 82 | return 83 | 84 | if isinstance(obj, str): 85 | # TODO: This is a symlink. We should handle it. 86 | # Only examples at the moment are for https://github.com/edgedb/edgedb-pkg 87 | # E.g., https://github.com/edgedb/edgedb-pkg/blob/master/integration/linux/build/centos-8/action.yml 88 | log.debug(f"[-] Symlink detected: {content}. Skipping...") 89 | return 90 | 91 | obj["path"] = action_full_name 92 | obj["url"] = url 93 | obj["is_public"] = is_public 94 | 95 | Config.graph.push_object(CompositeAction.from_dict(obj)) 96 | ops_db.insert_to_set(Config.action_index_history_set, action_full_name) 97 | except Exception as e: 98 | log.error(f"[-] Error while indexing {action}. 
{e}") 99 | 100 | 101 | def index_workflow_file(workflow: str) -> None: 102 | try: 103 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 104 | if ops_db.exists_in_set(Config.workflow_index_history_set, workflow): 105 | return 106 | 107 | workflow_full_name = ops_db.get_value_from_hash( 108 | Config.ref_pointers_hash, workflow 109 | ).decode() 110 | 111 | with RedisConnection(Config.redis_workflows_db) as workflows_db: 112 | content = workflows_db.get_value_from_hash( 113 | workflow_full_name, Config.redis_data_hash_field_name 114 | ).decode() 115 | url = workflows_db.get_value_from_hash( 116 | workflow_full_name, Config.redis_url_hash_field_name 117 | ).decode() 118 | is_public = str_to_bool( 119 | workflows_db.get_value_from_hash( 120 | workflow_full_name, Config.redis_is_public_hash_field_name 121 | ).decode() 122 | ) 123 | 124 | # PyYAML has issues with tabs. 125 | content = content.replace("\t", " ") 126 | 127 | with io.StringIO() as f: 128 | f.write(content) 129 | f.seek(0) 130 | try: 131 | obj = yaml.load(f, yaml.loader.Loader) 132 | except yaml.scanner.ScannerError as e: 133 | log.error( 134 | f"[-] Failed loading: {workflow_full_name}. Exception: {e}. Skipping..." 135 | ) 136 | return 137 | 138 | # Could happen if the YAML is empty. 139 | if not obj: 140 | return 141 | 142 | if isinstance(obj, str): 143 | # TODO: This is a symlink. We should handle it. 144 | # Only examples at the moment are for https://github.com/edgedb/edgedb-pkg 145 | # E.g., https://github.com/edgedb/edgedb-pkg/blob/master/integration/linux/build/centos-8/action.yml 146 | log.debug(f"[-] Symlink detected: {content}. Skipping...") 147 | return 148 | 149 | obj["path"] = workflow_full_name 150 | obj["url"] = url 151 | obj["is_public"] = is_public 152 | 153 | Config.graph.push_object(Workflow.from_dict(obj)) 154 | ops_db.insert_to_set(Config.workflow_index_history_set, workflow_full_name) 155 | 156 | except Exception as e: 157 | log.error(f"[-] Error while indexing {workflow}. 
{e}") 158 | -------------------------------------------------------------------------------- /src/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/logger/__init__.py -------------------------------------------------------------------------------- /src/logger/log.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Any 3 | from loguru import logger 4 | 5 | logger.remove() 6 | logger.add( 7 | sys.stdout, 8 | format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {message}", 9 | colorize=True, 10 | ) 11 | 12 | 13 | def info(msg: str) -> None: 14 | logger.info(msg) 15 | 16 | 17 | def debug(msg: str) -> None: 18 | from src.config.config import Config 19 | 20 | if Config.debug: 21 | logger.debug(msg) 22 | 23 | 24 | def error(msg: str) -> None: 25 | logger.error(msg) 26 | 27 | 28 | def warning(msg: str) -> None: 29 | logger.warning(msg) 30 | 31 | 32 | def catch_exit() -> None: 33 | from src.config.config import Config 34 | 35 | if Config.github_token: 36 | print("""\n[x] Index results with: raven index""") 37 | 38 | elif Config.neo4j_uri: 39 | neo4j_server = Config.neo4j_uri.split("//")[1].split(":")[0] 40 | print(f"""\n[x] View results at: http://{neo4j_server}:7474""") 41 | 42 | sys.exit(0) 43 | 44 | 45 | def fail_exit() -> None: 46 | sys.exit(1) 47 | 48 | 49 | def success_exit() -> None: 50 | sys.exit(0) 51 | -------------------------------------------------------------------------------- /src/queries/__init__.py: -------------------------------------------------------------------------------- 1 | from src.config.config import Config, SEVERITY_LEVELS 2 | import json 3 | from colorama import Fore, Style, init 4 | import textwrap 5 | 6 | init() 7 | 8 | 9 | class Query(object): 10 | def __init__( 11 | self, 12 | id: str, 13 | name: str, 14 | description: str, 15 | tags: list, 16 | severity: str, 17 | query: list, 18 | ) -> None: 19 | self.id = id 20 | self.name = name 21 | self.description = description 22 | self.tags = tags 23 | self.severity = severity 24 | self.query = query 25 | self.result = None 26 | 27 | def filter(self) -> bool: 28 | return ( 29 | self.filter_queries_by_tags() 30 | and self.filter_queries_by_severity() 31 | and self.filter_queries_by_query_id() 32 | ) 33 | 34 | def filter_queries_by_severity(self): 35 | severity_level = SEVERITY_LEVELS.get(Config.severity, 0) 36 | severity_levels = [ 37 | severity 38 | for severity, level in SEVERITY_LEVELS.items() 39 | if level >= severity_level 40 | ] 41 | 42 | return self.severity in severity_levels 43 | 44 | def filter_queries_by_tags(self): 45 | if not Config.tags: 46 | # If no tags has been given, return all detections 47 | return True 48 | 49 | for tag in self.tags: 50 | # If this detection tag is matching the 51 | # supplied tags 52 | if tag in Config.tags: 53 | return True 54 | 55 | # If no detections found with the input tags 56 | # skip this detection 57 | return False 58 | 59 | def filter_queries_by_query_id(self): 60 | if not Config.query_ids: 61 | return True 62 | 63 | if self.id in Config.query_ids: 64 | return True 65 | 66 | return False 67 | 68 | def run(self) -> list: 69 | """ 70 | Will run the cypher code with the given query. 
71 | and will return the matching workflow paths 72 | """ 73 | result = Config.graph.run_query(self.query) 74 | self.result = [dict(record).get("url") for record in result] 75 | 76 | def to_raw(self) -> str: 77 | report = "" 78 | description_length = 80 79 | 80 | report += f"{Fore.CYAN}Name:{Style.RESET_ALL} {self.name}\n" 81 | report += f"{Fore.CYAN}Severity:{Style.RESET_ALL} {self.severity}\n" 82 | 83 | wrapped_description = textwrap.fill(self.description, width=description_length) 84 | report += f"{Fore.CYAN}Description:{Style.RESET_ALL} {wrapped_description}\n" 85 | report += f"{Fore.CYAN}Tags:{Style.RESET_ALL} {self.tags}\n" 86 | 87 | report += f"{Fore.CYAN}Workflow URLS:{Style.RESET_ALL}\n" 88 | for url in self.result: 89 | report += f"- {url}\n" 90 | 91 | return report 92 | 93 | def to_json(self) -> str: 94 | return self._to_dict() 95 | 96 | def _to_dict(self) -> dict: 97 | return { 98 | "id": self.id, 99 | "name": self.name, 100 | "description": self.description, 101 | "tags": self.tags, 102 | "severity": self.severity, 103 | "result": self.result, 104 | } 105 | -------------------------------------------------------------------------------- /src/reporter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/reporter/__init__.py -------------------------------------------------------------------------------- /src/reporter/report.py: -------------------------------------------------------------------------------- 1 | from src.config.config import ( 2 | Config, 3 | REPORT_RAW_FORMAT, 4 | REPORT_JSON_FORMAT, 5 | SLACK_REPORTER, 6 | ) 7 | from src.reporter import slack_reporter 8 | from src.logger.log import success_exit 9 | from os import listdir 10 | from os.path import join 11 | import yaml 12 | import json 13 | from src.queries import Query 14 | from typing import List 15 | 16 | 17 | def raw_reporter(queries: List[Query]) -> str: 18 | report = "\n" 19 | 20 | for query in queries: 21 | report += f"{query.to_raw()}\n" 22 | 23 | return report 24 | 25 | 26 | def json_reporter(queries: List[Query]) -> str: 27 | return json.dumps([query.to_json() for query in queries], indent=4) 28 | 29 | 30 | def get_queries() -> List[Query]: 31 | queries = [] 32 | for query_file in listdir(Config.queries_path): 33 | with open(join(Config.queries_path, query_file), "r") as raw_query: 34 | yml_query = yaml.safe_load(raw_query) 35 | detection_info = yml_query.get("info") 36 | 37 | query = Query( 38 | id=yml_query.get("id"), 39 | name=detection_info.get("name"), 40 | description=detection_info.get("description"), 41 | tags=detection_info.get("tags"), 42 | severity=detection_info.get("severity"), 43 | query=yml_query.get("query"), 44 | ) 45 | 46 | if query.filter(): 47 | queries.append(query) 48 | 49 | return queries 50 | 51 | 52 | def generate() -> None: 53 | queries = get_queries() 54 | for query in queries: 55 | query.run() 56 | 57 | filtered_queries = [query for query in queries if query.result] 58 | report = "" 59 | if Config.format == REPORT_RAW_FORMAT: 60 | report = raw_reporter(filtered_queries) 61 | elif Config.format == REPORT_JSON_FORMAT: 62 | report = json_reporter(filtered_queries) 63 | 64 | if Config.reporter == SLACK_REPORTER: 65 | if Config.slack_token and Config.channel_id: 66 | client = slack_reporter.Client(Config.slack_token) 67 | message = f"\n{report}\n" 68 | client.send_report(Config.channel_id, message) 69 | 70 | else: 71 | print( 72 | "[x] Please 
provide a Slack token and channel ID to send the report to Slack."
73 |             )
74 | 
75 |     else:
76 |         print(report)
77 | 
78 |     success_exit()
79 | 
--------------------------------------------------------------------------------
/src/reporter/slack_reporter.py:
--------------------------------------------------------------------------------
1 | from slack_sdk import WebClient
2 | from slack_sdk.errors import SlackApiError
3 | 
4 | 
5 | class Client(object):
6 |     def __init__(self, token) -> None:
7 |         self.client = WebClient(token=token)
8 | 
9 |     def send_report(self, channel_id, message):
10 |         try:
11 |             self.client.files_upload_v2(
12 |                 channel=channel_id,
13 |                 filename="raven_report",
14 |                 content=message,
15 |                 initial_comment="RAVEN Security Report",
16 |             )
17 |             print("[x] Report sent successfully")
18 | 
19 |         except SlackApiError as e:
20 |             print(f"[x] Failed to send report: {e.response['error']}")
21 | 
--------------------------------------------------------------------------------
/src/storage/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/storage/__init__.py
--------------------------------------------------------------------------------
/src/storage/neo4j_graph.py:
--------------------------------------------------------------------------------
1 | from py2neo import Graph
2 | from py2neo.ogm import GraphObject
3 | from py2neo.data import Node
4 | from typing import List, Tuple, Optional
5 | import src.logger.log as log
6 | 
7 | 
8 | class GraphDb(object):
9 |     def __init__(self, uri, user, password):
10 |         self.graph = Graph(uri, auth=(user, password))
11 | 
12 |     def is_graph_empty(self) -> bool:
13 |         query = "MATCH (n) RETURN COUNT(n) as count"
14 |         return self.graph.run(query).data()[0]["count"] == 0
15 | 
16 |     def push_object(self, obj: GraphObject):
17 |         self.graph.merge(obj)
18 | 
19 |     def get_object(self, obj: GraphObject) -> Optional[GraphObject]:
20 |         """Tries to find an object in the graph.
21 |         Returns None if it wasn't found.
22 |         """
23 |         matched_obj = obj.__class__.match(self.graph, obj._id)
24 |         if not matched_obj.exists():
25 |             return None
26 |         else:
27 |             return matched_obj.first()
28 | 
29 |     def get_or_create(self, obj: GraphObject) -> Tuple[GraphObject, bool]:
30 |         """Tries to find a similar object using the given object's _id.
31 |         If one is found, returns it together with True.
32 |         If not found, inserts the given object and returns it with False.
33 |         """
34 |         matched_obj = obj.__class__.match(self.graph, obj._id)
35 |         if not matched_obj.exists():
36 |             log.warning(
37 |                 f"WARNING: We didn't find object {obj._id} of type {obj.__class__.__name__}, so we created it."
38 |             )
39 |             self.graph.push(obj)
40 |             return obj, False
41 |         else:
42 |             return matched_obj.first(), True
43 | 
44 |     def get_all_nodes(self, node_type: str) -> List[Node]:
45 |         """
46 |         Returns all nodes of the given node type in the graph.
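        Example (illustrative): get_all_nodes("Workflow") returns every indexed
        Workflow node as a py2neo Node object.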
47 | """ 48 | return list(self.graph.nodes.match(node_type)) 49 | 50 | def clean_graph(self): 51 | self.graph.delete_all() 52 | 53 | def run_query(self, query: str) -> List[Node]: 54 | return list(self.graph.run(query)) 55 | -------------------------------------------------------------------------------- /src/storage/neo4j_utils.py: -------------------------------------------------------------------------------- 1 | from src.config.config import Config 2 | 3 | 4 | def clean_graph(): 5 | Config.graph.clean_graph() 6 | -------------------------------------------------------------------------------- /src/storage/redis_connection.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import redis 4 | from src.config.config import Config 5 | import src.logger.log as log 6 | 7 | 8 | class RedisConnection: 9 | def __init__(self, redis_db): 10 | self.redis_client = None 11 | self.redis_host = Config.redis_host 12 | self.redis_port = Config.redis_port 13 | self.redis_db = redis_db 14 | 15 | def __enter__(self) -> RedisConnection: 16 | try: 17 | self.redis_client = redis.Redis( 18 | host=self.redis_host, port=self.redis_port, db=self.redis_db 19 | ) 20 | except Exception as err: 21 | log.error(f"Failed to connect to Redis: {err}") 22 | 23 | return self 24 | 25 | def __exit__(self, exc_type, exc_value, traceback): 26 | if self.redis_client: 27 | self.redis_client.close() 28 | 29 | ## Hash functions 30 | def insert_to_hash(self, hash: str, field: str, value: str) -> None: 31 | try: 32 | self.redis_client.hset(hash, field, value) 33 | except redis.exceptions.ResponseError as e: 34 | log.error(f"Failed to set value: {e}") 35 | 36 | def get_value_from_hash(self, key: str, field: str) -> str: 37 | return self.redis_client.hget(key, field) 38 | 39 | ## String functions 40 | def insert_to_string(self, key: str, value: str) -> None: 41 | try: 42 | self.redis_client.set(key, value) 43 | except redis.exceptions.ResponseError as e: 44 | log.error(f"Failed to set value: {e}") 45 | 46 | def get_string(self, key: str) -> str: 47 | return self.redis_client.get(key) 48 | 49 | ## Set functions 50 | def insert_to_set(self, set: str, value: str) -> str: 51 | try: 52 | self.redis_client.sadd(set, value) 53 | except redis.exceptions.ResponseError as e: 54 | log.error(f"Failed to set value: {e}") 55 | 56 | def exists_in_set(self, set: str, value: str) -> bool: 57 | return bool(self.redis_client.sismember(set, value)) 58 | 59 | def get_set_length(self, set: str) -> int: 60 | return self.redis_client.scard(set) 61 | 62 | def get_set_values(self, set: str) -> set: 63 | return self.redis_client.smembers(set) 64 | 65 | ## General DB functions 66 | def delete_key(self, key: str) -> None: 67 | self.redis_client.delete(key) 68 | 69 | def flush_db(self) -> None: 70 | self.redis_client.flushdb() 71 | 72 | def get_all_keys(self) -> list: 73 | return self.redis_client.keys() 74 | -------------------------------------------------------------------------------- /src/storage/redis_utils.py: -------------------------------------------------------------------------------- 1 | from src.storage.redis_connection import RedisConnection 2 | from src.config.config import Config 3 | 4 | 5 | def clean_redis_db() -> None: 6 | # Flush all databases 7 | flush_db(Config.redis_objects_ops_db) 8 | flush_db(Config.redis_actions_db) 9 | flush_db(Config.redis_workflows_db) 10 | 11 | 12 | def clean_index() -> None: 13 | with RedisConnection(Config.redis_objects_ops_db) as ops_db: 
14 | ops_db.delete_key(Config.workflow_index_history_set) 15 | ops_db.delete_key(Config.action_index_history_set) 16 | 17 | 18 | def flush_db(db_number) -> None: 19 | with RedisConnection(db_number) as db: 20 | db.flush_db() 21 | -------------------------------------------------------------------------------- /src/workflow_components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/src/workflow_components/__init__.py -------------------------------------------------------------------------------- /src/workflow_components/composite_action.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from hashlib import md5 3 | 4 | from py2neo.ogm import GraphObject, RelatedTo, Property 5 | 6 | import src.workflow_components.workflow as workflow 7 | from src.config.config import Config 8 | from src.common.utils import ( 9 | get_dependencies_in_code, 10 | convert_dict_to_list, 11 | raw_str_to_bool, 12 | ) 13 | from src.workflow_components.dependency import UsesString, UsesStringType 14 | 15 | 16 | def get_or_create_composite_action(path: str) -> "CompositeAction": 17 | """Used when need to create relations with another action. 18 | If action wasn't indexed yet, we create a stub node, 19 | that will be enriched eventually. 20 | """ 21 | ca = CompositeAction(None, path) 22 | obj = Config.graph.get_object(ca) 23 | if not obj: 24 | # This is a legitimate behavior. 25 | # Once the action will be indexed, the node will be enriched. 26 | Config.graph.push_object(ca) 27 | obj = ca 28 | return obj 29 | 30 | 31 | class CompositeActionInput(GraphObject): 32 | __primarykey__ = "_id" 33 | 34 | _id = Property() 35 | name = Property() 36 | default = Property() 37 | description = Property() 38 | required = Property() 39 | url = Property() 40 | path = Property() 41 | 42 | def __init__(self, _id: str, path: str): 43 | self._id = _id 44 | self.path = path 45 | 46 | @staticmethod 47 | def from_dict(obj_dict) -> "CompositeActionInput": 48 | i = CompositeActionInput( 49 | _id=obj_dict["_id"], 50 | path=obj_dict["path"], 51 | ) 52 | 53 | i.name = obj_dict["name"] 54 | i.url = obj_dict["url"] 55 | 56 | if "default" in obj_dict: 57 | i.default = obj_dict["default"] 58 | 59 | if "description" in obj_dict: 60 | i.description = obj_dict["description"] 61 | 62 | i.required = raw_str_to_bool(obj_dict.get("required", "false")) 63 | 64 | return i 65 | 66 | 67 | class CompositeActionStep(GraphObject): 68 | __primarykey__ = "_id" 69 | 70 | _id = Property() 71 | name = Property() 72 | path = Property() 73 | run = Property() 74 | uses = Property() 75 | ref = Property() 76 | shell = Property() 77 | url = Property() 78 | with_prop = Property("with") 79 | 80 | action = RelatedTo("CompositeAction") 81 | using_param = RelatedTo(workflow.StepCodeDependency) 82 | 83 | def __init__(self, _id: str, path: str): 84 | self._id = _id 85 | self.path = path 86 | 87 | @staticmethod 88 | def from_dict(obj_dict) -> "CompositeActionStep": 89 | s = CompositeActionStep(_id=obj_dict["_id"], path=obj_dict["path"]) 90 | s.url = obj_dict["url"] 91 | if "id" in obj_dict: 92 | s.name = obj_dict["id"] 93 | if "run" in obj_dict: 94 | s.run = obj_dict["run"] 95 | 96 | # Adding ${{...}} dependencies as an entity. 
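            # e.g., run: echo "${{ inputs.param1 }}" yields the code dependency
            # "inputs.param1", which is attached as a StepCodeDependency node.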
97 | for code_dependency in get_dependencies_in_code(s.run): 98 | param = workflow.StepCodeDependency(code_dependency, s.path) 99 | param.url = s.url 100 | s.using_param.add(param) 101 | 102 | if "shell" in obj_dict: 103 | s.shell = obj_dict["shell"] 104 | elif "uses" in obj_dict: 105 | s.uses = obj_dict["uses"] 106 | # Uses string is quite complex, and may reference to several types of nodes. 107 | # In the case of action steps, it may only reference actions (and not reusable workflows). 108 | uses_string_obj = UsesString.analyze(uses_string=s.uses) 109 | if uses_string_obj.type == UsesStringType.ACTION: 110 | obj = get_or_create_composite_action( 111 | uses_string_obj.get_full_path(s.path) 112 | ) 113 | s.action.add(obj) 114 | 115 | if "with" in obj_dict: 116 | s.with_prop = convert_dict_to_list(obj_dict["with"]) 117 | 118 | if len(s.uses.split("@")) > 1: 119 | s.ref = s.uses.split("@")[1] 120 | 121 | return s 122 | 123 | 124 | class CompositeAction(GraphObject): 125 | __primarykey__ = "_id" 126 | 127 | _id = Property() 128 | name = Property() 129 | path = Property() 130 | using = Property() 131 | image = Property() 132 | url = Property() 133 | is_public = Property() 134 | 135 | composite_action_input = RelatedTo(CompositeActionInput) 136 | steps = RelatedTo(CompositeActionStep) 137 | 138 | def __init__(self, name: Optional[str], path: str): 139 | self.name = name 140 | self.path = path 141 | self._id = md5(path.encode()).hexdigest() 142 | 143 | @staticmethod 144 | def from_dict(obj_dict) -> "CompositeAction": 145 | ca = CompositeAction(name=obj_dict.get("name"), path=obj_dict["path"]) 146 | 147 | ca.url = obj_dict["url"] 148 | ca.is_public = obj_dict["is_public"] 149 | if "inputs" in obj_dict: 150 | for name, input in obj_dict["inputs"].items(): 151 | input["_id"] = md5(f"{ca._id}_{name}".encode()).hexdigest() 152 | input["name"] = name 153 | input["url"] = ca.url 154 | input["path"] = ca.path 155 | ca.composite_action_input.add(CompositeActionInput.from_dict(input)) 156 | 157 | if "runs" in obj_dict: 158 | d_runs = obj_dict["runs"] 159 | 160 | if "using" in d_runs: 161 | ca.using = d_runs["using"] 162 | 163 | if "image" in d_runs: 164 | ca.image = d_runs["image"] 165 | 166 | if "steps" in d_runs: 167 | for i, step in enumerate(d_runs["steps"]): 168 | step["_id"] = md5(f"{ca._id}_{i}".encode()).hexdigest() 169 | step["path"] = ca.path 170 | step["url"] = ca.url 171 | ca.steps.add(CompositeActionStep.from_dict(step)) 172 | 173 | return ca 174 | -------------------------------------------------------------------------------- /src/workflow_components/dependency.py: -------------------------------------------------------------------------------- 1 | import os 2 | from enum import Enum 3 | 4 | from src.common.utils import get_repo_name_from_path 5 | 6 | 7 | class UsesStringType(Enum): 8 | ACTION = 1 9 | REUSABLE_WORKFLOW = 2 10 | DOCKER = 3 11 | 12 | 13 | class UsesString(object): 14 | type: UsesStringType 15 | path: str # E.g., actions/checkout, ./.github/actions/action-setup 16 | ref: str # E.g., v3. Can be a branch name, tag name, or commit SHA 17 | is_relative: bool 18 | 19 | @staticmethod 20 | def analyze(uses_string: str) -> "UsesString": 21 | """Parses the uses string, and extract relevant information: 22 | - Whether path is relative or absolute 23 | - Reference type (reusable workflow/action/docker) 24 | - path and ref 25 | 26 | If analyzed path is relative, the full path should be fetched using `get_full_path`. 
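        Illustrative results (derived from the parsing rules implemented below):
            analyze("actions/checkout@v3")        -> type=ACTION, path="actions/checkout", ref="v3"
            analyze("./.github/workflows/ci.yml") -> type=REUSABLE_WORKFLOW, is_relative=True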
27 | 
28 |         The uses string could point to:
29 |         - uses: actions/checkout@v3 (normal usage of external action)
30 |         - uses: github/codeql-action/analyze@v1 (external action in a directory)
31 |         - uses: ./.github/actions/action-setup (local external action pointing to action.yml)
32 |         - uses: ./.github/actions/action-install (local external action pointing to a Dockerfile)
33 |         - uses: ./.github/actions/build.yml (reusable workflow in local directory)
34 |         - uses: octo-org/this-repo/.github/workflows/workflow-1.yml@latest (reusable workflow in other directory)
35 |         - uses: docker://docker.io/library/golang:1.17.1-alpine@sha256:... (nothing to download)
36 |         """
37 |         uses_string_obj = UsesString()
38 |         uses_string_obj.is_relative = False
39 | 
40 |         uses_string_splitted = uses_string.split("@")
41 |         uses_string_obj.path = uses_string_splitted[0]
42 |         if len(uses_string_splitted) > 1:
43 |             uses_string_obj.ref = uses_string_splitted[1]
44 | 
45 |         # Get rid of the irrelevant cases
46 |         if uses_string_obj.path.startswith("docker://"):
47 |             uses_string_obj.type = UsesStringType.DOCKER
48 |             return uses_string_obj
49 | 
50 |         if uses_string_obj.path.endswith(".yml") or uses_string_obj.path.endswith(
51 |             ".yaml"
52 |         ):
53 |             uses_string_obj.type = UsesStringType.REUSABLE_WORKFLOW
54 |         else:
55 |             uses_string_obj.type = UsesStringType.ACTION
56 | 
57 |         if uses_string_obj.path.startswith("./"):
58 |             # local action or local reusable workflow
59 |             uses_string_obj.is_relative = True
60 |             return uses_string_obj
61 | 
62 |         # remote action or remote reusable workflow
63 |         return uses_string_obj
64 | 
65 |     def get_full_path(self, file_path: str) -> str:
66 |         """If the action or reusable workflow path is a relative path,
67 |         to calculate the full path we need the current repository where it was found.
68 |         """
69 |         if not self.is_relative:
70 |             return self.path
71 |         # We care only for the repository path, so we take the first two elements.
72 | 
73 |         repo = get_repo_name_from_path(file_path)
74 |         # This is a trick to evaluate the path (e.g., "..", "./", etc.)
75 |         return os.path.relpath(os.path.abspath(os.path.join(repo, self.path)))
76 | 
--------------------------------------------------------------------------------
/src/workflow_components/parsing_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, List, Union, Optional
2 | 
3 | 
4 | def parse_workflow_trigger(
5 |     trigger_obj: Union[str, List[str], Dict[str, Any]],
6 | ) -> List[str]:
7 |     """Parse and normalize the trigger field of a workflow.
8 |     Returns list of triggers.
9 |     Examples for input and output:
10 |     push -> ["push"]
11 |     ["push"] -> ["push"]
12 |     ["push", "pull_request"] -> ["push", "pull_request"]
13 |     {
14 |         "push": {
15 |             "branches": [
16 |                 "master"
17 |             ]
18 |         }
19 |     } -> ["push"]
20 |     """
21 |     if isinstance(trigger_obj, str):
22 |         trigger_list = [trigger_obj]
23 |     elif isinstance(trigger_obj, list):
24 |         trigger_list = []
25 |         for elem in trigger_obj:
26 |             if isinstance(elem, dict):
27 |                 trigger_list.extend(elem.keys())
28 |             else:
29 |                 trigger_list.append(elem)
30 |     elif isinstance(trigger_obj, dict):
31 |         trigger_list = list(trigger_obj.keys())
32 |     else:
33 |         # Shouldn't happen.
34 |         trigger_list = []
35 | 
36 |     return trigger_list
37 | 
38 | 
39 | def parse_job_machine(
40 |     runs_on_obj: Optional[Union[str, List[str], Dict[str, Any]]],
41 | ) -> Optional[List[str]]:
42 |     """Parse the runs-on field of a job.
43 | Examples for input and output: 44 | ubuntu-latest -> ["ubuntu-latest"] 45 | ["ubuntu-latest"] -> ["ubuntu-latest"] 46 | { 47 | "labels": [ 48 | "ubuntu-latest" 49 | ] 50 | } -> ["ubuntu-latest"] 51 | """ 52 | if isinstance(runs_on_obj, str): 53 | return [runs_on_obj] 54 | elif isinstance(runs_on_obj, list): 55 | return runs_on_obj 56 | elif isinstance(runs_on_obj, dict): 57 | return runs_on_obj["labels"] 58 | 59 | return None 60 | -------------------------------------------------------------------------------- /src/workflow_components/workflow.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict, Any 2 | from hashlib import md5 3 | 4 | from py2neo.ogm import GraphObject, RelatedTo, RelatedFrom, Property 5 | from src.config.config import Config 6 | from src.common.utils import ( 7 | get_dependencies_in_code, 8 | get_repo_name_from_path, 9 | convert_dict_to_list, 10 | find_workflow_by_name, 11 | raw_str_to_bool, 12 | ) 13 | from src.workflow_components.parsing_utils import ( 14 | parse_workflow_trigger, 15 | parse_job_machine, 16 | ) 17 | from src.workflow_components.dependency import UsesString, UsesStringType 18 | import src.logger.log as log 19 | 20 | 21 | def get_or_create_workflow(path: str) -> "Workflow": 22 | """Used when need to create relations with another workflow. 23 | If workflow wasn't indexed yet, we create a stub node, 24 | that will be enriched eventually. 25 | """ 26 | w = Workflow(None, path) 27 | obj = Config.graph.get_object(w) 28 | if not obj: 29 | # This is a legitimate behavior. 30 | # Once the workflow will be indexed, the node will be enriched. 31 | Config.graph.push_object(w) 32 | obj = w 33 | return obj 34 | 35 | 36 | class StepCodeDependency(GraphObject): 37 | __primarykey__ = "_id" 38 | 39 | _id = Property() 40 | param = Property() 41 | url = Property() 42 | path = Property() 43 | 44 | def __init__(self, param: str, path: str): 45 | self.param = param 46 | self.path = path 47 | self._id = md5(f"{param}_{path}".encode()).hexdigest() 48 | 49 | 50 | class Step(GraphObject): 51 | __primarykey__ = "_id" 52 | 53 | _id = Property() 54 | name = Property() 55 | path = Property() 56 | run = Property() 57 | uses = Property() 58 | ref = Property() 59 | with_prop = Property("with") 60 | url = Property() 61 | 62 | action = RelatedTo("src.workflow_components.composite_action.CompositeAction") 63 | reusable_workflow = RelatedTo("Workflow") 64 | using_param = RelatedTo("StepCodeDependency") 65 | 66 | def __init__(self, _id: str, name: Optional[str], path: str): 67 | self._id = _id 68 | self.name = name 69 | self.path = path 70 | 71 | @staticmethod 72 | def from_dict(obj_dict) -> "Step": 73 | s = Step(_id=obj_dict["_id"], name=obj_dict.get("name"), path=obj_dict["path"]) 74 | s.url = obj_dict["url"] 75 | if "run" in obj_dict: 76 | s.run = obj_dict["run"] 77 | 78 | # Adding ${{...}} dependencies as an entity. 79 | for code_dependency in get_dependencies_in_code(s.run): 80 | param = StepCodeDependency(code_dependency, s.path) 81 | param.url = s.url 82 | s.using_param.add(param) 83 | elif "uses" in obj_dict: 84 | s.uses = obj_dict["uses"] 85 | # Uses string is quite complex, and may reference to several types of nodes. 86 | # In the case of steps, it may only reference actions (and not reusable workflows). 87 | uses_string_obj = UsesString.analyze(uses_string=s.uses) 88 | if uses_string_obj.type == UsesStringType.ACTION: 89 | # Avoiding circular imports. 
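                # (composite_action imports this module at its top level, so a
                # module-level import here would create an import cycle; importing
                # lazily inside the method breaks it.)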
90 |                 import src.workflow_components.composite_action as composite_action
91 | 
92 |                 obj = composite_action.get_or_create_composite_action(
93 |                     uses_string_obj.get_full_path(s.path)
94 |                 )
95 |                 s.action.add(obj)
96 | 
97 |             if "with" in obj_dict:
98 |                 s.with_prop = convert_dict_to_list(obj_dict["with"])
99 | 
100 |             if len(s.uses.split("@")) > 1:
101 |                 s.ref = s.uses.split("@")[1]
102 |         return s
103 | 
104 | 
105 | class Job(GraphObject):
106 |     __primarykey__ = "_id"
107 | 
108 |     _id = Property()
109 |     name = Property()
110 |     path = Property()
111 |     machine = Property()
112 |     uses = Property()
113 |     ref = Property()
114 |     url = Property()
115 |     with_prop = Property("with")
116 | 
117 |     steps = RelatedTo(Step)
118 |     reusable_workflow = RelatedTo("Workflow")
119 | 
120 |     def __init__(self, _id: str, name: str, path: str):
121 |         self._id = _id
122 |         self.name = name
123 |         self.path = path
124 | 
125 |     @staticmethod
126 |     def from_dict(obj_dict) -> "Job":
127 |         j = Job(_id=obj_dict["_id"], name=obj_dict["name"], path=obj_dict["path"])
128 |         if "uses" in obj_dict:
129 |             j.uses = obj_dict["uses"]
130 |             # The uses string is quite complex and may reference several types of nodes.
131 |             # In the case of jobs, it may only reference reusable workflows.
132 |             uses_string_obj = UsesString.analyze(uses_string=j.uses)
133 |             if uses_string_obj.type == UsesStringType.REUSABLE_WORKFLOW:
134 |                 obj = get_or_create_workflow(uses_string_obj.get_full_path(j.path))
135 |                 j.reusable_workflow.add(obj)
136 | 
137 |             if "with" in obj_dict:
138 |                 j.with_prop = convert_dict_to_list(obj_dict["with"])
139 | 
140 |             if len(j.uses.split("@")) > 1:
141 |                 j.ref = j.uses.split("@")[1]
142 | 
143 |         j.url = obj_dict["url"]
144 |         if "steps" in obj_dict:
145 |             j.machine = parse_job_machine(obj_dict.get("runs-on"))
146 | 
147 |             for i, step in enumerate(obj_dict["steps"]):
148 |                 step["_id"] = md5(f"{j._id}_{i}".encode()).hexdigest()
149 |                 step["path"] = j.path
150 |                 step["url"] = j.url
151 |                 j.steps.add(Step.from_dict(step))
152 | 
153 |         return j
154 | 
155 | 
156 | class Workflow(GraphObject):
157 |     __primarykey__ = "_id"
158 | 
159 |     _id = Property()
160 |     name = Property()
161 |     path = Property()
162 |     trigger = Property()
163 |     permissions = Property()
164 |     url = Property()
165 |     is_public = Property()
166 | 
167 |     jobs = RelatedTo(Job)
168 |     triggered_by = RelatedTo("Workflow")
169 |     reusable_workflow_input = RelatedTo("ReusableWorkflowInput")
170 | 
171 |     def __init__(self, name: Optional[str], path: str):
172 |         self.name = name
173 |         self.path = path
174 |         self._id = md5(path.encode()).hexdigest()
175 | 
176 |     @staticmethod
177 |     def from_dict(obj_dict: Dict[str, Any]) -> "Workflow":
178 |         w = Workflow(name=obj_dict.get("name"), path=obj_dict["path"])
179 | 
180 |         w.trigger = parse_workflow_trigger(obj_dict["on"])
181 | 
182 |         w.url = obj_dict["url"]
183 |         w.is_public = obj_dict["is_public"]
184 | 
185 |         # Handling the special case of workflow_run.
186 |         # When we meet it, we want to create a special relation from the triggering workflow
187 |         # to the triggered one.
188 |         # There are cases where the triggering workflow wasn't loaded yet.
189 |         # In that case we create a stub node for it,
190 |         # and once we meet it, we'll enrich it.
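        # Illustrative shape of the trigger this branch handles (the workflow
        # name is hypothetical):
        #   on:
        #     workflow_run:
        #       workflows: ["CI"]
        #       types: [completed]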
191 | if "workflow_run" in w.trigger: 192 | workflow_run = obj_dict["on"]["workflow_run"] 193 | triggering_workflows = workflow_run["workflows"] 194 | types = workflow_run["types"] 195 | for workflow_name in triggering_workflows: 196 | repo = get_repo_name_from_path(w.path) 197 | w_path = find_workflow_by_name(repo, workflow_name) 198 | if w_path is None: 199 | log.debug( 200 | f"[-] Couldn't find the triggering workflow '{workflow_name}' in repository '{repo}'" 201 | ) 202 | else: 203 | w_triggering = get_or_create_workflow(w_path) 204 | w.triggered_by.add(w_triggering, types=types) 205 | 206 | # Handling special case of workflow_call 207 | # When we meet it, we want to create a special relation to inputs of the reusable workflow. 208 | # We continue to treat the workflow as a regular workflow, and not as a reusable workflow. 209 | # But the difference is that we connected the different inputs to the workflow. 210 | if "workflow_call" in w.trigger: 211 | wokrflow_call = obj_dict["on"]["workflow_call"] 212 | inputs = wokrflow_call["inputs"] 213 | for input_name, input in inputs.items(): 214 | input["_id"] = md5(f"{w._id}_{input_name}".encode()).hexdigest() 215 | input["name"] = input_name 216 | input["url"] = w.url 217 | input["path"] = w.path 218 | w.reusable_workflow_input.add(ReusableWorkflowInput.from_dict(input)) 219 | 220 | if "permissions" in obj_dict: 221 | w.permissions = convert_dict_to_list(obj_dict["permissions"]) 222 | 223 | for job_name, job in obj_dict["jobs"].items(): 224 | if not isinstance(job, dict): 225 | log.error("[-] Invalid job structure") 226 | raise Exception("Invalid job structure.") 227 | job["_id"] = md5(f"{w._id}_{job_name}".encode()).hexdigest() 228 | job["path"] = w.path 229 | job["name"] = job_name 230 | job["url"] = w.url 231 | w.jobs.add(Job.from_dict(job)) 232 | 233 | return w 234 | 235 | 236 | class ReusableWorkflowInput(GraphObject): 237 | __primarykey__ = "_id" 238 | 239 | _id = Property() 240 | name = Property() 241 | default = Property() 242 | description = Property() 243 | required = Property() 244 | path = Property() 245 | url = Property() 246 | 247 | def __init__(self, _id: str, path: str): 248 | self._id = _id 249 | self.path = path 250 | 251 | @staticmethod 252 | def from_dict(obj_dict) -> "ReusableWorkflowInput": 253 | i = ReusableWorkflowInput(_id=obj_dict["_id"], path=obj_dict["path"]) 254 | i.name = obj_dict["name"] 255 | i.url = obj_dict["url"] 256 | 257 | if "default" in obj_dict: 258 | i.default = obj_dict.get("default") 259 | 260 | if "description" in obj_dict: 261 | i.description = obj_dict.get("description") 262 | 263 | i.required = raw_str_to_bool(obj_dict.get("required", "false")) 264 | 265 | return i 266 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/tests/__init__.py -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/tests/integration/__init__.py -------------------------------------------------------------------------------- /tests/integration/integration_consts.py: -------------------------------------------------------------------------------- 1 | ## Queries ## 2 | 3 | 
GET_RELATIONSHIPS_BY_PATH_QUERY = """ 4 | MATCH (s)-[r]->(e) 5 | where s.path in {paths_list} 6 | RETURN s, r, e 7 | """ 8 | 9 | GET_NODES_BY_PATH_QUERY = """ 10 | MATCH (s) 11 | where s.path in {paths_list} 12 | RETURN s 13 | """ 14 | 15 | START_NODE_INDEX = 0 16 | DEST_NODE_INDEX = 2 17 | 18 | ## Tests Configs ## 19 | TESTS_CONFIGS = [ 20 | { 21 | "test_name": "test_integration_1", 22 | "json_path": "tests/integration/structures_json/integration-1.json", 23 | "description": "Tests Integration 1's graph structure. This is a repository with a single workflow. The workflow has Jobs, Steps, and StepCodeDependency. It uses a composite action which is also in the organization. The Composite Action has Steps and StepCodeDependency. These are all the node types that we currently support.", 24 | "queries": { 25 | "nodes_query": GET_NODES_BY_PATH_QUERY, 26 | "relationships_query": GET_RELATIONSHIPS_BY_PATH_QUERY, 27 | "to_format": { 28 | "paths_list": [ 29 | "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 30 | "RavenIntegrationTests/CompositeAction-Mock", 31 | ] 32 | }, 33 | }, 34 | }, 35 | { 36 | "test_name": "test_demo_index_repos", 37 | "json_path": "tests/integration/structures_json/demo-index.json", 38 | "description": "Tests Demo-[1-4]'s graph structures combined. These are four different repositories that have similar workflows. They all have a workflow that uses the checkout action.", 39 | "queries": { 40 | "nodes_query": GET_NODES_BY_PATH_QUERY, 41 | "relationships_query": GET_RELATIONSHIPS_BY_PATH_QUERY, 42 | "to_format": { 43 | "paths_list": [ 44 | "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", 45 | "RavenIntegrationTests/Demo-2/.github/workflows/demo-workflow.yml", 46 | "RavenIntegrationTests/Demo-3/.github/workflows/demo-workflow.yml", 47 | "RavenIntegrationTests/Demo-4/.github/workflows/demo-workflow.yml", 48 | "actions/checkout", 49 | ] 50 | }, 51 | }, 52 | }, 53 | { 54 | "test_name": "test_reusable_workflows", 55 | "json_path": "tests/integration/structures_json/reusable-workflows.json", 56 | "description": "Tests ReusableWorkflows-Mock's graph structure. This is a repository with two workflows. 
One of them uses the other as a reusable workflow.", 57 | "queries": { 58 | "nodes_query": GET_NODES_BY_PATH_QUERY, 59 | "relationships_query": GET_RELATIONSHIPS_BY_PATH_QUERY, 60 | "to_format": { 61 | "paths_list": [ 62 | "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 63 | "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/test.yml", 64 | ] 65 | }, 66 | }, 67 | }, 68 | ] 69 | -------------------------------------------------------------------------------- /tests/integration/structures_json/integration-1.json: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": [ 3 | { 4 | "path": "RavenIntegrationTests/CompositeAction-Mock", 5 | "using": "composite", 6 | "name": "Example composite GitHub action", 7 | "is_public": true, 8 | "_id": "0cedc4763c9b12640b59748858f52ecb", 9 | "url": "https://github.com/RavenIntegrationTests/CompositeAction-Mock/tree/main/action.yml", 10 | "labels": [ 11 | "CompositeAction" 12 | ] 13 | }, 14 | { 15 | "path": "RavenIntegrationTests/CompositeAction-Mock", 16 | "default": "true", 17 | "name": "param1", 18 | "description": "Input parameter placeholder", 19 | "_id": "e0e8b6ca1b9aab6e0dd0e1fe70c88a08", 20 | "required": true, 21 | "url": "https://github.com/RavenIntegrationTests/CompositeAction-Mock/tree/main/action.yml", 22 | "labels": [ 23 | "CompositeActionInput" 24 | ] 25 | }, 26 | { 27 | "path": "RavenIntegrationTests/CompositeAction-Mock", 28 | "shell": "bash", 29 | "name": "context", 30 | "run": "echo \"action-result=${{ inputs.param1 }}\" >> $GITHUB_OUTPUT\n", 31 | "_id": "db50f26195dcd8ca600a046df26f0a3a", 32 | "url": "https://github.com/RavenIntegrationTests/CompositeAction-Mock/tree/main/action.yml", 33 | "labels": [ 34 | "CompositeActionStep" 35 | ] 36 | }, 37 | { 38 | "path": "RavenIntegrationTests/CompositeAction-Mock", 39 | "param": "inputs.param1", 40 | "_id": "004b8c21e56d8fbd8c089dfc0de3f70a", 41 | "url": "https://github.com/RavenIntegrationTests/CompositeAction-Mock/tree/main/action.yml", 42 | "labels": [ 43 | "StepCodeDependency" 44 | ] 45 | }, 46 | { 47 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 48 | "name": "integration_workflow", 49 | "is_public": true, 50 | "_id": "d65c066b4fe60e52c419b3e7043d297e", 51 | "trigger": [ 52 | "pull_request_target" 53 | ], 54 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 55 | "labels": [ 56 | "Workflow" 57 | ] 58 | }, 59 | { 60 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 61 | "machine": [ 62 | "ubuntu-latest" 63 | ], 64 | "name": "first_job", 65 | "_id": "2007449e2ba101423871ac669de5b750", 66 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 67 | "labels": [ 68 | "Job" 69 | ] 70 | }, 71 | { 72 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 73 | "ref": "v1", 74 | "uses": "RavenIntegrationTests/CompositeAction-Mock@v1", 75 | "_id": "88e6517ba8d71f0851e6f3b33ae2e51b", 76 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 77 | "labels": [ 78 | "Step" 79 | ] 80 | }, 81 | { 82 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 83 | "ref": "main", 84 | "uses": "RavenIntegrationTests/CompositeAction-Mock@main", 85 | "_id": 
"a7957c48867f1f675ab6c9e4f1828c14", 86 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 87 | "labels": [ 88 | "Step" 89 | ] 90 | }, 91 | { 92 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 93 | "name": "print_env", 94 | "run": "print_env", 95 | "_id": "2a12d8215584fab339b14da4d6a904ff", 96 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 97 | "labels": [ 98 | "Step" 99 | ] 100 | }, 101 | { 102 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 103 | "machine": [ 104 | "ubuntu-latest" 105 | ], 106 | "name": "second_job", 107 | "_id": "57e4ebfad3aa1f852f256d59d7c7e982", 108 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 109 | "labels": [ 110 | "Job" 111 | ] 112 | }, 113 | { 114 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 115 | "name": "print_pull_request_title", 116 | "run": "echo \"Pull request title is ${{ github.event.pull_request.title }}\"", 117 | "_id": "813206f991310b30c1405955aeefb00e", 118 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 119 | "labels": [ 120 | "Step" 121 | ] 122 | }, 123 | { 124 | "path": "RavenIntegrationTests/Integration-1/.github/workflows/integration-workflow.yml", 125 | "param": "github.event.pull_request.title", 126 | "_id": "62cded2b531643f4d784c4e2e5c614d1", 127 | "url": "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml", 128 | "labels": [ 129 | "StepCodeDependency" 130 | ] 131 | } 132 | ], 133 | "relationships": [ 134 | { 135 | "start_node": "0cedc4763c9b12640b59748858f52ecb", 136 | "type": "COMPOSITE_ACTION_INPUT", 137 | "end_node": "e0e8b6ca1b9aab6e0dd0e1fe70c88a08" 138 | }, 139 | { 140 | "start_node": "db50f26195dcd8ca600a046df26f0a3a", 141 | "type": "USING_PARAM", 142 | "end_node": "004b8c21e56d8fbd8c089dfc0de3f70a" 143 | }, 144 | { 145 | "start_node": "0cedc4763c9b12640b59748858f52ecb", 146 | "type": "STEPS", 147 | "end_node": "db50f26195dcd8ca600a046df26f0a3a" 148 | }, 149 | { 150 | "start_node": "88e6517ba8d71f0851e6f3b33ae2e51b", 151 | "type": "ACTION", 152 | "end_node": "0cedc4763c9b12640b59748858f52ecb" 153 | }, 154 | { 155 | "start_node": "a7957c48867f1f675ab6c9e4f1828c14", 156 | "type": "ACTION", 157 | "end_node": "0cedc4763c9b12640b59748858f52ecb" 158 | }, 159 | { 160 | "start_node": "2007449e2ba101423871ac669de5b750", 161 | "type": "STEPS", 162 | "end_node": "88e6517ba8d71f0851e6f3b33ae2e51b" 163 | }, 164 | { 165 | "start_node": "2007449e2ba101423871ac669de5b750", 166 | "type": "STEPS", 167 | "end_node": "a7957c48867f1f675ab6c9e4f1828c14" 168 | }, 169 | { 170 | "start_node": "2007449e2ba101423871ac669de5b750", 171 | "type": "STEPS", 172 | "end_node": "2a12d8215584fab339b14da4d6a904ff" 173 | }, 174 | { 175 | "start_node": "813206f991310b30c1405955aeefb00e", 176 | "type": "USING_PARAM", 177 | "end_node": "62cded2b531643f4d784c4e2e5c614d1" 178 | }, 179 | { 180 | "start_node": "57e4ebfad3aa1f852f256d59d7c7e982", 181 | "type": "STEPS", 182 | "end_node": "813206f991310b30c1405955aeefb00e" 183 | }, 184 | { 185 | "start_node": "d65c066b4fe60e52c419b3e7043d297e", 186 | "type": "JOBS", 187 | "end_node": "2007449e2ba101423871ac669de5b750" 188 | }, 189 | { 190 | "start_node": 
"d65c066b4fe60e52c419b3e7043d297e", 191 | "type": "JOBS", 192 | "end_node": "57e4ebfad3aa1f852f256d59d7c7e982" 193 | } 194 | ] 195 | } -------------------------------------------------------------------------------- /tests/integration/structures_json/reusable-workflows.json: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": [ 3 | { 4 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 5 | "is_public": true, 6 | "name": "reusable_workflow", 7 | "trigger": [ 8 | "workflow_call" 9 | ], 10 | "_id": "ff1b0c2b61a25d227707be99c3901303", 11 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 12 | "labels": [ 13 | "Workflow" 14 | ] 15 | }, 16 | { 17 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 18 | "default": "input_1_defult", 19 | "name": "input_1", 20 | "_id": "aaaaf3ef437b55388bec93f26c8f9c44", 21 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 22 | "required": true, 23 | "labels": [ 24 | "ReusableWorkflowInput" 25 | ] 26 | }, 27 | { 28 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 29 | "default": 1, 30 | "name": "input_2", 31 | "_id": "3f81ef5510e05f61bfa26a950ebd2c3d", 32 | "required": false, 33 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 34 | "labels": [ 35 | "ReusableWorkflowInput" 36 | ] 37 | }, 38 | { 39 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 40 | "default": "true", 41 | "name": "input_3", 42 | "_id": "2fe112d8d392c700041b353c90db4edf", 43 | "required": false, 44 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 45 | "labels": [ 46 | "ReusableWorkflowInput" 47 | ] 48 | }, 49 | { 50 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 51 | "machine": [ 52 | "ubuntu-latest" 53 | ], 54 | "name": "test", 55 | "_id": "292255a5b241802f89614333a7a13539", 56 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 57 | "labels": [ 58 | "Job" 59 | ] 60 | }, 61 | { 62 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 63 | "ref": "v4", 64 | "name": "Checkout", 65 | "uses": "actions/checkout@v4", 66 | "_id": "3cae77d1d794cf1a5dd88b4e2e38bd22", 67 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 68 | "labels": [ 69 | "Step" 70 | ] 71 | }, 72 | { 73 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 74 | "name": "Print Input", 75 | "run": "echo \"${{ inputs.input_1 }}\"", 76 | "_id": "7f294647090543c69b742cb12d98bd00", 77 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 78 | "labels": [ 79 | "Step" 80 | ] 81 | }, 82 | { 83 | "path": "RavenIntegrationTests/ReusableWorkflows-Mock/.github/workflows/reuse_workflow.yml", 84 | "param": "inputs.input_1", 85 | "_id": "92b692254230795df97f94ad6e6243a7", 86 | "url": "https://github.com/RavenIntegrationTests/ReusableWorkflows-Mock/tree/main/.github/workflows/reuse_workflow.yml", 87 | 
"labels": [ 88 | "StepCodeDependency" 89 | ] 90 | } 91 | ], 92 | "relationships": [ 93 | { 94 | "start_node": "ff1b0c2b61a25d227707be99c3901303", 95 | "type": "REUSABLE_WORKFLOW_INPUT", 96 | "end_node": "aaaaf3ef437b55388bec93f26c8f9c44" 97 | }, 98 | { 99 | "start_node": "ff1b0c2b61a25d227707be99c3901303", 100 | "type": "REUSABLE_WORKFLOW_INPUT", 101 | "end_node": "3f81ef5510e05f61bfa26a950ebd2c3d" 102 | }, 103 | { 104 | "start_node": "ff1b0c2b61a25d227707be99c3901303", 105 | "type": "REUSABLE_WORKFLOW_INPUT", 106 | "end_node": "2fe112d8d392c700041b353c90db4edf" 107 | }, 108 | { 109 | "start_node": "3cae77d1d794cf1a5dd88b4e2e38bd22", 110 | "type": "ACTION", 111 | "end_node": "d35e7df441120da9624b8c11e36151be" 112 | }, 113 | { 114 | "start_node": "7f294647090543c69b742cb12d98bd00", 115 | "type": "USING_PARAM", 116 | "end_node": "92b692254230795df97f94ad6e6243a7" 117 | }, 118 | { 119 | "start_node": "292255a5b241802f89614333a7a13539", 120 | "type": "STEPS", 121 | "end_node": "3cae77d1d794cf1a5dd88b4e2e38bd22" 122 | }, 123 | { 124 | "start_node": "292255a5b241802f89614333a7a13539", 125 | "type": "STEPS", 126 | "end_node": "7f294647090543c69b742cb12d98bd00" 127 | }, 128 | { 129 | "start_node": "ff1b0c2b61a25d227707be99c3901303", 130 | "type": "JOBS", 131 | "end_node": "292255a5b241802f89614333a7a13539" 132 | } 133 | ] 134 | } -------------------------------------------------------------------------------- /tests/integration/test_graph_structures.py: -------------------------------------------------------------------------------- 1 | from colorama import Fore, Style 2 | from tests.utils import ( 3 | get_graph_structure, 4 | assert_graph_structures, 5 | ) 6 | from tests.integration.integration_consts import TESTS_CONFIGS 7 | from tests.tests_init import init_integration_env 8 | 9 | 10 | def test_graph_structure() -> None: 11 | """ 12 | Tests the graph structure of the integration tests. 13 | It will loop over each test config dictionary on TESTS_CONFIGS list and assert the graph structure is as expected. 
14 | """ 15 | init_integration_env() 16 | for test_config in TESTS_CONFIGS: 17 | print( 18 | f"{Fore.CYAN}Running integration test: {test_config['test_name']}.{Style.RESET_ALL}" 19 | ) 20 | 21 | # Get the queries from the test config 22 | query_config = test_config["queries"] 23 | nodes_query = query_config["nodes_query"].format(**query_config["to_format"]) 24 | relationships_query = query_config["relationships_query"].format( 25 | **query_config["to_format"] 26 | ) 27 | 28 | # Get the graph structure from the queries and assert it 29 | graph_structure = get_graph_structure(nodes_query, relationships_query) 30 | assert_graph_structures(graph_structure, test_config["json_path"]) 31 | -------------------------------------------------------------------------------- /tests/tests_init.py: -------------------------------------------------------------------------------- 1 | from os import getenv 2 | from src.config.config import load_downloader_config, load_indexer_config 3 | from src.downloader.download import download_account_workflows_and_actions 4 | from src.indexer.index import index_downloaded_workflows_and_actions 5 | 6 | 7 | def init_integration_env(): 8 | load_integration_tests_config() 9 | download_account_workflows_and_actions() 10 | index_downloaded_workflows_and_actions() 11 | 12 | 13 | def load_integration_tests_config() -> None: 14 | load_downloader_config( 15 | { 16 | "debug": False, 17 | "token": getenv("GITHUB_TOKEN"), 18 | "account_name": ["RavenIntegrationTests"], 19 | "redis_host": "raven-redis-test", 20 | "redis_port": 6379, 21 | "clean_redis": True, 22 | } 23 | ) 24 | 25 | load_indexer_config( 26 | { 27 | "debug": False, 28 | "redis_host": "raven-redis-test", 29 | "redis_port": 6379, 30 | "clean_redis": True, 31 | "neo4j_uri": "neo4j://raven-neo4j-test:7687", 32 | "neo4j_user": "neo4j", 33 | "neo4j_pass": "123456789", 34 | "threads": 1, 35 | "clean_neo4j": True, 36 | } 37 | ) 38 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CycodeLabs/raven/2e4dc57ce2f360a3cbef01b404adda9133fedbc2/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_composite_action.py: -------------------------------------------------------------------------------- 1 | import src.workflow_components.composite_action as composite_action 2 | from tests.utils import load_test_config, assert_action_inputs 3 | 4 | load_test_config() 5 | 6 | 7 | def test_composite_action_from_dict_node(): 8 | ca_d = { 9 | "name": "Create Issue From File", 10 | "description": "An action to create an issue using content from a file", 11 | "inputs": { 12 | "token": { 13 | "description": "The GitHub authentication token", 14 | "default": "${{ github.token }}", 15 | "required": True, 16 | }, 17 | "repository": { 18 | "description": "The target GitHub repository", 19 | "default": "${{ github.repository }}", 20 | }, 21 | "issue-number": { 22 | "description": "The issue number of an existing issue to update" 23 | }, 24 | "title": {"description": "The title of the issue", "required": "true"}, 25 | "content-filepath": {"description": "The file path to the issue content"}, 26 | "labels": {"description": "A comma or newline-separated list of labels"}, 27 | "assignees": { 28 | "description": "A comma or newline-separated list of assignees (GitHub usernames)" 29 | }, 30 | }, 31 | "outputs":
{"issue-number": {"description": "The number of the created issue"}}, 32 | "runs": {"using": "node16", "main": "dist/index.js"}, 33 | "branding": {"icon": "alert-circle", "color": "orange"}, 34 | "path": "data/actions/peter-evans|create-issue-from-file|action.yml", 35 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 36 | "is_public": True, 37 | } 38 | 39 | ca = composite_action.CompositeAction.from_dict(ca_d) 40 | 41 | assert ca.name == ca_d["name"] 42 | assert ca.path == ca_d["path"] 43 | assert ca.using == "node16" 44 | assert ca.url == ca_d["url"] 45 | assert ca.is_public == ca_d["is_public"] 46 | assert ca.image is None 47 | assert len(ca.steps) == 0 48 | 49 | assert_action_inputs(ca, ca_d) 50 | 51 | 52 | def test_composite_action_from_dict_dockerfile(): 53 | ca_d = { 54 | "name": "Automatic Rebase", 55 | "description": "Automatically rebases PR on '/rebase' comment", 56 | "maintainer": "Cirrus Labs", 57 | "runs": {"using": "docker", "image": "Dockerfile"}, 58 | "inputs": { 59 | "autosquash": { 60 | "description": "Should the rebase autosquash fixup and squash commits", 61 | "required": "false", 62 | "default": "false", 63 | } 64 | }, 65 | "branding": {"icon": "git-pull-request", "color": "purple"}, 66 | "path": "data/actions/cirrus-actions|rebase|action.yml", 67 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 68 | "is_public": True, 69 | } 70 | 71 | ca = composite_action.CompositeAction.from_dict(ca_d) 72 | 73 | assert ca.name == ca_d["name"] 74 | assert ca.path == ca_d["path"] 75 | assert ca.using == "docker" 76 | assert ca.image == "Dockerfile" 77 | assert ca.url == ca_d["url"] 78 | assert ca.is_public == ca_d["is_public"] 79 | assert len(ca.steps) == 0 80 | 81 | assert_action_inputs(ca, ca_d) 82 | 83 | 84 | def test_composite_action_from_dict_image(): 85 | ca_d = { 86 | "name": "Image Actions", 87 | "author": "Calibre", 88 | "description": "Compresses Images for the Web", 89 | "inputs": { 90 | "githubToken": {"description": "GitHub Token", "required": "true"}, 91 | }, 92 | "outputs": { 93 | "markdown": { 94 | "description": "Output param used to store the Markdown summary for subsequent actions to use" 95 | } 96 | }, 97 | "runs": { 98 | "using": "docker", 99 | "image": "docker://ghcr.io/calibreapp/image-actions/image-actions:main", 100 | }, 101 | "branding": {"icon": "image", "color": "green"}, 102 | "path": "data/actions/calibreapp|image-actions|action.yml", 103 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 104 | "is_public": True, 105 | } 106 | 107 | ca = composite_action.CompositeAction.from_dict(ca_d) 108 | 109 | assert ca.name == ca_d["name"] 110 | assert ca.path == ca_d["path"] 111 | assert ca.using == "docker" 112 | assert ca.url == ca_d["url"] 113 | assert ca.is_public == ca_d["is_public"] 114 | assert ca.image == "docker://ghcr.io/calibreapp/image-actions/image-actions:main" 115 | assert len(ca.steps) == 0 116 | 117 | assert_action_inputs(ca, ca_d) 118 | 119 | 120 | def test_composite_action_from_dict_steps(): 121 | ca_d = { 122 | "name": "Install development tools", 123 | "description": "GitHub Action for installing development tools", 124 | "inputs": { 125 | "tool": { 126 | "description": "Tools to install (comma-separated list)", 127 | "required": "true", 128 | }, 129 | "checksum": { 130 | "description": "Whether to enable checksums", 131 | "required": "false", 132 | "default": "true", 133 | }, 134 | }, 135 | "runs": { 136 | "using": "composite", 137 | "steps": [ 138 | { 139 | "run": 'bash --noprofile --norc "${GITHUB_ACTION_PATH:?}/main.sh"', 140 
| "shell": "bash", 141 | "env": { 142 | "INPUT_TOOL": "${{ inputs.tool }}", 143 | "INPUT_CHECKSUM": "${{ inputs.checksum }}", 144 | }, 145 | } 146 | ], 147 | }, 148 | "path": "data/actions/taiki-e|install-action|action.yml", 149 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 150 | "is_public": True, 151 | } 152 | 153 | ca = composite_action.CompositeAction.from_dict(ca_d) 154 | 155 | assert ca.name == ca_d["name"] 156 | assert ca.path == ca_d["path"] 157 | assert ca.using == "composite" 158 | assert ca.url == ca_d["url"] 159 | assert ca.is_public == ca_d["is_public"] 160 | assert ca.image is None 161 | assert len(ca.steps) == 1 162 | 163 | assert_action_inputs(ca, ca_d) 164 | 165 | 166 | def test_composite_action_step_from_dict_run(): 167 | step_d = { 168 | "run": ': install rustup if needed\nif ! command -v rustup &>/dev/null; then\n curl --proto \'=https\' --tlsv1.2 --retry 10 --retry-connrefused --location --silent --show-error --fail "https://sh.rustup.rs" | sh -s -- --default-toolchain none -y\n echo "${CARGO_HOME:-$HOME/.cargo}/bin" >> $GITHUB_PATH\nfi\n', 169 | "if": "runner.os != 'Windows'", 170 | "shell": "bash", 171 | "_id": "4eba12855ade10f6e8dda0456946ffa1", 172 | "path": "data/actions/dtolnay|rust-toolchain|action.yml", 173 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 174 | } 175 | 176 | step = composite_action.CompositeActionStep.from_dict(step_d) 177 | 178 | assert step._id == step_d["_id"] 179 | assert step.name is None 180 | assert step.path == step_d["path"] 181 | assert step.run == step_d["run"] 182 | assert step.uses is None 183 | assert step.ref is None 184 | assert step.shell == step_d["shell"] 185 | assert step.with_prop is None 186 | assert step.url == step_d["url"] 187 | assert len(step.action) == 0 188 | assert len(step.using_param) == 0 189 | 190 | 191 | def test_composite_action_step_from_dict_run_dependency(): 192 | step_d = { 193 | "run": "${{ github.action_path }}/setup_pip.ps1", 194 | "shell": "pwsh", 195 | "env": { 196 | "PYTHON_VERSION": "${{ steps.setup.outputs.python-version }}", 197 | "SETUP_PYTHON_PATH": "${{ steps.setup.outputs.python-path }}", 198 | }, 199 | "_id": "f85b9778e35a1273d88c7dabdb210eaf", 200 | "path": "data/actions/ytdl-org|setup-python|action.yml", 201 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 202 | } 203 | 204 | step = composite_action.CompositeActionStep.from_dict(step_d) 205 | 206 | assert step._id == step_d["_id"] 207 | assert step.name is None 208 | assert step.path == step_d["path"] 209 | assert step.run == step_d["run"] 210 | assert step.uses is None 211 | assert step.ref is None 212 | assert step.shell == step_d["shell"] 213 | assert step.url == step_d["url"] 214 | assert step.with_prop is None 215 | assert len(step.action) == 0 216 | assert len(step.using_param) == 1 217 | 218 | 219 | def test_composite_action_step_from_dict_using(): 220 | step_d = { 221 | "uses": "actions/setup-python@bd6b4b6205c4dbad673328db7b31b7fab9e241c0", 222 | "id": "setup", 223 | "with": { 224 | "python-version": "${{ steps.build.outputs.python-version }}", 225 | "cache": "${{ inputs.cache }}", 226 | "architecture": "${{ steps.build.outputs.architecture }}", 227 | "check-latest": "${{ inputs.check-latest }}", 228 | "token": "${{ inputs.token }}", 229 | "cache-dependency-path": "${{ inputs.cache-dependency-path }}", 230 | "update-environment": "${{ inputs.update-environment }}", 231 | }, 232 | "_id": "11e15e6b7424478c2e32fd22ed477c21", 233 | "path": "data/actions/ytdl-org|setup-python|action.yml", 234 | "url": 
"https://github.com/CycodeLabs/Raven/pull/1", 235 | } 236 | 237 | step = composite_action.CompositeActionStep.from_dict(step_d) 238 | assert step._id == step_d["_id"] 239 | assert step.name == step_d["id"] 240 | assert step.path == step_d["path"] 241 | assert step.run is None 242 | assert step.uses == step_d["uses"] 243 | assert step.ref == "bd6b4b6205c4dbad673328db7b31b7fab9e241c0" 244 | assert step.shell is None 245 | assert step.url == step_d["url"] 246 | assert step.with_prop == [ 247 | "python-version:${{ steps.build.outputs.python-version }}", 248 | "cache:${{ inputs.cache }}", 249 | "architecture:${{ steps.build.outputs.architecture }}", 250 | "check-latest:${{ inputs.check-latest }}", 251 | "token:${{ inputs.token }}", 252 | "cache-dependency-path:${{ inputs.cache-dependency-path }}", 253 | "update-environment:${{ inputs.update-environment }}", 254 | ] 255 | assert len(step.using_param) == 0 256 | -------------------------------------------------------------------------------- /tests/unit/test_dependency.py: -------------------------------------------------------------------------------- 1 | from tests.utils import load_test_config 2 | import src.workflow_components.dependency as dependency 3 | 4 | load_test_config() 5 | 6 | 7 | def test_uses_string_analyze(): 8 | test_cases = [ 9 | ( 10 | "actions/checkout@v2", 11 | False, 12 | "actions/checkout", 13 | ), 14 | ( 15 | "github/codeql-action/analyze@v1", 16 | False, 17 | "github/codeql-action/analyze", 18 | ), 19 | ( 20 | "./.github/actions/action-setup", 21 | True, 22 | "./.github/actions/action-setup", 23 | ), 24 | ( 25 | "./.github/actions/build.yml", 26 | True, 27 | "./.github/actions/build.yml", 28 | ), 29 | ( 30 | "octo-org/this-repo/.github/workflows/workflow-1.yml@latest", 31 | False, 32 | "octo-org/this-repo/.github/workflows/workflow-1.yml", 33 | ), 34 | ( 35 | "docker://docker.io/library/golang:1.17.1-alpine@sha256:abcd", 36 | False, 37 | "docker://docker.io/library/golang:1.17.1-alpine", 38 | ), 39 | ] 40 | 41 | for test_case in test_cases: 42 | uses_string_obj = dependency.UsesString.analyze(test_case[0]) 43 | assert ( 44 | uses_string_obj.is_relative == test_case[1] 45 | and uses_string_obj.path == test_case[2] 46 | ) 47 | -------------------------------------------------------------------------------- /tests/unit/test_parsing_utils.py: -------------------------------------------------------------------------------- 1 | from src.workflow_components.parsing_utils import ( 2 | parse_workflow_trigger, 3 | parse_job_machine, 4 | ) 5 | 6 | 7 | def test_parse_workflow_trigger(): 8 | test_cases = [ 9 | ("push", ["push"]), 10 | (["push"], ["push"]), 11 | (["push", "pull_request"], ["push", "pull_request"]), 12 | ( 13 | {"push": {"branches": ["master"]}}, 14 | ["push"], 15 | ), 16 | (None, []), 17 | ] 18 | 19 | for test_case in test_cases: 20 | assert parse_workflow_trigger(test_case[0]) == test_case[1] 21 | 22 | 23 | def test_parse_job_machine(): 24 | test_cases = [ 25 | ("ubuntu-latest", ["ubuntu-latest"]), 26 | ( 27 | {"labels": ["ubuntu-latest", "self-hosted"]}, 28 | ["ubuntu-latest", "self-hosted"], 29 | ), 30 | (["ubuntu-latest", "self-hosted"], ["ubuntu-latest", "self-hosted"]), 31 | (None, None), 32 | ] 33 | 34 | for test_case in test_cases: 35 | assert parse_job_machine(test_case[0]) == test_case[1] 36 | -------------------------------------------------------------------------------- /tests/unit/test_report.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 
2 | from src.config.config import LAST_QUERY_ID, QUERIES_PATH_DEFAULT 3 | from yaml import safe_load 4 | 5 | query_dir = Path(__file__).parent.parent.parent / QUERIES_PATH_DEFAULT 6 | 7 | RQ_PREFIX = "RQ-" 8 | 9 | 10 | def test_report(): 11 | assert query_dir.exists(), f"Directory {query_dir} doesn't exist" 12 | query_files = list(query_dir.glob("query*.yml")) 13 | assert ( 14 | len(query_files) > 0 15 | ), f"Directory {query_dir} doesn't contain any query*.yml files" 16 | 17 | # get query ids from files in query_dir 18 | query_ids = [] 19 | for query_file in query_files: 20 | with open(query_file, "r") as query: 21 | parsed_query = safe_load(query) 22 | if not parsed_query: 23 | raise ValueError(f"{query_file} is not a valid query file") 24 | 25 | query_id = parsed_query.get("id") 26 | try: 27 | int(query_id.split(RQ_PREFIX)[1]) 28 | except (AttributeError, IndexError, ValueError):  # id missing, missing the RQ- prefix, or non-numeric suffix 29 | raise ValueError(f"Query {query_file} has invalid id") 30 | 31 | query_info = parsed_query.get("info") 32 | 33 | assert parsed_query["query"], f"Query in {query_file} is empty" 34 | assert query_info["name"], f"Query in {query_file} has no name" 35 | assert query_info["severity"], f"Query in {query_file} has no severity" 36 | assert query_info[ 37 | "description" 38 | ], f"Query in {query_file} has no description" 39 | assert query_info["tags"], f"Query in {query_file} has no tags" 40 | 41 | query_ids.append(parsed_query.get("id")) 42 | 43 | try: 44 | max_id_num = max([int(query_id.split(RQ_PREFIX)[1]) for query_id in query_ids]) 45 | except ValueError: 46 | raise ValueError("Added query has invalid id") 47 | 48 | # sequence 49 | assert set(query_ids) == set( 50 | [f"{RQ_PREFIX}{num}" for num in range(1, max_id_num + 1)] 51 | ), f"Query ids in {query_dir} are not continuous from 1 to {max_id_num}: {query_ids}" 52 | 53 | # last id in files == config.LAST_QUERY_ID 54 | assert ( 55 | LAST_QUERY_ID == max_id_num 56 | ), f"LAST_QUERY_ID in config ({LAST_QUERY_ID}) != max id in query files ({max_id_num})" 57 | -------------------------------------------------------------------------------- /tests/unit/test_utils.py: -------------------------------------------------------------------------------- 1 | from tests.utils import load_test_config 2 | import src.common.utils as utils 3 | 4 | load_test_config() 5 | 6 | 7 | def test_get_dependencies_in_code(): 8 | test_cases = [ 9 | ("this is ${{github.event.issue.title}}", "github.event.issue.title"), 10 | ("this is ${{ github.event.issue.title}}", "github.event.issue.title"), 11 | ("this is ${{github.event.issue-title}}", "github.event.issue-title"), 12 | ("this is ${{github.event.issue_title}}", "github.event.issue_title"), 13 | ("this is\n\n${{github.event.issue.title}}\n", "github.event.issue.title"), 14 | ] 15 | 16 | for test_case in test_cases: 17 | assert utils.get_dependencies_in_code(test_case[0]) == [test_case[1]] 18 | 19 | 20 | def test_convert_dict_to_list(): 21 | test_cases = [ 22 | ({"a": "b"}, ["a:b"]), 23 | ({"a": "b", "c": "d"}, ["a:b", "c:d"]), 24 | ("a:b", ["a:b"]), 25 | ] 26 | 27 | for test_case in test_cases: 28 | assert utils.convert_dict_to_list(test_case[0]) == test_case[1] 29 | 30 | 31 | def test_get_repo_full_name_from_path(): 32 | assert ( 33 | utils.get_repo_name_from_path( 34 | "edgedb/edgedb-pkg/integration/linux/test/ubuntu-jammy/action.yml" 35 | ) 36 | == "edgedb/edgedb-pkg" 37 | ) 38 | assert ( 39 | utils.get_repo_name_from_path( 40 | "slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml" 41 | ) 42 | == "slsa-framework/slsa-github-generator" 43 | ) 44
| 45 | 46 | def test_find_uses_strings(): 47 | test_cases = [ 48 | (" uses: actions/checkout@v2", ["actions/checkout@v2"]), 49 | (" uses: actions/checkout@abcd", ["actions/checkout@abcd"]), 50 | (" uses: actions/checkout@side-branch", ["actions/checkout@side-branch"]), 51 | ( 52 | " uses: .github/workflows/my-workflow.yml@main", 53 | [".github/workflows/my-workflow.yml@main"], 54 | ), 55 | ( 56 | " uses: actions/checkout@v2\n uses: actions/checkout@v1", 57 | ["actions/checkout@v2", "actions/checkout@v1"], 58 | ), 59 | ] 60 | 61 | for test_case in test_cases: 62 | assert utils.find_uses_strings(test_case[0]) == test_case[1] 63 | -------------------------------------------------------------------------------- /tests/unit/test_workflow.py: -------------------------------------------------------------------------------- 1 | import src.workflow_components.workflow as workflow 2 | from tests.utils import load_test_config, assert_reusable_workflow_inputs 3 | 4 | load_test_config() 5 | 6 | 7 | def test_job_from_dict_steps(): 8 | job_d = { 9 | "name": "issue-commented", 10 | "runs-on": "ubuntu-latest", 11 | "steps": [ 12 | { 13 | "name": "Generate GitHub App token", 14 | "uses": "electron/github-app-auth-action@cc6751b3b5e4edc5b9a4ad0a021ac455653b6dc8", 15 | "id": "generate-token", 16 | "with": {"creds": "${{ secrets.ISSUE_TRIAGE_GH_APP_CREDS }}"}, 17 | }, 18 | ], 19 | "_id": "6347a06af34cc01c884c110fd9db8964", 20 | "path": "electron/electron/.github/workflows/issue-commented.yml", 21 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 22 | } 23 | 24 | job = workflow.Job.from_dict(job_d) 25 | 26 | assert job._id == job_d["_id"] 27 | assert job.name == job_d["name"] 28 | assert job.path == job_d["path"] 29 | assert job.machine == [job_d["runs-on"]] 30 | assert job.uses is None 31 | assert job.ref is None 32 | assert job.with_prop is None 33 | assert job.url == job_d["url"] 34 | assert len(job.steps) == 1 35 | assert len(job.reusable_workflow) == 0 36 | 37 | 38 | def test_workflow_from_dict(): 39 | workflow_d = { 40 | "name": "Release notes", 41 | "on": {"push": {"branches": ["main"]}, "workflow_dispatch": None}, 42 | "permissions": {"contents": "read"}, 43 | "jobs": { 44 | "update_release_draft": { 45 | "permissions": {"contents": "write", "pull-requests": "write"}, 46 | "runs-on": "ubuntu-latest", 47 | "if": "github.repository == 'twbs/bootstrap'", 48 | "steps": [ 49 | { 50 | "uses": "release-drafter/release-drafter@v5", 51 | "env": {"GITHUB_TOKEN": "${{ secrets.GITHUB_TOKEN }}"}, 52 | } 53 | ], 54 | } 55 | }, 56 | "path": "twbs/bootstrap/.github/workflows/release-notes.yml", 57 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 58 | "is_public": True, 59 | } 60 | 61 | wf = workflow.Workflow.from_dict(workflow_d) 62 | 63 | assert wf.name == workflow_d["name"] 64 | assert wf.path == workflow_d["path"] 65 | assert wf.trigger == ["push", "workflow_dispatch"] 66 | assert wf.permissions == ["contents:read"] 67 | assert wf.url == workflow_d["url"] 68 | assert len(wf.jobs) == 1 69 | 70 | 71 | def test_job_from_dict_uses(): 72 | job_d = { 73 | "name": "test-firefox-safari", 74 | "uses": "./.github/workflows/build_reusable.yml", 75 | "with": { 76 | "skipForDocsOnly": "yes", 77 | }, 78 | "secrets": "inherit", 79 | "_id": "f796b4c01ecb6021e6a30ec7466ab11a", 80 | "path": "vercel/next.js/.github/workflows/build_and_test.yml", 81 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 82 | } 83 | 84 | job = workflow.Job.from_dict(job_d) 85 | 86 | assert job._id == job_d["_id"] 87 | assert job.name == 
job_d["name"] 88 | assert job.path == job_d["path"] 89 | assert job.machine is None 90 | assert job.uses == job_d["uses"] 91 | assert job.ref is None 92 | assert job.url == job_d["url"] 93 | assert job.with_prop == ["skipForDocsOnly:yes"] 94 | assert len(job.steps) == 0 95 | 96 | 97 | def test_step_from_dict_uses(): 98 | step_d = { 99 | "name": "Generate GitHub App token", 100 | "uses": "electron/github-app-auth-action@cc6751b3b5e4edc5b9a4ad0a021ac455653b6dc8", 101 | "with": {"creds": "${{ secrets.ISSUE_TRIAGE_GH_APP_CREDS }}"}, 102 | "_id": "9a42f7bb6c8e5be00c1d36d54ac7bdb6", 103 | "path": "electron/electron/.github/workflows/issue-commented.yml", 104 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 105 | } 106 | 107 | step = workflow.Step.from_dict(step_d) 108 | 109 | assert step._id == step_d["_id"] 110 | assert step.name == step_d["name"] 111 | assert step.path == step_d["path"] 112 | assert step.run is None 113 | assert step.uses == step_d["uses"] 114 | assert step.url == step_d["url"] 115 | assert step.ref == "cc6751b3b5e4edc5b9a4ad0a021ac455653b6dc8" 116 | assert step.with_prop == ["creds:${{ secrets.ISSUE_TRIAGE_GH_APP_CREDS }}"] 117 | 118 | 119 | def test_step_from_dict_run(): 120 | step_d = { 121 | "name": "Autolabel based on affected areas", 122 | "run": "echo ${{ github.event.issue.body }}", 123 | "_id": "1386cfbaf5513e27c090133287e01fe", 124 | "path": "vercel/next.js/.github/workflows/issue_validator.yml", 125 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 126 | } 127 | 128 | step = workflow.Step.from_dict(step_d) 129 | 130 | assert step._id == step_d["_id"] 131 | assert step.name == step_d["name"] 132 | assert step.path == step_d["path"] 133 | assert step.uses is None 134 | assert step.run == step_d["run"] 135 | assert step.ref is None 136 | assert step.url == step_d["url"] 137 | assert step.with_prop is None 138 | assert len(step.using_param) == 1 139 | 140 | 141 | def test_reusable_workflow_from_dict(): 142 | workflow_d = { 143 | "name": "Release notes", 144 | "on": { 145 | "workflow_call": { 146 | "inputs": { 147 | "input_1": { 148 | "required": True, 149 | "default": "default_value_1", 150 | "description": "description_1", 151 | }, 152 | "input_2": { 153 | "required": False, 154 | "default": "default_value_2", 155 | "description": "description_2", 156 | }, 157 | } 158 | } 159 | }, 160 | "permissions": {"contents": "read"}, 161 | "jobs": { 162 | "update_release_draft": { 163 | "permissions": {"contents": "write", "pull-requests": "write"}, 164 | "runs-on": "ubuntu-latest", 165 | "if": "github.repository == 'twbs/bootstrap'", 166 | "steps": [ 167 | { 168 | "uses": "release-drafter/release-drafter@v5", 169 | "env": {"GITHUB_TOKEN": "${{ secrets.GITHUB_TOKEN }}"}, 170 | } 171 | ], 172 | } 173 | }, 174 | "path": "twbs/bootstrap/.github/workflows/release-notes.yml", 175 | "url": "https://github.com/CycodeLabs/Raven/pull/1", 176 | "is_public": True, 177 | } 178 | 179 | wf = workflow.Workflow.from_dict(workflow_d) 180 | 181 | assert wf.name == workflow_d["name"] 182 | assert wf.path == workflow_d["path"] 183 | assert wf.trigger == ["workflow_call"] 184 | assert wf.permissions == ["contents:read"] 185 | assert wf.url == workflow_d["url"] 186 | assert len(wf.jobs) == 1 187 | 188 | assert_reusable_workflow_inputs(wf, workflow_d) 189 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from py2neo.ogm import GraphObject 2 | import json 3 |
from src.config.config import Config 4 | from src.workflow_components.composite_action import CompositeAction 5 | from src.workflow_components.workflow import Workflow 6 | from typing import Tuple, List, Dict, Optional 7 | from tests.integration.integration_consts import START_NODE_INDEX, DEST_NODE_INDEX 8 | from src.common.utils import raw_str_to_bool 9 | from hashlib import md5 10 | 11 | 12 | class GraphDbMock(object): 13 | def __init__(self): 14 | pass 15 | 16 | def push_object(self, obj: GraphObject): 17 | pass 18 | 19 | def get_object(self, obj: GraphObject) -> Optional[GraphObject]: 20 | return None 21 | 22 | def get_or_create(self, obj: GraphObject) -> Tuple[GraphObject, bool]: 23 | return None, True 24 | 25 | 26 | def load_test_config() -> None: 27 | Config.graph = GraphDbMock() 28 | 29 | 30 | def get_nodes_as_dicts(node_type: str, paths: Optional[List[str]] = None) -> List[Dict]: 31 | """ 32 | - node_type (str): The type of the node to filter by. 33 | - paths (List[str], optional): List of paths to filter nodes by. 34 | 35 | Returns a list of nodes as dictionaries that match the given type and paths. 36 | """ 37 | nodes = Config.graph.get_all_nodes(node_type) 38 | if paths: 39 | return [dict(node) for node in nodes if node.get("path") in paths] 40 | else: 41 | return [dict(node) for node in nodes] 42 | 43 | 44 | def query_graph_for_nodes(query: str) -> List[Dict]: 45 | """ 46 | Returns dictionary representations of the nodes returned by the query. 47 | """ 48 | nodes_query = Config.graph.run_query(query) 49 | nodes = [] 50 | for node in nodes_query: 51 | node_obj = node.values()[0] 52 | extracted_node = dict(node_obj) 53 | extracted_node["labels"] = list(node_obj._labels) 54 | nodes.append(extracted_node) 55 | return nodes 56 | 57 | 58 | def query_graph_for_relationships(query: str) -> List[Dict]: 59 | """ 60 | Returns dictionary representations of the relationships returned by the query. 61 | """ 62 | relationships_query = Config.graph.run_query(query) 63 | relationships = [] 64 | for rq in relationships_query: 65 | r_dict = { 66 | "start_node": rq[0].get("_id"), 67 | "type": rq[1].__class__.__name__, 68 | "end_node": rq[2].get("_id"), 69 | } 70 | relationships.append(r_dict) 71 | return relationships 72 | 73 | 74 | def get_graph_structure(nodes_query: str, relationships_query: str) -> Dict: 75 | """ 76 | Receives a query for nodes and a query for relationships. 77 | Returns a dictionary representation of the graph structure. 78 | """ 79 | nodes = query_graph_for_nodes(nodes_query) 80 | relationships = query_graph_for_relationships(relationships_query) 81 | return {"nodes": nodes, "relationships": relationships} 82 | 83 | 84 | def get_sorted_lists_of_nodes_and_relationships( 85 | graph_structure: Dict, 86 | ) -> Tuple[List, List]: 87 | """ 88 | Receives a graph structure and returns sorted lists of nodes and relationships. 89 | """ 90 | nodes = graph_structure.get("nodes") 91 | relationships = graph_structure.get("relationships") 92 | 93 | nodes.sort(key=lambda x: x.get("_id")) 94 | relationships.sort(key=lambda x: (x.get("start_node"), x.get("end_node"))) 95 | 96 | return nodes, relationships 97 | 98 | 99 | def get_dicts_differences(dict1: Dict, dict2: Dict) -> Dict: 100 | """ 101 | Receives two dictionaries and returns the differences between them.
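For example (illustrative values), get_dicts_differences({"a": 1, "b": 2}, {"a": 1, "b": 3}) returns {"b": [2, 3]}, i.e. each differing key maps to [value_in_dict1, value_in_dict2].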
102 | """ 103 | keys = set(dict1.keys()).union(set(dict2.keys())) 104 | differences = {} 105 | for key in keys: 106 | if dict1.get(key) != dict2.get(key): 107 | differences[key] = [dict1.get(key), dict2.get(key)] 108 | 109 | return differences 110 | 111 | 112 | def assert_graph_structures(graph_structure: Dict, snapshot_path: str) -> None: 113 | """ 114 | Receives a graph structure and a path to a JSON file containing a graph structure snapshot, and asserts that they match. 115 | """ 116 | with open(snapshot_path, "r") as f: 117 | snapshot_structure = json.load(f) 118 | 119 | snapshot_nodes, snapshot_relations = get_sorted_lists_of_nodes_and_relationships( 120 | snapshot_structure 121 | ) 122 | graph_nodes, graph_relations = get_sorted_lists_of_nodes_and_relationships( 123 | graph_structure 124 | ) 125 | 126 | # Asserting nodes 127 | for node in snapshot_nodes: 128 | assert ( 129 | node == graph_nodes[snapshot_nodes.index(node)] 130 | ), f"Properties of nodes at the same index are not equal\n{get_dicts_differences(node, graph_nodes[snapshot_nodes.index(node)])}\n\nIn snapshot:\n{node}\nIn graph:\n{graph_nodes[snapshot_nodes.index(node)]}" 131 | 132 | # Asserting relationships 133 | for relationship in snapshot_relations: 134 | assert ( 135 | relationship == graph_relations[snapshot_relations.index(relationship)] 136 | ), f"Properties of relationships at the same index of the graph and the snapshot are not equal\n\n{get_dicts_differences(relationship, graph_relations[snapshot_relations.index(relationship)])}\nIn snapshot:\n{relationship}\nIn graph:\n{graph_relations[snapshot_relations.index(relationship)]}" 137 | 138 | 139 | def assert_action_inputs(ca: CompositeAction, ca_d: Dict): 140 | """ 141 | This function asserts that the action inputs are equal to those in the JSON file. 142 | Each composite action is connected to multiple action inputs. 143 | Each input contains different properties such as name, default, description, and required. 144 | 145 | Using `ca.composite_action_input.triples()`, we iterate over all the inputs of the composite action. 146 | For each input, we check the following: 147 | 1) The source node ID matches the composite action ID, and the input's name and URL match those in the JSON file. 148 | 2) The id of the composite action input is the md5 hash of the composite action id and the input name. 149 | 3) The default, description, and required properties are equal to those in the JSON file. 150 | 151 | Each input is a tuple containing a source node (in this case, always the composite action identifier), 152 | the relationship type, and the destination node (the identifier of the input itself).
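For example (illustrative), an input named "token" on a composite action whose _id is "abc" is expected to get the _id md5(b"abc_token").hexdigest().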
153 | """ 154 | for input in ca.composite_action_input.triples(): 155 | ca_d_input = ca_d["inputs"][input[DEST_NODE_INDEX].name] 156 | 157 | assert input[START_NODE_INDEX]._id == ca._id 158 | assert input[DEST_NODE_INDEX].name == ca_d_input["name"] 159 | assert input[DEST_NODE_INDEX].url == ca_d["url"] 160 | assert ( 161 | input[DEST_NODE_INDEX]._id 162 | == md5(f"{ca._id}_{ca_d_input.get('name')}".encode()).hexdigest() 163 | ) 164 | 165 | if "required" in ca_d_input: 166 | assert input[DEST_NODE_INDEX].required == raw_str_to_bool( 167 | ca_d_input["required"] 168 | ) 169 | 170 | if "default" in ca_d_input: 171 | assert input[DEST_NODE_INDEX].default == ca_d_input["default"] 172 | 173 | if "description" in ca_d_input: 174 | assert input[DEST_NODE_INDEX].description == ca_d_input["description"] 175 | 176 | 177 | def assert_reusable_workflow_inputs(w: Workflow, workflow_d: Dict): 178 | """ 179 | This function asserts that the reusable workflow inputs are equal to those in the JSON file. 180 | Each reusable workflow is connected to multiple reusable workflow inputs. 181 | Each input contains different properties such as name, default, description, and required. 182 | 183 | Using `w.reusable_workflow_input.triples()`, we iterate over all the inputs of the reusable workflow. 184 | For each input, we check the following: 185 | 1) The source node ID matches the workflow ID, and the input's name and URL match those in the JSON file. 186 | 2) The id of the reusable workflow input is the md5 hash of the workflow id and the input name. 187 | 3) The default, description, and required properties are equal to those in the JSON file. 188 | 189 | Each input is a tuple containing a source node (in this case, always the reusable workflow identifier), 190 | the relationship type, and the destination node (the identifier of the input itself). 191 | """ 192 | for input in w.reusable_workflow_input.triples(): 193 | workflow_d_input = workflow_d["on"]["workflow_call"]["inputs"][ 194 | input[DEST_NODE_INDEX].name 195 | ] 196 | 197 | assert input[START_NODE_INDEX]._id == w._id 198 | assert input[DEST_NODE_INDEX].name == workflow_d_input["name"] 199 | assert input[DEST_NODE_INDEX].url == workflow_d["url"] 200 | assert ( 201 | input[DEST_NODE_INDEX]._id 202 | == md5(f"{w._id}_{workflow_d_input.get('name')}".encode()).hexdigest() 203 | ) 204 | 205 | if "required" in workflow_d_input: 206 | assert input[DEST_NODE_INDEX].required == raw_str_to_bool( 207 | workflow_d_input["required"] 208 | ) 209 | 210 | if "default" in workflow_d_input: 211 | assert input[DEST_NODE_INDEX].default == workflow_d_input["default"] 212 | 213 | if "description" in workflow_d_input: 214 | assert input[DEST_NODE_INDEX].description == workflow_d_input["description"] 215 | --------------------------------------------------------------------------------