├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── cd.yaml │ └── ci.yaml ├── .gitignore ├── .hooks ├── commit-msg.json ├── partials │ └── .gitkeep ├── pre-commit.json └── shared │ └── .gitkeep ├── .mookme.json ├── .mypy.ini ├── .pylintrc ├── .style.yapf ├── .vscode ├── extensions.json └── settings.json ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── commitlint.config.js ├── doc ├── banner.png ├── detector.drawio └── detector.jpg ├── docker-entrypoint.sh ├── graphinder ├── __init__.py ├── __main__.py ├── entities │ ├── __init__.py │ ├── errors.py │ ├── io.py │ ├── pool.py │ └── tasks.py ├── io │ ├── __init__.py │ ├── printers.py │ ├── providers.py │ ├── readers.py │ └── writers.py ├── main.py ├── pool │ ├── __init__.py │ ├── detectors.py │ ├── domain.py │ ├── extractors.py │ ├── routine.py │ └── tasks.py ├── py.typed └── utils │ ├── __init__.py │ ├── assets.py │ ├── filters.py │ ├── finders.py │ ├── logger.py │ └── webhook.py ├── install-dev.sh ├── package.json ├── poetry.lock ├── pyproject.toml └── tests ├── __init__.py └── unit ├── __init__.py ├── entities ├── test_errors.py ├── test_io.py ├── test_pool.py └── test_task.py ├── io ├── test_printers.py ├── test_providers.py ├── test_readers.py ├── test_readers.txt └── test_writers.py ├── pool ├── test_detectors.py ├── test_domain.py ├── test_extractors.py ├── test_routine.py └── test_tasks.py ├── test_main.py └── utils ├── test_assets.py ├── test_filters.py ├── test_finders.py ├── test_logger.py └── test_webhook.py /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Lines starting with '#' are comments. 2 | # Each line is a file pattern followed by one or more owners. 3 | 4 | # More details are here: https://help.github.com/articles/about-codeowners/ 5 | 6 | # The '*' pattern is global owners. 7 | 8 | # Order is important. 
The last matching pattern has the most precedence. 9 | # The folders are ordered as follows: 10 | 11 | # In each subsection folders are ordered first by depth, then alphabetically. 12 | # This should make it easy to add new rules without breaking existing ones. 13 | 14 | # Global rule: 15 | * @c3b5aw 16 | 17 | graphinder/ @iCarossio 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG] " 5 | labels: bug 6 | assignees: c3b5aw 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Ran on '..' 16 | 2. Parameters '..' 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **Screenshots** 22 | If applicable, add screenshots to help explain your problem. 23 | 24 | **Desktop (please complete the following information):** 25 | - OS: [e.g. Ubuntu 20.04] 26 | - Python Version: [e.g. 3.10.1] 27 | - Graphinder version [e.g. 1.3.1] 28 | 29 | **Additional context** 30 | Add any other context about the problem here. 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: c3b5aw 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | # Maintain dependencies for GitHub Actions 9 | - package-ecosystem: "github-actions" 10 | directory: "/" 11 | schedule: 12 | interval: "weekly" 13 | 14 | # Maintain dependencies for pip (Python packages) 15 | - package-ecosystem: "pip" 16 | directory: "/" 17 | schedule: 18 | interval: "weekly" 19 | -------------------------------------------------------------------------------- /.github/workflows/cd.yaml: -------------------------------------------------------------------------------- 1 | name: CD 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3 14 | - name: Release 15 | uses: softprops/action-gh-release@v1 16 | 17 | pypi-push: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: actions/setup-python@v4 22 | with: 23 | python-version: '3.10' 24 | - name: Install dependencies 25 | run: | 26 | pip install poetry 27 | - name: Setup poetry 28 | run: | 29 | poetry config virtualenvs.in-project true 30 | poetry install --no-dev 31 | - name: Build package 32 | run: | 33 | source .venv/bin/activate 34 | poetry build 35 | - name: Upload
package 36 | uses: pypa/gh-action-pypi-publish@release/v1 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_TOKEN }} 40 | 41 | docker-push: 42 | runs-on: ubuntu-latest 43 | steps: 44 | - uses: actions/checkout@v3 45 | - name: Set up QEMU 46 | uses: docker/setup-qemu-action@v2 47 | - name: Set up Docker Buildx 48 | uses: docker/setup-buildx-action@v2 49 | - name: Login to DockerHub 50 | if: github.event_name != 'pull_request' 51 | uses: docker/login-action@v2 52 | with: 53 | username: ${{ secrets.DOCKERHUB_USERNAME }} 54 | password: ${{ secrets.DOCKERHUB_TOKEN }} 55 | logout: true 56 | - name: Docker meta 57 | id: meta 58 | uses: docker/metadata-action@v4 59 | with: 60 | images: ${{ secrets.DOCKERHUB_REPO}}/graphinder 61 | - name: Build and push 62 | uses: docker/build-push-action@v4 63 | with: 64 | push: ${{ github.event_name != 'pull_request' }} 65 | context: . 66 | platforms: linux/amd64,linux/arm64 67 | tags: ${{ steps.meta.outputs.tags }} 68 | labels: ${{ steps.meta.outputs.labels }} 69 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | pull_request: 5 | types: [opened, reopened] 6 | 7 | env: 8 | MODULE_NAME: graphinder 9 | MIN_TEST_COV: 80 10 | 11 | jobs: 12 | CI: 13 | runs-on: ubuntu-20.04 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: [ubuntu-latest] 18 | python-version: ['3.10'] 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: actions/setup-python@v4 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | pip install poetry 27 | - name: Setup poetry 28 | run: | 29 | poetry config virtualenvs.in-project true 30 | poetry install 31 | - name: Run lint 32 | if: always() 33 | run: | 34 | source .venv/bin/activate 35 | isort -m 9 --line-length 160 $MODULE_NAME tests --check-only 36 | pylint 
--load-plugins pylint_quotes $MODULE_NAME tests 37 | docformatter --wrap-summaries 160 --wrap-descriptions 160 -cr $MODULE_NAME tests 38 | yapf -rd $MODULE_NAME tests 39 | mypy -V 40 | mypy $MODULE_NAME tests 41 | - name: Run tests 42 | run: | 43 | source .venv/bin/activate 44 | pytest --reruns=3 --cov=$MODULE_NAME --cov-report=xml --cov-fail-under=$MIN_TEST_COV tests/unit 45 | - name: Upload coverage to Codecov 46 | uses: codecov/codecov-action@v3 47 | with: 48 | token: ${{ secrets.CODECOV_TOKEN }} 49 | files: ./coverage.xml 50 | flags: unittests 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ############################ 2 | ######### General ########## 3 | ############################ 4 | 5 | # macOS 6 | .DS_Store 7 | .AppleDouble 8 | .LSOverride 9 | .DocumentRevisions-V100 10 | .fseventsd 11 | .Spotlight-V100 12 | .TemporaryItems 13 | .Trashes 14 | .VolumeIcon.icns 15 | .com.apple.timemachine.donotpresent 16 | .AppleDB 17 | .AppleDesktop 18 | Network Trash Folder 19 | Temporary Items 20 | .apdisk 21 | ._* 22 | Icon 23 | 24 | # Linux 25 | *~ 26 | 27 | # Windows 28 | Thumbs.db 29 | ehthumbs.db 30 | Desktop.ini 31 | $RECYCLE.BIN/ 32 | *.cab 33 | *.msi 34 | *.msm 35 | *.msp 36 | 37 | # Packages 38 | *.7z 39 | *.csv 40 | *.dat 41 | *.dmg 42 | *.gz 43 | *.iso 44 | *.jar 45 | *.rar 46 | *.tar 47 | *.zip 48 | *.com 49 | *.class 50 | *.dll 51 | *.exe 52 | *.o 53 | *.seed 54 | *.so 55 | *.swo 56 | *.swp 57 | *.swn 58 | *.swm 59 | *.out 60 | *.pid 61 | *.prof 62 | 63 | # Editors 64 | .idea 65 | *.suo 66 | *.ntvs* 67 | *.njsproj 68 | *.sln 69 | 70 | 71 | ############################ 72 | ######### Specific ######### 73 | ############################ 74 | 75 | # Python 76 | __pycache__/ 77 | *.egg-info/ 78 | .ipynb_checkpoints 79 | *.pyc 80 | *.pyo 81 | *.ipynb 82 | *.env 83 | .mypy_cache 84 | .pytest_cache 85 | .coverage 86 | .venv 87 | 
build/ 88 | dist/ 89 | setup.py 90 | *.spec 91 | snap 92 | coverage.xml 93 | 94 | # Logs 95 | logs 96 | *.log* 97 | storage 98 | 99 | # Mookme 100 | node_modules 101 | package-lock.json 102 | **/.hooks/*.local.json 103 | 104 | # Escape 105 | .escaperc.json 106 | _debug/ 107 | security-report.pdf 108 | .tmp 109 | 110 | # Assets 111 | 112 | subfinder 113 | results.json 114 | -------------------------------------------------------------------------------- /.hooks/commit-msg.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps": [{ 3 | "name": "commit lint", 4 | "command": "cat {args} | ./node_modules/@commitlint/cli/cli.js" 5 | }] 6 | } -------------------------------------------------------------------------------- /.hooks/partials/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/.hooks/partials/.gitkeep -------------------------------------------------------------------------------- /.hooks/pre-commit.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "python", 3 | "venvActivate": "./.venv/bin/activate", 4 | "steps": [ 5 | { 6 | "name": "autoflake", 7 | "command": "autoflake -ri --remove-unused-variable --ignore-init-module-imports --remove-all-unused-imports graphinder tests" 8 | }, 9 | { 10 | "name": "isort", 11 | "command": "isort -m 9 --line-length 160 graphinder tests" 12 | }, 13 | { 14 | "name": "unify", 15 | "command": "unify -ri graphinder tests" 16 | }, 17 | { 18 | "name": "docformatter", 19 | "command": "docformatter --wrap-summaries 160 --wrap-descriptions 160 -ri graphinder tests" 20 | }, 21 | { 22 | "name": "yapf", 23 | "command": "yapf -ri graphinder tests" 24 | }, 25 | { 26 | "name": "pylint", 27 | "command": "pylint --load-plugins pylint_quotes graphinder tests" 28 | }, 29 | { 30 | "name": "mypy", 31 | 
"command": "mypy graphinder tests" 32 | }, 33 | { 34 | "name": "pytest", 35 | "command": "pytest --reruns=3 tests/unit" 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /.hooks/shared/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/.hooks/shared/.gitkeep -------------------------------------------------------------------------------- /.mookme.json: -------------------------------------------------------------------------------- 1 | { 2 | "packagesPath": ".", 3 | "packages": [], 4 | "addedBehavior": "addAndCommit" 5 | } -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.7 3 | disallow_untyped_defs=True 4 | disallow_untyped_calls=True 5 | disallow_incomplete_defs=True 6 | disallow_untyped_decorators=True 7 | strict_equality=True 8 | show_error_codes=True 9 | warn_unreachable=True 10 | warn_redundant_casts=True 11 | warn_unused_ignores=True 12 | warn_unused_configs=True 13 | pretty=True 14 | exclude=(build|dist|setup.py|.venv) 15 | disable_error_code = attr-defined 16 | 17 | [mypy-pydash.*] 18 | ignore_missing_imports = True 19 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc()))" 3 | ignore=.venv,setup.py 4 | 5 | [DESIGN] 6 | 7 | # Maximum number of characters on a single line. 
8 | max-line-length=160 9 | # Good variable names which should always be accepted, separated by a comma 10 | good-names=i,e 11 | # Maximum number of branch for function / method body 12 | max-branches=15 13 | # Maximum number of arguments for function / method 14 | max-args=10 15 | # Variable naming style 16 | variable-rgx=([a-z_][a-z0-9_]{1,30}|[a-zA-Z0-9_]{1})$ 17 | # Argument naming style 18 | argument-rgx=([a-z_][a-z0-9_]{1,30}|[a-zA-Z0-9_]{1})$ 19 | # Attribute naming style 20 | attr-rgx=([a-z_][a-z0-9_]{1,30}|[a-zA-Z0-9_]{1})$ 21 | # Maximum number of public methods per class 22 | max-public-methods=30 23 | # Maximum number of locals for function / method body 24 | max-locals=20 25 | # Make docstring compulsory for all functions 26 | no-docstring-rgx=$^ 27 | 28 | # MODULE-PARAM: pylint-quotes 29 | string-quote=single 30 | triple-quote=double 31 | docstring-quote=double 32 | 33 | [ELIF] 34 | 35 | # Maximum number of nested blocks for function/method body 36 | max-nested-blocks=8 37 | 38 | 39 | [SIMILARITIES] 40 | 41 | # Minimum lines number of a similarity. 42 | min-similarity-lines=15 43 | # Ignore comments when computing similarities. 44 | ignore-comments=yes 45 | # Ignore docstrings when computing similarities. 46 | ignore-docstrings=yes 47 | # Ignore imports when computing similarities. 
48 | ignore-imports=yes 49 | 50 | 51 | [MESSAGES CONTROL] 52 | 53 | #TODO: DEV - Fix these one day: broad-except, fixme 54 | disable=logging-format-interpolation, logging-fstring-interpolation, broad-except, fixme 55 | extension-pkg-whitelist=lxml 56 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | ################################################## 3 | ################################################## 4 | ################### ESCAPE KOBS ################## 5 | ################################################## 6 | ################################################## 7 | 8 | # The column limit. 9 | column_limit=160 10 | 11 | # Split before the closing bracket if a list or dict literal doesn't fit on a single line. 12 | split_before_closing_bracket=False 13 | 14 | # Split before a dictionary or set generator (comp_for). For example, note 15 | # the split before the 'for': 16 | # 17 | # foo = { 18 | # variable: 'Hello world, have a nice day!' 19 | # for variable in bar if variable != 42 20 | # } 21 | split_before_dict_set_generator=False 22 | 23 | # Split named assignments onto individual lines. 24 | split_before_named_assigns=False 25 | 26 | # Allow splitting before a default / named assignment in an argument list. 27 | allow_split_before_default_or_named_assigns=False 28 | 29 | # Allow splits before the dictionary value. 30 | allow_split_before_dict_value=True 31 | 32 | # Let spacing indicate operator precedence. 
For example: 33 | # 34 | # a = 1 * 2 + 3 / 4 35 | # b = 1 / 2 - 3 * 4 36 | # c = (1 + 2) * (3 - 4) 37 | # d = (1 - 2) / (3 + 4) 38 | # e = 1 * 2 - 3 39 | # f = 1 + 2 + 3 + 4 40 | # 41 | # will be formatted as follows to indicate precedence: 42 | # 43 | # a = 1*2 + 3/4 44 | # b = 1/2 - 3*4 45 | # c = (1+2) * (3-4) 46 | # d = (1-2) / (3+4) 47 | # e = 1*2 - 3 48 | # f = 1 + 2 + 3 + 4 49 | # 50 | arithmetic_precedence_indication=True 51 | 52 | # Insert a blank line before a class-level docstring. 53 | blank_line_before_class_docstring=True 54 | 55 | # Insert a blank line before a module docstring. 56 | blank_line_before_module_docstring=True 57 | 58 | # Insert a blank line before a 'def' or 'class' immediately nested 59 | # within another 'def' or 'class'. For example: 60 | # 61 | # class Foo: 62 | # # <------ this blank line 63 | # def method(): 64 | # ... 65 | blank_line_before_nested_class_or_def=True 66 | 67 | # Do not split consecutive brackets. Only relevant when 68 | # dedent_closing_brackets is set. For example: 69 | # 70 | # call_func_that_takes_a_dict( 71 | # { 72 | # 'key1': 'value1', 73 | # 'key2': 'value2', 74 | # } 75 | # ) 76 | # 77 | # would reformat to: 78 | # 79 | # call_func_that_takes_a_dict({ 80 | # 'key1': 'value1', 81 | # 'key2': 'value2', 82 | # }) 83 | coalesce_brackets=True 84 | 85 | # Indent the dictionary value if it cannot fit on the same line as the 86 | # dictionary key. For example: 87 | # 88 | # config = { 89 | # 'key1': 90 | # 'value1', 91 | # 'key2': value1 + 92 | # value2, 93 | # } 94 | indent_dictionary_value=True 95 | 96 | # Split before arguments 97 | split_all_comma_separated_values=False 98 | 99 | # Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@' 100 | # rather than after. 101 | split_before_arithmetic_operator=True 102 | 103 | # Split before the '.' 
if we need to split a longer expression: 104 | # 105 | # foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d)) 106 | # 107 | # would reformat to something like: 108 | # 109 | # foo = ('This is a really long string: {}, {}, {}, {}' 110 | # .format(a, b, c, d)) 111 | split_before_dot=True 112 | 113 | 114 | ################################################## 115 | ################################################## 116 | ################## DEFAULT KNOBS ################# 117 | ################################################## 118 | ################################################## 119 | 120 | # Align closing bracket with visual indentation. 121 | align_closing_bracket_with_visual_indent=True 122 | 123 | # Allow dictionary keys to exist on multiple lines. For example: 124 | # 125 | # x = { 126 | # ('this is the first element of a tuple', 127 | # 'this is the second element of a tuple'): 128 | # value, 129 | # } 130 | allow_multiline_dictionary_keys=False 131 | 132 | # Allow lambdas to be formatted on more than one line. 133 | allow_multiline_lambdas=False 134 | 135 | # Number of blank lines surrounding top-level function and class 136 | # definitions. 137 | blank_lines_around_top_level_definition=2 138 | 139 | # The style for continuation alignment. Possible values are: 140 | # 141 | # - SPACE: Use spaces for continuation alignment. This is default behavior. 142 | # - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns 143 | # (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs or 144 | # CONTINUATION_INDENT_WIDTH spaces) for continuation alignment. 145 | # - VALIGN-RIGHT: Vertically align continuation lines to multiple of 146 | # INDENT_WIDTH columns. Slightly right (one tab or a few spaces) if 147 | # cannot vertically align continuation lines with indent characters. 148 | continuation_align_style=SPACE 149 | 150 | # Indent width used for line continuations. 
151 | continuation_indent_width=4 152 | 153 | # Put closing brackets on a separate line, dedented, if the bracketed 154 | # expression can't fit in a single line. Applies to all kinds of brackets, 155 | # including function definitions and calls. For example: 156 | # 157 | # config = { 158 | # 'key1': 'value1', 159 | # 'key2': 'value2', 160 | # } # <--- this bracket is dedented and on a separate line 161 | # 162 | # time_series = self.remote_client.query_entity_counters( 163 | # entity='dev3246.region1', 164 | # key='dns.query_latency_tcp', 165 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)), 166 | # start_ts=now()-timedelta(days=3), 167 | # end_ts=now(), 168 | # ) # <--- this bracket is dedented and on a separate line 169 | dedent_closing_brackets=True 170 | 171 | # Disable the heuristic which places each list element on a separate line 172 | # if the list is comma-terminated. 173 | disable_ending_comma_heuristic=False 174 | 175 | # Place each dictionary entry onto its own line. 176 | each_dict_entry_on_separate_line=True 177 | 178 | # Require multiline dictionary even if it would normally fit on one line. 179 | # For example: 180 | # 181 | # config = { 182 | # 'key1': 'value1' 183 | # } 184 | force_multiline_dict=False 185 | 186 | # The regex for an i18n comment. The presence of this comment stops 187 | # reformatting of that line, because the comments are required to be 188 | # next to the string they translate. 189 | i18n_comment= 190 | 191 | # The i18n function call names. The presence of this function stops 192 | # reformattting on that line, because the string it has cannot be moved 193 | # away from the i18n comment. 194 | i18n_function_call= 195 | 196 | # Indent blank lines. 197 | indent_blank_lines=False 198 | 199 | # Put closing brackets on a separate line, indented, if the bracketed 200 | # expression can't fit in a single line. Applies to all kinds of brackets, 201 | # including function definitions and calls. 
For example: 202 | # 203 | # config = { 204 | # 'key1': 'value1', 205 | # 'key2': 'value2', 206 | # } # <--- this bracket is indented and on a separate line 207 | # 208 | # time_series = self.remote_client.query_entity_counters( 209 | # entity='dev3246.region1', 210 | # key='dns.query_latency_tcp', 211 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)), 212 | # start_ts=now()-timedelta(days=3), 213 | # end_ts=now(), 214 | # ) # <--- this bracket is indented and on a separate line 215 | indent_closing_brackets=False 216 | 217 | 218 | 219 | # The number of columns to use for indentation. 220 | indent_width=4 221 | 222 | # Join short lines into one line. E.g., single line 'if' statements. 223 | join_multiple_lines=True 224 | 225 | # Do not include spaces around selected binary operators. For example: 226 | # 227 | # 1 + 2 * 3 - 4 / 5 228 | # 229 | # will be formatted as follows when configured with "*,/": 230 | # 231 | # 1 + 2*3 - 4/5 232 | no_spaces_around_selected_binary_operators= 233 | 234 | # Use spaces around default or named assigns. 235 | spaces_around_default_or_named_assign=False 236 | 237 | # Adds a space after the opening '{' and before the ending '}' dict delimiters. 238 | # 239 | # {1: 2} 240 | # 241 | # will be formatted as: 242 | # 243 | # { 1: 2 } 244 | spaces_around_dict_delimiters=False 245 | 246 | # Adds a space after the opening '[' and before the ending ']' list delimiters. 247 | # 248 | # [1, 2] 249 | # 250 | # will be formatted as: 251 | # 252 | # [ 1, 2 ] 253 | spaces_around_list_delimiters=False 254 | 255 | # Use spaces around the power operator. 256 | spaces_around_power_operator=False 257 | 258 | # Use spaces around the subscript / slice operator. For example: 259 | # 260 | # my_list[1 : 10 : 2] 261 | spaces_around_subscript_colon=False 262 | 263 | # Adds a space after the opening '(' and before the ending ')' tuple delimiters. 
264 | # 265 | # (1, 2, 3) 266 | # 267 | # will be formatted as: 268 | # 269 | # ( 1, 2, 3 ) 270 | spaces_around_tuple_delimiters=False 271 | 272 | # The number of spaces required before a trailing comment. 273 | # This can be a single value (representing the number of spaces 274 | # before each trailing comment) or list of values (representing 275 | # alignment column values; trailing comments within a block will 276 | # be aligned to the first column value that is greater than the maximum 277 | # line length within the block). For example: 278 | # 279 | # With spaces_before_comment=5: 280 | # 281 | # 1 + 1 # Adding values 282 | # 283 | # will be formatted as: 284 | # 285 | # 1 + 1 # Adding values <-- 5 spaces between the end of the statement and comment 286 | # 287 | # With spaces_before_comment=15, 20: 288 | # 289 | # 1 + 1 # Adding values 290 | # two + two # More adding 291 | # 292 | # longer_statement # This is a longer statement 293 | # short # This is a shorter statement 294 | # 295 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment 296 | # short # This is a shorter statement 297 | # 298 | # will be formatted as: 299 | # 300 | # 1 + 1 # Adding values <-- end of line comments in block aligned to col 15 301 | # two + two # More adding 302 | # 303 | # longer_statement # This is a longer statement <-- end of line comments in block aligned to col 20 304 | # short # This is a shorter statement 305 | # 306 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment <-- the end of line comments are aligned based on the line length 307 | # short # This is a shorter statement 308 | # 309 | spaces_before_comment=2 310 | 311 | # Insert a space between the ending comma and closing bracket of a list, 312 | # etc. 313 | space_between_ending_comma_and_closing_bracket=True 314 | 315 | # Use spaces inside brackets, braces, and parentheses. 
For example: 316 | # 317 | # method_call( 1 ) 318 | # my_dict[ 3 ][ 1 ][ get_index( *args, **kwargs ) ] 319 | # my_set = { 1, 2, 3 } 320 | space_inside_brackets=False 321 | 322 | # Split before arguments, but do not split all subexpressions recursively 323 | # (unless needed). 324 | split_all_top_level_comma_separated_values=False 325 | 326 | 327 | # Set to True to prefer splitting before '&', '|' or '^' rather than 328 | # after. 329 | split_before_bitwise_operator=True 330 | 331 | # Split after the opening paren which surrounds an expression if it doesn't 332 | # fit on a single line. 333 | split_before_expression_after_opening_paren=False 334 | 335 | # If an argument / parameter list is going to be split, then split before 336 | # the first argument. 337 | split_before_first_argument=False 338 | 339 | # Set to True to prefer splitting before 'and' or 'or' rather than 340 | # after. 341 | split_before_logical_operator=True 342 | 343 | # Set to True to split list comprehensions and generators that have 344 | # non-trivial expressions and multiple clauses before each of these 345 | # clauses. For example: 346 | # 347 | # result = [ 348 | # a_long_var + 100 for a_long_var in xrange(1000) 349 | # if a_long_var % 10] 350 | # 351 | # would reformat to something like: 352 | # 353 | # result = [ 354 | # a_long_var + 100 355 | # for a_long_var in xrange(1000) 356 | # if a_long_var % 10] 357 | split_complex_comprehension=False 358 | 359 | # The penalty for splitting right after the opening bracket. 360 | split_penalty_after_opening_bracket=300 361 | 362 | # The penalty for splitting the line after a unary operator. 363 | split_penalty_after_unary_operator=10000 364 | 365 | # The penalty of splitting the line around the '+', '-', '*', '/', '//', 366 | # ``%``, and '@' operators. 367 | split_penalty_arithmetic_operator=300 368 | 369 | # The penalty for splitting right before an if expression. 
370 | split_penalty_before_if_expr=0 371 | 372 | # The penalty of splitting the line around the '&', '|', and '^' 373 | # operators. 374 | split_penalty_bitwise_operator=300 375 | 376 | # The penalty for splitting a list comprehension or generator 377 | # expression. 378 | split_penalty_comprehension=80 379 | 380 | # The penalty for characters over the column limit. 381 | split_penalty_excess_character=7000 382 | 383 | # The penalty incurred by adding a line split to the unwrapped line. The 384 | # more line splits added the higher the penalty. 385 | split_penalty_for_added_line_split=30 386 | 387 | # The penalty of splitting a list of "import as" names. For example: 388 | # 389 | # from a_very_long_or_indented_module_name_yada_yad import (long_argument_1, 390 | # long_argument_2, 391 | # long_argument_3) 392 | # 393 | # would reformat to something like: 394 | # 395 | # from a_very_long_or_indented_module_name_yada_yad import ( 396 | # long_argument_1, long_argument_2, long_argument_3) 397 | split_penalty_import_names=0 398 | 399 | # The penalty of splitting the line around the 'and' and 'or' 400 | # operators. 401 | split_penalty_logical_operator=300 402 | 403 | # Use the Tab character for indentation. 
404 | use_tabs=False 405 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "britesnow.vscode-toggle-quotes", 4 | "eamodio.gitlens", 5 | "davidanson.vscode-markdownlint", 6 | "redhat.vscode-yaml", 7 | "ms-python.vscode-pylance", 8 | "bungcip.better-toml", 9 | "emeraldwalk.runonsave", 10 | "matangover.mypy", 11 | "njqdev.vscode-python-typehint", 12 | "dongli.python-preview", 13 | "frhtylcn.pythonsnippets", 14 | "kevinrose.vsc-python-indent", 15 | "LittleFoxTeam.vscode-python-test-adapter", 16 | "ryanluker.vscode-coverage-gutters" 17 | ] 18 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.languageServer": "Pylance", 3 | "python.linting.pylintEnabled": true, 4 | "python.linting.enabled": true, 5 | "python.linting.pylintPath": "pylint", 6 | "python.formatting.provider": "yapf", 7 | "editor.formatOnSave": true, 8 | "python.defaultInterpreterPath": ".venv/bin/python", 9 | "mypy.runUsingActiveInterpreter": true, 10 | "emeraldwalk.runonsave": { 11 | "commands": [ 12 | { 13 | "match": "\\.py$", 14 | "cmd": "cd ${workspaceFolder} && isort -m 9 --line-length 160 graphinder tests" 15 | }, 16 | { 17 | "match": "\\.py$", 18 | "cmd": "cd ${workspaceFolder} && autoflake --in-place --remove-unused-variables --remove-all-unused-imports graphinder/**/*.py" 19 | }, 20 | { 21 | "match": "\\.py$", 22 | "cmd": "cd ${workspaceFolder} && docformatter --wrap-summaries 160 --wrap-descriptions 160 -ri graphinder tests" 23 | }, 24 | { 25 | "match": "\\.py$", 26 | "cmd": "cd ${workspaceFolder} && unify -ri graphinder tests" 27 | } 28 | ] 29 | }, 30 | "python.testing.pytestEnabled": true, 31 | "python.testing.pytestPath": "../scripts/pytest-with-cov" 32 | }
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via issue, 4 | email, or any other method with the owners of this repository before making a change. 5 | 6 | Please note we have a code of conduct, please follow it in all your interactions with the project. 7 | 8 | ## Pull Request Process 9 | 10 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a 11 | build. 12 | 2. Update the README.md with details of changes to the interface, this includes new environment 13 | variables, exposed ports, useful file locations and container parameters. 14 | 3. You may merge the Pull Request in once you have the sign-off of two other developers, or if you 15 | do not have permission to do that, you may request the second reviewer to merge it for you. 16 | 17 | ## Code of Conduct 18 | 19 | ### Our Pledge 20 | 21 | In the interest of fostering an open and welcoming environment, we as 22 | contributors and maintainers pledge to making participation in our project and 23 | our community a harassment-free experience for everyone, regardless of age, body 24 | size, disability, ethnicity, gender identity and expression, level of experience, 25 | nationality, personal appearance, race, religion, or sexual identity and 26 | orientation. 
27 | 28 | ### Our Standards 29 | 30 | Examples of behavior that contributes to creating a positive environment 31 | include: 32 | 33 | * Using welcoming and inclusive language 34 | * Being respectful of differing viewpoints and experiences 35 | * Gracefully accepting constructive criticism 36 | * Focusing on what is best for the community 37 | * Showing empathy towards other community members 38 | 39 | Examples of unacceptable behavior by participants include: 40 | 41 | * The use of sexualized language or imagery and unwelcome sexual attention or 42 | advances 43 | * Trolling, insulting/derogatory comments, and personal or political attacks 44 | * Public or private harassment 45 | * Publishing others' private information, such as a physical or electronic 46 | address, without explicit permission 47 | * Other conduct which could reasonably be considered inappropriate in a 48 | professional setting 49 | 50 | ### Our Responsibilities 51 | 52 | Project maintainers are responsible for clarifying the standards of acceptable 53 | behavior and are expected to take appropriate and fair corrective action in 54 | response to any instances of unacceptable behavior. 55 | 56 | Project maintainers have the right and responsibility to remove, edit, or 57 | reject comments, commits, code, wiki edits, issues, and other contributions 58 | that are not aligned to this Code of Conduct, or to ban temporarily or 59 | permanently any contributor for other behaviors that they deem inappropriate, 60 | threatening, offensive, or harmful. 61 | 62 | ### Scope 63 | 64 | This Code of Conduct applies both within project spaces and in public spaces 65 | when an individual is representing the project or its community. Examples of 66 | representing a project or community include using an official project e-mail 67 | address, posting via an official social media account, or acting as an appointed 68 | representative at an online or offline event. 
Representation of a project may be 69 | further defined and clarified by project maintainers. 70 | 71 | ### Enforcement 72 | 73 | All complaints will be reviewed and investigated and will result in a response that 74 | is deemed necessary and appropriate to the circumstances. The project team is 75 | obligated to maintain confidentiality with regard to the reporter of an incident. 76 | Further details of specific enforcement policies may be posted separately. 77 | 78 | Project maintainers who do not follow or enforce the Code of Conduct in good 79 | faith may face temporary or permanent repercussions as determined by other 80 | members of the project's leadership. 81 | 82 | ### Attribution 83 | 84 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 85 | available at [http://contributor-covenant.org/version/1/4][version] 86 | 87 | [homepage]: http://contributor-covenant.org 88 | [version]: http://contributor-covenant.org/version/1/4/ 89 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # BASE 2 | FROM python:3.10-alpine as python-base 3 | 4 | ENV APP_NAME="graphinder" \ 5 | POETRY_HOME="/opt/poetry" \ 6 | POETRY_VIRTUALENVS_IN_PROJECT=true \ 7 | PIP_NO_CACHE_DIR=off \ 8 | PYSETUP_PATH="/opt/pysetup" \ 9 | PYTHONDONTWRITEBYTECODE=1 \ 10 | VENV_PATH="/opt/pysetup/.venv" \ 11 | PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" 12 | 13 | # BUILDER 14 | FROM python-base as builder-base 15 | 16 | WORKDIR $PYSETUP_PATH 17 | RUN apk add build-base zlib-dev libffi-dev 18 | RUN pip install poetry 19 | 20 | COPY ./poetry.lock ./pyproject.toml ./README.md ./ 21 | RUN poetry install --no-dev --no-root 22 | 23 | COPY ./$APP_NAME ./$APP_NAME 24 | RUN poetry install --no-dev 25 | 26 | # RELEASE 27 | FROM python-base as release 28 | 29 | ENV PYTHONWARNINGS="ignore" 30 | 31 | COPY --from=builder-base $VENV_PATH $VENV_PATH 
32 | COPY ./$APP_NAME /$APP_NAME/ 33 | COPY ./docker-entrypoint.sh /docker-entrypoint.sh 34 | 35 | RUN chmod +x /docker-entrypoint.sh 36 | 37 | ENTRYPOINT /docker-entrypoint.sh $0 $@ 38 | CMD ["-h"] 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021-2022 Escape Technologies SAS (https://escape.tech/) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graphinder ![PyPI](https://img.shields.io/pypi/v/graphinder) [![CI](https://github.com/Escape-Technologies/graphinder/actions/workflows/ci.yaml/badge.svg)](https://github.com/Escape-Technologies/graphinder/actions/workflows/ci.yaml) [![codecov](https://codecov.io/gh/Escape-Technologies/graphinder/branch/main/graph/badge.svg?token=4KGK1LTHRO)](https://codecov.io/gh/Escape-Technologies/graphinder) 2 | 3 | Graphinder is a tool that extracts all GraphQL endpoints from a given domain. 4 | 5 | ![Banner](doc/banner.png) 6 | 7 | ![Docker Pulls](https://img.shields.io/docker/pulls/escapetech/graphinder) 8 | ![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/escapetech/graphinder) 9 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/graphinder) 10 | 11 | ## Run with docker 12 | 13 | ```bash 14 | docker pull escapetech/graphinder 15 | docker run -it --rm escapetech/graphinder -d example.com 16 | ``` 17 | 18 | If you want to save your results.json file, you can use: 19 | 20 | ```bash 21 | docker run -it --name graphinder escapetech/graphinder -d example.com 22 | docker cp graphinder:/graphinder/results.json results.json 23 | docker rm -f graphinder 24 | ``` 25 | 26 | ## Install using Pip 27 | 28 | ```bash 29 | pip install graphinder 30 | 31 | # using specific python binary 32 | python3 -m pip install graphinder 33 | ``` 34 | 35 | Run it with 36 | 37 | ```bash 38 | graphinder ... 39 | ``` 40 | 41 | ## Usage 42 | 43 | A Scan consistes of: 44 | 45 | - Running specific domain (`-d`, `--domain`). 
46 | - Searching all scripts loaded by the browser for graphql endpoint (`-s`, `--script`) 47 | - Brute forcing the directories of all discovered urls (`-b`, `--bruteforce`) 48 | - Using precision mode (`-p`, `--precision`) 49 | 50 | By default, bruteforce and script search are enabled. 51 | 52 | ```bash 53 | graphinder -d example.com 54 | ``` 55 | 56 | ```bash 57 | graphinder -f domains.txt 58 | ``` 59 | 60 | ### Extra features 61 | 62 | - `--no-bruteforce`: Disable bruteforce 63 | - `--no-script`: Disable script search 64 | - `-p --precision --no-precision`: Enable/disable precision mode (default: enabled) (precision mode is slower but more accurate) 65 | - `-w --max-workers `: Maximum of concurrent workers on multiple domains. 66 | - `-o --output-file `: Output the results to file 67 | - `-v --verbose --no-verbose`: Verbose mode 68 | - `-r --reduce`: The maximum number of subdomains to scan. 69 | - `-wb --webhook_url`: The discord webhook url to send the results to. 70 | 71 | If you experience any issues, irregularities or networking bottlenecks, please reduce your number of workers, otherwise, better is your network, the more workers you can have. 72 | 73 | ## Local installation 74 | 75 | Clone the repository and run the installation script 76 | 77 | ```bash 78 | git clone https://github.com/Escape-Technologies/graphinder.git 79 | cd graphinder 80 | ./install-dev.sh 81 | ``` 82 | 83 | Run this command to enter the virtual enviroment 84 | 85 | ```bash 86 | poetry shell 87 | ``` 88 | 89 | Profit ! 90 | 91 | ```bash 92 | graphinder -d example.com 93 | ``` 94 | 95 | ## How do you make sure this is a valid graphql endpoint ? 96 | 97 | ![detector](doc/detector.jpg) 98 | 99 | ## Contributing 100 | 101 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. 102 | 103 | Please make sure to update tests as appropriate. 
104 | 105 | ## License ![PyPI - License](https://img.shields.io/pypi/l/graphinder) 106 | 107 | [MIT](https://choosealicense.com/licenses/mit/) 108 | -------------------------------------------------------------------------------- /commitlint.config.js: -------------------------------------------------------------------------------- 1 | const Configuration = { 2 | /* 3 | * Resolve and load @commitlint/config-conventional from node_modules. 4 | * Referenced packages must be installed 5 | */ 6 | extends: ['@commitlint/config-angular'], 7 | /* 8 | * Resolve and load @commitlint/format from node_modules. 9 | * Referenced package must be installed 10 | */ 11 | formatter: '@commitlint/format', 12 | /* 13 | * Whether commitlint uses the default ignore rules. 14 | */ 15 | defaultIgnores: true, 16 | /* 17 | * Custom URL to show upon failure 18 | */ 19 | helpUrl: 20 | 'https://github.com/angular/angular/blob/22b96b9/CONTRIBUTING.md#-commit-message-guidelines', 21 | rules: { 22 | "type-enum": [2, 'always' ,["ci", "docs", "feat", "fix", "refactor", "test", "chore"]], 23 | }, 24 | }; 25 | 26 | module.exports = Configuration; 27 | -------------------------------------------------------------------------------- /doc/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/doc/banner.png -------------------------------------------------------------------------------- /doc/detector.drawio: -------------------------------------------------------------------------------- 1 | 
7V3bkps2GH4az7QX60GAOFxmD2k6TdO0ybTN1Q4G2ZBg5IC86+3TV5izJGzZ5mDHuzPJIiGE0Pef9Us70e6Wm19iZ+X/jj0UTlTF20y0+4mqqhZU6a+05iWrAZZhZzWLOPDyuqriU/AfyiuVvHYdeChpNCQYhyRYNStdHEXIJY06J47xc7PZHIfNt66cBeIqPrlOyNf+E3jEz2otqFT171Cw8Is3AyW/s3SKxnlF4jsefq5VaQ8T7S7GmGRXy80dCtPZK+Yle+5ty91yYDGKiMwD79D6T9P87bNvwj/8X2fv1it1c5PDk5CX4oORR78/L+KY+HiBIyd8qGpvY7yOPJT2qtBS1eY9xitaCWjlV0TISw6msyaYVvlkGeZ36YDjl3/z57eFL2lhCovi/aZ+8/4lL2VjTQfYOgV5VYLXsZu3eq8/fnBv1w/vwPIW/fV49/WzC24KUnLiBSI75geUQFESR3iJ6HjoczEKHRI8Ncfh5KS2KNtVaNCLHBAxOLsG+eSE6/xNE9UI6XBv55h+cB024/saFzduku3Ev6ENgLXabCeuuE+vFunvh+WKpB+ywgnZfs/3NUqvsu7paLM3ZI05GmlSwLMfEPRp5Wwn/JnKgSbarSA9oZigTa2Kn+f8rp2zUCFEtLz8XHGkWvCdX+PG4rlTkBGShSFAZng+SjHKbwLraP7ohe7zRz/iYEuoOY6WygBpwqlS+1G1Zo8Ze+adMJCVozoeRXAWqKHIe5MqKlqOcISymrdB+imVDDxCWm5LH1Ec0GlCcVG5Cci/1eO09KUQyvS66iktnCZ228lKRuxqvdAfaJIfVBUpeuP6MbRmR6rOdJRNBNcRxdl5qTVbpQ2S9vGy79GAsnNcN/C09gaEDJ9lI+6W61q1mhc8FSrnjjKJE0RJTSPV7goe8BziTB8fycsKRc4SFS1mMfsM2xOr2ny8nK0TWbXG6TABX7SqNWgy4hCAqQ441QZUgWoDqjml4qMf9VZ0fBlmYV2gqhICtUN5psnKM3MgO3LXKPuwI3mO68F2PInJSocsZzLdHtt4BPCSuEtoRhxqw3TIcnovloHOuBimCQc1RfU+WTRxfbR0LoVBzdG9O5sD4wtKuHmiX0yak5GQGH9DdzjEccUIc8oFTJUTBouIFl06QylL3abzF7hO+Ca/sQw8b8v1otlv4lPnRNgNIAbrbpv61LY5TKAAEq03mSnSYQwg2fQXUTrAYOM7q7TdcrNII5XTeYifXd+JydSJIkyo7MDRI6iBE6J5Cu/WREfxwxNKLfVdfvdJc844xlDnecASzHePEy4KcXRmpjclUn9Gep35Jqr2dvvTDWKcxgBwati1Hw5AsR0PrCm0+wLR7BPEi/K1WLh0k/paSg0uUxKvPv0ue5SAVGkZloXD/S44rBEIco45c8cL8JZED1pL5bVW6MxQ+BEnQdqAVscZAe/VZmpHFoTGMhvkuMsQMldf6qykrNFCvTKRicpHq5ivjM92vjAmnihFlrOMEzlL7IhBJiRpsaHVFkesPbTatatWzNBFMXWc90vLttINk0MVNpES+G0iH6H097pn8vZA1ynmju8k/jp2UoRXyA3mAYovwuSxOSl8hiaP2h75OAUzRMVsnEzgrTKB99MlSpI0z+MSUAPAvATYYC+wlUApN/RfhOl/uqJfBm6GweLGy8MRgJLy46lF842ipHjBfI62XyyLGPGdiD7ohjipJZCcN1I6uAQOszjgPmBu3n7cICS7xK5VcZKxgpDqtQeGWQmn827doIBofC7BNfGIrjO+kiVngfeHh8gqeHWNzsA10ngz4LpEl3JurMKH5q9JdLGqBNpjqxLe3OLQKKbwPSNtZpgQvBTMMcGr3fJt5iSBO92UuNbFIjvrzFrW3HKR63IUQe/MLKjDjiSZxfANgDxOugAnNmrXGU46r/IHw4k+8+0YqDyILE8XQWWpM80wOlI6VhMqWyDihoVK3Q9VLSLuhk5CZ7kJxG5BVEXPK9dwfxSc
n8E9QqeoOzG03aqD9mQN7+3INpmODo6RSw7Y3J1O3PaB0u3tw9rbsNG+n3RlXSLz5EQqznPyiyz8/Rn5aYHL7D+KF2RyXOGYTAOZdXubNZFlmQYeyTSd0ZHEnqHrVFwm48ZqkrY57E1xidJmOlZc5UKvPjlgoVe8p6fB+SJpohwsTTqUH9K5GnrLivJA2plNEDpWO5tslMxiQsQdaWfuPeYAm3d0iYwWPsuhT1V4bFr7mRC92rLTbRiitxgaMiQ3su3viFXTXRE9o8WLuEErM+qHtbdObr/bhOXaawMwbUEsw5qw1RbT3reVqrL7sNSW3SPDcBtQ2f2WxpHsBjSGDwwwrDULJcIww5PUsAdL7Mxn2U+Ko1o7PxIp9h9mulBSNGRJcVQP/0cixcNiRcdax4cc3tBbCv64hitgz3I4nmhUJtPElDwV4lDT1WY3DLHnNckOrF9bUWIJ/9zCVPP5XBUvhXnGzIAdhamAqhjT5rq+KUjxK4JHjSUWODXbKfykWBWUOYroRxE5I5tMFydygG4eKnOMMWTOYfHWy6bhcW0thYk/GCzlSdMwm0TbGw2XHQ9JkgUb9OdUyNLUeOKOEVJcSO94cTfw0pvBe4hXlfxmc0HKNG+9OnaPP3Jq0NQrg/eargoeAKzzxoffonVd+OhqiyQcDRHeSbsuRPj00fPimOtO7gUmK9BGZ5jr3kwFFHhugPB5BFcFCBupHh0Qk3d4rgoQqJ0bIBKB2MtetB83EsKGFeDR0TybPVZa05s99ezemiNmlsptXemCQW1WYgK5HUM6bCeN0xj0dceQ2NZgog2jbxmyeNU2GFBnvqSlFNveil32ivCoz4HXtKzXPV6yzFWu14yVK2+Jzu85KynYi0wTWIfDyjTeOowRWcfpWSxvnTBBE/Wuuuzcis+33Z9gww+DG1BZ3HhdBAY16y3eWNsBnPJTECXEiUj48vMrjBWMBg+jkP/6g5GP8JUwfo7Xr+wnxK085rrOfqKDQPvDjTfaBbhlV6/MJwZRkPQkPJ+/PxAFm5DgbZD8TafWyxB01sT/C31fBzGdLXjfA3xdBLIGMl7YAwUF+A3KhPY12Iz6cMcx0GL1Z0SzqFL111i1h/8B -------------------------------------------------------------------------------- /doc/detector.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/doc/detector.jpg -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | . 
/opt/pysetup/.venv/bin/activate 6 | 7 | exec python3 -m graphinder "$@" 8 | -------------------------------------------------------------------------------- /graphinder/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | 3 | from graphinder.main import async_main, cli, main # noqa 4 | -------------------------------------------------------------------------------- /graphinder/__main__.py: -------------------------------------------------------------------------------- 1 | """Entry point for python cli.""" 2 | 3 | from .main import cli 4 | 5 | if __name__ == '__main__': 6 | cli() 7 | -------------------------------------------------------------------------------- /graphinder/entities/__init__.py: -------------------------------------------------------------------------------- 1 | """Entities init.""" 2 | -------------------------------------------------------------------------------- /graphinder/entities/errors.py: -------------------------------------------------------------------------------- 1 | """Errors entities.""" 2 | 3 | from asyncio import TimeoutError as AsyncioTimeoutError 4 | from socket import gaierror 5 | 6 | from aiohttp.client_exceptions import ClientError, ClientPayloadError 7 | 8 | AwaitableRequestException: tuple = ( 9 | ClientError, 10 | AsyncioTimeoutError, 11 | gaierror, 12 | UnicodeError, 13 | ValueError, 14 | ClientPayloadError, 15 | ) 16 | -------------------------------------------------------------------------------- /graphinder/entities/io.py: -------------------------------------------------------------------------------- 1 | """Entities for `io` module.""" 2 | 3 | from typing import Dict, Set 4 | 5 | from graphinder.entities.pool import Url 6 | 7 | Results = Dict[str, Set[Url]] 8 | -------------------------------------------------------------------------------- /graphinder/entities/pool.py: 
"""Tasks entities."""

from enum import Enum
from typing import List


class TaskTags(Enum):

    """Tags identifying the kind of work a task performs."""

    FETCH_SCRIPT = 0
    FETCH_PAGE_SCRIPTS = 1
    FETCH_ENDPOINT = 2


# pylint: disable=too-few-public-methods
class Task:

    """A unit of work bound to a domain: process `url` according to `tag`."""

    def __init__(
        self,
        domain_url: str,
        tag: TaskTags,
        url: str,
    ) -> None:
        """Store the task attributes.

        A single trailing slash is stripped from `url` so urls compare consistently.
        """

        self.domain_url = domain_url
        self.tag = tag
        self.url = url.removesuffix('/')


TasksList = List[Task]
def gql_endpoints_characterizer() -> List[str]:
    """Return the list of most common GraphQL endpoint path fragments.

    - Versioning has a huge cost on the performance of the scanner.
    - We try to minimize that cost by versioning only the most common endpoints.
    """

    base_paths: List[str] = [
        'graphql',
        'appsync',
        'altair',
        'explorer',
        'graphiql',
        'playground',
        'subscriptions',
        'graph',
        'graphiql.css',
        'graphiql/finland',
        'graphiql.js',
        'graphiql.min.css',
        'graphiql.min.js',
        'graphiql.php',
        'graphql/console',
        'graphql-explorer',
        'graphql.php',
        'graphql/schema.json',
        'graphql/schema.xml',
        'graphql/schema.yaml',
        'graphql/v1',
        'graphql/v2',
        'api/graphql',
    ]

    versions = ('v1', 'v2')
    # Only the 8 most common paths get versioned variants; a path that already
    # embeds a version marker is skipped.
    versioned: List[str] = [
        f'{version}/{path}'
        for version in versions
        for path in base_paths[:8]
        if not any(v in path for v in versions)
    ]

    return base_paths + versioned
"""I/O writers."""

import json
from io import TextIOWrapper
from typing import Any

from graphinder.entities.io import Results


class ResultEncoder(json.JSONEncoder):

    """JSON encoder that serializes `set` values as lists."""

    def default(self, o: Any) -> Any:
        """Convert a `set` into a JSON-serializable `list`; reject any other type."""

        if not isinstance(o, set):
            raise NotImplementedError()
        return list(o)


def write_results(
    output_file: TextIOWrapper,
    results: Results,
) -> None:
    """Dump `results` as pretty-printed, key-sorted JSON into `output_file`."""

    json.dump(
        results,
        output_file,
        cls=ResultEncoder,
        indent=4,
        sort_keys=True,
    )
def argument_builder(args: List[str]) -> argparse.Namespace:
    """Build the argument parser and parse `args`.

    Args:
        args: raw argument vector (typically ``sys.argv[1:]``).

    Returns:
        The parsed namespace.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--domain',
        '-d',
        dest='domain',
        type=str,
        help='Domain to scan',
    )
    parser.add_argument(
        '--output-file',
        '-o',
        dest='output_file',
        type=argparse.FileType('w'),
        help='The path of the results file',
    )
    # BUGFIX: these two flags previously used `type=bool`, so argparse called
    # bool() on the raw string and `--verbose False` evaluated to True.
    # BooleanOptionalAction implements the documented `--verbose/--no-verbose`
    # and `--precision/--no-precision` pairs correctly (Python 3.9+).
    parser.add_argument(
        '--verbose',
        '-v',
        dest='verbose_mode',
        action=argparse.BooleanOptionalAction,
        help='Verbose',
        default=False,
    )
    parser.add_argument(
        '--no-script',
        '-ns',
        dest='no_script_mode',
        help='Disable script scanning',
        action='store_true',
    )
    parser.add_argument(
        '--quiet',
        '-q',
        dest='quiet_mode',
        help='Quiet',
        action='store_true',
    )
    parser.add_argument(
        '--no-bruteforce',
        '-nb',
        dest='no_bruteforce_mode',
        help='Disable directory scanning',
        action='store_true',
    )
    parser.add_argument(
        '--precision',
        '-p',
        dest='precision_mode',
        action=argparse.BooleanOptionalAction,
        help='Use precision mode',
        default=True,
    )
    parser.add_argument(
        '--reduce',
        '-r',
        dest='reduce_mode',
        type=int,
        help='The maximum number of subdomains to scan.',
        default=100,
    )
    parser.add_argument(
        '--webhook_url',
        '-wb',
        dest='webhook_url',
        type=str,
        help='The webhook url to send results.',
        default=None,
    )

    return parser.parse_args(args)
async def loop(
    argv: Optional[List[str]] = None,
    logger: Optional[logging.Logger] = None,
) -> Results:
    """Parse arguments, configure logging and run the main scanning routine."""

    args: argparse.Namespace = argument_builder(
        sys.argv[1:] if argv is None else argv
    )

    logger = setup_logger(
        verbose_mode=args.verbose_mode,
        quiet_mode=args.quiet_mode,
        logger=logger,
    )

    # Bail out with an empty result set on invalid CLI combinations.
    if not validate_arguments(logger, args):
        return {}

    fetch_assets()
    return await main_routine(args)
| 3 | from graphinder.pool.routine import main_routine # noqa 4 | -------------------------------------------------------------------------------- /graphinder/pool/detectors.py: -------------------------------------------------------------------------------- 1 | """All functions for detection.""" 2 | 3 | import asyncio 4 | import json 5 | import logging 6 | import re 7 | from typing import Any, Coroutine, Dict, Optional, Tuple 8 | 9 | import aiohttp 10 | 11 | from graphinder.io.providers import gql_endpoints_characterizer 12 | 13 | 14 | def _look_like_graphql_url(url: str) -> Tuple[bool, Optional[str]]: 15 | """Check if the url looks like a GraphQL endpoint.""" 16 | 17 | for part in gql_endpoints_characterizer(): 18 | if part in url: 19 | return True, part 20 | 21 | return False, None 22 | 23 | 24 | def _replace_last_resource(url: str, resource: str) -> Optional[str]: 25 | """Replace the last resource in the url with the given resource.""" 26 | 27 | # https://hello.com 28 | if url.count('/') <= 2: 29 | return None 30 | 31 | # https://hello.com 32 | if url.count('/') == 3 and url.endswith('/'): 33 | return None 34 | 35 | lst = url.split('/') 36 | if lst[-1] == '': 37 | # https://hello.com/aaa/ 38 | del lst[-1] 39 | lst[-1] = resource 40 | return '/'.join(lst) + '/' 41 | 42 | # else # https://hello.com/aaa 43 | lst[-1] = resource 44 | 45 | return '/'.join(lst) 46 | 47 | 48 | async def _looks_different_than_closest_route( 49 | session: aiohttp.ClientSession, 50 | url: str, 51 | original_body: str, 52 | ) -> bool: 53 | """Check if a close route to the same endpoint is different than the original one.""" 54 | 55 | look_likes, characterizer = _look_like_graphql_url(url) 56 | if look_likes and characterizer: 57 | 58 | random_url = _replace_last_resource(url, 'random') 59 | if random_url is None: 60 | return False 61 | 62 | async with session.post( 63 | random_url, 64 | json={'query': 'query { __typename }'}, 65 | timeout=10, 66 | ) as random_resp: 67 | random_text_body = 
async def analyze_typename(
    text_body: str,
    json_body: Dict,
) -> Tuple[bool, bool]:
    """Classify a `__typename` probe response.

    Returns:
        Tuple[bool, bool]: (looks like GraphQL, authentication looks valid).
    """

    errors = json_body.get('errors', [{}])
    first_error_has_message = (
        isinstance(errors, list)
        and len(errors) > 0
        and isinstance(errors[0], dict)
        and errors[0].get('message') is not None
    )

    if first_error_has_message:
        # A GraphQL error payload means a live endpoint; a hasura allowlist
        # rejection additionally proves the credentials were accepted.
        allowlisted = 'query is not in any of the allowlists' in text_body.lower()
        return True, allowlisted

    # A bare `message` field that is not a not-found page also hints at GraphQL.
    if json_body.get('message') is not None \
            and '404' not in text_body \
            and re.search(r'not.found', text_body, re.IGNORECASE) is None:
        return True, False

    return False, False
# pylint: disable=too-few-public-methods
class GraphQLEndpointDetector:

    """Probe a single url and decide whether it serves GraphQL.

    The verdict is exposed through `detect()` as a
    (valid_graphql, valid_auth) tuple.
    """

    valid_auth: bool = False
    valid_graphql: bool = False

    _session: aiohttp.ClientSession
    _url: str
    _timeout: int
    _logger: Optional[logging.Logger]

    def __init__(
        self,
        session: aiohttp.ClientSession,
        url: str,
        logger: Optional[logging.Logger] = None,
        timeout: int = 10,
    ) -> None:
        """Initialize the detector.

        Note: mutates the shared session headers so every probe is sent as JSON.
        """

        self._session = session
        self._url = url
        self._timeout = timeout
        self._logger = logger

        session.headers.update({'Content-Type': 'application/json'})

    async def _send_request(
        self,
        matching_key: str,
        payload: Optional[Dict],
    ) -> Tuple[bool, Optional[str], Optional[dict]]:
        """POST `payload` to the url and look for `matching_key` under `data`.

        Returns:
            bool: True when `data.<matching_key>` is present (definite GraphQL).
            Optional[str]: the raw text body, for further analysis on failure.
            Optional[dict]: the parsed json body, for further analysis on failure.
        """

        try:
            async with self._session.post(
                self._url,
                json=payload,
                timeout=self._timeout,
            ) as req:
                text_body = await req.text()
                json_body = json.loads(text_body)

                if json_body.get('data', {}).get(matching_key) is not None:
                    return True, None, None

                return False, text_body, json_body

        except Exception as e:  # pylint: disable=broad-except
            if self._logger:
                self._logger.debug(f'Error while sending request to {self._url}: {e}')

            return False, None, None

    async def detect(self) -> Tuple[bool, bool]:
        """Detect if the url is a GraphQL endpoint.

        Returns:
            Tuple[bool, bool]: (valid_graphql, valid_auth).
        """

        # A server answering an empty POST with GraphQL-shaped data is most
        # likely a honeypot.
        if await empty_post_request(self._session, self._url, self._timeout):
            return False, False

        # Run both probes concurrently while keeping track of which response
        # belongs to which probe.
        # BUGFIX: the previous as_completed loop used `if query_tasks[0]` -
        # the truthiness of a coroutine object, which is always True - so the
        # __schema response was always analyzed with the __typename analyzer,
        # and the early break could leave a pending, never-awaited task.
        typename_result, schema_result = await asyncio.gather(
            self._send_request(
                '__typename',
                {'query': 'query { __typename }'},
            ),
            self._send_request(
                '__schema',
                {'query': 'query { __schema { queryType { name } } }'},
            ),
        )

        for result, is_typename_probe in (
            (typename_result, True),
            (schema_result, False),
        ):
            status, text_body, json_body = result

            if status:
                # Direct hit: the endpoint answered the probe itself.
                self.valid_graphql = True
                self.valid_auth = True
                break

            if not text_body or not json_body:
                continue

            if is_typename_probe:
                further_analysis = await analyze_typename(text_body, json_body)
            else:
                further_analysis = await analyze_schema(text_body)

            if further_analysis[0]:
                self.valid_graphql = True
            if further_analysis[1]:
                self.valid_auth = True

        return self.valid_graphql, self.valid_auth
class Domain:

    """Scan state for a single domain: subdomains, visited urls and results."""

    # Present only in precision_mode; bounds the number of concurrent tasks.
    semaphore: Optional[asyncio.Semaphore]
    # Shared HTTP session, attached by the pool before tasks are consumed.
    session: aiohttp.ClientSession

    def __init__(
        self,
        url: str,
        precision_mode: bool = False,
    ) -> None:
        """Initialize the scan state for `url`."""

        self.url = url
        self.logger = get_logger()

        self.subdomains: List[str] = []
        self.already_fetched: Set[str] = set()
        self.results: Set[Url] = set()

        # precision_mode throttles concurrency with a 100-slot semaphore.
        self.semaphore = asyncio.Semaphore(100) if precision_mode else None

    def fetch_subdomains(
        self,
        reduce: int = 100,
    ) -> None:
        """Enumerate subdomains with the bundled subfinder binary.

        At most `reduce` subdomains are kept after deduplication.
        """

        self.logger.info('fetching subdomains...')

        # NOTE(review): self.url is interpolated into a shell command; assumes
        # the url comes from trusted CLI input — confirm before exposing.
        stream = os.popen(f'./subfinder -d {self.url} -silent -timeout 5')

        self.subdomains = remove_duplicate_domains(stream.read().split('\n'))
        self.logger.info(f'{self.url} - found { len(self.subdomains) } subdomains.')

        if len(self.subdomains) > reduce:
            self.logger.debug('reducing the number of subdomains.')
            self.subdomains = self.subdomains[:reduce]

    async def fetch_script(
        self,
        url: str,
    ) -> Set[Url]:
        """Download one script and extract candidate endpoint urls from it."""

        self.logger.debug(f'fetching script {url}...')
        return await extract_urls_from_script(self.session, url)

    async def fetch_page_scripts(
        self,
        url: str,
    ) -> Set[Url]:
        """Download a page and collect the script urls it references."""

        self.logger.debug(f'fetching page scripts {url}...')
        return await extract_script_urls_from_page(self.session, url)

    async def fetch_endpoint(
        self,
        url: str,
    ) -> None:
        """Probe a candidate url and record it when it speaks GraphQL."""

        self.logger.debug(f'fetching endpoint {url}...')

        is_gql, _ = await is_gql_endpoint(url, session=self.session)
        if is_gql:
            self.logger.info(f'found GQL endpoint {url}.')
            self.results.add(Url(url))
def extract_scripts_from_html(
    url: str,
    html: str,
) -> List[str]:
    """Collect the absolute url of every `<script src=...>` in an html page."""

    soup = bs4(html, 'html.parser')

    # Resolve each src attribute against the page url; inline scripts
    # (no src) are skipped.
    return [
        urljoin(url, tag.attrs.get('src'))
        for tag in soup.find_all('script')
        if tag.attrs.get('src')
    ]


async def extract_script_urls_from_page(
    session: aiohttp.ClientSession,
    url: str,
) -> Set[Url]:
    """Fetch `url` and return the script urls hosted on the same site."""

    found: Set[Url] = set()

    try:
        async with session.get(url, timeout=10) as page:
            body: str = await page.text()

        # Keep only scripts whose address contains the page url: third-party
        # scripts are unlikely to reveal this site's GraphQL endpoint.
        for script_url in extract_scripts_from_html(url, body):
            if url in script_url:
                found.add(Url(script_url))

    except AwaitableRequestException:
        # Network/DNS/decoding errors: treat the page as having no scripts.
        pass

    return found


def extract_scripts_from_raw_js(
    url: str,
    script_file: str,
) -> Set[str]:
    """Run every regex-based finder over a script body and filter the noise."""

    candidates: List[str] = []
    candidates += find_script_full_urls(script_file)
    candidates += find_script_window_base_urls(url, script_file)
    candidates += find_script_fetch_graphql(url, script_file)

    return filter_common(set(candidates))
async def domain_routine(
    domain: Domain,
    args: argparse.Namespace,
) -> Dict[str, Union[str, Set[Url]]]:
    """Run the whole task pipeline for one domain and gather its urls."""

    tasks: TasksList = init_domain_tasks(domain, args)
    found: Set[Url] = await consume_tasks(tasks, domain)

    return {'domain': domain.url, 'urls': filter_urls(found)}


async def main_routine(args: argparse.Namespace) -> Results:
    """Entry point of the pool: scan one domain, then display/persist/notify."""

    logger = get_logger()
    logger.info('starting main routine..')

    domain: Domain = Domain(args.domain, args.precision_mode)
    logger.info(f'running scan on {domain.url}')

    # Pop `output_file` off the namespace so the task generators never see
    # it; keep a local handle for the final write.
    output_file = args.output_file
    del args.output_file

    result = await domain_routine(domain, args)
    results: Results = cast(Results, {result['domain']: result['urls']})

    if not args.quiet_mode:
        display_results(results)

    if output_file is not None:
        # Shallow copy: write_results may reshape the mapping for JSON.
        write_results(output_file, results.copy())

    if args.webhook_url is not None:
        send_webhook(args.webhook_url, results)

    return results
async def process_task(
    task: Task,
    domain: Domain,
) -> None:
    """Process a single task, dispatching on its tag.

    Args:
        task: the task to run.
        domain: scan state the task belongs to.

    Raises:
        NotImplementedError: on an unknown task tag.
    """

    # precision_mode: bound concurrency for the given domain.
    if domain.semaphore:
        await domain.semaphore.acquire()

    try:
        # Skip URLs already processed. Fix: `already_fetched` was previously
        # only written to, never consulted, so duplicate URLs discovered by
        # several scripts/pages were fetched again.
        if task.url in domain.already_fetched:
            return
        domain.already_fetched.add(task.url)

        # Dispatch on the tag; script/page results feed new tasks back in.
        if task.tag == TaskTags.FETCH_SCRIPT:
            _urls = await domain.fetch_script(task.url)
            if _urls:
                await add_tasks(domain, _urls, TaskTags.FETCH_ENDPOINT)
        elif task.tag == TaskTags.FETCH_PAGE_SCRIPTS:
            _urls = await domain.fetch_page_scripts(task.url)
            if _urls:
                await add_tasks(domain, _urls, TaskTags.FETCH_SCRIPT)
        elif task.tag == TaskTags.FETCH_ENDPOINT:
            await domain.fetch_endpoint(task.url)
        else:
            raise NotImplementedError()

    finally:
        # precision_mode: always release, even if the fetch raised, so a
        # failing task can no longer starve the semaphore.
        if domain.semaphore:
            domain.semaphore.release()
def filter_common(urls: Set[str]) -> Set[str]:
    """Remove commonly found urls in javascript files of a webpage such as w3.org."""

    common_strings = [
        'w3.org',
        'localhost',
        'schema.org',
        'sentry.io',
        'git.io',
        'github.com',
        'nuxtjs.org',
        'momentjs.com',
        'fb.me',
        'reactjs.org',
        'slack',
        'jquery',
        'google',
        'twitter',
        'elastic.co',
        'formatjs.io',  # Fix: a missing comma merged this entry with the
        'icann.org',  # next one ('formatjs.ioicann.org'), so neither matched.
        #TODO: find more of those
    ]

    urls_filtered = urls.copy()
    for url in urls:
        # '://a...a' / '://x...x' look like minified placeholder hosts.
        if '://a' in url and url.endswith('a'):
            urls_filtered.remove(url)
        elif '://x' in url and url.endswith('x'):
            urls_filtered.remove(url)
        elif any(common in url for common in common_strings):
            urls_filtered.remove(url)

    return urls_filtered


def filter_urls(urls: Set[Url]) -> Set[Url]:
    """Remove urls that are not valid.

    Collapses candidates sharing a base url, preferring the canonical
    `<base>/graphql` form, otherwise the shortest candidate.
    """

    # Characterizers sorted longest-first so the most specific suffix wins.
    _endpoints: List[str] = gql_endpoints_characterizer()
    _endpoints.sort(key=len, reverse=True)

    # Group each url under its base (url minus the matched characterizer).
    unpacked_urls: Dict[str, List[Url]] = {}
    for url in urls:
        for endpoint in _endpoints:
            if url.endswith(endpoint):

                unpacked_url = remove_suffix(url, endpoint)
                if unpacked_url not in unpacked_urls:
                    unpacked_urls[unpacked_url] = []

                unpacked_urls[unpacked_url].append(url)

                break

    # Keep the `/graphql` variant when present, else the shortest url.
    filtered_urls: Set[Url] = set()
    for base_url, _urls in unpacked_urls.items():

        default_match: bool = False
        for _url in _urls:
            if _url[len(base_url):] == 'graphql':
                filtered_urls.add(_url)
                default_match = True

                break

        if not default_match:
            filtered_urls.add(min(_urls, key=len))

    return filtered_urls


def remove_duplicate_domains(domains: List[str]) -> List[str]:
    """if domains has example.com and www.example.com this will remove www.example.com."""

    corrected_domains: List[str] = []
    for domain in domains:
        # Fix: str.lstrip('www.') strips *characters* ('w' and '.'), not the
        # prefix, so e.g. 'www.wexample.com' became 'example.com'. Slice the
        # literal prefix instead.
        if domain.startswith('www.') and domain[len('www.'):] in domains:
            continue
        corrected_domains.append(domain)

    return corrected_domains
def find_script_full_urls(script_file: str) -> List[str]:
    """Extract full urls from script file.

    Note: the character class contains no '/', so a match stops at the
    first slash after the host (scheme + host only).
    """

    return re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', script_file)


def find_script_window_base_urls(
    domain: str,
    script_file: str,
) -> List[str]:
    """Extract window.__BASE_URL__ urls from script file.

    - window.__BASE_URL__ +"/graphql"
    - window.__BASE_URL__+"/api/graphql"
    - window.__BASE_URL__ + "/api/v1/graphql"
    """

    # Fix: the dot after 'window' was unescaped, so e.g. 'windowX__BASE_URL__'
    # also matched.
    urls: List[str] = re.findall(r'window\.__BASE_URL__ ?\+ ?\"\S{0,15}/graphql\"+', script_file)

    # Replace window.__BASE_URL__ with domain and strip ending `"`
    return [domain + url[url.find('"') + 1:-1] for url in urls]
def disable_internal_loggers() -> None:
    """Disable internal loggers."""

    logging.getLogger('asyncio').setLevel(logging.ERROR)
    warnings.simplefilter('ignore')


def setup_logger(
    verbose_mode: bool = False,
    quiet_mode: bool = False,
    logger: Optional[logging.Logger] = None,
) -> logging.Logger:
    """Setup logger.

    Args:
        verbose_mode: log at DEBUG level instead of INFO.
        quiet_mode: log at ERROR level only; overrides verbose_mode.
        logger: an already-configured logger to adjust; when given, only
            its level is updated and the same object is returned.

    Returns:
        The configured logger.
    """

    disable_internal_loggers()

    log_level: int = logging.DEBUG if verbose_mode else logging.INFO
    if quiet_mode:
        log_level = logging.ERROR

    if logger:
        # Fix: the provided logger used to be discarded and replaced with
        # get_logger(), which made the parameter a no-op.
        logger.setLevel(log_level)
        return logger

    log_format: str = '%(asctime)s,%(msecs)04d - %(levelname)s - %(name)s - %(message)s'

    logging.basicConfig(level=log_level, datefmt='%H:%M:%S', format=log_format)

    logging.addLevelName(logging.DEBUG, '\x1b[32;1mDBG\x1b[0m')
    logging.addLevelName(logging.INFO, '\x1b[37;1mINF\x1b[0m')
    logging.addLevelName(logging.WARNING, '\x1b[33;1mWRN\x1b[0m')
    logging.addLevelName(logging.ERROR, '\x1b[31;1mERR\x1b[0m')

    return get_logger()


def get_logger() -> logging.Logger:
    """Get logger for specified module."""

    return logging.getLogger('graphinder')
def format_webhook(results: Results) -> dict:
    """Build the Discord webhook payload for a set of scan results."""

    # One embed per scanned domain, listing its endpoints line by line.
    embeds = [{
        'title': domain,
        'description': '\n'.join(urls),
        'color': random.randint(0, 16777215),  # random embed accent color
    } for domain, urls in results.items()]

    payload: Dict[str, Any] = {
        'username': 'Graphinder',
        'embeds': embeds,
    }

    return payload


def send_webhook(
    webhook_url: str,
    results: Results,
) -> bool:
    """Post the results to a Discord webhook; True on HTTP 204 (success)."""

    response = requests.post(url=webhook_url, json=format_webhook(results), timeout=5)

    return response.status_code == 204
11 | curl -sSL https://bootstrap.pypa.io/get-pip.py | python 12 | fi 13 | echo "---- Installing Python Poetry ----" 14 | echo "---- Updating pip ----" 15 | pip install -U pip 16 | pip install -U poetry 17 | poetry config virtualenvs.in-project true 18 | 19 | echo "---- Installing Python dependencies ----" 20 | poetry install 21 | 22 | echo "\n\n\n\n\n---- Git hooks init (using mookme) ----" 23 | npm install 24 | npx mookme init --only-hook --skip-types-selection 25 | 26 | echo "\n\n\n\n\n---- Your working directory is all set :) ----" 27 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "@commitlint/cli": "^12.1.4", 4 | "@commitlint/config-angular": "^12.1.4", 5 | "@escape.tech/mookme": "^2.2.0-beta.2" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "graphinder" 3 | version = "2.0.0b4" 4 | description = "Escape Graphinder" 5 | authors = ["Escape Technologies SAS "] 6 | maintainers = [ 7 | "Karim Rustom ", 8 | "Antoine Carossio ", 9 | "Swan " 10 | ] 11 | license = "MIT" 12 | packages = [ 13 | { include = "graphinder" } 14 | ] 15 | readme = "README.md" 16 | "homepage" = "https://escape.tech/" 17 | "repository" = "https://github.com/Escape-Technologies/graphinder" 18 | 19 | [tool.poetry.urls] 20 | "Bug Tracker" = "https://github.com/Escape-Technologies/graphinder/issues" 21 | 22 | [tool.poetry.scripts] 23 | graphinder = 'graphinder:cli' 24 | 25 | [tool.coverage.run] 26 | omit = [ 27 | 'graphinder/__main__.py', 28 | 'tests/*' 29 | ] 30 | 31 | [tool.pytest.ini_options] 32 | asyncio_mode = 'strict' 33 | 34 | [tool.poetry.dependencies] 35 | aiohttp = {extras = ["speedups"], version = "^3.8.1"} 36 | beautifulsoup4 = ">=4,<5" 37 | python = 
">=3.10,<4.0" 38 | requests = "^2.27.1" 39 | 40 | [tool.poetry.dev-dependencies] 41 | autoflake = "^2.0" 42 | docformatter = "^1.5" 43 | isort = "^5.10.1" 44 | mypy = "^0.982" 45 | poetryup = "^0.12.3" 46 | pylint = "^2.15.9" 47 | pylint-quotes = "^0.2.3" 48 | pytest = "^7.2.0" 49 | pytest-asyncio = "^0.20.2" 50 | pytest-cov = "^4.0.0" 51 | pytest-mock = "^3.10.0" 52 | pytest-rerunfailures = "^10.3" 53 | setuptools = ">=50" 54 | snakeviz = "^2.1.1" 55 | types-PyYAML = "^6.0.11" 56 | types-requests = "^2.28.11" 57 | types-setuptools = "^67.6.0" 58 | unify = "^0.5" 59 | vulture = "^2.6" 60 | yapf = "^0.32.0" 61 | 62 | [build-system] 63 | requires = ["poetry-core>=1.0.0"] 64 | build-backend = "poetry.core.masonry.api" 65 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/tests/__init__.py -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/entities/test_errors.py: -------------------------------------------------------------------------------- 1 | """Test entities/errors.py.""" 2 | 3 | from asyncio import TimeoutError as AsyncioTimeoutError 4 | from socket import gaierror 5 | 6 | from aiohttp.client_exceptions import ClientError 7 | 8 | from graphinder.entities.errors import AwaitableRequestException 9 | 10 | 11 | def test_awaitable_exception_type() -> None: 12 | """Test AwaitableRequestException type.""" 13 | 14 | assert len(AwaitableRequestException) == 6 15 | 16 | assert 
def test_results_type() -> None:
    """Test Results type."""

    results: Results = {'domain': set()}
    results['domain'].add(Url('https://example.com'))

    assert len(results) == 1
    assert len(results['domain']) == 1


def test_url_type() -> None:
    """Test Url type."""

    # Url is a str subtype (NewType/subclass), so instances behave as str.
    assert isinstance(Url('https://example.com'), str)
11 | 12 | assert 0 in TaskTags._value2member_map_ 13 | assert 1 in TaskTags._value2member_map_ 14 | assert 2 in TaskTags._value2member_map_ 15 | 16 | 17 | def test_task() -> None: 18 | """Task test.""" 19 | 20 | task: Task = Task('https://example.com', TaskTags.FETCH_SCRIPT, 'https://example.com/script.js') 21 | 22 | assert task.domain_url == 'https://example.com' 23 | assert task.tag == TaskTags.FETCH_SCRIPT 24 | assert task.url == 'https://example.com/script.js' 25 | -------------------------------------------------------------------------------- /tests/unit/io/test_printers.py: -------------------------------------------------------------------------------- 1 | """Test io/printers.py.""" 2 | 3 | from typing import Any 4 | 5 | from graphinder.entities.io import Results 6 | from graphinder.entities.pool import Url 7 | from graphinder.io.printers import display_results 8 | 9 | 10 | def test_display_results(capsys: Any) -> None: 11 | """display_results test.""" 12 | 13 | results: Results = { 14 | 'example.com': {Url('http://example.com/')}, 15 | 'example.org': {Url('http://example.org/')}, 16 | } 17 | display_results(results) 18 | 19 | assert capsys.readouterr().out == 'example.com - 1\n\thttp://example.com/\nexample.org - 1\n\thttp://example.org/\n' 20 | -------------------------------------------------------------------------------- /tests/unit/io/test_providers.py: -------------------------------------------------------------------------------- 1 | """Test io/providers.py.""" 2 | 3 | from graphinder.io.providers import gql_endpoints_characterizer 4 | 5 | 6 | def test_gql_endpoints_characterizer() -> None: 7 | """gql_endpoints_characterizer test.""" 8 | 9 | endpoints = gql_endpoints_characterizer() 10 | 11 | assert len(endpoints) == len(set(endpoints)), 'There should be no duplicates.' 12 | assert len(endpoints) == 23 + (2*8), 'There should be no more endpoints. Please update the test if you added more.' 
13 | -------------------------------------------------------------------------------- /tests/unit/io/test_readers.py: -------------------------------------------------------------------------------- 1 | """Test io/readers.py.""" 2 | 3 | from typing import List 4 | 5 | from graphinder.io.readers import read_domains 6 | from graphinder.pool.domain import Domain 7 | from graphinder.utils.logger import setup_logger 8 | 9 | 10 | def test_read_domains_input_domain() -> None: 11 | """read_domains test with input domain.""" 12 | 13 | out: List[Domain] = read_domains(None, 'example.com') 14 | 15 | assert len(out) == 1 16 | assert out[0].url == 'example.com' 17 | 18 | 19 | def test_read_domains_wrong_input_file() -> None: 20 | """read_domains test with wrong input file.""" 21 | 22 | try: 23 | _: List[Domain] = read_domains(None, None) 24 | except AttributeError: 25 | pass 26 | 27 | 28 | def test_read_domains_input_file() -> None: 29 | """read_domains test with input file.""" 30 | 31 | setup_logger(False) 32 | 33 | with open('tests/unit/io/test_readers.txt', 'r', encoding='utf-8') as input_file: 34 | out: List[Domain] = read_domains(input_file, None) 35 | 36 | str_out = set(domain.url for domain in out) 37 | 38 | assert {'example.com', 'example.org', 'example.fr'} == str_out 39 | -------------------------------------------------------------------------------- /tests/unit/io/test_readers.txt: -------------------------------------------------------------------------------- 1 | example.org 2 | example.com 3 | example.com/ 4 | www.example.com 5 | www.example.com/hello 6 | https://example.com 7 | http://example.com 8 | http://example.com/ 9 | https://example.com/ 10 | https://example.com/hello 11 | http://example.com/hello 12 | https://example.com/hello/ 13 | http://example.com/hello/ 14 | https://example.com/hello?query=false 15 | http://example.com/hello?query=false 16 | https://www.example.com 17 | http://www.example.com 18 | My super 
compagny,http://example.com/hello?query=false 19 | My super compagny,http://example.fr/hello?query=false, useless comment -------------------------------------------------------------------------------- /tests/unit/io/test_writers.py: -------------------------------------------------------------------------------- 1 | """Test io/writter.py.""" 2 | 3 | import json 4 | import os 5 | 6 | from graphinder.entities.io import Results 7 | from graphinder.entities.pool import Url 8 | from graphinder.io.writers import ResultEncoder, write_results 9 | 10 | 11 | def test_result_encoder() -> None: 12 | """ResultEncoder test with wrong structure.""" 13 | 14 | r = ResultEncoder() 15 | 16 | r.default(set()) 17 | 18 | try: 19 | r.default(list()) 20 | assert False, 'ResultEncoder should raise an exception.' 21 | except NotImplementedError: 22 | pass 23 | 24 | 25 | def test_write_results() -> None: 26 | """write_results test.""" 27 | 28 | results: Results = { 29 | 'example.com': {Url('http://example.com/')}, 30 | 'example.org': {Url('http://example.org/')}, 31 | } 32 | 33 | with open('test_write_results.json', 'w', encoding='utf-8') as output_file: 34 | write_results(output_file, results) 35 | 36 | with open('test_write_results.json', 'r', encoding='utf-8') as output_file: 37 | results_from_file = json.load(output_file) 38 | 39 | for result in results_from_file.copy(): 40 | results_from_file[result] = set(results_from_file[result]) 41 | 42 | assert results_from_file == results 43 | assert os.path.isfile('test_write_results.json') 44 | 45 | os.remove('test_write_results.json') 46 | -------------------------------------------------------------------------------- /tests/unit/pool/test_detectors.py: -------------------------------------------------------------------------------- 1 | """Test pool/detectors.py.""" 2 | 3 | import aiohttp 4 | import pytest 5 | 6 | from graphinder.pool.detectors import _look_like_graphql_url, is_gql_endpoint 7 | 8 | 9 | def test_look_like_graphql_url() -> 
None: 10 | """_look_like_graphql_url test.""" 11 | 12 | assert _look_like_graphql_url('https://example.com') == (False, None) 13 | assert _look_like_graphql_url('https://example.com/graphql') == (True, 'graphql') 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_is_gql_endpoint() -> None: 18 | """is_gql_endpoint test.""" 19 | 20 | async with aiohttp.ClientSession() as session: 21 | 22 | assert not (await is_gql_endpoint( 23 | 'https://example.com', 24 | session=session, 25 | ))[0] 26 | assert (await is_gql_endpoint( 27 | 'https://gontoz.escape.tech', 28 | session=session, 29 | ))[0] 30 | -------------------------------------------------------------------------------- /tests/unit/pool/test_domain.py: -------------------------------------------------------------------------------- 1 | """Test pool/domain.py.""" 2 | 3 | from typing import Set 4 | 5 | import aiohttp 6 | import pytest 7 | 8 | from graphinder.entities.pool import Url 9 | from graphinder.pool.domain import Domain 10 | from graphinder.utils.assets import fetch_assets 11 | from graphinder.utils.logger import setup_logger 12 | 13 | 14 | @pytest.mark.asyncio 15 | async def test_domain_class() -> None: 16 | """Domain class test.""" 17 | 18 | setup_logger(False) 19 | 20 | domain: Domain = Domain('example.com') 21 | domain.session = aiohttp.ClientSession() 22 | 23 | fetch_assets() 24 | 25 | domain.fetch_subdomains() 26 | assert len(domain.subdomains) == 100, 'There should be max 100 subdomain.' 
27 | 28 | assert await domain.fetch_script('https://example.com') == set() 29 | await domain.session.close() 30 | assert domain.session.closed 31 | 32 | 33 | @pytest.mark.asyncio 34 | async def test_domain_class_2() -> None: 35 | """More domain class test.""" 36 | 37 | setup_logger(False) 38 | domain: Domain = Domain('example2.com') 39 | domain.session = aiohttp.ClientSession() 40 | 41 | res: Set[Url] = await domain.fetch_script('https://cdn.jsdelivr.net/npm/graphql-playground-react/build/static/js/middleware.js') 42 | assert len(res) == 13 43 | 44 | res = await domain.fetch_page_scripts('https://gontoz.escape.tech/') 45 | assert len(res) == 0 46 | 47 | await domain.fetch_endpoint('https://gontoz.escape.tech/graphql') 48 | assert len(domain.results) == 1 49 | 50 | await domain.session.close() 51 | assert domain.session.closed 52 | -------------------------------------------------------------------------------- /tests/unit/pool/test_extractors.py: -------------------------------------------------------------------------------- 1 | """Test pool/extractors.py.""" 2 | -------------------------------------------------------------------------------- /tests/unit/pool/test_routine.py: -------------------------------------------------------------------------------- 1 | """Test pool/routine.py.""" 2 | 3 | 4 | def test_domain_routine() -> None: 5 | """domain_routine test.""" 6 | 7 | 8 | def test_process_pool() -> None: 9 | """domain_routine test.""" 10 | 11 | 12 | def test_main_routine() -> None: 13 | """main_routine test.""" 14 | -------------------------------------------------------------------------------- /tests/unit/pool/test_tasks.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name 2 | 3 | """Test pool/tasks.py.""" 4 | 5 | import argparse 6 | import asyncio 7 | 8 | import pytest 9 | 10 | from graphinder.entities.tasks import Task, TasksList, TaskTags 11 | from graphinder.io.providers import 
gql_endpoints_characterizer 12 | from graphinder.main import argument_builder 13 | from graphinder.pool.domain import Domain, Url 14 | from graphinder.pool.tasks import add_tasks, generate_bruteforce_tasks, generate_scripts_tasks, generate_tasks, init_domain_tasks, process_task 15 | from graphinder.utils.assets import fetch_assets 16 | 17 | 18 | @pytest.fixture 19 | def domain() -> Domain: 20 | """Domain fixture.""" 21 | 22 | _domain: Domain = Domain(Url('example.com')) 23 | _domain.subdomains = ['api.example.com', 'test.example.com'] 24 | 25 | return _domain 26 | 27 | 28 | def test_generate_scripts_tasks(domain: Domain) -> None: 29 | """generate_scripts_tasks test.""" 30 | 31 | tasks: TasksList = generate_scripts_tasks(domain) 32 | 33 | assert len(tasks) == 2, 'There should be 2 tasks.' 34 | 35 | 36 | def test_generate_bruteforce_tasks(domain: Domain) -> None: 37 | """generate_bruteforce_tasks test.""" 38 | 39 | tasks: TasksList = generate_bruteforce_tasks(domain) 40 | 41 | assert len(tasks) == 2 * len(gql_endpoints_characterizer()), 'There should be 2 * gql_endpoints_characterizer tasks.' 42 | 43 | 44 | def test_generate_tasks(domain: Domain) -> None: 45 | """generate_tasks test.""" 46 | 47 | args: argparse.Namespace = argument_builder([]) 48 | 49 | tasks: TasksList = generate_tasks(domain, args) 50 | assert len(tasks) == 2 * len(gql_endpoints_characterizer()) + 2, 'There should be 2 * gql_endpoints_characterizer + 2 tasks.' 51 | 52 | args = argument_builder(['--no-script']) 53 | tasks = generate_tasks(domain, args) 54 | assert len(tasks) == 2 * len(gql_endpoints_characterizer()), 'There should be 2 * gql_endpoints_characterizer tasks.' 55 | 56 | args = argument_builder(['--no-bruteforce']) 57 | tasks = generate_tasks(domain, args) 58 | assert len(tasks) == 2, 'There should be 2 tasks.' 59 | 60 | args = argument_builder(['--no-script', '--no-bruteforce']) 61 | tasks = generate_tasks(domain, args) 62 | assert len(tasks) == 0, 'There should be 0 tasks.' 
63 | 64 | 65 | def test_init_domain_tasks(domain: Domain) -> None: 66 | """init_domain_tasks test.""" 67 | 68 | fetch_assets() 69 | 70 | tasks: TasksList = init_domain_tasks(domain, argument_builder([])) 71 | 72 | assert len(tasks) == 100 * len(gql_endpoints_characterizer()) + 100, 'There should be 100 * gql_endpoints_characterizer + 100 tasks.' 73 | 74 | 75 | @pytest.mark.asyncio 76 | async def test_add_tasks() -> None: 77 | """add_tasks test.""" 78 | 79 | assert len(asyncio.all_tasks()) == 1, 'There should be 1 tasks.' 80 | 81 | await add_tasks(Domain(Url('example.com')), {Url('http://example.com/')}, TaskTags.FETCH_PAGE_SCRIPTS) 82 | 83 | assert len(asyncio.all_tasks()) == 2, 'There should be 2 tasks.' 84 | 85 | 86 | @pytest.mark.asyncio 87 | async def test_process_task() -> None: 88 | """process_task test.""" 89 | 90 | assert len(asyncio.all_tasks()) == 1, 'There should be 1 tasks.' 91 | 92 | try: 93 | task = Task('example.com', 'unknown tag', 'example.com') # type: ignore[arg-type] 94 | await process_task(task, Domain('example.com')) 95 | 96 | assert False, 'Unknown tag should raise an error.' 97 | except NotImplementedError: 98 | pass 99 | 100 | 101 | @pytest.mark.asyncio 102 | async def test_consume_tasks() -> None: 103 | """consume_tasks test.""" 104 | -------------------------------------------------------------------------------- /tests/unit/test_main.py: -------------------------------------------------------------------------------- 1 | """Test main.py.""" 2 | 3 | import argparse 4 | 5 | import pkg_resources 6 | 7 | from graphinder.main import __version__, argument_builder, main, validate_arguments 8 | from graphinder.utils.logger import get_logger 9 | 10 | 11 | def test_version() -> None: 12 | """version test.""" 13 | assert __version__ == pkg_resources.get_distribution('graphinder').version, 'Version has been changed, please update the test.' 
14 | 15 | 16 | def test_argument_builder() -> None: 17 | """argument_builder test.""" 18 | 19 | args: argparse.Namespace = argument_builder([]) 20 | 21 | assert args.domain is None 22 | assert not args.verbose_mode 23 | assert not args.no_script_mode 24 | assert not args.no_bruteforce_mode 25 | assert args.reduce_mode == 100 26 | 27 | args = argument_builder(['-d', 'example.com']) 28 | 29 | assert args.domain == 'example.com' 30 | 31 | args = argument_builder(['--no-bruteforce']) 32 | 33 | assert args.no_bruteforce_mode 34 | 35 | 36 | def test_validate_arguments() -> None: 37 | """validate_arguments test.""" 38 | 39 | logger = get_logger() 40 | args: argparse.Namespace = argument_builder([]) 41 | 42 | assert not validate_arguments(logger, args) 43 | 44 | args = argument_builder(['-d', 'example.com']) 45 | assert validate_arguments(logger, args) 46 | 47 | args = argument_builder(['--no-script', '--no-bruteforce']) 48 | assert not validate_arguments(logger, args) 49 | 50 | args = argument_builder(['-d', 'example.com', '--no-script', '--no-bruteforce']) 51 | assert not validate_arguments(logger, args) 52 | 53 | 54 | def test_main() -> None: 55 | """main test.""" 56 | 57 | assert not main([]) 58 | 59 | 60 | def test_full_run() -> None: 61 | """Test a complete run.""" 62 | 63 | assert main(['-d', 'example.com']) == {'example.com': set()} 64 | -------------------------------------------------------------------------------- /tests/unit/utils/test_assets.py: -------------------------------------------------------------------------------- 1 | """Test utils/assets.py.""" 2 | 3 | import os 4 | import urllib.request 5 | 6 | from graphinder.utils.assets import _compose_subfinder_url, _extract_file, fetch_assets 7 | 8 | 9 | def test_fetch_assets() -> None: 10 | """fetch_assets test.""" 11 | 12 | path: str = 'subfinder' 13 | 14 | if os.path.isfile(path): 15 | os.remove(path) 16 | 17 | fetch_assets() 18 | assert os.path.isfile(path) 19 | 20 | fetch_assets() 21 | assert 
os.path.isfile(path) 22 | 23 | if os.path.isfile(path): 24 | os.remove(path) 25 | 26 | 27 | def test_extract_file_zip() -> None: 28 | """_extract_file test for zip.""" 29 | 30 | name: str = 'subfinder' 31 | 32 | if os.path.isfile(name): 33 | os.remove(name) 34 | 35 | url = _compose_subfinder_url('linux', 'amd64') 36 | urllib.request.urlretrieve(url, f'{name}.zip') 37 | assert os.path.isfile(f'{name}.zip'), f'{name}.zip not found.' 38 | 39 | _extract_file(f'{name}.zip') 40 | 41 | assert os.path.isfile(name), f'{name} not found.' 42 | 43 | os.remove(f'{name}') 44 | os.remove(f'{name}.zip') 45 | 46 | 47 | def test_compose_subfinder_url() -> None: 48 | """_compose_subfinder_url test.""" 49 | 50 | assert _compose_subfinder_url('linux', 'amd64') == 'https://github.com/projectdiscovery/subfinder/releases/download/v2.5.1/subfinder_2.5.1_linux_amd64.zip' 51 | assert _compose_subfinder_url('darwin', 'amd64') == 'https://github.com/projectdiscovery/subfinder/releases/download/v2.5.1/subfinder_2.5.1_macOS_amd64.zip' 52 | assert _compose_subfinder_url('win', 'i386') == 'https://github.com/projectdiscovery/subfinder/releases/download/v2.5.1/subfinder_2.5.1_windows_386.zip' 53 | assert _compose_subfinder_url('darwin', 'arm') == 'https://github.com/projectdiscovery/subfinder/releases/download/v2.5.1/subfinder_2.5.1_macOS_arm64.zip' 54 | 55 | try: 56 | _compose_subfinder_url('unknown os', 'amd64') 57 | assert False, 'Expected NotImplementedError.' 
58 | except NotImplementedError: 59 | pass 60 | -------------------------------------------------------------------------------- /tests/unit/utils/test_filters.py: -------------------------------------------------------------------------------- 1 | """Test utils/filters.py.""" 2 | 3 | from typing import List, Set 4 | 5 | import pytest 6 | 7 | from graphinder.entities.pool import Url 8 | from graphinder.io.providers import gql_endpoints_characterizer 9 | from graphinder.utils.filters import filter_common, filter_urls, remove_duplicate_domains, transform_url_in_domain 10 | 11 | 12 | def test_filter_common() -> None: 13 | """test for filter_common.""" 14 | 15 | _input: Set[str] = { 16 | 'http://a', 17 | 'http://x', 18 | 'https://w3.org', 19 | 'https://localhost', 20 | 'https://schema.org', 21 | 'https://sentry.io', 22 | 'https://git.io', 23 | 'https://github.com', 24 | 'https://nuxtjs.org', 25 | 'https://momentjs.com', 26 | 'https://fb.me', 27 | 'https://reactjs.org', 28 | 'https://slack', 29 | 'https://google', 30 | 'https://twitter', 31 | 'https://example.com', 32 | 'https://apps.example.com', 33 | 'https://www.example.com', 34 | 'https://example.com/graphql', 35 | 'https://example.com/api/graphql', 36 | 'https://example.com/api/v1/graphql', 37 | 'https://example.com/graphql', 38 | 'https://example.com/api/graphql', 39 | 'https://example.com/api/v1/graphql', 40 | } 41 | 42 | assert filter_common(_input) == { 43 | 'https://example.com', 'https://apps.example.com', 'https://www.example.com', 'https://example.com/graphql', 'https://example.com/api/graphql', 44 | 'https://example.com/api/v1/graphql', 'https://example.com/graphql', 'https://example.com/api/graphql', 'https://example.com/api/v1/graphql' 45 | } 46 | 47 | 48 | def test_filter_urls() -> None: 49 | """test for filter_urls.""" 50 | 51 | _input: Set[Url] = set() 52 | 53 | for url in gql_endpoints_characterizer(): 54 | _input.add(Url('https://example.com/' + url)) 55 | 56 | assert filter_urls(_input) == 
{Url('https://example.com/graphql')} 57 | 58 | 59 | def test_remove_duplicate_domains() -> None: 60 | """test for duplicate_domain.""" 61 | 62 | domains: List[str] = [ 63 | 'example.com', 64 | 'www.example.com', 65 | ] 66 | 67 | assert remove_duplicate_domains(domains) == [ 68 | 'example.com', 69 | ] 70 | 71 | 72 | @pytest.mark.parametrize('url,expected', [ 73 | ('https://example.com', 'example.com'), 74 | ('https://example.com/', 'example.com'), 75 | ]) 76 | def test_transform_url_in_domain( 77 | url: str, 78 | expected: str, 79 | ) -> None: 80 | """test for transform_url_in_domain.""" 81 | 82 | assert transform_url_in_domain(url) == expected 83 | -------------------------------------------------------------------------------- /tests/unit/utils/test_finders.py: -------------------------------------------------------------------------------- 1 | """Test utils/finders.py.""" 2 | 3 | from typing import List 4 | 5 | from graphinder.utils.finders import find_script_fetch_graphql, find_script_full_urls, find_script_window_base_urls 6 | 7 | 8 | def test_find_script_full_urls() -> None: 9 | """find_script_full_urls test.""" 10 | 11 | script_file: str = """ 12 | https://example.com 13 | https://apps.example.com 14 | https://www.example.com 15 | """ 16 | 17 | urls: List[str] = find_script_full_urls(script_file) 18 | 19 | assert urls == [ 20 | 'https://example.com', 21 | 'https://apps.example.com', 22 | 'https://www.example.com', 23 | ] 24 | 25 | 26 | def test_find_script_window_base_urls() -> None: 27 | """find_script_window_base_urls test.""" 28 | 29 | script_file: str = """ 30 | {var e=new ze({uri:window.__BASE_URL__+"/graphql",credentials:"same-origin"} 31 | window.__BASE_URL__+"/api/graphql" 32 | window.__BASE_URL__ + "/api/v1/graphql" 33 | """ 34 | 35 | urls: List[str] = find_script_window_base_urls('https://example.com', script_file) 36 | 37 | assert urls == [ 38 | 'https://example.com/graphql', 39 | 'https://example.com/api/graphql', 40 | 
'https://example.com/api/v1/graphql', 41 | ] 42 | 43 | 44 | def test_find_script_fetch_graphql() -> None: 45 | """find_script_fetch_graphql test.""" 46 | 47 | script_file: str = """ 48 | function s(e){return e.options.siteId?fetch("/graphql",{method:"POST",credentials:"same-origin",headers:{"Content-Type":"application/json"},body:JSON.stringify({query:"{ me { id ... 49 | fetch("/api/graphql") 50 | fetch("/api/v1/graphql") 51 | """ 52 | 53 | urls: List[str] = find_script_fetch_graphql('https://example.com', script_file) 54 | 55 | assert urls == [ 56 | 'https://example.com/graphql', 57 | 'https://example.com/api/graphql', 58 | 'https://example.com/api/v1/graphql', 59 | ] 60 | -------------------------------------------------------------------------------- /tests/unit/utils/test_logger.py: -------------------------------------------------------------------------------- 1 | """Test utils/logger.py.""" 2 | 3 | import logging 4 | from typing import Any 5 | 6 | from graphinder.utils.logger import disable_internal_loggers, get_logger, setup_logger 7 | 8 | 9 | def test_get_logger() -> None: 10 | """get_logger test.""" 11 | 12 | logger: logging.Logger = get_logger() 13 | assert logger.name == 'graphinder' 14 | 15 | assert isinstance(logger, logging.Logger) 16 | 17 | 18 | def test_setup_logger(caplog: Any) -> None: 19 | """setup_logger test.""" 20 | 21 | caplog.set_level(0) 22 | 23 | logger: logging.Logger = setup_logger(False) 24 | assert logger.name == 'graphinder' 25 | 26 | logger.info('test info') 27 | logger.debug('test debug') 28 | 29 | assert 'test info' in caplog.text 30 | assert 'test debug' in caplog.text 31 | 32 | 33 | def test_disable_internal_loggers(caplog: Any) -> None: 34 | """disable_internal_loggers test.""" 35 | 36 | caplog.set_level(0) 37 | 38 | disable_internal_loggers() 39 | logger: logging.Logger = logging.getLogger('asyncio') 40 | 41 | logger.info('test info') 42 | 43 | assert 'test info' not in caplog.text 44 | 
-------------------------------------------------------------------------------- /tests/unit/utils/test_webhook.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name 2 | 3 | """Test for utils/webhook.py.""" 4 | 5 | import pytest 6 | from pytest_mock import MockerFixture 7 | 8 | from graphinder.entities.io import Results 9 | from graphinder.entities.pool import Url 10 | from graphinder.utils.webhook import format_webhook, send_webhook 11 | 12 | 13 | @pytest.fixture 14 | def result_one_domain() -> Results: 15 | """Return a Results object with one domain.""" 16 | 17 | return { 18 | 'example.com': { 19 | Url('http://www.example.com/graphql'), 20 | Url('http://admin.example.com/graphql'), 21 | } 22 | } 23 | 24 | 25 | @pytest.fixture 26 | def result_multiple_domain() -> Results: 27 | """Return a Results object with multiples domain.""" 28 | 29 | return { 30 | 'example.com': { 31 | Url('http://www.example.com/graphql'), 32 | Url('http://admin.example.com/graphql'), 33 | }, 34 | 'example2.com': { 35 | Url('http://www.example2.com/graphql'), 36 | Url('http://admin.example2.com/graphql'), 37 | } 38 | } 39 | 40 | 41 | def test_format_webhook_single(result_one_domain: Results) -> None: 42 | """Test for format_webhook.""" 43 | 44 | formatted = format_webhook(result_one_domain) 45 | 46 | assert formatted['username'] == 'Graphinder' 47 | assert 0 <= formatted['embeds'][0]['color'] <= 16777215 48 | assert formatted['embeds'][0]['title'] == 'example.com' 49 | assert formatted['embeds'][0]['description'] == '\n'.join(result_one_domain['example.com']) 50 | 51 | 52 | def test_format_webhook_multiple(result_multiple_domain: Results) -> None: 53 | """Test for format_webhook.""" 54 | 55 | formatted = format_webhook(result_multiple_domain) 56 | 57 | assert len(formatted['embeds']) == 2 58 | 59 | 60 | def test_send_webhook(mocker: MockerFixture, result_one_domain: Results) -> None: 61 | """Test for send_webhook.""" 
62 | 63 | mocker.patch('requests.post', return_value=mocker.Mock(status_code=204)) 64 | 65 | url = 'http://mocked.com/webhook' 66 | 67 | assert send_webhook(url, result_one_domain) 68 | --------------------------------------------------------------------------------