├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── cd.yaml │ └── ci.yaml ├── .gitignore ├── .hooks ├── commit-msg.json ├── partials │ └── .gitkeep ├── pre-commit.json └── shared │ └── .gitkeep ├── .mookme.json ├── .mypy.ini ├── .pylintrc ├── .style.yapf ├── .vscode ├── extensions.json └── settings.json ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── commitlint.config.js ├── doc ├── banner.png ├── detector.drawio └── detector.jpg ├── docker-entrypoint.sh ├── graphinder ├── __init__.py ├── __main__.py ├── entities │ ├── __init__.py │ ├── errors.py │ ├── io.py │ ├── pool.py │ └── tasks.py ├── io │ ├── __init__.py │ ├── printers.py │ ├── providers.py │ ├── readers.py │ └── writers.py ├── main.py ├── pool │ ├── __init__.py │ ├── detectors.py │ ├── domain.py │ ├── extractors.py │ ├── routine.py │ └── tasks.py ├── py.typed └── utils │ ├── __init__.py │ ├── assets.py │ ├── filters.py │ ├── finders.py │ ├── logger.py │ └── webhook.py ├── install-dev.sh ├── package.json ├── poetry.lock ├── pyproject.toml └── tests ├── __init__.py └── unit ├── __init__.py ├── entities ├── test_errors.py ├── test_io.py ├── test_pool.py └── test_task.py ├── io ├── test_printers.py ├── test_providers.py ├── test_readers.py ├── test_readers.txt └── test_writers.py ├── pool ├── test_detectors.py ├── test_domain.py ├── test_extractors.py ├── test_routine.py └── test_tasks.py ├── test_main.py └── utils ├── test_assets.py ├── test_filters.py ├── test_finders.py ├── test_logger.py └── test_webhook.py /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Lines starting with '#' are comments. 2 | # Each line is a file pattern followed by one or more owners. 3 | 4 | # More details are here: https://help.github.com/articles/about-codeowners/ 5 | 6 | # The '*' pattern is global owners. 7 | 8 | # Order is important. 
The last matching pattern has the most precedence. 9 | # The folders are ordered as follows: 10 | 11 | # In each subsection folders are ordered first by depth, then alphabetically. 12 | # This should make it easy to add new rules without breaking existing ones. 13 | 14 | # Global rule: 15 | * @c3b5aw 16 | 17 | graphinder/ @iCarossio 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG] " 5 | labels: bug 6 | assignees: c3b5aw 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Ran on '..' 16 | 2. Parameters '..' 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **Screenshots** 22 | If applicable, add screenshots to help explain your problem. 23 | 24 | **Desktop (please complete the following information):** 25 | - OS: [e.g. Ubuntu 20.04] 26 | - Python Version: [e.g. 3.10.1] 27 | - Graphinder version [e.g. 1.3.1] 28 | 29 | **Additional context** 30 | Add any other context about the problem here. 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: c3b5aw 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | # Maintain dependencies for GitHub Actions 9 | - package-ecosystem: "github-actions" 10 | directory: "/" 11 | schedule: 12 | interval: "weekly" 13 | 14 | # Maintain dependencies for pip (Python packages) 15 | - package-ecosystem: "pip" 16 | directory: "/" 17 | schedule: 18 | interval: "weekly" 19 | -------------------------------------------------------------------------------- /.github/workflows/cd.yaml: -------------------------------------------------------------------------------- 1 | name: CD 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3 14 | - name: Release 15 | uses: softprops/action-gh-release@v1 16 | 17 | pypi-push: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: actions/setup-python@v4 22 | with: 23 | python-version: '3.10' 24 | - name: Install dependencies 25 | run: | 26 | pip install poetry 27 | - name: Setup poetry 28 | run: | 29 | poetry config virtualenvs.in-project true 30 | poetry install --no-dev 31 | - name: Build package 32 | run: | 33 | source .venv/bin/activate 34 | poetry build 35 | - name: Upload
package 36 | uses: pypa/gh-action-pypi-publish@release/v1 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_TOKEN }} 40 | 41 | docker-push: 42 | runs-on: ubuntu-latest 43 | steps: 44 | - uses: actions/checkout@v3 45 | - name: Set up QEMU 46 | uses: docker/setup-qemu-action@v2 47 | - name: Set up Docker Buildx 48 | uses: docker/setup-buildx-action@v2 49 | - name: Login to DockerHub 50 | if: github.event_name != 'pull_request' 51 | uses: docker/login-action@v2 52 | with: 53 | username: ${{ secrets.DOCKERHUB_USERNAME }} 54 | password: ${{ secrets.DOCKERHUB_TOKEN }} 55 | logout: true 56 | - name: Docker meta 57 | id: meta 58 | uses: docker/metadata-action@v4 59 | with: 60 | images: ${{ secrets.DOCKERHUB_REPO}}/graphinder 61 | - name: Build and push 62 | uses: docker/build-push-action@v4 63 | with: 64 | push: ${{ github.event_name != 'pull_request' }} 65 | context: . 66 | platforms: linux/amd64,linux/arm64 67 | tags: ${{ steps.meta.outputs.tags }} 68 | labels: ${{ steps.meta.outputs.labels }} 69 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | pull_request: 5 | types: [opened, reopened] 6 | 7 | env: 8 | MODULE_NAME: graphinder 9 | MIN_TEST_COV: 80 10 | 11 | jobs: 12 | CI: 13 | runs-on: ubuntu-20.04 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: [ubuntu-latest] 18 | python-version: ['3.10'] 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: actions/setup-python@v4 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | pip install poetry 27 | - name: Setup poetry 28 | run: | 29 | poetry config virtualenvs.in-project true 30 | poetry install 31 | - name: Run lint 32 | if: always() 33 | run: | 34 | source .venv/bin/activate 35 | isort -m 9 --line-length 160 $MODULE_NAME tests --check-only 36 | pylint 
--load-plugins pylint_quotes $MODULE_NAME tests 37 | docformatter --wrap-summaries 160 --wrap-descriptions 160 -cr $MODULE_NAME tests 38 | yapf -rd $MODULE_NAME tests 39 | mypy -V 40 | mypy $MODULE_NAME tests 41 | - name: Run tests 42 | run: | 43 | source .venv/bin/activate 44 | pytest --reruns=3 --cov=$MODULE_NAME --cov-report=xml --cov-fail-under=$MIN_TEST_COV tests/unit 45 | - name: Upload coverage to Codecov 46 | uses: codecov/codecov-action@v3 47 | with: 48 | token: ${{ secrets.CODECOV_TOKEN }} 49 | files: ./coverage.xml 50 | flags: unittests 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ############################ 2 | ######### General ########## 3 | ############################ 4 | 5 | # macOS 6 | .DS_Store 7 | .AppleDouble 8 | .LSOverride 9 | .DocumentRevisions-V100 10 | .fseventsd 11 | .Spotlight-V100 12 | .TemporaryItems 13 | .Trashes 14 | .VolumeIcon.icns 15 | .com.apple.timemachine.donotpresent 16 | .AppleDB 17 | .AppleDesktop 18 | Network Trash Folder 19 | Temporary Items 20 | .apdisk 21 | ._* 22 | Icon 23 | 24 | # Linux 25 | *~ 26 | 27 | # Windows 28 | Thumbs.db 29 | ehthumbs.db 30 | Desktop.ini 31 | $RECYCLE.BIN/ 32 | *.cab 33 | *.msi 34 | *.msm 35 | *.msp 36 | 37 | # Packages 38 | *.7z 39 | *.csv 40 | *.dat 41 | *.dmg 42 | *.gz 43 | *.iso 44 | *.jar 45 | *.rar 46 | *.tar 47 | *.zip 48 | *.com 49 | *.class 50 | *.dll 51 | *.exe 52 | *.o 53 | *.seed 54 | *.so 55 | *.swo 56 | *.swp 57 | *.swn 58 | *.swm 59 | *.out 60 | *.pid 61 | *.prof 62 | 63 | # Editors 64 | .idea 65 | *.suo 66 | *.ntvs* 67 | *.njsproj 68 | *.sln 69 | 70 | 71 | ############################ 72 | ######### Specific ######### 73 | ############################ 74 | 75 | # Python 76 | __pycache__/ 77 | *.egg-info/ 78 | .ipynb_checkpoints 79 | *.pyc 80 | *.pyo 81 | *.ipynb 82 | *.env 83 | .mypy_cache 84 | .pytest_cache 85 | .coverage 86 | .venv 87 | 
build/ 88 | dist/ 89 | setup.py 90 | *.spec 91 | snap 92 | coverage.xml 93 | 94 | # Logs 95 | logs 96 | *.log* 97 | storage 98 | 99 | # Mookme 100 | node_modules 101 | package-lock.json 102 | **/.hooks/*.local.json 103 | 104 | # Escape 105 | .escaperc.json 106 | _debug/ 107 | security-report.pdf 108 | .tmp 109 | 110 | # Assets 111 | 112 | subfinder 113 | results.json 114 | -------------------------------------------------------------------------------- /.hooks/commit-msg.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps": [{ 3 | "name": "commit lint", 4 | "command": "cat {args} | ./node_modules/@commitlint/cli/cli.js" 5 | }] 6 | } -------------------------------------------------------------------------------- /.hooks/partials/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/.hooks/partials/.gitkeep -------------------------------------------------------------------------------- /.hooks/pre-commit.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "python", 3 | "venvActivate": "./.venv/bin/activate", 4 | "steps": [ 5 | { 6 | "name": "autoflake", 7 | "command": "autoflake -ri --remove-unused-variable --ignore-init-module-imports --remove-all-unused-imports graphinder tests" 8 | }, 9 | { 10 | "name": "isort", 11 | "command": "isort -m 9 --line-length 160 graphinder tests" 12 | }, 13 | { 14 | "name": "unify", 15 | "command": "unify -ri graphinder tests" 16 | }, 17 | { 18 | "name": "docformatter", 19 | "command": "docformatter --wrap-summaries 160 --wrap-descriptions 160 -ri graphinder tests" 20 | }, 21 | { 22 | "name": "yapf", 23 | "command": "yapf -ri graphinder tests" 24 | }, 25 | { 26 | "name": "pylint", 27 | "command": "pylint --load-plugins pylint_quotes graphinder tests" 28 | }, 29 | { 30 | "name": "mypy", 31 | 
"command": "mypy graphinder tests" 32 | }, 33 | { 34 | "name": "pytest", 35 | "command": "pytest --reruns=3 tests/unit" 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /.hooks/shared/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/.hooks/shared/.gitkeep -------------------------------------------------------------------------------- /.mookme.json: -------------------------------------------------------------------------------- 1 | { 2 | "packagesPath": ".", 3 | "packages": [], 4 | "addedBehavior": "addAndCommit" 5 | } -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.7 3 | disallow_untyped_defs=True 4 | disallow_untyped_calls=True 5 | disallow_incomplete_defs=True 6 | disallow_untyped_decorators=True 7 | strict_equality=True 8 | show_error_codes=True 9 | warn_unreachable=True 10 | warn_redundant_casts=True 11 | warn_unused_ignores=True 12 | warn_unused_configs=True 13 | pretty=True 14 | exclude=(build|dist|setup.py|.venv) 15 | disable_error_code = attr-defined 16 | 17 | [mypy-pydash.*] 18 | ignore_missing_imports = True 19 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc()))" 3 | ignore=.venv,setup.py 4 | 5 | [DESIGN] 6 | 7 | # Maximum number of characters on a single line. 
8 | max-line-length=160 9 | # Good variable names which should always be accepted, separated by a comma 10 | good-names=i,e 11 | # Maximum number of branch for function / method body 12 | max-branches=15 13 | # Maximum number of arguments for function / method 14 | max-args=10 15 | # Variable naming style 16 | variable-rgx=([a-z_][a-z0-9_]{1,30}|[a-zA-Z0-9_]{1})$ 17 | # Argument naming style 18 | argument-rgx=([a-z_][a-z0-9_]{1,30}|[a-zA-Z0-9_]{1})$ 19 | # Attribute naming style 20 | attr-rgx=([a-z_][a-z0-9_]{1,30}|[a-zA-Z0-9_]{1})$ 21 | # Maximum number of public methods per class 22 | max-public-methods=30 23 | # Maximum number of locals for function / method body 24 | max-locals=20 25 | # Make docstring compulsory for all functions 26 | no-docstring-rgx=$^ 27 | 28 | # MODULE-PARAM: pylint-quotes 29 | string-quote=single 30 | triple-quote=double 31 | docstring-quote=double 32 | 33 | [ELIF] 34 | 35 | # Maximum number of nested blocks for function/method body 36 | max-nested-blocks=8 37 | 38 | 39 | [SIMILARITIES] 40 | 41 | # Minimum lines number of a similarity. 42 | min-similarity-lines=15 43 | # Ignore comments when computing similarities. 44 | ignore-comments=yes 45 | # Ignore docstrings when computing similarities. 46 | ignore-docstrings=yes 47 | # Ignore imports when computing similarities. 
48 | ignore-imports=yes 49 | 50 | 51 | [MESSAGES CONTROL] 52 | 53 | #TODO: DEV - Fix these one day: broad-except, fixme 54 | disable=logging-format-interpolation, logging-fstring-interpolation, broad-except, fixme 55 | extension-pkg-whitelist=lxml 56 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | ################################################## 3 | ################################################## 4 | ################### ESCAPE KOBS ################## 5 | ################################################## 6 | ################################################## 7 | 8 | # The column limit. 9 | column_limit=160 10 | 11 | # Split before the closing bracket if a list or dict literal doesn't fit on a single line. 12 | split_before_closing_bracket=False 13 | 14 | # Split before a dictionary or set generator (comp_for). For example, note 15 | # the split before the 'for': 16 | # 17 | # foo = { 18 | # variable: 'Hello world, have a nice day!' 19 | # for variable in bar if variable != 42 20 | # } 21 | split_before_dict_set_generator=False 22 | 23 | # Split named assignments onto individual lines. 24 | split_before_named_assigns=False 25 | 26 | # Allow splitting before a default / named assignment in an argument list. 27 | allow_split_before_default_or_named_assigns=False 28 | 29 | # Allow splits before the dictionary value. 30 | allow_split_before_dict_value=True 31 | 32 | # Let spacing indicate operator precedence. 
For example: 33 | # 34 | # a = 1 * 2 + 3 / 4 35 | # b = 1 / 2 - 3 * 4 36 | # c = (1 + 2) * (3 - 4) 37 | # d = (1 - 2) / (3 + 4) 38 | # e = 1 * 2 - 3 39 | # f = 1 + 2 + 3 + 4 40 | # 41 | # will be formatted as follows to indicate precedence: 42 | # 43 | # a = 1*2 + 3/4 44 | # b = 1/2 - 3*4 45 | # c = (1+2) * (3-4) 46 | # d = (1-2) / (3+4) 47 | # e = 1*2 - 3 48 | # f = 1 + 2 + 3 + 4 49 | # 50 | arithmetic_precedence_indication=True 51 | 52 | # Insert a blank line before a class-level docstring. 53 | blank_line_before_class_docstring=True 54 | 55 | # Insert a blank line before a module docstring. 56 | blank_line_before_module_docstring=True 57 | 58 | # Insert a blank line before a 'def' or 'class' immediately nested 59 | # within another 'def' or 'class'. For example: 60 | # 61 | # class Foo: 62 | # # <------ this blank line 63 | # def method(): 64 | # ... 65 | blank_line_before_nested_class_or_def=True 66 | 67 | # Do not split consecutive brackets. Only relevant when 68 | # dedent_closing_brackets is set. For example: 69 | # 70 | # call_func_that_takes_a_dict( 71 | # { 72 | # 'key1': 'value1', 73 | # 'key2': 'value2', 74 | # } 75 | # ) 76 | # 77 | # would reformat to: 78 | # 79 | # call_func_that_takes_a_dict({ 80 | # 'key1': 'value1', 81 | # 'key2': 'value2', 82 | # }) 83 | coalesce_brackets=True 84 | 85 | # Indent the dictionary value if it cannot fit on the same line as the 86 | # dictionary key. For example: 87 | # 88 | # config = { 89 | # 'key1': 90 | # 'value1', 91 | # 'key2': value1 + 92 | # value2, 93 | # } 94 | indent_dictionary_value=True 95 | 96 | # Split before arguments 97 | split_all_comma_separated_values=False 98 | 99 | # Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@' 100 | # rather than after. 101 | split_before_arithmetic_operator=True 102 | 103 | # Split before the '.' 
if we need to split a longer expression: 104 | # 105 | # foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d)) 106 | # 107 | # would reformat to something like: 108 | # 109 | # foo = ('This is a really long string: {}, {}, {}, {}' 110 | # .format(a, b, c, d)) 111 | split_before_dot=True 112 | 113 | 114 | ################################################## 115 | ################################################## 116 | ################## DEFAULT KNOBS ################# 117 | ################################################## 118 | ################################################## 119 | 120 | # Align closing bracket with visual indentation. 121 | align_closing_bracket_with_visual_indent=True 122 | 123 | # Allow dictionary keys to exist on multiple lines. For example: 124 | # 125 | # x = { 126 | # ('this is the first element of a tuple', 127 | # 'this is the second element of a tuple'): 128 | # value, 129 | # } 130 | allow_multiline_dictionary_keys=False 131 | 132 | # Allow lambdas to be formatted on more than one line. 133 | allow_multiline_lambdas=False 134 | 135 | # Number of blank lines surrounding top-level function and class 136 | # definitions. 137 | blank_lines_around_top_level_definition=2 138 | 139 | # The style for continuation alignment. Possible values are: 140 | # 141 | # - SPACE: Use spaces for continuation alignment. This is default behavior. 142 | # - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns 143 | # (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs or 144 | # CONTINUATION_INDENT_WIDTH spaces) for continuation alignment. 145 | # - VALIGN-RIGHT: Vertically align continuation lines to multiple of 146 | # INDENT_WIDTH columns. Slightly right (one tab or a few spaces) if 147 | # cannot vertically align continuation lines with indent characters. 148 | continuation_align_style=SPACE 149 | 150 | # Indent width used for line continuations. 
151 | continuation_indent_width=4 152 | 153 | # Put closing brackets on a separate line, dedented, if the bracketed 154 | # expression can't fit in a single line. Applies to all kinds of brackets, 155 | # including function definitions and calls. For example: 156 | # 157 | # config = { 158 | # 'key1': 'value1', 159 | # 'key2': 'value2', 160 | # } # <--- this bracket is dedented and on a separate line 161 | # 162 | # time_series = self.remote_client.query_entity_counters( 163 | # entity='dev3246.region1', 164 | # key='dns.query_latency_tcp', 165 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)), 166 | # start_ts=now()-timedelta(days=3), 167 | # end_ts=now(), 168 | # ) # <--- this bracket is dedented and on a separate line 169 | dedent_closing_brackets=True 170 | 171 | # Disable the heuristic which places each list element on a separate line 172 | # if the list is comma-terminated. 173 | disable_ending_comma_heuristic=False 174 | 175 | # Place each dictionary entry onto its own line. 176 | each_dict_entry_on_separate_line=True 177 | 178 | # Require multiline dictionary even if it would normally fit on one line. 179 | # For example: 180 | # 181 | # config = { 182 | # 'key1': 'value1' 183 | # } 184 | force_multiline_dict=False 185 | 186 | # The regex for an i18n comment. The presence of this comment stops 187 | # reformatting of that line, because the comments are required to be 188 | # next to the string they translate. 189 | i18n_comment= 190 | 191 | # The i18n function call names. The presence of this function stops 192 | # reformattting on that line, because the string it has cannot be moved 193 | # away from the i18n comment. 194 | i18n_function_call= 195 | 196 | # Indent blank lines. 197 | indent_blank_lines=False 198 | 199 | # Put closing brackets on a separate line, indented, if the bracketed 200 | # expression can't fit in a single line. Applies to all kinds of brackets, 201 | # including function definitions and calls. 
For example: 202 | # 203 | # config = { 204 | # 'key1': 'value1', 205 | # 'key2': 'value2', 206 | # } # <--- this bracket is indented and on a separate line 207 | # 208 | # time_series = self.remote_client.query_entity_counters( 209 | # entity='dev3246.region1', 210 | # key='dns.query_latency_tcp', 211 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)), 212 | # start_ts=now()-timedelta(days=3), 213 | # end_ts=now(), 214 | # ) # <--- this bracket is indented and on a separate line 215 | indent_closing_brackets=False 216 | 217 | 218 | 219 | # The number of columns to use for indentation. 220 | indent_width=4 221 | 222 | # Join short lines into one line. E.g., single line 'if' statements. 223 | join_multiple_lines=True 224 | 225 | # Do not include spaces around selected binary operators. For example: 226 | # 227 | # 1 + 2 * 3 - 4 / 5 228 | # 229 | # will be formatted as follows when configured with "*,/": 230 | # 231 | # 1 + 2*3 - 4/5 232 | no_spaces_around_selected_binary_operators= 233 | 234 | # Use spaces around default or named assigns. 235 | spaces_around_default_or_named_assign=False 236 | 237 | # Adds a space after the opening '{' and before the ending '}' dict delimiters. 238 | # 239 | # {1: 2} 240 | # 241 | # will be formatted as: 242 | # 243 | # { 1: 2 } 244 | spaces_around_dict_delimiters=False 245 | 246 | # Adds a space after the opening '[' and before the ending ']' list delimiters. 247 | # 248 | # [1, 2] 249 | # 250 | # will be formatted as: 251 | # 252 | # [ 1, 2 ] 253 | spaces_around_list_delimiters=False 254 | 255 | # Use spaces around the power operator. 256 | spaces_around_power_operator=False 257 | 258 | # Use spaces around the subscript / slice operator. For example: 259 | # 260 | # my_list[1 : 10 : 2] 261 | spaces_around_subscript_colon=False 262 | 263 | # Adds a space after the opening '(' and before the ending ')' tuple delimiters. 
264 | # 265 | # (1, 2, 3) 266 | # 267 | # will be formatted as: 268 | # 269 | # ( 1, 2, 3 ) 270 | spaces_around_tuple_delimiters=False 271 | 272 | # The number of spaces required before a trailing comment. 273 | # This can be a single value (representing the number of spaces 274 | # before each trailing comment) or list of values (representing 275 | # alignment column values; trailing comments within a block will 276 | # be aligned to the first column value that is greater than the maximum 277 | # line length within the block). For example: 278 | # 279 | # With spaces_before_comment=5: 280 | # 281 | # 1 + 1 # Adding values 282 | # 283 | # will be formatted as: 284 | # 285 | # 1 + 1 # Adding values <-- 5 spaces between the end of the statement and comment 286 | # 287 | # With spaces_before_comment=15, 20: 288 | # 289 | # 1 + 1 # Adding values 290 | # two + two # More adding 291 | # 292 | # longer_statement # This is a longer statement 293 | # short # This is a shorter statement 294 | # 295 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment 296 | # short # This is a shorter statement 297 | # 298 | # will be formatted as: 299 | # 300 | # 1 + 1 # Adding values <-- end of line comments in block aligned to col 15 301 | # two + two # More adding 302 | # 303 | # longer_statement # This is a longer statement <-- end of line comments in block aligned to col 20 304 | # short # This is a shorter statement 305 | # 306 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment <-- the end of line comments are aligned based on the line length 307 | # short # This is a shorter statement 308 | # 309 | spaces_before_comment=2 310 | 311 | # Insert a space between the ending comma and closing bracket of a list, 312 | # etc. 313 | space_between_ending_comma_and_closing_bracket=True 314 | 315 | # Use spaces inside brackets, braces, and parentheses. 
For example: 316 | # 317 | # method_call( 1 ) 318 | # my_dict[ 3 ][ 1 ][ get_index( *args, **kwargs ) ] 319 | # my_set = { 1, 2, 3 } 320 | space_inside_brackets=False 321 | 322 | # Split before arguments, but do not split all subexpressions recursively 323 | # (unless needed). 324 | split_all_top_level_comma_separated_values=False 325 | 326 | 327 | # Set to True to prefer splitting before '&', '|' or '^' rather than 328 | # after. 329 | split_before_bitwise_operator=True 330 | 331 | # Split after the opening paren which surrounds an expression if it doesn't 332 | # fit on a single line. 333 | split_before_expression_after_opening_paren=False 334 | 335 | # If an argument / parameter list is going to be split, then split before 336 | # the first argument. 337 | split_before_first_argument=False 338 | 339 | # Set to True to prefer splitting before 'and' or 'or' rather than 340 | # after. 341 | split_before_logical_operator=True 342 | 343 | # Set to True to split list comprehensions and generators that have 344 | # non-trivial expressions and multiple clauses before each of these 345 | # clauses. For example: 346 | # 347 | # result = [ 348 | # a_long_var + 100 for a_long_var in xrange(1000) 349 | # if a_long_var % 10] 350 | # 351 | # would reformat to something like: 352 | # 353 | # result = [ 354 | # a_long_var + 100 355 | # for a_long_var in xrange(1000) 356 | # if a_long_var % 10] 357 | split_complex_comprehension=False 358 | 359 | # The penalty for splitting right after the opening bracket. 360 | split_penalty_after_opening_bracket=300 361 | 362 | # The penalty for splitting the line after a unary operator. 363 | split_penalty_after_unary_operator=10000 364 | 365 | # The penalty of splitting the line around the '+', '-', '*', '/', '//', 366 | # ``%``, and '@' operators. 367 | split_penalty_arithmetic_operator=300 368 | 369 | # The penalty for splitting right before an if expression. 
370 | split_penalty_before_if_expr=0 371 | 372 | # The penalty of splitting the line around the '&', '|', and '^' 373 | # operators. 374 | split_penalty_bitwise_operator=300 375 | 376 | # The penalty for splitting a list comprehension or generator 377 | # expression. 378 | split_penalty_comprehension=80 379 | 380 | # The penalty for characters over the column limit. 381 | split_penalty_excess_character=7000 382 | 383 | # The penalty incurred by adding a line split to the unwrapped line. The 384 | # more line splits added the higher the penalty. 385 | split_penalty_for_added_line_split=30 386 | 387 | # The penalty of splitting a list of "import as" names. For example: 388 | # 389 | # from a_very_long_or_indented_module_name_yada_yad import (long_argument_1, 390 | # long_argument_2, 391 | # long_argument_3) 392 | # 393 | # would reformat to something like: 394 | # 395 | # from a_very_long_or_indented_module_name_yada_yad import ( 396 | # long_argument_1, long_argument_2, long_argument_3) 397 | split_penalty_import_names=0 398 | 399 | # The penalty of splitting the line around the 'and' and 'or' 400 | # operators. 401 | split_penalty_logical_operator=300 402 | 403 | # Use the Tab character for indentation. 
404 | use_tabs=False 405 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "britesnow.vscode-toggle-quotes", 4 | "eamodio.gitlens", 5 | "davidanson.vscode-markdownlint", 6 | "redhat.vscode-yaml", 7 | "ms-python.vscode-pylance", 8 | "bungcip.better-toml", 9 | "emeraldwalk.runonsave", 10 | "matangover.mypy", 11 | "njqdev.vscode-python-typehint", 12 | "dongli.python-preview", 13 | "frhtylcn.pythonsnippets", 14 | "kevinrose.vsc-python-indent", 15 | "LittleFoxTeam.vscode-python-test-adapter", 16 | "ryanluker.vscode-coverage-gutters" 17 | ] 18 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.languageServer": "Pylance", 3 | "python.linting.pylintEnabled": true, 4 | "python.linting.enabled": true, 5 | "python.linting.pylintPath": "pylint", 6 | "python.formatting.provider": "yapf", 7 | "editor.formatOnSave": true, 8 | "python.defaultInterpreterPath": ".venv/bin/python", 9 | "mypy.runUsingActiveInterpreter": true, 10 | "emeraldwalk.runonsave": { 11 | "commands": [ 12 | { 13 | "match": "\\.py$", 14 | "cmd": "cd ${workspaceFolder} && isort -m 9 --line-length 160 graphinder tests" 15 | }, 16 | { 17 | "match": "\\.py$", 18 | "cmd": "cd ${workspaceFolder} && autoflake --in-place --remove-unused-variables --remove-all-unused-imports graphinder/**/*.py" 19 | }, 20 | { 21 | "match": "\\.py$", 22 | "cmd": "cd ${workspaceFolder} && docformatter --wrap-summaries 160 --wrap-descriptions 160 -ri graphinder tests" 23 | }, 24 | { 25 | "match": "\\.py$", 26 | "cmd": "cd ${workspaceFolder} && unify -ri graphinder tests" 27 | } 28 | ] 29 | }, 30 | "python.testing.pytestEnabled": true, 31 | "python.testing.pytestPath": "../scripts/pytest-with-cov" 32 | }
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via issue, 4 | email, or any other method with the owners of this repository before making a change. 5 | 6 | Please note we have a code of conduct, please follow it in all your interactions with the project. 7 | 8 | ## Pull Request Process 9 | 10 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a 11 | build. 12 | 2. Update the README.md with details of changes to the interface, this includes new environment 13 | variables, exposed ports, useful file locations and container parameters. 14 | 3. You may merge the Pull Request in once you have the sign-off of two other developers, or if you 15 | do not have permission to do that, you may request the second reviewer to merge it for you. 16 | 17 | ## Code of Conduct 18 | 19 | ### Our Pledge 20 | 21 | In the interest of fostering an open and welcoming environment, we as 22 | contributors and maintainers pledge to making participation in our project and 23 | our community a harassment-free experience for everyone, regardless of age, body 24 | size, disability, ethnicity, gender identity and expression, level of experience, 25 | nationality, personal appearance, race, religion, or sexual identity and 26 | orientation. 
27 | 28 | ### Our Standards 29 | 30 | Examples of behavior that contributes to creating a positive environment 31 | include: 32 | 33 | * Using welcoming and inclusive language 34 | * Being respectful of differing viewpoints and experiences 35 | * Gracefully accepting constructive criticism 36 | * Focusing on what is best for the community 37 | * Showing empathy towards other community members 38 | 39 | Examples of unacceptable behavior by participants include: 40 | 41 | * The use of sexualized language or imagery and unwelcome sexual attention or 42 | advances 43 | * Trolling, insulting/derogatory comments, and personal or political attacks 44 | * Public or private harassment 45 | * Publishing others' private information, such as a physical or electronic 46 | address, without explicit permission 47 | * Other conduct which could reasonably be considered inappropriate in a 48 | professional setting 49 | 50 | ### Our Responsibilities 51 | 52 | Project maintainers are responsible for clarifying the standards of acceptable 53 | behavior and are expected to take appropriate and fair corrective action in 54 | response to any instances of unacceptable behavior. 55 | 56 | Project maintainers have the right and responsibility to remove, edit, or 57 | reject comments, commits, code, wiki edits, issues, and other contributions 58 | that are not aligned to this Code of Conduct, or to ban temporarily or 59 | permanently any contributor for other behaviors that they deem inappropriate, 60 | threatening, offensive, or harmful. 61 | 62 | ### Scope 63 | 64 | This Code of Conduct applies both within project spaces and in public spaces 65 | when an individual is representing the project or its community. Examples of 66 | representing a project or community include using an official project e-mail 67 | address, posting via an official social media account, or acting as an appointed 68 | representative at an online or offline event. 
Representation of a project may be 69 | further defined and clarified by project maintainers. 70 | 71 | ### Enforcement 72 | 73 | All complaints will be reviewed and investigated and will result in a response that 74 | is deemed necessary and appropriate to the circumstances. The project team is 75 | obligated to maintain confidentiality with regard to the reporter of an incident. 76 | Further details of specific enforcement policies may be posted separately. 77 | 78 | Project maintainers who do not follow or enforce the Code of Conduct in good 79 | faith may face temporary or permanent repercussions as determined by other 80 | members of the project's leadership. 81 | 82 | ### Attribution 83 | 84 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 85 | available at [http://contributor-covenant.org/version/1/4][version] 86 | 87 | [homepage]: http://contributor-covenant.org 88 | [version]: http://contributor-covenant.org/version/1/4/ 89 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # BASE 2 | FROM python:3.10-alpine as python-base 3 | 4 | ENV APP_NAME="graphinder" \ 5 | POETRY_HOME="/opt/poetry" \ 6 | POETRY_VIRTUALENVS_IN_PROJECT=true \ 7 | PIP_NO_CACHE_DIR=off \ 8 | PYSETUP_PATH="/opt/pysetup" \ 9 | PYTHONDONTWRITEBYTECODE=1 \ 10 | VENV_PATH="/opt/pysetup/.venv" \ 11 | PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" 12 | 13 | # BUILDER 14 | FROM python-base as builder-base 15 | 16 | WORKDIR $PYSETUP_PATH 17 | RUN apk add build-base zlib-dev libffi-dev 18 | RUN pip install poetry 19 | 20 | COPY ./poetry.lock ./pyproject.toml ./README.md ./ 21 | RUN poetry install --no-dev --no-root 22 | 23 | COPY ./$APP_NAME ./$APP_NAME 24 | RUN poetry install --no-dev 25 | 26 | # RELEASE 27 | FROM python-base as release 28 | 29 | ENV PYTHONWARNINGS="ignore" 30 | 31 | COPY --from=builder-base $VENV_PATH $VENV_PATH 
32 | COPY ./$APP_NAME /$APP_NAME/ 33 | COPY ./docker-entrypoint.sh /docker-entrypoint.sh 34 | 35 | RUN chmod +x /docker-entrypoint.sh 36 | 37 | ENTRYPOINT /docker-entrypoint.sh $0 $@ 38 | CMD ["-h"] 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021-2022 Escape Technologies SAS (https://escape.tech/) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graphinder ![PyPI](https://img.shields.io/pypi/v/graphinder) [![CI](https://github.com/Escape-Technologies/graphinder/actions/workflows/ci.yaml/badge.svg)](https://github.com/Escape-Technologies/graphinder/actions/workflows/ci.yaml) [![codecov](https://codecov.io/gh/Escape-Technologies/graphinder/branch/main/graph/badge.svg?token=4KGK1LTHRO)](https://codecov.io/gh/Escape-Technologies/graphinder) 2 | 3 | Graphinder is a tool that extracts all GraphQL endpoints from a given domain. 4 | 5 | ![Banner](doc/banner.png) 6 | 7 | ![Docker Pulls](https://img.shields.io/docker/pulls/escapetech/graphinder) 8 | ![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/escapetech/graphinder) 9 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/graphinder) 10 | 11 | ## Run with docker 12 | 13 | ```bash 14 | docker pull escapetech/graphinder 15 | docker run -it --rm escapetech/graphinder -d example.com 16 | ``` 17 | 18 | If you want to save your results.json file, you can use: 19 | 20 | ```bash 21 | docker run -it --name graphinder escapetech/graphinder -d example.com 22 | docker cp graphinder:/graphinder/results.json results.json 23 | docker rm -f graphinder 24 | ``` 25 | 26 | ## Install using Pip 27 | 28 | ```bash 29 | pip install graphinder 30 | 31 | # using specific python binary 32 | python3 -m pip install graphinder 33 | ``` 34 | 35 | Run it with 36 | 37 | ```bash 38 | graphinder ... 39 | ``` 40 | 41 | ## Usage 42 | 43 | A Scan consistes of: 44 | 45 | - Running specific domain (`-d`, `--domain`). 
46 | - Searching all scripts loaded by the browser for graphql endpoint (`-s`, `--script`) 47 | - Brute forcing the directories of all discovered urls (`-b`, `--bruteforce`) 48 | - Using precision mode (`-p`, `--precision`) 49 | 50 | By default, bruteforce and script search are enabled. 51 | 52 | ```bash 53 | graphinder -d example.com 54 | ``` 55 | 56 | ```bash 57 | graphinder -f domains.txt 58 | ``` 59 | 60 | ### Extra features 61 | 62 | - `--no-bruteforce`: Disable bruteforce 63 | - `--no-script`: Disable script search 64 | - `-p --precision --no-precision`: Enable/disable precision mode (default: enabled) (precision mode is slower but more accurate) 65 | - `-w --max-workers `: Maximum of concurrent workers on multiple domains. 66 | - `-o --output-file `: Output the results to file 67 | - `-v --verbose --no-verbose`: Verbose mode 68 | - `-r --reduce`: The maximum number of subdomains to scan. 69 | - `-wb --webhook_url`: The discord webhook url to send the results to. 70 | 71 | If you experience any issues, irregularities or networking bottlenecks, please reduce your number of workers, otherwise, better is your network, the more workers you can have. 72 | 73 | ## Local installation 74 | 75 | Clone the repository and run the installation script 76 | 77 | ```bash 78 | git clone https://github.com/Escape-Technologies/graphinder.git 79 | cd graphinder 80 | ./install-dev.sh 81 | ``` 82 | 83 | Run this command to enter the virtual enviroment 84 | 85 | ```bash 86 | poetry shell 87 | ``` 88 | 89 | Profit ! 90 | 91 | ```bash 92 | graphinder -d example.com 93 | ``` 94 | 95 | ## How do you make sure this is a valid graphql endpoint ? 96 | 97 | ![detector](doc/detector.jpg) 98 | 99 | ## Contributing 100 | 101 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. 102 | 103 | Please make sure to update tests as appropriate. 
104 | 105 | ## License ![PyPI - License](https://img.shields.io/pypi/l/graphinder) 106 | 107 | [MIT](https://choosealicense.com/licenses/mit/) 108 | -------------------------------------------------------------------------------- /commitlint.config.js: -------------------------------------------------------------------------------- 1 | const Configuration = { 2 | /* 3 | * Resolve and load @commitlint/config-conventional from node_modules. 4 | * Referenced packages must be installed 5 | */ 6 | extends: ['@commitlint/config-angular'], 7 | /* 8 | * Resolve and load @commitlint/format from node_modules. 9 | * Referenced package must be installed 10 | */ 11 | formatter: '@commitlint/format', 12 | /* 13 | * Whether commitlint uses the default ignore rules. 14 | */ 15 | defaultIgnores: true, 16 | /* 17 | * Custom URL to show upon failure 18 | */ 19 | helpUrl: 20 | 'https://github.com/angular/angular/blob/22b96b9/CONTRIBUTING.md#-commit-message-guidelines', 21 | rules: { 22 | "type-enum": [2, 'always' ,["ci", "docs", "feat", "fix", "refactor", "test", "chore"]], 23 | }, 24 | }; 25 | 26 | module.exports = Configuration; 27 | -------------------------------------------------------------------------------- /doc/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/doc/banner.png -------------------------------------------------------------------------------- /doc/detector.drawio: -------------------------------------------------------------------------------- 1 | 
7V3bkps2GH4az7QX60GAOFxmD2k6TdO0ybTN1Q4G2ZBg5IC86+3TV5izJGzZ5mDHuzPJIiGE0Pef9Us70e6Wm19iZ+X/jj0UTlTF20y0+4mqqhZU6a+05iWrAZZhZzWLOPDyuqriU/AfyiuVvHYdeChpNCQYhyRYNStdHEXIJY06J47xc7PZHIfNt66cBeIqPrlOyNf+E3jEz2otqFT171Cw8Is3AyW/s3SKxnlF4jsefq5VaQ8T7S7GmGRXy80dCtPZK+Yle+5ty91yYDGKiMwD79D6T9P87bNvwj/8X2fv1it1c5PDk5CX4oORR78/L+KY+HiBIyd8qGpvY7yOPJT2qtBS1eY9xitaCWjlV0TISw6msyaYVvlkGeZ36YDjl3/z57eFL2lhCovi/aZ+8/4lL2VjTQfYOgV5VYLXsZu3eq8/fnBv1w/vwPIW/fV49/WzC24KUnLiBSI75geUQFESR3iJ6HjoczEKHRI8Ncfh5KS2KNtVaNCLHBAxOLsG+eSE6/xNE9UI6XBv55h+cB024/saFzduku3Ev6ENgLXabCeuuE+vFunvh+WKpB+ywgnZfs/3NUqvsu7paLM3ZI05GmlSwLMfEPRp5Wwn/JnKgSbarSA9oZigTa2Kn+f8rp2zUCFEtLz8XHGkWvCdX+PG4rlTkBGShSFAZng+SjHKbwLraP7ohe7zRz/iYEuoOY6WygBpwqlS+1G1Zo8Ze+adMJCVozoeRXAWqKHIe5MqKlqOcISymrdB+imVDDxCWm5LH1Ec0GlCcVG5Cci/1eO09KUQyvS66iktnCZ228lKRuxqvdAfaJIfVBUpeuP6MbRmR6rOdJRNBNcRxdl5qTVbpQ2S9vGy79GAsnNcN/C09gaEDJ9lI+6W61q1mhc8FSrnjjKJE0RJTSPV7goe8BziTB8fycsKRc4SFS1mMfsM2xOr2ny8nK0TWbXG6TABX7SqNWgy4hCAqQ441QZUgWoDqjml4qMf9VZ0fBlmYV2gqhICtUN5psnKM3MgO3LXKPuwI3mO68F2PInJSocsZzLdHtt4BPCSuEtoRhxqw3TIcnovloHOuBimCQc1RfU+WTRxfbR0LoVBzdG9O5sD4wtKuHmiX0yak5GQGH9DdzjEccUIc8oFTJUTBouIFl06QylL3abzF7hO+Ca/sQw8b8v1otlv4lPnRNgNIAbrbpv61LY5TKAAEq03mSnSYQwg2fQXUTrAYOM7q7TdcrNII5XTeYifXd+JydSJIkyo7MDRI6iBE6J5Cu/WREfxwxNKLfVdfvdJc844xlDnecASzHePEy4KcXRmpjclUn9Gep35Jqr2dvvTDWKcxgBwati1Hw5AsR0PrCm0+wLR7BPEi/K1WLh0k/paSg0uUxKvPv0ue5SAVGkZloXD/S44rBEIco45c8cL8JZED1pL5bVW6MxQ+BEnQdqAVscZAe/VZmpHFoTGMhvkuMsQMldf6qykrNFCvTKRicpHq5ivjM92vjAmnihFlrOMEzlL7IhBJiRpsaHVFkesPbTatatWzNBFMXWc90vLttINk0MVNpES+G0iH6H097pn8vZA1ynmju8k/jp2UoRXyA3mAYovwuSxOSl8hiaP2h75OAUzRMVsnEzgrTKB99MlSpI0z+MSUAPAvATYYC+wlUApN/RfhOl/uqJfBm6GweLGy8MRgJLy46lF842ipHjBfI62XyyLGPGdiD7ohjipJZCcN1I6uAQOszjgPmBu3n7cICS7xK5VcZKxgpDqtQeGWQmn827doIBofC7BNfGIrjO+kiVngfeHh8gqeHWNzsA10ngz4LpEl3JurMKH5q9JdLGqBNpjqxLe3OLQKKbwPSNtZpgQvBTMMcGr3fJt5iSBO92UuNbFIjvrzFrW3HKR63IUQe/MLKjDjiSZxfANgDxOugAnNmrXGU46r/IHw4k+8+0YqDyILE8XQWWpM80wOlI6VhMqWyDihoVK3Q9VLSLuhk5CZ7kJxG5BVEXPK9dwfxSc
n8E9QqeoOzG03aqD9mQN7+3INpmODo6RSw7Y3J1O3PaB0u3tw9rbsNG+n3RlXSLz5EQqznPyiyz8/Rn5aYHL7D+KF2RyXOGYTAOZdXubNZFlmQYeyTSd0ZHEnqHrVFwm48ZqkrY57E1xidJmOlZc5UKvPjlgoVe8p6fB+SJpohwsTTqUH9K5GnrLivJA2plNEDpWO5tslMxiQsQdaWfuPeYAm3d0iYwWPsuhT1V4bFr7mRC92rLTbRiitxgaMiQ3su3viFXTXRE9o8WLuEErM+qHtbdObr/bhOXaawMwbUEsw5qw1RbT3reVqrL7sNSW3SPDcBtQ2f2WxpHsBjSGDwwwrDULJcIww5PUsAdL7Mxn2U+Ko1o7PxIp9h9mulBSNGRJcVQP/0cixcNiRcdax4cc3tBbCv64hitgz3I4nmhUJtPElDwV4lDT1WY3DLHnNckOrF9bUWIJ/9zCVPP5XBUvhXnGzIAdhamAqhjT5rq+KUjxK4JHjSUWODXbKfykWBWUOYroRxE5I5tMFydygG4eKnOMMWTOYfHWy6bhcW0thYk/GCzlSdMwm0TbGw2XHQ9JkgUb9OdUyNLUeOKOEVJcSO94cTfw0pvBe4hXlfxmc0HKNG+9OnaPP3Jq0NQrg/eargoeAKzzxoffonVd+OhqiyQcDRHeSbsuRPj00fPimOtO7gUmK9BGZ5jr3kwFFHhugPB5BFcFCBupHh0Qk3d4rgoQqJ0bIBKB2MtetB83EsKGFeDR0TybPVZa05s99ezemiNmlsptXemCQW1WYgK5HUM6bCeN0xj0dceQ2NZgog2jbxmyeNU2GFBnvqSlFNveil32ivCoz4HXtKzXPV6yzFWu14yVK2+Jzu85KynYi0wTWIfDyjTeOowRWcfpWSxvnTBBE/Wuuuzcis+33Z9gww+DG1BZ3HhdBAY16y3eWNsBnPJTECXEiUj48vMrjBWMBg+jkP/6g5GP8JUwfo7Xr+wnxK085rrOfqKDQPvDjTfaBbhlV6/MJwZRkPQkPJ+/PxAFm5DgbZD8TafWyxB01sT/C31fBzGdLXjfA3xdBLIGMl7YAwUF+A3KhPY12Iz6cMcx0GL1Z0SzqFL111i1h/8B -------------------------------------------------------------------------------- /doc/detector.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/doc/detector.jpg -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | . 
/opt/pysetup/.venv/bin/activate 6 | 7 | exec python3 -m graphinder "$@" 8 | -------------------------------------------------------------------------------- /graphinder/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | 3 | from graphinder.main import async_main, cli, main # noqa 4 | -------------------------------------------------------------------------------- /graphinder/__main__.py: -------------------------------------------------------------------------------- 1 | """Entry point for python cli.""" 2 | 3 | from .main import cli 4 | 5 | if __name__ == '__main__': 6 | cli() 7 | -------------------------------------------------------------------------------- /graphinder/entities/__init__.py: -------------------------------------------------------------------------------- 1 | """Entities init.""" 2 | -------------------------------------------------------------------------------- /graphinder/entities/errors.py: -------------------------------------------------------------------------------- 1 | """Errors entities.""" 2 | 3 | from asyncio import TimeoutError as AsyncioTimeoutError 4 | from socket import gaierror 5 | 6 | from aiohttp.client_exceptions import ClientError, ClientPayloadError 7 | 8 | AwaitableRequestException: tuple = ( 9 | ClientError, 10 | AsyncioTimeoutError, 11 | gaierror, 12 | UnicodeError, 13 | ValueError, 14 | ClientPayloadError, 15 | ) 16 | -------------------------------------------------------------------------------- /graphinder/entities/io.py: -------------------------------------------------------------------------------- 1 | """Entities for `io` module.""" 2 | 3 | from typing import Dict, Set 4 | 5 | from graphinder.entities.pool import Url 6 | 7 | Results = Dict[str, Set[Url]] 8 | -------------------------------------------------------------------------------- /graphinder/entities/pool.py: 
"""Tasks entities."""

from enum import Enum
from typing import List


class TaskTags(Enum):

    """Tags identifying the kind of work a task performs."""

    FETCH_SCRIPT = 0
    FETCH_PAGE_SCRIPTS = 1
    FETCH_ENDPOINT = 2


# pylint: disable=too-few-public-methods
class Task:

    """A unit of work bound to a domain: process `url` according to `tag`."""

    def __init__(
        self,
        domain_url: str,
        tag: TaskTags,
        url: str,
    ) -> None:
        """Store the task attributes.

        A single trailing slash is stripped from `url` so urls compare consistently.
        """

        self.domain_url = domain_url
        self.tag = tag
        self.url = url.removesuffix('/')


TasksList = List[Task]
def gql_endpoints_characterizer() -> List[str]:
    """Return the list of most common GraphQL endpoint path fragments.

    - Versioning has a huge cost on the performance of the scanner.
    - We try to minimize that cost by versioning only the most common endpoints.
    """

    base_paths: List[str] = [
        'graphql',
        'appsync',
        'altair',
        'explorer',
        'graphiql',
        'playground',
        'subscriptions',
        'graph',
        'graphiql.css',
        'graphiql/finland',
        'graphiql.js',
        'graphiql.min.css',
        'graphiql.min.js',
        'graphiql.php',
        'graphql/console',
        'graphql-explorer',
        'graphql.php',
        'graphql/schema.json',
        'graphql/schema.xml',
        'graphql/schema.yaml',
        'graphql/v1',
        'graphql/v2',
        'api/graphql',
    ]

    versions = ('v1', 'v2')
    # Only the 8 most common paths get versioned variants; a path that already
    # embeds a version marker is skipped.
    versioned: List[str] = [
        f'{version}/{path}'
        for version in versions
        for path in base_paths[:8]
        if not any(v in path for v in versions)
    ]

    return base_paths + versioned
"""I/O writers."""

import json
from io import TextIOWrapper
from typing import Any

from graphinder.entities.io import Results


class ResultEncoder(json.JSONEncoder):

    """JSON encoder that serializes `set` values as lists."""

    def default(self, o: Any) -> Any:
        """Convert a `set` into a JSON-serializable `list`; reject any other type."""

        if not isinstance(o, set):
            raise NotImplementedError()
        return list(o)


def write_results(
    output_file: TextIOWrapper,
    results: Results,
) -> None:
    """Dump `results` as pretty-printed, key-sorted JSON into `output_file`."""

    json.dump(
        results,
        output_file,
        cls=ResultEncoder,
        indent=4,
        sort_keys=True,
    )
def argument_builder(args: List[str]) -> argparse.Namespace:
    """Build the argument parser and parse `args`.

    Args:
        args: raw argument vector (typically ``sys.argv[1:]``).

    Returns:
        The parsed namespace.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--domain',
        '-d',
        dest='domain',
        type=str,
        help='Domain to scan',
    )
    parser.add_argument(
        '--output-file',
        '-o',
        dest='output_file',
        type=argparse.FileType('w'),
        help='The path of the results file',
    )
    # BUGFIX: these two flags previously used `type=bool`, so argparse called
    # bool() on the raw string and `--verbose False` evaluated to True.
    # BooleanOptionalAction implements the documented `--verbose/--no-verbose`
    # and `--precision/--no-precision` pairs correctly (Python 3.9+).
    parser.add_argument(
        '--verbose',
        '-v',
        dest='verbose_mode',
        action=argparse.BooleanOptionalAction,
        help='Verbose',
        default=False,
    )
    parser.add_argument(
        '--no-script',
        '-ns',
        dest='no_script_mode',
        help='Disable script scanning',
        action='store_true',
    )
    parser.add_argument(
        '--quiet',
        '-q',
        dest='quiet_mode',
        help='Quiet',
        action='store_true',
    )
    parser.add_argument(
        '--no-bruteforce',
        '-nb',
        dest='no_bruteforce_mode',
        help='Disable directory scanning',
        action='store_true',
    )
    parser.add_argument(
        '--precision',
        '-p',
        dest='precision_mode',
        action=argparse.BooleanOptionalAction,
        help='Use precision mode',
        default=True,
    )
    parser.add_argument(
        '--reduce',
        '-r',
        dest='reduce_mode',
        type=int,
        help='The maximum number of subdomains to scan.',
        default=100,
    )
    parser.add_argument(
        '--webhook_url',
        '-wb',
        dest='webhook_url',
        type=str,
        help='The webhook url to send results.',
        default=None,
    )

    return parser.parse_args(args)
async def loop(
    argv: Optional[List[str]] = None,
    logger: Optional[logging.Logger] = None,
) -> Results:
    """Parse arguments, configure logging and run the main scanning routine."""

    args: argparse.Namespace = argument_builder(
        sys.argv[1:] if argv is None else argv
    )

    logger = setup_logger(
        verbose_mode=args.verbose_mode,
        quiet_mode=args.quiet_mode,
        logger=logger,
    )

    # Bail out with an empty result set on invalid CLI combinations.
    if not validate_arguments(logger, args):
        return {}

    fetch_assets()
    return await main_routine(args)
| 3 | from graphinder.pool.routine import main_routine # noqa 4 | -------------------------------------------------------------------------------- /graphinder/pool/detectors.py: -------------------------------------------------------------------------------- 1 | """All functions for detection.""" 2 | 3 | import asyncio 4 | import json 5 | import logging 6 | import re 7 | from typing import Any, Coroutine, Dict, Optional, Tuple 8 | 9 | import aiohttp 10 | 11 | from graphinder.io.providers import gql_endpoints_characterizer 12 | 13 | 14 | def _look_like_graphql_url(url: str) -> Tuple[bool, Optional[str]]: 15 | """Check if the url looks like a GraphQL endpoint.""" 16 | 17 | for part in gql_endpoints_characterizer(): 18 | if part in url: 19 | return True, part 20 | 21 | return False, None 22 | 23 | 24 | def _replace_last_resource(url: str, resource: str) -> Optional[str]: 25 | """Replace the last resource in the url with the given resource.""" 26 | 27 | # https://hello.com 28 | if url.count('/') <= 2: 29 | return None 30 | 31 | # https://hello.com 32 | if url.count('/') == 3 and url.endswith('/'): 33 | return None 34 | 35 | lst = url.split('/') 36 | if lst[-1] == '': 37 | # https://hello.com/aaa/ 38 | del lst[-1] 39 | lst[-1] = resource 40 | return '/'.join(lst) + '/' 41 | 42 | # else # https://hello.com/aaa 43 | lst[-1] = resource 44 | 45 | return '/'.join(lst) 46 | 47 | 48 | async def _looks_different_than_closest_route( 49 | session: aiohttp.ClientSession, 50 | url: str, 51 | original_body: str, 52 | ) -> bool: 53 | """Check if a close route to the same endpoint is different than the original one.""" 54 | 55 | look_likes, characterizer = _look_like_graphql_url(url) 56 | if look_likes and characterizer: 57 | 58 | random_url = _replace_last_resource(url, 'random') 59 | if random_url is None: 60 | return False 61 | 62 | async with session.post( 63 | random_url, 64 | json={'query': 'query { __typename }'}, 65 | timeout=10, 66 | ) as random_resp: 67 | random_text_body = 
async def analyze_typename(
    text_body: str,
    json_body: Dict,
) -> Tuple[bool, bool]:
    """Classify a `__typename` probe response.

    Returns:
        Tuple[bool, bool]: (looks like GraphQL, authentication looks valid).
    """

    errors = json_body.get('errors', [{}])
    first_error_has_message = (
        isinstance(errors, list)
        and len(errors) > 0
        and isinstance(errors[0], dict)
        and errors[0].get('message') is not None
    )

    if first_error_has_message:
        # A GraphQL error payload means a live endpoint; a hasura allowlist
        # rejection additionally proves the credentials were accepted.
        allowlisted = 'query is not in any of the allowlists' in text_body.lower()
        return True, allowlisted

    # A bare `message` field that is not a not-found page also hints at GraphQL.
    if json_body.get('message') is not None \
            and '404' not in text_body \
            and re.search(r'not.found', text_body, re.IGNORECASE) is None:
        return True, False

    return False, False
# pylint: disable=too-few-public-methods
class GraphQLEndpointDetector:

    """Probe a single url and decide whether it serves GraphQL.

    The verdict is exposed through `detect()` as a
    (valid_graphql, valid_auth) tuple.
    """

    valid_auth: bool = False
    valid_graphql: bool = False

    _session: aiohttp.ClientSession
    _url: str
    _timeout: int
    _logger: Optional[logging.Logger]

    def __init__(
        self,
        session: aiohttp.ClientSession,
        url: str,
        logger: Optional[logging.Logger] = None,
        timeout: int = 10,
    ) -> None:
        """Initialize the detector.

        Note: mutates the shared session headers so every probe is sent as JSON.
        """

        self._session = session
        self._url = url
        self._timeout = timeout
        self._logger = logger

        session.headers.update({'Content-Type': 'application/json'})

    async def _send_request(
        self,
        matching_key: str,
        payload: Optional[Dict],
    ) -> Tuple[bool, Optional[str], Optional[dict]]:
        """POST `payload` to the url and look for `matching_key` under `data`.

        Returns:
            bool: True when `data.<matching_key>` is present (definite GraphQL).
            Optional[str]: the raw text body, for further analysis on failure.
            Optional[dict]: the parsed json body, for further analysis on failure.
        """

        try:
            async with self._session.post(
                self._url,
                json=payload,
                timeout=self._timeout,
            ) as req:
                text_body = await req.text()
                json_body = json.loads(text_body)

                if json_body.get('data', {}).get(matching_key) is not None:
                    return True, None, None

                return False, text_body, json_body

        except Exception as e:  # pylint: disable=broad-except
            if self._logger:
                self._logger.debug(f'Error while sending request to {self._url}: {e}')

            return False, None, None

    async def detect(self) -> Tuple[bool, bool]:
        """Detect if the url is a GraphQL endpoint.

        Returns:
            Tuple[bool, bool]: (valid_graphql, valid_auth).
        """

        # A server answering an empty POST with GraphQL-shaped data is most
        # likely a honeypot.
        if await empty_post_request(self._session, self._url, self._timeout):
            return False, False

        # Run both probes concurrently while keeping track of which response
        # belongs to which probe.
        # BUGFIX: the previous as_completed loop used `if query_tasks[0]` -
        # the truthiness of a coroutine object, which is always True - so the
        # __schema response was always analyzed with the __typename analyzer,
        # and the early break could leave a pending, never-awaited task.
        typename_result, schema_result = await asyncio.gather(
            self._send_request(
                '__typename',
                {'query': 'query { __typename }'},
            ),
            self._send_request(
                '__schema',
                {'query': 'query { __schema { queryType { name } } }'},
            ),
        )

        for result, is_typename_probe in (
            (typename_result, True),
            (schema_result, False),
        ):
            status, text_body, json_body = result

            if status:
                # Direct hit: the endpoint answered the probe itself.
                self.valid_graphql = True
                self.valid_auth = True
                break

            if not text_body or not json_body:
                continue

            if is_typename_probe:
                further_analysis = await analyze_typename(text_body, json_body)
            else:
                further_analysis = await analyze_schema(text_body)

            if further_analysis[0]:
                self.valid_graphql = True
            if further_analysis[1]:
                self.valid_auth = True

        return self.valid_graphql, self.valid_auth
class Domain:

    """Scan state for a single domain: subdomains, visited urls and results."""

    # Present only in precision_mode; bounds the number of concurrent tasks.
    semaphore: Optional[asyncio.Semaphore]
    # Shared HTTP session, attached by the pool before tasks are consumed.
    session: aiohttp.ClientSession

    def __init__(
        self,
        url: str,
        precision_mode: bool = False,
    ) -> None:
        """Initialize the scan state for `url`."""

        self.url = url
        self.logger = get_logger()

        self.subdomains: List[str] = []
        self.already_fetched: Set[str] = set()
        self.results: Set[Url] = set()

        # precision_mode throttles concurrency with a 100-slot semaphore.
        self.semaphore = asyncio.Semaphore(100) if precision_mode else None

    def fetch_subdomains(
        self,
        reduce: int = 100,
    ) -> None:
        """Enumerate subdomains with the bundled subfinder binary.

        At most `reduce` subdomains are kept after deduplication.
        """

        self.logger.info('fetching subdomains...')

        # NOTE(review): self.url is interpolated into a shell command; assumes
        # the url comes from trusted CLI input — confirm before exposing.
        stream = os.popen(f'./subfinder -d {self.url} -silent -timeout 5')

        self.subdomains = remove_duplicate_domains(stream.read().split('\n'))
        self.logger.info(f'{self.url} - found { len(self.subdomains) } subdomains.')

        if len(self.subdomains) > reduce:
            self.logger.debug('reducing the number of subdomains.')
            self.subdomains = self.subdomains[:reduce]

    async def fetch_script(
        self,
        url: str,
    ) -> Set[Url]:
        """Download one script and extract candidate endpoint urls from it."""

        self.logger.debug(f'fetching script {url}...')
        return await extract_urls_from_script(self.session, url)

    async def fetch_page_scripts(
        self,
        url: str,
    ) -> Set[Url]:
        """Download a page and collect the script urls it references."""

        self.logger.debug(f'fetching page scripts {url}...')
        return await extract_script_urls_from_page(self.session, url)

    async def fetch_endpoint(
        self,
        url: str,
    ) -> None:
        """Probe a candidate url and record it when it speaks GraphQL."""

        self.logger.debug(f'fetching endpoint {url}...')

        is_gql, _ = await is_gql_endpoint(url, session=self.session)
        if is_gql:
            self.logger.info(f'found GQL endpoint {url}.')
            self.results.add(Url(url))
def extract_scripts_from_html(
    url: str,
    html: str,
) -> List[str]:
    """Collect the absolute url of every `<script src=...>` in an html page."""

    soup = bs4(html, 'html.parser')

    # Resolve each src attribute against the page url; inline scripts
    # (no src) are skipped.
    return [
        urljoin(url, tag.attrs.get('src'))
        for tag in soup.find_all('script')
        if tag.attrs.get('src')
    ]


async def extract_script_urls_from_page(
    session: aiohttp.ClientSession,
    url: str,
) -> Set[Url]:
    """Fetch `url` and return the script urls hosted on the same site."""

    found: Set[Url] = set()

    try:
        async with session.get(url, timeout=10) as page:
            body: str = await page.text()

        # Keep only scripts whose address contains the page url: third-party
        # scripts are unlikely to reveal this site's GraphQL endpoint.
        for script_url in extract_scripts_from_html(url, body):
            if url in script_url:
                found.add(Url(script_url))

    except AwaitableRequestException:
        # Network/DNS/decoding errors: treat the page as having no scripts.
        pass

    return found


def extract_scripts_from_raw_js(
    url: str,
    script_file: str,
) -> Set[str]:
    """Run every regex-based finder over a script body and filter the noise."""

    candidates: List[str] = []
    candidates += find_script_full_urls(script_file)
    candidates += find_script_window_base_urls(url, script_file)
    candidates += find_script_fetch_graphql(url, script_file)

    return filter_common(set(candidates))
async def domain_routine(
    domain: Domain,
    args: argparse.Namespace,
) -> Dict[str, Union[str, Set[Url]]]:
    """Run the whole task pipeline for one domain and gather its urls."""

    tasks: TasksList = init_domain_tasks(domain, args)
    found: Set[Url] = await consume_tasks(tasks, domain)

    return {'domain': domain.url, 'urls': filter_urls(found)}


async def main_routine(args: argparse.Namespace) -> Results:
    """Entry point of the pool: scan one domain, then display/persist/notify."""

    logger = get_logger()
    logger.info('starting main routine..')

    domain: Domain = Domain(args.domain, args.precision_mode)
    logger.info(f'running scan on {domain.url}')

    # Pop `output_file` off the namespace so the task generators never see
    # it; keep a local handle for the final write.
    output_file = args.output_file
    del args.output_file

    result = await domain_routine(domain, args)
    results: Results = cast(Results, {result['domain']: result['urls']})

    if not args.quiet_mode:
        display_results(results)

    if output_file is not None:
        # Shallow copy: write_results may reshape the mapping for JSON.
        write_results(output_file, results.copy())

    if args.webhook_url is not None:
        send_webhook(args.webhook_url, results)

    return results
async def process_task(
    task: Task,
    domain: Domain,
) -> None:
    """Process a single task, dispatching on its tag.

    Args:
        task: the task to run.
        domain: scan state the task belongs to.

    Raises:
        NotImplementedError: on an unknown task tag.
    """

    # precision_mode: bound concurrency for the given domain.
    if domain.semaphore:
        await domain.semaphore.acquire()

    try:
        # Skip URLs already processed. Fix: `already_fetched` was previously
        # only written to, never consulted, so duplicate URLs discovered by
        # several scripts/pages were fetched again.
        if task.url in domain.already_fetched:
            return
        domain.already_fetched.add(task.url)

        # Dispatch on the tag; script/page results feed new tasks back in.
        if task.tag == TaskTags.FETCH_SCRIPT:
            _urls = await domain.fetch_script(task.url)
            if _urls:
                await add_tasks(domain, _urls, TaskTags.FETCH_ENDPOINT)
        elif task.tag == TaskTags.FETCH_PAGE_SCRIPTS:
            _urls = await domain.fetch_page_scripts(task.url)
            if _urls:
                await add_tasks(domain, _urls, TaskTags.FETCH_SCRIPT)
        elif task.tag == TaskTags.FETCH_ENDPOINT:
            await domain.fetch_endpoint(task.url)
        else:
            raise NotImplementedError()

    finally:
        # precision_mode: always release, even if the fetch raised, so a
        # failing task can no longer starve the semaphore.
        if domain.semaphore:
            domain.semaphore.release()
def filter_common(urls: Set[str]) -> Set[str]:
    """Remove commonly found urls in javascript files of a webpage such as w3.org."""

    common_strings = [
        'w3.org',
        'localhost',
        'schema.org',
        'sentry.io',
        'git.io',
        'github.com',
        'nuxtjs.org',
        'momentjs.com',
        'fb.me',
        'reactjs.org',
        'slack',
        'jquery',
        'google',
        'twitter',
        'elastic.co',
        'formatjs.io',  # Fix: a missing comma merged this entry with the
        'icann.org',  # next one ('formatjs.ioicann.org'), so neither matched.
        #TODO: find more of those
    ]

    urls_filtered = urls.copy()
    for url in urls:
        # '://a...a' / '://x...x' look like minified placeholder hosts.
        if '://a' in url and url.endswith('a'):
            urls_filtered.remove(url)
        elif '://x' in url and url.endswith('x'):
            urls_filtered.remove(url)
        elif any(common in url for common in common_strings):
            urls_filtered.remove(url)

    return urls_filtered


def filter_urls(urls: Set[Url]) -> Set[Url]:
    """Remove urls that are not valid.

    Collapses candidates sharing a base url, preferring the canonical
    `<base>/graphql` form, otherwise the shortest candidate.
    """

    # Characterizers sorted longest-first so the most specific suffix wins.
    _endpoints: List[str] = gql_endpoints_characterizer()
    _endpoints.sort(key=len, reverse=True)

    # Group each url under its base (url minus the matched characterizer).
    unpacked_urls: Dict[str, List[Url]] = {}
    for url in urls:
        for endpoint in _endpoints:
            if url.endswith(endpoint):

                unpacked_url = remove_suffix(url, endpoint)
                if unpacked_url not in unpacked_urls:
                    unpacked_urls[unpacked_url] = []

                unpacked_urls[unpacked_url].append(url)

                break

    # Keep the `/graphql` variant when present, else the shortest url.
    filtered_urls: Set[Url] = set()
    for base_url, _urls in unpacked_urls.items():

        default_match: bool = False
        for _url in _urls:
            if _url[len(base_url):] == 'graphql':
                filtered_urls.add(_url)
                default_match = True

                break

        if not default_match:
            filtered_urls.add(min(_urls, key=len))

    return filtered_urls


def remove_duplicate_domains(domains: List[str]) -> List[str]:
    """if domains has example.com and www.example.com this will remove www.example.com."""

    corrected_domains: List[str] = []
    for domain in domains:
        # Fix: str.lstrip('www.') strips *characters* ('w' and '.'), not the
        # prefix, so e.g. 'www.wexample.com' became 'example.com'. Slice the
        # literal prefix instead.
        if domain.startswith('www.') and domain[len('www.'):] in domains:
            continue
        corrected_domains.append(domain)

    return corrected_domains
def find_script_full_urls(script_file: str) -> List[str]:
    """Extract full urls from script file.

    Note: the character class contains no '/', so a match stops at the
    first slash after the host (scheme + host only).
    """

    return re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', script_file)


def find_script_window_base_urls(
    domain: str,
    script_file: str,
) -> List[str]:
    """Extract window.__BASE_URL__ urls from script file.

    - window.__BASE_URL__ +"/graphql"
    - window.__BASE_URL__+"/api/graphql"
    - window.__BASE_URL__ + "/api/v1/graphql"
    """

    # Fix: the dot after 'window' was unescaped, so e.g. 'windowX__BASE_URL__'
    # also matched.
    urls: List[str] = re.findall(r'window\.__BASE_URL__ ?\+ ?\"\S{0,15}/graphql\"+', script_file)

    # Replace window.__BASE_URL__ with domain and strip ending `"`
    return [domain + url[url.find('"') + 1:-1] for url in urls]
def disable_internal_loggers() -> None:
    """Disable internal loggers."""

    logging.getLogger('asyncio').setLevel(logging.ERROR)
    warnings.simplefilter('ignore')


def setup_logger(
    verbose_mode: bool = False,
    quiet_mode: bool = False,
    logger: Optional[logging.Logger] = None,
) -> logging.Logger:
    """Setup logger.

    Args:
        verbose_mode: log at DEBUG level instead of INFO.
        quiet_mode: log at ERROR level only; overrides verbose_mode.
        logger: an already-configured logger to adjust; when given, only
            its level is updated and the same object is returned.

    Returns:
        The configured logger.
    """

    disable_internal_loggers()

    log_level: int = logging.DEBUG if verbose_mode else logging.INFO
    if quiet_mode:
        log_level = logging.ERROR

    if logger:
        # Fix: the provided logger used to be discarded and replaced with
        # get_logger(), which made the parameter a no-op.
        logger.setLevel(log_level)
        return logger

    log_format: str = '%(asctime)s,%(msecs)04d - %(levelname)s - %(name)s - %(message)s'

    logging.basicConfig(level=log_level, datefmt='%H:%M:%S', format=log_format)

    logging.addLevelName(logging.DEBUG, '\x1b[32;1mDBG\x1b[0m')
    logging.addLevelName(logging.INFO, '\x1b[37;1mINF\x1b[0m')
    logging.addLevelName(logging.WARNING, '\x1b[33;1mWRN\x1b[0m')
    logging.addLevelName(logging.ERROR, '\x1b[31;1mERR\x1b[0m')

    return get_logger()


def get_logger() -> logging.Logger:
    """Get logger for specified module."""

    return logging.getLogger('graphinder')
def format_webhook(results: Results) -> dict:
    """Build the Discord webhook payload for a set of scan results."""

    # One embed per scanned domain, listing its endpoints line by line.
    embeds = [{
        'title': domain,
        'description': '\n'.join(urls),
        'color': random.randint(0, 16777215),  # random embed accent color
    } for domain, urls in results.items()]

    payload: Dict[str, Any] = {
        'username': 'Graphinder',
        'embeds': embeds,
    }

    return payload


def send_webhook(
    webhook_url: str,
    results: Results,
) -> bool:
    """Post the results to a Discord webhook; True on HTTP 204 (success)."""

    response = requests.post(url=webhook_url, json=format_webhook(results), timeout=5)

    return response.status_code == 204
11 | curl -sSL https://bootstrap.pypa.io/get-pip.py | python 12 | fi 13 | echo "---- Installing Python Poetry ----" 14 | echo "---- Updating pip ----" 15 | pip install -U pip 16 | pip install -U poetry 17 | poetry config virtualenvs.in-project true 18 | 19 | echo "---- Installing Python dependencies ----" 20 | poetry install 21 | 22 | echo "\n\n\n\n\n---- Git hooks init (using mookme) ----" 23 | npm install 24 | npx mookme init --only-hook --skip-types-selection 25 | 26 | echo "\n\n\n\n\n---- Your working directory is all set :) ----" 27 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "@commitlint/cli": "^12.1.4", 4 | "@commitlint/config-angular": "^12.1.4", 5 | "@escape.tech/mookme": "^2.2.0-beta.2" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "graphinder" 3 | version = "2.0.0b4" 4 | description = "Escape Graphinder" 5 | authors = ["Escape Technologies SAS "] 6 | maintainers = [ 7 | "Karim Rustom ", 8 | "Antoine Carossio ", 9 | "Swan " 10 | ] 11 | license = "MIT" 12 | packages = [ 13 | { include = "graphinder" } 14 | ] 15 | readme = "README.md" 16 | "homepage" = "https://escape.tech/" 17 | "repository" = "https://github.com/Escape-Technologies/graphinder" 18 | 19 | [tool.poetry.urls] 20 | "Bug Tracker" = "https://github.com/Escape-Technologies/graphinder/issues" 21 | 22 | [tool.poetry.scripts] 23 | graphinder = 'graphinder:cli' 24 | 25 | [tool.coverage.run] 26 | omit = [ 27 | 'graphinder/__main__.py', 28 | 'tests/*' 29 | ] 30 | 31 | [tool.pytest.ini_options] 32 | asyncio_mode = 'strict' 33 | 34 | [tool.poetry.dependencies] 35 | aiohttp = {extras = ["speedups"], version = "^3.8.1"} 36 | beautifulsoup4 = ">=4,<5" 37 | python = 
">=3.10,<4.0" 38 | requests = "^2.27.1" 39 | 40 | [tool.poetry.dev-dependencies] 41 | autoflake = "^2.0" 42 | docformatter = "^1.5" 43 | isort = "^5.10.1" 44 | mypy = "^0.982" 45 | poetryup = "^0.12.3" 46 | pylint = "^2.15.9" 47 | pylint-quotes = "^0.2.3" 48 | pytest = "^7.2.0" 49 | pytest-asyncio = "^0.20.2" 50 | pytest-cov = "^4.0.0" 51 | pytest-mock = "^3.10.0" 52 | pytest-rerunfailures = "^10.3" 53 | setuptools = ">=50" 54 | snakeviz = "^2.1.1" 55 | types-PyYAML = "^6.0.11" 56 | types-requests = "^2.28.11" 57 | types-setuptools = "^67.6.0" 58 | unify = "^0.5" 59 | vulture = "^2.6" 60 | yapf = "^0.32.0" 61 | 62 | [build-system] 63 | requires = ["poetry-core>=1.0.0"] 64 | build-backend = "poetry.core.masonry.api" 65 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/tests/__init__.py -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Escape-Technologies/graphinder/3478723764e111296138be4c7a91a39787116834/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/entities/test_errors.py: -------------------------------------------------------------------------------- 1 | """Test entities/errors.py.""" 2 | 3 | from asyncio import TimeoutError as AsyncioTimeoutError 4 | from socket import gaierror 5 | 6 | from aiohttp.client_exceptions import ClientError 7 | 8 | from graphinder.entities.errors import AwaitableRequestException 9 | 10 | 11 | def test_awaitable_exception_type() -> None: 12 | """Test AwaitableRequestException type.""" 13 | 14 | assert len(AwaitableRequestException) == 6 15 | 16 | assert 
def test_results_type() -> None:
    """Test Results type."""

    results: Results = {'domain': set()}
    results['domain'].add(Url('https://example.com'))

    assert len(results) == 1
    assert len(results['domain']) == 1


def test_url_type() -> None:
    """Test Url type."""

    # Url is a str subtype (NewType/subclass), so instances behave as str.
    assert isinstance(Url('https://example.com'), str)
11 | 12 | assert 0 in TaskTags._value2member_map_ 13 | assert 1 in TaskTags._value2member_map_ 14 | assert 2 in TaskTags._value2member_map_ 15 | 16 | 17 | def test_task() -> None: 18 | """Task test.""" 19 | 20 | task: Task = Task('https://example.com', TaskTags.FETCH_SCRIPT, 'https://example.com/script.js') 21 | 22 | assert task.domain_url == 'https://example.com' 23 | assert task.tag == TaskTags.FETCH_SCRIPT 24 | assert task.url == 'https://example.com/script.js' 25 | -------------------------------------------------------------------------------- /tests/unit/io/test_printers.py: -------------------------------------------------------------------------------- 1 | """Test io/printers.py.""" 2 | 3 | from typing import Any 4 | 5 | from graphinder.entities.io import Results 6 | from graphinder.entities.pool import Url 7 | from graphinder.io.printers import display_results 8 | 9 | 10 | def test_display_results(capsys: Any) -> None: 11 | """display_results test.""" 12 | 13 | results: Results = { 14 | 'example.com': {Url('http://example.com/')}, 15 | 'example.org': {Url('http://example.org/')}, 16 | } 17 | display_results(results) 18 | 19 | assert capsys.readouterr().out == 'example.com - 1\n\thttp://example.com/\nexample.org - 1\n\thttp://example.org/\n' 20 | -------------------------------------------------------------------------------- /tests/unit/io/test_providers.py: -------------------------------------------------------------------------------- 1 | """Test io/providers.py.""" 2 | 3 | from graphinder.io.providers import gql_endpoints_characterizer 4 | 5 | 6 | def test_gql_endpoints_characterizer() -> None: 7 | """gql_endpoints_characterizer test.""" 8 | 9 | endpoints = gql_endpoints_characterizer() 10 | 11 | assert len(endpoints) == len(set(endpoints)), 'There should be no duplicates.' 12 | assert len(endpoints) == 23 + (2*8), 'There should be no more endpoints. Please update the test if you added more.' 
13 | -------------------------------------------------------------------------------- /tests/unit/io/test_readers.py: -------------------------------------------------------------------------------- 1 | """Test io/readers.py.""" 2 | 3 | from typing import List 4 | 5 | from graphinder.io.readers import read_domains 6 | from graphinder.pool.domain import Domain 7 | from graphinder.utils.logger import setup_logger 8 | 9 | 10 | def test_read_domains_input_domain() -> None: 11 | """read_domains test with input domain.""" 12 | 13 | out: List[Domain] = read_domains(None, 'example.com') 14 | 15 | assert len(out) == 1 16 | assert out[0].url == 'example.com' 17 | 18 | 19 | def test_read_domains_wrong_input_file() -> None: 20 | """read_domains test with wrong input file.""" 21 | 22 | try: 23 | _: List[Domain] = read_domains(None, None) 24 | except AttributeError: 25 | pass 26 | 27 | 28 | def test_read_domains_input_file() -> None: 29 | """read_domains test with input file.""" 30 | 31 | setup_logger(False) 32 | 33 | with open('tests/unit/io/test_readers.txt', 'r', encoding='utf-8') as input_file: 34 | out: List[Domain] = read_domains(input_file, None) 35 | 36 | str_out = set(domain.url for domain in out) 37 | 38 | assert {'example.com', 'example.org', 'example.fr'} == str_out 39 | -------------------------------------------------------------------------------- /tests/unit/io/test_readers.txt: -------------------------------------------------------------------------------- 1 | example.org 2 | example.com 3 | example.com/ 4 | www.example.com 5 | www.example.com/hello 6 | https://example.com 7 | http://example.com 8 | http://example.com/ 9 | https://example.com/ 10 | https://example.com/hello 11 | http://example.com/hello 12 | https://example.com/hello/ 13 | http://example.com/hello/ 14 | https://example.com/hello?query=false 15 | http://example.com/hello?query=false 16 | https://www.example.com 17 | http://www.example.com 18 | My super 
compagny,http://example.com/hello?query=false 19 | My super compagny,http://example.fr/hello?query=false, useless comment -------------------------------------------------------------------------------- /tests/unit/io/test_writers.py: -------------------------------------------------------------------------------- 1 | """Test io/writter.py.""" 2 | 3 | import json 4 | import os 5 | 6 | from graphinder.entities.io import Results 7 | from graphinder.entities.pool import Url 8 | from graphinder.io.writers import ResultEncoder, write_results 9 | 10 | 11 | def test_result_encoder() -> None: 12 | """ResultEncoder test with wrong structure.""" 13 | 14 | r = ResultEncoder() 15 | 16 | r.default(set()) 17 | 18 | try: 19 | r.default(list()) 20 | assert False, 'ResultEncoder should raise an exception.' 21 | except NotImplementedError: 22 | pass 23 | 24 | 25 | def test_write_results() -> None: 26 | """write_results test.""" 27 | 28 | results: Results = { 29 | 'example.com': {Url('http://example.com/')}, 30 | 'example.org': {Url('http://example.org/')}, 31 | } 32 | 33 | with open('test_write_results.json', 'w', encoding='utf-8') as output_file: 34 | write_results(output_file, results) 35 | 36 | with open('test_write_results.json', 'r', encoding='utf-8') as output_file: 37 | results_from_file = json.load(output_file) 38 | 39 | for result in results_from_file.copy(): 40 | results_from_file[result] = set(results_from_file[result]) 41 | 42 | assert results_from_file == results 43 | assert os.path.isfile('test_write_results.json') 44 | 45 | os.remove('test_write_results.json') 46 | -------------------------------------------------------------------------------- /tests/unit/pool/test_detectors.py: -------------------------------------------------------------------------------- 1 | """Test pool/detectors.py.""" 2 | 3 | import aiohttp 4 | import pytest 5 | 6 | from graphinder.pool.detectors import _look_like_graphql_url, is_gql_endpoint 7 | 8 | 9 | def test_look_like_graphql_url() -> 
None: 10 | """_look_like_graphql_url test.""" 11 | 12 | assert _look_like_graphql_url('https://example.com') == (False, None) 13 | assert _look_like_graphql_url('https://example.com/graphql') == (True, 'graphql') 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_is_gql_endpoint() -> None: 18 | """is_gql_endpoint test.""" 19 | 20 | async with aiohttp.ClientSession() as session: 21 | 22 | assert not (await is_gql_endpoint( 23 | 'https://example.com', 24 | session=session, 25 | ))[0] 26 | assert (await is_gql_endpoint( 27 | 'https://gontoz.escape.tech', 28 | session=session, 29 | ))[0] 30 | -------------------------------------------------------------------------------- /tests/unit/pool/test_domain.py: -------------------------------------------------------------------------------- 1 | """Test pool/domain.py.""" 2 | 3 | from typing import Set 4 | 5 | import aiohttp 6 | import pytest 7 | 8 | from graphinder.entities.pool import Url 9 | from graphinder.pool.domain import Domain 10 | from graphinder.utils.assets import fetch_assets 11 | from graphinder.utils.logger import setup_logger 12 | 13 | 14 | @pytest.mark.asyncio 15 | async def test_domain_class() -> None: 16 | """Domain class test.""" 17 | 18 | setup_logger(False) 19 | 20 | domain: Domain = Domain('example.com') 21 | domain.session = aiohttp.ClientSession() 22 | 23 | fetch_assets() 24 | 25 | domain.fetch_subdomains() 26 | assert len(domain.subdomains) == 100, 'There should be max 100 subdomain.' 
27 | 28 | assert await domain.fetch_script('https://example.com') == set() 29 | await domain.session.close() 30 | assert domain.session.closed 31 | 32 | 33 | @pytest.mark.asyncio 34 | async def test_domain_class_2() -> None: 35 | """More domain class test.""" 36 | 37 | setup_logger(False) 38 | domain: Domain = Domain('example2.com') 39 | domain.session = aiohttp.ClientSession() 40 | 41 | res: Set[Url] = await domain.fetch_script('https://cdn.jsdelivr.net/npm/graphql-playground-react/build/static/js/middleware.js') 42 | assert len(res) == 13 43 | 44 | res = await domain.fetch_page_scripts('https://gontoz.escape.tech/') 45 | assert len(res) == 0 46 | 47 | await domain.fetch_endpoint('https://gontoz.escape.tech/graphql') 48 | assert len(domain.results) == 1 49 | 50 | await domain.session.close() 51 | assert domain.session.closed 52 | -------------------------------------------------------------------------------- /tests/unit/pool/test_extractors.py: -------------------------------------------------------------------------------- 1 | """Test pool/extractors.py.""" 2 | -------------------------------------------------------------------------------- /tests/unit/pool/test_routine.py: -------------------------------------------------------------------------------- 1 | """Test pool/routine.py.""" 2 | 3 | 4 | def test_domain_routine() -> None: 5 | """domain_routine test.""" 6 | 7 | 8 | def test_process_pool() -> None: 9 | """domain_routine test.""" 10 | 11 | 12 | def test_main_routine() -> None: 13 | """main_routine test.""" 14 | -------------------------------------------------------------------------------- /tests/unit/pool/test_tasks.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name 2 | 3 | """Test pool/tasks.py.""" 4 | 5 | import argparse 6 | import asyncio 7 | 8 | import pytest 9 | 10 | from graphinder.entities.tasks import Task, TasksList, TaskTags 11 | from graphinder.io.providers import 
gql_endpoints_characterizer 12 | from graphinder.main import argument_builder 13 | from graphinder.pool.domain import Domain, Url 14 | from graphinder.pool.tasks import add_tasks, generate_bruteforce_tasks, generate_scripts_tasks, generate_tasks, init_domain_tasks, process_task 15 | from graphinder.utils.assets import fetch_assets 16 | 17 | 18 | @pytest.fixture 19 | def domain() -> Domain: 20 | """Domain fixture.""" 21 | 22 | _domain: Domain = Domain(Url('example.com')) 23 | _domain.subdomains = ['api.example.com', 'test.example.com'] 24 | 25 | return _domain 26 | 27 | 28 | def test_generate_scripts_tasks(domain: Domain) -> None: 29 | """generate_scripts_tasks test.""" 30 | 31 | tasks: TasksList = generate_scripts_tasks(domain) 32 | 33 | assert len(tasks) == 2, 'There should be 2 tasks.' 34 | 35 | 36 | def test_generate_bruteforce_tasks(domain: Domain) -> None: 37 | """generate_bruteforce_tasks test.""" 38 | 39 | tasks: TasksList = generate_bruteforce_tasks(domain) 40 | 41 | assert len(tasks) == 2 * len(gql_endpoints_characterizer()), 'There should be 2 * gql_endpoints_characterizer tasks.' 42 | 43 | 44 | def test_generate_tasks(domain: Domain) -> None: 45 | """generate_tasks test.""" 46 | 47 | args: argparse.Namespace = argument_builder([]) 48 | 49 | tasks: TasksList = generate_tasks(domain, args) 50 | assert len(tasks) == 2 * len(gql_endpoints_characterizer()) + 2, 'There should be 2 * gql_endpoints_characterizer + 2 tasks.' 51 | 52 | args = argument_builder(['--no-script']) 53 | tasks = generate_tasks(domain, args) 54 | assert len(tasks) == 2 * len(gql_endpoints_characterizer()), 'There should be 2 * gql_endpoints_characterizer tasks.' 55 | 56 | args = argument_builder(['--no-bruteforce']) 57 | tasks = generate_tasks(domain, args) 58 | assert len(tasks) == 2, 'There should be 2 tasks.' 59 | 60 | args = argument_builder(['--no-script', '--no-bruteforce']) 61 | tasks = generate_tasks(domain, args) 62 | assert len(tasks) == 0, 'There should be 0 tasks.' 
63 | 64 | 65 | def test_init_domain_tasks(domain: Domain) -> None: 66 | """init_domain_tasks test.""" 67 | 68 | fetch_assets() 69 | 70 | tasks: TasksList = init_domain_tasks(domain, argument_builder([])) 71 | 72 | assert len(tasks) == 100 * len(gql_endpoints_characterizer()) + 100, 'There should be 100 * gql_endpoints_characterizer + 100 tasks.' 73 | 74 | 75 | @pytest.mark.asyncio 76 | async def test_add_tasks() -> None: 77 | """add_tasks test.""" 78 | 79 | assert len(asyncio.all_tasks()) == 1, 'There should be 1 tasks.' 80 | 81 | await add_tasks(Domain(Url('example.com')), {Url('http://example.com/')}, TaskTags.FETCH_PAGE_SCRIPTS) 82 | 83 | assert len(asyncio.all_tasks()) == 2, 'There should be 2 tasks.' 84 | 85 | 86 | @pytest.mark.asyncio 87 | async def test_process_task() -> None: 88 | """process_task test.""" 89 | 90 | assert len(asyncio.all_tasks()) == 1, 'There should be 1 tasks.' 91 | 92 | try: 93 | task = Task('example.com', 'unknown tag', 'example.com') # type: ignore[arg-type] 94 | await process_task(task, Domain('example.com')) 95 | 96 | assert False, 'Unknown tag should raise an error.' 97 | except NotImplementedError: 98 | pass 99 | 100 | 101 | @pytest.mark.asyncio 102 | async def test_consume_tasks() -> None: 103 | """consume_tasks test.""" 104 | -------------------------------------------------------------------------------- /tests/unit/test_main.py: -------------------------------------------------------------------------------- 1 | """Test main.py.""" 2 | 3 | import argparse 4 | 5 | import pkg_resources 6 | 7 | from graphinder.main import __version__, argument_builder, main, validate_arguments 8 | from graphinder.utils.logger import get_logger 9 | 10 | 11 | def test_version() -> None: 12 | """version test.""" 13 | assert __version__ == pkg_resources.get_distribution('graphinder').version, 'Version has been changed, please update the test.' 
14 | 15 | 16 | def test_argument_builder() -> None: 17 | """argument_builder test.""" 18 | 19 | args: argparse.Namespace = argument_builder([]) 20 | 21 | assert args.domain is None 22 | assert not args.verbose_mode 23 | assert not args.no_script_mode 24 | assert not args.no_bruteforce_mode 25 | assert args.reduce_mode == 100 26 | 27 | args = argument_builder(['-d', 'example.com']) 28 | 29 | assert args.domain == 'example.com' 30 | 31 | args = argument_builder(['--no-bruteforce']) 32 | 33 | assert args.no_bruteforce_mode 34 | 35 | 36 | def test_validate_arguments() -> None: 37 | """validate_arguments test.""" 38 | 39 | logger = get_logger() 40 | args: argparse.Namespace = argument_builder([]) 41 | 42 | assert not validate_arguments(logger, args) 43 | 44 | args = argument_builder(['-d', 'example.com']) 45 | assert validate_arguments(logger, args) 46 | 47 | args = argument_builder(['--no-script', '--no-bruteforce']) 48 | assert not validate_arguments(logger, args) 49 | 50 | args = argument_builder(['-d', 'example.com', '--no-script', '--no-bruteforce']) 51 | assert not validate_arguments(logger, args) 52 | 53 | 54 | def test_main() -> None: 55 | """main test.""" 56 | 57 | assert not main([]) 58 | 59 | 60 | def test_full_run() -> None: 61 | """Test a complete run.""" 62 | 63 | assert main(['-d', 'example.com']) == {'example.com': set()} 64 | -------------------------------------------------------------------------------- /tests/unit/utils/test_assets.py: -------------------------------------------------------------------------------- 1 | """Test utils/assets.py.""" 2 | 3 | import os 4 | import urllib.request 5 | 6 | from graphinder.utils.assets import _compose_subfinder_url, _extract_file, fetch_assets 7 | 8 | 9 | def test_fetch_assets() -> None: 10 | """fetch_assets test.""" 11 | 12 | path: str = 'subfinder' 13 | 14 | if os.path.isfile(path): 15 | os.remove(path) 16 | 17 | fetch_assets() 18 | assert os.path.isfile(path) 19 | 20 | fetch_assets() 21 | assert 
os.path.isfile(path) 22 | 23 | if os.path.isfile(path): 24 | os.remove(path) 25 | 26 | 27 | def test_extract_file_zip() -> None: 28 | """_extract_file test for zip.""" 29 | 30 | name: str = 'subfinder' 31 | 32 | if os.path.isfile(name): 33 | os.remove(name) 34 | 35 | url = _compose_subfinder_url('linux', 'amd64') 36 | urllib.request.urlretrieve(url, f'{name}.zip') 37 | assert os.path.isfile(f'{name}.zip'), f'{name}.zip not found.' 38 | 39 | _extract_file(f'{name}.zip') 40 | 41 | assert os.path.isfile(name), f'{name} not found.' 42 | 43 | os.remove(f'{name}') 44 | os.remove(f'{name}.zip') 45 | 46 | 47 | def test_compose_subfinder_url() -> None: 48 | """_compose_subfinder_url test.""" 49 | 50 | assert _compose_subfinder_url('linux', 'amd64') == 'https://github.com/projectdiscovery/subfinder/releases/download/v2.5.1/subfinder_2.5.1_linux_amd64.zip' 51 | assert _compose_subfinder_url('darwin', 'amd64') == 'https://github.com/projectdiscovery/subfinder/releases/download/v2.5.1/subfinder_2.5.1_macOS_amd64.zip' 52 | assert _compose_subfinder_url('win', 'i386') == 'https://github.com/projectdiscovery/subfinder/releases/download/v2.5.1/subfinder_2.5.1_windows_386.zip' 53 | assert _compose_subfinder_url('darwin', 'arm') == 'https://github.com/projectdiscovery/subfinder/releases/download/v2.5.1/subfinder_2.5.1_macOS_arm64.zip' 54 | 55 | try: 56 | _compose_subfinder_url('unknown os', 'amd64') 57 | assert False, 'Expected NotImplementedError.' 
58 | except NotImplementedError: 59 | pass 60 | -------------------------------------------------------------------------------- /tests/unit/utils/test_filters.py: -------------------------------------------------------------------------------- 1 | """Test utils/filters.py.""" 2 | 3 | from typing import List, Set 4 | 5 | import pytest 6 | 7 | from graphinder.entities.pool import Url 8 | from graphinder.io.providers import gql_endpoints_characterizer 9 | from graphinder.utils.filters import filter_common, filter_urls, remove_duplicate_domains, transform_url_in_domain 10 | 11 | 12 | def test_filter_common() -> None: 13 | """test for filter_common.""" 14 | 15 | _input: Set[str] = { 16 | 'http://a', 17 | 'http://x', 18 | 'https://w3.org', 19 | 'https://localhost', 20 | 'https://schema.org', 21 | 'https://sentry.io', 22 | 'https://git.io', 23 | 'https://github.com', 24 | 'https://nuxtjs.org', 25 | 'https://momentjs.com', 26 | 'https://fb.me', 27 | 'https://reactjs.org', 28 | 'https://slack', 29 | 'https://google', 30 | 'https://twitter', 31 | 'https://example.com', 32 | 'https://apps.example.com', 33 | 'https://www.example.com', 34 | 'https://example.com/graphql', 35 | 'https://example.com/api/graphql', 36 | 'https://example.com/api/v1/graphql', 37 | 'https://example.com/graphql', 38 | 'https://example.com/api/graphql', 39 | 'https://example.com/api/v1/graphql', 40 | } 41 | 42 | assert filter_common(_input) == { 43 | 'https://example.com', 'https://apps.example.com', 'https://www.example.com', 'https://example.com/graphql', 'https://example.com/api/graphql', 44 | 'https://example.com/api/v1/graphql', 'https://example.com/graphql', 'https://example.com/api/graphql', 'https://example.com/api/v1/graphql' 45 | } 46 | 47 | 48 | def test_filter_urls() -> None: 49 | """test for filter_urls.""" 50 | 51 | _input: Set[Url] = set() 52 | 53 | for url in gql_endpoints_characterizer(): 54 | _input.add(Url('https://example.com/' + url)) 55 | 56 | assert filter_urls(_input) == 
{Url('https://example.com/graphql')} 57 | 58 | 59 | def test_remove_duplicate_domains() -> None: 60 | """test for duplicate_domain.""" 61 | 62 | domains: List[str] = [ 63 | 'example.com', 64 | 'www.example.com', 65 | ] 66 | 67 | assert remove_duplicate_domains(domains) == [ 68 | 'example.com', 69 | ] 70 | 71 | 72 | @pytest.mark.parametrize('url,expected', [ 73 | ('https://example.com', 'example.com'), 74 | ('https://example.com/', 'example.com'), 75 | ]) 76 | def test_transform_url_in_domain( 77 | url: str, 78 | expected: str, 79 | ) -> None: 80 | """test for transform_url_in_domain.""" 81 | 82 | assert transform_url_in_domain(url) == expected 83 | -------------------------------------------------------------------------------- /tests/unit/utils/test_finders.py: -------------------------------------------------------------------------------- 1 | """Test utils/finders.py.""" 2 | 3 | from typing import List 4 | 5 | from graphinder.utils.finders import find_script_fetch_graphql, find_script_full_urls, find_script_window_base_urls 6 | 7 | 8 | def test_find_script_full_urls() -> None: 9 | """find_script_full_urls test.""" 10 | 11 | script_file: str = """ 12 | https://example.com 13 | https://apps.example.com 14 | https://www.example.com 15 | """ 16 | 17 | urls: List[str] = find_script_full_urls(script_file) 18 | 19 | assert urls == [ 20 | 'https://example.com', 21 | 'https://apps.example.com', 22 | 'https://www.example.com', 23 | ] 24 | 25 | 26 | def test_find_script_window_base_urls() -> None: 27 | """find_script_window_base_urls test.""" 28 | 29 | script_file: str = """ 30 | {var e=new ze({uri:window.__BASE_URL__+"/graphql",credentials:"same-origin"} 31 | window.__BASE_URL__+"/api/graphql" 32 | window.__BASE_URL__ + "/api/v1/graphql" 33 | """ 34 | 35 | urls: List[str] = find_script_window_base_urls('https://example.com', script_file) 36 | 37 | assert urls == [ 38 | 'https://example.com/graphql', 39 | 'https://example.com/api/graphql', 40 | 
'https://example.com/api/v1/graphql', 41 | ] 42 | 43 | 44 | def test_find_script_fetch_graphql() -> None: 45 | """find_script_fetch_graphql test.""" 46 | 47 | script_file: str = """ 48 | function s(e){return e.options.siteId?fetch("/graphql",{method:"POST",credentials:"same-origin",headers:{"Content-Type":"application/json"},body:JSON.stringify({query:"{ me { id ... 49 | fetch("/api/graphql") 50 | fetch("/api/v1/graphql") 51 | """ 52 | 53 | urls: List[str] = find_script_fetch_graphql('https://example.com', script_file) 54 | 55 | assert urls == [ 56 | 'https://example.com/graphql', 57 | 'https://example.com/api/graphql', 58 | 'https://example.com/api/v1/graphql', 59 | ] 60 | -------------------------------------------------------------------------------- /tests/unit/utils/test_logger.py: -------------------------------------------------------------------------------- 1 | """Test utils/logger.py.""" 2 | 3 | import logging 4 | from typing import Any 5 | 6 | from graphinder.utils.logger import disable_internal_loggers, get_logger, setup_logger 7 | 8 | 9 | def test_get_logger() -> None: 10 | """get_logger test.""" 11 | 12 | logger: logging.Logger = get_logger() 13 | assert logger.name == 'graphinder' 14 | 15 | assert isinstance(logger, logging.Logger) 16 | 17 | 18 | def test_setup_logger(caplog: Any) -> None: 19 | """setup_logger test.""" 20 | 21 | caplog.set_level(0) 22 | 23 | logger: logging.Logger = setup_logger(False) 24 | assert logger.name == 'graphinder' 25 | 26 | logger.info('test info') 27 | logger.debug('test debug') 28 | 29 | assert 'test info' in caplog.text 30 | assert 'test debug' in caplog.text 31 | 32 | 33 | def test_disable_internal_loggers(caplog: Any) -> None: 34 | """disable_internal_loggers test.""" 35 | 36 | caplog.set_level(0) 37 | 38 | disable_internal_loggers() 39 | logger: logging.Logger = logging.getLogger('asyncio') 40 | 41 | logger.info('test info') 42 | 43 | assert 'test info' not in caplog.text 44 | 
-------------------------------------------------------------------------------- /tests/unit/utils/test_webhook.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name 2 | 3 | """Test for utils/webhook.py.""" 4 | 5 | import pytest 6 | from pytest_mock import MockerFixture 7 | 8 | from graphinder.entities.io import Results 9 | from graphinder.entities.pool import Url 10 | from graphinder.utils.webhook import format_webhook, send_webhook 11 | 12 | 13 | @pytest.fixture 14 | def result_one_domain() -> Results: 15 | """Return a Results object with one domain.""" 16 | 17 | return { 18 | 'example.com': { 19 | Url('http://www.example.com/graphql'), 20 | Url('http://admin.example.com/graphql'), 21 | } 22 | } 23 | 24 | 25 | @pytest.fixture 26 | def result_multiple_domain() -> Results: 27 | """Return a Results object with multiples domain.""" 28 | 29 | return { 30 | 'example.com': { 31 | Url('http://www.example.com/graphql'), 32 | Url('http://admin.example.com/graphql'), 33 | }, 34 | 'example2.com': { 35 | Url('http://www.example2.com/graphql'), 36 | Url('http://admin.example2.com/graphql'), 37 | } 38 | } 39 | 40 | 41 | def test_format_webhook_single(result_one_domain: Results) -> None: 42 | """Test for format_webhook.""" 43 | 44 | formatted = format_webhook(result_one_domain) 45 | 46 | assert formatted['username'] == 'Graphinder' 47 | assert 0 <= formatted['embeds'][0]['color'] <= 16777215 48 | assert formatted['embeds'][0]['title'] == 'example.com' 49 | assert formatted['embeds'][0]['description'] == '\n'.join(result_one_domain['example.com']) 50 | 51 | 52 | def test_format_webhook_multiple(result_multiple_domain: Results) -> None: 53 | """Test for format_webhook.""" 54 | 55 | formatted = format_webhook(result_multiple_domain) 56 | 57 | assert len(formatted['embeds']) == 2 58 | 59 | 60 | def test_send_webhook(mocker: MockerFixture, result_one_domain: Results) -> None: 61 | """Test for send_webhook.""" 
62 | 63 | mocker.patch('requests.post', return_value=mocker.Mock(status_code=204)) 64 | 65 | url = 'http://mocked.com/webhook' 66 | 67 | assert send_webhook(url, result_one_domain) 68 | --------------------------------------------------------------------------------