├── .flake8
├── .github
    └── workflows
    │   └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── censeye
    ├── __init__.py
    ├── __main__.py
    ├── __version__.py
    ├── aggregator.py
    ├── censeye.py
    ├── cli.py
    ├── config.py
    ├── const.py
    ├── gadget.py
    └── gadgets
    │   ├── __init__.py
    │   ├── nobbler.py
    │   ├── opendir.py
    │   ├── threatfox.py
    │   └── vt.py
├── config.yaml
├── pyproject.toml
├── requirements.txt
├── setup.py
└── static
    ├── 2024-11-26_13-19.png
    ├── cert_history.png
    ├── diag.png
    ├── gadget_nobbler.png
    ├── gadget_open_dir.png
    ├── logo.png
    └── query_prefix_01.png


/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | max-line-length = 88
 3 | exclude =
 4 |     .venv,
 5 |     # No need to traverse our git directory
 6 |     .git,
 7 |     # There's no value in checking cache directories
 8 |     __pycache__,
 9 |     # This contains our built documentation
10 |     build,
11 |     # This contains builds
12 |     dist
13 | extend-ignore = E203,E501,E701
14 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | name: pre-commit
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches: [main, develop]
 7 | 
 8 | jobs:
 9 |   pre-commit:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v3
13 |       - uses: actions/setup-python@v3
14 |         with:
15 |           python-version: 3.9
16 |       - uses: pre-commit/action@v3.0.1
17 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib64/
 18 | parts/
 19 | sdist/
 20 | var/
 21 | wheels/
 22 | share/python-wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .nox/
 42 | .coverage
 43 | .coverage.*
 44 | .cache
 45 | nosetests.xml
 46 | coverage.xml
 47 | *.cover
 48 | *.py,cover
 49 | .hypothesis/
 50 | .pytest_cache/
 51 | cover/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Django stuff:
 58 | *.log
 59 | local_settings.py
 60 | db.sqlite3
 61 | db.sqlite3-journal
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | 
 73 | # PyBuilder
 74 | .pybuilder/
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | #   For a library or package, you might want to ignore these files since the code is
 86 | #   intended to run in multiple environments; otherwise, check them in:
 87 | # .python-version
 88 | 
 89 | # pipenv
 90 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 91 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 92 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 93 | #   install all needed dependencies.
 94 | #Pipfile.lock
 95 | 
 96 | # poetry
 97 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 98 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
 99 | #   commonly ignored for libraries.
100 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
101 | #poetry.lock
102 | 
103 | # pdm
104 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
105 | #pdm.lock
106 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
107 | #   in version control.
108 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
109 | .pdm.toml
110 | .pdm-python
111 | .pdm-build/
112 | 
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 | 
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 | 
120 | # SageMath parsed files
121 | *.sage.py
122 | 
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 | 
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 | 
136 | # Rope project settings
137 | .ropeproject
138 | 
139 | # mkdocs documentation
140 | /site
141 | 
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 | 
147 | # Pyre type checker
148 | .pyre/
149 | 
150 | # pytype static type analyzer
151 | .pytype/
152 | 
153 | # Cython debug symbols
154 | cython_debug/
155 | 
156 | # PyCharm
157 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
160 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # Install pre-commit hook
 2 | #   pre-commit install
 3 | # Apply to all files without committing:
 4 | #   pre-commit run --all-files
 5 | # Update this file:
 6 | #   pre-commit autoupdate
 7 | repos:
 8 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 9 |     rev: v5.0.0
10 |     hooks:
11 |       - id: check-case-conflict
12 |       - id: check-toml
13 |       - id: check-yaml
14 |       - id: end-of-file-fixer
15 |       - id: mixed-line-ending
16 |       - id: trailing-whitespace
17 |       - id: requirements-txt-fixer
18 |   - repo: https://github.com/asottile/pyupgrade
19 |     rev: v3.19.1
20 |     hooks:
21 |       - id: pyupgrade
22 |         args: [--py39-plus]
23 |   - repo: https://github.com/psf/black-pre-commit-mirror
24 |     rev: 24.10.0
25 |     hooks:
26 |       - id: black
27 |         language_version: python3.9
28 |   - repo: https://github.com/pycqa/isort
29 |     rev: 5.13.2
30 |     hooks:
31 |       - id: isort
32 |   - repo: https://github.com/pycqa/flake8
33 |     rev: 7.1.1
34 |     hooks:
35 |       - id: flake8
36 |         additional_dependencies: [flake8-bugbear]
37 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # ChangeLog
 2 | 
 3 | ## 1.0.0
 4 | 
 5 | ### New Features
 6 | 
 7 | - Gadgets (plugins)
 8 |   * A new plugin system has been implemented called "Censeye Gadgets" which allow users to extend the functionality of Censeye by either generating queries using host data, or labeling hosts based on the results of queries. Query generators are treated just like baseline searches, so auto-pivoting will follow queries generated by gadgets.
 9 |   * Current list of gadgets can be viewed by running `censeye --list-gadgets`
10 |   * New gadgets:
11 |     - Host Labelers:
12 |         * `virustotal`: This gadget will query the VirusTotal API for the host's IP address and label the host with the results (`in-virustotal`).
13 |         * `threatfox`: This gadget will query the ThreatFox API for the host's IP address and label the host with the results (`in-threatfox`).
14 |     - Query Generators:
15 |         * `open-directory`: When a service is found with an open directory listing, this gadget will attempt to parse out the file names from the HTTP response body and generate queries for each file found.
16 |         * `nobbler`: When the `service_name` is `UNKNOWN`, it is often more effective to search the first N bytes of the response rather than analyzing the entire response. So this gadget will generate queries for the first (configurable array of offsets) N bytes of the response.
17 |             - Check out how `nobbler` helped us identify Metasploit payloads:
18 | 
19 | ```
20 | $ censeye --gadget nobbler 45.XXX.XXX.XX
21 | 
22 | 45.XXX.XXX.XX (depth: 0) (Via: None --  -- ['remote-access', 'database', 'in-threatfox', 'in-virustotal'])
23 |      Hosts   Key                                      Val
24 |  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
25 | ... snip snip ...
26 |         92   nobbler.gadget.censeye                   services.banner_hex=5748*
27 |         32   nobbler.gadget.censeye                   services.banner_hex=574831ff*
28 |         14   nobbler.gadget.censeye                   services.banner_hex=574831ff48c7c6c4*
29 |         14   nobbler.gadget.censeye                   services.banner_hex=574831ff48c7c6c4072e0048c7c20700*
30 | ... snip snip ...
31 | ```
32 | 
33 | Here is how the `open-directory` gadget looks:
34 | 
35 | ```
36 | 80.XX.XX.XXX (depth: 0) (Via: None --  -- ['torrent', 'file-sharing', 'open-dir', 'suspicious-open-dir'])
37 | 
38 |     Hosts   Key                                                            Val
39 |  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
40 |  ... snip snip ...
41 |      1508   open-directory.gadget.censeye                                  services:(labels=open-dir and http.response.body='*2.png*')
42 |        98   open-directory.gadget.censeye                                  services:(labels=open-dir and http.response.body='*Office/*')
43 |         3   open-directory.gadget.censeye                                  services:(labels=open-dir and http.response.body='*Filmek/*')
44 |         2   open-directory.gadget.censeye                                  services:(labels=open-dir and http.response.body='*Spotify.apk*')
45 |         2   open-directory.gadget.censeye                                  services:(labels=open-dir and http.response.body='*%c3%9aj%20mappa/*')
46 |         1   open-directory.gadget.censeye                                  services:(labels=open-dir and http.response.body='*Platformer%20teszt_1_1.0.apk*')
47 |         1   open-directory.gadget.censeye                                  services:(labels=open-dir and http.response.body='*2024-03-17_20h12_04.png*')
48 |         1   open-directory.gadget.censeye                                  services:(labels=open-dir and http.response.body='*RazorEngine.zip*')
49 | ... snip snip ...
50 | 
51 | Interesting search terms: 4
52 |  - services:(labels=open-dir and http.response.body='*%c3%9aj%20mappa/*')
53 |  - services:(labels=open-dir and http.response.body='*Filmek/*')
54 |  - services:(labels=open-dir and http.response.body='*Office/*')
55 |  - services:(labels=open-dir and http.response.body='*Spotify.apk*')
56 | ```
57 | 
58 | 
59 | - Added several useful fields from `COBALT_STRIKE` beacon service into the default fields configuration.
60 | - Added all (current) gadgets to the default configuration, but they are disabled by default.
61 | 	- Can be enabled in the configuration file, or using the `--gadget` argument.
62 | - Proper Packaging and distribution: now on [pypi](https://pypi.org/project/censeye/)!
63 | - Code was restructured and refactored in order to make lives easier in the future now that it's no longer an internal-only script.
64 | - Documentation was updated for all the new features.
65 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2024, Censys, Inc.
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 5 | 
 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 7 | 
 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 9 | 
10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include requirements.txt
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Contents
  2 | 
  3 | * [Censeye](#censeye)
  4 |    * [Introduction](#introduction)
  5 |    * [Setup](#setup)
  6 |    * [How?](#how)
  7 |    * [Warning](#warning)
  8 |    * [Usage](#usage)
  9 |    * [Reporting](#reporting)
 10 |    * [Auto Pivoting](#auto-pivoting)
 11 |    * [Historical Certificates](#historical-certificates)
 12 |    * [Query Prefix Filtering](#query-prefix-filtering)
 13 |    * [Saving reports](#saving-reports)
 14 |    * [Gadgets](#gadgets)
 15 |       * [Query Generators](#query-generators)
 16 |       * [Host Labelers](#host-labelers)
 17 |       * [Developing New Gadgets](#developing-new-gadgets)
 18 |    * [Configuration](#configuration)
 19 |       * [Configuring Rarity](#configuring-rarity)
 20 |       * [Configuring Fields](#configuring-fields)
 21 |          * [Ignoring field values](#ignoring-field-values)
 22 |          * [Field weights](#field-weights)
 23 |          * [Value-only fields](#value-only-fields)
 24 |       * [Configuring Gadgets](#configuring-gadgets)
 25 |          * [Open Directory Gadget Configuration](#open-directory-gadget-configuration)
 26 |          * [Nobbler Gadget Configuration](#nobbler-gadget-configuration)
 27 |    * [Workspaces](#workspaces)
 28 |    * [Contributing](#contributing)
 29 |       * [Developer Setup](#developer-setup)
 30 | 
 31 | # Censeye
 32 | 
 33 | ## Introduction
 34 | 
 35 | This tool is designed to help researchers identify hosts with characteristics similar to a given target. For instance, if you come across a suspicious host, the tool enables you to determine the most effective Censys search terms for discovering related infrastructure. Once those search terms are identified, the utility can automatically query the Censys API to fetch hosts matching those criteria, download the results, and repeat the analysis on the newly found hosts.
 36 | 
 37 | Censeye was hacked together over the course of a few weeks to automate routine tasks performed by our research team. While it has proven useful in streamlining daily workflows, its effectiveness may vary depending on specific use cases.
 38 | 
 39 | ## Setup
 40 | 
 41 | Install the tool using pip:
 42 | 
 43 | ```shell
 44 | pip install censeye
 45 | censeye --help
 46 | ```
 47 | 
 48 | **Note**: Censeye requires the latest version of [censys-python](https://github.com/censys/censys-python) and a Censys API key, this is configured via the `censys` command-line tool:
 49 | 
 50 | ```shell
 51 | $ censys config
 52 | 
 53 | Censys API ID: XXX
 54 | Censys API Secret: XXX
 55 | Do you want color output? [y/n]: y
 56 | 
 57 | Successfully authenticated for your@email.com
 58 | ```
 59 | 
 60 | ## How?
 61 | 
 62 | ![diagram](./static/diag.png)
 63 | 
 64 | <BS>
 65 | The visual representation above outlines how Censeye operates. In textual form, the tool follows a straightforward workflow:
 66 | 
 67 | 1. **Fetch Initial Host Data**
 68 |    Use the Censys Host API to retrieve data for a specified host.
 69 | 
 70 | 2. **Generate Search Queries**
 71 |    For each [keyword](https://search.censys.io/search/definitions?resource=hosts) found in the host data (see: [Configuration](#configuration)), generate a valid Censys search query that matches the corresponding key-value pair.
 72 |    Example:
 73 |    `services.ssh.server_host_key.fingerprint_sha256=531a33202a58e4437317f8086d1847a6e770b2017b34b6676a033e9dc30a319c`
 74 | 
 75 | 3. **Aggregate Data Using Reporting API**
 76 |    Leverage the Censys Reporting API to generate aggregate reports for each search query, using `ip` as the "breakdown" with a bucket count of `1`. The `total` value is used to determine the number of hosts matching each query.
 77 | 
 78 | 4. **Identify "Interesting" Queries**
 79 |    Censys search queries with a host count (aka: [rarity](#configuring-rarity) ) between 2 and a configurable maximum are tagged as as "interesting." These queries represent search terms observed on the host that are also found in a limited number of other hosts.
 80 | 
 81 | 5. **Recursive Pivoting (Optional)**
 82 |    If the `--depth` flag is set to a value greater than zero, the tool uses the Censys Search API to fetch a list of hosts matching the "interesting" search queries. It then loops back to Step 1 for these newly discovered hosts, repeating the process until the specified depth is reached.
 83 | 
 84 |    **Note:** Queries are never reused across different depths. For example, a query identified at depth 1 will not be applied at depths 2 or beyond.
 85 | 
 86 | Censeye includes multiple layers of caching and filtering, all of which can be adjusted to suit specific requirements.
 87 | 
 88 | ## Warning
 89 | 
 90 | This tool is not intended for correlating vast numbers of hosts. Instead, it focuses on identifying connections using unique search key/value pairs. If your goal is to explore questions like "What other services do servers running Apache also host?" this is not the right tool.
 91 | 
 92 | Additionally, Censeye can be quite query-intensive. The auto-pivoting feature, in particular, requires a significant number of queries, making it less practical for those with limited query access (e.g., users outside of Censys).
 93 | 
 94 | **Use this tool at your own discretion. We are not responsible for any depletion of your quotas resulting from its use.**
 95 | 
 96 | ## Usage
 97 | 
 98 | ```plain
 99 | Usage: censeye [OPTIONS] [IP]
100 | 
101 | Options:
102 |   -d, --depth INTEGER             [auto-pivoting] search depth (0 is single host, 1 is all the hosts that host found,
103 |                                   etc...)
104 |   --workers INTEGER               number of workers to run queries in parallel
105 |   -w, --workspace TEXT            directory for caching results (defaults to XDG configuration path)
106 |   -m, --max-search-results INTEGER
107 |                                   maximum number of censys search results to process
108 |   -ll, --log-level TEXT           set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
109 |   -s, --save TEXT                 save report to a file
110 |   -p, --pivot-threshold INTEGER   maximum number of hosts for a search term that will trigger a pivot (default: 120)
111 |   -a, --at-time [%Y-%m-%d %H:%M:%S|%Y-%m-%d]
112 |                                   historical host data at_time.
113 |   -q, --query-prefix TEXT         prefix to add to all queries (useful for filtering, the ' and ' is added automatically)
114 |   --input-workers INTEGER         number of parallel workers to process inputs (e.g., only has an effect on stdin inputs)
115 |   -qp, --query-prefix-count       If the --query-prefix is set, this will return a count of hosts for both the filtered and
116 |                                   unfiltered results.
117 |   -c, --config TEXT               configuration file path
118 |   -mp, -M, --min-pivot-weight FLOAT
119 |                                   [auto-pivoting] only pivot into fields with a weight greater-than or equal-to this number
120 |                                   (see configuration)
121 |   --fast                          [auto-pivoting] alias for --min-pivot-weight 1.0
122 |   --slow                          [auto-pivoting] alias for --min-pivot-weight 0.0
123 |   -G, --gadget TEXT               list of gadgets to load
124 |   --list-gadgets                  list available gadgets
125 |   --version                       Show the version and exit.
126 |   -h, --help                      Show this message and exit.
127 | ```
128 | 
129 | These options will all override the settings in the [configuration](#configuration) file.
130 | 
131 | If an IP is not specified in the arguments, the default behavior is to read IPs from stdin. This enables integration with other tools to seed input for this utility. For example:
132 | 
133 | ```shell
134 | censys search labels=c2 | jq '.[].ip' | censeye
135 | ```
136 | 
137 | ## Reporting
138 | 
139 | ![simple screenshot](./static/2024-11-26_13-19.png)
140 | 
141 | Above is a screenshot of a very simple report generated by Censeye for a single host. Each row contains three columns:
142 | 
143 | 1. The number of matching hosts for the given field.
144 | 2. The key.
145 | 3. The value of the key.
146 | 
147 | If your terminal supports it, each row is clickable and will navigate to the Censys website for the corresponding datapoint.
148 | 
149 | The next report, labeled `Interesting search terms`, is an aggregate list of all Censys search statements that fall within the [rarity](#configuring-rarity) threshold—also referred to as "Interesting search terms."
150 | 
151 | ## Auto Pivoting
152 | 
153 | Like web crawlers discover websites, Censeye can be used to crawl Censys!
154 | 
155 | When the `--depth` argument is set to a value greater than zero, the "interesting" fields are used to query the search API to retrieve lists of matching hosts. These hosts are then fed back into Censeye as input to generate additional reports.
156 | 
157 | Furthermore, the output will include a new section labeled the `Pivot Tree`. For example:
158 | 
159 | ```plain
160 | Pivot Tree:
161 | 5.188.87.38
162 | ├── 5.178.1.11      (via: services.ssh.server_host_key.fingerprint_sha256="f95812cbb46f0a664a8f2200592369b105d17dfe8255054963aac4e2df53df51") ['remote-access']
163 | ├── 147.78.46.112   (via: services.ssh.server_host_key.fingerprint_sha256="f95812cbb46f0a664a8f2200592369b105d17dfe8255054963aac4e2df53df51") ['remote-access']
164 | ├── 179.60.149.209  (via: services.ssh.server_host_key.fingerprint_sha256="f95812cbb46f0a664a8f2200592369b105d17dfe8255054963aac4e2df53df51") ['remote-access']
165 | │   ├── 5.161.114.184   (via: services.ssh.server_host_key.fingerprint_sha256="6278464bcad66259d2cd62deeb11c8488f170a1a650d5748bd7a8610026ca634") ['remote-access']
166 | │   ├── 185.232.67.15   (via: services.ssh.server_host_key.fingerprint_sha256="6278464bcad66259d2cd62deeb11c8488f170a1a650d5748bd7a8610026ca634") ['remote-access']
167 | │   │   ├── 193.29.13.183   (via: services.ssh.server_host_key.fingerprint_sha256="bd613b3be57f18c3bceb0aaf86a28ad8b6df7f9bccacf58044f1068d1787f8a5") ['remote-access']
168 | │   │   ├── 45.227.252.245  (via: services.ssh.server_host_key.fingerprint_sha256="bd613b3be57f18c3bceb0aaf86a28ad8b6df7f9bccacf58044f1068d1787f8a5") ['remote-access']
169 | │   │   ├── 45.145.20.211   (via: services.ssh.server_host_key.fingerprint_sha256="bd613b3be57f18c3bceb0aaf86a28ad8b6df7f9bccacf58044f1068d1787f8a5") ['remote-access']
170 | │   │   ├── 193.142.30.165  (via: services.ssh.server_host_key.fingerprint_sha256="bd613b3be57f18c3bceb0aaf86a28ad8b6df7f9bccacf58044f1068d1787f8a5") ['remote-access']
171 | │   ├── 77.220.213.90   (via: services.ssh.server_host_key.fingerprint_sha256="6278464bcad66259d2cd62deeb11c8488f170a1a650d5748bd7a8610026ca634") ['remote-access']
172 | ... snip snip ...
173 | ```
174 | 
175 | Here, our initial input was the host `5.188.87.38`. Using the host details from this IP, we identified an SSH fingerprint that appeared on a limited number of other hosts. Censeye then fetched those matching hosts and generated reports for them.
176 | 
177 | One of the matching hosts was `179.60.149.209`, and you can see how Censeye discovered that host through the `via:` statement in the report:
178 | 
179 | ```plain
180 | ├── 179.60.149.209  (via: services.ssh.server_host_key.fingerprint_sha256="f95812cbb46f0a664a8f2200592369b105d17dfe8255054963aac4e2df53df51")
181 | ```
182 | 
183 | - `179.60.149.209` was found using the search query `services.ssh.server_host_key.fingerprint_sha256="f95812cbb46f0a664a8f2200592369b105d17dfe8255054963aac4e2df53df51"` that was found on `5.188.87.38`
184 | - `185.232.67.15` was found using the search query `services.ssh.server_host_key.fingerprint_sha256="6278464bcad66259d2cd62deeb11c8488f170a1a650d5748bd7a8610026ca634"` which was found running on `179.60.149.209`
185 | - `193.29.13.183` was found using the search query `services.ssh.server_host_key.fingerprint_sha256="bd613b3be57f18c3bceb0aaf86a28ad8b6df7f9bccacf58044f1068d1787f8a5"` which was found running on `185.232.67.15`
186 | 
187 | ## Historical Certificates
188 | 
189 | There are some special cases for reporting, one of which involves TLS certificate fingerprints. If a certificate is found on a host and it is unique to that host (i.e., only observed on the current host being analyzed), Censeye will query historical data in Censys and report all hosts in the past that have used this certificate.
190 | 
191 | ![tls history](./static/cert_history.png)
192 | 
193 | In this screenshot, we see that `113.250.188.15` has a TLS fingerprint `e426a94594510a5c2adb1f0ba062ed2c76756416dfe22b83121e5351031a5e1b` which is unique to this IP at present. However, the certificate has been observed on other hosts in the past. Notice the count column presented as `1 (+2)`. This indicates that there is only one current host with this certificate, but historical data reveals two additional hosts.
194 | 
195 | Historical certificate observations are also displayed as a tree beneath the main table. Each of these fields is clickable (if supported by your terminal) and links to the corresponding host on the given date.
196 | 
197 | These historical hosts are also included in [auto-pivoting](#auto-pivoting) if the `--depth` argument is set to a value greater than zero. In this case, the tool will use the host data from the time the certificate was observed to guide the crawler.
198 | 
199 | ## Query Prefix Filtering
200 | 
201 | One of the things we use this tool here at Censys for is to use hosts that we already know are malicious to find other hosts that may be malicious that we have not labeled as such. For example:
202 | 
203 | ```shell
204 | censys search 'labels=c2' | jq '.[].ip' | censeye --query-prefix 'not labels=c2'
205 | ```
206 | 
207 | This `--query-prefix` flag tells Censeye that for every aggregation report that it generates, add the `not labels=c2` to the query. The goal here is to look at hosts already labeled as a `c2` to find other hosts not labeled as `c2`.
208 | 
209 | ![query prefix example](./static/query_prefix_01.png)
210 | 
211 | In the above example under "Interesting search terms" we can see the resulting search terms that matched our rarity configuration. Note that there are several rows that have a count of `0`, this is because those fields were _only_ found on hosts already labeled `c2`.
212 | 
213 | ## Saving reports
214 | 
215 | If you wish to save the report as an HTML file, simply pass the `--save` flag with an output filename, and the whole thing is there.
216 | 
217 | ## Gadgets
218 | 
219 | Censeye "Gadgets" are bits of code that extend Censeye in two ways (currently): Query Generators, and Host Lableers. By default, all of the loaded gadgets are disabled, and can be enabled with the `--gadget` flag, or by adding them to the configuration file.
220 | 
221 | ```yaml
222 | gadgets:
223 |   - gadget: open-directory
224 |     enabled: true
225 |   - gadget: nobbler
226 |     enabled: true
227 |   - gadget: virustotal
228 |     enabled: true
229 |   - gadget: threatfox
230 |     enabled: true
231 | ```
232 | 
233 | A list of gadgets and their underlying documentation may be viewed with the `--list-gadgets` flag.
234 | 
235 | ```shell
236 | ~$ censeye --list-gadgets
237 |   name           │ aliases        │ desc
238 |  ════════════════╪════════════════╪═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════
239 |   open-directory │ odir, open-dir │ When a service is found with an open directory listing, this gadget will attempt to parse out the file names from
240 |                  │                │ the HTTP response body and generate queries for each file found.
241 |                  │                │
242 |                  │                │ This is useful for finding additional hosts with the same specific files.
243 |                  │                │
244 |                  │                │ Configuration
245 |                  │                │  - max_files: The maximum number of files to generate queries for.
246 |                  │                │    default: 32
247 |                  │                │  - min_chars: The minimum number of characters a file name must have to be considered.
248 |                  │                │    default: 2
249 |                  │                │
250 |   threatfox      │ tf             │ Gadget to label hosts that are present in ThreatFox.
251 |   virustotal     │ vt             │ A simple VirusTotal API client which will label the host if it is found to be malicious.
252 |                  │                │
253 |                  │                │     Configuration:
254 |                  │                │      - VT_API_KEY: *ENVVAR* VirusTotal API key
255 |                  │                │
256 |   nobbler        │ nob, nblr      │ When the service_name is UNKNOWN, it is often more effective to search the first N bytes of the response rather
257 |                  │                │ than analyzing the entire response.
258 |                  │                │
259 |                  │                │ Many services include a fixed header or a "magic number" at the beginning of their responses, followed by dynamic
260 |                  │                │ data at a later offset. This feature generates queries that focus on the initial N bytes of the response at various
261 |                  │                │ offsets while using wildcards for the remaining data.
262 |                  │                │
263 |                  │                │ The goal is to make the search more generalizable: analyzing the full UNKNOWN response might only match a specific
264 |                  │                │ host, whereas examining just the initial N bytes is likely to match similar services across multiple hosts.
265 |                  │                │
266 |                  │                │ Configuration:
267 |                  │                │  - iterations: A list of integers specifying the number of bytes to examine at the start of the response.
268 |                  │                │  - default: [4, 8, 16, 32]
269 |                  │                │    - services.banner_hex=XXXXXXXX*
270 |                  │                │    - services.banner_hex=XXXXXXXXXXXXXXXX*
271 |                  │                │    - services.banner_hex=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*
272 |                  │                │    - services.banner_hex=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*
273 | ```
274 | 
275 | ### Query Generators
276 | 
277 | Query Generator gadgets are used to generate additional queries for reporting and pivoting just like any other configured field.
278 | 
279 | Currently, there are two query generator gadgets included with Censeye:
280 | 
281 | ![open-directory example](./static/gadget_open_dir.png)
282 | 
283 | "open-directory": This gadget will parse out file names in HTTP response bodies from the Censys host result that have a known open-directory. These filenames will then be expanded to a proper Censys query (e.g., `services:(labels=open-dir and http.response.body='*$FILENAME*')`) to find other open-directories on other hosts.
284 | 
285 | ![nobbler example](./static/gadget_nobbler.png)
286 | 
287 | "nobbler": This gadget will look at `UNKNOWN` services and generate one or more queries that attempt to find other hosts with the same banner, but at different offsets. The idea that many unknown responses will contain some binary protocol where there may be a header or some common element at the start of the response, but the actual data may be dynamic.
288 | 
289 | ### Host Labelers
290 | 
291 | Host Labeler gadgets are used to add additional labels to the host data.
292 | 
293 | Currently, there are two host labeler gadgets included with Censeye:
294 | 
295 | - "virustotal" : This gadget will query the VirusTotal API for the IP address and add the results as a label to the host data.
296 | - "threatfox" : This gadget will query the ThreatFox API for the IP address and add the results as a label to the host data.
297 | 
298 | ### Developing New Gadgets
299 | 
300 | There are several good examples in `./censeye/gadgets` that can be used as a template for developing new gadgets. The `open-directory` gadget is a good example of a query generator, and the `virustotal` gadget is a good example of a host labeler.
301 | 
302 | ## Configuration
303 | 
304 | Censeye ships with a built-in configuration file that defines the general settings along with the [keyword definitions](https://search.censys.io/search/definitions?resource=hosts) that are used to generate reports. But this can be overloaded by using the `--config` argument or the file at `~/.config/censys/censeye.yaml` will tried by default. The following is a snippet of this configuration file:
305 | 
306 | ```yaml
307 | rarity:
308 |   min: 2               # minimum host count for a field to be treated as "interesting"
309 |   max: 120             # maximum host count for a field to be treated as "interesting"
310 | 
311 | fields:
312 |   - field: services.ssh.server_host_key.fingerprint_sha256
313 |     weight: 1.0
314 |   - field: services.http.response.body_hash
315 |     weight: 1.0
316 |     ignore:
317 |       - "sha1:4dcf84abb6c414259c1d5aec9a5598eebfcea842"
318 |       - "sha256:036bacf3bd34365006eac2a78e4520a953a6250e9550dcf9c9d4b0678c225b4c"
319 |   - field: services.tls.certificates.leaf_data.issuer_dn
320 |     weight: 1.0
321 |     ignore:
322 |       - "C=US, O=DigiCert Inc, CN=DigiCert Global G2 TLS RSA SHA256 2020 CA1"
323 |   - field: services.tls.certificates.leaf_data.subject.organization
324 |     weight: 1.0
325 |   - field: ~services.tls.certificates.leaf_data.subject.organization
326 |     weight: 0.5
327 |     ignore:
328 |       - "Cloudflare, Inc."
329 |   - field: services.http.response.html_tags
330 |     weight: 0.9
331 |     ignore:
332 |       - "<title>301 Moved Permanently</title>"
333 |       - "<title>403 Forbidden</title>"
334 |       - "<title> 403 Forbidden </title>"
335 |   - field: services.http.response.headers
336 |     weight: 0.8
337 |     ignore:
338 |       - "Location": ["*/"]
339 |       - "Vary": ["Accept-Encoding"]
340 |       - "Content-Type":
341 |           - "text/html"
342 |           - "text/html; charset=UTF-8"
343 |           - "text/html;charset=UTF-8"
344 |           - "text/html; charset=utf-8"
345 |       - "Connection":
346 |           - "close"
347 |           - "keep-alive"
348 |           - "Keep-Alive"
349 | ```
350 | 
351 | ### Configuring Rarity
352 | 
353 | The rarity setting defines what constitutes an "interesting" search term. Once an aggregation report is fetched for a given search statement, the term is flagged as "interesting" if the number of matching hosts is greater than `min` but less than `max`.
354 | 
355 | If the `--depth` flag is set, these "interesting" search terms are used to pivot and discover _other_ hosts. Otherwise, the final report for the host will "feature" these search terms in two ways:
356 | 
357 | 1. The report will include different colors and highlighting for the matching rows.
358 | 2. The final output will contain an aggregate list of "interesting search terms."
359 | 
360 | ### Configuring Fields
361 | 
362 | Censeye does not generate aggregate reports for every single field in a host result, as some fields are more useful than others. Instead, it focuses on fields explicitly defined as relevant for reporting.
363 | 
364 | Each field definition includes two configurable options:
365 | 
366 | 1. **Ignored Values**: Specific values within the field that should be excluded from the report.
367 | 2. **Weight**: The relative importance of the field, which can influence prioritization in reporting and analysis.
368 | 
369 | #### Ignoring field values
370 | 
371 | The `ignored` configuration tells the utility to exclude certain values from generating reports. For example, the `services.http.response.body_hash` field in the configuration may specify two values to ignore:
372 | 
373 | - `"sha1:4dcf84abb6c414259c1d5aec9a5598eebfcea842"`
374 | - `"sha256:036bacf3bd34365006eac2a78e4520a953a6250e9550dcf9c9d4b0678c225b4c"`
375 | 
376 | When analyzing a host's result, if the _value_ of that field matches one of these configured values, a report will not be generated for that _specific_ field.
377 | 
378 | HTTP response headers are handled slightly differently. Instead of ignoring individual values, the configuration defines an array of key-value pairs to ignore. If the response header key-value pairs on a host match any of those defined in the configuration, a report will not be generated.
379 | 
380 | The goal of this feature is to optimize the tool's performance by reducing processing time and pre-filtering well-known search statements that are unlikely to provide useful insights.
381 | 
382 | #### Field weights
383 | 
384 | Field weights influence how Censeye pivots during its analysis and are directly tied to the `--min-pivot-weight` argument (default: `0.0`).
385 | 
386 | Each field is assigned a weight ranging from `0.0` to `1.0`, with a default of `0.0`. When the `--depth` flag is set, fields with a weight below the specified `--min-pivot-weight` value will be excluded from pivoting. In other words, these fields will not be used to identify other matching hosts for further reporting.
387 | 
388 | This allows users to prioritize certain fields over others, tailoring the analysis to focus on more relevant or significant fields.
389 | 
390 | **Note**: the argument `--fast` is an alias for `--min-pivot-weight 1.0` and `--slow` is an alias for `--min-pivot-weight 0.0`.
391 | 
392 | #### Value-only fields
393 | 
394 | In the above configuration, some fields are prefixed with a `~` character, for example:
395 | 
396 | ```yaml
397 |   - field: ~services.tls.certificates.leaf_data.subject.organization
398 |     weight: 0.5
399 |     ignore:
400 |       - "Cloudflare, Inc."
401 | ```
402 | 
403 | In this case, if a host includes the `services.tls.certificates.leaf_data.subject.organization` field in its data, the value is used as a wildcard search in Censys. The resulting search statement will resemble the following:
404 | 
405 | ```plain
406 | (not services.tls.certificates.leaf_data.subject.organization=$VALUE) and "$VALUE"
407 | ```
408 | 
409 | The idea is to determine the number of hosts where that value is found anywhere in the data, not just within the specific field itself.
410 | 
411 | ### Configuring Gadgets
412 | 
413 | Each gadget has its own set of configuration directives which can also be manipulated in the configuration file under the `gadgets` directive. The following is the format of this:
414 | 
415 | ```yaml
416 | gadgets:
417 |   - gadget: <GADGET_NAME>
418 |     enabled: <TRUE|FALSE>
419 |     config:
420 |       <GADGET_SPECIFIC_CONFIGURATION>
421 | ```
422 | 
423 | If the gadget is a query generator, this means that it can be used for auto-pivoting, and must have a configured field and weight, just like real fields. Gadgets are given the namespace `<GADGET_NAME>.gadget.censeye`, so a field configuration for the `open-directory` gadget would look like this:
424 | 
425 | ```yaml
426 | fields:
427 |   - field: open-directory.gadget.censeye
428 |     weight: 1.0
429 | ```
430 | 
431 | Note that you can leave the `enabled` directive to `false` and enable the gadget with the `--gadget` flag while maintaining the configuration in the configuration file.
432 | 
433 | #### Open Directory Gadget Configuration
434 | 
435 | The `open-directory` gadget has two configuration options:
436 | 
437 | - `max_files`: The maximum number of files to generate queries for. Default: `32`
438 | - `min_chars`: The minimum number of characters a file name must have to be considered. Default: `2`
439 | 
440 | #### Nobbler Gadget Configuration
441 | 
442 | The `nobbler` gadget has one configuration option:
443 | 
444 | - `iterations`: A list of integers specifying the number of bytes to examine at the start of the response. Default: `[4, 8, 16, 32]
445 | 
446 | ## Workspaces
447 | 
448 | Censeye caches almost everything it does to avoid running the same queries for the same data repeatedly—which would be inefficient and time-consuming. A "workspace" is essentially a directory where the cache is stored. It is recommended to use a unique workspace (configured via the `--workspace` flag) and stick with it for as long as possible. Once you begin a hunt, continue using the same workspace to leverage the cache and minimize round-trip times (RTT).
449 | 
450 | If, for some reason, you want all data to be fetched fresh from the API, you can use the `--no-cache` option. However, this is generally not recommended unless absolutely necessary.
451 | 
452 | ## Contributing
453 | 
454 | If you have any ideas for improvements or new features, please feel free to open an issue or a pull request. We are always looking for ways to make this tool more useful and efficient.
455 | 
456 | ### Developer Setup
457 | 
458 | To set up a development environment, you can use the following commands:
459 | 
460 | ```shell
461 | git clone https://github.com/Censys-Research/censeye.git
462 | cd censeye
463 | python -m venv .venv && source .venv/bin/activate
464 | pip install -e ".[dev]"
465 | pre-commit install
466 | ```
467 | 


--------------------------------------------------------------------------------
/censeye/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Censys-Research/censeye/280184680e214b21dc21465eaab17eed140e7369/censeye/__init__.py


--------------------------------------------------------------------------------
/censeye/__main__.py:
--------------------------------------------------------------------------------
1 | from .cli import main
2 | 
3 | if __name__ == "__main__":
4 |     main()
5 | 


--------------------------------------------------------------------------------
/censeye/__version__.py:
--------------------------------------------------------------------------------
1 | VERSION = (1, 0, 1)
2 | 
3 | __version__ = ".".join(map(str, VERSION))
4 | 


--------------------------------------------------------------------------------
/censeye/aggregator.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import hashlib
  3 | import logging
  4 | import os
  5 | import pickle
  6 | import urllib.parse
  7 | from typing import Any, Optional
  8 | 
  9 | from censys.search import SearchClient
 10 | 
 11 | from .config import Config
 12 | from .const import USER_AGENT
 13 | from .gadget import GADGET_NAMESPACE, Gadget, QueryGeneratorGadget
 14 | 
 15 | 
 16 | class Aggregator:
 17 |     MAX_VALUE_LENGTH = 255  # we don't search for kv pairs longer than this value.
 18 | 
 19 |     def __init__(
 20 |         self,
 21 |         cache_dir=None,
 22 |         query_prefix=None,
 23 |         duo_reporting=False,
 24 |         config: Optional[Config] = None,
 25 |         armed_gadgets: Optional[set[Gadget]] = None,
 26 |     ):
 27 |         self.client = SearchClient(user_agent=USER_AGENT)
 28 |         self.seen_hosts: set[str] = set()
 29 |         self.seen_queries: set[tuple[str, Any]] = set()
 30 |         self.cache_dir = cache_dir
 31 |         self.num_queries = 0
 32 |         self.query_prefix = query_prefix
 33 |         self.duo_reporting = duo_reporting
 34 |         if config is None:
 35 |             config = Config()
 36 |         self.config = config
 37 |         self.workers = config.workers
 38 |         if armed_gadgets is None:
 39 |             armed_gadgets = set()
 40 |         self.gadgets = armed_gadgets
 41 | 
 42 |         if self.cache_dir:
 43 |             os.makedirs(self.cache_dir, exist_ok=True)
 44 | 
 45 |     def _is_kv_filtered(self, k, v, parent=None):
 46 |         import json
 47 | 
 48 |         logging.debug(f"Checking if {json.dumps(k)}={v} is filtered")
 49 |         if len(str(v)) > self.MAX_VALUE_LENGTH:
 50 |             return True
 51 | 
 52 |         if parent and self.config[parent]:
 53 |             field = self.config[parent]
 54 |             if not field:
 55 |                 return False
 56 | 
 57 |             for ent in field.ignore:
 58 |                 if k in ent and (v in ent[k] or "*" in ent[k]):
 59 |                     return True
 60 | 
 61 |         elif k in self.config:
 62 |             field = self.config[k]
 63 |             if not field:
 64 |                 return False
 65 |             return v in field.ignore
 66 | 
 67 |         return False
 68 | 
 69 |     def _generate_header_queries(self, headers, parent_key):
 70 |         """
 71 |         converts censys header fields to censys-like queries
 72 |         """
 73 |         results = []
 74 | 
 75 |         if not isinstance(headers, dict):
 76 |             return results
 77 | 
 78 |         for k, v in headers.items():
 79 |             if k == "_encoding":
 80 |                 # internal censys thing, just discard
 81 |                 continue
 82 | 
 83 |             # if v is not a list, discard
 84 |             if not isinstance(v, list):
 85 |                 continue
 86 | 
 87 |             # censys stores headers with dashes converted to underscores
 88 |             header_key = k.replace("_", "-")
 89 |             for val in v:
 90 |                 if "<REDACTED>" in v:
 91 |                     # discard redacted values
 92 |                     continue
 93 | 
 94 |                 if header_key == "Location":
 95 |                     # censys stores locations as full URLs, we need to wildcard it minus
 96 |                     # the host for any further searching
 97 |                     if not self._is_kv_filtered(header_key, val, parent_key):
 98 |                         results.append(
 99 |                             (
100 |                                 parent_key,
101 |                                 f"(key: '{header_key}' and value.headers: '{val}')",
102 |                             )
103 |                         )
104 | 
105 |                     u = urllib.parse.urlparse(val)
106 |                     val = f"*{u.path}"
107 | 
108 |                 if self._is_kv_filtered(header_key, val, parent_key):
109 |                     dstr = val[:50] + "..." if len(val) > 50 else val
110 |                     logging.debug(
111 |                         f"Excluding {header_key}={dstr}, it's not in our allowed header"
112 |                         " k/v's"
113 |                     )
114 |                     continue
115 | 
116 |                 val = val.replace("'", "\\'")
117 | 
118 |                 # generate a censys-like header query
119 |                 results.append(
120 |                     (parent_key, f"(key: '{header_key}' and value.headers: '{val}')")
121 |                 )
122 | 
123 |         return results
124 | 
125 |     def _generate_queries(self, data, parent_key=""):
126 |         """
127 |         converts censys field data to censys-like queries
128 |         """
129 |         results = []
130 | 
131 |         if isinstance(data, dict):
132 |             for k, v in data.items():
133 |                 if k.startswith("_"):
134 |                     # internal censys thing, discard
135 |                     continue
136 | 
137 |                 key = f"{parent_key}.{k}" if parent_key else k
138 | 
139 |                 if k == "headers":
140 |                     # special case for headers
141 |                     results.extend(self._generate_header_queries(v, key))
142 |                 else:
143 |                     results.extend(self._generate_queries(v, key))
144 | 
145 |         elif isinstance(data, list):
146 |             for _, v in enumerate(data):
147 |                 key = f"{parent_key}"
148 |                 results.extend(self._generate_queries(v, key))
149 |         else:
150 |             if self._is_kv_filtered(parent_key, data):
151 |                 # make a shorter copy of data
152 |                 dstr = data[:50] + "..." if len(data) > 50 else data
153 |                 logging.debug(
154 |                     f"Excluding {parent_key}={dstr}, it's not in our allowed k/v's"
155 |                 )
156 |                 return []
157 | 
158 |             if len(str(data)) > self.MAX_VALUE_LENGTH:
159 |                 logging.debug(f"Excluding {parent_key}, it's too long")
160 |                 return []
161 | 
162 |             kvpair = f"{parent_key}={data}"
163 |             # have we seen this already? Then don't dup.
164 |             if kvpair not in self.seen_queries:
165 |                 results.append((parent_key, data))
166 | 
167 |         return results
168 | 
169 |     def _cache_file(self, q):
170 |         m = hashlib.md5(q.encode()).hexdigest()
171 |         return os.path.join(self.cache_dir, f"{m}.pkl") if self.cache_dir else None
172 | 
173 |     def _load_from_cache(self, q):
174 |         cache_file = self._cache_file(q)
175 | 
176 |         if cache_file and os.path.exists(cache_file):
177 |             with open(cache_file, "rb") as f:
178 |                 return pickle.load(f)
179 | 
180 |         return None
181 | 
182 |     def _save_to_cache(self, q, res):
183 |         cache_file = self._cache_file(q)
184 |         logging.debug(f"Saving cache for query: {q} {cache_file}")
185 | 
186 |         if cache_file:
187 |             with open(cache_file, "wb") as f:
188 |                 pickle.dump(res, f)
189 | 
190 |     def _get_certificate_observations(self, fingerprint):
191 |         cached_res = self._load_from_cache(fingerprint)
192 |         if cached_res:
193 |             logging.debug(
194 |                 f"Found cached certificate observations for {fingerprint}: {cached_res}"
195 |             )
196 |             return cached_res
197 | 
198 |         try:
199 |             obs = self.client.v2.certs.get_observations(fingerprint)
200 |             self.num_queries += 1
201 | 
202 |             ret = dict()
203 | 
204 |             for ent in obs.get("observations", []):
205 |                 if "ip" not in ent:
206 |                     continue
207 |                 if "last_observed_at" not in ent:
208 |                     continue
209 | 
210 |                 ret[ent["ip"]] = ent["last_observed_at"]
211 | 
212 |             logging.debug(
213 |                 f"Found {len(ret)} observations for certificate {fingerprint}"
214 |             )
215 |             self._save_to_cache(fingerprint, ret)
216 |             return ret
217 |         except Exception:
218 |             logging.warning(
219 |                 f"Failed to fetch certificate observations for {fingerprint}"
220 |             )
221 |             return {}
222 | 
223 |     async def _get_aggregate_report(self, key, value):
224 |         """
225 |         fetches aggregate report on a query from censys, the value of which is the number of hosts
226 |         """
227 |         if self._is_kv_filtered(key, value):
228 |             logging.debug(f"Excluding {key}={value}, it's not in our allowed k/v's")
229 |             return None
230 | 
231 |         if isinstance(value, (int, float)):
232 |             value = str(value)
233 | 
234 |         if "headers" not in key:
235 |             value = (
236 |                 value.replace("\\", "\\\\")
237 |                 .replace('"', '\\"')
238 |                 .replace("\n", "\\n")
239 |                 .replace("\r", "\\r")
240 |             )
241 | 
242 |         query = f'{key}="{value}"'
243 | 
244 |         if key.startswith("~"):
245 |             if value.startswith("*"):
246 |                 logging.debug(f"Excluding query: {value}, it's a wildcard match")
247 |                 return None
248 |             query = f'"{value}"'
249 |         elif "headers" in key:
250 |             query = f"{key}:{value}"
251 |         elif key.endswith(f".{GADGET_NAMESPACE}"):
252 |             # this query was generated by a gadget, so we expect the value to be the query we want to run.
253 |             query = value
254 | 
255 |         if query == '""':
256 |             return None
257 | 
258 |         if key.startswith("~"):
259 |             # yes we do this again because we want to check for empty query beforehand
260 |             # instead of just doing a raw string query, let's remove the original results from our report
261 |             query = f'(not {key[1:]}:"{value}") and {query}'
262 | 
263 |         async def _aggregate_report(q):
264 |             cached_result = self._load_from_cache(q)
265 | 
266 |             if cached_result:
267 |                 logging.debug(f"Found cached result for query: {q}")
268 |                 return cached_result
269 | 
270 |             logging.info(f"fetching aggregate report for query: {q}")
271 | 
272 |             try:
273 |                 report = await asyncio.to_thread(
274 |                     self.client.v2.hosts.aggregate, q, "ip", num_buckets=1
275 |                 )
276 |                 self.num_queries += 1
277 | 
278 |                 host_count = report["total"]
279 |                 ret = {"key": key, "val": value, "query": q, "hosts": host_count}
280 |                 self._save_to_cache(q, ret)
281 |                 return ret
282 |             except Exception as e:
283 |                 logging.error(
284 |                     f"Failed to fetch aggregate report for query: {q}, error: {e}"
285 |                 )
286 |                 return {"key": key, "val": value, "query": q, "hosts": 0}
287 | 
288 |         query_base = query
289 | 
290 |         if self.query_prefix:
291 |             query = f"({self.query_prefix}) and ({query})"
292 | 
293 |         ret = dict()
294 | 
295 |         if query != query_base and self.duo_reporting:
296 |             # We want to actually make two queries here, one with the query_prefix, and one without. The idea is that
297 |             # we can create a report that shows "number_of_hosts_matching_query_prefix / total_number_of_hosts_matching_query"
298 |             total_report = await _aggregate_report(query_base)  # without query_prefix
299 |             match_report = await _aggregate_report(query)  # with query_prefix
300 |             ret = match_report
301 |             ret["noprefix_hosts"] = total_report["hosts"]
302 |         else:
303 |             ret = await _aggregate_report(query)
304 | 
305 |         # Special case for services.certificate values of '1' or even '0' (which can happen with --query-prefix)
306 |         # we take that certificate fingerprint, and look for any historical observations.
307 |         if key == "services.certificate" and ret["hosts"] <= 1:
308 |             try:
309 |                 obs = self._get_certificate_observations(value)
310 |                 # if we only get one returned observation, it means it only matched the host we are running against.
311 |                 if obs and len(obs) > 1:
312 |                     ret["historical_observations"] = obs
313 |             except Exception as e:
314 |                 logging.error(
315 |                     "Failed to fetch historical observations for certificate"
316 |                     f" {value}: {e}"
317 |                 )
318 | 
319 |         logging.debug(f"aggregate report for query: {query}, hosts: {ret['hosts']}")
320 |         return ret
321 | 
322 |     def get_queries(self, host_data):
323 |         """
324 |         generates censys-like queries from censys data
325 |         """
326 |         ret = []
327 |         queries = self._generate_queries(host_data)
328 | 
329 |         # run our query generator gadgets.
330 |         for gadget in self.gadgets:
331 |             if not isinstance(gadget, QueryGeneratorGadget):
332 |                 continue
333 |             try:
334 |                 pqueries = gadget.run(host_data)
335 |                 if pqueries:
336 |                     queries.extend(pqueries)
337 | 
338 |             except Exception as e:
339 |                 logging.error(f"Gadget {gadget} failed: {e}")
340 | 
341 |         for k, v in queries:
342 |             # check if there is a value-only variant.
343 |             tkey = "~" + k
344 |             if tkey in self.config:
345 |                 # this means we should also query just the value in a wildcard search
346 |                 # note that we also do the normal key=val statement too
347 |                 ret.append((tkey, v))
348 | 
349 |             if k in self.config:
350 |                 ret.append((k, v))
351 | 
352 |         return ret
353 | 
354 |     async def get_report(self, host_data):
355 |         report = []
356 |         queries = self.get_queries(host_data)
357 |         sem = asyncio.Semaphore(self.workers)
358 | 
359 |         logging.info(
360 |             f"{host_data['ip']} gave us {len(queries)} _potential_ pivots to try."
361 |         )
362 | 
363 |         async def run_worker(k, v):
364 |             async with sem:
365 |                 logging.debug(f"running report for '{k}={v}'")
366 |                 return await self._get_aggregate_report(k, v)
367 | 
368 |         tasks = [run_worker(k, v) for k, v in queries]
369 | 
370 |         logging.info(f"Enqueuing {host_data['ip']} for {len(queries)} reports")
371 | 
372 |         results = await asyncio.gather(*tasks)
373 | 
374 |         for result in results:
375 |             if result:
376 |                 report.append(result)
377 | 
378 |         return report
379 | 


--------------------------------------------------------------------------------
/censeye/censeye.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import hashlib
  3 | import logging
  4 | import os
  5 | import pickle
  6 | from typing import Optional
  7 | 
  8 | from censys.search import CensysHosts
  9 | 
 10 | from .aggregator import Aggregator
 11 | from .config import Config
 12 | from .const import USER_AGENT
 13 | from .gadget import Gadget, HostLabelerGadget
 14 | 
 15 | 
 16 | class Censeye:
 17 |     QUEUE_TYPE_HOST = 0
 18 |     QUEUE_TYPE_SEARCH = 1
 19 | 
 20 |     def __init__(
 21 |         self,
 22 |         depth=0,
 23 |         cache_dir=None,
 24 |         at_time=None,
 25 |         query_prefix=None,
 26 |         duo_reporting=False,
 27 |         config: Optional[Config] = None,
 28 |         armed_gadgets: Optional[set[Gadget]] = None,
 29 |     ):
 30 |         if config is None:
 31 |             config = Config()
 32 |         self.config = config
 33 |         self.workers = config.workers
 34 |         self.client = CensysHosts(user_agent=USER_AGENT)
 35 |         if armed_gadgets is None:
 36 |             armed_gadgets = set()
 37 |         self.find = Aggregator(
 38 |             cache_dir=cache_dir,
 39 |             query_prefix=query_prefix,
 40 |             duo_reporting=duo_reporting,
 41 |             config=config,
 42 |             armed_gadgets=armed_gadgets,
 43 |         )
 44 |         self.seen_hosts: set[str] = set()
 45 |         self.depth = depth
 46 |         self.cache_dir = cache_dir
 47 |         self.at_time = at_time
 48 |         self.query_prefix = (
 49 |             query_prefix  # this gets appended to every report and search
 50 |         )
 51 |         self.num_queries = 0
 52 |         self.in_transit: set[str] = set()
 53 |         self.search_buckets: dict[int, set[str]] = dict()
 54 |         self.lock = asyncio.Lock()
 55 |         self.duo_reporting = duo_reporting
 56 |         self.gadgets = armed_gadgets
 57 | 
 58 |         if self.cache_dir:
 59 |             os.makedirs(self.cache_dir, exist_ok=True)
 60 | 
 61 |         logging.info(
 62 |             f"max_host_count: {self.config.max_host_count}, min_host_count:"
 63 |             f" {self.config.min_host_count}, max_service_count:"
 64 |             f" {self.config.max_serv_count} workers: {self.config.workers} depth:"
 65 |             f" {self.depth} cache_dir: {self.cache_dir}"
 66 |         )
 67 | 
 68 |     def _get_cache_filename(self, input_data, at_time=None, other=None):
 69 |         fmt = f"{input_data}_{self.config.max_search_res}"
 70 | 
 71 |         if at_time:
 72 |             fmt = f"{fmt}_{at_time}"
 73 | 
 74 |         if other:
 75 |             fmt = f"{fmt}_{other}"
 76 | 
 77 |         input_hash = hashlib.md5(fmt.encode("utf-8")).hexdigest()
 78 | 
 79 |         return (
 80 |             os.path.join(self.cache_dir, f"{input_hash}.pkl")
 81 |             if self.cache_dir
 82 |             else None
 83 |         )
 84 | 
 85 |     def _load_from_cache(self, input_data, at_time=None, other=None):
 86 |         cache_file = self._get_cache_filename(input_data, at_time, other)
 87 | 
 88 |         if cache_file and os.path.exists(cache_file):
 89 |             with open(cache_file, "rb") as f:
 90 |                 logging.debug(
 91 |                     f"Loaded cached data {cache_file} for input: {input_data}"
 92 |                 )
 93 |                 return pickle.load(f)
 94 | 
 95 |         return None
 96 | 
 97 |     def _save_to_cache(self, input_data, data, at_time=None, other=None):
 98 |         cache_file = self._get_cache_filename(input_data, at_time, other)
 99 | 
100 |         if cache_file:
101 |             with open(cache_file, "wb") as f:
102 |                 pickle.dump(data, f)
103 |                 logging.debug(f"Saved data for input: {input_data} to cache")
104 | 
105 |     async def _get_host(self, ip, at_time=None):
106 |         if ip in self.seen_hosts:
107 |             return None
108 | 
109 |         cache = self._load_from_cache(ip, at_time=at_time)
110 | 
111 |         if cache:
112 |             self.seen_hosts.add(ip)
113 |             return cache
114 | 
115 |         try:
116 |             data = await asyncio.to_thread(self.client.view, ip, at_time=at_time)
117 | 
118 |             if data:
119 |                 self.seen_hosts.add(ip)
120 |                 self.num_queries += 1
121 |                 self._save_to_cache(ip, data, at_time=at_time)
122 |                 return data
123 |         except Exception as e:
124 |             logging.error(f"Error fetching host data for {ip}: {e}")
125 | 
126 |         return None
127 | 
128 |     async def _search(self, qstr):
129 |         """Use Censys API to search for hosts based on the query and cache the results."""
130 |         qstr = (  # Exclude unwanted host types
131 |             f"({qstr}) and not labels={{tarpit, truncated}}"
132 |         )
133 |         if self.query_prefix:
134 |             qstr = f"({self.query_prefix}) and ({qstr})"
135 | 
136 |         logging.debug(f"Searching for: {qstr}")
137 | 
138 |         # Check if the search query is cached
139 |         # we set other here because we want our cache to be unique to the number of results we want.
140 |         cached_results = self._load_from_cache(qstr, other=self.config.max_search_res)
141 |         if cached_results:
142 |             logging.info(f"Cache hit for query: {qstr}")
143 |             for host in cached_results:
144 |                 yield host["ip"]
145 |             return
146 |         else:
147 |             logging.info(f"Cache miss for query: {qstr}")
148 | 
149 |         try:
150 |             pages = max(1, self.config.max_search_res // 100)
151 |             per_page = min(100, self.config.max_search_res)
152 |             logging.info(f"Searching for: {qstr}, pages: {pages}, per_page={per_page}")
153 | 
154 |             self.num_queries += 1
155 |             res = await asyncio.to_thread(
156 |                 self.client.search,
157 |                 qstr,
158 |                 per_page=per_page,
159 |                 pages=pages,
160 |             )
161 | 
162 |             all_hosts = []
163 |             for page in res:
164 |                 for host in page:
165 |                     if len(host["services"]) <= self.config.max_serv_count:
166 |                         all_hosts.append(host)
167 |                         yield host["ip"]
168 | 
169 |             self._save_to_cache(qstr, all_hosts, other=self.config.max_search_res)
170 | 
171 |         except Exception as e:
172 |             logging.error(f"Error during search with query '{qstr}': {e}")
173 | 
174 |     async def _process_ip(
175 |         self, ip, depth, parent, results, searches, queue, query, at_time=None
176 |     ):
177 |         if depth not in self.search_buckets:
178 |             self.search_buckets[depth] = set()
179 | 
180 |         id = f"{ip}@{at_time}"
181 | 
182 |         async with self.lock:
183 |             if id in self.in_transit:
184 |                 return
185 |             self.in_transit.add(id)
186 | 
187 |         logging.info(f"processing {ip} (triggered by: {parent}, depth: {depth})")
188 | 
189 |         data = await self._get_host(ip, at_time=at_time)
190 | 
191 |         for gadget in self.gadgets:
192 |             if not isinstance(gadget, HostLabelerGadget):
193 |                 continue
194 | 
195 |             logging.info(f"Running labeler gadget {gadget}")
196 | 
197 |             try:
198 |                 if data:
199 |                     gadget.run(data)
200 |             except Exception as e:
201 |                 logging.error(f"Gadget {gadget} failed: {e}")
202 | 
203 |         async with self.lock:
204 |             self.in_transit.remove(id)
205 | 
206 |         if not data:
207 |             return
208 | 
209 |         report = await self.find.get_report(data)
210 |         result = {
211 |             "ip": ip,
212 |             "labels": data.get("labels", []),
213 |             "report": report,
214 |             "parent_ip": parent,
215 |             "found_via": query,
216 |             "depth": depth,
217 |             "at_time": at_time,
218 |         }
219 | 
220 |         for r in report:
221 |             # for each result in the report, if it matches our min/max hostcount, then queue
222 |             # it up for grabbing host _SEARCH_ results.
223 |             if "historical_observations" in r:
224 |                 for hip, at_time in r["historical_observations"].items():
225 |                     logging.debug(
226 |                         f"Found historical observation for {hip} at {at_time}"
227 |                     )
228 |                     await queue.put(
229 |                         (
230 |                             self.QUEUE_TYPE_HOST,
231 |                             (hip, at_time, r["query"]),
232 |                             depth + 1,
233 |                             ip,
234 |                         )
235 |                     )
236 | 
237 |             # need this to grab the weight assigned to this field.
238 |             field = self.config[r["key"]]
239 |             weight = field.weight if field else 0.0
240 | 
241 |             if self.config.min_host_count <= r["hosts"] <= self.config.max_host_count:
242 |                 new_query = r["query"]
243 | 
244 |                 searches.add(new_query)
245 | 
246 |                 # only add if this query hasn't been seen at this depth or any previous depth
247 |                 if not any(
248 |                     new_query in self.search_buckets.get(d, set())
249 |                     for d in range(depth + 1)
250 |                 ):
251 |                     if depth + 1 > self.depth:
252 |                         logging.debug(
253 |                             f"max depth reached for query: {new_query}, not going any"
254 |                             " further."
255 |                         )
256 |                     else:
257 |                         if weight >= self.config.min_pivot_weight:
258 |                             logging.info(
259 |                                 f"Pivoting into:'{new_query}' via: (ip={ip},"
260 |                                 f" parent={parent}, host_count={r['hosts']})"
261 |                             )
262 | 
263 |                             self.search_buckets.setdefault(depth, set()).add(new_query)
264 | 
265 |                             await queue.put(
266 |                                 (self.QUEUE_TYPE_SEARCH, new_query, depth + 1, ip)
267 |                             )
268 | 
269 |         results.append(result)
270 | 
271 |     async def _worker(self, queue, results, searches_set):
272 |         while True:
273 |             try:
274 |                 q_type, query, depth, parent_ip = await queue.get()
275 | 
276 |                 logging.debug(f"got job: {q_type}, {query}, {depth}, {parent_ip}")
277 | 
278 |                 if depth > self.depth:
279 |                     logging.debug(
280 |                         f"max depth reached for query: {query}, not going any further."
281 |                     )
282 |                     queue.task_done()
283 |                     continue
284 | 
285 |                 if q_type == self.QUEUE_TYPE_SEARCH:
286 |                     async for found_ip in self._search(query):
287 |                         if found_ip not in self.find.seen_hosts:
288 |                             await self._process_ip(
289 |                                 found_ip,
290 |                                 depth,
291 |                                 parent_ip,
292 |                                 results,
293 |                                 searches_set,
294 |                                 queue,
295 |                                 query,
296 |                             )
297 |                 elif q_type == self.QUEUE_TYPE_HOST:
298 |                     # we store extra information in the query element of this job, so
299 |                     # extract it and process.
300 |                     (ip, at_time, parent_query) = query
301 | 
302 |                     if query not in self.find.seen_hosts:
303 |                         await self._process_ip(
304 |                             ip,
305 |                             depth,
306 |                             parent_ip,
307 |                             results,
308 |                             searches_set,
309 |                             queue,
310 |                             parent_query,
311 |                             at_time=at_time,
312 |                         )
313 | 
314 |                 queue.task_done()
315 |             except asyncio.CancelledError:
316 |                 break
317 |             except Exception as e:
318 |                 logging.error(f"worker error: {e}")
319 |                 queue.task_done()
320 | 
321 |     async def run(self, ip):
322 |         searches = set()
323 |         queue = asyncio.Queue()
324 |         results = []
325 | 
326 |         workers = [
327 |             asyncio.create_task(self._worker(queue, results, searches))
328 |             for _ in range(self.config.workers)
329 |         ]
330 | 
331 |         await self._process_ip(
332 |             ip, 0, None, results, searches, queue, "", at_time=self.at_time
333 |         )
334 |         await queue.join()
335 | 
336 |         for w in workers:
337 |             w.cancel()
338 | 
339 |         await asyncio.gather(*workers, return_exceptions=True)
340 | 
341 |         return results, searches
342 | 
343 |     def get_num_queries(self):
344 |         return self.num_queries + self.find.num_queries
345 | 


--------------------------------------------------------------------------------
/censeye/cli.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import logging
  3 | import sys
  4 | import urllib.parse
  5 | from collections import defaultdict
  6 | from typing import Optional
  7 | 
  8 | import click
  9 | from appdirs import user_cache_dir
 10 | from dateutil import parser as dateutil_parser
 11 | from rich import box
 12 | from rich.console import Console
 13 | from rich.style import Style
 14 | from rich.table import Table
 15 | from rich.tree import Tree
 16 | 
 17 | from . import censeye
 18 | from .__version__ import __version__
 19 | from .config import Config
 20 | from .const import DEFAULT_MAX_SEARCH_RESULTS
 21 | from .gadget import GADGET_NAMESPACE, Gadget
 22 | from .gadgets import unarmed_gadgets
 23 | 
 24 | console = Console(record=True, soft_wrap=True)
 25 | 
 26 | 
 27 | async def run_censeye(
 28 |     ip,
 29 |     depth=0,
 30 |     cache_dir=None,
 31 |     console=console,
 32 |     at_time=None,
 33 |     query_prefix=None,
 34 |     duo_reporting=False,
 35 |     config: Optional[Config] = None,
 36 |     gadgets: Optional[set[Gadget]] = None,
 37 | ):
 38 |     if config is None:
 39 |         config = Config()
 40 | 
 41 |     if cache_dir is None:
 42 |         cache_dir = user_cache_dir("censys/censeye")
 43 |         logging.debug(f"Using cache dir: {cache_dir}")
 44 | 
 45 |     if gadgets is None:
 46 |         gadgets = set()
 47 | 
 48 |     c = censeye.Censeye(
 49 |         depth=depth,
 50 |         cache_dir=cache_dir,
 51 |         at_time=at_time,
 52 |         query_prefix=query_prefix,
 53 |         duo_reporting=duo_reporting,
 54 |         config=config,
 55 |         armed_gadgets=gadgets,
 56 |     )
 57 | 
 58 |     result, searches = await c.run(ip)
 59 |     searches = sorted(searches)
 60 |     seen_hosts = set()
 61 | 
 62 |     # TODO: make these configurable, e.g., themes.
 63 |     style_bold = Style(bold=True)
 64 |     style_gadget = Style(bold=False, color="#5696CC")
 65 |     style_gadget_bold = Style(bold=True, color="#9FC3E2")
 66 | 
 67 |     for host in result:
 68 |         if host["ip"] in seen_hosts:
 69 |             continue
 70 | 
 71 |         seen_hosts.add(host["ip"])
 72 |         if host["depth"] > depth:
 73 |             # these are just empty anyway.
 74 |             continue
 75 | 
 76 |         sres = sorted(host["report"], key=lambda x: x["hosts"], reverse=True)
 77 |         link = f"https://search.censys.io/hosts/{host['ip']}"
 78 | 
 79 |         if "at_time" in host and host["at_time"] is not None:
 80 |             try:
 81 |                 at_encoded = urllib.parse.quote(
 82 |                     host["at_time"].isoformat(timespec="milliseconds") + "Z"
 83 |                 )
 84 |                 link = f"{link}?at_time={at_encoded}"
 85 |             except Exception:
 86 |                 pass
 87 | 
 88 |         title = (
 89 |             f"[link={link}]{host['ip']}[/link] (depth: {host['depth']}) (Via:"
 90 |             f" {host['parent_ip']} -- {host['found_via']} -- {host['labels']})"
 91 |         )
 92 | 
 93 |         table = Table(
 94 |             title=title,
 95 |             min_width=20,
 96 |             title_justify="left",
 97 |             box=box.SIMPLE_HEAVY,
 98 |         )
 99 | 
100 |         table.add_column("Hosts", justify="right", style="magenta")
101 |         table.add_column("Key", justify="left", style="cyan", no_wrap=False)
102 |         table.add_column(
103 |             "Val", justify="left", style="green", no_wrap=False, overflow="fold"
104 |         )
105 | 
106 |         seen_rows = set()
107 |         hist_obs = dict()
108 | 
109 |         for r in sres:
110 |             key = r["key"]
111 |             if key.startswith("services."):
112 |                 key = key[len("services.") :]
113 |             elif key.startswith("~services."):
114 |                 key = key[len("~services.") :]
115 |                 key = f"{key} (VALUE ONLY)"
116 | 
117 |             if key.startswith("parsed."):
118 |                 key = key[len("parsed.") :]
119 |             elif key.startswith("~parsed."):
120 |                 key = key[len("~parsed.") :]
121 |                 key = f"{key} (VALUE ONLY)"
122 | 
123 |             row = (key, r["val"])
124 | 
125 |             host_count = f"{r['hosts']}"
126 |             hist_count = 0
127 | 
128 |             urlenc_query = urllib.parse.quote(r["query"])
129 |             key = f"[link=https://search.censys.io/search?resource=hosts&q={urlenc_query}]{key}[/link]"
130 | 
131 |             if "historical_observations" in r:
132 |                 hist_count = len(r["historical_observations"])
133 |                 hkey = f"{r['key']}={r['val']}"
134 |                 if hkey not in hist_obs:
135 |                     hist_obs[hkey] = r["historical_observations"]
136 | 
137 |             if row not in seen_rows:
138 |                 row_style = None
139 |                 count_col = ""
140 | 
141 |                 if (
142 |                     r["hosts"] <= config.max_host_count
143 |                     and (r["hosts"] + hist_count) > 1
144 |                 ):
145 |                     if r["key"].endswith(f".{GADGET_NAMESPACE}"):
146 |                         row_style = style_gadget_bold
147 |                     else:
148 |                         row_style = style_bold
149 | 
150 |                     if hist_count:
151 |                         count_col = f"{host_count} (+{hist_count})"
152 |                     else:
153 |                         count_col = f"{host_count}"
154 |                 else:
155 |                     if r["key"].endswith(f".{GADGET_NAMESPACE}"):
156 |                         row_style = style_gadget
157 |                     count_col = f"{host_count}"
158 | 
159 |                 if "noprefix_hosts" in r:
160 |                     count_col = f"{count_col} / {r['noprefix_hosts']}"
161 | 
162 |                 table.add_row(count_col, key, r["val"], style=row_style)
163 |                 seen_rows.add(row)
164 | 
165 |         console.print(table)
166 | 
167 |         if len(hist_obs) > 0:
168 |             htree = Tree(f"Historical Certificate Observations: {len(hist_obs)}")
169 | 
170 |             for k, v in hist_obs.items():
171 |                 sorted_v = dict(
172 |                     sorted(
173 |                         v.items(),
174 |                         key=lambda item: dateutil_parser.isoparse(item[1]),
175 |                         reverse=False,
176 |                     )
177 |                 )
178 | 
179 |                 node = htree.add(k)
180 | 
181 |                 for hip, at_time in sorted_v.items():
182 |                     if host["ip"] != hip:
183 |                         lnk = f"https://search.censys.io/hosts/{hip}?at_time={at_time}"
184 |                         node.add(f"{at_time}: [link={lnk}]{hip}[/link]")
185 | 
186 |             console.print(htree)
187 |             console.print()
188 | 
189 |     console.print(f"Interesting search terms: {len(searches)}")
190 |     for s in searches:
191 |         # urlencode "s"
192 |         ul = urllib.parse.quote(s)
193 |         console.print(
194 |             f" - [link=https://search.censys.io/search?resource=hosts&q={ul}]{s}[/link]"
195 |         )
196 | 
197 |     console.print()
198 |     if depth > 0:
199 |         ipmap = defaultdict(list)
200 |         root = None
201 |         seen = set()
202 | 
203 |         for host in result:
204 |             parent = host["parent_ip"]
205 |             nfo = (
206 |                 host["ip"],
207 |                 host["found_via"],
208 |                 host["labels"],
209 |                 host.get("at_time", None),
210 |             )
211 | 
212 |             if parent is None:
213 |                 root = host["ip"]
214 | 
215 |             ipmap[parent].append(nfo)
216 | 
217 |         def _build_tree(ip, parent_tree):
218 |             if ip in seen:
219 |                 return
220 | 
221 |             seen.add(ip)
222 | 
223 |             for cip, via, labels, at_time in ipmap[ip]:
224 |                 cip_fmt = f"{cip:<15}"
225 |                 via_str = f"via: [i]{via}[/i]"
226 | 
227 |                 if at_time:
228 |                     via_str = f"{via_str} @ {at_time}"
229 | 
230 |                 lnk = (
231 |                     f"[link=https://search.censys.io/hosts/{cip}][b]{cip_fmt}[/b][/link]"
232 |                     f" ({via_str}) {labels}"
233 |                 )
234 | 
235 |                 child_tree = parent_tree.add(lnk)
236 |                 _build_tree(cip, child_tree)
237 | 
238 |         if root is None:
239 |             return
240 | 
241 |         tree = Tree(f"[link=https://search.censys.io/hosts/{root}][b]{root}[/b][/link]")
242 |         _build_tree(root, tree)
243 | 
244 |         console.print("Pivot Tree:")
245 |         console.print(tree)
246 | 
247 |     console.print(f"Total queries used: {c.get_num_queries()}")
248 | 
249 | 
250 | @click.command(
251 |     context_settings=dict(max_content_width=125, help_option_names=["-h", "--help"])
252 | )
253 | @click.argument(
254 |     "ip",
255 |     required=False,
256 | )
257 | @click.option(
258 |     "--depth",
259 |     "-d",
260 |     default=0,
261 |     help=(
262 |         "[auto-pivoting] search depth (0 is single host, 1 is all the hosts that host"
263 |         " found, etc...)"
264 |     ),
265 | )
266 | @click.option(
267 |     "--workers",
268 |     default=4,
269 |     help="number of workers to run queries in parallel",
270 | )
271 | @click.option(
272 |     "--workspace",
273 |     "-w",
274 |     default=None,
275 |     help="directory for caching results (defaults to XDG configuration path)",
276 | )
277 | @click.option(
278 |     "--max-search-results",
279 |     "-m",
280 |     default=DEFAULT_MAX_SEARCH_RESULTS,
281 |     help="maximum number of censys search results to process",
282 | )
283 | @click.option(
284 |     "--log-level",
285 |     "-ll",
286 |     default=None,
287 |     help="set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
288 | )
289 | @click.option("--save", "-s", default=None, help="save report to a file")
290 | @click.option(
291 |     "--pivot-threshold",
292 |     "-p",
293 |     default=128,
294 |     help=(
295 |         "maximum number of hosts for a search term that will trigger a pivot (default:"
296 |         " 128)"
297 |     ),
298 | )
299 | @click.option(
300 |     "--at-time",
301 |     "-a",
302 |     type=click.DateTime(formats=["%Y-%m-%d %H:%M:%S", "%Y-%m-%d"]),
303 |     help="historical host data at_time.",
304 | )
305 | @click.option(
306 |     "--query-prefix",
307 |     "-q",
308 |     default=None,
309 |     help=(
310 |         "prefix to add to all queries (useful for filtering, the ' and ' is added"
311 |         " automatically)"
312 |     ),
313 | )
314 | @click.option(
315 |     "--input-workers",
316 |     default=2,
317 |     help=(
318 |         "number of parallel workers to process inputs (e.g., only has an effect on"
319 |         " stdin inputs)"
320 |     ),
321 | )
322 | @click.option(
323 |     "--query-prefix-count",
324 |     "-qp",
325 |     is_flag=True,
326 |     default=False,
327 |     help=(
328 |         "If the --query-prefix is set, this will return a count of hosts for both the"
329 |         " filtered and unfiltered results."
330 |     ),
331 | )
332 | @click.option(
333 |     "--config",
334 |     "-c",
335 |     "cfgfile_",
336 |     default=None,
337 |     help="configuration file path",
338 | )
339 | @click.option(
340 |     "--min-pivot-weight",
341 |     "-mp",
342 |     "-M",
343 |     type=float,
344 |     help=(
345 |         "[auto-pivoting] only pivot into fields with a weight greater-than or equal-to"
346 |         " this number (see configuration)"
347 |     ),
348 | )
349 | @click.option(
350 |     "--fast", is_flag=True, help="[auto-pivoting] alias for --min-pivot-weight 1.0"
351 | )
352 | @click.option(
353 |     "--slow", is_flag=True, help="[auto-pivoting] alias for --min-pivot-weight 0.0"
354 | )
355 | @click.option(
356 |     "--gadget",
357 |     "-g",
358 |     "-G",
359 |     multiple=True,
360 |     help="list of gadgets to load",
361 | )
362 | @click.option("--list-gadgets", is_flag=True, help="list available gadgets")
363 | @click.version_option(__version__)
364 | def main(
365 |     ip,
366 |     depth,
367 |     workers,
368 |     workspace,
369 |     max_search_results,
370 |     log_level,
371 |     save,
372 |     pivot_threshold,
373 |     at_time,
374 |     query_prefix,
375 |     input_workers,
376 |     query_prefix_count,
377 |     cfgfile_,
378 |     min_pivot_weight,
379 |     fast,
380 |     slow,
381 |     gadget,
382 |     list_gadgets,
383 | ):
384 |     if sum([fast, slow]) > 1:
385 |         print("Only one of --fast or --slow can be set.")
386 |         sys.exit(1)
387 | 
388 |     cfg = Config(cfgfile_)
389 | 
390 |     if workers:
391 |         cfg.workers = workers
392 | 
393 |     if max_search_results:
394 |         cfg.max_search_res = max_search_results
395 | 
396 |     if pivot_threshold:
397 |         cfg.max_host_count = pivot_threshold
398 | 
399 |     if min_pivot_weight:
400 |         cfg.min_pivot_weight = min_pivot_weight
401 | 
402 |     if fast:
403 |         cfg.min_pivot_weight = 1.0
404 | 
405 |     if slow:
406 |         cfg.min_pivot_weight = 0.0
407 | 
408 |     for g in gadget:
409 |         cfg.gadgets.enable(g)
410 | 
411 |     armed_gadgets = set()
412 | 
413 |     for g in cfg.gadgets.enabled():
414 |         if g.name not in unarmed_gadgets:
415 |             raise ValueError(f"gadget {g} not loaded!")
416 | 
417 |         loaded_gadget = unarmed_gadgets[g.name]
418 |         loaded_gadget.config = g.config
419 |         armed_gadgets.add(loaded_gadget)
420 | 
421 |     def _parse_ip(d):
422 |         period_replacements = ["[.]", ".]", "[."]
423 |         remove = ['"', ","]
424 | 
425 |         for r in period_replacements:
426 |             d = d.replace(r, ".")
427 |         for r in remove:
428 |             d = d.replace(r, "")
429 |         return d.strip()
430 | 
431 |     logging.captureWarnings(True)
432 | 
433 |     if log_level:
434 |         llevel = getattr(logging, log_level.upper(), None)
435 | 
436 |         if not isinstance(llevel, int):
437 |             raise ValueError(f"Invalid log level: {log_level}")
438 | 
439 |         logging.basicConfig(
440 |             level=llevel,
441 |             format=(
442 |                 "%(asctime)s [%(filename)s:%(lineno)d] - %(levelname)s - %(message)s"
443 |             ),
444 |         )
445 |     else:
446 |         logging.basicConfig(
447 |             level=logging.CRITICAL,
448 |             format=(
449 |                 "%(asctime)s [%(filename)s:%(lineno)d] - %(levelname)s - %(message)s"
450 |             ),
451 |         )
452 | 
453 |     console = Console(record=True, soft_wrap=True)
454 | 
455 |     if list_gadgets:
456 |         table = Table(title="available gadgets", box=box.MINIMAL_DOUBLE_HEAD)
457 |         table.add_column("name", no_wrap=True)
458 |         table.add_column("aliases", no_wrap=True)
459 |         table.add_column("desc", no_wrap=False)
460 | 
461 |         for name, g in unarmed_gadgets.items():
462 |             table.add_row(
463 |                 f"[bold]{name}[/bold]", f"[i]{', '.join(g.aliases)}[/i]", g.__doc__
464 |             )
465 | 
466 |         console.print(table)
467 |         sys.exit(0)
468 | 
469 |     async def _run_worker(queue):
470 |         while not queue.empty():
471 |             ip = await queue.get()
472 |             logging.debug(
473 |                 f"processing {ip} - max_search_results: {max_search_results} -"
474 |                 f" pivot_threshold: {pivot_threshold} - query_prefix: {query_prefix} -"
475 |                 f" cache_dir: {workspace} - workers: {workers} - at_time: {at_time} -"
476 |                 f" depth: {depth} - save: {save} min_pivot_weight: {min_pivot_weight}"
477 |             )
478 |             await run_censeye(
479 |                 ip,
480 |                 duo_reporting=query_prefix_count,
481 |                 query_prefix=query_prefix,
482 |                 cache_dir=workspace,
483 |                 at_time=at_time,
484 |                 depth=depth,
485 |                 console=console,
486 |                 config=cfg,
487 |                 gadgets=armed_gadgets,
488 |             )
489 |             queue.task_done()
490 | 
491 |     async def _run_stdin():
492 |         wqueue = asyncio.Queue()
493 |         for line in sys.stdin:
494 |             ip = line.strip()
495 |             if ip:
496 |                 await wqueue.put(_parse_ip(ip))
497 | 
498 |         tasks = []
499 |         for _ in range(input_workers):
500 |             tasks.append(_run_worker(wqueue))
501 | 
502 |         await asyncio.gather(*tasks)
503 | 
504 |     if ip == "-" or not ip:
505 |         logging.info("processing IPs from stdin")
506 |         asyncio.run(_run_stdin())
507 |     else:
508 |         asyncio.run(
509 |             run_censeye(
510 |                 _parse_ip(ip),
511 |                 duo_reporting=query_prefix_count,
512 |                 query_prefix=query_prefix,
513 |                 cache_dir=workspace,
514 |                 at_time=at_time,
515 |                 depth=depth,
516 |                 console=console,
517 |                 config=cfg,
518 |                 gadgets=armed_gadgets,
519 |             )
520 |         )
521 | 
522 |     if save:
523 |         console.save_html(save)
524 | 
525 | 
526 | if __name__ == "__main__":
527 |     main()
528 | 


--------------------------------------------------------------------------------
/censeye/config.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import warnings
  3 | from dataclasses import dataclass, field
  4 | from typing import Any, Optional, Union
  5 | 
  6 | import yaml
  7 | 
  8 | from .gadgets import unarmed_gadgets
  9 | 
 10 | IgnoreType = Optional[Union[list[str], list[dict[str, list[str]]]]]
 11 | 
 12 | 
 13 | @dataclass
 14 | class Field:
 15 |     name: str
 16 |     weight: float = 0.0
 17 |     ignore: IgnoreType = field(default_factory=list)
 18 | 
 19 |     def __eq__(self, other: Any) -> bool:
 20 |         if isinstance(other, Field):
 21 |             return self.name == other.name
 22 |         if isinstance(other, str):
 23 |             return self.name == other
 24 |         return False
 25 | 
 26 |     def __hash__(self) -> int:
 27 |         return hash(self.name)
 28 | 
 29 | 
 30 | @dataclass
 31 | class Gadget:
 32 |     name: str
 33 |     aliases: list[str]
 34 |     enabled: bool = False
 35 |     config: dict[str, Any] = field(default_factory=dict)
 36 | 
 37 |     def __hash__(self) -> int:
 38 |         return hash((self.name, frozenset(self.aliases)))
 39 | 
 40 |     def __eq__(self, other: Any) -> bool:
 41 |         if not isinstance(other, Gadget):
 42 |             return False
 43 | 
 44 |         return self.name == other.name and self.aliases == other.aliases
 45 | 
 46 | 
 47 | class Gadgets:
 48 |     def __init__(self, gadgets: Optional[set[Gadget]] = None) -> None:
 49 |         if gadgets is None:
 50 |             gadgets = set()
 51 |         self.gadgets = gadgets
 52 | 
 53 |     def __iter__(self):
 54 |         return iter(self.gadgets)
 55 | 
 56 |     def __getitem__(self, key):
 57 |         for gadget in self.gadgets:
 58 |             if gadget.name == key or key in gadget.aliases:
 59 |                 return gadget
 60 |         return None
 61 | 
 62 |     def __contains__(self, key):
 63 |         return any(
 64 |             key == gadget.name or key in gadget.aliases for gadget in self.gadgets
 65 |         )
 66 | 
 67 |     def __len__(self):
 68 |         return len(self.gadgets)
 69 | 
 70 |     def __str__(self):
 71 |         return str(self.gadgets)
 72 | 
 73 |     def __repr__(self):
 74 |         return repr(self.gadgets)
 75 | 
 76 |     def __eq__(self, other: Any) -> bool:
 77 |         if isinstance(other, Gadgets):
 78 |             return self.gadgets == other.gadgets
 79 |         return False
 80 | 
 81 |     def __hash__(self) -> int:
 82 |         return hash(frozenset(self.gadgets))
 83 | 
 84 |     def add(self, gadget: Gadget):
 85 |         if gadget in self.gadgets:
 86 |             self.gadgets.remove(gadget)
 87 |         self.gadgets.add(gadget)
 88 | 
 89 |     def update(self, gadgets: set[Gadget]):
 90 |         self.gadgets.update(gadgets)
 91 | 
 92 |     def enable(self, name: str):
 93 |         for gadget in self.gadgets:
 94 |             if gadget.name == name or name in gadget.aliases:
 95 |                 gadget.enabled = True
 96 |                 return
 97 |         raise ValueError(f"Gadget {name} not found")
 98 | 
 99 |     def disable(self, name: str):
100 |         for gadget in self.gadgets:
101 |             if gadget.name == name or name in gadget.aliases:
102 |                 gadget.enabled = False
103 |                 return
104 |         raise ValueError(f"Gadget {name} not found")
105 | 
106 |     def enabled(self) -> set[Gadget]:
107 |         return {gadget for gadget in self.gadgets if gadget.enabled}
108 | 
109 | 
110 | @dataclass
111 | class Config:
112 |     def __init__(self, config_file=None) -> None:
113 |         self._load_defauts()
114 | 
115 |         if config_file:
116 |             try:
117 |                 self._load_config(config_file)
118 |             except FileNotFoundError:
119 |                 warnings.warn(
120 |                     f"Config file {config_file} not found, using defaults", stacklevel=2
121 |                 )
122 |         else:
123 |             home_dir = os.path.expanduser("~")
124 |             try:
125 |                 self._load_config(
126 |                     os.path.join(home_dir, ".config", "censys", "censeye.yaml")
127 |                 )
128 |             except FileNotFoundError:
129 |                 pass
130 | 
131 |     def _load_defauts(self) -> None:
132 |         self.workers = 2
133 |         self.max_serv_count = 20
134 |         self.max_search_res = 45
135 |         self.min_host_count = 2
136 |         self.max_host_count = 120
137 |         self.min_pivot_weight = 0.0
138 |         self.gadgets = Gadgets()
139 | 
140 |         for name, gadget in unarmed_gadgets.items():
141 |             self.gadgets.add(
142 |                 Gadget(
143 |                     name=name,
144 |                     aliases=gadget.aliases,
145 |                     config=gadget.config,
146 |                     enabled=False,
147 |                 )
148 |             )
149 | 
150 |         self.fields = [
151 |             # Field definitions for the query generator gadgets (if enabled), so we can use them for pivots
152 |             Field(name="open-directory.gadget.censeye", weight=1.0, ignore=[]),
153 |             Field(name="nobbler.gadget.censeye", weight=0.8, ignore=[]),
154 |             # Field definitions for the search results
155 |             Field(name="services.banner_hex", weight=1.0, ignore=[]),
156 |             Field(name="services.ssh.endpoint_id.raw", weight=0.9, ignore=[]),
157 |             Field(
158 |                 name="services.ssh.server_host_key.fingerprint_sha256",
159 |                 weight=1.0,
160 |                 ignore=[],
161 |             ),
162 |             Field(
163 |                 name="services.http.response.body_hash",
164 |                 weight=1.0,
165 |                 ignore=[
166 |                     "sha1:4dcf84abb6c414259c1d5aec9a5598eebfcea842",
167 |                     "sha256:55c801a02ad9a08dfdcf159ba0c8354b37189519ce9a95129941ec6daeca5648",
168 |                     "sha1:11e71530661013137721d635f95630722eaa6afd",
169 |                     "sha256:036bacf3bd34365006eac2a78e4520a953a6250e9550dcf9c9d4b0678c225b4c",
170 |                 ],
171 |             ),
172 |             Field(name="services.jarm.fingerprint", weight=1.0, ignore=[]),
173 |             Field(
174 |                 name="services.tls.certificates.leaf_data.subject_dn",
175 |                 weight=1.0,
176 |                 ignore=[],
177 |             ),
178 |             Field(
179 |                 name="services.tls.certificates.leaf_data.issuer_dn",
180 |                 weight=1.0,
181 |                 ignore=[
182 |                     "C=US, O=DigiCert Inc, CN=DigiCert Global G2 TLS RSA SHA256 2020 CA1"
183 |                 ],
184 |             ),
185 |             Field(
186 |                 name="~services.tls.certificates.leaf_data.issuer.common_name",
187 |                 weight=0.5,
188 |                 ignore=["127.0.0.1"],
189 |             ),
190 |             Field(
191 |                 name="services.tls.certificates.leaf_data.issuer.common_name",
192 |                 weight=1.0,
193 |                 ignore=["DigiCert Global G2 TLS RSA SHA256 2020 CA1"],
194 |             ),
195 |             Field(
196 |                 name="~services.tls.certificates.leaf_data.subject.organization",
197 |                 weight=0.5,
198 |                 ignore=["Cloudflare, Inc."],
199 |             ),
200 |             Field(
201 |                 name="services.tls.certificates.leaf_data.subject.organization",
202 |                 weight=1.0,
203 |                 ignore=[],
204 |             ),
205 |             Field(name="services.certificate", weight=1.0, ignore=[]),
206 |             Field(
207 |                 name="services.http.response.html_tags",
208 |                 weight=0.9,
209 |                 ignore=[
210 |                     "<title>301 Moved Permanently</title>",
211 |                     "<title>403 Forbidden</title>",
212 |                     "<title> 403 Forbidden </title>",
213 |                     "<title>404 Not Found</title>",
214 |                     "<title></title>",
215 |                     "<title>401 - Unauthorized</title>",
216 |                     "<TITLE>Not Found</TITLE>",
217 |                     '<meta charset="UTF-8">',
218 |                     '<meta charset="utf-8">',
219 |                     "<title>400 The plain HTTP request was sent to HTTPS port</title>",
220 |                     '<meta charset="UTF-8" />',
221 |                     '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />',
222 |                     '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">',
223 |                     '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">',
224 |                     "<TITLE>ERROR: The request could not be satisfied</TITLE>",
225 |                     "<title>400 The plain HTTP request was sent to HTTPS port</title>",
226 |                 ],
227 |             ),
228 |             Field(
229 |                 name="services.http.response.favicons.md5_hash", weight=0.9, ignore=[]
230 |             ),
231 |             Field(
232 |                 name="services.parsed.opc_ua.endpoints.endpoint_url",
233 |                 weight=0.5,
234 |                 ignore=[],
235 |             ),
236 |             Field(
237 |                 name="services.parsed.opc_ua.endpoints.server.product_uri",
238 |                 weight=0.5,
239 |                 ignore=[],
240 |             ),
241 |             Field(
242 |                 name="services.parsed.opc_ua.endpoints.server.application_name.text",
243 |                 weight=0.5,
244 |                 ignore=[],
245 |             ),
246 |             Field(
247 |                 name="~services.parsed.winrm.ntlm_info.dns_server_name",
248 |                 weight=0.1,
249 |                 ignore=[],
250 |             ),
251 |             Field(
252 |                 name="services.parsed.winrm.ntlm_info.dns_server_name",
253 |                 weight=0.4,
254 |                 ignore=[],
255 |             ),
256 |             Field(
257 |                 name="~services.parsed.winrm.ntlm_info.netbios_computer_name",
258 |                 weight=0.5,
259 |                 ignore=[],
260 |             ),
261 |             Field(
262 |                 name="services.parsed.winrm.ntlm_info.netbios_computer_name",
263 |                 weight=0.8,
264 |                 ignore=[],
265 |             ),
266 |             Field(name="services.snmp.oid_system.desc", weight=0.5, ignore=[]),
267 |             Field(name="services.snmp.oid_system.contact", weight=0.5, ignore=[]),
268 |             Field(name="services.snmp.oid_system.name", weight=0.3, ignore=[]),
269 |             Field(name="services.snmp.oid_system.location", weight=0.2, ignore=[]),
270 |             Field(name="services.snmp.engine.organization", weight=0.1, ignore=[]),
271 |             Field(
272 |                 name="services.parsed.eip.identity.product_name", weight=0.4, ignore=[]
273 |             ),
274 |             Field(
275 |                 name="services.parsed.eip.identity.vendor_name", weight=0.3, ignore=[]
276 |             ),
277 |             Field(
278 |                 name="services.tls.certificates.leaf_data.subject.organizational_unit",
279 |                 weight=1.0,
280 |                 ignore=[],
281 |             ),
282 |             Field(
283 |                 name="~services.tls.certificates.leaf_data.subject.email_address",
284 |                 weight=0.2,
285 |                 ignore=[],
286 |             ),
287 |             Field(
288 |                 name="services.tls.certificates.leaf_data.subject.email_address",
289 |                 weight=0.5,
290 |                 ignore=[],
291 |             ),
292 |             Field(name="services.telnet.banner", weight=1.0, ignore=[]),
293 |             Field(
294 |                 name="services.http.response.headers",
295 |                 weight=0.8,
296 |                 ignore=[
297 |                     {"Location": ["*/"]},
298 |                     {"Vary": ["Accept-Encoding"]},
299 |                     {
300 |                         "Content-Type": [
301 |                             "text/html",
302 |                             "text/html; charset=UTF-8",
303 |                             "text/html;charset=UTF-8",
304 |                             "text/html; charset=utf-8",
305 |                         ]
306 |                     },
307 |                     {
308 |                         "Content-type": [
309 |                             "text/html",
310 |                             "text/html; charset=UTF-8",
311 |                             "text/html;charset=UTF-8",
312 |                             "text/html; charset=utf-8",
313 |                         ]
314 |                     },
315 |                     {"Connection": ["close", "keep-alive", "Keep-Alive"]},
316 |                     {"Transfer-Encoding": ["chunked"]},
317 |                     {"Pragma": ["no-cache"]},
318 |                     {"Cache-Control": ["no-cache"]},
319 |                     {"Content-Encoding": ["gzip"]},
320 |                     {"Date": ["<REDACTED>"]},
321 |                     {"X-Frame-Options": ["SAMEORIGIN", "DENY"]},
322 |                     {"Server": ["nginx", "Microsoft-HTTPAPI/2.0", "cloudflare"]},
323 |                     {"Content-Length": ["*"]},
324 |                     {"Last-Modified": ["*"]},
325 |                     {"Accept-Ranges": ["bytes"]},
326 |                 ],
327 |             ),
328 |             Field(
329 |                 name="~services.parsed.l2tp.sccrp.attribute_values.hostname",
330 |                 weight=0.2,
331 |                 ignore=[],
332 |             ),
333 |             Field(
334 |                 name="services.parsed.l2tp.sccrp.attribute_values.hostname",
335 |                 weight=0.5,
336 |                 ignore=[],
337 |             ),
338 |             Field(
339 |                 name="services.parsed.l2tp.sccrp.attribute_values.vendor_name",
340 |                 weight=0.5,
341 |                 ignore=[],
342 |             ),
343 |             Field(name="~services.vnc.desktop_name", weight=0.2, ignore=[]),
344 |             Field(name="services.vnc.desktop_name", weight=0.5, ignore=[]),
345 |             Field(name="services.bacnet.vendor_name", weight=0.4, ignore=[]),
346 |             Field(
347 |                 name="services.bacnet.application_software_revision",
348 |                 weight=0.2,
349 |                 ignore=[],
350 |             ),
351 |             Field(name="services.bacnet.object_name", weight=0.2, ignore=[]),
352 |             Field(name="services.bacnet.model_name", weight=0.2, ignore=[]),
353 |             Field(name="~services.bacnet.description", weight=0.1, ignore=[]),
354 |             Field(name="services.bacnet.description", weight=0.2, ignore=[]),
355 |             Field(
356 |                 name="~services.parsed.chromecast.applications.display_name",
357 |                 weight=0.0,
358 |                 ignore=[],
359 |             ),
360 |             Field(
361 |                 name="services.parsed.chromecast.applications.display_name",
362 |                 weight=0.1,
363 |                 ignore=[],
364 |             ),
365 |             Field(name="services.cobalt_strike.x86.watermark", weight=1.0, ignore=[]),
366 |             Field(name="services.cobalt_strike.x86.public_key", weight=1.0, ignore=[]),
367 |             Field(name="services.cobalt_strike.x86.post_ex.x86", weight=0.1, ignore=[]),
368 |             Field(name="services.cobalt_strike.x86.post_ex.x64", weight=0.1, ignore=[]),
369 |             Field(
370 |                 name="services.cobalt_strike.x86.http_post.uri", weight=1.0, ignore=[]
371 |             ),
372 |             Field(name="services.cobalt_strike.x86.user_agent", weight=1.0, ignore=[]),
373 |             Field(name="services.cobalt_strike.x64.watermark", weight=1.0, ignore=[]),
374 |             Field(name="services.cobalt_strike.x64.public_key", weight=1.0, ignore=[]),
375 |             Field(name="services.cobalt_strike.x64.post_ex.x86", weight=0.1, ignore=[]),
376 |             Field(name="services.cobalt_strike.x64.post_ex.x64", weight=0.1, ignore=[]),
377 |             Field(
378 |                 name="services.cobalt_strike.x64.http_post.uri", weight=1.0, ignore=[]
379 |             ),
380 |             Field(name="services.cobalt_strike.x64.user_agent", weight=1.0, ignore=[]),
381 |             Field(
382 |                 name="services.cwmp.http_info.favicons.md5_hash", weight=0.5, ignore=[]
383 |             ),
384 |             Field(name="services.cwmp.http_info.headers", weight=0.5, ignore=[]),
385 |             Field(name="services.cwmp.http_info.html_tags", weight=0.5, ignore=[]),
386 |             Field(name="services.parsed.cwmp.server", weight=1.0, ignore=[]),
387 |             Field(
388 |                 name="~services.parsed.dhcpdiscover.params.device_info.machine_name",
389 |                 weight=0.2,
390 |                 ignore=[],
391 |             ),
392 |             Field(
393 |                 name="services.parsed.dhcpdiscover.params.device_info.machine_name",
394 |                 weight=0.5,
395 |                 ignore=[],
396 |             ),
397 |             Field(
398 |                 name="services.parsed.dhcpdiscover.params.device_info.device_type",
399 |                 weight=0.1,
400 |                 ignore=[],
401 |             ),
402 |             Field(
403 |                 name="services.parsed.dhcpdiscover.params.device_info.vendor",
404 |                 weight=0.1,
405 |                 ignore=[],
406 |             ),
407 |             Field(
408 |                 name="services.parsed.dhcpdiscover.params.device_info.version",
409 |                 weight=0.1,
410 |                 ignore=[],
411 |             ),
412 |             Field(
413 |                 name="services.elasticsearch.system_info.version.number",
414 |                 weight=0.1,
415 |                 ignore=[],
416 |             ),
417 |             Field(
418 |                 name="services.elasticsearch.system_info.version.lucene_version",
419 |                 weight=0.1,
420 |                 ignore=[],
421 |             ),
422 |             Field(
423 |                 name="services.elasticsearch.node_info.cluster_combined_info.name",
424 |                 weight=0.1,
425 |                 ignore=[],
426 |             ),
427 |             Field(
428 |                 name="services.elasticsearch.node_info.cluster_combined_info.indices.docs.count",
429 |                 weight=0.1,
430 |                 ignore=[],
431 |             ),
432 |             Field(
433 |                 name="services.elasticsearch.node_info.nodes.node_data.host",
434 |                 weight=0.3,
435 |                 ignore=[],
436 |             ),
437 |             Field(
438 |                 name="services.elasticsearch.node_info.nodes.node_data.name",
439 |                 weight=0.1,
440 |                 ignore=[],
441 |             ),
442 |             Field(name="services.parsed.etcd.v2.members.id", weight=0.1, ignore=[]),
443 |             Field(
444 |                 name="services.parsed.etcd.v3.members.peer_urls", weight=0.1, ignore=[]
445 |             ),
446 |             Field(
447 |                 name="services.parsed.etcd.v3.members.client_urls",
448 |                 weight=0.2,
449 |                 ignore=[],
450 |             ),
451 |             Field(name="~services.fox.hostname", weight=0.2, ignore=[]),
452 |             Field(name="services.fox.hostname", weight=0.5, ignore=[]),
453 |             Field(name="~services.fox.station_name", weight=0.1, ignore=[]),
454 |             Field(name="services.fox.station_name", weight=0.3, ignore=[]),
455 |             Field(name="services.fox.sys_info", weight=0.1, ignore=[]),
456 |             Field(name="services.fox.vm_version", weight=0.1, ignore=[]),
457 |             Field(name="services.fox.os_version", weight=0.1, ignore=[]),
458 |             Field(name="services.fox.hostId", weight=0.1, ignore=[]),
459 |             Field(name="services.mms.model", weight=0.1, ignore=[]),
460 |             Field(name="services.mms.vendor", weight=0.1, ignore=[]),
461 |             Field(
462 |                 name="services.mongodb.build_info.git_version", weight=0.1, ignore=[]
463 |             ),
464 |             Field(name="services.mysql.server_version", weight=0.1, ignore=[]),
465 |             Field(name="services.parsed.nbd.exports.name", weight=0.1, ignore=[]),
466 |             Field(
467 |                 name="services.parsed.onvif.services.namespace", weight=0.1, ignore=[]
468 |             ),
469 |             Field(name="services.parsed.onvif.services.xaddr", weight=0.1, ignore=[]),
470 |             Field(name="services.parsed.onvif.hostname.name", weight=0.1, ignore=[]),
471 |             Field(name="services.parsed.pcom.model", weight=0.3, ignore=[]),
472 |             Field(name="services.parsed.pcom.os_build", weight=0.1, ignore=[]),
473 |             Field(name="services.parsed.pcom.os_version", weight=0.1, ignore=[]),
474 |             Field(name="~services.pc_anywhere.name", weight=0.5, ignore=[]),
475 |             Field(name="services.pc_anywhere.name", weight=1.0, ignore=[]),
476 |             Field(name="~services.pptp.hostname", weight=0.5, ignore=[]),
477 |             Field(name="services.pptp.hostname", weight=1.0, ignore=[]),
478 |             Field(name="services.parsed.redlion_crimson.model", weight=0.1, ignore=[]),
479 |             Field(
480 |                 name="services.parsed.rocketmq.topics.topic_list", weight=0.5, ignore=[]
481 |             ),
482 |             Field(name="services.parsed.rocketmq.version", weight=0.1, ignore=[]),
483 |             Field(name="services.s7.plant_id", weight=0.2, ignore=[]),
484 |             Field(name="services.s7.memory_serial_number", weight=0.3, ignore=[]),
485 |             Field(name="~services.parsed.scpi.manufacturer", weight=0.0, ignore=[]),
486 |             Field(name="services.parsed.scpi.manufacturer", weight=0.1, ignore=[]),
487 |             Field(name="services.parsed.scpi.model", weight=0.1, ignore=[]),
488 |             Field(name="services.parsed.scpi.firmware", weight=0.1, ignore=[]),
489 |             Field(name="services.smb.group_name", weight=1.0, ignore=[]),
490 |             Field(name="services.smb.ntlm", weight=0.1, ignore=[]),
491 |             Field(name="services.upnp.devices.manufacturer", weight=0.1, ignore=[]),
492 |             Field(name="~services.upnp.devices.model_name", weight=0.0, ignore=[]),
493 |             Field(name="services.upnp.devices.model_name", weight=0.1, ignore=[]),
494 |             Field(name="services.upnp.devices.serial_number", weight=0.1, ignore=[]),
495 |             Field(
496 |                 name="services.parsed.zeromq.handshake.socket_type",
497 |                 weight=0.0,
498 |                 ignore=[],
499 |             ),
500 |             Field(
501 |                 name="services.tls.certificate.parsed.issuer.locality",
502 |                 weight=1.0,
503 |                 ignore=[],
504 |             ),
505 |         ]
506 | 
507 |     def _load_config(self, config_file):
508 |         with open(config_file) as file:
509 |             cfg = yaml.safe_load(file)
510 | 
511 |         self.workers = cfg.get("workers", self.workers)
512 |         self.max_serv_count = cfg.get("max_serv_count", self.max_serv_count)
513 |         self.max_search_res = cfg.get("max_search_results", self.max_search_res)
514 |         self.min_host_count = cfg.get("rarity", {}).get("min", self.min_host_count)
515 |         self.max_host_count = cfg.get("rarity", {}).get("max", self.max_host_count)
516 |         self.min_pivot_weight = cfg.get("min_pivot_weight", self.min_pivot_weight)
517 |         self.gadgets = cfg.get("gadgets", self.gadgets)
518 | 
519 |         if "fields" in cfg:
520 |             for item in cfg["fields"]:
521 |                 self.fields.append(
522 |                     Field(
523 |                         name=item["field"],
524 |                         weight=item.get("weight", 0.0),
525 |                         ignore=item.get("ignore", []),
526 |                     )
527 |                 )
528 |         if "gadgets" in cfg:
529 |             self.gadgets = Gadgets()
530 |             for item in cfg["gadgets"]:
531 |                 name = item["gadget"]
532 | 
533 |                 if name not in unarmed_gadgets:
534 |                     raise ValueError(f"Gadget {name} not found")
535 | 
536 |                 base = unarmed_gadgets[name]
537 | 
538 |                 self.gadgets.add(
539 |                     Gadget(
540 |                         name=name,
541 |                         aliases=base.aliases,
542 |                         config=item.get("config", base.config),
543 |                         enabled=item.get("enabled", False),
544 |                     )
545 |                 )
546 | 
547 |     def __iter__(self):
548 |         return iter(self.fields)
549 | 
550 |     def __getitem__(self, key) -> Optional[Field]:
551 |         for _field in self.fields:
552 |             if _field == key:
553 |                 return _field
554 |         return None
555 | 


--------------------------------------------------------------------------------
/censeye/const.py:
--------------------------------------------------------------------------------
1 | from censys.search import CensysHosts
2 | 
3 | from .__version__ import __version__
4 | 
5 | DEFAULT_MAX_SEARCH_RESULTS = 45
6 | USER_AGENT = CensysHosts.DEFAULT_USER_AGENT + f" censeye/{__version__}"
7 | 


--------------------------------------------------------------------------------
/censeye/gadget.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | from abc import ABC, abstractmethod
  4 | from typing import Any, Optional
  5 | 
  6 | from appdirs import user_cache_dir
  7 | 
  8 | GADGET_NAMESPACE = "gadget.censeye"
  9 | 
 10 | 
 11 | class Gadget(ABC):
 12 |     name: str
 13 |     aliases: list[str]
 14 |     cache_dir: str
 15 |     config: dict[str, Any] = {}
 16 | 
 17 |     Namespace = GADGET_NAMESPACE
 18 | 
 19 |     def __init__(
 20 |         self,
 21 |         name: str,
 22 |         aliases: Optional[list[str]] = None,
 23 |         config: Optional[dict[str, Any]] = None,
 24 |     ):
 25 |         self.name = name
 26 |         if aliases is None:
 27 |             aliases = []
 28 |         self.aliases = aliases
 29 |         self.cache_dir = self.get_cache_dir()
 30 |         if config is None:
 31 |             config = dict()
 32 |         self.config = config
 33 | 
 34 |     @abstractmethod
 35 |     def run(self, host: dict) -> Any:
 36 |         pass
 37 | 
 38 |     def set_config(self, config: Optional[dict[str, Any]]) -> None:
 39 |         self.config = config or self.config
 40 | 
 41 |     def get_env(self, key: str, default=None):
 42 |         return os.getenv(key, default)
 43 | 
 44 |     def get_cache_dir(self) -> str:
 45 |         cache_dir = user_cache_dir(f"censys/{self.name}")
 46 |         os.makedirs(cache_dir, exist_ok=True)
 47 |         return cache_dir
 48 | 
 49 |     def get_cache_file(self, filename: str) -> str:
 50 |         return os.path.join(self.cache_dir, filename)
 51 | 
 52 |     def load_json(self, filename: str) -> Optional[dict]:
 53 |         try:
 54 |             with open(self.get_cache_file(filename)) as f:
 55 |                 return json.load(f)
 56 |         except FileNotFoundError:
 57 |             return None
 58 | 
 59 |     def save_json(self, filename: str, data: dict) -> None:
 60 |         with open(self.get_cache_file(filename), "w") as f:
 61 |             json.dump(data, f)
 62 | 
 63 |     def __str__(self) -> str:
 64 |         return f"{self.__class__.__name__}({self.name})"
 65 | 
 66 |     def __repr__(self) -> str:
 67 |         return str(self)
 68 | 
 69 | 
 70 | class HostLabelerGadget(Gadget):
 71 |     @abstractmethod
 72 |     def label_host(self, host: dict) -> None:
 73 |         pass
 74 | 
 75 |     def run(self, host: dict) -> Any:
 76 |         self.label_host(host)
 77 | 
 78 |     def add_label(
 79 |         self,
 80 |         host: dict,
 81 |         label: str,
 82 |         style: Optional[str] = None,
 83 |         link: Optional[str] = None,
 84 |     ) -> None:
 85 |         if style:
 86 |             label = f"[{style}]{label}[/{style}]"
 87 |         if link:
 88 |             label = f"[link={link}]{label}[/link]"
 89 |         host["labels"].append(label)
 90 | 
 91 | 
 92 | class QueryGeneratorGadget(Gadget):
 93 |     @abstractmethod
 94 |     def generate_query(self, host: dict) -> Optional[set[tuple[str, str]]]:
 95 |         pass
 96 | 
 97 |     def run(self, host: dict) -> Optional[set[tuple[str, str]]]:
 98 |         ret = set()
 99 | 
100 |         q = self.generate_query(host)
101 |         if not q:
102 |             return None
103 | 
104 |         for k, v in q:
105 |             if not k.endswith(self.Namespace):
106 |                 k = f"{k}.{self.Namespace}"
107 |             ret.add((k, v))
108 | 
109 |         return ret
110 | 


--------------------------------------------------------------------------------
/censeye/gadgets/__init__.py:
--------------------------------------------------------------------------------
 1 | import importlib
 2 | from pathlib import Path
 3 | 
 4 | from censeye.gadget import Gadget
 5 | 
 6 | 
 7 | def load_gadgets() -> dict[str, Gadget]:
 8 |     gadgets = {}
 9 |     for file in Path(__file__).parent.glob("*.py"):
10 |         if file.stem == "__init__":
11 |             continue
12 |         module = importlib.import_module(f"censeye.gadgets.{file.stem}")
13 |         gadget: Gadget = module.__gadget__
14 |         gadgets[gadget.name] = gadget
15 |     return gadgets
16 | 
17 | 
18 | unarmed_gadgets = load_gadgets()
19 | 


--------------------------------------------------------------------------------
/censeye/gadgets/nobbler.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | from censeye.gadget import QueryGeneratorGadget
 4 | 
 5 | 
 6 | class NobblerGadget(QueryGeneratorGadget):
 7 |     """When the service_name is UNKNOWN, it is often more effective to search the first N bytes of the response rather than analyzing the entire response.
 8 | 
 9 |     Many services include a fixed header or a "magic number" at the beginning of their responses, followed by dynamic data at a later offset. This feature generates queries that focus on the initial N bytes of the response at various offsets while using wildcards for the remaining data.
10 | 
11 |     The goal is to make the search more generalizable: analyzing the full UNKNOWN response might only match a specific host, whereas examining just the initial N bytes is likely to match similar services across multiple hosts.
12 | 
13 |     Configuration:
14 |     - iterations: A list of integers specifying the number of bytes to examine at the start of the response.
15 |     - default: [4, 8, 16, 32]
16 |     - services.banner_hex=XXXXXXXX*
17 |     - services.banner_hex=XXXXXXXXXXXXXXXX*
18 |     - services.banner_hex=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*
19 |     - services.banner_hex=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*
20 | 
21 |     "If a nibble is to bits, then a nobble is to bytes." - Aristotle
22 |     """
23 | 
24 |     def __init__(self):
25 |         super().__init__("nobbler", aliases=["nob", "nblr"])
26 | 
27 |         if not self.config.get("iterations"):
28 |             self.config["iterations"] = [4, 8, 16, 32]
29 | 
30 |     def generate_query(self, host: dict) -> Optional[set[tuple[str, str]]]:
31 |         queries = set()
32 |         for service in host.get("services", []):
33 |             if service.get("service_name") == "UNKNOWN":
34 |                 banner_hex = service.get("banner_hex", "")
35 | 
36 |                 for i in self.config["iterations"]:
37 |                     if len(banner_hex) > i:
38 |                         nobbled = banner_hex[:i]
39 |                         queries.add(
40 |                             (
41 |                                 "nobbler",
42 |                                 f"services.banner_hex={nobbled}*",
43 |                             )
44 |                         )
45 |         return queries
46 | 
47 | 
48 | __gadget__ = NobblerGadget()
49 | 


--------------------------------------------------------------------------------
/censeye/gadgets/opendir.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Optional
 3 | 
 4 | from bs4 import BeautifulSoup
 5 | 
 6 | from censeye.gadget import QueryGeneratorGadget
 7 | 
 8 | 
 9 | class OpenDirectoryGadget(QueryGeneratorGadget):
10 |     """When a service is found with an open directory listing, this gadget will attempt to parse out the file names from the HTTP response body and generate queries for each file found.
11 | 
12 |     This is useful for finding additional hosts with the same specific files.
13 | 
14 |     Configuration
15 |      - max_files: The maximum number of files to generate queries for.
16 |        default: 32
17 |      - min_chars: The minimum number of characters a file name must have to be considered.
18 |        default: 2
19 |     """
20 | 
21 |     def __init__(self):
22 |         super().__init__("open-directory", aliases=["odir", "open-dir"])
23 | 
24 |         if not self.config.get("max_files"):
25 |             self.config["max_files"] = 32
26 |         if not self.config.get("min_chars"):
27 |             self.config["min_chars"] = 2
28 | 
29 |     def _valid_file(self, file: str) -> bool:
30 |         # do more filtering as we come across weirdness
31 |         return (
32 |             "?" not in file
33 |             and not file.startswith(".")
34 |             and not file == "/.."
35 |             and not len(file) < self.config["min_chars"]
36 |         )
37 | 
38 |     def _parse_files(self, body: str) -> list[str]:
39 |         parser = BeautifulSoup(body, "html.parser")
40 |         files: list[str] = list()
41 |         for a_tag in parser.find_all("a", href=True):
42 |             href: str = a_tag["href"]
43 |             if self._valid_file(href):
44 |                 files.append(href)
45 |         return files
46 | 
47 |     def generate_query(self, host: dict) -> Optional[set[tuple[str, str]]]:
48 |         queries: set[tuple[str, str]] = set()
49 |         for service in host.get("services", []):
50 |             if "open-dir" not in service.get("labels", []):
51 |                 continue
52 | 
53 |             body = service.get("http", {}).get("response", {}).get("body")
54 |             if not body:
55 |                 continue
56 | 
57 |             files = self._parse_files(body)
58 | 
59 |             for file in files:
60 |                 if len(queries) >= self.config["max_files"]:
61 |                     logging.debug(f"[open-dir] Reached max files for {host['ip']}")
62 |                     break
63 | 
64 |                 queries.add(
65 |                     (
66 |                         "open-directory",
67 |                         f"services:(labels=open-dir and http.response.body='*{file}*')",
68 |                     )
69 |                 )
70 | 
71 |         return queries
72 | 
73 | 
74 | __gadget__ = OpenDirectoryGadget()
75 | 


--------------------------------------------------------------------------------
/censeye/gadgets/threatfox.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import Any
  3 | 
  4 | import backoff
  5 | import requests
  6 | from requests.utils import default_user_agent
  7 | 
  8 | from censeye.gadget import HostLabelerGadget
  9 | 
 10 | 
 11 | def fatal_code(e: requests.exceptions.RequestException) -> bool:
 12 |     assert isinstance(e, requests.exceptions.RequestException)
 13 |     assert e.response is not None
 14 |     assert isinstance(e.response, requests.Response)
 15 |     assert e.response.status_code is not None
 16 |     assert isinstance(e.response.status_code, int)
 17 |     return 400 <= e.response.status_code < 500
 18 | 
 19 | 
 20 | class ThreatFoxClient:
 21 |     """
 22 |     Client for the ThreatFox API.
 23 | 
 24 |     Documentation: https://threatfox.abuse.ch/api/
 25 | 
 26 |     Example usage:
 27 |     >>> from threatfox_censys.threatfox.api import ThreatFoxClient
 28 |     >>> client = ThreatFoxClient(api_key="YOUR_API_KEY")
 29 |     """
 30 | 
 31 |     api_key: str
 32 |     base_url: str
 33 |     timeout: int
 34 | 
 35 |     def __init__(
 36 |         self,
 37 |         api_key: str,
 38 |         base_url: str = "https://threatfox-api.abuse.ch/api/v1/",
 39 |         timeout: int = 30,
 40 |     ) -> None:
 41 |         """
 42 |         Initialize the ThreatFoxClient with the given parameters.
 43 | 
 44 |         :param api_key: API key for threatfox.
 45 |         :param base_url: Base URL for the API (default is their v1 endpoint).
 46 |         :param timeout: Timeout for requests (in seconds).
 47 |         """
 48 |         self.api_key = api_key
 49 |         self.base_url = base_url.rstrip("/")  # Remove trailing slash if it exists
 50 |         self.timeout = timeout
 51 |         self.headers = {
 52 |             "Auth-Key": self.api_key,
 53 |             "Accept": "application/json",
 54 |             "User-Agent": (
 55 |                 f"{default_user_agent()} (Censeye;"
 56 |                 " +https://github.com/Censys-Research/censeye)"
 57 |             ),
 58 |         }
 59 | 
 60 |     @backoff.on_exception(
 61 |         backoff.expo,
 62 |         requests.exceptions.RequestException,
 63 |         max_time=60,
 64 |         giveup=fatal_code,  # type: ignore[arg-type]
 65 |     )
 66 |     def _send_request(
 67 |         self, endpoint: str, method: str = "GET", data: Any = None
 68 |     ) -> dict:
 69 |         """
 70 |         Internal method to send requests to the API.
 71 | 
 72 |         :param endpoint: Endpoint for the API call.
 73 |         :param method: HTTP method (GET or POST).
 74 |         :param data: Dictionary with data to send (only for POST requests).
 75 |         :return: Response from the server.
 76 |         """
 77 |         url = f"{self.base_url}/{endpoint}"
 78 |         if method == "GET":
 79 |             if data:
 80 |                 raise ValueError("GET requests cannot have a data parameter")
 81 |             response = requests.get(
 82 |                 url, headers=self.headers, timeout=self.timeout
 83 |             )  # pragma: no cover
 84 |         elif method == "POST":
 85 |             response = requests.post(
 86 |                 url, headers=self.headers, json=data, timeout=self.timeout
 87 |             )
 88 |         else:
 89 |             raise ValueError("Unsupported HTTP method")
 90 | 
 91 |         # Check for HTTP errors
 92 |         if not response.ok:
 93 |             # Log the error
 94 |             logging.error(
 95 |                 f"Error sending request to {url}. Status code: {response.status_code}."
 96 |             )
 97 |             # Log the data if it exists
 98 |             if data:
 99 |                 logging.error(f"Data: {data}")
100 |             raise requests.HTTPError(response=response)
101 | 
102 |         return response.json()
103 | 
104 |     def get_recent_iocs(self, days: int = 3) -> dict:
105 |         """
106 |         Get recent IOCs on ThreatFox.
107 | 
108 |         :param days: Number of days to look back.
109 |         :return: Response from the server.
110 |         """
111 |         data = {"query": "get_iocs", "days": days}
112 |         response = self._send_request(endpoint="", method="POST", data=data)
113 |         return response
114 | 
115 |     def get_ioc_by_id(self, ioc_id: str) -> dict:
116 |         """
117 |         Get an IOC by its ID.
118 | 
119 |         :param ioc_id: ID of the IOC.
120 |         :return: Response from the server.
121 |         """
122 |         data = {"query": "ioc", "id": ioc_id}
123 |         response = self._send_request(endpoint="", method="POST", data=data)
124 |         return response
125 | 
126 |     def search_iocs(self, search_term: str) -> dict:
127 |         """
128 |         Search for an IOC on ThreatFox.
129 | 
130 |         :param search_term: The IOC you want to search for.
131 |         :return: Response from the server.
132 |         """
133 |         data = {"query": "search_ioc", "search_term": search_term}
134 |         response = self._send_request(endpoint="", method="POST", data=data)
135 |         return response
136 | 
137 | 
138 | class ThreatFoxGadget(HostLabelerGadget):
139 |     """Gadget to label hosts that are present in ThreatFox."""
140 | 
141 |     def __init__(self):
142 |         super().__init__("threatfox", aliases=["tf"])
143 |         self.api_key = self.get_env("THREATFOX_API_KEY")
144 | 
145 |     def label_host(self, host: dict) -> None:
146 |         ip = host["ip"]
147 | 
148 |         client = ThreatFoxClient(api_key=self.api_key)
149 |         cache_file = self.get_cache_file(f"{ip}.json")
150 |         response = self.load_json(cache_file)
151 | 
152 |         # If the cache is empty, get the recent IOCs
153 |         if not response:
154 |             # Get search for IOCs related to the IP
155 |             response = client.search_iocs(ip)
156 |             self.save_json(cache_file, response)
157 | 
158 |         query_status = response.get("query_status", "")
159 |         if query_status != "ok":
160 |             return
161 | 
162 |         iocs = response.get("data", [])
163 | 
164 |         if iocs:
165 |             self.add_label(
166 |                 host,
167 |                 "in-threatfox",
168 |                 style="bold red",
169 |                 link=f"https://threatfox.abuse.ch/browse.php?search=ioc%3A{ip}",
170 |             )
171 | 
172 | 
173 | __gadget__ = ThreatFoxGadget()
174 | 


--------------------------------------------------------------------------------
/censeye/gadgets/vt.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | import requests
 4 | 
 5 | from censeye.gadget import HostLabelerGadget
 6 | 
 7 | 
 8 | class VT:
 9 |     def __init__(self, key):
10 |         self.key = key
11 | 
12 |     def fetch_ip(self, ip) -> Optional[dict]:
13 |         url = f"https://www.virustotal.com/api/v3/ip_addresses/{ip}"
14 |         headers = {"accept": "application/json", "x-apikey": self.key}
15 | 
16 |         response = requests.get(url, headers=headers)
17 |         if response.status_code != 200:
18 |             return None
19 |         return response.json()
20 | 
21 | 
22 | class VTGadget(HostLabelerGadget):
23 |     """A simple VirusTotal API client which will label the host if it is found to be malicious.
24 | 
25 |     Configuration:
26 |      - VT_API_KEY: *ENVVAR* VirusTotal API key
27 |     """
28 | 
29 |     def __init__(self):
30 |         super().__init__("virustotal", aliases=["vt"])
31 |         self.api_key = self.get_env("VT_API_KEY")
32 | 
33 |     def is_malicious(self, response: dict):
34 |         # just return true/false based on what other people say
35 |         if response:
36 |             stats = (
37 |                 response.get("data", {})
38 |                 .get("attributes", {})
39 |                 .get("last_analysis_stats", {})
40 |             )
41 |             suspicious = stats.get("suspicious", 0)
42 |             malicious = stats.get("malicious", 0)
43 |             return suspicious > 0 or malicious > 0
44 |         return False
45 | 
46 |     def label_host(self, host: dict) -> None:
47 |         ip = host["ip"]
48 |         vt = VT(self.api_key)
49 |         cache_file = self.get_cache_file(f"{ip}.json")
50 |         response = self.load_json(cache_file)
51 | 
52 |         if not response:
53 |             response = vt.fetch_ip(ip)
54 |             if not response:
55 |                 return None
56 |             self.save_json(cache_file, response)
57 | 
58 |         if self.is_malicious(response):
59 |             self.add_label(
60 |                 host,
61 |                 "in-virustotal",
62 |                 style="bold red",
63 |                 link=f"https://www.virustotal.com/gui/ip-address/{ip}",
64 |             )
65 | 
66 | 
67 | __gadget__ = VTGadget()
68 | 


--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
  1 | workers: 2
  2 | max_serv_count: 20     # maximum number of services to proces on a host
  3 | max_search_results: 45  # number of search results to process (if depth > 0)
  4 | min_pivot_weight: 0.0  # only pivot into fields that have a weight greater than this
  5 | 
  6 | rarity:
  7 |   min: 2               # minimum host count for a field to be treated as "interesting"
  8 |   max: 120             # maximum host count for a field to be treated as "interesting"
  9 | 
 10 | gadgets:
 11 |   # The open-directory gadget will parse out files from HTTP open directories and generate
 12 |   # queries to find those file names on other hosts.
 13 |   - gadget: open-directory
 14 |     enabled: false # can be enabled here, or with the cli with `--gadget open-directory`
 15 |     config:
 16 |       max_files: 32 # if the number of files is over this number, then only the first N will be used to generate queries.
 17 |       min_chars: 1 # the minimum length of a filename to be considered for generating queries.
 18 |   # The nobbler gadget will look at UNKNOWN services, take the `services.banner_hex` as an input, and generate banner_hex queries
 19 |   # of different lengths. The idea here is to check for patterns in the banner_hex field that may exist with protocols with headers or
 20 |   # magic numbers where the actual payload may be dynamic.
 21 |   - gadget: nobbler
 22 |     enabled: false # can be enabled here, or with the cli with `--gadget nobbler`
 23 |     config:
 24 |       iterations: [4, 8, 16, 32] # generate wildcard banner_hex queries of these lengths, for example
 25 |                                  # [4] `services.banner_hex=NNNNNNNN*`
 26 |                                  # [8] `services.banner_hex=NNNNNNNNNNNNNNNN*`
 27 |                                  # etc etc..
 28 |   - gadget: virustotal
 29 |     enabled: false
 30 |   - gadget: threatfox
 31 |     enabled: false
 32 | 
 33 | # These are the fields that will be used to determine if a report for that field should be
 34 | # generated and analyzed.
 35 | #
 36 | # The weight is a number from 0 to 1 that is used to determine whether the tool will use that
 37 | # field to pivot into. For example, if you set the --min-pivot-weight to 1.0, and it matches
 38 | # a field with a weight of 0.1, the pivot will be taken.
 39 | fields:
 40 |   - field: services.banner_hex
 41 |     weight: 1.0
 42 |   - field: services.ssh.endpoint_id.raw
 43 |     weight: 0.9
 44 |   - field: services.ssh.server_host_key.fingerprint_sha256
 45 |     weight: 1.0
 46 |   - field: services.http.response.body_hash
 47 |     weight: 1.0
 48 |     ignore:
 49 |       # don't generate reports for any body_hash's with these values
 50 |       - "sha1:4dcf84abb6c414259c1d5aec9a5598eebfcea842"
 51 |       - "sha256:55c801a02ad9a08dfdcf159ba0c8354b37189519ce9a95129941ec6daeca5648"
 52 |       - "sha1:11e71530661013137721d635f95630722eaa6afd"
 53 |       - "sha256:036bacf3bd34365006eac2a78e4520a953a6250e9550dcf9c9d4b0678c225b4c"
 54 |   - field: services.jarm.fingerprint
 55 |     weight: 1.0
 56 |   - field: services.tls.certificates.leaf_data.subject_dn
 57 |     weight: 1.0
 58 |   - field: services.tls.certificates.leaf_data.issuer_dn
 59 |     weight: 1.0
 60 |     ignore:
 61 |       - "C=US, O=DigiCert Inc, CN=DigiCert Global G2 TLS RSA SHA256 2020 CA1"
 62 |   - field: ~services.tls.certificates.leaf_data.issuer.common_name
 63 |     weight: 0.5
 64 |     ignore:
 65 |       - "127.0.0.1"
 66 |   - field: services.tls.certificates.leaf_data.issuer.common_name
 67 |     weight: 1.0
 68 |     ignore:
 69 |       - "DigiCert Global G2 TLS RSA SHA256 2020 CA1"
 70 |   - field: ~services.tls.certificates.leaf_data.subject.organization
 71 |     weight: 0.5
 72 |     ignore:
 73 |       - "Cloudflare, Inc."
 74 |   - field: services.tls.certificates.leaf_data.subject.organization
 75 |     weight: 1.0
 76 |   - field: services.certificate
 77 |     weight: 1.0
 78 |   - field: services.http.response.html_tags
 79 |     weight: 0.9
 80 |     ignore:
 81 |       # don't generate reports for any html_tags with these values
 82 |       - "<title>301 Moved Permanently</title>"
 83 |       - "<title>403 Forbidden</title>"
 84 |       - "<title> 403 Forbidden </title>"
 85 |       - "<title>404 Not Found</title>"
 86 |       - "<title></title>"
 87 |       - "<title>401 - Unauthorized</title>"
 88 |       - "<TITLE>Not Found</TITLE>"
 89 |       - '<meta charset="UTF-8">'
 90 |       - '<meta charset="utf-8">'
 91 |       - "<title>400 The plain HTTP request was sent to HTTPS port</title>"
 92 |       - '<meta charset="UTF-8" />'
 93 |       - '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />'
 94 |       - '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">'
 95 |       - '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
 96 |       - "<TITLE>ERROR: The request could not be satisfied</TITLE>"
 97 |       - "<title>400 The plain HTTP request was sent to HTTPS port</title>"
 98 |   - field: services.http.response.favicons.md5_hash
 99 |     weight: 0.9
100 |   - field: services.parsed.opc_ua.endpoints.endpoint_url
101 |     weight: 0.5
102 |   - field: services.parsed.opc_ua.endpoints.server.product_uri
103 |     weight: 0.5
104 |   - field: services.parsed.opc_ua.endpoints.server.application_name.text
105 |     weight: 0.5
106 |   - field: ~services.parsed.winrm.ntlm_info.dns_server_name
107 |     weight: 0.1
108 |   - field: services.parsed.winrm.ntlm_info.dns_server_name
109 |     weight: 0.4
110 |   - field: ~services.parsed.winrm.ntlm_info.netbios_computer_name
111 |     weight: 0.5
112 |   - field: services.parsed.winrm.ntlm_info.netbios_computer_name
113 |     weight: 0.8
114 |   - field: services.snmp.oid_system.desc
115 |     weight: 0.5
116 |   - field: services.snmp.oid_system.contact
117 |     weight: 0.5
118 |   - field: services.snmp.oid_system.name
119 |     weight: 0.3
120 |   - field: services.snmp.oid_system.location
121 |     weight: 0.2
122 |   - field: services.snmp.engine.organization
123 |     weight: 0.1
124 |   - field: services.parsed.eip.identity.product_name
125 |     weight: 0.4
126 |   - field: services.parsed.eip.identity.vendor_name
127 |     weight: 0.3
128 |   - field: services.tls.certificates.leaf_data.subject.organizational_unit
129 |     weight: 1.0
130 |   - field: ~services.tls.certificates.leaf_data.subject.email_address
131 |     weight: 0.2
132 |   - field: services.tls.certificates.leaf_data.subject.email_address
133 |     weight: 0.5
134 |   - field: services.telnet.banner
135 |     weight: 1.0
136 |   - field: services.http.response.headers
137 |     weight: 0.8
138 |     ignore:
139 |       # don't generate reports for any HTTP response headers with these key/values
140 |       - "Location": ["*/"]
141 |       - "Vary": ["Accept-Encoding"]
142 |       - "Content-Type":
143 |           - "text/html"
144 |           - "text/html; charset=UTF-8"
145 |           - "text/html;charset=UTF-8"
146 |           - "text/html; charset=utf-8"
147 |       - "Content-type":
148 |           - "text/html"
149 |           - "text/html; charset=UTF-8"
150 |           - "text/html;charset=UTF-8"
151 |           - "text/html; charset=utf-8"
152 |       - "Connection":
153 |           - "close"
154 |           - "keep-alive"
155 |           - "Keep-Alive"
156 |       - "Transfer-Encoding": ["chunked"]
157 |       - "Pragma": ["no-cache"]
158 |       - "Cache-Control": ["no-cache"]
159 |       - "Content-Encoding": ["gzip"]
160 |       - "Date": ["<REDACTED>"]
161 |       - "X-Frame-Options":
162 |           - "SAMEORIGIN"
163 |           - "DENY"
164 |       - "Server":
165 |           - "nginx"
166 |           - "Microsoft-HTTPAPI/2.0"
167 |           - "cloudflare"
168 |       - "Content-Length": ["*"]
169 |       - "Last-Modified": ["*"]
170 |       - "Accept-Ranges": ["bytes"]
171 |   - field: ~services.parsed.l2tp.sccrp.attribute_values.hostname
172 |     weight: 0.2
173 |   - field: services.parsed.l2tp.sccrp.attribute_values.hostname
174 |     weight: 0.5
175 |   - field: services.parsed.l2tp.sccrp.attribute_values.vendor_name
176 |     weight: 0.5
177 |   - field: ~services.vnc.desktop_name
178 |     weight: 0.2
179 |   - field: services.vnc.desktop_name
180 |     weight: 0.5
181 |   - field: services.bacnet.vendor_name
182 |     weight: 0.4
183 |   - field: services.bacnet.application_software_revision
184 |     weight: 0.2
185 |   - field: services.bacnet.object_name
186 |     weight: 0.2
187 |   - field: services.bacnet.model_name
188 |     weight: 0.2
189 |   - field: ~services.bacnet.description
190 |     weight: 0.1
191 |   - field: services.bacnet.description
192 |     weight: 0.2
193 |   - field: ~services.parsed.chromecast.applications.display_name
194 |     weight: 0.0
195 |   - field: services.parsed.chromecast.applications.display_name
196 |     weight: 0.1
197 |   - field: services.cobalt_strike.x86.watermark
198 |     weight: 1.0
199 |   - field: services.cobalt_strike.x86.public_key
200 |     weight: 1.0
201 |   - field: services.cobalt_strike.x86.post_ex.x86
202 |     weight: 0.1
203 |   - field: services.cobalt_strike.x86.post_ex.x64
204 |     weight: 0.1
205 |   - field: services.cobalt_strike.x86.http_post.uri
206 |     weight: 1.0
207 |   - field: services.cobalt_strike.x86.user_agent
208 |     weight: 1.0
209 |   - field: services.cobalt_strike.x64.watermark
210 |     weight: 1.0
211 |   - field: services.cobalt_strike.x64.public_key
212 |     weight: 1.0
213 |   - field: services.cobalt_strike.x64.post_ex.x86
214 |     weight: 0.1
215 |   - field: services.cobalt_strike.x64.post_ex.x64
216 |     weight: 0.1
217 |   - field: services.cobalt_strike.x64.http_post.uri
218 |     weight: 1.0
219 |   - field: services.cobalt_strike.x64.user_agent
220 |     weight: 1.0
221 |   - field: services.cwmp.http_info.favicons.md5_hash
222 |     weight: 0.5
223 |   - field: services.cwmp.http_info.headers
224 |     weight: 0.5
225 |   - field: services.cwmp.http_info.html_tags
226 |     weight: 0.5
227 |   - field: services.parsed.cwmp.server
228 |     weight: 1.0
229 |   - field: ~services.parsed.dhcpdiscover.params.device_info.machine_name
230 |     weight: 0.2
231 |   - field: services.parsed.dhcpdiscover.params.device_info.machine_name
232 |     weight: 0.5
233 |   - field: services.parsed.dhcpdiscover.params.device_info.device_type
234 |     weight: 0.1
235 |   - field: services.parsed.dhcpdiscover.params.device_info.vendor
236 |     weight: 0.1
237 |   - field: services.parsed.dhcpdiscover.params.device_info.version
238 |     weight: 0.1
239 |   - field: services.elasticsearch.system_info.version.number
240 |     weight: 0.1
241 |   - field: services.elasticsearch.system_info.version.lucene_version
242 |     weight: 0.1
243 |   - field: services.elasticsearch.node_info.cluster_combined_info.name
244 |     weight: 0.1
245 |   - field: services.elasticsearch.node_info.cluster_combined_info.indices.docs.count
246 |     weight: 0.1
247 |   - field: services.elasticsearch.node_info.nodes.node_data.host
248 |     weight: 0.3
249 |   - field: services.elasticsearch.node_info.nodes.node_data.name
250 |     weight: 0.1
251 |   - field: services.parsed.etcd.v2.members.id
252 |     weight: 0.1
253 |   - field: services.parsed.etcd.v3.members.peer_urls
254 |     weight: 0.1
255 |   - field: services.parsed.etcd.v3.members.client_urls
256 |     weight: 0.2
257 |   - field: ~services.fox.hostname
258 |     weight: 0.2
259 |   - field: services.fox.hostname
260 |     weight: 0.5
261 |   - field: ~services.fox.station_name
262 |     weight: 0.1
263 |   - field: services.fox.station_name
264 |     weight: 0.3
265 |   - field: services.fox.sys_info
266 |     weight: 0.1
267 |   - field: services.fox.vm_version
268 |     weight: 0.1
269 |   - field: services.fox.os_version
270 |     weight: 0.1
271 |   - field: services.fox.hostId
272 |     weight: 0.1
273 |   - field: services.mms.model
274 |     weight: 0.1
275 |   - field: services.mms.vendor
276 |     weight: 0.1
277 |   - field: services.mongodb.build_info.git_version
278 |     weight: 0.1
279 |   - field: services.mysql.server_version
280 |     weight: 0.1
281 |   - field: services.parsed.nbd.exports.name
282 |     weight: 0.1
283 |   - field: services.parsed.onvif.services.namespace
284 |     weight: 0.1
285 |   - field: services.parsed.onvif.services.xaddr
286 |     weight: 0.1
287 |   - field: services.parsed.onvif.hostname.name
288 |     weight: 0.1
289 |   - field: services.parsed.pcom.model
290 |     weight: 0.3
291 |   - field: services.parsed.pcom.os_build
292 |     weight: 0.1
293 |   - field: services.parsed.pcom.os_version
294 |     weight: 0.1
295 |   - field: ~services.pc_anywhere.name
296 |     weight: 0.5
297 |   - field: services.pc_anywhere.name
298 |     weight: 1.0
299 |   - field: ~services.pptp.hostname
300 |     weight: 0.5
301 |   - field: services.pptp.hostname
302 |     weight: 1.0
303 |   - field: services.parsed.redlion_crimson.model
304 |     weight: 0.1
305 |   - field: services.parsed.rocketmq.topics.topic_list
306 |     weight: 0.5
307 |   - field: services.parsed.rocketmq.version
308 |     weight: 0.1
309 |   - field: services.s7.plant_id
310 |     weight: 0.2
311 |   - field: services.s7.memory_serial_number
312 |     weight: 0.3
313 |   - field: ~services.parsed.scpi.manufacturer
314 |     weight: 0.0
315 |   - field: services.parsed.scpi.manufacturer
316 |     weight: 0.1
317 |   - field: services.parsed.scpi.model
318 |     weight: 0.1
319 |   - field: services.parsed.scpi.firmware
320 |     weight: 0.1
321 |   - field: services.smb.group_name
322 |     weight: 1.0
323 |   - field: services.smb.ntlm
324 |     weight: 0.1
325 |   - field: services.upnp.devices.manufacturer
326 |     weight: 0.1
327 |   - field: ~services.upnp.devices.model_name
328 |     weight: 0.0
329 |   - field: services.upnp.devices.model_name
330 |     weight: 0.1
331 |   - field: services.upnp.devices.serial_number
332 |     weight: 0.1
333 |   - field: services.parsed.zeromq.handshake.socket_type
334 |     weight: 0.0
335 |   - field: services.tls.certificate.parsed.issuer.locality
336 |     weight: 1.0
337 |   # not actually used unless the open-directory gadget is enabled.
338 |   - field: open-directory.gadget.censeye
339 |     weight: 1.0
340 |   # not actually used unless the nobbler gadget is enabled.
341 |   - field: nobbler.gadget.censeye
342 |     weight: 1.0
343 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools >= 70.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [tool.black]
 6 | target-version = ["py39"]
 7 | 
 8 | [tool.isort]
 9 | profile = "black"
10 | line_length = 88
11 | multi_line_output = 3
12 | known_first_party = ["censeye"]
13 | known_local_folder = ["censeye"]
14 | sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
15 | extend_skip = ["setup.py", "conf.py"]
16 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | appdirs==1.4.4
 2 | backoff==2.2.1
 3 | beautifulsoup4==4.12.3
 4 | censys==2.2.16
 5 | click==8.1.7
 6 | python_dateutil==2.9.0.post0
 7 | PyYAML==6.0.2
 8 | Requests==2.32.3
 9 | rich==13.9.4
10 | setuptools>70.0.0
11 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Note: To use the 'upload' functionality of this file, you must:
  4 | #   $ pipenv install twine --dev
  5 | 
  6 | import os
  7 | import sys
  8 | from shutil import rmtree
  9 | 
 10 | from setuptools import Command, find_packages, setup
 11 | 
 12 | # Package meta-data.
 13 | NAME = "censeye"
 14 | DESCRIPTION = (
 15 |     "This tool is designed to help researchers identify hosts with characteristics"
 16 |     " similar to a given target."
 17 | )
 18 | URL = "https://github.com/Censys-Research/censeye"
 19 | EMAIL = "support@censys.io"
 20 | AUTHOR = "Censys, Inc."
 21 | REQUIRES_PYTHON = ">=3.9.0"
 22 | 
 23 | # Open requirements file
 24 | with open("requirements.txt") as f:
 25 |     REQUIRED = f.read().splitlines()
 26 | 
 27 | here = os.path.abspath(os.path.dirname(__file__))
 28 | 
 29 | # Import the README and use it as the long-description.
 30 | try:
 31 |     with open(os.path.join(here, "README.md"), encoding="utf-8") as f:
 32 |         long_description = "\n" + f.read()
 33 | except FileNotFoundError:
 34 |     long_description = DESCRIPTION
 35 | 
 36 | # Load the package's __version__.py module as a dictionary.
 37 | about = {}
 38 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
 39 | with open(os.path.join(here, project_slug, "__version__.py")) as f:
 40 |     exec(f.read(), about)
 41 | 
 42 | 
 43 | class UploadCommand(Command):
 44 |     """Support setup.py upload."""
 45 | 
 46 |     description = "Build and publish the package."
 47 |     user_options = []
 48 | 
 49 |     @staticmethod
 50 |     def status(s):
 51 |         """Prints things in bold."""
 52 |         print(f"\033[1m{s}\033[0m")
 53 | 
 54 |     def initialize_options(self):
 55 |         pass
 56 | 
 57 |     def finalize_options(self):
 58 |         pass
 59 | 
 60 |     def run(self):
 61 |         try:
 62 |             self.status("Removing previous builds…")
 63 |             rmtree(os.path.join(here, "dist"))
 64 |         except OSError:
 65 |             pass
 66 | 
 67 |         self.status("Building Source and Wheel (universal) distribution…")
 68 |         os.system(f"{sys.executable} setup.py sdist bdist_wheel --universal")
 69 | 
 70 |         self.status("Uploading the package to PyPI via Twine…")
 71 |         os.system("twine upload dist/*")
 72 | 
 73 |         self.status("Pushing git tags…")
 74 |         os.system("git tag v{}".format(about["__version__"]))
 75 |         os.system("git push --tags")
 76 | 
 77 |         sys.exit()
 78 | 
 79 | 
 80 | # Where the magic happens:
 81 | setup(
 82 |     name=NAME,
 83 |     version=about["__version__"],
 84 |     description=DESCRIPTION,
 85 |     long_description=long_description,
 86 |     long_description_content_type="text/markdown",
 87 |     author=AUTHOR,
 88 |     author_email=EMAIL,
 89 |     python_requires=REQUIRES_PYTHON,
 90 |     url=URL,
 91 |     packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
 92 |     entry_points={
 93 |         "console_scripts": ["censeye=censeye:cli.main"],
 94 |     },
 95 |     install_requires=REQUIRED,
 96 |     extras_require={
 97 |         "dev": ["twine", "black", "isort", "pyupgrade", "flake8", "flake8-bugbear"],
 98 |     },
 99 |     include_package_data=True,
100 |     license="BSD",
101 |     classifiers=[
102 |         "Topic :: Security",
103 |         "Topic :: Utilities",
104 |         "Environment :: Console",
105 |         "Intended Audience :: Information Technology",
106 |         "Intended Audience :: Science/Research",
107 |         "License :: OSI Approved :: BSD License",
108 |         "Programming Language :: Python",
109 |         "Programming Language :: Python :: 3",
110 |         "Programming Language :: Python :: 3.9",
111 |         "Programming Language :: Python :: 3.10",
112 |         "Programming Language :: Python :: 3.11",
113 |     ],
114 |     # $ setup.py publish support.
115 |     cmdclass={
116 |         "upload": UploadCommand,
117 |     },
118 | )
119 | 


--------------------------------------------------------------------------------
/static/2024-11-26_13-19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Censys-Research/censeye/280184680e214b21dc21465eaab17eed140e7369/static/2024-11-26_13-19.png


--------------------------------------------------------------------------------
/static/cert_history.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Censys-Research/censeye/280184680e214b21dc21465eaab17eed140e7369/static/cert_history.png


--------------------------------------------------------------------------------
/static/diag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Censys-Research/censeye/280184680e214b21dc21465eaab17eed140e7369/static/diag.png


--------------------------------------------------------------------------------
/static/gadget_nobbler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Censys-Research/censeye/280184680e214b21dc21465eaab17eed140e7369/static/gadget_nobbler.png


--------------------------------------------------------------------------------
/static/gadget_open_dir.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Censys-Research/censeye/280184680e214b21dc21465eaab17eed140e7369/static/gadget_open_dir.png


--------------------------------------------------------------------------------
/static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Censys-Research/censeye/280184680e214b21dc21465eaab17eed140e7369/static/logo.png


--------------------------------------------------------------------------------
/static/query_prefix_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Censys-Research/censeye/280184680e214b21dc21465eaab17eed140e7369/static/query_prefix_01.png


--------------------------------------------------------------------------------