├── requirements.txt ├── .gitignore ├── optic-actions.png ├── storm ├── commands │ ├── zw.sinkdb.setup.tagprefix.storm │ ├── zw.sinkdb.setup.apikey.storm │ ├── zw.sinkdb.lookup.storm │ └── zw.sinkdb.import.storm └── modules │ ├── zw.sinkdb.api.storm │ └── zw.sinkdb.privsep.storm ├── LICENSE ├── .github └── workflows │ ├── test.yml │ └── release.yml ├── synapse-sinkdb.yaml ├── README.md └── test_synapse_sinkdb.py /requirements.txt: -------------------------------------------------------------------------------- 1 | synapse 2 | pytest -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .pytest_cache 3 | synapse_sinkdb.json 4 | sinkdb_data.json -------------------------------------------------------------------------------- /optic-actions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/captainGeech42/synapse-sinkdb/HEAD/optic-actions.png -------------------------------------------------------------------------------- /storm/commands/zw.sinkdb.setup.tagprefix.storm: -------------------------------------------------------------------------------- 1 | init { 2 | $privsep = $lib.import(zw.sinkdb.privsep) 3 | 4 | $privsep.setTagPrefix($cmdopts.tagname) 5 | $lib.print("Changed the SinkDB tag prefix to #{p}", p=$privsep.getTagPrefix()) 6 | } -------------------------------------------------------------------------------- /storm/commands/zw.sinkdb.setup.apikey.storm: -------------------------------------------------------------------------------- 1 | init { 2 | $privsep = $lib.import(zw.sinkdb.privsep) 3 | 4 | if ($cmdopts.self) { 5 | $lib.print("Setting SinkDB HTTPS API key for the current user.") 6 | } else { 7 | if (not $lib.user.allowed("zw.sinkdb.admin")) { 8 | $lib.exit("failed to set sinkdb global API key, missing zw.sinkdb.admin perms") 9 | } 10 | 
$lib.print("Setting SinkDB HTTPS API key for all users.") 11 | } 12 | 13 | $privsep.setApiKey($cmdopts.apikey, $cmdopts.self) 14 | } -------------------------------------------------------------------------------- /storm/commands/zw.sinkdb.lookup.storm: -------------------------------------------------------------------------------- 1 | init { 2 | if $cmdopts.debug { $lib.debug = $lib.true } 3 | 4 | $api = $lib.import(zw.sinkdb.api, debug=$lib.debug) 5 | $privsep = $lib.import(zw.sinkdb.privsep, debug=$lib.debug) 6 | } 7 | 8 | $data = $lib.false 9 | 10 | switch $node.form() { 11 | "inet:fqdn": { $data = $api.lookupFqdn($node.repr(), $cmdopts.asof) } 12 | "inet:email": { $data = $api.lookupEmail($node.repr(), $cmdopts.asof) } 13 | "inet:ipv4": { $data = $api.lookupIpv4($node.repr(), $cmdopts.asof) } 14 | *: { if $lib.debug { $lib.warn("unsupported form received: {f}", f=$node.form()) } } 15 | } 16 | 17 | if ($data) { 18 | for $res in $data { 19 | { | $privsep.modelLookupResponse($node, $res) } 20 | } 21 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Zander Work 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | on: 4 | push: 5 | # don't run when a new tag is pushed 6 | # https://github.community/t/dont-run-on-tag-creation/137469/2 7 | tags-ignore: 8 | - "**" 9 | branches: 10 | - "**" 11 | paths: 12 | - "**/*.storm" 13 | - "test_synapse_sinkdb.py" 14 | - "synapse-sinkdb.yaml" 15 | pull_request: 16 | paths: 17 | - "**/*.storm" 18 | - "test_synapse_sinkdb.py" 19 | - "synapse-sinkdb.yaml" 20 | workflow_dispatch: 21 | 22 | jobs: 23 | test: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@master 28 | 29 | - name: Set up Python 3.10 30 | uses: actions/setup-python@v1 31 | with: 32 | python-version: "3.10" 33 | 34 | - name: Install dependencies 35 | run: python -m pip install -r requirements.txt 36 | 37 | - name: Run test suite 38 | run: python -m pytest test_synapse_sinkdb.py 39 | env: 40 | SYNAPSE_SINKDB_APIKEY: ${{ secrets.SYNAPSE_SINKDB_APIKEY }} 41 | SYNAPSE_SINKDB_DATA: ${{ secrets.SYNAPSE_SINKDB_DATA }} -------------------------------------------------------------------------------- /storm/commands/zw.sinkdb.import.storm: -------------------------------------------------------------------------------- 1 | init { 2 | if $cmdopts.debug { $lib.debug = $lib.true } 3 | 4 | $api = $lib.import(zw.sinkdb.api, 
debug=$lib.debug) 5 | $privsep = $lib.import(zw.sinkdb.privsep, debug=$lib.debug) 6 | 7 | if ($cmdopts.no_awareness and $cmdopts.no_scanners and $cmdopts.no_sinkholes) { 8 | $lib.warn("no categories of sinkdb data enabled for import") 9 | } 10 | 11 | $recordsToModel = ([]) 12 | 13 | if (not $cmdopts.no_awareness) { 14 | if $lib.debug { $lib.print("fetching awareness indicators") } 15 | $data = $api.exportIndicators(awareness, $cmdopts.asof) 16 | if (not $data) { 17 | $lib.warn("failed to get awareness data from sinkdb") 18 | } 19 | $recordsToModel.extend($data) 20 | } 21 | 22 | if (not $cmdopts.no_scanners) { 23 | if $lib.debug { $lib.print("fetching scanner indicators") } 24 | $data = $api.exportIndicators(scanners, $cmdopts.asof) 25 | if (not $data) { 26 | $lib.warn("failed to get scanner data from sinkdb") 27 | } 28 | $recordsToModel.extend($data) 29 | } 30 | 31 | if (not $cmdopts.no_sinkholes) { 32 | if $lib.debug { $lib.print("fetching sinkhole indicators") } 33 | $data = $api.exportIndicators(sinkholes, $cmdopts.asof) 34 | if (not $data) { 35 | $lib.warn("failed to get sinkhole data from sinkdb") 36 | } 37 | $recordsToModel.extend($data) 38 | } 39 | 40 | $lib.print("modeling {d} records from sinkdb", d=$recordsToModel.size()) 41 | divert $cmdopts.yield $privsep.modelExportResponse($recordsToModel) 42 | } -------------------------------------------------------------------------------- /storm/modules/zw.sinkdb.api.storm: -------------------------------------------------------------------------------- 1 | $privsep = $lib.import(zw.sinkdb.privsep, debug=$lib.debug) 2 | 3 | // (MODULE INTERNAL ONLY) 4 | // Make a lookup request to SinkDB 5 | // Args 6 | // - $querystr (str): Query string to append to the POST body (which this function will add the API key to) 7 | // Returns: dict of results or $lib.false on failure 8 | function __makeLookupRequest(query, asof) { 9 | $resp = $privsep.makeSinkdbApiCall($query, $asof) 10 | 11 | if ($resp and $resp.query_status = 
"ok") { 12 | return($resp.results) 13 | } else { 14 | $lib.warn("got a bad response from SinkDB for query: {q}", q=$query) 15 | return($lib.false) 16 | } 17 | } 18 | 19 | // Make a lookup against SinkDB for an IPv4 address 20 | function lookupIpv4(ip, asof) { 21 | $query = $lib.str.format("ipv4={ip}", ip=$ip) 22 | return($__makeLookupRequest($query, $asof)) 23 | } 24 | 25 | // Make a lookup against SinkDB for a domain 26 | function lookupFqdn(fqdn, asof) { 27 | $lib.print($lib.debug) 28 | $query = $lib.str.format("domain={fqdn}", fqdn=$fqdn) 29 | return($__makeLookupRequest($query, $asof)) 30 | } 31 | 32 | // Make a lookup against SinkDB for an email addr 33 | function lookupEmail(email, asof) { 34 | $query = $lib.str.format("email={email}", email=$email) 35 | return($__makeLookupRequest($query, $asof)) 36 | } 37 | 38 | // Make an export request to SinkDB 39 | // Args 40 | // - category (str): what category of data to export (sinkholes, awareness, or scanners) 41 | // Returns: dict of results or $lib.false on failure 42 | function exportIndicators(category, asof) { 43 | $query = $lib.str.format("export={category}", category=$category) 44 | return($__makeLookupRequest($query, $asof)) 45 | } 46 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish new release 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@master 14 | 15 | - name: Set up Python 3.10 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: "3.10" 19 | 20 | - name: Install dependencies 21 | run: python -m pip install -r requirements.txt 22 | 23 | - name: Build release JSON blob 24 | run: python -m synapse.tools.genpkg --save synapse_sinkdb.json synapse-sinkdb.yaml 25 | 26 | - name: Get tag 27 | id: get_tag 28 | run: echo 
::set-output name=SOURCE_TAG::${GITHUB_REF#refs/tags/} 29 | 30 | - name: Create a new draft release 31 | id: create_release 32 | uses: actions/create-release@v1 33 | env: 34 | GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} 35 | with: 36 | tag_name: ${{ steps.get_tag.outputs.SOURCE_TAG }} 37 | release_name: ${{ steps.get_tag.outputs.SOURCE_TAG }} 38 | draft: true 39 | prerelease: false 40 | 41 | - name: Upload package JSON blob to the release 42 | uses: actions/upload-release-asset@v1.0.1 43 | env: 44 | GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} 45 | with: 46 | upload_url: ${{ steps.create_release.outputs.upload_url }} 47 | asset_path: ./synapse_sinkdb.json 48 | asset_name: synapse_sinkdb.json 49 | asset_content_type: application/json 50 | 51 | - name: Publish release 52 | uses: eregon/publish-release@v1 53 | env: 54 | GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} 55 | with: 56 | release_id: ${{ steps.create_release.outputs.id }} -------------------------------------------------------------------------------- /synapse-sinkdb.yaml: -------------------------------------------------------------------------------- 1 | name: zw-sinkdb 2 | version: 1.1.1 3 | synapse_minversion: [2, 117, 0] 4 | desc: The zw-sinkdb package provides commands to ingest and model data from SinkDB (sinkdb.abuse.ch). 
5 | 6 | author: 7 | url: https://zanderwork.com/ 8 | name: Zander Work 9 | 10 | genopts: 11 | dotstorm: true 12 | 13 | perms: 14 | - perm: [zw, sinkdb, user] 15 | desc: Allows a user to lookup indicators on SinkDB 16 | gate: cortex 17 | - perm: [zw, sinkdb, admin] 18 | desc: Allows a user to bulk import the listable IOCs on SinkDB, and make global configuration changes 19 | gate: cortex 20 | 21 | modules: 22 | - name: zw.sinkdb.api 23 | - name: zw.sinkdb.privsep 24 | modconf: 25 | source: a9fc8fc6af73f0bf2dda26961f50cfe6 26 | asroot:perms: 27 | - [zw, sinkdb, user] 28 | 29 | commands: 30 | - name: zw.sinkdb.setup.apikey 31 | descr: Set the SinkDB HTTPS API key 32 | perms: 33 | - [zw, sinkdb, user] 34 | cmdargs: 35 | - - apikey 36 | - type: str 37 | help: The SinkDB HTTPS API key 38 | - - --self 39 | - type: bool 40 | default: false 41 | action: store_true 42 | help: Set the key as a user variable. If not used, the key is set globally. 43 | 44 | - name: zw.sinkdb.setup.tagprefix 45 | descr: "Set the SinkDB tag tree prefix (default: `rep.sinkdb`). Note that this won't migrate any existing tags under the previous prefix." 46 | perms: 47 | - [zw, sinkdb, admin] 48 | cmdargs: 49 | - - tagname 50 | - type: str 51 | help: The tag prefix to use 52 | 53 | - name: zw.sinkdb.lookup 54 | descr: Lookup an indicator on SinkDB 55 | perms: 56 | - [zw, sinkdb, user] 57 | cmdargs: 58 | - - --asof 59 | - type: str 60 | default: -30days 61 | help: Specify the maximum age for a cached result. To disable caching, use --asof now. 62 | - - --debug 63 | - type: bool 64 | default: false 65 | action: store_true 66 | help: Show verbose debug output. 67 | cmdinput: 68 | - form: inet:email 69 | help: Lookup an email address 70 | - form: inet:fqdn 71 | help: Lookup a fqdn 72 | - form: inet:ipv4 73 | help: Lookup an IP 74 | 75 | - name: zw.sinkdb.import 76 | descr: Import the listable indicators on SinkDB. By default, imports the sinkhole, phishing awareness, and scanner indicators. 
77 | perms: 78 | - [zw, sinkdb, admin] 79 | cmdargs: 80 | - - --asof 81 | - type: str 82 | default: -30days 83 | help: Specify the maximum age for a cached result. To disable caching, use --asof now. 84 | - - --debug 85 | - type: bool 86 | default: false 87 | action: store_true 88 | help: Show verbose debug output. 89 | - - --yield 90 | - type: bool 91 | default: false 92 | action: store_true 93 | help: Yield the newly created nodes. 94 | - - --no-awareness 95 | - type: bool 96 | default: false 97 | action: store_true 98 | help: Don't import the awareness indicators. 99 | - - --no-scanners 100 | - type: bool 101 | default: false 102 | action: store_true 103 | help: Don't import the scanner indicators. 104 | - - --no-sinkholes 105 | - type: bool 106 | default: false 107 | action: store_true 108 | help: Don't import the sinkholes indicators. 109 | 110 | optic: 111 | actions: 112 | - name: SinkDB Lookup 113 | storm: zw.sinkdb.lookup 114 | descr: Enrich a node with SinkDB data 115 | forms: [ inet:ipv4, inet:fqdn, inet:email ] 116 | - name: SinkDB Lookup (no cache) 117 | storm: zw.sinkdb.lookup --asof now 118 | descr: Enrich a node with SinkDB data, ignoring any cached data 119 | forms: [ inet:ipv4, inet:fqdn, inet:email ] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # synapse-sinkdb 2 | [![Tests](https://github.com/captainGeech42/synapse-sinkdb/actions/workflows/test.yml/badge.svg)](https://github.com/captainGeech42/synapse-sinkdb/actions/workflows/test.yml) [![Release](https://github.com/captainGeech42/synapse-sinkdb/actions/workflows/release.yml/badge.svg)](https://github.com/captainGeech42/synapse-sinkdb/actions/workflows/release.yml) [![GitHub Release](https://img.shields.io/github/release/captainGeech42/synapse-sinkdb.svg?style=flat)](https://github.com/captainGeech42/synapse-sinkdb/releases) 3 | 4 | Synapse Rapid Powerup for 
[SinkDB](https://sinkdb.abuse.ch/) 5 | 6 | ## Install 7 | 8 | To install the latest release, run the following Storm command 9 | 10 | ``` 11 | storm> pkg.load --raw https://github.com/captainGeech42/synapse-sinkdb/releases/latest/download/synapse_sinkdb.json 12 | ``` 13 | 14 | You can also clone this repo, and install via the telepath API: 15 | 16 | ``` 17 | $ python -m synapse.tools.genpkg --push aha://mycortex synapse-sinkdb.yaml 18 | ``` 19 | 20 | ## Usage 21 | 22 | First, configure your HTTPS API key (globally, or per user with `--self`): 23 | 24 | ``` 25 | storm> zw.sinkdb.setup.apikey 26 | ``` 27 | 28 | Optionally, you can also change the tag prefix (default is `rep.sinkdb`): 29 | 30 | ``` 31 | storm> zw.sinkdb.setup.tagprefix 3p.aka.sinkdb 32 | ``` 33 | 34 | Then, you can lookup IOCs against SinkDB: 35 | 36 | ``` 37 | storm> inet:fqdn=ns1.mysinkhole.lol | zw.sinkdb.lookup 38 | ................ 39 | inet:fqdn=ns1.mysinkhole.lol 40 | :domain = mysinkhole.lol 41 | :host = ns1 42 | :issuffix = False 43 | :iszone = False 44 | :zone = mysinkhole.lol 45 | .created = 2023/02/04 02:11:24.673 46 | #rep.sinkdb.class.listed = (2023/02/04 02:14:02.284, 2023/02/04 02:14:02.285) 47 | #rep.sinkdb.has_operator = (2023/02/04 02:14:02.284, 2023/02/04 02:14:02.285) 48 | #rep.sinkdb.sinkhole = (2021/06/27 19:46:08.000, 2023/02/04 02:14:02.284) 49 | #rep.sinkdb.type.nameserver = (2023/02/04 02:14:02.284, 2023/02/04 02:14:02.285) 50 | #test 51 | complete. 1 nodes in 706 ms (1/sec). 52 | ``` 53 | 54 | You can also bulk import the `listed` indicators from SinkDB: 55 | 56 | ``` 57 | storm> zw.sinkdb.import 58 | modeling 445 records from sinkdb 59 | complete. 0 nodes in 4412 ms (0/sec). 60 | storm> zw.sinkdb.import --yield | count 61 | modeling 445 records from sinkdb 62 | Counted 860 nodes. 63 | complete. 0 nodes in 4813 ms (0/sec). 64 | ``` 65 | 66 | By default, `lookup` and `import` use a 30 day cache window. To override this, use the `--asof` flag. 
To ignore the cached data, specify `--asof now`. 67 | 68 | For more details, please run `help zw.sinkdb`. 69 | 70 | ### Optic 71 | 72 | If you are an Optic user, there are right-click actions registered for `inet:fqdn`, `inet:email`, and `inet:ipv4` nodes: 73 | 74 | ![optic screenshot of sinkdb enrichment](./optic-actions.png) 75 | 76 | ## Administration 77 | 78 | This package exposes two permissions: 79 | 80 | * `zw.sinkdb.user`: Intended for general analyst use, allows the invocation of `zw.sinkdb.lookup` 81 | * `zw.sinkdb.admin`: Intended for administrative/automation use, allows the invocation of `zw.sinkdb.import` and changing of global configuration items 82 | 83 | This package uses a `meta:source` node with the GUID `a9fc8fc6af73f0bf2dda26961f50cfe6`. All observed nodes are edged with `seen` to the `meta:source`. The created `ps:contact` nodes to track the operators use the type `zw.sinkdb.operator`. 84 | 85 | ## Tag Tree 86 | 87 | By default, this package creates a tag tree under `#rep.sinkdb` (you can change the prefix globally with `zw.sinkdb.setup.tagprefix`): 88 | 89 | * `#rep.sinkdb.sinkhole`: The node is a sinkhole 90 | * `#rep.sinkdb.awareness`: The node is a part of a phishing awareness campaign 91 | * `#rep.sinkdb.scanner`: The node is a scanner 92 | * `#rep.sinkdb.has_operator`: The operator of the entry is made known 93 | * `#rep.sinkdb.expose.vendor`: The sinkhole is exposed to vendors 94 | * `#rep.sinkdb.expose.lea`: The sinkhole is exclusively exposed to law enforcement agencies 95 | * `#rep.sinkdb.class.listed`: The entry is classified as "listed" 96 | * `#rep.sinkdb.class.query`: The entry is classified as "query-only" 97 | * `#rep.sinkdb.type.*`: The type of entry on SinkDB (`ipv4`, `ipv6`, `ipv4_range`, `ipv6_range`, `whois_email`, `domain_soa`, `nameserver`, `web_ipv4`, `web_ipv6`, `sending_ipv4`, `sending_ipv6`, `web_url`, `web_domain`, `email_from`, `email_from_name`, `email_subject`) 98 | * Please note that SinkDB entries with the type 
`email_from_name` or `email_subject` are modeled as `it:dev:str` nodes, since the `inet:email:message` form doesn't capture them in a standalone manner. These are only modeled when doing `zw.sinkdb.import` for awareness campaigns. 99 | 100 | The time interval on `#rep.sinkdb.sinkhole` reflects the time data exposed by SinkDB (that is, when it was added to SinkDB, through the current time when the entry was observed on SinkDB) 101 | 102 | An additional tag, `#rep.sinkdb.operator`, is applied on `ps:contact` nodes that are created to track the sinkhole operators. 103 | 104 | ## Running the test suite 105 | 106 | You must have a SinkDB HTTPS API key to run the tests. Please put the key in `$SYNAPSE_SINKDB_APIKEY` when running the tests. 107 | 108 | Additionally, you must provide your own entries on SinkDB to seed the test cortex, since the data is TLP:AMBER and can't be stored in the public test code. Test data should be a JSON blob in the below structure. Please be mindful of the `ipv4_range` entries, each IP in the range will be looked up. 109 | 110 | ``` 111 | { 112 | "ipv4": [], 113 | "ipv4_range": [], 114 | "domain_soa": [], 115 | "whois_email": [], 116 | "nameserver": [] 117 | } 118 | ``` 119 | 120 | Make sure you add at least the following indicators (the test suite checks for the combination of tags they provide). They *should* be accessible on any account type, ymmv: 121 | 122 | ``` 123 | https://sinkdb.abuse.ch/sinkholes/indicator/1b26d0e462/ 124 | https://sinkdb.abuse.ch/sinkholes/indicator/d9b85decab/ 125 | https://sinkdb.abuse.ch/sinkholes/indicator/55b492114b/ 126 | https://sinkdb.abuse.ch/sinkholes/indicator/d42a88a939/ 127 | https://sinkdb.abuse.ch/sinkholes/indicator/e3fdeea6a0/ 128 | ``` 129 | 130 | This can be stored on disk and provided as a filepath in `$SYNAPSE_SINKDB_DATA_PATH`, or the data can be stored directly in `$SYNAPSE_SINKDB_DATA`. 
Optionally, if you can verify SinkDB access to me, I'll send you my test blob to make things easier for you. 131 | 132 | ``` 133 | $ pip install -r requirements.txt 134 | $ SYNAPSE_SINKDB_APIKEY=asdf SYNAPSE_SINKDB_DATA_PATH=sinkdb_data.json python -m pytest test_synapse_sinkdb.py 135 | ``` -------------------------------------------------------------------------------- /test_synapse_sinkdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import logging 5 | 6 | import synapse.common as s_common 7 | import synapse.cortex as s_cortex 8 | import synapse.tests.utils as s_test 9 | import synapse.tools.genpkg as s_genpkg 10 | 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | dirname = os.path.dirname(__file__) 15 | pkgproto = s_common.genpath(dirname, "synapse-sinkdb.yaml") 16 | 17 | 18 | def get_api_key() -> str | None: 19 | """Get the SinkDB API key.""" 20 | 21 | return os.getenv("SYNAPSE_SINKDB_APIKEY") 22 | 23 | def get_seed_nodes() -> dict | None: 24 | """Get the SinkDB seed data, either from disk or from an env var directly.""" 25 | 26 | j = {} 27 | 28 | path = os.getenv("SYNAPSE_SINKDB_DATA_PATH") 29 | if path: 30 | with open(path, "r") as f: 31 | logger.warning("got sinkdb data from %s", path) 32 | j = json.loads(f.read()) 33 | else: 34 | data = os.getenv("SYNAPSE_SINKDB_DATA") 35 | if data is None: 36 | logger.error("failed to find sinkdb seed data!") 37 | return None 38 | logger.warning("got sinkdb data from $SYNAPSE_SINKDB_DATA") 39 | j = json.loads(data) 40 | 41 | # make sure the dictionary has the required keys 42 | req_keys = ["ipv4", "ipv4_range", "domain_soa", "whois_email", "nameserver"] 43 | for k in req_keys: 44 | v = j.get(k, None) 45 | if v is None or type(v) is not list or len(v) == 0: 46 | logger.error("invalid structure for sinkdb data, see README.md") 47 | return None 48 | 49 | return j 50 | 51 | class SynapseSinkdbTest(s_test.SynTest): 52 | async def 
_t_install_pkg(self, core: s_cortex.Cortex): 53 | """Install and configure the Storm package.""" 54 | 55 | # get API key 56 | api_key = get_api_key() 57 | self.assertIsNotNone(api_key, "You must provide an API key in $SYNAPSE_SINKDB_APIKEY to run the test suite") 58 | 59 | # install package 60 | await s_genpkg.main((pkgproto, "--push", f"cell://{core.dirn}")) 61 | 62 | # set the api key 63 | msgs = await core.stormlist("zw.sinkdb.setup.apikey --self $key", opts={"vars": {"key": api_key}}) 64 | self.stormIsInPrint("for the current user", msgs) 65 | 66 | msgs = await core.stormlist("zw.sinkdb.setup.apikey $key", opts={"vars": {"key": api_key}}) 67 | self.stormIsInPrint("for all users", msgs) 68 | 69 | async def _t_seed_cortex(self, core: s_cortex.Cortex): 70 | """Add the SinkDB test nodes to the cortex.""" 71 | 72 | data = get_seed_nodes() 73 | self.assertIsNotNone(data, "You must provide seed data present in SinkDB to run the test suite. See README.md for details") 74 | 75 | self.assertGreater(await core.count("for $v in $vals { [inet:fqdn=$v +#test.domain_soa] }", opts={"vars": {"vals": data["domain_soa"]}}), 0) 76 | self.assertGreater(await core.count("for $v in $vals { [inet:ipv4=$v +#test.ipv4] }", opts={"vars": {"vals": data["ipv4"]}}), 0) 77 | self.assertGreater(await core.count("for $v in $vals { [inet:ipv4=$v +#test.ipv4_range] }", opts={"vars": {"vals": data["ipv4_range"]}}), 0) 78 | self.assertGreater(await core.count("for $v in $vals { [inet:fqdn=$v +#test.nameserver] }", opts={"vars": {"vals": data["nameserver"]}}), 0) 79 | self.assertGreater(await core.count("for $v in $vals { [inet:email=$v +#test.whois_email] }", opts={"vars": {"vals": data["whois_email"]}}), 0) 80 | self.assertGreater(await core.count("#test"), 0) 81 | 82 | async def _t_check_lookup_type(self, core: s_cortex.Cortex, type: str, expected_tags: list[str], prefix = "rep.sinkdb"): 83 | """Validate a type of lookup nodes on SinkDB data modeling.""" 84 | 85 | # get the number of nodes of 
the category 86 | num_nodes = await core.count(f"#test.{type}") 87 | self.assertGreater(num_nodes, 0) 88 | 89 | # model the sinkdb data 90 | msgs = await core.stormlist(f"#test.{type} | zw.sinkdb.lookup") 91 | self.stormHasNoWarnErr(msgs) 92 | 93 | # make sure each node got at least something from sinkdb 94 | self.assertEqual(await core.count(f"#test.{type} +#{prefix}" + " +{ <(seen)- meta:source:name=sinkdb }"), num_nodes) 95 | 96 | # make sure the main test node got all of the proper tags 97 | tag_str = " ".join([f"+#{prefix}." + x for x in expected_tags]) 98 | self.assertGreater(await core.count(f"#test.{type} {tag_str}"), 0) 99 | 100 | async def test_lookups(self): 101 | self.skipIfNoInternet() 102 | 103 | async with self.getTestCore() as core: 104 | await self._t_install_pkg(core) 105 | await self._t_seed_cortex(core) 106 | 107 | await self._t_check_lookup_type(core, "domain_soa", ["class.listed", "expose.vendor", "has_operator", "sinkhole", "type.domain_soa"]) 108 | await self._t_check_lookup_type(core, "ipv4", ["class.listed", "expose.vendor", "has_operator", "sinkhole", "type.ipv4"]) 109 | await self._t_check_lookup_type(core, "ipv4_range", ["class.listed", "sinkhole", "type.ipv4_range"]) 110 | await self._t_check_lookup_type(core, "nameserver", ["class.query_only", "has_operator", "sinkhole", "type.nameserver"]) 111 | await self._t_check_lookup_type(core, "whois_email", ["class.listed", "has_operator", "sinkhole", "type.domain_soa", "type.whois_email"]) 112 | 113 | msgs = await core.stormlist("[it:dev:str=asdf] | zw.sinkdb.lookup --debug") 114 | self.stormIsInWarn("unsupported form received", msgs) 115 | 116 | async def test_tag_prefix(self): 117 | self.skipIfNoInternet() 118 | 119 | async with self.getTestCore() as core: 120 | await self._t_install_pkg(core) 121 | await self._t_seed_cortex(core) 122 | 123 | await self._t_check_lookup_type(core, "domain_soa", ["class.listed", "expose.vendor", "has_operator", "sinkhole", "type.domain_soa"]) 124 | msgs = 
await core.stormlist("zw.sinkdb.setup.tagprefix new.asdf") 125 | self.stormIsInPrint("tag prefix to #new.asdf", msgs) 126 | await self._t_check_lookup_type(core, "domain_soa", ["class.listed", "expose.vendor", "has_operator", "sinkhole", "type.domain_soa"], prefix="new.asdf") 127 | 128 | async def test_cache(self): 129 | self.skipIfNoInternet() 130 | 131 | async with self.getTestCore() as core: 132 | await self._t_install_pkg(core) 133 | await self._t_seed_cortex(core) 134 | 135 | msgs = await core.stormlist("#test.nameserver | zw.sinkdb.lookup --debug") 136 | self.stormIsInPrint("wrote http query cache data", msgs) 137 | self.stormHasNoWarnErr(msgs) 138 | 139 | msgs = await core.stormlist("#test.nameserver | zw.sinkdb.lookup --debug") 140 | self.stormIsInPrint("using cached data for http query", msgs) 141 | self.stormHasNoWarnErr(msgs) 142 | 143 | msgs = await core.stormlist("#test.nameserver | zw.sinkdb.lookup --debug --asof now") 144 | self.stormIsInPrint("wrote http query cache data", msgs) 145 | self.stormHasNoWarnErr(msgs) 146 | 147 | async def test_import(self): 148 | self.skipIfNoInternet() 149 | 150 | async with self.getTestCore() as core: 151 | await self._t_install_pkg(core) 152 | 153 | msgs = await core.stormlist("zw.sinkdb.import --debug --no-awareness --no-scanners --no-sinkholes") 154 | self.stormIsInWarn("no categories of sinkdb data enabled for import", msgs) 155 | self.stormNotInPrint("fetching", msgs) 156 | 157 | msgs = await core.stormlist("zw.sinkdb.import --debug --no-awareness --no-scanners") 158 | self.stormIsInPrint("records from sinkdb", msgs) 159 | self.stormIsInPrint("fetching sinkhole indicators", msgs) 160 | self.stormNotInPrint("fetching awareness indicators", msgs) 161 | self.stormNotInPrint("fetching scanner indicators", msgs) 162 | self.stormHasNoWarnErr(msgs) 163 | 164 | msgs = await core.stormlist("zw.sinkdb.import --debug --no-awareness --no-sinkholes") 165 | self.stormIsInPrint("records from sinkdb", msgs) 166 | 
self.stormNotInPrint("fetching sinkhole indicators", msgs)
167 |         self.stormNotInPrint("fetching awareness indicators", msgs)
168 |         self.stormIsInPrint("fetching scanner indicators", msgs)
169 |         self.stormHasNoWarnErr(msgs)
170 | 
171 |         msgs = await core.stormlist("zw.sinkdb.import --debug --no-scanners --no-sinkholes")
172 |         self.stormIsInPrint("records from sinkdb", msgs)
173 |         self.stormNotInPrint("fetching sinkhole indicators", msgs)
174 |         self.stormIsInPrint("fetching awareness indicators", msgs)
175 |         self.stormNotInPrint("fetching scanner indicators", msgs)
176 |         self.stormHasNoWarnErr(msgs)
177 | 
178 |         msgs = await core.stormlist("zw.sinkdb.import")
179 |         self.stormHasNoWarnErr(msgs)
180 |         print_str = '\n'.join([m[1].get('mesg') for m in msgs if m[0] == 'print'])
181 |         matches = re.findall(r"modeling (\d+) records from sinkdb", print_str)
182 |         self.assertEqual(len(matches), 1)
183 |         self.assertGreater(int(matches[0]), 300)
184 | 
185 |         self.assertGreater(await core.count("zw.sinkdb.import --yield"), 700)
186 | 
187 |         self.assertGreater(await core.count("inet:ipv4 +#rep.sinkdb.type.ipv4_range"), 0)
188 | 
189 |         self.assertGreater(await core.count("inet:ipv4 +#rep.sinkdb +{<(has)- ps:contact +#rep.sinkdb.operator +:type=zw.sinkdb.operator}"), 0)
--------------------------------------------------------------------------------
/storm/modules/zw.sinkdb.privsep.storm:
--------------------------------------------------------------------------------
  1 | // Name of the dictionary key used to store the SinkDB HTTPS API key
  2 | $apiKeyName = "zw.sinkdb:https-apikey"
  3 | 
  4 | // Name of the dictionary key to store the SinkDB tag prefix
  5 | $tagPrefixKeyName = "zw.sinkdb:tag-prefix"
  6 | 
  7 | // Get the SinkDB HTTPS API key. A per-user key takes precedence over the global key.
  8 | // Args: n/a
  9 | // Returns: str (the API key); exits the runtime if no key is configured
 10 | function getApiKey() {
 11 |     // see if the user has their own key set
 12 |     $apikey = $lib.user.vars.get($apiKeyName)
 13 |     if ($apikey) {
 14 |         return($apikey)
 15 |     }
 16 | 
 17 |     // no user key, check for global
 18 |     $apikey = $lib.globals.get($apiKeyName)
 19 |     if ($apikey) {
 20 |         return($apikey)
 21 |     }
 22 | 
 23 |     // no key in either, bail
 24 |     $lib.exit("SinkDB HTTPS API key is not set. Use zw.sinkdb.setup.apikey to set one.")
 25 | }
 26 | 
 27 | // Set the SinkDB HTTPS API key
 28 | // Args:
 29 | //  - key (str): the API key to save
 30 | //  - user (bool): if true, key will be stored to user vars instead of globals
 31 | // Returns: n/a
 32 | function setApiKey(key, user) {
 33 |     if ($user) {
 34 |         $lib.user.vars.set($apiKeyName, $key)
 35 |     } else {
 36 |         $lib.globals.set($apiKeyName, $key)
 37 |     }
 38 | }
 39 | 
 40 | // Get the SinkDB tag prefix
 41 | // Args: n/a
 42 | // Returns: str (the tag prefix; defaults to rep.sinkdb when unset)
 43 | function getTagPrefix() {
 44 |     return($lib.globals.get($tagPrefixKeyName, default=rep.sinkdb))
 45 | }
 46 | 
 47 | // Set the SinkDB tag prefix (persisted in normalized syn:tag:part form)
 48 | // Args:
 49 | //  - prefix (str): the tag prefix to save
 50 | // Returns: n/a
 51 | function setTagPrefix(prefix) {
 52 |     ($ok, $norm) = $lib.trycast(syn:tag:part, $prefix)
 53 |     if (not $ok) {
 54 |         $lib.warn("failed to set tag prefix, invalid value: {s}", s=$prefix)
 55 |         return($lib.null)
 56 |     }
 57 | 
 58 |     // store the normalized value, not the raw input, so getTagPrefix() always returns a valid tag part
 59 |     $lib.globals.set($tagPrefixKeyName, $norm)
 60 | }
 61 | 
 62 | // Make an API call to SinkDB
 63 | // Args:
 64 | //  - $querystr (str): Query string to append to the POST body (which this function will add the API key to)
 65 | //  - $asof (time): cache horizon passed to jsonstor.cacheget; cached responses newer than this are reused
 66 | // Returns: prim (JSON dictionary) or $lib.false
 67 | function makeSinkdbApiCall(querystr, asof) {
 68 |     $cachedData = $lib.jsonstor.cacheget("zw.sinkdb.http", $querystr, $asof)
 69 |     if ($cachedData) {
 70 |         if $lib.debug { $lib.print("using cached data for http query: {s}", s=$querystr) }
 71 |         return($cachedData)
 72 |     }
 73 | 
 74 |     $body = $lib.str.format("api_key={k}&{b}", k=$getApiKey(), b=$querystr)
 75 | 
 76 |     $resp = $lib.inet.http.post("https://sinkdb-api.abuse.ch/api/v1/", headers=({"content-type": "application/x-www-form-urlencoded"}), body=$body)
 77 |     if ($resp.code != 200) { return($lib.false) }
 78 | 
 79 |     $j = $resp.json()
 80 |     $ret = $lib.jsonstor.cacheset("zw.sinkdb.http", $querystr, $j)
 81 |     if $lib.debug { $lib.print("wrote http query cache data for {s}: {r}", s=$querystr, r=$ret) }
 82 | 
 83 |     return($j)
 84 | }
 85 | 
 86 | // Get the proper meta:source node for SinkDB
 87 | // Args: n/a
 88 | // Returns: meta:source node
 89 | function getMetaSource() {
 90 |     [ meta:source=$modconf.source :name="sinkdb" ]
 91 |     return($node)
 92 | }
 93 | 
 94 | // Model the response for a sinkdb entry
 95 | // Args:
 96 | //  - $node: node to model tags on
 97 | //  - $data: dict of data from SinkDB
 98 | // Returns: n/a
 99 | function modelLookupResponse(node, data) {
100 |     ($ok, $src) = $lib.trycast(syn:tag:part, $data.source)
101 |     if (not $ok) {
102 |         $lib.warn("failed to model {ioc}, couldn't norm source: {d}", ioc=$data.indicator, d=$data.source)
103 |         return()
104 |     }
105 |     ($ok, $class) = $lib.trycast(syn:tag:part, $data.classification)
106 |     if (not $ok) {
107 |         $lib.warn("failed to model {ioc}, couldn't norm classification: {d}", ioc=$data.indicator, d=$data.classification)
108 |         return()
109 |     }
110 |     ($ok, $type) = $lib.trycast(syn:tag:part, $data.type)
111 |     if (not $ok) {
112 |         $lib.warn("failed to model {ioc}, couldn't norm type: {d}", ioc=$data.indicator, d=$data.type)
113 |         return()
114 |     }
115 | 
116 |     $ts = $lib.time.parse($data.date_added, "%Y-%m-%d %H:%M:%S UTC")
117 |     $now = $lib.time.now()
118 |     $srcnode = $getMetaSource()
119 | 
120 |     $prefix = $getTagPrefix()
121 | 
122 |     $tags = ([$src, $lib.str.concat(class., $class), $lib.str.concat(type., $type)])
123 |     if ($data.expose_org = 1) { $tags.append(has_operator) }
124 |     if ($data.expose_vend = 1) { $tags.append(expose.vendor) }
125 |     if ($data.lea_only = 1) { $tags.append(expose.lea) }
126 | 
127 |     // add tags to the indicator
128 |     // TODO: should we be removing things that are no longer true, or allow the analyst to derive the current state based on the timestamps?
129 |     yield $node | { for $tag in $tags {
130 |         $t = $lib.str.join('.', ($prefix, $tag))
131 |         // the source tag gets the full (date_added, now) interval; the others just get the observation time
132 |         if ($t.endswith($src)) {[+#$t=($ts,$now)]}
133 |         else {[+#$t=$now]}
134 |     } }
135 | 
136 |     // add edge to the meta:source node
137 |     [ <(seen)+ { yield $srcnode }]
138 | 
139 |     if ($data.expose_org = 1 and $data.operator) {
140 |         // add a ps:contact node for the operator
141 |         $t = $lib.str.concat($prefix, ".operator")
142 |         $g = $lib.guid(zw,sinkdb,$data.operator)
143 |         [ <(has)+ {[ ps:contact=$g :name=$data.operator :type=zw.sinkdb.operator +#$t ]} ]
144 |         { spin | ps:contact=$g [<(seen)+ { yield $srcnode }] }
145 |     }
146 | 
147 |     fini { return() }
148 | }
149 | 
150 | // Model an IPv4 range from SinkDB
151 | // Args:
152 | //  - d: dict of data from SinkDB
153 | // Returns: n/a
154 | function __modelIpv4Range(d) {
155 |     $size = $lib.cast(int, $d.indicator.split("/").1)
156 |     // for small ranges (/24 or longer masks), also make the individual inet:ipv4 nodes
157 |     if ($size >= 24) {
158 |         if $lib.debug { $lib.print("making inet:ipv4 nodes for {i}", i=$d.indicator) }
159 |         [inet:ipv4=$d.indicator]
160 |     }
161 | 
162 |     [inet:cidr4=$d.indicator] { $modelLookupResponse($node, $d) }
163 | }
164 | 
165 | // Model an IPv6 range from SinkDB
166 | // Args:
167 | //  - d: dict of data from SinkDB
168 | // Returns: n/a
169 | function __modelIPv6Range(d) {
170 |     // sinkdb doesn't validate that ipv6 ranges don't have host bits set
171 |     // remove them if they are
172 |     $ioc = $lib.regex.replace("::[0-9a-f]+", "::", $d.indicator)
173 |     if ($lib.debug and $ioc != $d.indicator) { $lib.print("removed host bits from {i}", i=$d.indicator) }
174 | 
175 |     $size = $lib.cast(int, $ioc.split("/").1)
176 |     // for small ranges (/120 or longer masks), also make the individual inet:ipv6 nodes
177 |     if ($size >= 120) {
178 |         if $lib.debug { $lib.print("making inet:ipv6 nodes for {i}", i=$d.indicator) }
179 |         [inet:ipv6=$ioc]
180 |     }
181 | 
182 |     [inet:cidr6=$ioc] { $modelLookupResponse($node, $d) }
183 | }
184 | 
185 | // Model the exported data from SinkDB. Creates nodes.
186 | // Args:
187 | //  - $data: list of dicts of data from SinkDB
188 | // Yields nodes
189 | function modelExportResponse(data) {
190 |     init { $count = 0 }
191 | 
192 |     for $d in $data {
193 |         if $lib.debug { $lib.print("modeling {i} (type: {t})", i=$d.indicator, t=$d.type) }
194 | 
195 |         switch $d.type {
196 |             "ipv4": { [inet:ipv4=$d.indicator] { $modelLookupResponse($node, $d) } }
197 |             "ipv6": { [inet:ipv6=$d.indicator] { $modelLookupResponse($node, $d) } }
198 |             "ipv4_range": { yield $__modelIpv4Range($d) }
199 |             "ipv6_range": { yield $__modelIPv6Range($d) }
200 |             "domain_soa": {
201 |                 // SOA records can hold either an email address or an fqdn
202 |                 if ($d.indicator.find("@") != $lib.null) {
203 |                     [inet:email=$d.indicator] { $modelLookupResponse($node, $d) }
204 |                 } else {
205 |                     [inet:fqdn=$d.indicator] { $modelLookupResponse($node, $d) }
206 |                 }
207 |             }
208 |             "whois_email": { [inet:email=$d.indicator] { $modelLookupResponse($node, $d) } }
209 |             "nameserver": { [inet:fqdn=$d.indicator] { $modelLookupResponse($node, $d) } }
210 |             "web_url": { [inet:url=$d.indicator] { $modelLookupResponse($node, $d) } }
211 |             "web_domain": {
212 |                 // they put multiple comma delimited IOCs in here sometimes
213 |                 for $part in $d.indicator.split(", ") {
214 |                     // sometimes this has fqdns and IP address, no clue why
215 |                     try {
216 |                         [inet:fqdn=$part] { $modelLookupResponse($node, $d) }
217 |                     } catch BadTypeValu as err {
218 |                         [inet:ipv4=$part] { $modelLookupResponse($node, $d) }
219 |                     }
220 |                 }
221 |             }
222 |             "email_from": { [inet:email=$d.indicator] { $modelLookupResponse($node, $d) } }
223 |             "sending_ipv4": { [inet:ipv4=$d.indicator] { $modelLookupResponse($node, $d) } }
224 |             "sending_ipv4_range": { yield $__modelIpv4Range($d) }
225 |             "web_ipv4": { [inet:ipv4=$d.indicator] { $modelLookupResponse($node, $d) } }
226 |             "sending_ipv6": { [inet:ipv6=$d.indicator] { $modelLookupResponse($node, $d) } }
227 |             "sending_ipv6_range": { yield $__modelIPv6Range($d) }
228 |             "web_ipv6": { [inet:ipv6=$d.indicator] { $modelLookupResponse($node, $d) } }
229 |             "email_subject": { [it:dev:str=$d.indicator] { $modelLookupResponse($node, $d) } }
230 |             "email_from_name": { [it:dev:str=$d.indicator] { $modelLookupResponse($node, $d) } }
231 |             *: {
232 |                 $lib.warn("got an unsupported indicator type from sinkdb: {s}", s=$d.type)
233 |                 continue
234 |             }
235 |         }
236 | 
237 |         $count = ($count + 1)
238 |     }
239 | 
240 |     // not foolproof, a big cidr mask will blow the count up, but a good sanity check
241 |     fini {
242 |         if ($data.size() > $count) {
243 |             $lib.warn("tried to model {i} records but only could do {j} nodes", i=$data.size(), j=$count)
244 |         } else {
245 |             if $lib.debug { $lib.print("modeled {d} nodes", d=$count) }
246 |         }
247 |     }
248 | }
--------------------------------------------------------------------------------