├── .gitattributes ├── .github ├── ISSUE_TEMPLATE.md ├── release.yml └── workflows │ ├── docker-publish-dev.yml │ ├── docker-publish.yml │ ├── feedtest.yml │ ├── ruff.yml │ └── unittests.yml ├── .gitignore ├── .pylintrc ├── .python-version ├── .vscode ├── launch.json └── settings.json ├── LICENSE ├── README.md ├── contrib ├── README.md ├── analytics │ └── ivre_api │ │ ├── README.md │ │ ├── investigation-archlinux.png │ │ ├── ivre_api.py │ │ └── requirements.txt └── feeds │ ├── otx │ ├── config.txt │ ├── otx.py │ └── requirements.txt │ ├── proofpoint │ ├── config.txt │ ├── proofpoint.py │ └── requirements.txt │ └── threattracking │ ├── config.txt │ ├── requirements.txt │ └── threattracking.py ├── core ├── __init__.py ├── clients │ └── file_storage │ │ ├── __init__.py │ │ └── classes │ │ ├── interface.py │ │ ├── local_storage.py │ │ └── s3.py ├── common │ ├── __init__.py │ ├── misp_to_yeti.py │ └── utils.py ├── config │ ├── __init__.py │ └── config.py ├── constants.py ├── database_arango.py ├── errors.py ├── events │ ├── __init__.py │ ├── consumers.py │ ├── message.py │ └── producer.py ├── helpers.py ├── interfaces.py ├── logger.py ├── migrations │ ├── __init__.py │ ├── arangodb.py │ └── migration.py ├── schemas │ ├── __init__.py │ ├── audit.py │ ├── dfiq.py │ ├── entities │ │ ├── __init__.py │ │ ├── attack_pattern.py │ │ ├── campaign.py │ │ ├── company.py │ │ ├── course_of_action.py │ │ ├── identity.py │ │ ├── intrusion_set.py │ │ ├── investigation.py │ │ ├── malware.py │ │ ├── note.py │ │ ├── phone.py │ │ ├── private │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ └── __init__.py │ │ ├── threat_actor.py │ │ ├── tool.py │ │ └── vulnerability.py │ ├── entity.py │ ├── graph.py │ ├── indicator.py │ ├── indicators │ │ ├── __init__.py │ │ ├── forensicartifact.py │ │ ├── private │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ └── __init__.py │ │ ├── query.py │ │ ├── regex.py │ │ ├── sigma.py │ │ ├── suricata.py │ │ └── yara.py │ ├── model.py │ ├── observable.py │ ├── observables │ │ ├── __init__.py │ │ ├── asn.py │ │ ├── auth_secret.py │ │ ├── bic.py │ │ ├── certificate.py │ │ ├── cidr.py │ │ ├── command_line.py │ │ ├── container_image.py │ │ ├── email.py │ │ ├── file.py │ │ ├── generic.py │ │ ├── hostname.py │ │ ├── iban.py │ │ ├── imphash.py │ │ ├── ipv4.py │ │ ├── ipv6.py │ │ ├── ja3.py │ │ ├── jarm.py │ │ ├── mac_address.py │ │ ├── md5.py │ │ ├── mutex.py │ │ ├── named_pipe.py │ │ ├── package.py │ │ ├── path.py │ │ ├── private │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ └── __init__.py │ │ ├── registry_key.py │ │ ├── sha1.py │ │ ├── sha256.py │ │ ├── ssdeep.py │ │ ├── tlsh.py │ │ ├── url.py │ │ ├── user_account.py │ │ ├── user_agent.py │ │ └── wallet.py │ ├── package.py │ ├── rbac.py │ ├── roles.py │ ├── tag.py │ ├── task.py │ ├── template.py │ └── user.py ├── taskmanager.py ├── taskscheduler.py └── web │ ├── __init__.py │ ├── apiv2 │ ├── __init__.py │ ├── audit.py │ ├── auth.py │ ├── bloom.py │ ├── context.py │ ├── dfiq.py │ ├── entities.py │ ├── graph.py │ ├── groups.py │ ├── import_data.py │ ├── indicators.py │ ├── observables.py │ ├── rbac.py │ ├── system.py │ ├── tag.py │ ├── tasks.py │ ├── templates.py │ └── users.py │ ├── frontend │ └── README.md │ └── webapp.py ├── extras ├── docker │ ├── Dockerfile │ ├── README.md │ ├── dev │ │ ├── Dockerfile │ │ └── docker-compose.yaml │ └── docker-entrypoint.sh ├── git │ └── ruff-precommit-check.sh └── v1migrate │ └── README.md ├── plugins ├── __init__.py ├── analytics │ ├── __init__.py │ ├── deprecated │ │ ├── dnsdb.py │ │ ├── domain_tools.py │ │ 
├── email_rep.py │ │ ├── expire_tags.py │ │ ├── hash_file.py │ │ ├── malwares.py │ │ ├── onyphe.py │ │ ├── process_hostnames.py │ │ ├── process_ip.py │ │ ├── process_url.py │ │ ├── propagate_blocklist.py │ │ ├── resolve_hostnames.py │ │ ├── tag_logic.py │ │ ├── threatminer.py │ │ ├── urlscanio.py │ │ └── yeti_whois.py │ ├── private │ │ ├── .gitignore │ │ ├── README.md │ │ └── __init__.py │ └── public │ │ ├── __init__.py │ │ ├── censys.py │ │ ├── circl_passive_ssl.py │ │ ├── circl_pdns.py │ │ ├── dockerhub.py │ │ ├── expire_tags.py │ │ ├── github.py │ │ ├── macaddress_io.py │ │ ├── malshare.py │ │ ├── network_whois.py │ │ ├── passive_total.py │ │ ├── random_analytics.py │ │ ├── shodan.py │ │ ├── shodan_api.py │ │ └── virustotal_api.py ├── events │ ├── __init__.py │ ├── hostname_extract.py │ ├── log │ │ ├── __init__.py │ │ └── event_logger_example.py │ ├── private │ │ ├── .gitignore │ │ ├── README.md │ │ └── __init__.py │ └── public │ │ ├── __init__.py │ │ ├── datadog_metrics.py │ │ └── dockerhub.py ├── exports │ └── __init__.py ├── feeds │ ├── __init__.py │ ├── private │ │ ├── .gitignore │ │ ├── README.md │ │ └── __init__.py │ └── public │ │ ├── .pylintrc │ │ ├── __init__.py │ │ ├── abusech_malwarebazaar.py │ │ ├── abuseipdb.py │ │ ├── alienvault_ip_reputation.py │ │ ├── artifacts.py │ │ ├── attack.py │ │ ├── azorult-tracker.py │ │ ├── blocklistde_all.py │ │ ├── blocklistde_apache.py │ │ ├── blocklistde_bots.py │ │ ├── blocklistde_bruteforcelogin.py │ │ ├── blocklistde_ftp.py │ │ ├── blocklistde_imap.py │ │ ├── blocklistde_ircbot.py │ │ ├── blocklistde_mail.py │ │ ├── blocklistde_sip.py │ │ ├── blocklistde_ssh.py │ │ ├── blocklistde_strongips.py │ │ ├── botvrij_domain.py │ │ ├── botvrij_filename.py │ │ ├── botvrij_hostname.py │ │ ├── botvrij_ipdst.py │ │ ├── botvrij_md5.py │ │ ├── botvrij_sha1.py │ │ ├── botvrij_sha256.py │ │ ├── botvrij_url.py │ │ ├── cisa_kev.py │ │ ├── cisco_umbrella_top_domains.py │ │ ├── cruzit.py │ │ ├── dataplane_dnsrd.py │ │ ├── dataplane_dnsrdany.py │ │ ├── dataplane_dnsversion.py │ │ ├── dataplane_proto41.py │ │ ├── dataplane_sipinvite.py │ │ ├── dataplane_sipquery.py │ │ ├── dataplane_sipregistr.py │ │ ├── dataplane_smtpdata.py │ │ ├── dataplane_smtpgreet.py │ │ ├── dataplane_sshclient.py │ │ ├── dataplane_sshpwauth.py │ │ ├── dataplane_telnetlogin.py │ │ ├── dataplane_vnc.py │ │ ├── deprecated │ │ ├── benkowcc.py │ │ ├── cybercrime_atmostracker.py │ │ ├── cybercrime_ponytracker.py │ │ ├── cybercrimeatmtracker.py │ │ ├── cybercrimetracker.py │ │ ├── dynamic_dns.py │ │ ├── ipspamlist.py │ │ ├── malwaremustdiecncs.py │ │ ├── passive_dns.py │ │ ├── tweetlive.py │ │ ├── urlhaus_payloads.py │ │ └── virustotal_apiv3.py │ │ ├── dfiq.py │ │ ├── elastic.py │ │ ├── et_open.py │ │ ├── feodo_tracker_ip_blocklist.py │ │ ├── futex_re.py │ │ ├── hybrid_analysis.py │ │ ├── lolbas.py │ │ ├── malpedia.py │ │ ├── miningpoolstats.py │ │ ├── misp.py │ │ ├── openphish.py │ │ ├── otx_alienvault.py │ │ ├── phishing_database.py │ │ ├── phishtank.py │ │ ├── random.py │ │ ├── rulezskbruteforceblocker.py │ │ ├── signaturebase.py │ │ ├── sslblacklist_fingerprints.py │ │ ├── sslblacklist_ip.py │ │ ├── sslblacklist_ja3.py │ │ ├── threatfox.py │ │ ├── threatview_c2.py │ │ ├── timesketch.py │ │ ├── tor_exit_nodes.py │ │ ├── tranco_top_domains.py │ │ ├── urlhaus.py │ │ ├── viriback_tracker.py │ │ ├── vxvault_url.py │ │ ├── wiz_cloud_threat_landscape.py │ │ ├── yaraforge.py │ │ └── yaraify.py └── inline │ └── __init__.py ├── pyproject.toml ├── tests ├── __init__.py ├── analytics_test.py ├── apiv2 
│   ├── __init__.py │   ├── auth.py │   ├── bloom.py │   ├── dfiq.py │   ├── entities.py │   ├── graph.py │   ├── groups.py │   ├── import_data.py │   ├── indicators.py │   ├── observables.py │   ├── rbac.py │   ├── system.py │   ├── tags.py │   ├── tasks.py │   ├── templates.py │   ├── timeline.py │   └── users.py ├── core_tests │   ├── __init__.py │   ├── events.py │   └── tasks.py ├── dfiq_test_data │   ├── DFIQ_Scenario_no_id.yaml │   ├── F1005.yaml │   ├── Q1020.yaml │   ├── Q1020_no_indicators.yaml │   ├── Q1020_no_parents.yaml │   ├── Q1020_uuid_parent.yaml │   ├── Q1020_uuid_scenario_parent.yaml │   ├── S1003.yaml │   └── dfiq_test_data.zip ├── feeds.py ├── helpers.py ├── migration.py ├── misp_test_data │   └── misp_event.json ├── observable_test_data │   └── iocs.txt ├── schemas │   ├── __init__.py │   ├── dfiq.py │   ├── entity.py │   ├── fixture.py │   ├── graph.py │   ├── indicator.py │   ├── observable.py │   ├── package.py │   ├── rbac.py │   ├── tag.py │   ├── user.py │   └── yararule.py └── tor_exit_nodes_test.py ├── uv.lock ├── yeti.conf.sample └── yetictl ├── __init__.py └── cli.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.sh text eol=lf 2 | *.py text eol=lf 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Before opening an issue, please do a quick search through the current issues. 2 | 3 | ### Description 4 | [Please provide a description of the issue encountered] 5 | 6 | ### Environment 7 | 8 | | Question | Answer 9 | |---------------------------|-------------------- 10 | | Yeti release | 11 | | Yeti-frontend release | 12 | | OS version | Ubuntu 22.04, Windows 11, macOS 13.6.2 13 | | Browser | Chrome 119.0.6045.123 14 | 15 | ### Steps to Reproduce 16 | 17 | [Please explain how to reproduce the issue.] 18 | 19 | #### Expected behavior 20 | [How are you expecting the application to behave?] 21 | 22 | #### Actual behavior 23 | [How is the application behaving?
(include any stacktraces, logs, screenshots, etc.)] 24 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | categories: 3 | - title: Breaking changes 🧨 4 | labels: 5 | - breaking 6 | - title: Security 🚨 7 | labels: 8 | - security-impact 9 | - title: Enhancements 📈 10 | labels: 11 | - enhancement 12 | - title: Bug fixes 🐛 13 | labels: 14 | - bug 15 | - title: New feeds 16 | labels: 17 | - tasks:feed 18 | - title: New analytics 19 | labels: 20 | - tasks:analytics 21 | - title: Other Changes 22 | labels: 23 | - "*" 24 | -------------------------------------------------------------------------------- /.github/workflows/docker-publish-dev.yml: -------------------------------------------------------------------------------- 1 | name: Publish :dev Docker image 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | push_to_registries: 10 | name: Push :dev Docker image to DockerHub and GHCR 11 | runs-on: ubuntu-latest 12 | permissions: 13 | packages: write 14 | contents: read 15 | steps: 16 | - name: Check out the repo 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up QEMU 20 | uses: docker/setup-qemu-action@v3 21 | 22 | - name: Set up Docker Buildx 23 | uses: docker/setup-buildx-action@v3 24 | 25 | - name: Log in to Docker Hub 26 | uses: docker/login-action@v3 27 | with: 28 | username: ${{ secrets.DOCKER_USERNAME }} 29 | password: ${{ secrets.DOCKER_TOKEN }} 30 | 31 | - name: Log in to the Container registry 32 | uses: docker/login-action@v3 33 | with: 34 | registry: ghcr.io 35 | username: ${{ github.actor }} 36 | password: ${{ secrets.GITHUB_TOKEN }} 37 | 38 | - name: Build and push Docker images 39 | uses: docker/build-push-action@v5 40 | with: 41 | context: . 42 | file: ./extras/docker/Dockerfile 43 | platforms: linux/amd64,linux/arm64 44 | push: true 45 | tags: yetiplatform/yeti:dev,ghcr.io/${{ github.repository }}:dev 46 | -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker image 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | push_to_registries: 9 | name: Push Docker image to DockerHub and GHCR 10 | runs-on: ubuntu-latest 11 | permissions: 12 | packages: write 13 | contents: read 14 | steps: 15 | - name: Check out the repo 16 | uses: actions/checkout@v4 17 | 18 | - name: Set up QEMU 19 | uses: docker/setup-qemu-action@v3 20 | - name: Set up Docker Buildx 21 | uses: docker/setup-buildx-action@v3 22 | 23 | - name: Log in to Docker Hub 24 | uses: docker/login-action@v3 25 | with: 26 | username: ${{ secrets.DOCKER_USERNAME }} 27 | password: ${{ secrets.DOCKER_TOKEN }} 28 | - name: Log in to the Container registry 29 | uses: docker/login-action@v3 30 | with: 31 | registry: ghcr.io 32 | username: ${{ github.actor }} 33 | password: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | - name: Extract metadata (tags, labels) for Docker 36 | id: meta 37 | uses: docker/metadata-action@v5 38 | with: 39 | images: | 40 | yetiplatform/yeti 41 | ghcr.io/${{ github.repository }} 42 | 43 | - name: Build and push Docker images 44 | uses: docker/build-push-action@v5 45 | with: 46 | context: . 
47 |           file: ./extras/docker/Dockerfile 48 |           platforms: linux/amd64,linux/arm64 49 |           push: true 50 |           tags: ${{ steps.meta.outputs.tags }},yetiplatform/yeti:latest,ghcr.io/${{ github.repository }}:latest 51 |           labels: ${{ steps.meta.outputs.labels }} 52 | -------------------------------------------------------------------------------- /.github/workflows/feedtest.yml: -------------------------------------------------------------------------------- 1 | name: Test feeds 2 | 3 | on: 4 |   schedule: 5 |     - cron: '0 6 * * 3' # Run every Wednesday at 06:00 UTC 6 | 7 | jobs: 8 |   debug: 9 |     runs-on: ubuntu-latest 10 |     steps: 11 |       - name: Debug 12 |         run: echo "${{ toJson(github) }}" 13 |   testfeeds: 14 |     if: ${{ github.event_name == 'schedule' || contains(github.event.pull_request.labels.*.name, 'tasks:feed') }} 15 |     runs-on: ubuntu-latest 16 |     env: 17 |       YETI_REDIS_HOST: localhost 18 |       YETI_REDIS_PORT: 6379 19 |       YETI_REDIS_DATABASE: 0 20 |       YETI_ARANGODB_HOST: localhost 21 |       YETI_ARANGODB_PORT: 8529 22 |       YETI_ARANGODB_DATABASE: yeti 23 |       YETI_ARANGODB_USERNAME: root 24 |       YETI_ARANGODB_PASSWORD: "" 25 |       YETI_AUTH_SECRET_KEY: SECRET 26 |       YETI_AUTH_ALGORITHM: HS256 27 |       YETI_AUTH_ACCESS_TOKEN_EXPIRE_MINUTES: 30 28 |       YETI_AUTH_BROWSER_TOKEN_EXPIRE_MINUTES: 43200 29 |       YETI_AUTH_ENABLED: False 30 |       YETI_SYSTEM_PLUGINS_PATH: ./plugins 31 |     strategy: 32 |       matrix: 33 |         os: [ubuntu-latest] 34 |         python-version: ["3.10"] 35 |     steps: 36 |       - uses: actions/checkout@v4 37 |       - run: 38 |           sudo apt-get update && sudo apt-get install -y python3-pip && sudo pip3 install uv 39 |       - name: Set up Python ${{ matrix.python-version }} 40 |         uses: actions/setup-python@v4 41 |         with: 42 |           python-version: ${{ matrix.python-version }} 43 |       - name: Install Python dependencies 44 |         run: uv sync --all-extras 45 |       - name: Copy dummy config file 46 |         run: cp yeti.conf.sample yeti.conf 47 |       - name: Start redis & arangodb containers 48 |         run: docker compose -f extras/docker/dev/docker-compose.yaml up -d redis arangodb 49 |       - name: Start event consumer 50 |         run: uv run python -m core.events.consumers events & sleep 5 51 |       - name: Run test feeds 52 |         run: | 53 |           uv run python -m unittest discover -s tests/ -p 'feeds.py' 54 | -------------------------------------------------------------------------------- /.github/workflows/ruff.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 |   lint: 7 |     name: Lint using Ruff 8 |     runs-on: ubuntu-latest 9 |     steps: 10 |       - uses: actions/checkout@v4 11 |       - run: 12 |           sudo apt-get update && sudo apt-get install -y python3-pip && sudo pip3 install uv 13 |       - uses: actions/setup-python@v4 14 |       - name: Install ruff 15 |         run: uv tool install ruff 16 |       - name: Run ruff lint check 17 |         run: ruff check . 18 |       - name: Run ruff format check 19 |         run: ruff format . --check
20 | -------------------------------------------------------------------------------- /.github/workflows/unittests.yml: -------------------------------------------------------------------------------- 1 | name: Unit tests 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | 7 |   unittest: 8 |     runs-on: ubuntu-latest 9 |     env: 10 |       YETI_REDIS_HOST: localhost 11 |       YETI_REDIS_PORT: 6379 12 |       YETI_REDIS_DATABASE: 0 13 |       YETI_ARANGODB_HOST: localhost 14 |       YETI_ARANGODB_PORT: 8529 15 |       YETI_ARANGODB_DATABASE: yeti 16 |       YETI_ARANGODB_USERNAME: root 17 |       YETI_ARANGODB_PASSWORD: "" 18 |       YETI_AUTH_SECRET_KEY: SECRET 19 |       YETI_AUTH_ALGORITHM: HS256 20 |       YETI_AUTH_ACCESS_TOKEN_EXPIRE_MINUTES: 30 21 |       YETI_AUTH_BROWSER_TOKEN_EXPIRE_MINUTES: 43200 22 |       YETI_AUTH_ENABLED: False 23 |       YETI_SYSTEM_PLUGINS_PATH: ./plugins 24 |       YETI_SYSTEM_AUDIT_LOGFILE: /tmp/yeti_audit.log 25 |     strategy: 26 |       matrix: 27 |         os: [ubuntu-latest] 28 |         python-version: ["3.10"] 29 |     steps: 30 |       - uses: actions/checkout@v4 31 |       - run: 32 |           sudo apt-get update && sudo apt-get install -y python3-pip && sudo pip3 install uv 33 |       - name: Set up Python ${{ matrix.python-version }} 34 |         uses: actions/setup-python@v4 35 |         with: 36 |           python-version: ${{ matrix.python-version }} 37 |       - name: Install Python dependencies 38 |         run: uv sync --group dev 39 |       - name: Copy dummy config file 40 |         run: cp yeti.conf.sample yeti.conf 41 |       - name: Start redis & arangodb containers 42 |         run: docker compose -f extras/docker/dev/docker-compose.yaml up -d redis arangodb 43 |       - name: Test with unittest (schemas) 44 |         run: uv run python -m unittest discover -s tests/schemas -p '*.py' 45 |       - name: Test with unittest (apiv2) 46 |         run: uv run python -m unittest discover -s tests/apiv2 -p '*.py' 47 |       - name: Test with unittest (core_tests) 48 |         run: uv run python -m unittest discover -s tests/core_tests -p '*.py' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | /local 22 | /include 23 | /share 24 | pip-selfcheck.json 25 | 26 | # Installer logs 27 | pip-log.txt 28 | 29 | # Unit test / coverage reports 30 | .coverage 31 | .tox 32 | nosetests.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # MAC OS X stuff 43 | .fuse_* 44 | .DS_Store 45 | .DS_Store? 46 | ._* 47 | .Spotlight-V100 48 | .Trashes 49 | Icon?
50 | ehthumbs.db 51 | Thumbs.db 52 | 53 | # YETI stuff 54 | exports/* 55 | storage/* 56 | celerybeat.pid 57 | yeti.conf 58 | 59 | # Frontend Dependencies 60 | node_modules 61 | 62 | # Vagrant 63 | .vagrant/ 64 | vagrant 65 | 66 | #IDE 67 | .idea 68 | 69 | #Patch 70 | *.patch 71 | #venv 72 | venv* 73 | *extras/docker/dev/data/* 74 | extras/docker/dev/docker-compose.yaml 75 | celerybeat-schedule.db 76 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 |     // Use IntelliSense to learn about possible attributes. 3 |     // Hover to view descriptions of existing attributes. 4 |     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 |     "version": "0.2.0", 6 |     "configurations": [ 7 |         { 8 |             "name": "Python Debugger: FastAPI", 9 |             "type": "debugpy", 10 |             "request": "launch", 11 |             "module": "uvicorn", 12 |             "args": [ 13 |                 "core.web.webapp:app", 14 |                 "--reload", 15 |                 "--host", 16 |                 "0.0.0.0" 17 |             ], 18 |             "jinja": true 19 |         } 20 |     ] 21 | } 22 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 |     "python.testing.unittestArgs": [ 3 |         "-v", 4 |         "-s", 5 |         "./tests", 6 |         "-p", 7 |         "*.py" 8 |     ], 9 |     "python.testing.pytestEnabled": false, 10 |     "python.testing.unittestEnabled": true, 11 |     "python.analysis.typeCheckingMode": "basic", 12 |     "editor.defaultFormatter": "charliermarsh.ruff", 13 |     "mypy-type-checker.importStrategy": "fromEnvironment", 14 |     "[python]": { 15 |         "editor.defaultFormatter": "charliermarsh.ruff" 16 |     } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Yeti Platform 2 | 3 | Yeti aims to bridge the gap between CTI and DFIR practitioners by providing a 4 | Forensics Intelligence platform and pipeline for DFIR teams. It was born out of the frustration 5 | of having to answer the question "where have I seen this artifact before?" or 6 | "how do I search for IOCs related to this threat (or all threats?) in my timeline?" 7 | 8 | Documentation links: 9 | 10 | * Main website: https://yeti-platform.io/ 11 | * [Documentation](https://yeti-platform.io/docs) 12 | * [Guides](https://yeti-platform.io/guides) 13 | 14 | ![](https://yeti-platform.io/logo.png) 15 | 16 | ## What is Yeti? 17 | 18 | In a nutshell, Yeti allows you to: 19 | 20 | - Bulk search observables and get a pretty good guess on the nature of the 21 |   threat, and how to find it on a system. 22 | - Inversely, focus on a threat and quickly list all TTPs, malware, and related 23 |   DFIR artifacts. 24 | - Let CTI analysts focus on adding intelligence rather than worrying about 25 |   machine-readable export formats. 26 | - Incorporate your own data sources, analytics, and logic very easily. 27 | 28 | This is done by: 29 | 30 | - Storing technical and tactical CTI (observables, TTPs, campaigns, etc.) from 31 |   internal or external systems. 32 | - Being a backend for DFIR-related queries: Yara signatures, Sigma rules, DFIQ.
33 | - Providing a web API to automate queries (think incident management platform) 34 |   and enrichment (think malware sandbox). 35 | - Exporting the data in user-defined formats so that it can be ingested by 36 |   third-party applications (SIEM, DFIR platforms). 37 | 38 | ## Some screenshots 39 | 40 | ![](https://yeti-platform.io/scattered.png) 41 | 42 | ![](https://yeti-platform.io/attack.png) 43 | 44 | ![](https://yeti-platform.io/vuln.png) 45 | 46 | ![](https://yeti-platform.io/intrusionset.png) 47 | -------------------------------------------------------------------------------- /contrib/analytics/ivre_api/README.md: -------------------------------------------------------------------------------- 1 | # IVRE plugin for YETI # 2 | 3 | This analytics uses IVRE's data. [IVRE](https://ivre.rocks/) is an 4 | open-source network recon framework. 5 | 6 | IVRE's code is available on [GitHub](https://github.com/cea-sec/ivre/) 7 | and its documentation on [Read the docs](https://doc.ivre.rocks/). 8 | 9 | ## Description ## 10 | 11 | Currently, this analytics provides: 12 | 13 | - Estimated geographic location and Autonomous System (AS) of IP 14 |   addresses (based on 15 |   [MaxMind data](https://dev.maxmind.com/geoip/geoip2/geolite2/)). 16 | 17 | - DNS responses seen: links are created from IP addresses to hostnames 18 |   and vice versa, aka your own private Passive DNS service. 19 | 20 | - X509 certificates seen in TLS traffic: links are created: 21 | 22 |   - from IP addresses to certificates. 23 | 24 |   - from certificates to hostnames and IP addresses (via `Subject` and 25 |     `Subject Alternative Names` fields). 26 | 27 |   - from certificates to subjects and issuers (as a dedicated 28 |     observable type: `CertificateSubject`, via `Subject` and 29 |     `Issuer` fields). 30 | 31 |   - certificate subjects to (other) certificates (with the same issuer 32 |     or subject). 33 | 34 | - HTTP headers: links are created from IP addresses to hostnames (and 35 |   vice versa) based on `Host:` headers, and from IP addresses to 36 |   `User-Agent` and `Server` header values. 37 | 38 | This adds "IVRE - MaxMind", which uses IVRE's API to fetch information 39 | from MaxMind databases, and "IVRE - Passive", which uses IVRE's passive 40 | data to create links with hostnames and certificates. 41 | 42 | ## Example ## 43 | 44 | Here is a graph of a fictitious investigation about an IP address used 45 | by the `archlinux.org` domain, based on data from IVRE: 46 | 47 | ![Investigation graph for archlinux.org](investigation-archlinux.png) 48 | 49 | ## Installation ## 50 | 51 | Using the `virtualenv` you use for YETI (if you do use `virtualenv`), 52 | run (from YETI's source directory): 53 | 54 | ```Bash 55 | ln -s ../../../contrib/analytics/ivre_api/ivre_api.py plugins/analytics/private/ 56 | pip install -r contrib/analytics/ivre_api/requirements.txt 57 | ivre ipdata --download 58 | ``` 59 | 60 | You should be all set!
61 | -------------------------------------------------------------------------------- /contrib/analytics/ivre_api/investigation-archlinux.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/contrib/analytics/ivre_api/investigation-archlinux.png -------------------------------------------------------------------------------- /contrib/analytics/ivre_api/requirements.txt: -------------------------------------------------------------------------------- 1 | ivre==0.9.13 2 | -------------------------------------------------------------------------------- /contrib/feeds/otx/config.txt: -------------------------------------------------------------------------------- 1 | [otx] 2 | 3 | ## 4 | ## Use this setting to subscribe Yeti to OTX pulses. 5 | ## pulses: a comma-separated list of pulse entries you wish to subscribe to 6 | ## key: Your OTX API key 7 | ## 8 | 9 | # pulses = otx_pulse1 10 | # key = OTX_API_KEY 11 | 12 | [otx_pulse1] 13 | 14 | ## 15 | ## For every OTX pulse, you need to specify a pulse ID and a boolean indicating 16 | ## if you want to tag observables with tags coming from this pulse. 17 | ## "Y" will tag observables, and any other value won't. 18 | ## 19 | 20 | # pulse_id = PULSE_ID 21 | # use_otx_tags = Y 22 | -------------------------------------------------------------------------------- /contrib/feeds/otx/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | OTXv2 3 | -------------------------------------------------------------------------------- /contrib/feeds/proofpoint/config.txt: -------------------------------------------------------------------------------- 1 | [proofpoint] 2 | 3 | # The ProofPoint API credential set must be obtained by a Tenant administrator 4 | api_user = 12345678-1234-1234-1234-1234585686896896 5 | api_password = wiq3890ijwdfwefwe8f9fwehiofwehkefwhiwefiohwefohi... 6 | 7 | # The Tenant id is the uuid shown in the Web_UI queries 8 | tenant_id = 09872137821783-1234-1234-1234-121239086896896 9 | 10 | # Do we want email metadata? 11 | import_email_metadata = False 12 | -------------------------------------------------------------------------------- /contrib/feeds/proofpoint/requirements.txt: -------------------------------------------------------------------------------- 1 | hammock 2 | -------------------------------------------------------------------------------- /contrib/feeds/threattracking/config.txt: -------------------------------------------------------------------------------- 1 | [threattracking] 2 | 3 | # https://developers.google.com/sheets/api/quickstart/python 4 | # activate the google sheet api in https://console.developers.google.com/apis/api/sheets.googleapis.com/overview?project=api-project-xxxx 5 | # create an API key account https://console.developers.google.com/apis/credentials 6 | 7 | google_api_key = "wefkjlwfklweklfwefhklwefhklwefhkl" 8 | 9 | # The threat tracking spreadsheet key here (http://apt.threattracking.com) 10 | # https://docs.google.com/spreadsheets/u/1/d/1H9_xaxQHpWaa4O_Son4Gx0YOIzlcBWMsdvePFX68EKU/pubhtml 11 | # (this is not a confidential key. It is the public spreadsheet id. Keep this unmodified.)
12 | sheet_key = "1H9_xaxQHpWaa4O_Son4Gx0YOIzlcBWMsdvePFX68EKU" 13 | -------------------------------------------------------------------------------- /contrib/feeds/threattracking/requirements.txt: -------------------------------------------------------------------------------- 1 | hammock 2 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/__init__.py -------------------------------------------------------------------------------- /core/clients/file_storage/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import inspect 3 | import os 4 | from typing import Type 5 | 6 | from core.clients.file_storage.classes.interface import FileStorageClient 7 | from core.clients.file_storage.classes.local_storage import LocalStorageClient 8 | 9 | ignored_files = ["interface.py", "local_storage.py"] 10 | 11 | 12 | def load_client_classes(): 13 | classes: list[Type[FileStorageClient]] = [] 14 | 15 | class_directory = os.path.join(os.path.dirname(__file__), "classes") 16 | for filename in os.listdir(class_directory): 17 | if filename.endswith(".py") and filename not in ignored_files: 18 | module_name = filename.removesuffix(".py") 19 | 20 | module = importlib.import_module( 21 | f"core.clients.file_storage.classes.{module_name}" 22 | ) 23 | for _, obj in inspect.getmembers(module, inspect.isclass): 24 | if issubclass(obj, FileStorageClient) and obj != FileStorageClient: 25 | classes.append(obj) 26 | 27 | return classes 28 | 29 | 30 | def get_client(path: str) -> FileStorageClient: 31 | for client_class in load_client_classes(): 32 | if path.startswith(client_class.PREFIX): 33 | return client_class(path) 34 | return LocalStorageClient(path) 35 | -------------------------------------------------------------------------------- /core/clients/file_storage/classes/interface.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class FileStorageClient(ABC): 5 | PREFIX: str 6 | 7 | @abstractmethod 8 | def __init__(self, path: str): 9 | raise NotImplementedError 10 | 11 | @abstractmethod 12 | def file_path(self, file_name: str) -> str: 13 | raise NotImplementedError 14 | 15 | @abstractmethod 16 | def get_file(self, file_name: str) -> bytes: 17 | raise NotImplementedError 18 | 19 | @abstractmethod 20 | def put_file(self, file_name: str, contents: bytes) -> None: 21 | raise NotImplementedError 22 | 23 | @abstractmethod 24 | def delete_file(self, file_name: str) -> None: 25 | raise NotImplementedError 26 | -------------------------------------------------------------------------------- /core/clients/file_storage/classes/local_storage.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pathlib 4 | 5 | from core.clients.file_storage.classes.interface import FileStorageClient 6 | 7 | 8 | class LocalStorageClient(FileStorageClient): 9 | PREFIX = "" 10 | 11 | def __init__(self, path: str): 12 | self.path = pathlib.Path(path) 13 | self.path.mkdir(parents=True, exist_ok=True) 14 | 15 | logging.info(f"Initialized local storage client with path {self.path}") 16 | 17 | def _file_path(self, file_name: str) -> pathlib.Path: 18 | return self.path.joinpath(file_name) 19 | 20 | 
def file_path(self, file_name: str) -> str: 21 | return str(self._file_path(file_name)) 22 | 23 | def get_file(self, file_name: str) -> bytes: 24 | return self._file_path(file_name).read_bytes() 25 | 26 | def put_file(self, file_name: str, contents: bytes) -> None: 27 | self._file_path(file_name).write_bytes(contents) 28 | 29 | def delete_file(self, file_name: str) -> None: 30 | os.remove(self.file_path(file_name)) 31 | -------------------------------------------------------------------------------- /core/clients/file_storage/classes/s3.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from core.clients.file_storage.classes.interface import FileStorageClient 5 | 6 | try: 7 | import boto3 8 | except ImportError: 9 | boto3 = None 10 | logging.warning( 11 | "boto3 is not imported, if you wish to use s3 file storage please install with `uv sync --group s3`" 12 | ) 13 | 14 | 15 | class S3Client(FileStorageClient): 16 | PREFIX = "s3://" 17 | 18 | def __init__(self, path: str): 19 | if boto3 is None: 20 | logging.warning( 21 | "Attempting to use `S3Client` without `boto3` installed; install with `uv sync --group s3`" 22 | ) 23 | raise ImportError("boto3 is not installed") 24 | 25 | bucket, *prefix = path.removeprefix(self.PREFIX).split("/") 26 | 27 | self.bucket = bucket 28 | self.prefix = "/".join(prefix) 29 | 30 | self.s3 = boto3.client("s3") 31 | 32 | logging.info( 33 | f'Initialized S3 client with bucket "{self.bucket}" and prefix "{self.prefix}"' 34 | ) 35 | 36 | def file_path(self, file_name: str) -> str: 37 | return os.path.join(self.prefix, file_name) 38 | 39 | def get_file(self, file_name: str) -> bytes: 40 | response = self.s3.get_object(Bucket=self.bucket, Key=self.file_path(file_name)) 41 | return response["Body"].read() 42 | 43 | def put_file(self, file_name: str, contents: bytes) -> None: 44 | self.s3.put_object( 45 | Bucket=self.bucket, Key=self.file_path(file_name), Body=contents 46 | ) 47 | 48 | def delete_file(self, file_name: str) -> None: 49 | self.s3.delete_object(Bucket=self.bucket, Key=self.file_path(file_name)) 50 | -------------------------------------------------------------------------------- /core/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/common/__init__.py -------------------------------------------------------------------------------- /core/common/utils.py: -------------------------------------------------------------------------------- 1 | # DEPRECATED 2 | import logging 3 | 4 | from dateutil import parser 5 | from dateutil.tz import UTC, gettz 6 | from tldextract import TLDExtract 7 | 8 | from core.config.config import yeti_config 9 | 10 | tzinfos = {"CEST": gettz("Europe/Amsterdam"), "CST": gettz("Europe/Amsterdam")} 11 | 12 | 13 | tld_extract_dict = {"extra_suffixes": list(), "suffix_list_urls": None} 14 | 15 | if hasattr(yeti_config, "tldextract"): 16 | if yeti_config.tldextract.extra_suffixes: 17 | tld_extract_dict["extra_suffixes"] = ( 18 | yeti_config.tldextract.extra_suffixes.split(",") 19 | ) 20 | if yeti_config.tldextract.suffix_list_urls: 21 | tld_extract_dict["suffix_list_urls"] = yeti_config.tldextract.suffix_list_urls 22 | 23 | 24 | def tldextract_parser(url): 25 | parts = None 26 | 27 | try: 28 | parts = TLDExtract(**tld_extract_dict)(url) 29 | except Exception as e: 30 | logging.error(e) 31 | 32 | return parts 33 | 34 | 
35 | def parse_date_to_utc(date): 36 | """Parses a string to a datetime object in UTC timezone. 37 | 38 | Args: 39 | date (str): string-timestamp to convert to datetime object. 40 | e.g. 2019-06-09 00:00:00 41 | 42 | Returns: 43 | A datetime.datetime object in UTC timezone. 44 | """ 45 | 46 | return parser.parse(date, tzinfos=tzinfos).astimezone(UTC) 47 | -------------------------------------------------------------------------------- /core/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/config/__init__.py -------------------------------------------------------------------------------- /core/constants.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | YETI_ROOT = path.normpath(path.dirname(path.dirname(path.abspath(__file__)))) 4 | STORAGE_ROOT = path.join(YETI_ROOT, "storage") 5 | PLUGINS_ROOT = path.join(YETI_ROOT, "plugins") 6 | -------------------------------------------------------------------------------- /core/errors.py: -------------------------------------------------------------------------------- 1 | class YetiError(RuntimeError): 2 | def __init__(self, message: str, meta: dict | None = None): 3 | self.meta = meta or {} 4 | super().__init__(message) 5 | 6 | 7 | class ObjectCreationError(YetiError): 8 | pass 9 | -------------------------------------------------------------------------------- /core/events/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/events/__init__.py -------------------------------------------------------------------------------- /core/helpers.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import hashlib 3 | import re 4 | 5 | 6 | def refang(url): 7 | def http(match): 8 | return "http{}".format(match.group("real")) 9 | 10 | substitutes = ("me[o0]w", "h..p") 11 | schema_re = re.compile("^(?P{})(?Ps?://)".format("|".join(substitutes))) 12 | domain_re = re.compile(r"(\[\.\]|\[\.|\.\]|,)") 13 | url = schema_re.sub(http, url) 14 | url = domain_re.sub(".", url) 15 | return url 16 | 17 | 18 | def stream_sha256(stream): 19 | sha256 = hashlib.sha256() 20 | 21 | while True: 22 | data = stream.read(4096) 23 | if data: 24 | sha256.update(data) 25 | else: 26 | stream.seek(0, 0) 27 | break 28 | 29 | return sha256.hexdigest() 30 | 31 | 32 | def now(): 33 | return datetime.datetime.now(datetime.timezone.utc) 34 | -------------------------------------------------------------------------------- /core/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/migrations/__init__.py -------------------------------------------------------------------------------- /core/migrations/migration.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | 4 | class MigrationManager: 5 | MIGRATIONS: list[Callable] = [] 6 | 7 | def __init__(self): 8 | self.connect_to_db() 9 | 10 | def connect_to_db(self): 11 | raise NotImplementedError 12 | 13 | def update_db_version(self, version: int): 14 | raise NotImplementedError 15 | 16 | def migrate_to_latest(self, stop_at: int 
| None = None): 17 | for idx, migration in enumerate(self.MIGRATIONS): 18 | if stop_at is not None and idx >= stop_at: 19 | print(f"Stopping at migration {idx}") 20 | elif idx >= self.db_version and (stop_at is None or idx < stop_at): 21 | print(f"Running migration {idx} -> {idx + 1}") 22 | migration() 23 | self.update_db_version(idx + 1) 24 | else: 25 | print(f"Skipping migration {idx}, current version is {self.db_version}") 26 | continue 27 | 28 | @classmethod 29 | def register_migration(cls, migration): 30 | cls.MIGRATIONS.append(migration) 31 | -------------------------------------------------------------------------------- /core/schemas/entities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/schemas/entities/__init__.py -------------------------------------------------------------------------------- /core/schemas/entities/attack_pattern.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Literal 2 | 3 | from core.schemas import entity 4 | 5 | 6 | class AttackPattern(entity.Entity): 7 | _type_filter: ClassVar[str] = "attack-pattern" 8 | type: Literal["attack-pattern"] = "attack-pattern" 9 | aliases: list[str] = [] 10 | kill_chain_phases: list[str] = [] 11 | -------------------------------------------------------------------------------- /core/schemas/entities/campaign.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from typing import ClassVar, Literal 3 | 4 | from pydantic import Field 5 | 6 | from core.helpers import now 7 | from core.schemas import entity 8 | 9 | 10 | class Campaign(entity.Entity): 11 | _type_filter: ClassVar[str] = "campaign" 12 | type: Literal["campaign"] = "campaign" 13 | 14 | aliases: list[str] = [] 15 | first_seen: datetime.datetime = Field(default_factory=now) 16 | last_seen: datetime.datetime = Field(default_factory=now) 17 | -------------------------------------------------------------------------------- /core/schemas/entities/company.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Literal 2 | 3 | from core.schemas import entity 4 | 5 | 6 | class Company(entity.Entity): 7 | type: Literal["company"] = "company" 8 | _type_filter: ClassVar[str] = "company" 9 | -------------------------------------------------------------------------------- /core/schemas/entities/course_of_action.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Literal 2 | 3 | from core.schemas import entity 4 | 5 | 6 | class CourseOfAction(entity.Entity): 7 | _type_filter: ClassVar[str] = "course-of-action" 8 | type: Literal["course-of-action"] = "course-of-action" 9 | -------------------------------------------------------------------------------- /core/schemas/entities/identity.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Literal 2 | 3 | from core.schemas import entity 4 | 5 | 6 | class Identity(entity.Entity): 7 | _type_filter: ClassVar[str] = "identity" 8 | type: Literal["identity"] = "identity" 9 | 10 | identity_class: str = "" 11 | sectors: list[str] = [] 12 | contact_information: str = "" 13 | -------------------------------------------------------------------------------- /core/schemas/entities/intrusion_set.py: 
-------------------------------------------------------------------------------- 1 | import datetime 2 | from typing import ClassVar, Literal 3 | 4 | from pydantic import Field 5 | 6 | from core.helpers import now 7 | from core.schemas import entity 8 | 9 | 10 | class IntrusionSet(entity.Entity): 11 |     _type_filter: ClassVar[str] = "intrusion-set" 12 |     type: Literal["intrusion-set"] = "intrusion-set" 13 | 14 |     aliases: list[str] = [] 15 |     first_seen: datetime.datetime = Field(default_factory=now) 16 |     last_seen: datetime.datetime = Field(default_factory=now) 17 | -------------------------------------------------------------------------------- /core/schemas/entities/investigation.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Literal 2 | 3 | from core.schemas import entity 4 | 5 | 6 | class Investigation(entity.Entity): 7 |     _type_filter: ClassVar[str] = "investigation" 8 |     type: Literal["investigation"] = "investigation" 9 | 10 |     reference: str = "" 11 | -------------------------------------------------------------------------------- /core/schemas/entities/malware.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Literal 2 | 3 | from core.schemas import entity 4 | 5 | 6 | class Malware(entity.Entity): 7 |     _type_filter: ClassVar[str] = "malware" 8 |     type: Literal["malware"] = "malware" 9 | 10 |     kill_chain_phases: list[str] = [] 11 |     aliases: list[str] = [] 12 |     family: str = "" 13 | -------------------------------------------------------------------------------- /core/schemas/entities/note.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Literal 2 | 3 | from core.schemas import entity 4 | 5 | 6 | class Note(entity.Entity): 7 |     type: Literal["note"] = "note" 8 |     _type_filter: ClassVar[str] = "note" 9 | -------------------------------------------------------------------------------- /core/schemas/entities/phone.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Literal 2 | 3 | from core.schemas import entity 4 | 5 | 6 | class Phone(entity.Entity): 7 |     type: Literal["phone"] = "phone" 8 |     _type_filter: ClassVar[str] = "phone" 9 | -------------------------------------------------------------------------------- /core/schemas/entities/private/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !README.md 4 | !__init__.py -------------------------------------------------------------------------------- /core/schemas/entities/private/README.md: -------------------------------------------------------------------------------- 1 | ### Private entities 2 | 3 | This directory is where you should place your private entities. It could be 4 | named anything else, but this one has a `.gitignore` so you don't mess things 5 | up. ;-) 6 | 7 | Each entity defined with a filename containing `_` will then be represented in the API and the UI with `-`. For example, if you add a file `super_new_entity.py`, this entity will be defined as `super-new-entity`.
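To make the `_` → `-` naming convention above concrete, here is a minimal sketch of what a private entity module could look like, modeled on the public entity schemas in this directory (e.g. `company.py`). The file and class names are hypothetical:

```python
# Hypothetical file: core/schemas/entities/private/super_new_entity.py
from typing import ClassVar, Literal

from core.schemas import entity


class SuperNewEntity(entity.Entity):
    # Per the convention above, the `_` in the filename maps to `-` in the
    # API and UI, so this entity surfaces as "super-new-entity".
    _type_filter: ClassVar[str] = "super-new-entity"
    type: Literal["super-new-entity"] = "super-new-entity"
```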
-------------------------------------------------------------------------------- /core/schemas/entities/private/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/schemas/entities/private/__init__.py -------------------------------------------------------------------------------- /core/schemas/entities/threat_actor.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from typing import ClassVar, Literal 3 | 4 | from pydantic import Field 5 | 6 | from core.helpers import now 7 | from core.schemas import entity 8 | 9 | 10 | class ThreatActor(entity.Entity): 11 |     _type_filter: ClassVar[str] = "threat-actor" 12 |     type: Literal["threat-actor"] = "threat-actor" 13 | 14 |     threat_actor_types: list[str] = [] 15 |     aliases: list[str] = [] 16 |     first_seen: datetime.datetime = Field(default_factory=now) 17 |     last_seen: datetime.datetime = Field(default_factory=now) 18 | -------------------------------------------------------------------------------- /core/schemas/entities/tool.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Literal 2 | 3 | from core.schemas import entity 4 | 5 | 6 | class Tool(entity.Entity): 7 |     _type_filter: ClassVar[str] = "tool" 8 |     type: Literal["tool"] = "tool" 9 | 10 |     aliases: list[str] = [] 11 |     kill_chain_phases: list[str] = [] 12 |     tool_version: str = "" 13 | -------------------------------------------------------------------------------- /core/schemas/entities/vulnerability.py: -------------------------------------------------------------------------------- 1 | import re 2 | from enum import Enum 3 | from typing import ClassVar, Literal 4 | 5 | from pydantic import Field 6 | 7 | from core.schemas import entity 8 | 9 | vulnerability_matcher = re.compile(
10 |     r"(?P<pre>\W?)(?P<cve>CVE-\d{4}-\d{4,7})(?P<post>\W?)"
11 | )
12 | 
13 | 
14 | class SeverityType(str, Enum):
15 |     none = "none"
16 |     low = "low"
17 |     medium = "medium"
18 |     high = "high"
19 |     critical = "critical"
20 | 
21 | 
22 | class Vulnerability(entity.Entity):
23 |     """
24 |     This class represents a vulnerability in the schema.
25 | 
26 |     Attributes:
27 |         title: title of the vulnerability.
28 |         base_score: base score of the vulnerability obtained from its CVSS metric,
29 |                      ranging from 0.0 to 10.0.
30 |         severity: represents the severity of a vulnerability. One of none, low,
31 |                   medium, high, critical.
32 |     """
33 | 
34 |     _type_filter: ClassVar[str] = "vulnerability"
35 |     type: Literal["vulnerability"] = "vulnerability"
36 | 
37 |     title: str = ""
38 |     base_score: float = Field(ge=0.0, le=10.0, default=0.0)
39 |     severity: SeverityType = "none"
40 |     reference: str = ""
41 | 
42 |     @classmethod
43 |     def is_valid(cls, ent: entity.Entity) -> bool:
44 |         if vulnerability_matcher.match(ent.name):
45 |             return True
46 |         return False
47 | 
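For illustration, here is how the module-level matcher above behaves; the optional `\W?` groups tolerate punctuation around the CVE identifier (the CVE numbers below are arbitrary examples):

```python
from core.schemas.entities.vulnerability import vulnerability_matcher

assert vulnerability_matcher.match("CVE-2021-44228")     # bare CVE id
assert vulnerability_matcher.match("(CVE-2014-0160)")    # wrapped in punctuation
assert vulnerability_matcher.match("not-a-cve") is None  # no match at all
```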


--------------------------------------------------------------------------------
/core/schemas/indicators/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/schemas/indicators/__init__.py


--------------------------------------------------------------------------------
/core/schemas/indicators/private/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | !README.md
4 | !__init__.py


--------------------------------------------------------------------------------
/core/schemas/indicators/private/README.md:
--------------------------------------------------------------------------------
1 | ### Private indicators
2 | This directory is where you should place your private indicators. It could be named anything else, but this one has a `.gitignore` so you don't mess things up. ;-)
3 | 


--------------------------------------------------------------------------------
/core/schemas/indicators/private/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/schemas/indicators/private/__init__.py


--------------------------------------------------------------------------------
/core/schemas/indicators/query.py:
--------------------------------------------------------------------------------
 1 | from typing import ClassVar, Literal
 2 | 
 3 | from core.schemas import indicator
 4 | 
 5 | 
 6 | class Query(indicator.Indicator):
 7 |     """Represents a query that can be sent to another system."""
 8 | 
 9 |     _type_filter: ClassVar[str] = "query"
10 |     type: Literal["query"] = "query"
11 | 
12 |     query_type: str
13 |     target_systems: list[str] = []
14 | 
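For illustration, a `Query` indicator might be created like this. The field values are invented, and the `diamond` argument is an assumption about the `Indicator` base class (which is not shown in this dump and may require additional fields):

```python
from core.schemas.indicators.query import Query

q = Query(
    name="recent-dns-lookups",                  # invented example values
    pattern='dns.question.name:"example.com"',
    diamond="capability",                       # assumed Indicator base field
    query_type="kql",
    target_systems=["elasticsearch"],
)
```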


--------------------------------------------------------------------------------
/core/schemas/indicators/regex.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import ClassVar, Literal
 3 | 
 4 | from pydantic import BaseModel, PrivateAttr, field_validator
 5 | 
 6 | from core.schemas import indicator
 7 | 
 8 | 
 9 | class RegexMatch(BaseModel):
10 |     name: str
11 |     matched_string: str
12 | 
13 | 
14 | class Regex(indicator.Indicator):
15 |     _type_filter: ClassVar[str] = "regex"
16 |     _compiled_pattern: re.Pattern | None = PrivateAttr(None)
17 |     type: Literal["regex"] = "regex"
18 | 
19 |     @property
20 |     def compiled_pattern(self):
21 |         if not self._compiled_pattern:
22 |             self._compiled_pattern = re.compile(self.pattern)
23 |         return self._compiled_pattern
24 | 
25 |     @field_validator("pattern")
26 |     @classmethod
27 |     def validate_regex(cls, value) -> str:
28 |         try:
29 |             re.compile(value)
30 |         except re.error as error:
31 |             raise ValueError(f"Invalid regex pattern: {error}")
32 |         except OverflowError:
33 |             raise ValueError("Regex pattern is too large")
34 |         return value
35 | 
36 |     def match(self, value: str) -> RegexMatch | None:
37 |         result = self.compiled_pattern.search(value)
38 |         if result:
39 |             return RegexMatch(name=self.name, matched_string=result.group())
40 |         return None
41 | 
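A usage sketch for the lazy compilation and `match()` helper above, with the same caveat: the `Indicator` base class is not shown here, so `diamond` (and any other required base fields) are assumptions:

```python
from core.schemas.indicators.regex import Regex

rule = Regex(
    name="md5-like",
    pattern=r"\b[0-9a-f]{32}\b",  # checked by validate_regex at creation time
    diamond="capability",         # assumed Indicator base field
)
hit = rule.match("hash is d41d8cd98f00b204e9800998ecf8427e")
if hit:
    print(hit.name, hit.matched_string)
```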


--------------------------------------------------------------------------------
/core/schemas/indicators/sigma.py:
--------------------------------------------------------------------------------
 1 | from typing import ClassVar, Literal
 2 | 
 3 | from core.schemas import indicator
 4 | 
 5 | 
 6 | class Sigma(indicator.Indicator):
 7 |     """Represents a Sigma rule.
 8 | 
 9 |     Parsing and matching are still TODO.
10 |     """
11 | 
12 |     _type_filter: ClassVar[str] = "sigma"
13 |     type: Literal["sigma"] = "sigma"
14 | 


--------------------------------------------------------------------------------
/core/schemas/indicators/suricata.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import ClassVar, List, Literal
 3 | 
 4 | from idstools import rule
 5 | from pydantic import field_validator
 6 | 
 7 | from core.schemas import indicator
 8 | 
 9 | 
10 | class Suricata(indicator.Indicator):
11 |     """Represents a Suricata rule.
12 | 
13 |     Parsing and matching are still TODO.
14 |     """
15 | 
16 |     _type_filter: ClassVar[str] = "suricata"
17 |     type: Literal["suricata"] = "suricata"
18 |     sid: int = 0
19 |     metadata: List[str] = []
20 |     references: List[str] = []
21 | 
22 |     @field_validator("pattern")
23 |     @classmethod
24 |     def validate_rules(cls, value) -> str:
25 |         try:
26 |             rule.parse(value)
27 |         except Exception as e:
28 |             raise ValueError(f"invalid Suricata rule: {e}")
29 |         return value
30 | 
31 |     def parse(self) -> rule.Rule | None:
32 |         try:
33 |             return rule.parse(self.pattern)
34 |         except Exception as e:
35 |             logging.error(f"Error parsing {self.pattern}: {e}")
36 | 
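A sketch of the validate/parse round trip, under the same assumptions about the `Indicator` base fields; the rule text is a toy example:

```python
from core.schemas.indicators.suricata import Suricata

s = Suricata(
    name="toy-rule",
    pattern='alert tcp any any -> any any (msg:"toy"; sid:1000001; rev:1;)',
    diamond="capability",  # assumed Indicator base field
)
parsed = s.parse()         # idstools Rule object, or None on failure
print(parsed["sid"] if parsed else "unparsable")
```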


--------------------------------------------------------------------------------
/core/schemas/observables/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/core/schemas/observables/asn.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from core.schemas import observable
 4 | 
 5 | 
 6 | class ASN(observable.Observable):
 7 |     type: Literal["asn"] = "asn"
 8 |     country: str | None = None
 9 |     description: str | None = None
10 | 


--------------------------------------------------------------------------------
/core/schemas/observables/auth_secret.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from core.schemas import observable
 4 | 
 5 | 
 6 | class AuthSecret(observable.Observable):
 7 |     """
 8 |     An authentication secret, such as a private key, public key, or certificate.
 9 |     """
10 | 
11 |     type: Literal["auth_secret"] = "auth_secret"
12 |     auth_type: str = ""  # can be pubkey, privkey, cert, ...
13 |     name: str = ""  # keypair name as found in aws key pairs
14 | 
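For example (toy values; `value` holds the key material itself):

```python
from core.schemas.observables.auth_secret import AuthSecret

key = AuthSecret(
    value="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPlaceholder user@host",  # toy key
    auth_type="pubkey",
    name="deploy-key",
)
```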


--------------------------------------------------------------------------------
/core/schemas/observables/bic.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import Literal
 3 | 
 4 | from pydantic import field_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | BIC_MATCHER_REGEX = re.compile("^[A-Z]{6}[A-Z0-9]{2}([A-Z0-9]{3})?$")
 9 | 
10 | 
11 | class BIC(observable.Observable):
12 |     type: Literal["bic"] = "bic"
13 | 
14 |     @classmethod
15 |     def validator(cls, value: str) -> bool:
16 |         if BIC_MATCHER_REGEX.match(value):
17 |             return True
18 |         else:
19 |             return False
20 | 
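With the pattern anchored and the branch-code group made optional as above, the validator accepts both the 8- and 11-character BIC forms:

```python
from core.schemas.observables.bic import BIC

assert BIC.validator("DEUTDEFF") is True      # 8-character BIC
assert BIC.validator("DEUTDEFF500") is True   # 11-character BIC
assert BIC.validator("deutdeff") is False     # lowercase is rejected
```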


--------------------------------------------------------------------------------
/core/schemas/observables/certificate.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import hashlib
 3 | from typing import Literal
 4 | 
 5 | from pydantic import Field
 6 | 
 7 | from core.helpers import now
 8 | from core.schemas import observable
 9 | 
10 | 
11 | class Certificate(observable.Observable):
12 |     """This is the schema for the Certificate observable type.
13 | 
14 |     Attributes:
15 |         last_seen: the last time the certificate was seen.
16 |         first_seen: the first time the certificate was seen.
17 |         issuer: the issuer of the certificate.
18 |         subject: the certificate subject.
19 |         serial_number: the certificate serial.
20 |         after: the date after which the certificate is valid.
21 |         after: the end of the certificate's validity period (notAfter).
22 |         before: the start of the certificate's validity period (notBefore).
23 |     """
24 | 
25 |     type: Literal["certificate"] = "certificate"
26 |     last_seen: datetime.datetime = Field(default_factory=now)
27 |     first_seen: datetime.datetime = Field(default_factory=now)
28 |     issuer: str | None = None
29 |     subject: str | None = None
30 |     serial_number: str | None = None
31 |     after: datetime.datetime | None = None
32 |     before: datetime.datetime | None = None
33 |     fingerprint: str | None = None
34 | 
35 |     @classmethod
36 |     def from_data(cls, data: bytes):
37 |         hash_256 = hashlib.sha256(data).hexdigest()
38 |         return cls(value=f"CERT:{hash_256}")
39 | 
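
For reference, a minimal sketch of how `from_data` derives the observable value; the byte string is a stand-in for real DER-encoded certificate data:

```python
import hashlib

der_bytes = b"..."  # placeholder for raw certificate bytes
cert = Certificate.from_data(der_bytes)
# The value is the SHA-256 of the raw bytes, prefixed with "CERT:"
assert cert.value == f"CERT:{hashlib.sha256(der_bytes).hexdigest()}"
```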


--------------------------------------------------------------------------------
/core/schemas/observables/cidr.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class CIDR(observable.Observable):
7 |     type: Literal["cidr"] = "cidr"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/command_line.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class CommandLine(observable.Observable):
7 |     type: Literal["command_line"] = "command_line"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/container_image.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from core.schemas import observable
 4 | 
 5 | 
 6 | class ContainerImage(observable.Observable):
 7 |     type: Literal["container_image"] = "container_image"
 8 |     registry: str = "docker.io"
 9 | 
10 | 
11 | class DockerImage(ContainerImage):
12 |     type: Literal["docker_image"] = "docker_image"
13 | 


--------------------------------------------------------------------------------
/core/schemas/observables/email.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | import validators
 4 | from pydantic import field_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | 
 9 | class Email(observable.Observable):
10 |     type: Literal["email"] = "email"
11 | 
12 |     @field_validator("value", mode="before")
13 |     def refang(cls, v) -> str:
14 |         return observable.refang(v)
15 | 
16 |     @classmethod
17 |     def validator(cls, value: str) -> bool:
18 |         return validators.email(value) or False
19 | 


--------------------------------------------------------------------------------
/core/schemas/observables/file.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from core.schemas import observable
 4 | 
 5 | 
 6 | class File(observable.Observable):
 7 |     """Represents a file.
 8 | 
 9 |     One of sha256, md5, or sha1 should be provided.
10 |     Value should be in the form FILE:<sha256>.
11 |     """
12 | 
13 |     type: Literal["file"] = "file"
14 |     name: str | None = None
15 |     size: int | None = None
16 |     sha256: str | None = None
17 |     md5: str | None = None
18 |     sha1: str | None = None
19 |     mime_type: str | None = None
20 | 
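
A construction sketch under those conventions (observables are normally persisted with `.save()`; plain construction is shown here, using the SHA-256 of zero bytes as a sample digest):

```python
empty_sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
f = File(
    value=f"FILE:{empty_sha256}",
    name="empty.bin",
    size=0,
    sha256=empty_sha256,
)
```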


--------------------------------------------------------------------------------
/core/schemas/observables/generic.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from core.schemas import observable
 4 | 
 5 | 
 6 | class Generic(observable.Observable):
 7 |     """Use this observable type for anything that doesn't fit into any other category."""
 8 | 
 9 |     type: Literal["generic"] = "generic"
10 | 


--------------------------------------------------------------------------------
/core/schemas/observables/hostname.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | import validators
 4 | from pydantic import field_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | 
 9 | class Hostname(observable.Observable):
10 |     type: Literal["hostname"] = "hostname"
11 | 
12 |     @field_validator("value", mode="before")
13 |     def refang(cls, v) -> str:
14 |         return observable.refang(v)
15 | 
16 |     @classmethod
17 |     def validator(cls, value: str) -> bool:
18 |         # Replace underscores with hyphens in the domain
19 |         # https://stackoverflow.com/a/14622263
20 |         value = value.replace("_", "-")
21 |         return validators.domain(value) or False
22 | 
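
The two hooks cooperate: `refang` normalizes the raw value before pydantic validation runs, and `validator` tolerates underscores via the hyphen swap. A minimal sketch, assuming `observable.refang` undoes the common `example[.]com` defanging convention:

```python
assert Hostname.validator("mail_server.example.com")  # underscores tolerated
assert not Hostname.validator("not a hostname")
h = Hostname(value="example[.]com")  # assumption: refang restores "example.com"
```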


--------------------------------------------------------------------------------
/core/schemas/observables/iban.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | import validators
 4 | from pydantic import field_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | 
 9 | class IBAN(observable.Observable):
10 |     type: Literal["iban"] = "iban"
11 | 
12 |     @classmethod
13 |     def validator(cls, value: str) -> bool:
14 |         return validators.iban(value) or False
15 | 


--------------------------------------------------------------------------------
/core/schemas/observables/imphash.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class Imphash(observable.Observable):
7 |     type: Literal["imphash"] = "imphash"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/ipv4.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | import validators
 4 | from pydantic import field_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | 
 9 | class IPv4(observable.Observable):
10 |     type: Literal["ipv4"] = "ipv4"
11 | 
12 |     @field_validator("value", mode="before")
13 |     def refang(cls, v) -> str:
14 |         return observable.refang(v)
15 | 
16 |     @classmethod
17 |     def validator(cls, value: str) -> bool:
18 |         return validators.ipv4(value) or False
19 | 


--------------------------------------------------------------------------------
/core/schemas/observables/ipv6.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | import validators
 4 | from pydantic import field_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | 
 9 | class IPv6(observable.Observable):
10 |     type: Literal["ipv6"] = "ipv6"
11 | 
12 |     @classmethod
13 |     def validator(cls, value: str) -> bool:
14 |         return validators.ipv6(value) or False
15 | 


--------------------------------------------------------------------------------
/core/schemas/observables/ja3.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class JA3(observable.Observable):
7 |     type: Literal["ja3"] = "ja3"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/jarm.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class JARM(observable.Observable):
7 |     type: Literal["jarm"] = "jarm"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/mac_address.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | import validators
 4 | from pydantic import field_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | 
 9 | class MacAddress(observable.Observable):
10 |     type: Literal["mac_address"] = "mac_address"
11 | 
12 |     @classmethod
13 |     def validator(cls, value: str) -> bool:
14 |         return validators.mac_address(value) or False
15 | 


--------------------------------------------------------------------------------
/core/schemas/observables/md5.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | import validators
 4 | from pydantic import field_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | 
 9 | class MD5(observable.Observable):
10 |     type: Literal["md5"] = "md5"
11 | 
12 |     @classmethod
13 |     def validator(cls, value: str) -> bool:
14 |         return validators.md5(value) or False
15 | 


--------------------------------------------------------------------------------
/core/schemas/observables/mutex.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class Mutex(observable.Observable):
7 |     type: Literal["mutex"] = "mutex"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/named_pipe.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class NamedPipe(observable.Observable):
7 |     type: Literal["named_pipe"] = "named_pipe"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/package.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from core.schemas import observable
 4 | 
 5 | 
 6 | class Package(observable.Observable):
 7 |     type: Literal["package"] = "package"
 8 |     version: str | None = None
 9 |     registry_type: str | None = None
10 | 


--------------------------------------------------------------------------------
/core/schemas/observables/path.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import Literal
 3 | 
 4 | from pydantic import field_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | # Regex generated with https://chatgpt.com/share/6720b845-1cb8-8006-9005-1837e2654525
 9 | 
10 | LINUX_PATH_REGEX = re.compile(
11 |     r"""
12 | ^
13 | (
14 |     # Absolute path (e.g., /usr/local/bin/file)
15 |     /(?:[^/\0]+/)+[^/\0]* |
16 |     
17 |     # Home directory path (e.g., ~/Documents/file)
18 |     ~(?:/[^/\0]+)+ |
19 |     
20 |     # Relative path (e.g., bin/file or ../folder/file)
21 |     (?:\./|\.\./|[^/\0]+/)+[^/\0]*
22 | )
23 | $
24 | """,
25 |     re.VERBOSE,
26 | )
27 | 
28 | WINDOWS_PATH_REGEX = re.compile(
29 |     r"""
30 | ^
31 | (
32 |     # Drive letter path (e.g., C:\path\to\file)
33 |     [a-zA-Z]:[\\/](?:[^<>:"|?*\\/\r\n]+[\\/])+[^<>:"|?*\\/\r\n]* |
34 |     
35 |     # UNC path (e.g., \\server\share\path\to\file)
36 |     \\\\[a-zA-Z0-9._-]+\\[a-zA-Z0-9$_.-]+(?:\\[^<>:"|?*\\/\r\n]+)+ |
37 |     
38 |     # Relative path (e.g., folder\file or ..\folder\file)
39 |     (?:\.\.?(?:[\\/]|$))+[\\/](?:[^<>:"|?*\\/\r\n]+[\\/])+[^<>:"|?*\\/\r\n]*
40 | )
41 | $
42 | """,
43 |     re.VERBOSE,
44 | )
45 | 
46 | 
47 | class Path(observable.Observable):
48 |     type: Literal["path"] = "path"
49 | 
50 |     @classmethod
51 |     def validator(cls, value: str) -> bool:
52 |         if LINUX_PATH_REGEX.match(value) or WINDOWS_PATH_REGEX.match(value):
53 |             return True
54 |         else:
55 |             return False
56 | 
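
A short sketch of what the two expressions accept (sample paths only):

```python
assert Path.validator("/usr/local/bin/python")  # absolute Linux path
assert Path.validator("~/Documents/report.pdf")  # home-relative Linux path
assert Path.validator(r"C:\Windows\System32\drivers\etc\hosts")  # drive-letter path
assert Path.validator(r"\\fileserver\share\folder\doc.docx")  # UNC path
assert not Path.validator("just some words")  # no path separators, no match
```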


--------------------------------------------------------------------------------
/core/schemas/observables/private/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | !README.md
4 | !__init__.py


--------------------------------------------------------------------------------
/core/schemas/observables/private/README.md:
--------------------------------------------------------------------------------
1 | ### Private observables
2 | This directory is where you should place your private observables. It could be named anything else, but this one has a `.gitignore` so you don't mess things up. ;-)
3 | 


--------------------------------------------------------------------------------
/core/schemas/observables/private/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/schemas/observables/private/__init__.py


--------------------------------------------------------------------------------
/core/schemas/observables/registry_key.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | from typing import Literal
 3 | 
 4 | from core.schemas import observable
 5 | 
 6 | 
 7 | class RegistryHive(str, Enum):
 8 |     """Registry Hive enum class."""
 9 | 
10 |     HKEY_CURRENT_CONFIG = "HKEY_CURRENT_CONFIG"
11 |     HKEY_CURRENT_USER = "HKEY_CURRENT_USER"
12 |     HKEY_LOCAL_MACHINE_SAM = "HKEY_LOCAL_MACHINE_SAM"
13 |     HKEY_LOCAL_MACHINE_Security = "HKEY_LOCAL_MACHINE_Security"
14 |     HKEY_LOCAL_MACHINE_Software = "HKEY_LOCAL_MACHINE_Software"
15 |     HKEY_LOCAL_MACHINE_System = "HKEY_LOCAL_MACHINE_System"
16 |     HKEY_USERS_DEFAULT = "HKEY_USERS_DEFAULT"
17 | 
18 | 
19 | class RegistryKey(observable.Observable):
20 |     """Registry Key observable schema class.
21 | 
22 |     Attributes:
23 |         key: The registry key name.
24 |         data: The registry key value data, as bytes.
25 |         hive: The registry hive, like SYSTEM, SOFTWARE, etc.
26 |         path_file: The filesystem path to the file that contains the registry key value.
27 |     """
28 | 
29 |     type: Literal["registry_key"] = "registry_key"
30 |     key: str
31 |     data: bytes
32 |     hive: RegistryHive
33 |     path_file: str | None = None
34 | 
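
A construction sketch with hypothetical values (`key`, `data`, and `hive` are required; `value` comes from the Observable base class):

```python
rk = RegistryKey(
    value="HKLM\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run\\updater",
    key="updater",
    data=b"C:\\Users\\Public\\updater.exe",
    hive=RegistryHive.HKEY_LOCAL_MACHINE_Software,
)
```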


--------------------------------------------------------------------------------
/core/schemas/observables/sha1.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | import validators
 4 | 
 5 | from core.schemas import observable
 6 | 
 7 | 
 8 | class SHA1(observable.Observable):
 9 |     type: Literal["sha1"] = "sha1"
10 | 
11 |     @classmethod
12 |     def validator(cls, value: str) -> bool:
13 |         return validators.sha1(value) or False
14 | 


--------------------------------------------------------------------------------
/core/schemas/observables/sha256.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | import validators
 4 | 
 5 | from core.schemas import observable
 6 | 
 7 | 
 8 | class SHA256(observable.Observable):
 9 |     type: Literal["sha256"] = "sha256"
10 | 
11 |     @classmethod
12 |     def validator(cls, value: str) -> bool:
13 |         return validators.sha256(value) or False
14 | 


--------------------------------------------------------------------------------
/core/schemas/observables/ssdeep.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class Ssdeep(observable.Observable):
7 |     type: Literal["ssdeep"] = "ssdeep"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/tlsh.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class TLSH(observable.Observable):
7 |     type: Literal["tlsh"] = "tlsh"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/url.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | from urllib.parse import urlparse
 3 | 
 4 | import validators
 5 | from pydantic import field_validator
 6 | 
 7 | from core.schemas import observable
 8 | 
 9 | 
10 | class Url(observable.Observable):
11 |     type: Literal["url"] = "url"
12 | 
13 |     @field_validator("value", mode="before")
14 |     def refang(cls, v) -> str:
15 |         return observable.refang(v)
16 | 
17 |     @classmethod
18 |     def validator(cls, value: str) -> bool:
19 |         # Replace underscores with hyphens in the domain
20 |         # https://stackoverflow.com/a/14622263
21 |         o = urlparse(value)
22 |         value = o._replace(netloc=o.netloc.replace("_", "-")).geturl()
23 |         return validators.url(value, strict_query=False) or False
24 | 
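
Only the network location is rewritten, so underscores in a hostname (which `validators.url` would otherwise reject) don't fail the whole URL, while the path and query are left untouched. A minimal sketch:

```python
assert Url.validator("http://my_host.example.com/path?q=1")
assert not Url.validator("not-a-url")
u = Url(value="hxxp://example[.]com/x")  # assumption: refang restores "http://example.com/x"
```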


--------------------------------------------------------------------------------
/core/schemas/observables/user_account.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | from typing import Literal
 3 | 
 4 | from pydantic import model_validator
 5 | 
 6 | from core.schemas import observable
 7 | 
 8 | 
 9 | class UserAccount(observable.Observable):
10 |     """Represents a user account observable based on the Oasis schema.
11 |     https://docs.oasis-open.org/cti/stix/v2.1/os/stix-v2.1-os.html#_azo70vgj1vm2
12 | 
13 |     account_login and account_type must be provided.
14 |     Value should be in the form <account_type>:<account_login>.
15 |     """
16 | 
17 |     type: Literal["user_account"] = "user_account"
18 |     user_id: str | None = None
19 |     credential: str | None = None
20 |     account_login: str | None = None
21 |     account_type: str | None = None
22 |     display_name: str | None = None
23 |     is_service_account: bool | None = None
24 |     is_privileged: bool | None = None
25 |     can_escalate_privs: bool | None = None
26 |     is_disabled: bool | None = None
27 |     account_created: datetime.datetime | None = None
28 |     account_expires: datetime.datetime | None = None
29 |     credential_last_changed: datetime.datetime | None = None
30 |     account_first_login: datetime.datetime | None = None
31 |     account_last_login: datetime.datetime | None = None
32 | 
33 |     @model_validator(mode="after")
34 |     def check_timestamp_coherence(self) -> "UserAccount":
35 |         if self.account_created and self.account_expires:
36 |             if self.account_created > self.account_expires:
37 |                 raise ValueError(
38 |                     "Account created date is after account expiration date."
39 |                 )
40 |         return self
41 | 
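
The `mode="after"` validator runs once all fields are populated, so incoherent timestamps are rejected at construction time. A sketch with hypothetical values (pydantic surfaces the ValueError as a ValidationError, which subclasses it):

```python
import datetime

try:
    UserAccount(
        value="aws:jdoe",  # assumption on the <account_type>:<account_login> form
        account_login="jdoe",
        account_type="aws",
        account_created=datetime.datetime(2024, 5, 1),
        account_expires=datetime.datetime(2024, 1, 1),  # earlier than created
    )
except ValueError as e:
    print(e)  # "Account created date is after account expiration date."
```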


--------------------------------------------------------------------------------
/core/schemas/observables/user_agent.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | from core.schemas import observable
4 | 
5 | 
6 | class UserAgent(observable.Observable):
7 |     type: Literal["user_agent"] = "user_agent"
8 | 


--------------------------------------------------------------------------------
/core/schemas/observables/wallet.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from core.schemas import observable
 4 | 
 5 | 
 6 | class Wallet(observable.Observable):
 7 |     """Represents a wallet observable.
 8 | 
 9 |     coin and address must be provided.
10 |     Value should be in the form :
. 11 | """ 12 | 13 | type: Literal["wallet"] = "wallet" 14 | coin: str | None = None 15 | address: str | None = None 16 | -------------------------------------------------------------------------------- /core/schemas/roles.py: -------------------------------------------------------------------------------- 1 | from enum import IntFlag 2 | 3 | 4 | class Permission(IntFlag): 5 | READ = 0b0001 # 1 6 | WRITE = 0b0010 # 2 7 | DELETE = 0b0100 # 4 8 | 9 | 10 | class Role: 11 | NONE = Permission(0) 12 | READER = Permission.READ 13 | WRITER = Permission.READ | Permission.WRITE 14 | OWNER = Permission.READ | Permission.WRITE | Permission.DELETE 15 | -------------------------------------------------------------------------------- /core/schemas/tag.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from typing import ClassVar, Literal 3 | 4 | from pydantic import ConfigDict, Field, computed_field 5 | 6 | from core import database_arango 7 | from core.config.config import yeti_config 8 | from core.helpers import now 9 | from core.schemas.model import YetiModel 10 | 11 | DEFAULT_EXPIRATION = datetime.timedelta( 12 | days=yeti_config.get( 13 | "tag", 14 | "default_tag_expiration", 15 | default=90, # Completely arbitrary 16 | ) 17 | ) 18 | 19 | MAX_TAG_LENGTH = 250 20 | MAX_TAGS_REQUEST = 50 21 | 22 | 23 | def future(): 24 | return DEFAULT_EXPIRATION 25 | 26 | 27 | class Tag(YetiModel, database_arango.ArangoYetiConnector): 28 | model_config = ConfigDict(str_strip_whitespace=True) 29 | 30 | _collection_name: ClassVar[str] = "tags" 31 | _root_type: Literal["tags"] = "tag" 32 | _type_filter: ClassVar[str | None] = None 33 | 34 | name: str = Field(max_length=MAX_TAG_LENGTH) 35 | count: int = 0 36 | created: datetime.datetime = Field(default_factory=now) 37 | default_expiration: datetime.timedelta = DEFAULT_EXPIRATION 38 | produces: list[str] = [] 39 | replaces: list[str] = [] 40 | 41 | @computed_field(return_type=Literal["tag"]) 42 | @property 43 | def root_type(self): 44 | return self._root_type 45 | 46 | @classmethod 47 | def load(cls, object: dict) -> "Tag": 48 | return cls(**object) 49 | 50 | def absorb(self, other: list[str], permanent: bool) -> int: 51 | """Absorb other tags into this one.""" 52 | merged = 0 53 | for tag_name in other: 54 | old_tag = Tag.find(name=tag_name) 55 | if old_tag: 56 | self.count += old_tag.count 57 | old_tag.count = 0 58 | if permanent: 59 | self.replaces.append(old_tag.name) 60 | self.replaces.extend(old_tag.replaces) 61 | self.produces.extend(old_tag.produces) 62 | old_tag.delete() 63 | else: 64 | old_tag.save() 65 | merged += 1 66 | else: 67 | self.replaces.append(tag_name) 68 | 69 | self.produces = list(set(self.produces) - {self.name}) 70 | self.replaces = list(set(self.replaces) - {self.name}) 71 | self.save() 72 | return merged 73 | -------------------------------------------------------------------------------- /core/schemas/template.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import TYPE_CHECKING, Literal, Optional 4 | 5 | import minijinja 6 | from pydantic import BaseModel, ConfigDict, computed_field 7 | 8 | from core.config.config import yeti_config 9 | 10 | if TYPE_CHECKING: 11 | from core.schemas.observable import Observable 12 | 13 | 14 | class Template(BaseModel): 15 | """A template for exporting data to an external system.""" 16 | 17 | model_config = ConfigDict(str_strip_whitespace=True) 18 | _root_type: 
Literal["template"] = "template" 19 | name: str 20 | template: str 21 | 22 | def render(self, data: list["Observable"], output_file: str | None) -> None | str: 23 | """Renders the template with the given data to the output file.""" 24 | 25 | environment = minijinja.Environment(templates={self.name: self.template}) 26 | result = environment.render_template(self.name, data=data) 27 | if output_file: 28 | os.makedirs(os.path.dirname(output_file), exist_ok=True) 29 | with open(output_file, "w+") as fd: 30 | fd.write(result) 31 | return None 32 | else: 33 | return result 34 | 35 | @computed_field(return_type=Literal["template"]) 36 | @property 37 | def root_type(self): 38 | return self._root_type 39 | 40 | def save(self) -> "Template": 41 | directory = Path( 42 | yeti_config.get("system", "template_dir", "/opt/yeti/templates") 43 | ) 44 | Path.mkdir(directory, parents=True, exist_ok=True) 45 | file = directory / f"{self.name}.jinja2" 46 | file.write_text(self.template) 47 | return self 48 | 49 | def delete(self) -> None: 50 | directory = Path( 51 | yeti_config.get("system", "template_dir", "/opt/yeti/templates") 52 | ) 53 | file = directory / f"{self.name}.jinja2" 54 | file.unlink() 55 | 56 | @classmethod 57 | def find(cls, name: str) -> Optional["Template"]: 58 | directory = Path( 59 | yeti_config.get("system", "template_dir", "/opt/yeti/templates") 60 | ) 61 | file = directory / f"{name}.jinja2" 62 | if file.exists(): 63 | return Template(name=name, template=file.read_text()) 64 | return None 65 | -------------------------------------------------------------------------------- /core/web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/core/web/__init__.py -------------------------------------------------------------------------------- /core/web/apiv2/__init__.py: -------------------------------------------------------------------------------- 1 | # TODO: User management 2 | -------------------------------------------------------------------------------- /core/web/apiv2/audit.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from fastapi import APIRouter, HTTPException 4 | from pydantic import BaseModel, ConfigDict 5 | 6 | from core.schemas.audit import TimelineLog 7 | 8 | router = APIRouter() 9 | 10 | 11 | @router.get("/timeline/{id:path}") 12 | def trail(id: str): 13 | return TimelineLog.filter({"target_id": id}) 14 | -------------------------------------------------------------------------------- /core/web/apiv2/import_data.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, File, UploadFile 2 | 3 | router = APIRouter() 4 | 5 | 6 | @router.post("/import_misp_json", tags=["import_misp_json"]) 7 | def import_misp_json(misp_file_json: UploadFile = File(...)) -> dict[str, bool]: 8 | # contents = await misp_file_json.read() 9 | # data_json = json.loads(contents) 10 | 11 | # converter = MispToYeti(data_json["Event"]) 12 | # converter.misp_to_yeti() 13 | return {"status": True} 14 | -------------------------------------------------------------------------------- /core/web/frontend/README.md: -------------------------------------------------------------------------------- 1 | # Build me 2 | 3 | Insert contents of /dist folder from yeti-feeds-frontend here. 
4 | -------------------------------------------------------------------------------- /extras/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Actual Yeti container 2 | FROM python:3.10 AS yeti 3 | 4 | # Python 5 | RUN apt-get update && apt-get install -y \ 6 | python3-pip \ 7 | libmagic-dev \ 8 | && apt-get clean && rm -rf /var/cache/apt/* /var/lib/apt/lists/* 9 | 10 | ADD . /app 11 | 12 | WORKDIR /app 13 | RUN cp yeti.conf.sample yeti.conf 14 | RUN cp ./extras/docker/docker-entrypoint.sh /docker-entrypoint.sh 15 | 16 | # Upgrade pip 17 | RUN pip3 install --upgrade pip && pip3 install uv 18 | 19 | # Install yeti 20 | RUN uv sync --group plugins 21 | 22 | ENV PYTHONPATH /app 23 | 24 | ENTRYPOINT ["/docker-entrypoint.sh"] 25 | -------------------------------------------------------------------------------- /extras/docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker support 2 | 3 | We support production and development Docker setups. 4 | 5 | For production, head to 6 | [the official docs](https://yeti-platform.io/docs/getting-started/) and follow 7 | the instructions there. 8 | 9 | For development, follow 10 | [these instructions](https://github.com/yeti-platform/yeti-docker/blob/main/dev/README.md) 11 | -------------------------------------------------------------------------------- /extras/docker/dev/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV LC_ALL C.UTF-8 4 | ENV LANG C.UTF-8 5 | 6 | # Python 7 | RUN apt-get update && apt-get install -y \ 8 | python3-pip \ 9 | libmagic-dev \ 10 | git 11 | 12 | # Upgrade pip 13 | RUN pip3 install --upgrade pip && pip3 install uv 14 | 15 | # Install & Configure YETI 16 | ADD . 
/app 17 | WORKDIR /app 18 | RUN uv sync --all-groups 19 | 20 | COPY --chmod=744 ./extras/docker/docker-entrypoint.sh /docker-entrypoint.sh 21 | 22 | ENV PYTHONPATH /app 23 | 24 | ENTRYPOINT ["/docker-entrypoint.sh"] 25 | 26 | CMD ["webserver"] 27 | -------------------------------------------------------------------------------- /extras/docker/dev/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | redis: 4 | image: redis:latest 5 | ports: 6 | - 127.0.0.1:6379:6379 7 | 8 | arangodb: 9 | image: arangodb:3.11 10 | ports: 11 | - 127.0.0.1:8529:8529 12 | environment: 13 | - ARANGO_ROOT_PASSWORD= 14 | -------------------------------------------------------------------------------- /extras/docker/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | if [[ "$1" = 'webserver' ]]; then 5 | uv run uvicorn core.web.webapp:app --reload --host 0.0.0.0 --log-level=debug 6 | elif [[ "$1" = 'webserver-prod' ]]; then 7 | uv run uvicorn core.web.webapp:app --host 0.0.0.0 --workers $(nproc --all || echo 4) --log-level=info 8 | elif [[ "$1" = 'tasks' ]]; then 9 | uv run celery -A core.taskscheduler worker --loglevel=INFO --purge -P threads 10 | elif [[ "$1" = 'tasks-beat' ]]; then 11 | rm -f celerybeat-schedule.db && uv run celery -A core.taskscheduler beat 12 | elif [[ "$1" = 'events-tasks' ]]; then 13 | uv run python -m core.events.consumers events 14 | elif [[ "$1" = 'create-user' ]]; then 15 | uv run python yetictl/cli.py create-user "${@:2}" 16 | elif [[ "$1" = 'reset-password' ]]; then 17 | uv run python yetictl/cli.py reset-password "${@:2}" 18 | elif [[ "$1" = 'toggle-user' ]]; then 19 | uv run python yetictl/cli.py toggle-user "${@:2}" 20 | elif [[ "$1" = 'toggle-admin' ]]; then 21 | uv run python yetictl/cli.py toggle-admin "${@:2}" 22 | elif [[ "$1" = 'migrate-arangodb' ]]; then 23 | uv run python yetictl/cli.py migrate-arangodb "${@:2}" 24 | elif [[ "$1" = 'envshell' ]]; then 25 | $(uv venv activate) && exec bash 26 | else 27 | exec "$@" 28 | fi 29 | -------------------------------------------------------------------------------- /extras/git/ruff-precommit-check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script gives a poor error message due to https://github.com/microsoft/vscode/issues/189924 3 | # but still blocks the commit from happening, so sorta WAI 4 | uv run ruff check . && uv run ruff format . --check 5 | 6 | # Check the exit status of the previous command 7 | 8 | if [ $? -ne 0 ]; then 9 | echo "ruff check failed. Aborting commit." 10 | exit 1 11 | else 12 | echo "ruff check passed." 
13 | fi 14 | -------------------------------------------------------------------------------- /extras/v1migrate/README.md: -------------------------------------------------------------------------------- 1 | # migrate from old yeti 2 | 3 | 4 | ## Deps 5 | 6 | ``` 7 | pip install pymongo 8 | ``` 9 | -------------------------------------------------------------------------------- /plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/__init__.py -------------------------------------------------------------------------------- /plugins/analytics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/analytics/__init__.py -------------------------------------------------------------------------------- /plugins/analytics/deprecated/email_rep.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import requests 4 | 5 | from core.analytics import OneShotAnalytics 6 | from core.errors import GenericYetiError 7 | 8 | 9 | class EmailRepAPI(object): 10 | """Base class for querying the EmailRep API.""" 11 | 12 | @staticmethod 13 | def fetch(observable): 14 | try: 15 | r = requests.get("https://emailrep.io/{}".format(observable.value)) 16 | if r.ok: 17 | return r.json() 18 | raise GenericYetiError("{} - {}".format(r.status_code, r.content)) 19 | except requests.exceptions.RequestException as e: 20 | logging.error(e) 21 | raise GenericYetiError("{} - {}".format(r.status_code, r.content)) 22 | 23 | 24 | class EmailRep(EmailRepAPI, OneShotAnalytics): 25 | default_values = { 26 | "name": "EmailRep", 27 | "description": "Perform a EmailRep query.", 28 | } 29 | 30 | ACTS_ON = ["Email"] 31 | 32 | @staticmethod 33 | def analyze(observable, results): 34 | json_result = EmailRepAPI.fetch(observable) 35 | result = {} 36 | 37 | json_string = json.dumps( 38 | json_result, sort_keys=True, indent=4, separators=(",", ": ") 39 | ) 40 | result = { 41 | "raw": json_string, 42 | "source": "EmailRep", 43 | } 44 | observable.add_context(result) 45 | 46 | return list() 47 | -------------------------------------------------------------------------------- /plugins/analytics/deprecated/expire_tags.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from datetime import timedelta 3 | 4 | from core.analytics import ScheduledAnalytics 5 | from mongoengine import Q 6 | 7 | 8 | class ExpireTags(ScheduledAnalytics): 9 | default_values = { 10 | "frequency": timedelta(hours=12), 11 | "name": "ExpireTags", 12 | "description": "Expires tags in observables", 13 | } 14 | 15 | ACTS_ON = [] # act on all observables 16 | 17 | # TODO Use server-side JS filter 18 | CUSTOM_FILTER = Q(tags__not__size=0) # filter only tagged elements 19 | 20 | EXPIRATION = timedelta(days=1) 21 | 22 | def bulk(self, observables): 23 | for o in observables: 24 | self.each(o) 25 | 26 | @staticmethod 27 | def each(obj): 28 | obj.expire_tags() 29 | -------------------------------------------------------------------------------- /plugins/analytics/deprecated/hash_file.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | from core.analytics import InlineAnalytics 4 | from 
core.observables import Hash 5 | 6 | HASH_TYPES_DICT = { 7 | "md5": hashlib.md5, 8 | "sha1": hashlib.sha1, 9 | "sha256": hashlib.sha256, 10 | "sha512": hashlib.sha512, 11 | } 12 | 13 | 14 | class HashFile(InlineAnalytics): 15 | default_values = { 16 | "name": "HashFile", 17 | "description": "Extracts MD5, SHA1, SHA256, SHA512 hashes from file", 18 | } 19 | 20 | ACTS_ON = ["File", "Certificate"] 21 | 22 | @staticmethod 23 | def each(f): 24 | if f.body: 25 | f.hashes = [] 26 | for hash_type, h in HashFile.extract_hashes(f.body.contents): 27 | hash_object = Hash.get_or_create(value=h.hexdigest()) 28 | hash_object.add_source("analytics") 29 | hash_object.save() 30 | f.active_link_to( 31 | hash_object, 32 | "{} hash".format(hash_type.upper()), 33 | "HashFile", 34 | clean_old=False, 35 | ) 36 | f.hashes.append({"hash": hash_type, "value": h.hexdigest()}) 37 | f.save() 38 | 39 | @staticmethod 40 | def extract_hashes(body_contents): 41 | hashers = {k: HASH_TYPES_DICT[k]() for k in HASH_TYPES_DICT} 42 | 43 | while True: 44 | chunk = body_contents.read(512 * 16) 45 | if not chunk: 46 | break 47 | for h in hashers.values(): 48 | h.update(chunk) 49 | 50 | return hashers.items() 51 | -------------------------------------------------------------------------------- /plugins/analytics/deprecated/process_hostnames.py: -------------------------------------------------------------------------------- 1 | from core.common.utils import tldextract_parser 2 | from core.analytics import InlineAnalytics 3 | from core.observables import Hostname 4 | 5 | SUSPICIOUS_TLDS = [ 6 | "pw", 7 | "cc", 8 | "nu", 9 | "ms", 10 | "vg", 11 | "cm", 12 | "biz", 13 | "cn", 14 | "kr", 15 | "br", 16 | "ws", 17 | "me", 18 | ] 19 | 20 | 21 | class ProcessHostnames(InlineAnalytics): 22 | default_values = { 23 | "name": "ProcessHostnames", 24 | "description": "Extracts and analyze domains", 25 | } 26 | 27 | ACTS_ON = "Hostname" 28 | 29 | @staticmethod 30 | def analyze_string(hostname_string): 31 | parts = tldextract_parser(hostname_string) 32 | return [parts.registered_domain] 33 | 34 | @staticmethod 35 | def each(hostname): 36 | parts = tldextract_parser(hostname.value) 37 | 38 | if parts.suffix in SUSPICIOUS_TLDS: 39 | hostname.tag("suspicious_tld") 40 | 41 | if parts.subdomain != "": 42 | hostname.update(domain=False) 43 | 44 | domain = Hostname.get_or_create(value=parts.registered_domain, domain=True) 45 | domain.add_source("analytics") 46 | hostname.active_link_to( 47 | domain, "domain", "ProcessHostnames", clean_old=False 48 | ) 49 | 50 | if domain.has_tag("dyndns"): 51 | hostname.tag("dyndns") 52 | 53 | return domain 54 | else: 55 | hostname.update(domain=True) 56 | return None 57 | -------------------------------------------------------------------------------- /plugins/analytics/deprecated/process_ip.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | import logging 3 | 4 | import geoip2.database 5 | from geoip2.errors import AddressNotFoundError 6 | 7 | from core.analytics import InlineAnalytics 8 | from core.config.config import yeti_config 9 | from core.errors import ObservableValidationError 10 | 11 | reader = None 12 | try: 13 | path = yeti_config.get("maxmind", "path") 14 | if path: 15 | reader = geoip2.database.Reader(path) 16 | except IOError as e: 17 | logging.info("Could not open GeoLite2-City.mmdb. 
Will proceed without GeoIP data") 18 | logging.info(e) 19 | reader = False 20 | 21 | 22 | class ProcessIp(InlineAnalytics): 23 | default_values = { 24 | "name": "ProcessIp", 25 | "description": "Extracts information from IP addresses", 26 | } 27 | 28 | ACTS_ON = "Ip" 29 | 30 | @staticmethod 31 | def each(ip): 32 | try: 33 | if reader: 34 | response = reader.city(ip.value) 35 | ip.geoip = { 36 | "country": response.country.iso_code, 37 | "city": response.city.name, 38 | } 39 | ip.save() 40 | except ObservableValidationError: 41 | logging.error( 42 | "An error occurred when trying to add {} to the database".format( 43 | ip.value 44 | ) 45 | ) 46 | except AddressNotFoundError: 47 | logging.error("{} was not found in the GeoIp database".format(ip.value)) 48 | -------------------------------------------------------------------------------- /plugins/analytics/deprecated/process_url.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | import re 3 | import logging 4 | 5 | from core.analytics import InlineAnalytics 6 | from core.observables import Observable 7 | from core.errors import ObservableValidationError 8 | 9 | 10 | class ProcessUrl(InlineAnalytics): 11 | default_values = { 12 | "name": "ProcessUrl", 13 | "description": "Extracts domains from URLs", 14 | } 15 | 16 | ACTS_ON = "Url" 17 | 18 | @staticmethod 19 | def analyze_string(url_string): 20 | return [ProcessUrl.extract_hostname(url_string)] 21 | 22 | @staticmethod 23 | def extract_hostname(url_string): 24 | host = re.search("://(?P[^/:]+)[/:]?", url_string) 25 | if host: 26 | host = host.group("host") 27 | logging.debug("Extracted {} from {}".format(host, url_string)) 28 | return host 29 | 30 | @staticmethod 31 | def each(url): 32 | try: 33 | host = ProcessUrl.analyze_string(url.value)[0] 34 | h = Observable.guess_type(host).get_or_create(value=host) 35 | h.add_source("analytics") 36 | url.active_link_to(h, "hostname", "ProcessUrl", clean_old=False) 37 | return h 38 | except ObservableValidationError: 39 | logging.error( 40 | "An error occurred when trying to add {} to the database".format(host) 41 | ) 42 | -------------------------------------------------------------------------------- /plugins/analytics/deprecated/propagate_blocklist.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from datetime import timedelta 3 | 4 | from plugins.analytics.public.process_url import ProcessUrl 5 | from core.analytics import ScheduledAnalytics 6 | from mongoengine import Q 7 | 8 | 9 | class PropagateBlocklist(ScheduledAnalytics): 10 | default_values = { 11 | "frequency": timedelta(hours=1), 12 | "name": "PropagateBlocklist", 13 | "description": "Propagates blocklist from URLs to hostnames", 14 | } 15 | 16 | ACTS_ON = "Url" # act on Urls only 17 | 18 | CUSTOM_FILTER = Q(tags__name="blocklist") # filter only tagged elements 19 | 20 | EXPIRATION = None 21 | 22 | @staticmethod 23 | def each(obj): 24 | n = obj.neighbors(neighbor_type="Hostname").values() 25 | if n: 26 | for link in n[0]: 27 | link[1].tag("blocklist") 28 | else: 29 | h = ProcessUrl.each(obj) 30 | if h is not None: 31 | h.tag("blocklist") 32 | -------------------------------------------------------------------------------- /plugins/analytics/deprecated/tag_logic.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from datetime import timedelta 3 | 
import logging 4 | 5 | from mongoengine import DoesNotExist 6 | 7 | from core.analytics import ScheduledAnalytics 8 | from core.observables import Tag 9 | from mongoengine import Q 10 | 11 | 12 | class TagLogic(ScheduledAnalytics): 13 | default_values = { 14 | "frequency": timedelta(minutes=30), 15 | "name": "TagLogic", 16 | "description": "Processes some tagging logic", 17 | } 18 | 19 | ACTS_ON = [] # act on all observables 20 | EXPIRATION = timedelta(seconds=3) 21 | 22 | def __init__(self, *args, **kwargs): 23 | super(TagLogic, self).__init__(*args, **kwargs) 24 | 25 | existing_tags = {t.name: (t.replaces, t.produces) for t in Tag.objects.all()} 26 | all_replacements = {} 27 | all_produces = {} 28 | for tag, (replaces, produces) in existing_tags.items(): 29 | for rep in replaces: 30 | if rep: 31 | all_replacements[rep] = tag 32 | 33 | all_produces[tag] = [t.name for t in produces] 34 | 35 | exists = Q(tags__exists=True) 36 | not_in_existing = Q(tags__name__nin=existing_tags.keys()) 37 | must_replace = Q(tags__name__in=all_replacements.keys()) 38 | 39 | self.CUSTOM_FILTER = exists & (not_in_existing | must_replace) 40 | 41 | def bulk(self, observables): 42 | for o in observables: 43 | self.each(o) 44 | 45 | @staticmethod 46 | def each(obj): 47 | all_tags = set([t.name for t in obj.tags]) 48 | 49 | # tag absent produced tags 50 | for tag in all_tags: 51 | try: 52 | db_tag = Tag.objects.get(name=tag) 53 | produced_tags = db_tag.produces 54 | obj.tag([t.name for t in produced_tags if t.name not in all_tags]) 55 | except DoesNotExist: 56 | logging.error( 57 | "Nonexisting tag: {} (found in {})".format(tag, obj.value) 58 | ) 59 | -------------------------------------------------------------------------------- /plugins/analytics/private/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !README.md 4 | !__init__.py 5 | -------------------------------------------------------------------------------- /plugins/analytics/private/README.md: -------------------------------------------------------------------------------- 1 | ### Private feeds 2 | This directory is where you should place your private analytics. It could be named anything else, but this one has a `.gitignore` so you don't mess things up. 
;-) 3 | -------------------------------------------------------------------------------- /plugins/analytics/private/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/analytics/private/__init__.py -------------------------------------------------------------------------------- /plugins/analytics/public/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/analytics/public/__init__.py -------------------------------------------------------------------------------- /plugins/analytics/public/censys.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | from datetime import timedelta 4 | 5 | from censys.search import CensysHosts 6 | 7 | from core import taskmanager 8 | from core.config.config import yeti_config 9 | from core.schemas import indicator, observable, task 10 | 11 | 12 | class CensysApiQuery(task.AnalyticsTask): 13 | _defaults = { 14 | "name": "Censys", 15 | "description": "Executes Censys queries (stored as indicators) and tags the returned IP addresses.", 16 | "frequency": timedelta(hours=24), 17 | } 18 | 19 | def run(self): 20 | api_key = yeti_config.get("censys", "api_key") 21 | api_secret = yeti_config.get("censys", "secret") 22 | max_results = yeti_config.get("censys", "max_results", 1000) 23 | 24 | if not (api_key and api_secret): 25 | logging.error( 26 | "Error: please configure an api_key and secret to use Censys analytics" 27 | ) 28 | raise RuntimeError 29 | 30 | hosts_api = CensysHosts( 31 | api_id=api_key, 32 | api_secret=api_secret, 33 | ) 34 | 35 | censys_queries, _ = indicator.Query.filter({"query_type": "censys"}) 36 | 37 | for query in censys_queries: 38 | ip_addresses = query_censys(hosts_api, query.pattern, max_results) 39 | for ip in ip_addresses: 40 | ip_object = observable.save(value=ip) 41 | ip_object.tag(query.relevant_tags) 42 | query.link_to( 43 | ip_object, "censys", f"IP found with Censys query: {query.pattern}" 44 | ) 45 | 46 | 47 | def query_censys(api: CensysHosts, query: str, max_results=1000) -> set[str]: 48 | """Queries Censys and returns all identified IP addresses.""" 49 | ip_addresses: set[str] = set() 50 | if max_results <= 0: 51 | results = api.search(query, fields=["ip"], pages=-1) 52 | elif max_results < 100: 53 | results = api.search(query, fields=["ip"], per_page=max_results, pages=1) 54 | else: 55 | pages = math.ceil(max_results / 100) 56 | results = api.search(query, fields=["ip"], per_page=100, pages=pages) 57 | 58 | for result in results: 59 | for record in result: 60 | ip = record.get("ip") 61 | if ip is not None: 62 | ip_addresses.add(ip) 63 | 64 | return ip_addresses 65 | 66 | 67 | taskmanager.TaskManager.register_task(CensysApiQuery) 68 | -------------------------------------------------------------------------------- /plugins/analytics/public/circl_pdns.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | 5 | from core import taskmanager 6 | from core.config.config import yeti_config 7 | from core.schemas import task 8 | from core.schemas.observable import Observable, ObservableType 9 | from core.schemas.observables import hostname 10 | 11 | 12 | class CirclPDNSApi(object): 13 | def fetch(observable: 
Observable): 14 | auth = ( 15 | yeti_config["circl_pdns"]["username"], 16 | yeti_config["circl_pdns"]["password"], 17 | ) 18 | API_URL = "https://www.circl.lu/pdns/query/" 19 | headers = {"accept": "application/json"} 20 | results = [] 21 | r = requests.get( 22 | API_URL + observable.value, 23 | auth=auth, 24 | headers=headers, 25 | proxies=yeti_config.get("proxy"), 26 | ) 27 | if r.status_code == 200: 28 | for line in filter(None, r.text.split("\n")): 29 | obj = json.loads(line) 30 | results.append(obj) 31 | 32 | return results 33 | 34 | 35 | class CirclPDNSApiQuery(task.AnalyticsTask, CirclPDNSApi): 36 | _defaults = { 37 | "name": "Circl.lu PDNS", 38 | "group": "PDNS", 39 | "description": "Perform passive DNS \ 40 | lookups on domain names or ip address.", 41 | } 42 | 43 | acts_on: list[ObservableType] = [ObservableType.hostname, ObservableType.ipv4] 44 | 45 | def each(self, observable: Observable): 46 | json_result = CirclPDNSApi.fetch(observable) 47 | 48 | result = {} 49 | result["source"] = "circl_pdns_query" 50 | 51 | if observable.type == ObservableType.ipv4: 52 | for record in json_result: 53 | new_hostname = hostname.Hostname(value=record["rrname"]).save() 54 | observable.link_to(new_hostname, record["rrtype"], "Circl PDNS") 55 | 56 | elif observable.type == ObservableType.hostname: 57 | for record in json_result: 58 | new_ip = hostname.Hostname(value=record["rdata"]).save() 59 | observable.link_to(new_ip, record["rrtype"], "Circl PDNS") 60 | 61 | 62 | taskmanager.TaskManager.register_task(CirclPDNSApiQuery) 63 | -------------------------------------------------------------------------------- /plugins/analytics/public/expire_tags.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from datetime import timedelta 3 | 4 | from core import taskmanager 5 | from core.schemas import observable, task 6 | 7 | 8 | class ExpireTags(task.AnalyticsTask): 9 | _defaults = { 10 | "name": "ExpireTags", 11 | "description": "Expires tags in observables", 12 | "frequency": timedelta(hours=12), 13 | } 14 | 15 | def run(self): 16 | now = datetime.datetime.now(datetime.timezone.utc) 17 | observables, total = observable.Observable.filter( 18 | query_args={"tags.expires": f"<{now.isoformat()}"}, 19 | ) 20 | for obs in observables: 21 | obs.expire_tags() 22 | 23 | 24 | taskmanager.TaskManager.register_task(ExpireTags) 25 | -------------------------------------------------------------------------------- /plugins/analytics/public/network_whois.py: -------------------------------------------------------------------------------- 1 | from ipwhois import IPWhois 2 | 3 | from core import taskmanager 4 | from core.schemas import task 5 | from core.schemas.entity import Company 6 | from core.schemas.observable import ObservableType 7 | from core.schemas.observables import email, ipv4 8 | 9 | 10 | class NetworkWhois(task.AnalyticsTask): 11 | _defaults = { 12 | "name": "NetworkWhois", 13 | "description": "Perform a Network Whois request on the IP address and tries to" 14 | " extract relevant information.", 15 | } 16 | 17 | acts_on: list[ObservableType] = [ObservableType.ipv4] 18 | 19 | def each(self, ip: ipv4.IPv4): 20 | r = IPWhois(ip.value) 21 | result = r.lookup_whois() 22 | 23 | # Let's focus on the most specific information 24 | # Which should be in the smallest subnet 25 | n = 0 26 | smallest_subnet = None 27 | 28 | for network in result["nets"]: 29 | cidr_bits = int(network["cidr"].split("/")[1].split(",")[0]) 30 | if cidr_bits > n: 31 | n = cidr_bits 32 
| smallest_subnet = network 33 | 34 | if smallest_subnet: 35 | # Create the company 36 | company = Company(name=smallest_subnet["description"].split("\n")[0]) 37 | 38 | # Link it to every email address referenced 39 | if smallest_subnet["emails"]: 40 | for email_address in smallest_subnet["emails"]: 41 | email_obs = email.Email(value=email_address) 42 | company.link_to(email_obs, "email-company", "IPWhois") 43 | 44 | # Copy the subnet info into the main dict 45 | for key in smallest_subnet: 46 | if smallest_subnet[key]: 47 | result["net_{}".format(key)] = smallest_subnet[key] 48 | 49 | ip.add_context("IPWhois", result) 50 | 51 | 52 | taskmanager.TaskManager.register_task(NetworkWhois) 53 | -------------------------------------------------------------------------------- /plugins/analytics/public/random_analytics.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | from core import taskmanager 4 | from core.schemas import task 5 | 6 | # from core.analytics import ScheduledAnalytics 7 | # from mongoengine import Q 8 | 9 | 10 | class PrintDomains(task.AnalyticsTask): 11 | _defaults = { 12 | "frequency": timedelta(hours=12), 13 | "type": "analytics", 14 | "description": "Extracts a domain from a URL", 15 | } 16 | 17 | acts_on: list[str] = ["hostname"] # act on all observables 18 | 19 | def each(self, observable): 20 | print(observable.value) 21 | 22 | 23 | class PrintDomain(task.OneShotTask): 24 | _defaults = { 25 | "type": "oneshot", 26 | "description": "Just prints an observable's value", 27 | } 28 | 29 | acts_on: list[str] = ["hostname"] 30 | 31 | def each(self, observable): 32 | print(observable.value) 33 | 34 | 35 | taskmanager.TaskManager.register_task(PrintDomains) 36 | taskmanager.TaskManager.register_task(PrintDomain) 37 | -------------------------------------------------------------------------------- /plugins/analytics/public/shodan.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import timedelta 3 | 4 | from shodan import Shodan 5 | 6 | from core import taskmanager 7 | from core.config.config import yeti_config 8 | from core.schemas import indicator, observable, task 9 | 10 | 11 | class ShodanApiQuery(task.AnalyticsTask): 12 | _defaults = { 13 | "name": "Shodan", 14 | "description": "Executes Shodan queries (stored as indicators) and tags the returned IP addresses.", 15 | "frequency": timedelta(hours=24), 16 | } 17 | 18 | def run(self): 19 | api_key = yeti_config.get("shodan", "api_key") 20 | result_limit = yeti_config.get("shodan", "result_limit") 21 | if not result_limit: 22 | result_limit = 100 23 | else: 24 | result_limit = int(result_limit) 25 | 26 | if not api_key: 27 | logging.error("Error: please configure an api_key to use Shodan analytics") 28 | raise RuntimeError 29 | 30 | shodan_api = Shodan(api_key) 31 | 32 | shodan_queries, _ = indicator.Query.filter({"query_type": "shodan"}) 33 | 34 | for query in shodan_queries: 35 | ip_addresses = query_shodan(shodan_api, query.pattern, result_limit) 36 | for ip in ip_addresses: 37 | ip_object = observable.save(value=ip) 38 | ip_object.tag(query.relevant_tags) 39 | query.link_to( 40 | ip_object, "shodan", f"IP found with Shodan query: {query.pattern}" 41 | ) 42 | 43 | 44 | def query_shodan(api: Shodan, query: str, limit: int) -> set[str]: 45 | """Queries Shodan and returns a set of identified IP addresses.""" 46 | ip_addresses: set[str] = set() 47 | count = 0 48 | 49 | for record in 
api.search_cursor(query): 50 | if record.get("ip_str") is not None: 51 | ip_addresses.add(record.get("ip_str")) 52 | # Setting the limit to -1 indicates the user wants unlimited results. 53 | if limit != -1: 54 | count += 1 55 | if count >= limit: 56 | break 57 | 58 | return ip_addresses 59 | 60 | 61 | taskmanager.TaskManager.register_task(ShodanApiQuery) 62 | -------------------------------------------------------------------------------- /plugins/analytics/public/shodan_api.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import shodan 4 | 5 | from core import taskmanager 6 | from core.config.config import yeti_config 7 | from core.schemas import task 8 | from core.schemas.entity import Company 9 | from core.schemas.observable import Observable, ObservableType 10 | from core.schemas.observables import asn, hostname, ipv4 11 | 12 | 13 | class ShodanApi(object): 14 | settings = { 15 | "shodan_api_key": { 16 | "name": "Shodan API Key", 17 | "description": "API Key provided by Shodan.io.", 18 | } 19 | } 20 | 21 | def fetch(observable: Observable): 22 | try: 23 | return shodan.Shodan(yeti_config.get("shodan", "api_key")).host( 24 | observable.value 25 | ) 26 | except shodan.APIError as e: 27 | logging.error("Error: {}".format(e)) 28 | 29 | 30 | class ShodanQuery(task.OneShotTask, ShodanApi): 31 | _defaults = { 32 | "name": "Shodan", 33 | "description": "Perform a Shodan query on the IP address and tries to" 34 | " extract relevant information.", 35 | } 36 | 37 | acts_on: list[ObservableType] = [ObservableType.ipv4] 38 | 39 | def each(self, ip: ipv4.IPv4) -> Observable: 40 | result = ShodanApi.fetch(ip) 41 | logging.debug(result) 42 | 43 | if "tags" in result and result["tags"] is not None: 44 | ip.tag(result["tags"]) 45 | 46 | logging.debug(result["asn"]) 47 | if "asn" in result and result["asn"] is not None: 48 | o_asn = asn.ASN( 49 | value=result["asn"], 50 | ).save() 51 | logging.debug(o_asn) 52 | o_asn.link_to(ip, "asn#", "Shodan Query") 53 | 54 | if "hostnames" in result and result["hostnames"] is not None: 55 | for hostname_str in result["hostnames"]: 56 | h = hostname.Hostname(value=hostname_str).save() 57 | h.link_to(ip, "A record", "Shodan Query") 58 | 59 | if "isp" in result and result["isp"] is not None: 60 | logging.debug(result["isp"]) 61 | o_isp = Company(name=result["isp"]).save() 62 | ip.link_to(o_isp, "hosting", "Shodan Query") 63 | return ip 64 | 65 | 66 | taskmanager.TaskManager.register_task(ShodanQuery) 67 | -------------------------------------------------------------------------------- /plugins/events/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/events/__init__.py -------------------------------------------------------------------------------- /plugins/events/hostname_extract.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import urlparse 2 | 3 | from core import taskmanager 4 | from core.events.message import EventMessage 5 | from core.schemas import observable, task 6 | 7 | 8 | class HostnameExtract(task.EventTask): 9 | _defaults = { 10 | "name": "HostnameExtact", 11 | "description": "Extract hostname (domain or ip) from new URL observable.", 12 | "acts_on": "(new|update):observable:url", 13 | } 14 | 15 | def run(self, message: EventMessage) -> None: 16 | url = message.event.yeti_object 17 | 
self.logger.info(f"Extracting hostname from: {url.value}") 18 | o = urlparse(url.value) 19 | if observable.IPv4.validator(o.hostname): 20 | extracted_obs = observable.IPv4(value=o.hostname).save() 21 | else: 22 | extracted_obs = observable.Hostname(value=o.hostname).save() 23 | url.link_to(extracted_obs, "hostname", "Extracted hostname from URL") 24 | return 25 | 26 | 27 | taskmanager.TaskManager.register_task(HostnameExtract) 28 | -------------------------------------------------------------------------------- /plugins/events/log/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/events/log/__init__.py -------------------------------------------------------------------------------- /plugins/events/log/event_logger_example.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | from core import taskmanager 5 | from core.events.message import LogMessage 6 | from core.schemas import task 7 | 8 | 9 | class LoggerExample(task.LogTask): 10 | _defaults = { 11 | "name": "EventLoggerExample", 12 | "description": "Logs events from eventlog bus", 13 | } 14 | 15 | def run(self, message: LogMessage) -> None: 16 | if isinstance(message.log, dict): 17 | logging.info(f"Received event: {json.dumps(message.log)}") 18 | else: 19 | logging.info(f"Received event: {message.log}") 20 | return 21 | 22 | 23 | taskmanager.TaskManager.register_task(LoggerExample) 24 | -------------------------------------------------------------------------------- /plugins/events/private/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !README.md 4 | !__init__.py 5 | -------------------------------------------------------------------------------- /plugins/events/private/README.md: -------------------------------------------------------------------------------- 1 | ### Private events 2 | This directory is where you should place your private events tasks. It could be named anything else, but this one has a `.gitignore` so you don't mess things up. 
;-) 3 | -------------------------------------------------------------------------------- /plugins/events/private/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/events/private/__init__.py -------------------------------------------------------------------------------- /plugins/events/public/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/events/public/__init__.py -------------------------------------------------------------------------------- /plugins/exports/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/exports/__init__.py -------------------------------------------------------------------------------- /plugins/feeds/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /plugins/feeds/private/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !README.md 4 | !__init__.py 5 | -------------------------------------------------------------------------------- /plugins/feeds/private/README.md: -------------------------------------------------------------------------------- 1 | ### Private feeds 2 | This directory is where you should place your private feeds. It could be named anything else, but this one has a `.gitignore` so you don't mess things up. 
;-) 3 | -------------------------------------------------------------------------------- /plugins/feeds/private/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/feeds/private/__init__.py -------------------------------------------------------------------------------- /plugins/feeds/public/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/feeds/public/__init__.py -------------------------------------------------------------------------------- /plugins/feeds/public/abuseipdb.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime, timedelta 3 | from typing import ClassVar 4 | 5 | from core import taskmanager 6 | from core.config.config import yeti_config 7 | from core.schemas import task 8 | from core.schemas.observables import ipv4 9 | 10 | 11 | class AbuseIPDB(task.FeedTask): 12 | _SOURCE: ClassVar["str"] = ( 13 | "https://api.abuseipdb.com/api/v2/blacklist?&key=%s&plaintext&limit=10000" 14 | ) 15 | _defaults = { 16 | "frequency": timedelta(hours=5), 17 | "name": "AbuseIPDB", 18 | "description": "Blocklist of IP addresses generated by AbuseIPDB", 19 | } 20 | 21 | def run(self): 22 | api_key = yeti_config.get("abuseIPDB", "key") 23 | 24 | if not api_key: 25 | raise Exception("Your abuseIPDB API key is not set in the yeti.conf file") 26 | 27 | # increase the result limit if you subscribe to a paid plan 28 | response = self._make_request(self._SOURCE % api_key, verify=True) 29 | if response: 30 | data = response.text 31 | 32 | for line in data.split("\n"): 33 | self.analyze(line) 34 | 35 | def analyze(self, line): 36 | line = line.strip() 37 | if not line:  # skip empty lines 38 | return 39 | ip_value = line 40 | context = {"source": self.name, "date_added": datetime.utcnow()} 41 | ipv4_obs = ipv4.IPv4(value=ip_value).save() 42 | 43 | logging.debug(f"Adding context to {ip_value}") 44 | ipv4_obs.add_context(self.name, context) 45 | ipv4_obs.tag(["blocklist"]) 46 | 47 | 48 | taskmanager.TaskManager.register_task(AbuseIPDB) 49 | -------------------------------------------------------------------------------- /plugins/feeds/public/alienvault_ip_reputation.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from io import StringIO 3 | from typing import ClassVar 4 | 5 | import pandas as pd 6 | 7 | from core import taskmanager 8 | from core.schemas import task 9 | from core.schemas.observables import ipv4 10 | 11 | 12 | class AlienVaultIPReputation(task.FeedTask): 13 | _SOURCE: ClassVar["str"] = "http://reputation.alienvault.com/reputation.data" 14 | _defaults = { 15 | "frequency": timedelta(hours=4), 16 | "name": "AlienVaultIPReputation", 17 | "description": "IP reputation data generated by AlienVault", 18 | } 19 | _NAMES = [ 20 | "IP", 21 | "number_1", 22 | "number_2", 23 | "Tag", 24 | "Country", 25 | "City", 26 | "Coord", 27 | "number_3", 28 | ] 29 | 30 | def run(self): 31 | response = self._make_request(self._SOURCE, verify=True) 32 | if response: 33 | data = response.text 34 | 35 | df = pd.read_csv( 36 | StringIO(data), 37 | delimiter="#", 38 | names=self._NAMES, 39 | ) 40 | 41 | for _, item in df.iterrows(): 42 | self.analyze(item) 43 | 44 | def analyze(self, item): 45 | context = dict(source=self.name) 46 | 47 | 
ip_str = item["IP"] 48 | category = item["Tag"] 49 | country = item["Country"] 50 | 51 | ip_obs = ipv4.IPv4(value=ip_str).save() 52 | 53 | context["country"] = country 54 | context["threat"] = category 55 | context["reliability"] = item["number_1"] 56 | context["risk"] = item["number_2"] 57 | 58 | ip_obs.tag([category]) 59 | ip_obs.add_context(self.name, context) 60 | 61 | 62 | taskmanager.TaskManager.register_task(AlienVaultIPReputation) 63 | -------------------------------------------------------------------------------- /plugins/feeds/public/artifacts.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import logging 3 | import os 4 | import tempfile 5 | from datetime import timedelta 6 | from io import BytesIO 7 | from zipfile import ZipFile 8 | 9 | from artifacts.scripts import validator 10 | 11 | from core import taskmanager 12 | from core.schemas import indicator, task 13 | 14 | 15 | class ForensicArtifacts(task.FeedTask): 16 | _defaults = { 17 | "name": "ForensicArtifacts GitHub repo", 18 | "frequency": timedelta(hours=1), 19 | "type": "feed", 20 | "description": "Imports ForensicArtifact definitions from the official github repo: https://github.com/forensicartifacts/artifacts", 21 | } 22 | 23 | def run(self): 24 | validator_object = validator.ArtifactDefinitionsValidator() 25 | 26 | response = self._make_request( 27 | "https://github.com/forensicartifacts/artifacts/archive/refs/heads/master.zip" 28 | ) 29 | if not response: 30 | logging.info("No response: skipping ForensicArtifact update") 31 | return 32 | 33 | with tempfile.TemporaryDirectory() as tempdir: 34 | ZipFile(BytesIO(response.content)).extractall(path=tempdir) 35 | artifacts_datadir = os.path.join( 36 | tempdir, "artifacts-main", "artifacts", "data" 37 | ) 38 | 39 | data_files_glob = glob.glob(os.path.join(artifacts_datadir, "*.yaml")) 40 | artifacts_dict = {} 41 | for file in data_files_glob: 42 | result = validator_object.CheckFile(file) 43 | if not result: 44 | logging.error("Failed to validate %s, skipping", file) 45 | continue 46 | logging.info("Processing %s", file) 47 | with open(file, "r") as f: 48 | yaml_string = f.read() 49 | 50 | forensic_indicators = indicator.ForensicArtifact.from_yaml_string( 51 | yaml_string, update_parents=False 52 | ) 53 | for fi in forensic_indicators: 54 | artifacts_dict[fi.name] = fi 55 | 56 | for artifact in artifacts_dict.values(): 57 | artifact.update_parents(artifacts_dict) 58 | artifact.save_indicators(create_links=True) 59 | 60 | 61 | taskmanager.TaskManager.register_task(ForensicArtifacts) 62 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_all.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeAll(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/all.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeAll", 14 | "description": "All IP addresses that have attacked one of our customers/servers in the last 48 hours. 
It's not recommended to use this feed: it carries less contextual information, so it's better to use each blocklist.de feed separately.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeAll) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_apache.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeApache(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/apache.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeApache", 14 | "description": "All IP addresses which have been reported within the last 48 hours as having run attacks on the Apache service: Apache-DDOS and RFI attacks.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "apache"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeApache) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_bots.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeBots(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/bots.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeBots", 14 | "description": "All IP addresses which have been reported within the last 48 hours as having run RFI attacks, or as REG bots, IRC bots, or BadBots (a BadBot has posted a spam comment on an open forum or wiki).", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "bots"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeBots) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_bruteforcelogin.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | 
from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeBruteforceLogin(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/bruteforcelogin.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeBruteforceLogin", 14 | "description": "All IPs which attack Joomla, WordPress, and other web logins with brute-force login attempts.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "bruteforce"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeBruteforceLogin) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_ftp.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeFTP(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/ftp.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeFTP", 14 | "description": "All IP addresses which have been reported within the last 48 hours for attacks on the Service FTP.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "ftp"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeFTP) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_imap.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeIMAP(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/imap.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeIMAP", 14 | "description": "All IP addresses which have been reported within the last 48 hours for attacks on the Service IMAP.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "imap"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeIMAP) 36 | -------------------------------------------------------------------------------- 
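Note on the blocklist.de feeds collected in this section: they differ only in the source list name, the task name, and the extra tags applied; the fetch/split/save/context/tag logic is otherwise identical. Below is a minimal sketch of a class factory that could generate these near-duplicate tasks, assuming only the task.FeedTask helpers, ipv4.IPv4, and TaskManager.register_task APIs already used in this repository; the make_blocklistde_feed helper itself is hypothetical and not part of the codebase.

from datetime import timedelta
from typing import ClassVar

from core import taskmanager
from core.schemas import task
from core.schemas.observables import ipv4


def make_blocklistde_feed(list_name: str, extra_tags: list[str], description: str):
    """Builds and registers one blocklist.de FeedTask subclass (illustrative sketch)."""
    class_name = f"Blocklistde{list_name.capitalize()}"

    class _Feed(task.FeedTask):
        # Each blocklist.de list lives at a predictable URL.
        _SOURCE: ClassVar[str] = f"https://lists.blocklist.de/lists/{list_name}.txt"
        _defaults = {
            "frequency": timedelta(hours=1),
            "name": class_name,
            "description": description,
        }

        def run(self):
            response = self._make_request(self._SOURCE)
            if not response:
                return
            for item in response.text.split("\n"):
                ip_str = item.strip()
                if not ip_str:  # skip blank lines
                    continue
                obs = ipv4.IPv4(value=ip_str).save()
                obs.add_context(self.name, {"source": self.name})
                obs.tag(["blocklist", *extra_tags])

    _Feed.__name__ = class_name
    taskmanager.TaskManager.register_task(_Feed)
    return _Feed


# e.g. make_blocklistde_feed("ircbot", ["irc"], "IPs reported for attacks on the IRC service.")

The one-module-per-feed layout actually used here trades that deduplication for easier review and per-feed divergence, so the sketch above is a possible refactoring, not a drop-in replacement.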
/plugins/feeds/public/blocklistde_ircbot.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeIRCBot(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/ircbot.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeIRCBot", 14 | "description": "All IP addresses which have been reported within the last 48 hours for attacks on the Service IRC.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "irc"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeIRCBot) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_mail.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeMail(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/mail.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeMail", 14 | "description": "All IP addresses which have been reported within the last 48 hours for attacks on the Service Mail.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "mail"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeMail) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_sip.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeSIP(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/sip.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeSIP", 14 | "description": "All IP addresses which have been reported within the last 48 hours for attacks on the Service SIP.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "sip"]) 33 | 34 | 35 | 
taskmanager.TaskManager.register_task(BlocklistdeSIP) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_ssh.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeSSH(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/ssh.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeSSH", 14 | "description": "All IP addresses which have been reported within the last 48 hours for attacks on the Service SSH.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "ssh"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeSSH) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/blocklistde_strongips.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BlocklistdeStrongIPs(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://lists.blocklist.de/lists/strongips.txt" 11 | _defaults = { 12 | "frequency": timedelta(hours=1), 13 | "name": "BlocklistdeStrongIPs", 14 | "description": "All IP addresses which have been reported as strong attackers: listed for longer than two months with more than 5,000 reported attacks.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n"): 22 | self.analyze(item) 23 | 24 | def analyze(self, item): 25 | ip_str = item.strip() 26 | 27 | context = {"source": self.name} 28 | 29 | if ip_str: 30 | obs = ipv4.IPv4(value=ip_str).save() 31 | obs.add_context(self.name, context) 32 | obs.tag(["blocklist", "strongips"]) 33 | 34 | 35 | taskmanager.TaskManager.register_task(BlocklistdeStrongIPs) 36 | -------------------------------------------------------------------------------- /plugins/feeds/public/botvrij_domain.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import hostname 7 | 8 | 9 | class BotvrijDomain(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://www.botvrij.eu/data/ioclist.domain" 11 | _defaults = { 12 | "frequency": timedelta(hours=12), 13 | "name": "BotvrijDomain", 14 | "description": "Botvrij.eu is a project of the Dutch National Cyber Security Centre (NCSC-NL) and SIDN Labs, the R&D team of SIDN, the registry for the .nl domain.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n")[6:-1]: 22 | self.analyze(item.strip()) 23 | 24 | def analyze(self, item): 25 | 
hostn, descr = item.split(" # domain - ") 26 | 27 | context = { 28 | "source": self.name, 29 | "description": descr, 30 | } 31 | 32 | obs = hostname.Hostname(value=hostn).save() 33 | obs.add_context(self.name, context) 34 | obs.tag(["botvrij"]) 35 | 36 | 37 | taskmanager.TaskManager.register_task(BotvrijDomain) 38 | -------------------------------------------------------------------------------- /plugins/feeds/public/botvrij_filename.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import file 7 | 8 | 9 | class BotvrijFilename(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://www.botvrij.eu/data/ioclist.filename" 11 | _defaults = { 12 | "frequency": timedelta(hours=12), 13 | "name": "BotvrijFilename", 14 | "description": "Botvrij.eu is a project of the Dutch National Cyber Security Centre (NCSC-NL) and SIDN Labs, the R&D team of SIDN, the registry for the .nl domain.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n")[6:-1]: 22 | self.analyze(item.strip()) 23 | 24 | def analyze(self, item): 25 | filen, descr = item.split(" # filename - ") 26 | 27 | context = { 28 | "source": self.name, 29 | "description": descr, 30 | } 31 | 32 | obs = file.File(value=filen, name=filen).save() 33 | # the name is set at creation time so it is persisted by save() 34 | obs.add_context(self.name, context) 35 | obs.tag(["botvrij"]) 36 | 37 | 38 | taskmanager.TaskManager.register_task(BotvrijFilename) 39 | -------------------------------------------------------------------------------- /plugins/feeds/public/botvrij_hostname.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import hostname 7 | 8 | 9 | class BotvrijHostname(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://www.botvrij.eu/data/ioclist.hostname" 11 | _defaults = { 12 | "frequency": timedelta(hours=12), 13 | "name": "BotvrijHostname", 14 | "description": "Botvrij.eu is a project of the Dutch National Cyber Security Centre (NCSC-NL) and SIDN Labs, the R&D team of SIDN, the registry for the .nl domain.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n")[6:-1]: 22 | self.analyze(item.strip()) 23 | 24 | def analyze(self, item): 25 | hostn, descr = item.split(" # hostname - ") 26 | 27 | context = { 28 | "source": self.name, 29 | "description": descr, 30 | } 31 | 32 | obs = hostname.Hostname(value=hostn).save() 33 | obs.add_context(self.name, context) 34 | obs.tag(["botvrij"]) 35 | 36 | 37 | taskmanager.TaskManager.register_task(BotvrijHostname) 38 | -------------------------------------------------------------------------------- /plugins/feeds/public/botvrij_ipdst.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import ipv4 7 | 8 | 9 | class BotvrijIPDst(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://www.botvrij.eu/data/ioclist.ip-dst" 11 | _defaults = { 12 | 
"frequency": timedelta(hours=12), 13 | "name": "BotvrijIPDst", 14 | "description": "Botvrij.eu is a project of the Dutch National Cyber Security Centre (NCSC-NL) and SIDN Labs, the R&D team of SIDN, the registry for the .nl domain.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n")[6:-1]: 22 | self.analyze(item.strip()) 23 | 24 | def analyze(self, item): 25 | ip, descr = item.split(" # ip-dst - ") 26 | 27 | context = { 28 | "source": self.name, 29 | "description": descr, 30 | } 31 | 32 | obs = ipv4.IPv4(value=ip).save() 33 | obs.add_context(self.name, context) 34 | obs.tag(["botvrij"]) 35 | 36 | 37 | taskmanager.TaskManager.register_task(BotvrijIPDst) 38 | -------------------------------------------------------------------------------- /plugins/feeds/public/botvrij_md5.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import md5 7 | 8 | 9 | class BotvrijMD5(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://www.botvrij.eu/data/ioclist.md5" 11 | _defaults = { 12 | "frequency": timedelta(hours=12), 13 | "name": "BotvrijMD5", 14 | "description": "Botvrij.eu is a project of the Dutch National Cyber Security Centre (NCSC-NL) and SIDN Labs, the R&D team of SIDN, the registry for the .nl domain.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n")[6:-1]: 22 | self.analyze(item.strip()) 23 | 24 | def analyze(self, item): 25 | val, descr = item.split(" # md5 - ") 26 | 27 | context = { 28 | "source": self.name, 29 | "description": descr, 30 | } 31 | 32 | obs = md5.MD5(value=val).save() 33 | obs.add_context(self.name, context) 34 | obs.tag(["botvrij"]) 35 | 36 | 37 | taskmanager.TaskManager.register_task(BotvrijMD5) 38 | -------------------------------------------------------------------------------- /plugins/feeds/public/botvrij_sha1.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import sha1 7 | 8 | 9 | class BotvrijSHA1(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://www.botvrij.eu/data/ioclist.sha1" 11 | _defaults = { 12 | "frequency": timedelta(hours=12), 13 | "name": "BotvrijSHA1", 14 | "description": "Botvrij.eu is a project of the Dutch National Cyber Security Centre (NCSC-NL) and SIDN Labs, the R&D team of SIDN, the registry for the .nl domain.", 15 | } 16 | 17 | def run(self): 18 | response = self._make_request(self._SOURCE) 19 | if response: 20 | data = response.text 21 | for item in data.split("\n")[6:-1]: 22 | self.analyze(item.strip()) 23 | 24 | def analyze(self, item): 25 | val, descr = item.split(" # sha1 - ") 26 | 27 | context = { 28 | "source": self.name, 29 | "description": descr, 30 | } 31 | 32 | obs = sha1.SHA1(value=val).save() 33 | obs.add_context(self.name, context) 34 | obs.tag(["botvrij"]) 35 | 36 | 37 | taskmanager.TaskManager.register_task(BotvrijSHA1) 38 | -------------------------------------------------------------------------------- /plugins/feeds/public/botvrij_sha256.py: 
-------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import sha256 7 | 8 | 9 | class BotvrijSHA256(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://www.botvrij.eu/data/ioclist.sha256" 11 | 12 | _defaults = { 13 | "frequency": timedelta(hours=12), 14 | "name": "BotvrijSHA256", 15 | "description": "Botvrij.eu is a project of the Dutch National Cyber Security Centre (NCSC-NL) and SIDN Labs, the R&D team of SIDN, the registry for the .nl domain.", 16 | } 17 | 18 | def run(self): 19 | response = self._make_request(self._SOURCE) 20 | if response: 21 | data = response.text 22 | for item in data.split("\n")[6:-1]: 23 | self.analyze(item.strip()) 24 | 25 | def analyze(self, item): 26 | val, descr = item.split(" # sha256 - ") 27 | 28 | context = { 29 | "source": self.name, 30 | "description": descr, 31 | } 32 | 33 | obs = sha256.SHA256(value=val).save() 34 | obs.add_context(self.name, context) 35 | obs.tag(["botvrij"]) 36 | 37 | 38 | taskmanager.TaskManager.register_task(BotvrijSHA256) 39 | -------------------------------------------------------------------------------- /plugins/feeds/public/botvrij_url.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | from core import taskmanager 5 | from core.schemas import task 6 | from core.schemas.observables import url 7 | 8 | 9 | class BotvrijUrl(task.FeedTask): 10 | _SOURCE: ClassVar["str"] = "https://www.botvrij.eu/data/ioclist.url" 11 | 12 | _defaults = { 13 | "frequency": timedelta(hours=12), 14 | "name": "BotvrijUrl", 15 | "description": "Botvrij.eu is a project of the Dutch National Cyber Security Centre (NCSC-NL) and SIDN Labs, the R&D team of SIDN, the registry for the .nl domain.", 16 | } 17 | 18 | def run(self): 19 | response = self._make_request(self._SOURCE) 20 | if response: 21 | data = response.text 22 | for item in data.split("\n")[6:-1]: 23 | self.analyze(item.strip()) 24 | 25 | def analyze(self, item): 26 | url_str, descr = item.split(" # url - ") 27 | 28 | context = { 29 | "source": self.name, 30 | "description": descr, 31 | } 32 | 33 | obs = url.Url(value=url_str).save() 34 | obs.add_context(self.name, context) 35 | obs.tag(["botvrij"]) 36 | 37 | 38 | taskmanager.TaskManager.register_task(BotvrijUrl) 39 | -------------------------------------------------------------------------------- /plugins/feeds/public/cisco_umbrella_top_domains.py: -------------------------------------------------------------------------------- 1 | import io 2 | from datetime import timedelta 3 | from typing import ClassVar 4 | 5 | from core import taskmanager 6 | from core.config.config import yeti_config 7 | from core.schemas import task 8 | from core.schemas.observables import hostname 9 | 10 | 11 | class CiscoUmbrellaTopDomains(task.FeedTask): 12 | _defaults = { 13 | "frequency": timedelta(hours=24), 14 | "name": "CiscoUmbrellaTopDomains", 15 | "description": "Imports the Cisco Umbrella top domains list", 16 | } 17 | _SOURCE: ClassVar["str"] = ( 18 | "http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" 19 | ) 20 | 21 | def run(self): 22 | top_domains = yeti_config.get("umbrella", "top_domains", 10000) 23 | response = self._make_request(self._SOURCE, sort=False) 24 | data = self._unzip_content(response.content) 25 | context = { 26 | "source": self.name, 27 | 
} 28 | feed = io.BytesIO(data) 29 | while top_domains > 0: 30 | line = feed.readline().decode("utf-8").strip() 31 | if not line:  # end of file reached before the limit 32 | break 33 | _, domain = line.split(",") 34 | hostname_obs = hostname.Hostname(value=domain).save() 35 | hostname_obs.add_context(self.name, context) 36 | hostname_obs.tag(["cisco_umbrella", "top_domain"]) 37 | top_domains -= 1 38 | 39 | 40 | taskmanager.TaskManager.register_task(CiscoUmbrellaTopDomains) 41 | -------------------------------------------------------------------------------- /plugins/feeds/public/cruzit.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import timedelta 3 | from typing import ClassVar 4 | 5 | from core import taskmanager 6 | from core.schemas import task 7 | from core.schemas.observables import ipv4 8 | 9 | 10 | class Cruzit(task.FeedTask): 11 | _SOURCE: ClassVar["str"] = ( 12 | "https://iplists.firehol.org/files/cruzit_web_attacks.ipset" 13 | ) 14 | 15 | _defaults = { 16 | "frequency": timedelta(hours=1), 17 | "name": "Cruzit", 18 | "description": "IP addresses that have been reported within the last 48 hours for attacks on the services FTP, IMAP, and Apache, for Apache-DDOS and RFI attacks, and for brute-force logins against web applications.", 19 | } 20 | 21 | def run(self): 22 | response = self._make_request(self._SOURCE) 23 | if response: 24 | data = response.text 25 | for line in data.split("\n")[63:]: 26 | self.analyze(line) 27 | 28 | def analyze(self, line): 29 | ip_str = line.strip() 30 | 31 | context = {"source": self.name} 32 | if ip_str: 33 | obs = ipv4.IPv4(value=ip_str).save() 34 | obs.add_context(self.name, context) 35 | obs.tag(["cruzit", "web attacks"]) 36 | logging.debug(f"Adding {ip_str} to cruzit feed") 37 | 38 | 39 | taskmanager.TaskManager.register_task(Cruzit) 40 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_dnsrd.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed of Dataplane DNS Recursive IPs with ASN 3 | """ 4 | 5 | import logging 6 | from datetime import timedelta 7 | from typing import ClassVar 8 | 9 | import pandas as pd 10 | 11 | from core import taskmanager 12 | from core.schemas import task 13 | from core.schemas.observables import asn, ipv4 14 | 15 | 16 | class DataplaneDNSRecursive(task.FeedTask): 17 | """ 18 | Feed of Dataplane DNS Recursive IPs with ASN 19 | """ 20 | 21 | _SOURCE: ClassVar["str"] = "https://dataplane.org/dnsrd.txt" 22 | _defaults = { 23 | "frequency": timedelta(hours=12), 24 | "name": "DataplaneDNSRecursive", 25 | "description": "Feed of Dataplane DNS Recursive IPs with ASN", 26 | } 27 | _NAMES = [ 28 | "ASN", 29 | "ASname", 30 | "ipaddr", 31 | "lastseen", 32 | "category", 33 | ] 34 | 35 | def run(self): 36 | response = self._make_request(self._SOURCE, sort=False) 37 | if response: 38 | lines = response.content.decode("utf-8").split("\n")[64:-5] 39 | df = pd.DataFrame([line.split("|") for line in lines], columns=self._NAMES) 40 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 41 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 42 | df.ffill(inplace=True) 43 | df = self._filter_observables_by_time(df, "lastseen") 44 | for _, row in df.iterrows(): 45 | self.analyze(row) 46 | 47 | def analyze(self, item): 48 | if not item["ipaddr"]: 49 | return 50 | 51 | context_ip = { 52 | "source": self.name, 53 | "last_seen": item["lastseen"], 54 | } 55 | 56 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 57 | category = item["category"].lower() 58 | tags = ["dataplane", "dnsrd"] 59 | if category: 60 | tags.append(category) 61 | 
ip_obs.add_context(self.name, context_ip) 57 | ip_obs.tag(tags) 58 | 59 | asn_obs = asn.ASN(value=item["ASN"]).save() 60 | context_asn = { 61 | "source": self.name, 62 | "name": item["ASname"], 63 | "last_seen": item["lastseen"], 64 | } 65 | asn_obs.add_context(self.name, context_asn) 66 | asn_obs.tag(tags) 67 | 68 | asn_obs.link_to(ip_obs, "ASN_IP", self.name) 69 | 70 | 71 | taskmanager.TaskManager.register_task(DataplaneDNSRecursive) 72 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_dnsrdany.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | import pandas as pd 5 | 6 | from core import taskmanager 7 | from core.schemas import task 8 | from core.schemas.observables import asn, ipv4 9 | 10 | 11 | class DataplaneDNSAny(task.FeedTask): 12 | """ 13 | Feed of Dataplane DNS ANY-query IPs with ASNs 14 | """ 15 | 16 | _SOURCE: ClassVar["str"] = "https://dataplane.org/dnsrdany.txt" 17 | _defaults = { 18 | "frequency": timedelta(hours=12), 19 | "name": "DataplaneDNSAny", 20 | "description": "Feed of Dataplane DNS ANY-query IPs with ASNs", 21 | } 22 | _NAMES = ["ASN", "ASname", "ipaddr", "lastseen", "category"] 23 | 24 | def run(self): 25 | response = self._make_request(self._SOURCE, sort=False) 26 | if response: 27 | lines = response.content.decode("utf-8").split("\n")[64:-5] 28 | 29 | df = pd.DataFrame([line.split("|") for line in lines], columns=self._NAMES) 30 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 31 | df = df.dropna() 32 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 33 | df.ffill(inplace=True) 34 | 35 | df = self._filter_observables_by_time(df, "lastseen") 36 | 37 | for _, row in df.iterrows(): 38 | self.analyze(row) 39 | 40 | def analyze(self, item): 41 | if not item["ipaddr"]: 42 | return 43 | context_ip = { 44 | "source": self.name, 45 | "last_seen": item["lastseen"], 46 | } 47 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 48 | category = item["category"].lower() 49 | tags = ["dataplane", "dnsany"] 50 | if category: 51 | tags.append(category) 52 | ip_obs.add_context(self.name, context_ip) 53 | ip_obs.tag(tags) 54 | 55 | asn_obs = asn.ASN(value=item["ASN"]).save() 56 | context_asn = { 57 | "source": self.name, 58 | "name": item["ASname"], 59 | "last_seen": item["lastseen"], 60 | } 61 | asn_obs.add_context(self.name, context_asn) 62 | # "dataplane" and "dnsany" are already included in tags 63 | asn_obs.tag(tags) 64 | 65 | asn_obs.link_to(ip_obs, "ASN_IP", self.name) 66 | 67 | 68 | taskmanager.TaskManager.register_task(DataplaneDNSAny) 69 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_dnsversion.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed of DNS version-query IPs with ASNs 3 | """ 4 | 5 | import logging 6 | from datetime import timedelta 7 | from typing import ClassVar 8 | 9 | import pandas as pd 10 | 11 | from core import taskmanager 12 | from core.schemas import task 13 | from core.schemas.observables import asn, ipv4 14 | 15 | 16 | class DataplaneDNSVersion(task.FeedTask): 17 | """ 18 | Feed of DNS version-query IPs with ASNs 19 | """ 20 | 21 | _SOURCE: ClassVar["str"] = "https://dataplane.org/dnsversion.txt" 22 | _defaults = { 23 | "frequency": timedelta(hours=12), 24 | "name": "DataplaneDNSVersion", 25 | "description": "Feed of DNS version-query IPs with ASNs", 26 | } 27 | _NAMES = ["ASN", "ASname", "ipaddr", 
"lastseen", "category"] 28 | 29 | def run(self): 30 | response = self._make_request(self._SOURCE, sort=False) 31 | if response: 32 | lines = response.content.decode("utf-8").split("\n")[66:-5] 33 | 34 | df = pd.DataFrame([line.split("|") for line in lines], columns=self._NAMES) 35 | 36 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 37 | df.ffill(inplace=True) 38 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 39 | df = self._filter_observables_by_time(df, "lastseen") 40 | 41 | for _, row in df.iterrows(): 42 | self.analyze(row) 43 | 44 | def analyze(self, item): 45 | if not item["ipaddr"]: 46 | return 47 | 48 | context_ip = { 49 | "source": self.name, 50 | } 51 | 52 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 53 | category = item["category"].lower() 54 | tags = ["dataplane", "dnsversion"] 55 | if category: 56 | tags.append(category) 57 | logging.debug(f"Adding context {context_ip} to {ip_obs}") 58 | ip_obs.add_context("dataplane dns version", context_ip) 59 | ip_obs.tag(tags) 60 | asn_obs = asn.ASN(value=item["ASN"]).save() 61 | context_asn = { 62 | "source": self.name, 63 | "name": item["ASname"], 64 | "last_seen": item["lastseen"], 65 | } 66 | asn_obs.add_context(self.name, context_asn) 67 | asn_obs.tag(tags) 68 | 69 | asn_obs.link_to(ip_obs, "ASN_IP", self.name) 70 | 71 | 72 | taskmanager.TaskManager.register_task(DataplaneDNSVersion) 73 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_sipquery.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed of SIPs from Dataplane with IPs and ASNs 3 | """ 4 | 5 | from datetime import timedelta 6 | from typing import ClassVar 7 | 8 | import pandas as pd 9 | 10 | from core import taskmanager 11 | from core.schemas import task 12 | from core.schemas.observables import asn, ipv4 13 | 14 | 15 | class DataplaneSIPQuery(task.FeedTask): 16 | """ 17 | Feed of SIPs from Dataplane with IPs and ASNs 18 | """ 19 | 20 | _SOURCE: ClassVar["str"] = "https://dataplane.org/sipquery.txt" 21 | _defaults = { 22 | "frequency": timedelta(hours=12), 23 | "name": "DataplaneSIPQuery", 24 | "description": "Feed of SIPs from Dataplane with IPs and ASNs", 25 | } 26 | 27 | def run(self): 28 | response = self._make_request(self._SOURCE, sort=False) 29 | if response: 30 | lines = response.content.decode("utf-8").split("\n")[66:-5] 31 | columns = ["ASN", "ASname", "ipaddr", "lastseen", "category"] 32 | df = pd.DataFrame([line.split("|") for line in lines], columns=columns) 33 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 34 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 35 | df.ffill(inplace=True) 36 | df = self._filter_observables_by_time(df, "lastseen") 37 | for _, row in df.iterrows(): 38 | self.analyze(row) 39 | 40 | def analyze(self, item): 41 | if not item["ipaddr"]: 42 | return 43 | 44 | context_ip = { 45 | "source": self.name, 46 | "last_seen": item["lastseen"], 47 | } 48 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 49 | category = item["category"].lower() 50 | tags = ["dataplane", "sipquery"] 51 | if category: 52 | tags.append(category) 53 | ip_obs.add_context("dataplane sip query", context_ip) 54 | ip_obs.tag(tags) 55 | 56 | asn_obs = asn.ASN(value=item["ASN"]).save() 57 | context_asn = { 58 | "source": self.name, 59 | "name": item["ASname"], 60 | "last_seen": item["lastseen"], 61 | } 62 | 63 | asn_obs.add_context(self.name, context_asn) 64 | asn_obs.tag(tags) 65 | 66 | asn_obs.link_to(ip_obs, 
"ASN_IP", self.name) 67 | 68 | 69 | taskmanager.TaskManager.register_task(DataplaneSIPQuery) 70 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_sipregistr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed of SIP registr with IPs and ASNs 3 | """ 4 | 5 | import logging 6 | from datetime import timedelta 7 | from typing import ClassVar 8 | 9 | import pandas as pd 10 | 11 | from core import taskmanager 12 | from core.schemas import task 13 | from core.schemas.observables import asn, ipv4 14 | 15 | 16 | class DataplaneSIPRegistr(task.FeedTask): 17 | """ 18 | Feed of SIP registr with IPs and ASNs 19 | """ 20 | 21 | _SOURCE: ClassVar["str"] = "https://dataplane.org/sipregistration.txt" 22 | _defaults = { 23 | "frequency": timedelta(hours=12), 24 | "name": "DataplaneSIPRegistr", 25 | "description": "Feed of SIP registr with IPs and ASNs", 26 | } 27 | _NAMES = ["ASN", "ASname", "ipaddr", "lastseen", "category"] 28 | 29 | def run(self): 30 | response = self._make_request(self._SOURCE, sort=False) 31 | if response: 32 | lines = response.content.decode("utf-8").split("\n")[66:-5] 33 | 34 | df = pd.DataFrame([line.split("|") for line in lines], columns=self._NAMES) 35 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 36 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 37 | df.ffill(inplace=True) 38 | df = self._filter_observables_by_time(df, "lastseen") 39 | for _, row in df.iterrows(): 40 | self.analyze(row) 41 | 42 | def analyze(self, item): 43 | if not item["ipaddr"]: 44 | return 45 | 46 | context_ip = { 47 | "source": "dataplane sip registr", 48 | } 49 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 50 | 51 | category = item["category"].lower() 52 | tags = ["dataplane", "sipregistr"] 53 | if category: 54 | tags.append(category) 55 | logging.debug(f"Adding context {context_ip} to {ip_obs}") 56 | ip_obs.add_context("dataplane sip registr", context_ip) 57 | ip_obs.tag(tags) 58 | 59 | asn_obs = asn.ASN(value=item["ASN"]).save() 60 | 61 | context_asn = { 62 | "source": self.name, 63 | "name": item["ASname"], 64 | } 65 | 66 | asn_obs.add_context("dataplane sip registr", context_asn) 67 | asn_obs.tag(tags) 68 | 69 | asn_obs.link_to(ip_obs, "ASN to IP", self.name) 70 | 71 | 72 | taskmanager.TaskManager.register_task(DataplaneSIPRegistr) 73 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_smtpgreet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed of SMTP greetings from dataplane with IPs and ASN 3 | """ 4 | 5 | from datetime import timedelta 6 | from typing import ClassVar 7 | 8 | import pandas as pd 9 | 10 | from core import taskmanager 11 | from core.schemas import task 12 | from core.schemas.observables import asn, ipv4 13 | 14 | 15 | class DataplaneSMTPGreet(task.FeedTask): 16 | """ 17 | Feed of SMTP greetings from dataplane with IPs and ASN 18 | """ 19 | 20 | _SOURCE: ClassVar["str"] = "https://dataplane.org/smtpgreet.txt" 21 | _defaults = { 22 | "frequency": timedelta(hours=12), 23 | "name": "DataplaneSMTPGreet", 24 | "description": "Feed of SMTP greetings from dataplane with IPs and ASN", 25 | } 26 | _NAMES = ["ASN", "ASname", "ipaddr", "lastseen", "category"] 27 | 28 | def run(self): 29 | response = self._make_request(self._SOURCE, sort=False) 30 | if response: 31 | lines = response.content.decode("utf-8").split("\n")[68:-5] 32 | 33 | df = 
pd.DataFrame([line.split("|") for line in lines], columns=self._NAMES) 34 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 35 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 36 | df.ffill(inplace=True) 37 | df = self._filter_observables_by_time(df, "lastseen") 38 | for _, row in df.iterrows(): 39 | self.analyze(row) 40 | 41 | def analyze(self, item): 42 | if not item["ipaddr"]: 43 | return 44 | 45 | context_ip = { 46 | "source": self.name, 47 | } 48 | 49 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 50 | category = item["category"].lower() 51 | tags = ["dataplane", "smtpgreet", "smtp", "scanning"] 52 | if category: 53 | tags.append(category) 54 | ip_obs.add_context(self.name, context_ip) 55 | ip_obs.tag(tags) 56 | 57 | asn_obs = asn.ASN(value=item["ASN"]).save() 58 | 59 | context_asn = { 60 | "source": self.name, 61 | "name": item["ASname"], 62 | } 63 | asn_obs.add_context(self.name, context_asn) 64 | asn_obs.tag(tags) 65 | asn_obs.link_to(ip_obs, "ASN to IP", self.name) 66 | 67 | 68 | taskmanager.TaskManager.register_task(DataplaneSMTPGreet) 69 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_sshclient.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed of Dataplane SSH client bruteforce IPs and ASNs 3 | """ 4 | 5 | from datetime import timedelta 6 | from typing import ClassVar 7 | 8 | import pandas as pd 9 | 10 | from core import taskmanager 11 | from core.schemas import task 12 | from core.schemas.observables import asn, ipv4 13 | 14 | 15 | class DataplaneSSHClient(task.FeedTask): 16 | """ 17 | Feed of Dataplane SSH client bruteforce IPs and ASNs. 18 | """ 19 | 20 | _SOURCE: ClassVar["str"] = "https://dataplane.org/sshclient.txt" 21 | _defaults = { 22 | "frequency": timedelta(hours=12), 23 | "name": "DataplaneSSHClient", 24 | "description": "Feed of Dataplane SSH client bruteforce IPs and ASNs", 25 | } 26 | _NAMES = ["ASN", "ASname", "ipaddr", "lastseen", "category"] 27 | 28 | def run(self): 29 | response = self._make_request(self._SOURCE, sort=False) 30 | if response: 31 | lines = response.content.decode("utf-8").split("\n")[64:-5] 32 | df = pd.DataFrame([line.split("|") for line in lines], columns=self._NAMES) 33 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 34 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 35 | df.ffill(inplace=True) 36 | df = self._filter_observables_by_time(df, "lastseen") 37 | 38 | for _, row in df.iterrows(): 39 | self.analyze(row) 40 | 41 | def analyze(self, item): 42 | if not item["ipaddr"]: 43 | return 44 | 45 | context_ip = { 46 | "source": self.name, 47 | } 48 | 49 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 50 | category = item["category"].lower() 51 | tags = ["dataplane", "bruteforce", "ssh", "scanning"] 52 | if category: 53 | tags.append(category) 54 | ip_obs.add_context(self.name, context_ip) 55 | ip_obs.tag(tags) 56 | 57 | asn_obs = asn.ASN(value=item["ASN"]).save() 58 | context_asn = { 59 | "source": self.name, 60 | } 61 | asn_obs.add_context(self.name, context_asn) 62 | asn_obs.tag(tags) 63 | asn_obs.link_to(ip_obs, "ASN_IP", self.name) 64 | 65 | 66 | taskmanager.TaskManager.register_task(DataplaneSSHClient) 67 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_sshpwauth.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed of Dataplane SSH 
bruteforce IPs and ASNs 3 | """ 4 | 5 | from datetime import timedelta 6 | from typing import ClassVar 7 | 8 | import pandas as pd 9 | 10 | from core import taskmanager 11 | from core.schemas import task 12 | from core.schemas.observables import asn, ipv4 13 | 14 | 15 | class DataplaneSSHPwAuth(task.FeedTask): 16 | """ 17 | Feed of Dataplane SSH bruteforce IPs and ASNs 18 | """ 19 | 20 | _SOURCE: ClassVar["str"] = "https://dataplane.org/sshpwauth.txt" 21 | _defaults = { 22 | "frequency": timedelta(hours=12), 23 | "name": "DataplaneSSHPwAuth", 24 | "description": "Feed of Dataplane SSH bruteforce IPs and ASNs", 25 | } 26 | _NAMES = ["ASN", "ASname", "ipaddr", "lastseen", "category"] 27 | 28 | def run(self): 29 | response = self._make_request(self._SOURCE, sort=False) 30 | if response: 31 | lines = response.content.decode("utf-8").split("\n")[68:-5] 32 | df = pd.DataFrame([line.split("|") for line in lines], columns=self._NAMES) 33 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 34 | df = df.dropna() 35 | 36 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 37 | df.ffill(inplace=True) 38 | df = self._filter_observables_by_time(df, "lastseen") 39 | for _, row in df.iterrows(): 40 | self.analyze(row) 41 | 42 | def analyze(self, item): 43 | if not item["ipaddr"]: 44 | return 45 | 46 | context_ip = { 47 | "source": self.name, 48 | } 49 | 50 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 51 | category = item["category"].lower() 52 | tags = ["dataplane", "bruteforce", "ssh", "scanning"] 53 | if category: 54 | tags.append(category) 55 | ip_obs.add_context(self.name, context_ip) 56 | ip_obs.tag(tags) 57 | 58 | asn_obs = asn.ASN(value=item["ASN"]).save() 59 | context_asn = { 60 | "source": self.name, 61 | } 62 | asn_obs.add_context(self.name, context_asn) 63 | asn_obs.tag(tags) 64 | 65 | asn_obs.link_to(ip_obs, "ASN_IP", self.name) 66 | 67 | 68 | taskmanager.TaskManager.register_task(DataplaneSSHPwAuth) 69 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_telnetlogin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed of Dataplane telnet login attempt IPs and ASNs 3 | """ 4 | 5 | from datetime import timedelta 6 | from typing import ClassVar 7 | 8 | import pandas as pd 9 | 10 | from core import taskmanager 11 | from core.schemas import task 12 | from core.schemas.observables import asn, ipv4 13 | 14 | 15 | class DataplaneTelnetLogin(task.FeedTask): 16 | """ 17 | Feed of Dataplane telnet login attempt IPs and ASNs 18 | """ 19 | 20 | _SOURCE: ClassVar["str"] = "https://dataplane.org/telnetlogin.txt" 21 | _defaults = { 22 | "frequency": timedelta(hours=12), 23 | "name": "DataplaneTelnetLogin", 24 | "description": "Feed of Dataplane telnet login attempt IPs and ASNs", 25 | } 26 | _NAMES = ["ASN", "ASname", "ipaddr", "lastseen", "category"] 27 | 28 | def run(self): 29 | response = self._make_request(self._SOURCE, sort=False) 30 | if response: 31 | lines = response.content.decode("utf-8").split("\n")[64:-5] 32 | 33 | df = pd.DataFrame([line.split("|") for line in lines], columns=self._NAMES) 34 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 35 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 36 | df.ffill(inplace=True) 37 | df = self._filter_observables_by_time(df, "lastseen") 38 | for _, row in df.iterrows(): 39 | self.analyze(row) 40 | 41 | def analyze(self, item): 42 | if not item["ipaddr"]: 43 | return 44 | 45 | context_ip = { 46 | "source": 
self.name, 47 | } 48 | 49 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 50 | category = item["category"].lower() 51 | tags = ["dataplane", "bruteforce", "telnet", "scanning"] 52 | if category: 53 | tags.append(category) 54 | ip_obs.add_context(self.name, context_ip) 55 | ip_obs.tag(tags) 56 | 57 | asn_obs = asn.ASN(value=item["ASN"]).save() 58 | context_asn = { 59 | "source": self.name, 60 | } 61 | asn_obs.add_context(self.name, context_asn) 62 | asn_obs.tag(tags) 63 | 64 | asn_obs.link_to(ip_obs, "ASN_IP", self.name) 65 | 66 | 67 | taskmanager.TaskManager.register_task(DataplaneTelnetLogin) 68 | -------------------------------------------------------------------------------- /plugins/feeds/public/dataplane_vnc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed of Dataplane VNC scanning IPs and ASNs 3 | """ 4 | 5 | from datetime import timedelta 6 | from typing import ClassVar 7 | 8 | import pandas as pd 9 | 10 | from core import taskmanager 11 | from core.schemas import task 12 | from core.schemas.observables import asn, ipv4 13 | 14 | 15 | class DataplaneVNC(task.FeedTask): 16 | """ 17 | Feed of Dataplane VNC scanning IPs and ASNs. 18 | """ 19 | 20 | _SOURCE: ClassVar["str"] = "https://dataplane.org/vncrfb.txt" 21 | _NAMES = ["ASN", "ASname", "ipaddr", "lastseen", "category"] 22 | _defaults = { 23 | "frequency": timedelta(hours=12), 24 | "name": "DataplaneVNC", 25 | "description": "Feed of Dataplane VNC scanning IPs and ASNs.", 26 | } 27 | 28 | def run(self): 29 | response = self._make_request(self._SOURCE, sort=False) 30 | if response: 31 | lines = response.content.decode("utf-8").split("\n")[68:-5] 32 | 33 | df = pd.DataFrame([line.split("|") for line in lines], columns=self._NAMES) 34 | df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) 35 | df["lastseen"] = pd.to_datetime(df["lastseen"]) 36 | df.ffill(inplace=True) 37 | df = self._filter_observables_by_time(df, "lastseen") 38 | for _, row in df.iterrows(): 39 | self.analyze(row) 40 | 41 | def analyze(self, item): 42 | if not item["ipaddr"]: 43 | return 44 | 45 | context_ip = { 46 | "source": self.name, 47 | } 48 | 49 | ip_obs = ipv4.IPv4(value=item["ipaddr"]).save() 50 | 51 | category = item["category"].lower() 52 | tags = ["dataplane", "vnc", "scanning"] 53 | if category: 54 | tags.append(category) 55 | ip_obs.add_context(self.name, context_ip) 56 | ip_obs.tag(tags) 57 | 58 | asn_obs = asn.ASN(value=item["ASN"]).save() 59 | context_asn = { 60 | "source": self.name, 61 | } 62 | asn_obs.add_context(self.name, context_asn) 63 | asn_obs.tag(tags) 64 | asn_obs.link_to(ip_obs, "ASN_IP", self.name) 65 | 66 | 67 | taskmanager.TaskManager.register_task(DataplaneVNC) 68 | -------------------------------------------------------------------------------- /plugins/feeds/public/deprecated/benkowcc.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta, datetime 2 | 3 | from core.schemas.observables import ipv4, url 4 | from core.schemas import task 5 | from core import taskmanager 6 | 7 | 8 | class BenkowTracker(task.FeedTask): 9 | URL_FEED = "https://benkow.cc/export_csv.php" 10 | _defaults = { 11 | "frequency": timedelta(hours=1), 12 | "name": "BenkowTracker", 13 | "description": "This feed contains known Malware C2 servers", 14 | } 15 | 16 | def update(self): 17 | for index, line in self.update_csv(filter_row="date", delimiter=";", header=0): 18 | self.analyze(line) 19 | 20 | def analyze(self, line): 21 | url_obs = False 22 | url_str = line["url"] 23 |
ip = line["ip"] 24 | family = line["type"] 25 | context = {} 26 | context["first_seen"] = line["date"] 27 | context["source"] = self.name 28 | context["date_added"] = datetime.utcnow() 29 | tags = [] 30 | tags.append(family.lower()) 31 | 32 | url_obs = url.Url(value=url_str).save() 33 | url_obs.add_context(self.name, context) 34 | url_obs.tag(tags) 35 | 36 | ip_obs = ipv4.IPv4(value=ip).save() 37 | ip_obs.add_context(self.name, context) 38 | url_obs.link_to(ip_obs, "url-ip", self.name) 39 | 40 | 41 | taskmanager.TaskManager.register_task(BenkowTracker) 42 | -------------------------------------------------------------------------------- /plugins/feeds/public/deprecated/cybercrimeatmtracker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime, timedelta 3 | 4 | from pytz import timezone 5 | 6 | from core.common.utils import parse_date_to_utc 7 | from core.errors import ObservableValidationError 8 | from core.feed import Feed 9 | from core.observables import Hash 10 | 11 | 12 | class CybercrimeAtmTracker(Feed): 13 | default_values = { 14 | "frequency": timedelta(hours=1), 15 | "name": "CybercrimeAtmTracker", 16 | "source": "http://atm.cybercrime-tracker.net/rss.php", 17 | "description": "CyberCrime ATM Tracker - Latest 40 CnC URLS", 18 | } 19 | 20 | def update(self): 21 | since_last_run = datetime.now(timezone("UTC")) - self.frequency 22 | 23 | for item in self.update_xml( 24 | "item", ["title", "link", "pubDate", "description"] 25 | ): 26 | pub_date = parse_date_to_utc(item["pubDate"]) 27 | if self.last_run is not None: 28 | if since_last_run > pub_date: 29 | continue 30 | 31 | self.analyze(item, pub_date) 32 | 33 | def analyze(self, item, pub_date): # pylint: disable=arguments-differ 34 | observable_sample = item["title"] 35 | context_sample = {} 36 | context_sample["description"] = "ATM sample" 37 | context_sample["first_seen"] = pub_date 38 | context_sample["source"] = self.name 39 | context_sample["date_added"] = datetime.utcnow() 40 | family = False 41 | if " - " in observable_sample: 42 | family, observable_sample = observable_sample.split(" - ") 43 | 44 | try: 45 | sample = Hash.get_or_create(value=observable_sample) 46 | sample.add_context(context_sample, dedup_list=["date_added"]) 47 | sample.add_source(self.name) 48 | sample_tags = ["atm"] 49 | if family: 50 | sample_tags.append(family) 51 | sample.tag(sample_tags) 52 | except ObservableValidationError as e: 53 | logging.error(e) 54 | return 55 | -------------------------------------------------------------------------------- /plugins/feeds/public/deprecated/dynamic_dns.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime, timedelta 3 | 4 | from core.errors import ObservableValidationError 5 | from core.feed import Feed 6 | from core.observables import Hostname 7 | 8 | 9 | class DynamicDomains(Feed): 10 | default_values = { 11 | "frequency": timedelta(hours=24), 12 | "name": "DynamicDomains", 13 | "source": "http://mirror1.malwaredomains.com/files/dynamic_dns.txt", 14 | "description": "Malwaredomains.com Dynamic Domains list", 15 | } 16 | 17 | def update(self): 18 | for line in self.update_lines(): 19 | if line.startswith("#"): 20 | continue 21 | 22 | self.analyze(line) 23 | 24 | def analyze(self, item): 25 | item = item.strip() 26 | logging.debug(item) 27 | sline = item.split() 28 | 29 | hostname = sline[0] 30 | 31 | context = {} 32 | context["source"] = self.name 33
| context["provider"] = sline[0] 34 | context["date_added"] = datetime.utcnow() 35 | 36 | try: 37 | hostname = Hostname.get_or_create(value=hostname) 38 | hostname.add_context(context, dedup_list=["date_added"]) 39 | hostname.add_source(self.name) 40 | hostname.tag("dyndns") 41 | except ObservableValidationError: 42 | pass 43 | -------------------------------------------------------------------------------- /plugins/feeds/public/deprecated/ipspamlist.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import timedelta, datetime 3 | 4 | from dateutil import parser 5 | 6 | from core import Feed 7 | from core.errors import ObservableValidationError 8 | from core.observables import Ip 9 | 10 | 11 | class IPSpamList(Feed): 12 | default_values = { 13 | "frequency": timedelta(days=1), 14 | "name": "IPSpamList", 15 | "source": "http://www.ipspamlist.com/public_feeds.csv", 16 | "description": "Service provided by NoVirusThanks that keeps track of malicious " 17 | "IP addresses engaged in hacking attempts, spam comments", 18 | } 19 | 20 | def update(self): 21 | for index, line in self.update_csv(delimiter=",", filter_row="first_seen"): 22 | self.analyze(line) 23 | 24 | def analyze(self, item): 25 | context = { 26 | "source": self.name, 27 | "threat": item["category"], 28 | "first_seen": item["first_seen"], 29 | "last_seen": parser.parse(item["last_seen"]), 30 | "attack_count": item["attacks_count"], 31 | "date_added": datetime.utcnow(), 32 | } 33 | ip_address = item["ip_address"] 34 | try: 35 | ip_obs = Ip.get_or_create(value=ip_address) 36 | ip_obs.tag(context["threat"]) 37 | ip_obs.add_source(self.name) 38 | ip_obs.add_context(context, dedup_list=["date_added"]) 39 | except ObservableValidationError as e: 40 | logging.error("Error in IP format %s %s" % (ip_address, e)) 41 | -------------------------------------------------------------------------------- /plugins/feeds/public/deprecated/malwaremustdiecncs.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime, timedelta 3 | 4 | from core.errors import ObservableValidationError 5 | from core.feed import Feed 6 | from core.observables import Hostname 7 | 8 | 9 | class MalwareMustDieCncs(Feed): 10 | default_values = { 11 | "frequency": timedelta(hours=24), 12 | "name": "MalwareMustDieCncs", 13 | "source": "https://malwared.malwaremustdie.org/rss.php", 14 | "description": "List of cncs", 15 | } 16 | 17 | def update(self): 18 | since_last_run = datetime.now() - self.frequency 19 | 20 | for item in self.update_xml("item", ["title", "description"]): 21 | if self.last_run is not None: 22 | try: 23 | if ( 24 | datetime.strptime(item["description"], "%d/%b/%Y") 25 | < since_last_run 26 | ): 27 | continue 28 | except ValueError: 29 | if ( 30 | datetime.strptime(item["description"], "%d/%B/%Y") 31 | < since_last_run 32 | ): 33 | continue 34 | self.analyze(item["title"]) 35 | 36 | def analyze(self, cnc): 37 | try: 38 | cnc_data = Hostname.get_or_create(value=cnc) 39 | cnc_data.add_context( 40 | {"source": self.name, "date_added": datetime.utcnow()}, 41 | dedup_list=["date_added"], 42 | ) 43 | cnc_data.add_source(self.name) 44 | except ObservableValidationError as e: 45 | logging.error(e) 46 | -------------------------------------------------------------------------------- /plugins/feeds/public/deprecated/tweetlive.py: -------------------------------------------------------------------------------- 1 | from 
datetime import timedelta 2 | from typing import ClassVar 3 | 4 | import pandas as pd 5 | 6 | from core import taskmanager 7 | from core.schemas import observable, task 8 | 9 | MAPPING = { 10 | "domain": observable.hostname.Hostname, 11 | "ip": observable.ipv4.IPv4, 12 | "sha256": observable.sha256.SHA256, 13 | "url": observable.url.Url, 14 | "md5": observable.md5.MD5, 15 | } 16 | 17 | 18 | class TweetLive(task.FeedTask): 19 | _defaults = { 20 | "frequency": timedelta(days=1), 21 | "name": "TweetLive", 22 | "description": "This feed contains IOCs parsed out of Tweets in https://twitter.com/i/lists/1423693426437001224", 23 | } 24 | _SOURCE: ClassVar["str"] = "https://api.tweetfeed.live/v1/today" 25 | 26 | def run(self): 27 | r = self._make_request(self._SOURCE, sort=False) 28 | if not r: 29 | raise ValueError("Error fetching data") 30 | 31 | data = r.json() 32 | 33 | if not data: 34 | raise ValueError("No data returned") 35 | 36 | df = pd.DataFrame(data) 37 | df = df.fillna("") 38 | df["date"] = pd.to_datetime(df["date"]) 39 | 40 | df = self._filter_observables_by_time(df, "date") 41 | 42 | for _, line in df.iterrows(): 43 | self.analyze(line) 44 | 45 | def analyze(self, item): 46 | obs_type = MAPPING.get(item["type"]) 47 | 48 | if not obs_type: 49 | raise ValueError(f"Observable type {item['type']} not supported") 50 | 51 | obs = obs_type(value=item["value"]).save() 52 | 53 | context = {} 54 | 55 | if item["tweet"]: 56 | context["tweet"] = item["tweet"] 57 | if item["user"]: 58 | context["user"] = item["user"] 59 | if item["tags"]: 60 | obs.tag(item["tags"]) 61 | 62 | if context: 63 | obs.add_context(self.name, context) 64 | 65 | 66 | taskmanager.TaskManager.register_task(TweetLive) 67 | -------------------------------------------------------------------------------- /plugins/feeds/public/dfiq.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import tempfile 4 | from datetime import timedelta 5 | from io import BytesIO 6 | from zipfile import ZipFile 7 | 8 | from core import taskmanager 9 | from core.config.config import yeti_config 10 | from core.schemas import dfiq, task 11 | 12 | 13 | class DFIQFeed(task.FeedTask): 14 | _defaults = { 15 | "name": "DFIQ Github repo", 16 | "frequency": timedelta(hours=1), 17 | "type": "feed", 18 | "description": "DFIQ feed", 19 | } 20 | 21 | def run(self): 22 | # move back to "https://github.com/google/dfiq/archive/refs/heads/main.zip" 23 | # once the changes have been merged.
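# Flow: fetch the DFIQ repo as a zip, extract it to a temporary directory,
# import every data YAML it contains, then import any extra local
# directories configured under dfiq.extra_dirs in yeti.conf.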
24 | response = self._make_request( 25 | "https://github.com/tomchop/dfiq/archive/refs/heads/dfiq1.1.zip" 26 | ) 27 | if not response: 28 | logging.info("No response: skipping DFIQ update") 29 | return 30 | 31 | with tempfile.TemporaryDirectory() as tempdir: 32 | ZipFile(BytesIO(response.content)).extractall(path=tempdir) 33 | dfiq.read_from_data_directory( 34 | os.path.join(tempdir, "*", "dfiq", "data", "*", "*.yaml"), 35 | "DFIQFeed", 36 | overwrite=True, 37 | ) 38 | 39 | extra_dirs = yeti_config.get("dfiq", "extra_dirs") 40 | if not extra_dirs: 41 | return 42 | for directory in extra_dirs.split(","): 43 | logging.info("Processing extra directory %s", directory) 44 | dfiq.read_from_data_directory(directory, "DFIQFeed") 45 | 46 | 47 | taskmanager.TaskManager.register_task(DFIQFeed) 48 | -------------------------------------------------------------------------------- /plugins/feeds/public/elastic.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import logging 4 | import os 5 | import tempfile 6 | from datetime import timedelta 7 | from io import BytesIO 8 | from zipfile import ZipFile 9 | 10 | from core import taskmanager 11 | from core.schemas import indicator, task 12 | 13 | logger = logging.getLogger(__name__) 14 | logger.setLevel(logging.INFO) 15 | 16 | 17 | class Elastic(task.FeedTask): 18 | _defaults = { 19 | "name": "Elastic", 20 | "frequency": timedelta(days=1), 21 | "type": "feed", 22 | "description": "Collection of protection rules by Elastic Security: https://www.elastic.co/security/endpoint-security", 23 | } 24 | 25 | _SOURCE_ZIP = ( 26 | "https://github.com/elastic/protections-artifacts/archive/refs/heads/main.zip" 27 | ) 28 | 29 | def run(self): 30 | response = self._make_request(self._SOURCE_ZIP, no_cache=True) 31 | if not response: 32 | logging.info(f"No response: skipping {self.name} update") 33 | return 34 | 35 | with tempfile.TemporaryDirectory() as tempdir: 36 | ZipFile(BytesIO(response.content)).extractall(path=tempdir) 37 | 38 | rules_path = os.path.join( 39 | tempdir, "protections-artifacts-main", "yara", "rules" 40 | ) 41 | for file in glob.glob(f"{rules_path}/*.yar"): 42 | with open(file, "r") as f: 43 | rule = f.read() 44 | 45 | indicator.Yara.import_bulk_rules(rule, tags=["Elastic"]) 46 | 47 | 48 | taskmanager.TaskManager.register_task(Elastic) 49 | -------------------------------------------------------------------------------- /plugins/feeds/public/feodo_tracker_ip_blocklist.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from io import StringIO 3 | from typing import ClassVar 4 | 5 | import pandas as pd 6 | 7 | from core import taskmanager 8 | from core.schemas import task 9 | from core.schemas.observables import ipv4 10 | 11 | 12 | class FeodoTrackerIPBlockList(task.FeedTask): 13 | _SOURCE: ClassVar["str"] = "https://feodotracker.abuse.ch/downloads/ipblocklist.csv" 14 | 15 | _defaults = { 16 | "frequency": datetime.timedelta(hours=24), 17 | "name": "FeodoTrackerIPBlocklist", 18 | "source": "https://feodotracker.abuse.ch/downloads/ipblocklist.csv", 19 | "description": "Feodo Tracker IP Feed. 
This feed shows a full list of C2s.", 20 | } 21 | 22 | def run(self): 23 | response = self._make_request(self._SOURCE) 24 | if response: 25 | data = response.text 26 | df = pd.read_csv( 27 | StringIO(data), 28 | comment="#", 29 | delimiter=",", 30 | quotechar='"', 31 | quoting=1, 32 | skipinitialspace=True, 33 | parse_dates=["first_seen_utc"], 34 | ) 35 | df = self._filter_observables_by_time(df, "first_seen_utc") 36 | df.ffill(inplace=True) 37 | for _, line in df.iterrows(): 38 | self.analyze(line) 39 | 40 | def analyze(self, item): 41 | tags = ["c2", "blocklist"] 42 | tags.append(item["malware"].lower()) 43 | 44 | context = { 45 | "first_seen": str(item["first_seen_utc"]), 46 | "last_online": item["last_online"], 47 | "c2_status": item["c2_status"], 48 | "port": item["dst_port"], 49 | } 50 | 51 | ip_observable = ipv4.IPv4(value=item["dst_ip"]).save() 52 | ip_observable.add_context(source=self.name, context=context) 53 | ip_observable.tag(tags) 54 | 55 | 56 | taskmanager.TaskManager.register_task(FeodoTrackerIPBlockList) 57 | -------------------------------------------------------------------------------- /plugins/feeds/public/openphish.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import timedelta 3 | from typing import ClassVar 4 | 5 | from core import taskmanager 6 | from core.schemas import observable, task 7 | 8 | 9 | class OpenPhish(task.FeedTask): 10 | # set default values for feed 11 | _SOURCE: ClassVar["str"] = ( 12 | "https://raw.githubusercontent.com/openphish/public_feed/refs/heads/main/feed.txt" 13 | ) 14 | _defaults = { 15 | "frequency": timedelta(hours=1), 16 | "name": "OpenPhish", 17 | "description": "OpenPhish is a community feed of phishing URLs which are updated every 24 hours.", 18 | } 19 | 20 | # run() is the main function that is called by the scheduler 21 | # it is the main entry point into the feed 22 | def run(self): 23 | # make a request to the feed URL 24 | response = self._make_request(self._SOURCE) 25 | if response: 26 | # iterate over the lines in the response and analyze each one 27 | for line in response.text.split("\n"): 28 | self.analyze(line) 29 | 30 | # don't need to do much here; want to add the information 31 | # and tag it with 'phish' 32 | def analyze(self, url_str): 33 | context = {"source": self.name} 34 | 35 | # check to see if the URL is already in the database 36 | # if it is, then we don't need to do anything 37 | # if it isn't, then we need to add it 38 | if not url_str: 39 | return 40 | try: 41 | obs = observable.save(type="url", value=url_str, tags=["phish"]) 42 | obs.add_context(self.name, context) 43 | except Exception: 44 | self.logger.exception(f"Failed to save URL: {url_str}") 45 | 46 | 47 | taskmanager.TaskManager.register_task(OpenPhish) 48 | -------------------------------------------------------------------------------- /plugins/feeds/public/phishing_database.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This class will incorporate the PhishingDatabase feed into yeti.""" 3 | 4 | from datetime import timedelta 5 | from typing import ClassVar 6 | 7 | from core import taskmanager 8 | from core.schemas import task 9 | from core.schemas.observables import hostname 10 | 11 | 12 | class PhishingDatabase(task.FeedTask): 13 | """This class will incorporate the PhishingDatabase feed into yeti.""" 14 | 15 | _defaults = { 16 | "frequency": timedelta(hours=1), 17 | "name": "PhishingDatabase", 18 |
"description": "PhishingDatabase is a community feed of phishing URLs which are updated every 24 hours.", 19 | } 20 | 21 | _SOURCE: ClassVar["str"] = ( 22 | "https://phishing.army/download/phishing_army_blocklist_extended.txt" 23 | ) 24 | 25 | def run(self): 26 | response = self._make_request(self._SOURCE) 27 | if response: 28 | for line in response.text.split("\n"): 29 | if not line.startswith("#"): 30 | self.analyze(line.strip()) 31 | 32 | def analyze(self, domain): 33 | if domain: 34 | obs = hostname.Hostname(value=domain).save() 35 | obs.add_context(self.name, {"source": self.name}) 36 | obs.tag(["phish", "phishing_database", "blocklist"]) 37 | 38 | 39 | taskmanager.TaskManager.register_task(PhishingDatabase) 40 | -------------------------------------------------------------------------------- /plugins/feeds/public/phishtank.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from io import StringIO 3 | from typing import ClassVar 4 | 5 | import pandas as pd 6 | 7 | from core import taskmanager 8 | from core.config.config import yeti_config 9 | from core.schemas import task 10 | from core.schemas.observables import url 11 | 12 | 13 | class PhishTank(task.FeedTask): 14 | _defaults = { 15 | "frequency": timedelta(hours=1), 16 | "name": "PhishTank", 17 | "description": "PhishTank is a collaborative clearing house for data and information about phishing on the Internet.", 18 | } 19 | 20 | _SOURCE: ClassVar["str"] = "http://data.phishtank.com/data/%s/online-valid.csv" 21 | 22 | # don't need to do much here; want to add the information 23 | # and tag it with 'phish' 24 | def run(self): 25 | key_phishtank = yeti_config.get("phishtank", "key") 26 | assert key_phishtank, "PhishTank key not configured in yeti.conf" 27 | 28 | response = self._make_request(self._SOURCE % key_phishtank) 29 | if response: 30 | data = response.text 31 | 32 | df = pd.read_csv( 33 | StringIO(data), 34 | delimiter=",", 35 | date_parser=lambda x: pd.to_datetime(x.rsplit("+", 1)[0]), 36 | comment=None, 37 | parse_dates=["submission_time"], 38 | ) 39 | df.ffill(inplace=True) 40 | 41 | df = self._filter_observables_by_time(df, "submission_time") 42 | for _, line in df.iterrows(): 43 | self.analyze(line) 44 | 45 | def analyze(self, line): 46 | tags = ["phishing", "phishtank"] 47 | 48 | url_str = line["url"] 49 | 50 | context = { 51 | "source": self.name, 52 | "phish_detail_url": line["phish_detail_url"], 53 | "submission_time": line["submission_time"], 54 | "verified": line["verified"], 55 | "verification_time": line["verification_time"], 56 | "online": line["online"], 57 | "target": line["target"], 58 | } 59 | 60 | if url_str is not None and url_str != "": 61 | url_obs = url.Url(value=url_str).save() 62 | url_obs.add_context(self.name, context) 63 | url_obs.tag(tags) 64 | 65 | 66 | taskmanager.TaskManager.register_task(PhishTank) 67 | -------------------------------------------------------------------------------- /plugins/feeds/public/random.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | from core import taskmanager 4 | from core.schemas import observable, task 5 | 6 | DATA = [ 7 | "hostname1.com", 8 | "hostname2.com", 9 | "hostname3.com", 10 | "hostname4.com", 11 | "hostname5.com", 12 | ] 13 | 14 | 15 | class Random(task.FeedTask): 16 | _defaults = { 17 | "frequency": timedelta(hours=1), 18 | "type": "feed", 19 | # "source": "https://bazaar.abuse.ch/export/csv/recent/", 
20 | "description": "This feed contains a static list of test hostnames", 21 | } 22 | 23 | def run(self): 24 | for item in DATA: 25 | print(item) 26 | observable.save(value=item) 27 | 28 | 29 | taskmanager.TaskManager.register_task(Random) 30 | -------------------------------------------------------------------------------- /plugins/feeds/public/rulezskbruteforceblocker.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import ClassVar 3 | 4 | import pandas as pd 5 | from dateutil import parser 6 | 7 | from core import taskmanager 8 | from core.schemas import task 9 | from core.schemas.observables import ipv4 10 | 11 | 12 | class RulezSKBruteforceBlocker(task.FeedTask): 13 | _defaults = { 14 | "frequency": timedelta(hours=24), 15 | "name": "RulezSKBruteforceBlocker", 16 | "description": "This feed contains a daily list of IPs from danger.rulez.sk", 17 | } 18 | 19 | _SOURCE: ClassVar["str"] = ( 20 | "http://danger.rulez.sk/projects/bruteforceblocker/blist.php" 21 | ) 22 | 23 | def run(self): 24 | r = self._make_request(self._SOURCE, headers={"User-Agent": "yeti-project"}) 25 | if r: 26 | data = [ 27 | line.split("\t") 28 | for line in r.text.split("\n") 29 | if not line.startswith("#") and line.strip() 30 | ] 31 | df = pd.DataFrame(data) 32 | df.drop([1, 3], axis=1, inplace=True) 33 | df.columns = ["ip", "last_report", "count", "id"] 34 | df["last_report"] = df["last_report"].str.replace("# ", "") 35 | df["last_report"] = df["last_report"].apply(lambda x: parser.parse(x)) 36 | 37 | df = self._filter_observables_by_time(df, "last_report") 38 | for _, row in df.iterrows(): 39 | self.analyze(row) 40 | 41 | def analyze(self, row): 42 | context = {} 43 | context["first_seen"] = row["last_report"] 44 | context["source"] = self.name 45 | context["count"] = row["count"] 46 | context["id"] = row["id"] 47 | 48 | ipobs = ipv4.IPv4(value=row["ip"]).save() 49 | ipobs.add_context(self.name, context) 50 | ipobs.tag(["bruteforceblocker", "blocklist", "rules.sk"]) 51 | 52 | 53 | taskmanager.TaskManager.register_task(RulezSKBruteforceBlocker) 54 | -------------------------------------------------------------------------------- /plugins/feeds/public/signaturebase.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import logging 3 | import os 4 | import tempfile 5 | from datetime import timedelta 6 | from io import BytesIO 7 | from zipfile import ZipFile 8 | 9 | from core import taskmanager 10 | from core.schemas import indicator, task 11 | 12 | 13 | class Neo23x0SignatureBase(task.FeedTask): 14 | _defaults = { 15 | "name": "Neo23x0 Signature base", 16 | "frequency": timedelta(days=1), 17 | "type": "feed", 18 | "description": "Gets Yara rules from the Neo23x0/signature-base GitHub repo.", 19 | } 20 | 21 | def run(self): 22 | response = self._make_request( 23 | "https://github.com/Neo23x0/signature-base/archive/refs/heads/master.zip" 24 | ) 25 | if not response: 26 | logging.info("No response: skipping Neo23x0 Signature base update") 27 | return 28 | 29 | with tempfile.TemporaryDirectory() as tempdir: 30 | ZipFile(BytesIO(response.content)).extractall(path=tempdir) 31 | rules_path = os.path.join(tempdir, "signature-base-master", "yara") 32 | 33 | for file in glob.glob(f"{rules_path}/*.yar"): 34 | with open(file, "r") as f: 35 | rule = f.read() 36 | 37 | indicator.Yara.import_bulk_rules( 38 | rule, tags=["Neo23x0", "signature-base"] 39 | ) 40 | 41 | 42 |
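# signature-base splits its rules across many .yar files, so run() above
# imports each file with its own import_bulk_rules() call; registering the
# task below makes it discoverable by the scheduler.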
taskmanager.TaskManager.register_task(Neo23x0SignatureBase) 43 | -------------------------------------------------------------------------------- /plugins/feeds/public/sslblacklist_ip.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from io import StringIO 3 | from typing import ClassVar 4 | 5 | import pandas as pd 6 | 7 | from core import taskmanager 8 | from core.schemas import task 9 | from core.schemas.observables import ipv4, url 10 | 11 | 12 | class SSLBlackListIP(task.FeedTask): 13 | _defaults = { 14 | "frequency": timedelta(hours=1), 15 | "name": "SSLBlackListIP", 16 | "description": "SSL Black List IP", 17 | } 18 | 19 | _SOURCE: ClassVar["str"] = "https://sslbl.abuse.ch/blacklist/sslipblacklist.csv" 20 | 21 | def run(self): 22 | response = self._make_request(self._SOURCE) 23 | if response: 24 | data = response.text 25 | names = ["Firstseen", "DstIP", "DstPort"] 26 | df = pd.read_csv( 27 | StringIO(data), 28 | comment="#", 29 | delimiter=",", 30 | names=names, 31 | quotechar='"', 32 | quoting=0, 33 | skipinitialspace=True, 34 | parse_dates=["Firstseen"], 35 | header=8, 36 | ) 37 | df.ffill(inplace=True) 38 | df = self._filter_observables_by_time(df, "Firstseen") 39 | 40 | for _, line in df.iterrows(): 41 | self.analyze(line) 42 | 43 | def analyze(self, line): 44 | first_seen = line["Firstseen"] 45 | dst_ip = line["DstIP"] 46 | ip_obs = False 47 | tags = ["potentially_malicious_infrastructure", "c2"] 48 | port = line["DstPort"] 49 | context = { 50 | "source": self.name, 51 | "first_seen": first_seen, 52 | } 53 | 54 | ip_obs = ipv4.IPv4(value=dst_ip).save() 55 | ip_obs.add_context(self.name, context) 56 | ip_obs.tag(tags) 57 | _url = "https://{dst_ip}:{port}/".format(dst_ip=dst_ip, port=port) 58 | 59 | url_obs = url.Url(value=_url).save() 60 | url_obs.add_context(self.name, context) 61 | url_obs.tag(tags) 62 | 63 | ip_obs.link_to(url_obs, "ip-url", self.name) 64 | 65 | 66 | taskmanager.TaskManager.register_task(SSLBlackListIP) 67 | -------------------------------------------------------------------------------- /plugins/feeds/public/sslblacklist_ja3.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from io import StringIO 3 | from typing import ClassVar 4 | 5 | import pandas as pd 6 | 7 | from core import taskmanager 8 | from core.schemas import task 9 | from core.schemas.observables import ja3 10 | 11 | 12 | class SSLBlacklistJA3(task.FeedTask): 13 | _defaults = { 14 | "frequency": timedelta(hours=1), 15 | "name": "SSLBlacklistJA3", 16 | "description": "This feed contains JA3 SSL fingerprints from abuse.ch", 17 | } 18 | 19 | _SOURCE: ClassVar["str"] = "https://sslbl.abuse.ch/blacklist/ja3_fingerprints.csv" 20 | _NAMES = ["ja3_md5", "first_seen", "last_seen", "threat"] 21 | 22 | def run(self): 23 | response = self._make_request(self._SOURCE, auth=None, verify=True) 24 | if response: 25 | data = StringIO(response.text) 26 | 27 | df = pd.read_csv( 28 | data, 29 | delimiter=",", 30 | comment="#", 31 | names=self._NAMES, 32 | parse_dates=["first_seen", "last_seen"], 33 | ) 34 | df = self._filter_observables_by_time(df, "last_seen") 35 | df = df.fillna("") 36 | for _, row in df.iterrows(): 37 | self.analyze(row) 38 | 39 | def analyze(self, row): 40 | ja3_md5 = row["ja3_md5"] 41 | first_seen = row["first_seen"] 42 | last_seen = row["last_seen"] 43 | threat = row["threat"] 44 | 45 | ja3_obs = ja3.JA3(value=ja3_md5).save() 46 | 47 | context =
{} 48 | context["first_seen"] = first_seen 49 | context["last_seen"] = last_seen 50 | 51 | ja3_obs.add_context(self.name, context) 52 | 53 | if threat: 54 | ja3_obs.tag([threat]) 55 | 56 | 57 | taskmanager.TaskManager.register_task(SSLBlacklistJA3) 58 | -------------------------------------------------------------------------------- /plugins/feeds/public/threatview_c2.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import timedelta 3 | from typing import ClassVar 4 | 5 | from core import taskmanager 6 | from core.schemas import observable, task 7 | 8 | 9 | class ThreatviewC2(task.FeedTask): 10 | _defaults = { 11 | "frequency": timedelta(hours=1), 12 | "name": "ThreatviewC2", 13 | "description": "This feed contains Cobalt Strike C2 IPs and Hostnames", 14 | } 15 | 16 | _SOURCE: ClassVar["str"] = ( 17 | "https://threatview.io/Downloads/High-Confidence-CobaltstrikeC2_IP_feed.txt" 18 | ) 19 | 20 | def run(self): 21 | response = self._make_request(self._SOURCE, sort=False) 22 | if response: 23 | lines = response.content.decode("utf-8").split("\n")[2:-1] 24 | for line in lines: 25 | self.analyze(line) 26 | 27 | def analyze(self, item): 28 | item = item.strip() 29 | 30 | context = {"source": self.name} 31 | tags = ["c2", "cobaltstrike"] 32 | 33 | try: 34 | obs = observable.save(value=item, tags=tags) 35 | obs.add_context(self.name, context) 36 | except ValueError as error: 37 | return logging.error(error) 38 | 39 | 40 | taskmanager.TaskManager.register_task(ThreatviewC2) 41 | -------------------------------------------------------------------------------- /plugins/feeds/public/tranco_top_domains.py: -------------------------------------------------------------------------------- 1 | import io 2 | import logging 3 | from datetime import timedelta 4 | from typing import ClassVar 5 | 6 | from core import taskmanager 7 | from core.config.config import yeti_config 8 | from core.schemas import task 9 | from core.schemas.observables import hostname 10 | 11 | 12 | class TrancoTopDomains(task.FeedTask): 13 | _defaults = { 14 | "frequency": timedelta(hours=24), 15 | "name": "TrancoTopDomains", 16 | "description": "Import Tranco top domains", 17 | } 18 | _SOURCE: ClassVar["str"] = "https://tranco-list.eu" 19 | 20 | def run(self): 21 | top_domains = yeti_config.get("tranco", "top_domains", 10000) 22 | include_subdomains = yeti_config.get("tranco", "include_subdomains", False) 23 | if include_subdomains: 24 | endpoint = "https://tranco-list.eu/download/J9X3Y/1000000" 25 | else: 26 | endpoint = "https://tranco-list.eu/download/QG9J4/1000000" 27 | logging.info( 28 | f"Importing {top_domains} Tranco top domains (include subdomains: {include_subdomains})" 29 | ) 30 | response = self._make_request(endpoint, sort=False) 31 | context = { 32 | "name": self.name, 33 | } 34 | feed = io.BytesIO(response.content) 35 | while top_domains > 0: 36 | line = feed.readline().decode("utf-8").strip() 37 | _, domain = line.split(",") 38 | hostname_obs = hostname.Hostname(value=domain).save() 39 | hostname_obs.add_context(self.name, context) 40 | hostname_obs.tag(["tranco", "top_domain"]) 41 | top_domains -= 1 42 | 43 | 44 | taskmanager.TaskManager.register_task(TrancoTopDomains) 45 | -------------------------------------------------------------------------------- /plugins/feeds/public/viriback_tracker.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from io import StringIO 3 | from 
typing import ClassVar 4 | 5 | import pandas as pd 6 | 7 | from core import taskmanager 8 | from core.schemas import task 9 | from core.schemas.observables import ipv4, url 10 | 11 | 12 | class ViriBackTracker(task.FeedTask): 13 | _defaults = { 14 | "frequency": timedelta(hours=24), 15 | "name": "ViriBackTracker", 16 | "description": "Malware C2 Urls and IPs", 17 | } 18 | 19 | _SOURCE: ClassVar["str"] = "http://tracker.viriback.com/dump.php" 20 | 21 | def run(self): 22 | response = self._make_request(self._SOURCE) 23 | if response: 24 | data = response.text 25 | df = pd.read_csv( 26 | StringIO(data), parse_dates=["FirstSeen"], date_format="%d-%m-%Y" 27 | ) 28 | df.ffill(inplace=True) 29 | df = self._filter_observables_by_time(df, "FirstSeen") 30 | for _, line in df.iterrows(): 31 | self.analyze(line) 32 | 33 | def analyze(self, line): 34 | url_obs = False 35 | ip_obs = False 36 | family = line["Family"] 37 | url_str = line["URL"] 38 | ip_str = line["IP"] 39 | first_seen = line["FirstSeen"] 40 | family = family.lower() 41 | context = { 42 | "first_seen": first_seen, 43 | "source": self.name, 44 | } 45 | tags = ["c2"] 46 | if family: 47 | tags.append(family) 48 | 49 | if url_str: 50 | url_obs = url.Url(value=url_str).save() 51 | url_obs.add_context(self.name, context) 52 | url_obs.tag(tags) 53 | 54 | if ip_str: 55 | ip_obs = ipv4.IPv4(value=ip_str).save() 56 | ip_obs.add_context(self.name, context) 57 | ip_obs.tag(tags) 58 | 59 | if url_obs and ip_obs: 60 | url_obs.link_to(ip_obs, "resolve_to", self.name) 61 | 62 | 63 | taskmanager.TaskManager.register_task(ViriBackTracker) 64 | -------------------------------------------------------------------------------- /plugins/feeds/public/vxvault_url.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import timedelta 3 | from typing import ClassVar 4 | 5 | from core import taskmanager 6 | from core.schemas import task 7 | from core.schemas.observables import url 8 | 9 | 10 | class VXVaultUrl(task.FeedTask): 11 | # set default values for feed 12 | _defaults = { 13 | "frequency": timedelta(hours=1), 14 | "name": "VXVaultUrl", 15 | "description": "VXVault Community URL list.", 16 | } 17 | 18 | _SOURCE: ClassVar["str"] = "http://vxvault.net/URL_List.php" 19 | 20 | # should tell yeti how to get and chunk the feed 21 | def run(self): 22 | response = self._make_request(self._SOURCE) 23 | if response: 24 | data = response.text 25 | for item in data.split("\n"): 26 | self.analyze(item.strip()) 27 | 28 | # don't need to do much here; want to add the information 29 | # and tag it with 'malware' 30 | def analyze(self, item): 31 | if not item: 32 | return 33 | tags = ["malware", "dropzone"] 34 | context = {"source": self.name} 35 | logging.debug(f"VXVaultUrl: {item}") 36 | url_obs = url.Url(value=item).save() 37 | url_obs.add_context(self.name, context) 38 | url_obs.tag(tags) 39 | 40 | 41 | taskmanager.TaskManager.register_task(VXVaultUrl) 42 | -------------------------------------------------------------------------------- /plugins/feeds/public/yaraforge.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import logging 4 | import os 5 | import tempfile 6 | from datetime import timedelta 7 | from io import BytesIO 8 | from zipfile import ZipFile 9 | 10 | from core import taskmanager 11 | from core.schemas import indicator, task 12 | 13 | logger = logging.getLogger(__name__) 14 | logger.setLevel(logging.INFO) 15 | 16 | 17 | 
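# YARA Forge publishes pre-merged rule packages: the "core" package is a
# single .yar file, so one import_bulk_rules() call below covers the entire
# release, unlike the per-file loop used for signature-base.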
class YaraForge(task.FeedTask): 18 | _defaults = { 19 | "name": "YaraForge", 20 | "frequency": timedelta(days=1), 21 | "type": "feed", 22 | "description": "Collection of community Yara rules: https://yarahq.github.io/", 23 | } 24 | 25 | _SOURCE_ZIP = "https://github.com/YARAHQ/yara-forge/releases/latest/download/yara-forge-rules-core.zip" 26 | 27 | def run(self): 28 | response = self._make_request(self._SOURCE_ZIP, no_cache=True) 29 | if not response: 30 | logging.info(f"No response: skipping {self.name} update") 31 | return 32 | 33 | with tempfile.TemporaryDirectory() as tempdir: 34 | ZipFile(BytesIO(response.content)).extractall(path=tempdir) 35 | 36 | rules_path = os.path.join( 37 | tempdir, "packages", "core", "yara-rules-core.yar" 38 | ) 39 | with open(rules_path, "r") as f: 40 | rules = f.read() 41 | 42 | indicator.Yara.import_bulk_rules(rules, tags=["yara-forge-core"]) 43 | 44 | 45 | taskmanager.TaskManager.register_task(YaraForge) 46 | -------------------------------------------------------------------------------- /plugins/feeds/public/yaraify.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import timedelta 3 | from io import BytesIO 4 | from typing import ClassVar 5 | from zipfile import ZipFile 6 | 7 | import yara 8 | 9 | from core import taskmanager 10 | from core.schemas import indicator, task 11 | 12 | 13 | class YARAify(task.FeedTask): 14 | _defaults = { 15 | "frequency": timedelta(days=1), 16 | "name": "YARAify", 17 | "description": "This feed contains yara rules", 18 | "source": "", 19 | } 20 | 21 | _SOURCE_ALL_RULES: ClassVar["str"] = ( 22 | "https://yaraify.abuse.ch/yarahub/yaraify-rules.zip" 23 | ) 24 | 25 | def run(self): 26 | response = self._make_request(self._SOURCE_ALL_RULES) 27 | if not response: 28 | return 29 | zip_file = BytesIO(response.content) 30 | with ZipFile(zip_file) as zfile: 31 | for name in zfile.namelist(): 32 | if name.endswith(".yar"): 33 | self.analyze_entry(zfile.read(name).decode("utf-8")) 34 | 35 | def analyze_entry(self, entry: str): 36 | logging.debug(f"Yaraify: {entry}") 37 | try: 38 | yara_rules = yara.compile(source=entry) 39 | except yara.SyntaxError as e: 40 | logging.error(f"Error compiling yara rule: {e}") 41 | return 42 | for r in yara_rules: 43 | ind_obj = indicator.Yara( 44 | name=f"{r.identifier}", 45 | pattern=entry, 46 | diamond=indicator.DiamondModel.capability, 47 | description=f"{r.meta.get('description', 'N/A')}", 48 | ) 49 | 50 | ind_obj.save() 51 | 52 | 53 | taskmanager.TaskManager.register_task(YARAify) 54 | -------------------------------------------------------------------------------- /plugins/inline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/plugins/inline/__init__.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "yeti" 3 | version = "0.1.0" 4 | description = "Yeti" 5 | authors = [ 6 | {name = "Thomas Chopitea", email = "tomchop@gmail.com"}, 7 | ] 8 | license = {text = "Apache 2.0"} 9 | readme = "README.md" 10 | requires-python = ">=3.10,<3.12" 11 | dependencies = [ 12 | "uvicorn>=0.34", 13 | "fastapi>=0.115", 14 | "python-arango>=8.1.2", 15 | "celery>=5.3.4", 16 | "validators>=0.34.0", 17 | "python-jose[cryptography]>=3.4", 18 | 
"passlib[bcrypt]>=1.7.4", 19 | "python-multipart>=0.0.6,<0.0.19", 20 | "pandas>=2.1.1", 21 | "redis>=5.0.0", 22 | "click>=8.1.7", 23 | "authlib>=1.2.1", 24 | "itsdangerous>=2.1.2", 25 | "pyyaml>=6.0.1", 26 | "parameterized>=0.9.0", 27 | "yara-python>=4.5.0", 28 | "idstools>=0.6.5", 29 | "aenum>=3.1.15", 30 | "tqdm>=4.67.1", 31 | "plyara>=2.2", 32 | "minijinja>=2.9.0", 33 | "beautifulsoup4>=4.13.4", 34 | "artifacts", 35 | "google-auth>=2.39.0", 36 | ] 37 | 38 | [dependency-groups] 39 | dev = [ 40 | "pylint>=2.16.1", 41 | "mypy>=1.0.0", 42 | "httpx>=0.23.3", 43 | "ruff>=0.9.0", 44 | "httpx>=0.28.1", 45 | ] 46 | plugins = [ 47 | "pymisp>=2.4.176", 48 | "otxv2>=1.5.12", 49 | "shodan>=1.30.0", 50 | "timesketch-api-client>=20230721", 51 | "pyopenssl>=23.3.0", 52 | "ipwhois>=1.2.0", 53 | "maclookup>=1.0.3", 54 | "censys>=2.2.10", 55 | "artifacts @ git+https://github.com/forensicartifacts/artifacts.git@main", 56 | "pygithub>=2.3.0", 57 | ] 58 | s3 = [ 59 | "boto3>=1.35.22", 60 | ] 61 | 62 | [tool.ruff] 63 | # Enable the isort rules. 64 | lint.extend-select = ["I"] 65 | lint.ignore = ["E402", "F401"] 66 | # exclude files in the /deprecated/ directories 67 | exclude = ["deprecated"] 68 | 69 | [tool.uv.sources] 70 | artifacts = { git = "https://github.com/forensicartifacts/artifacts.git", rev = "main" } 71 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/tests/__init__.py -------------------------------------------------------------------------------- /tests/apiv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/tests/apiv2/__init__.py -------------------------------------------------------------------------------- /tests/apiv2/import_data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import unittest 4 | 5 | from fastapi.testclient import TestClient 6 | 7 | from core import database_arango 8 | from core.schemas.user import UserSensitive 9 | from core.web import webapp 10 | 11 | client = TestClient(webapp.app) 12 | 13 | 14 | class ImportData(unittest.TestCase): 15 | @classmethod 16 | def setUpClass(cls) -> None: 17 | logging.disable(sys.maxsize) 18 | database_arango.db.connect(database="yeti_test") 19 | database_arango.db.truncate() 20 | 21 | user = UserSensitive(username="test") 22 | user.set_password("test") 23 | user.save() 24 | 25 | apikey = user.create_api_key("default") 26 | token_data = client.post( 27 | "/api/v2/auth/api-token", headers={"x-yeti-apikey": apikey} 28 | ).json() 29 | client.headers = {"Authorization": "Bearer " + token_data["access_token"]} 30 | 31 | cls.path_json = "tests/misp_test_data/misp_event.json" 32 | 33 | def test_import_misp(self): 34 | logging.info("Test import misp") 35 | with open(self.path_json, "rb") as fichier: 36 | files = {"misp_file_json": (self.path_json, fichier)} 37 | r = client.post("/api/v2/import_data/import_misp_json", files=files) 38 | self.assertEqual(r.status_code, 200) 39 | 40 | 41 | if __name__ == "__main__": 42 | unittest.main() 43 | -------------------------------------------------------------------------------- /tests/apiv2/system.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import unittest 4 | 5 | from fastapi.testclient import TestClient 6 | 7 | from core import database_arango 8 | from core.web import webapp 9 | 10 | client = TestClient(webapp.app) 11 | 12 | 13 | class userTest(unittest.TestCase): 14 | def setUp(self) -> None: 15 | logging.disable(sys.maxsize) 16 | database_arango.db.connect(database="yeti_test") 17 | database_arango.db.truncate() 18 | 19 | def test_get_config(self) -> None: 20 | response = client.get("/api/v2/system/config") 21 | data = response.json() 22 | self.assertEqual(response.status_code, 200, data) 23 | self.assertIn("auth", data) 24 | self.assertIn("system", data) 25 | self.assertIn("rbac_enabled", data) 26 | -------------------------------------------------------------------------------- /tests/core_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/tests/core_tests/__init__.py -------------------------------------------------------------------------------- /tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: scenario1 3 | type: scenario 4 | description: > 5 | Long description 1 6 | id: 7 | uuid: 2ee16263-56f8-49a5-9b33-d1a2dd8b829c 8 | dfiq_version: 1.1.0 9 | tags: 10 | - Tag1 11 | - Tag2 12 | - Tag3 13 | -------------------------------------------------------------------------------- /tests/dfiq_test_data/F1005.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: facet1 3 | type: facet 4 | description: > 5 | Long description of facet1 6 | id: F1005 7 | uuid: b2bab31f-1670-4297-8cb1-685747a13468 8 | dfiq_version: 1.1.0 9 | tags: 10 | - Web Browser 11 | parent_ids: 12 | - S1003 13 | -------------------------------------------------------------------------------- /tests/dfiq_test_data/Q1020.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: What is a question? 3 | 4 | type: question 5 | description: 6 | id: Q1020 7 | uuid: bd46ce6e-c933-46e5-960c-36945aaef401 8 | dfiq_version: 1.1.0 9 | tags: 10 | - Web Browser 11 | parent_ids: 12 | - F1005 13 | approaches: 14 | - name: Approach 1 15 | description: blah 16 | notes: 17 | references: [] 18 | tags: [] 19 | steps: 20 | - name: step1 21 | description: step1 description 22 | stage: collection 23 | type: ForensicArtifact 24 | value: NTFSUSNJournal 25 | - name: Run a query 26 | description: null 27 | stage: analysis 28 | type: opensearch-query 29 | value: data_type:"fs:ntfs:usn_change" RANDOM_QUERY 30 | - name: Run another query 31 | description: null 32 | stage: analysis 33 | type: opensearch-query-second 34 | value: data_type:"fs:ntfs:usn_change" ANOTHER_QUERY 35 | -------------------------------------------------------------------------------- /tests/dfiq_test_data/Q1020_no_indicators.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | approaches: [] 3 | name: What is a question? 
4 | type: question 5 | description: desc 6 | id: Q1020 7 | uuid: bd46ce6e-c933-46e5-960c-36945aaef401 8 | dfiq_version: 1.1.0 9 | tags: 10 | - Web Browser 11 | parent_ids: 12 | - F1005 13 | -------------------------------------------------------------------------------- /tests/dfiq_test_data/Q1020_no_parents.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: What is a question? 3 | type: question 4 | description: 5 | id: Q1020 6 | uuid: bd46ce6e-c933-46e5-960c-36945aaef401 7 | dfiq_version: 1.1.0 8 | tags: 9 | - Web Browser 10 | parent_ids: [] 11 | -------------------------------------------------------------------------------- /tests/dfiq_test_data/Q1020_uuid_parent.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: What is a question? 3 | type: question 4 | description: 5 | id: Q1020 6 | uuid: bd46ce6e-c933-46e5-960c-36945aaef401 7 | dfiq_version: 1.1.0 8 | tags: 9 | - Web Browser 10 | parent_ids: 11 | - b2bab31f-1670-4297-8cb1-685747a13468 12 | -------------------------------------------------------------------------------- /tests/dfiq_test_data/Q1020_uuid_scenario_parent.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: What is a question? 3 | type: question 4 | description: 5 | id: Q1020 6 | uuid: bd46ce6e-c933-46e5-960c-36945aaef401 7 | dfiq_version: 1.1.0 8 | tags: 9 | - Web Browser 10 | parent_ids: 11 | - S1003 12 | -------------------------------------------------------------------------------- /tests/dfiq_test_data/S1003.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: scenario1 3 | type: scenario 4 | description: > 5 | Long description 1 6 | id: S1003 7 | uuid: 2ee16263-56f8-49a5-9b33-d1a2dd8b829c 8 | dfiq_version: 1.1.0 9 | tags: 10 | - Tag1 11 | - Tag2 12 | - Tag3 13 | -------------------------------------------------------------------------------- /tests/dfiq_test_data/dfiq_test_data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/tests/dfiq_test_data/dfiq_test_data.zip -------------------------------------------------------------------------------- /tests/helpers.py: -------------------------------------------------------------------------------- 1 | import time 2 | import unittest 3 | from typing import Any, Optional 4 | 5 | from core.database_arango import ArangoYetiConnector 6 | from core.schemas import observable 7 | 8 | 9 | class YetiTestCase(unittest.TestCase): 10 | def check_observables(self, expected_values: list[dict[str, Any]]): 11 | """Checks observables against a list of expected values. 12 | 13 | Args: 14 | expected_values: A list of dictionaries, each containing expected values 15 | for 'value', 'type', and 'tags' attributes. 
16 | """ 17 | # Allow for indexes to catch up 18 | time.sleep(1) 19 | observables = observable.Observable.filter({"value": ""}) 20 | observable_obj, _ = observables 21 | observable_obj = sorted(observable_obj, key=lambda x: x.value) 22 | expected_values = sorted(expected_values, key=lambda x: x["value"]) 23 | 24 | self.assertEqual(len(observable_obj), len(expected_values)) 25 | 26 | for obs, expected_value in zip(observable_obj, expected_values): 27 | self.assertEqual(obs.value, expected_value["value"]) 28 | self.assertEqual(obs.type, expected_value["type"]) 29 | self.assertEqual({tag.name for tag in obs.tags}, expected_value["tags"]) 30 | 31 | def check_neighbors( 32 | self, 33 | indicator: Optional[ArangoYetiConnector], 34 | expected_neighbor_values: list[str], 35 | ): 36 | """Checks an indicator's neighbors against a list of expected values. 37 | 38 | Args: 39 | indicator: The indicator.Query object to use for neighbor comparison. 40 | expected_neighbor_values: A list of expected neighbor values. 41 | """ 42 | if indicator is None: 43 | self.assertIsNone(indicator, "Indicator not found in database") 44 | return 45 | 46 | indicator_neighbors = [ 47 | o.value 48 | for o in indicator.neighbors()[0].values() 49 | if isinstance(o, observable.Observable) 50 | ] 51 | 52 | for expected_value in expected_neighbor_values: 53 | self.assertIn(expected_value, indicator_neighbors) 54 | -------------------------------------------------------------------------------- /tests/migration.py: -------------------------------------------------------------------------------- 1 | import time 2 | import unittest 3 | 4 | from core.migrations import arangodb 5 | 6 | 7 | class ArangoMigrationTest(unittest.TestCase): 8 | def setUp(self): 9 | self.migration_manager = arangodb.ArangoMigrationManager() 10 | self.migration_manager.update_db_version(0) 11 | 12 | def test_migration_init(self): 13 | self.assertEqual(self.migration_manager.db_version, 0) 14 | 15 | def test_migration_0(self): 16 | self.migration_manager.migrate_to_latest(stop_at=1) 17 | self.assertEqual(self.migration_manager.db_version, 1) 18 | 19 | def test_migration_1(self): 20 | observable_col = self.migration_manager.db.collection("observables") 21 | observable_col.truncate() 22 | observable_col.insert( 23 | { 24 | "value": "test.com", 25 | "type": "hostname", 26 | "root_type": "observable", 27 | "created": "2024-11-14T11:58:49.757379Z", 28 | } 29 | ) 30 | observable_col.insert( 31 | { 32 | "value": "test.com123", 33 | "type": "hostname", 34 | "root_type": "observable", 35 | "created": "2024-11-14T11:58:49.757379Z", 36 | } 37 | ) 38 | self.migration_manager.migrate_to_latest(stop_at=2) 39 | self.assertEqual(self.migration_manager.db_version, 2) 40 | job = observable_col.all() 41 | while job.status() != "done": 42 | time.sleep(0.1) 43 | obs = list(job.result()) 44 | self.assertEqual(len(obs), 2) 45 | self.assertEqual(obs[0]["value"], "test.com") 46 | self.assertEqual(obs[0]["is_valid"], True) 47 | self.assertEqual(obs[1]["value"], "test.com123") 48 | self.assertEqual(obs[1]["is_valid"], False) 49 | -------------------------------------------------------------------------------- /tests/observable_test_data/iocs.txt: -------------------------------------------------------------------------------- 1 | 1.1.1[.]1 2 | 8.8.8.8 3 | tomchop[.]me 4 | google.com 5 | http://google.com/ 6 | http://tomchop[.]me/ 7 | d41d8cd98f00b204e9800998ecf8427e 8 | da39a3ee5e6b4b0d3255bfef95601890afd80709 9 | e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 10 | 
junk 11 | 12 | tom_chop.me -------------------------------------------------------------------------------- /tests/schemas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/tests/schemas/__init__.py -------------------------------------------------------------------------------- /tests/schemas/user.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from core import database_arango 4 | from core.schemas.user import UserSensitive 5 | 6 | 7 | class UserTest(unittest.TestCase): 8 | def setUp(self) -> None: 9 | database_arango.db.connect(database="yeti_test") 10 | database_arango.db.truncate() 11 | self.user1 = UserSensitive(username="tomchop").save() 12 | 13 | def test_set_user_password(self) -> None: 14 | self.user1.set_password("test") 15 | self.user1.save() 16 | 17 | user = UserSensitive.find(username="tomchop") 18 | assert user is not None 19 | self.assertEqual(user.username, "tomchop") 20 | self.assertTrue(user.verify_password("test")) 21 | self.assertFalse(user.verify_password("password")) 22 | 23 | def test_create_api_key(self) -> None: 24 | self.user1.create_api_key("apikey") 25 | old_api_key = self.user1.api_keys["apikey"] 26 | self.user1.create_api_key("apikey") 27 | self.user1.save() 28 | 29 | user = UserSensitive.find(username="tomchop") 30 | new_api_key = user.api_keys["apikey"] 31 | self.assertNotEqual(old_api_key.created, new_api_key.created) 32 | self.assertEqual(old_api_key.sub, new_api_key.sub) 33 | 34 | def test_delete_api_key(self) -> None: 35 | user = UserSensitive.find(username="tomchop") 36 | self.assertEqual(len(user.api_keys), 0) 37 | 38 | self.user1.create_api_key("apikey") 39 | self.user1.save() 40 | 41 | user = UserSensitive.find(username="tomchop") 42 | self.assertEqual(len(user.api_keys), 1) 43 | 44 | user.delete_api_key("apikey") 45 | user.save() 46 | user = UserSensitive.find(username="tomchop") 47 | self.assertEqual(len(user.api_keys), 0) 48 | -------------------------------------------------------------------------------- /yetictl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeti-platform/yeti/6a38091150199cd5ac73aec2bb49a38bb91c341b/yetictl/__init__.py --------------------------------------------------------------------------------
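A note on the feed pattern: nearly every module under plugins/feeds/public above follows the same shape (subclass task.FeedTask, declare _defaults and a _SOURCE, fetch in run(), persist observables in analyze(), then register the class). As a quick orientation aid, here is a minimal sketch of a new feed built from that pattern; the class name, URL, and tag values are hypothetical placeholders, not a real data source, and the sketch assumes the same core APIs used throughout this repository (task.FeedTask, self._make_request(), observable .save()/.add_context()/.tag(), and TaskManager.register_task()):

from datetime import timedelta
from typing import ClassVar

from core import taskmanager
from core.schemas import task
from core.schemas.observables import ipv4


class ExampleIPFeed(task.FeedTask):
    """Illustrative feed skeleton; not a real data source."""

    # Placeholder endpoint: assumed to serve one IP address per line,
    # with '#' marking comment lines.
    _SOURCE: ClassVar["str"] = "https://example.com/bad-ips.txt"
    _defaults = {
        "frequency": timedelta(hours=12),
        "name": "ExampleIPFeed",
        "description": "Example feed of IP addresses (illustration only)",
    }

    def run(self):
        # run() is the scheduler entry point: fetch the source, then hand
        # each record to analyze().
        response = self._make_request(self._SOURCE)
        if not response:
            return
        for line in response.text.split("\n"):
            self.analyze(line.strip())

    def analyze(self, item):
        # Skip blanks and comments, then save the observable, attach
        # context naming this feed as the source, and tag it.
        if not item or item.startswith("#"):
            return
        ip_obs = ipv4.IPv4(value=item).save()
        ip_obs.add_context(self.name, {"source": self.name})
        ip_obs.tag(["example"])


# Registration makes the task discoverable by the TaskManager scheduler.
taskmanager.TaskManager.register_task(ExampleIPFeed)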