├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── doc_update.md │ └── feature_request.md └── workflows │ ├── code-coverage.yml │ ├── code-style.yml │ ├── publish-to-pypi.yml │ ├── stixshifter-module-verification.yml │ ├── unit-testing.yml │ └── unused-import.yml ├── .gitignore ├── .readthedocs.yaml ├── AUTHORS.rst ├── CHANGELOG.rst ├── CONTRIBUTING.rst ├── GOVERNANCE.rst ├── LICENSE.md ├── Makefile ├── README.rst ├── codecov.yml ├── containers ├── docker │ ├── Dockerfile │ └── Rprofile.site └── oci │ └── README.rst ├── docs ├── Makefile ├── README.rst ├── _static │ └── css │ │ ├── logo.css │ │ └── table.css ├── authors.rst ├── conf.py ├── configuration.rst ├── contributing.rst ├── debug.rst ├── deployment │ ├── dockerhub.rst │ ├── index.rst │ └── oci.rst ├── highlighttest.py ├── images │ ├── cg1.png │ ├── cg2.png │ ├── ecgp_centered_graph_illustration.png │ ├── ecgp_full_illustration.png │ ├── ecgp_single_node_illustration.png │ ├── entityrelation.png │ ├── huntflow.png │ ├── huntstep.png │ ├── interfaces.png │ ├── overview.png │ └── tutorial │ │ ├── analytics_pinip.png │ │ ├── datasource_list.png │ │ ├── find_command.png │ │ ├── first_get.png │ │ ├── jupyter_helloworld_hunt.png │ │ ├── jupyter_helloworld_strech.png │ │ ├── param_stix.png │ │ ├── pattern_web_exploit.png │ │ ├── start_kernel.png │ │ └── ttp_exploit_matching.png ├── index.rst ├── installation │ ├── analytics.rst │ ├── datasource.rst │ ├── index.rst │ └── runtime.rst ├── language │ ├── commands.rst │ ├── eav.rst │ ├── ecgp.rst │ ├── index.rst │ ├── interface.rst │ └── tac.rst ├── overview │ ├── businesslogic.rst │ ├── hunting.rst │ ├── index.rst │ ├── notrepeat.rst │ ├── nutshell.rst │ ├── packages.rst │ └── symbiosis.rst ├── requirements.txt ├── runtime.rst ├── source │ ├── kestrel.analytics.interface.rst │ ├── kestrel.datasource.interface.rst │ ├── kestrel.datasource.retstruct.rst │ ├── kestrel.session.rst │ ├── kestrel_analytics_docker.interface.rst │ ├── 
kestrel_analytics_python.interface.rst │ ├── kestrel_datasource_stixbundle.interface.rst │ └── kestrel_datasource_stixshifter.interface.rst ├── talks.rst ├── theory.rst └── tutorial.rst ├── logo ├── README.md ├── logo.png ├── logo.svg ├── logo_w_text.png ├── logo_w_text.svg ├── logo_w_text_white.png ├── logo_w_text_white.svg ├── logo_white.png ├── logo_white.svg └── svg2png.sh ├── mapping-examples └── security-datasets │ └── GoldenSAML │ ├── GoldenSAML_AADAuditEvents.yaml │ ├── GoldenSAML_Microsoft365DefenderEvents.yaml │ ├── GoldenSAML_OfficeActivityEvents.yaml │ ├── GoldenSAML_WindowsEvents.yaml │ └── README.md └── packages ├── kestrel_core ├── README.rst ├── pyproject.toml ├── src │ └── kestrel │ │ ├── __future__.py │ │ ├── __init__.py │ │ ├── analytics │ │ ├── __init__.py │ │ ├── config.py │ │ └── interface.py │ │ ├── cache │ │ ├── __init__.py │ │ ├── base.py │ │ ├── inmemory.py │ │ └── sql.py │ │ ├── cli.py │ │ ├── config │ │ ├── __init__.py │ │ ├── internal.py │ │ ├── kestrel.yaml │ │ ├── relations │ │ │ ├── entity.csv │ │ │ └── event.csv │ │ └── utils.py │ │ ├── display.py │ │ ├── exceptions.py │ │ ├── frontend │ │ ├── __init__.py │ │ ├── compile.py │ │ ├── completor.py │ │ ├── kestrel.lark │ │ └── parser.py │ │ ├── interface │ │ ├── __init__.py │ │ ├── base.py │ │ ├── codegen │ │ │ ├── __init__.py │ │ │ ├── dataframe.py │ │ │ ├── sql.py │ │ │ └── utils.py │ │ ├── manager.py │ │ └── translation │ │ │ ├── __init__.py │ │ │ ├── query │ │ │ └── __init__.py │ │ │ └── result │ │ │ └── __init__.py │ │ ├── ir │ │ ├── __init__.py │ │ ├── filter.py │ │ ├── graph.py │ │ └── instructions.py │ │ ├── mapping │ │ ├── __init__.py │ │ ├── data_model.py │ │ ├── fields │ │ │ ├── ecs.yaml │ │ │ └── stix.yaml │ │ ├── path.py │ │ ├── transformers.py │ │ ├── types │ │ │ └── ocsf.yaml │ │ └── utils.py │ │ ├── session.py │ │ └── utils.py └── tests │ ├── __init__.py │ ├── conftest.py │ ├── logs_ocsf_process_creation.csv │ ├── test_analytic.py │ ├── test_cache_inmemory.py │ ├── 
test_cache_sqlite.py │ ├── test_config.py │ ├── test_interface_datasource_codegen_dataframe.py │ ├── test_interface_datasource_codegen_sql.py │ ├── test_ir_filter.py │ ├── test_ir_graph.py │ ├── test_ir_instructions.py │ ├── test_mapping_data_model.py │ ├── test_mapping_transformers.py │ ├── test_parser.py │ └── test_session.py ├── kestrel_interface_opensearch ├── README.rst ├── pyproject.toml ├── src │ └── kestrel_interface_opensearch │ │ ├── __init__.py │ │ ├── config.py │ │ ├── example.yaml │ │ ├── interface.py │ │ └── ossql.py └── tests │ ├── __init__.py │ ├── test_config.py │ └── test_ossql.py ├── kestrel_interface_sqlalchemy ├── README.rst ├── pyproject.toml ├── src │ └── kestrel_interface_sqlalchemy │ │ ├── __init__.py │ │ ├── config.py │ │ ├── example.yaml │ │ ├── interface.py │ │ └── translator.py └── tests │ ├── logs_ecs_process_creation.csv │ ├── result_interface_find_entity_to_entity.txt │ ├── result_interface_find_entity_to_event.txt │ ├── result_interface_find_event_to_entity.txt │ ├── test_config.py │ ├── test_interface.py │ └── test_translate.py ├── kestrel_jupyter ├── README.rst ├── pyproject.toml ├── src │ ├── kestrel_ipython │ │ ├── __init__.py │ │ └── magic.py │ └── kestrel_jupyter_kernel │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── codemirror │ │ ├── __init__.py │ │ ├── kestrel_template.js │ │ └── setup.py │ │ ├── config.py │ │ ├── display.py │ │ ├── kernel.py │ │ └── setup.py └── tests │ ├── __init__.py │ ├── test_kernel_install.py │ └── test_notebook_syntax_gen.py └── kestrel_tool ├── README.rst ├── pyproject.toml └── src └── kestrel_tool ├── main.py └── mkdb.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | One or two sentences summary of the bug. 
12 | 13 | **Details of the bug** 14 | - What is the hunt flow/script you are executing? 15 | - What is the command that failed? 16 | - What is the error message? 17 | - If it is a STIX-Shifter related issue, what are the results of STIX-Shifter translation? 18 | - If it is a STIX-Shifter related issue, what are the results of STIX-Shifter transmission? 19 | 20 | **To Reproduce** 21 | Steps to reproduce the behavior: 22 | 1. set up a data source: ... 23 | 2. test STIX-Shifter: ... 24 | 3. run hunt flow: ... 25 | 26 | **Expected behavior** 27 | A clear and concise description of what you expected to happen. 28 | 29 | **Screenshots** 30 | If applicable, add screenshots to help explain your problem. 31 | 32 | **Environment (please complete the following information):** 33 | - OS: [e.g. MacOS 11; Fedora 34; Archlinux; Ubuntu 20.04] 34 | - Python version: [e.g. Python 3.9.5] 35 | - Python install environment: [e.g., Python virtual environment; Conda; pip --user] 36 | - STIX-Shifter version: [e.g., 3.4.4] 37 | 38 | **Additional context** 39 | Add any other context about the problem here. 40 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/doc_update.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation update 3 | about: Propose a documentation fix or update 4 | title: '' 5 | labels: documentation 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your documentation update request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. there is a missing part in the doc [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to change/fix in the documentation. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the documentation update here. 
18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/code-coverage.yml: -------------------------------------------------------------------------------- 1 | name: Code Coverage Evaluation on PR 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | paths: 8 | - 'packages/*/src/**' 9 | pull_request: 10 | branches: 11 | - develop 12 | paths: 13 | - 'packages/*/src/**' 14 | types: 15 | - opened 16 | - reopened 17 | - synchronize 18 | 19 | jobs: 20 | codecov: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: '3.10' 28 | - name: Install Python Tools 29 | run: pip install --upgrade pip setuptools wheel 30 | - name: Install Pytest 31 | run: pip install pytest pytest-cov 32 | - name: Install kestrel [all packages] 33 | run: make install 34 | - name: Coverage for kestrel_core 35 | working-directory: ./packages/kestrel_core 36 | run: pytest -vv --cov-report=xml --cov=kestrel 37 | - name: Coverage for 
kestrel_interface_opensearch 38 | working-directory: ./packages/kestrel_interface_opensearch 39 | run: pytest -vv --cov-report=xml --cov=kestrel_interface_opensearch 40 | - name: Coverage for kestrel_interface_sqlalchemy 41 | working-directory: ./packages/kestrel_interface_sqlalchemy 42 | run: pytest -vv --cov-report=xml --cov=kestrel_interface_sqlalchemy 43 | - name: Coverage for kestrel_jupyter 44 | working-directory: ./packages/kestrel_jupyter 45 | run: pytest -vv --cov-report=xml $(ls src | grep -v '.egg-info' | xargs | sed -r 's/^| / --cov=/g') 46 | - name: Upload coverage to Codecov 47 | uses: codecov/codecov-action@v3 48 | with: 49 | fail_ci_if_error: false 50 | files: ./packages/kestrel_core/coverage.xml,./packages/kestrel_interface_opensearch/coverage.xml,./packages/kestrel_interface_sqlalchemy/coverage.xml,./packages/kestrel_jupyter/coverage.xml 51 | verbose: true 52 | -------------------------------------------------------------------------------- /.github/workflows/code-style.yml: -------------------------------------------------------------------------------- 1 | name: Code style compliance check 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | paths: 8 | - 'packages/*/src/**' 9 | pull_request: 10 | branches: 11 | - develop 12 | paths: 13 | - 'packages/*/src/**' 14 | types: 15 | - opened 16 | - reopened 17 | - synchronize 18 | 19 | jobs: 20 | codestyle: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: '3.10' 28 | - name: Install Kestrel package 29 | run: | 30 | pip install --upgrade pip setuptools wheel 31 | pip install black isort 32 | - name: Import order check 33 | run: isort --profile black --check-only packages/*/src/ 34 | - name: Code style check (please black your code) 35 | run: black --check packages/*/src/ 36 | -------------------------------------------------------------------------------- 
/.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | # Disable automatic publishing until Kestrel v2 finishes beta and replaces Kestrel v1 5 | #release: 6 | # types: [published] 7 | workflow_dispatch: 8 | 9 | jobs: 10 | publish: 11 | strategy: 12 | matrix: 13 | package: 14 | - kestrel_core 15 | - kestrel_interface_opensearch 16 | - kestrel_interface_sqlalchemy 17 | - kestrel_jupyter 18 | - kestrel_tool 19 | runs-on: ubuntu-latest 20 | defaults: 21 | run: 22 | shell: bash 23 | working-directory: ./packages/${{ matrix.package }} 24 | steps: 25 | - uses: actions/checkout@v4 26 | - uses: actions/setup-python@v5 27 | with: 28 | python-version: '3.12' 29 | - name: Install building environment 30 | run: | 31 | pip install --upgrade pip setuptools wheel 32 | pip install --upgrade build twine 33 | - name: Build and publish 34 | env: 35 | TWINE_USERNAME: __token__ 36 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 37 | run: | 38 | # `build` is installed as a Python module, not a standalone commandline 39 | python -m build --sdist --wheel --outdir dist/ . 40 | twine check dist/* 41 | twine upload --verbose --skip-existing dist/* 42 | -------------------------------------------------------------------------------- /.github/workflows/stixshifter-module-verification.yml: -------------------------------------------------------------------------------- 1 | name: STIX-shifter Connector Package Verification (Kestrel 1) 2 | 3 | on: 4 | #schedule: 5 | ## Run this once per day, towards the end of the day for keeping the most 6 | ## recent data point most meaningful (hours are interpreted in UTC). 7 | #- cron: "55 02 * * *" 8 | workflow_dispatch: # Allow for running this manually. 
9 | 10 | jobs: 11 | verify-stixshifter: 12 | runs-on: ubuntu-latest 13 | defaults: 14 | run: 15 | working-directory: ./packages/kestrel_datasource_stixshifter 16 | steps: 17 | - uses: actions/checkout@v4 18 | with: 19 | ref: develop_v1 20 | - name: Set up Python 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: '3.10' 24 | - name: Install Python Tools 25 | run: pip install --upgrade pip setuptools wheel 26 | - name: Install kestrel_core 27 | working-directory: ./packages/kestrel_core 28 | run: pip install . 29 | - name: Install kestrel_datasource_stixshifter 30 | run: pip install .[test] 31 | - name: Sample STIX-shifter Connector Package Verification on PyPI 32 | run: pytest -vv tests/test_stixshifter.py -k test_verify_package_origin 33 | -------------------------------------------------------------------------------- /.github/workflows/unit-testing.yml: -------------------------------------------------------------------------------- 1 | name: Unit testing on PR 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | paths: 8 | - 'packages/**' 9 | pull_request: 10 | branches: 11 | - develop 12 | paths: 13 | - 'packages/**' 14 | types: 15 | - opened 16 | - reopened 17 | - synchronize 18 | 19 | jobs: 20 | test-kestrel-core: 21 | strategy: 22 | matrix: 23 | os: [ubuntu-latest, macos-latest] 24 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] 25 | runs-on: ${{ matrix.os }} 26 | defaults: 27 | run: 28 | shell: bash 29 | working-directory: ./packages/kestrel_core 30 | steps: 31 | - uses: actions/checkout@v4 32 | - name: Set up Python 33 | uses: actions/setup-python@v5 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | - name: Install Python Tools 37 | run: pip install --upgrade pip setuptools wheel pytest 38 | - name: Install kestrel_core 39 | run: pip install . 
40 | - name: Unit testing 41 | run: pytest -vv 42 | 43 | test-kestrel-interface-opensearch: 44 | strategy: 45 | matrix: 46 | os: [ubuntu-latest, macos-latest] 47 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] 48 | runs-on: ${{ matrix.os }} 49 | defaults: 50 | run: 51 | shell: bash 52 | working-directory: ./packages/kestrel_interface_opensearch 53 | steps: 54 | - uses: actions/checkout@v4 55 | - name: Set up Python 56 | uses: actions/setup-python@v5 57 | with: 58 | python-version: ${{ matrix.python-version }} 59 | - name: Install Python Tools 60 | run: pip install --upgrade pip setuptools wheel pytest 61 | - name: Install kestrel_core 62 | working-directory: ./packages/kestrel_core 63 | run: pip install . 64 | - name: Install kestrel_interface_opensearch 65 | run: pip install . 66 | - name: Unit testing 67 | run: pytest -vv 68 | 69 | test-kestrel-interface-sqlalchemy: 70 | strategy: 71 | matrix: 72 | os: [ubuntu-latest, macos-latest] 73 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] 74 | runs-on: ${{ matrix.os }} 75 | defaults: 76 | run: 77 | shell: bash 78 | working-directory: ./packages/kestrel_interface_sqlalchemy 79 | steps: 80 | - uses: actions/checkout@v4 81 | - name: Set up Python 82 | uses: actions/setup-python@v5 83 | with: 84 | python-version: ${{ matrix.python-version }} 85 | - name: Install Python Tools 86 | run: pip install --upgrade pip setuptools wheel pytest 87 | - name: Install kestrel_core 88 | working-directory: ./packages/kestrel_core 89 | run: pip install . 90 | - name: Install kestrel_interface_sqlalchemy 91 | run: pip install . 
92 | - name: Unit testing 93 | run: pytest -vv 94 | -------------------------------------------------------------------------------- /.github/workflows/unused-import.yml: -------------------------------------------------------------------------------- 1 | name: Unused imports check 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | paths: 8 | - 'packages/*/src/**' 9 | pull_request: 10 | branches: 11 | - develop 12 | paths: 13 | - 'packages/*/src/**' 14 | types: 15 | - opened 16 | - reopened 17 | - synchronize 18 | 19 | jobs: 20 | unusedimports: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: '3.10' 28 | - name: Install Kestrel package 29 | run: | 30 | pip install --upgrade pip setuptools wheel 31 | pip install unimport 32 | - name: Check 33 | run: unimport --check --exclude __init__.py packages/*/src/ 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | jobs: 14 | pre_build: 15 | # there are reference to (Kestrel 1) source code in the current doc 16 | # need to install Kestrel 1 to resolve the reference 17 | # readthedocs shallow clone the repo, need to get the develop_v1 branch 18 | - git remote set-branches origin develop_v1 19 | - git fetch --depth 1 origin develop_v1 20 | - git checkout develop_v1 21 | - make install 22 | - git checkout develop 23 | 24 | # Build documentation in the docs/ directory with Sphinx 25 | sphinx: 26 | configuration: docs/conf.py 27 | 28 | # If using Sphinx, optionally build your docs in additional formats such as PDF 29 | formats: 30 | - pdf 31 | 32 | # Optionally declare the Python requirements required to build your docs 33 | python: 34 | install: 35 | - requirements: docs/requirements.txt 36 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Maintainers 6 | ----------- 7 | 8 | - `Xiaokui Shu`_ 9 | - `Paul Coccoli`_ 10 | 11 | Contributors 12 | ------------ 13 | 14 | - `Charlie Wu`_ 15 | - `Jill Casavant`_ 16 | - `Sulakshan Vajipayajula`_ 17 | - `Chew Kin Zhong`_ 18 | - `Ian Molloy`_ 19 | - `Constantin Adam`_ 20 | - `Ting Dai`_ 21 | - `Leila Rashidi`_ 22 | - `Kenneth Peeples`_ 23 | 24 | .. _Xiaokui Shu: https://github.com/subbyte 25 | .. _Paul Coccoli: https://github.com/pcoccoli 26 | .. 
_Charlie Wu: https://github.com/charliewutw 27 | .. _Jill Casavant: https://github.com/jmcasava 28 | .. _Sulakshan Vajipayajula: https://github.com/svajipay 29 | .. _Chew Kin Zhong: https://github.com/kinzhong 30 | .. _Ian Molloy: https://github.com/imolloy 31 | .. _Constantin Adam: https://github.com/cmadam 32 | .. _Ting Dai: https://github.com/tingdai 33 | .. _Leila Rashidi: https://github.com/leila-rashidi 34 | .. _Kenneth Peeples: https://github.com/kpeeples 35 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Changelog 3 | ========= 4 | 5 | All notable changes to this project will be documented in this file. 6 | 7 | The format is based on `Keep a Changelog`_. 8 | 9 | Unreleased 10 | ========== 11 | 12 | This is the Changelog for Kestrel 2. Look for Changelog for Kestrel 1 in the ``develop_v1`` branch. 13 | 14 | 2.0.0b (2024-07-30) 15 | ================== 16 | 17 | Added 18 | ----- 19 | 20 | - Commands supported 21 | - NEW 22 | - GET 23 | - FIND 24 | - DISP 25 | - INFO 26 | - APPLY 27 | - EXPLAIN 28 | - expression 29 | 30 | - Supported Entities 31 | - `event` is a first-class citizen in Kestrel v2 32 | - Check `kestrel.mapping.types.*` for details 33 | 34 | - Supported Relations 35 | - Relation between entity and entity 36 | - Relation between event and entity 37 | - Check `kestrel.config.relations.*` for details 38 | 39 | - Kestrel Intermediate Representation Graph (IRGraph) 40 | - GIT compilation with IRGraph 41 | - Kestrel segments IRGraph to execute on multiple interfaces/datastores/exec_env 42 | - Kestrel cache glues executions together for a session 43 | 44 | - OCSF/ECS/STIX syntax supported in frontend 45 | - Type inferencing supported 46 | - Comparison field translation supported 47 | - Project field translation supported 48 | 49 | - Datasource Interfaces 50 | - Sqlalchemy fully working 51 | - Multi-store 
support 52 | - Query column translation supported 53 | - Value translation supported 54 | - Opensearch half done 55 | 56 | - Analytics Interfaces 57 | - Python analytics interface works for `DataFrame` but not `Display` objects 58 | 59 | - Kestrel Tool 60 | - `mkdb` to ingest NLJSON logs into SQL databases 61 | 62 | - Example Mappings 63 | - Four example mappings created for BlackHat 2024 (SecurityDatasets GoldenSAML case) 64 | 65 | .. _Keep a Changelog: https://keepachangelog.com/en/1.0.0/ 66 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! 6 | 7 | Types of Contributions 8 | ---------------------- 9 | 10 | - Have something to say: join us on Slack (find how to join in `README`_), or create a ticket at `GitHub Issues`_. 11 | 12 | - Report bugs: report bugs at `GitHub Issues`_. 13 | 14 | - Fix bugs: look through the `GitHub Issues`_ for bugs to fix. 15 | 16 | - Implement features: look through the `GitHub Issues`_ for features to implement. 17 | 18 | - Write documentation: we use the `Google Style`_ docstrings in our source code. 19 | 20 | - `supported sections`_ 21 | - `docstring examples`_ 22 | 23 | - Share your Kestrel analytics: submit a PR to the `kestrel-analytics repo`_. 24 | 25 | - Share your Kestrel huntbook: submit a PR to the `kestrel-huntbook repo`_. 26 | 27 | Code Style 28 | ---------- 29 | 30 | We follow the `symbol naming convention`_ and use `black`_ to format the code. 31 | 32 | How to Submit a Pull Request 33 | ---------------------------- 34 | 35 | Checklist before submitting a pull request: 36 | 37 | 1. The pull request should include tests. 38 | 2. If the pull request adds functionality, the docs should be updated. 39 | 3. Run a full unittest with ``pytest``. 40 | 4. 
Check unused imports with ``unimport --check --exclude __init__.py src/``. 41 | 5. Black your code with ``black src/``. 42 | 43 | All contributions must be covered by a `Contributor's License Agreement`_ (CLA) and ECLA (if you are contributing on behalf of your employer). You will get a prompt to sign the CLA when you submit your first PR. 44 | 45 | .. _GitHub Issues: https://github.com/opencybersecurityalliance/kestrel-lang/issues 46 | .. _Symbol Naming Convention: https://google.github.io/styleguide/pyguide.html#3164-guidelines-derived-from-guidos-recommendations 47 | .. _black: https://github.com/psf/black 48 | .. _Google Style: https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings 49 | .. _supported sections: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html#docstring-sections 50 | .. _docstring examples: https://www.sphinx-doc.org/en/master/usage/extensions/example_google.html 51 | .. _README: README.rst 52 | .. _kestrel-analytics repo: https://github.com/opencybersecurityalliance/kestrel-analytics 53 | .. _kestrel-huntbook repo: https://github.com/opencybersecurityalliance/kestrel-huntbook 54 | .. _Contributor's License Agreement: https://cla-assistant.io/opencybersecurityalliance/oasis-open-project 55 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | default: help 2 | 3 | ## Install core Kestrel package 4 | kestrel_core: 5 | cd packages/kestrel_core; pip install . 6 | 7 | ## Install OpenSearch interface package 8 | kestrel_interface_opensearch: kestrel_core 9 | cd packages/kestrel_interface_opensearch; pip install . 10 | 11 | ## Install SQLAlchemy interface package 12 | kestrel_interface_sqlalchemy: kestrel_core 13 | cd packages/kestrel_interface_sqlalchemy; pip install . 
14 | 15 | ## Install Kestrel kernel for Jupyter 16 | kestrel_jupyter: kestrel_interface_opensearch kestrel_interface_sqlalchemy 17 | cd packages/kestrel_jupyter; pip install .; kestrel_jupyter_setup 18 | 19 | ## Install Kestrel core, interfaces, and Jupyter kernel 20 | install: kestrel_jupyter 21 | 22 | ## This help screen 23 | help: 24 | @printf "Available targets:\n\n" 25 | @awk '/^[a-zA-Z\-\_0-9%:\\]+/ { \ 26 | helpMessage = match(lastLine, /^## (.*)/); \ 27 | if (helpMessage) { \ 28 | helpCommand = $$1; \ 29 | helpMessage = substr(lastLine, RSTART + 3, RLENGTH); \ 30 | gsub("\\\\", "", helpCommand); \ 31 | gsub(":+$$", "", helpCommand); \ 32 | printf " \x1b[32;01m%-35s\x1b[0m %s\n", helpCommand, helpMessage; \ 33 | } \ 34 | } \ 35 | { lastLine = $$0 }' $(MAKEFILE_LIST) | sort -u 36 | @printf "\n" 37 | 38 | 39 | PKG_DIRS = $(wildcard packages/kestrel_*) 40 | 41 | test: 42 | for d in $(PKG_DIRS); do pytest $$d || break; done 43 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "tests" 3 | 4 | coverage: 5 | status: 6 | patch: false 7 | -------------------------------------------------------------------------------- /containers/docker/Rprofile.site: -------------------------------------------------------------------------------- 1 | # Add R mimetype to specify how the plot returns from R to the browser. 
2 | # https://notebook.community/andrie/jupyter-notebook-samples/Changing%20R%20plot%20options%20in%20Jupyter 3 | 4 | options(jupyter.plot_mimetypes = c('text/plain', 'image/png', 'image/jpeg', 'image/svg+xml', 'application/pdf')) 5 | -------------------------------------------------------------------------------- /containers/oci/README.rst: -------------------------------------------------------------------------------- 1 | This is a placeholder for any future oci formats 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # You can set these variables from the command line, and also 2 | # from the environment for the first two. 3 | SPHINXOPTS ?= 4 | SPHINXBUILD ?= sphinx-build 5 | SOURCEDIR = . 6 | BUILDDIR = _build 7 | 8 | clean: 9 | rm -r "$(BUILDDIR)" 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | ===================== 4 | Kestrel Documentation 5 | ===================== 6 | 7 | Kestrel documentation is automatically compiled and published to https://kestrel.readthedocs.io 8 | 9 | To compile a local or offline copy: 10 | 11 | .. 
code-block:: console 12 | 13 | $ pip install -r requirements.txt 14 | $ make html 15 | 16 | ``autosectionlabel`` is enabled and references can be used: 17 | 18 | - reference to a file: ``:doc:`filePathRelativeToCurrentFile``` 19 | 20 | - reference to a section: ``:ref:`topdir/dir/file:sectionTitle``` 21 | 22 | - reference to a section with text: ``:ref:`text <topdir/dir/file:sectionTitle>``` 23 | -------------------------------------------------------------------------------- /docs/_static/css/logo.css: -------------------------------------------------------------------------------- 1 | /* 2 | `width:auto` was rendering 0px wide for .svg files 3 | https://stackoverflow.com/questions/59215996/how-to-add-a-logo-to-my-readthedocs-logo-rendering-at-0px-wide 4 | */ 5 | .wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo { 6 | width: 241px; 7 | margin-top: 15px; 8 | } 9 | -------------------------------------------------------------------------------- /docs/_static/css/table.css: -------------------------------------------------------------------------------- 1 | .rst-content .line-block { 2 | margin-bottom: 0; 3 | } 4 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import tomllib 2 | 3 | def get_version(): 4 | """Return the version declared in the kestrel_jupyter (umbrella package) pyproject.toml, used as the doc version.""" 5 | with open("../packages/kestrel_jupyter/pyproject.toml", "rb") as f: 6 | pyproject_config = tomllib.load(f) 7 | return pyproject_config["project"]["version"] 8 | # Sphinx project metadata; version and release both come from get_version() 9 | project = "Kestrel Threat Hunting Language" 10 | version = get_version() 11 | release = version 12 | author = "Xiaokui Shu, Paul Coccoli" 13 | copyright = "2022 Open Cybersecurity Alliance" 14 | 15 | extensions = [ 16 | "sphinx.ext.intersphinx", 17 | "sphinx.ext.autodoc", 18 | "sphinx.ext.napoleon", 19 | "sphinx.ext.viewcode", 20 | "sphinx.ext.autosectionlabel", 21 | "sphinx_design", 22 | ] 23 | 24 | autodoc_default_options = { 25 | "members": True, 26 | "member-order": "bysource", 27 | # "undoc-members": True, 28 | "show-inheritance": True 29 | } 30 | 31 | suppress_warnings = ['autosectionlabel.*'] 32 | 33 | autosectionlabel_prefix_document = True 34 | 35 | html_title = project 36 | html_theme = "sphinx_rtd_theme" 37 | highlight_language = "none" 38 | html_logo = "../logo/logo_w_text_white.png" 39 | html_theme_options = { 40 | 'logo_only': True, 41 | 'display_version': False, 42 | } 43 | html_static_path = ['_static'] 44 | html_css_files = [ 45 | 'css/logo.css', 46 | 'css/table.css', 47 | ] 48 | -------------------------------------------------------------------------------- /docs/configuration.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Configuration 3 | ============= 4 | 5 | Kestrel loads user-defined configurations to override default values when the 6 | runtimes start. 
Thus you can customize your Kestrel runtime by putting 7 | configuration values in ``~/.config/kestrel/kestrel.yaml`` or any YAML file 8 | with path specified in the environment variable ``KESTREL_CONFIG``. 9 | 10 | Note: the Kestrel main config should not be confused with configurations for 11 | data sources. In Kestrel, data sources are defined/grouped by each 12 | :doc:`../source/kestrel.datasource.interface`. Each data source interface is a 13 | Python package and has its own configuration file. For example, 14 | :doc:`../source/kestrel_datasource_stixshifter.interface` describes the use and 15 | configuration of STIX-shifter data sources. 16 | 17 | Default Kestrel Configuration 18 | ============================= 19 | 20 | * `Default Kestrel 1 Config`_ 21 | 22 | * `Default Kestrel 2 Config`_ 23 | 24 | Example of User-Defined Configurations 25 | ====================================== 26 | 27 | You can disable prefetch by creating ``~/.config/kestrel/kestrel.yaml`` with 28 | the following: 29 | 30 | .. code-block:: yaml 31 | 32 | prefetch: 33 | switch_per_command: 34 | get: false 35 | find: false 36 | 37 | Kestrel will then not proactively search for logs/records for entities 38 | extracted from the return of ``GET``/``FIND``, which will largely disable 39 | followup ``FIND`` commands/steps. 40 | 41 | Kestrel config supports expansion of environment variables, e.g., if a value in 42 | the YAML file is ``$ENVX``, then the value is fetched from environment variable 43 | ``$ENVX`` when Kestrel loads the config file. 44 | 45 | .. _Default Kestrel 1 Config: https://github.com/opencybersecurityalliance/kestrel-lang/blob/develop_v1/packages/kestrel_core/src/kestrel/config.yaml 46 | .. 
_Default Kestrel 2 Config: https://github.com/opencybersecurityalliance/kestrel-lang/blob/develop/packages/kestrel_core/src/kestrel/config/kestrel.yaml 47 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/debug.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Debug 3 | ===== 4 | 5 | If you encounter a Kestrel error, you may want to ask for help in our `OCA 6 | slack`_ channel of Kestrel. A Kestrel veteran may guide you to further dig out 7 | the issue in Kestrel debug mode. 8 | 9 | Kestrel Errors 10 | ============== 11 | 12 | Generally there are two categories of Kestrel errors: 13 | 14 | - Kestrel exceptions: the errors that have been anticipated by Kestrel developers 15 | and encapsulated in a Kestrel Exception class. These errors can be quickly 16 | explained by a Kestrel developer and their root causes are limited. 17 | 18 | - Generic Python exceptions: the errors that haven't been captured by Kestrel 19 | runtime, which may be due to the incomplete try/catch coverage in Kestrel 20 | code or an error from third-party code, e.g., a dependent library or a 21 | Kestrel analytics (e.g., :doc:`source/kestrel_analytics_python.interface`). 22 | These errors usually need further debugging, especially help from you to work 23 | with a Kestrel or third-party code developer to debug. 24 | 25 | Enable Debug Mode 26 | ================= 27 | 28 | You can run Kestrel in debug mode by either using the ``--debug`` flag of the 29 | Kestrel command-line utility, or creating the environment variable ``KESTREL_DEBUG`` 30 | with any value before launching Kestrel, which is useful when you use Kestrel 31 | in Jupyter Notebook. 
In the debug mode, all runtime data including caches and 32 | logs at debug level are at ``/tmp/kestrel-$USER/`` (``$TMPDIR/kestrel-$USER/`` 33 | on macOS). The runtime log of the most recently created session is at 34 | ``/tmp/kestrel-$USER/session.log`` (``$TMPDIR/kestrel-$USER/session.log`` on 35 | macOS). 36 | 37 | Add Your Own Log Entry 38 | ====================== 39 | 40 | If a Kestrel veteran assists you in further debugging an issue, it is likely 41 | he/she will ask you to add a debug log entry to a specific Kestrel module/function 42 | to print out some value: 43 | 44 | #. Clone the `kestrel-lang`_ repo: 45 | 46 | .. code-block:: console 47 | 48 | $ git clone https://github.com/opencybersecurityalliance/kestrel-lang.git 49 | 50 | #. Ensure the following is in the module you'd like to debug (add if not): 51 | 52 | .. code-block:: python 53 | 54 | import logging 55 | _logger = logging.getLogger(__name__) 56 | 57 | #. Add a debug log entry where you want: 58 | 59 | .. code-block:: python 60 | 61 | _logger.debug(something_you_want_to_log) 62 | 63 | #. Install your local Kestrel build: 64 | 65 | .. code-block:: console 66 | 67 | $ pip install -e . 68 | 69 | #. Rerun Kestrel (command-line utility or restart Kestrel kernel in Jupyter) 70 | and check the entry you logged at ``/tmp/kestrel-$USER/session.log``. 71 | 72 | .. _kestrel-lang: http://github.com/opencybersecurityalliance/kestrel-lang 73 | .. _OCA slack: https://open-cybersecurity.slack.com/ 74 | -------------------------------------------------------------------------------- /docs/deployment/dockerhub.rst: -------------------------------------------------------------------------------- 1 | Besides the Python package (PyPI), Kestrel is also released as a Docker container 2 | image on DockerHub. 
3 | 4 | The image provides a full Kestrel runtime composed of the basic Kestrel 5 | runtime, `kestrel-jupyter`_ package, open-source Kestrel analytics in the 6 | `kestrel-analytics repo`_, and open-source Kestrel huntbooks and tutorials in 7 | the `kestrel-huntbook repo`_. 8 | 9 | The image is based on the `docker-stacks`_ Jupyter image, maintained by 10 | `Kenneth Peeples`_, and currently located under `Kenneth's DockerHub account`_. 11 | 12 | To launch the Kestrel container (opening Jupyter on host port 8888): 13 | 14 | .. code-block:: console 15 | 16 | $ docker run -d -p 8888:8888 kpeeples/kaas-baseline:latest 17 | 18 | To have Kestrel syntax highlighting support, use the Jupyter Notebook URL 19 | (``http://hostname:8888/nbclassic``) instead of Jupyter Lab 20 | (``http://hostname:8888/lab``) for Kestrel huntbooks. 21 | 22 | To find the token for the Jupyter server, you can either: 23 | 24 | - Show it in the container log: 25 | 26 | .. code-block:: console 27 | 28 | $ docker logs CONTAINER_ID 29 | 30 | - Go inside the container and print the token from Jupyter server: 31 | 32 | .. code-block:: console 33 | 34 | # on the host 35 | $ docker exec -it CONTAINER_ID /bin/bash 36 | 37 | # inside the container 38 | $ jupyter server list 39 | 40 | .. _kestrel-jupyter: https://github.com/opencybersecurityalliance/kestrel-jupyter 41 | .. _kestrel-analytics repo: https://github.com/opencybersecurityalliance/kestrel-analytics 42 | .. _kestrel-huntbook repo: https://github.com/opencybersecurityalliance/kestrel-huntbook 43 | .. _docker-stacks: https://github.com/jupyter/docker-stacks 44 | .. _Kenneth Peeples: https://github.com/kpeeples 45 | .. 
_Kenneth's DockerHub account: https://hub.docker.com/repository/docker/kpeeples/kaas-baseline 46 | -------------------------------------------------------------------------------- /docs/deployment/index.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Container Deployment 3 | ==================== 4 | 5 | Docker (at Dockerhub) 6 | ===================== 7 | 8 | .. include:: dockerhub.rst 9 | 10 | OCI 11 | === 12 | 13 | .. include:: oci.rst 14 | -------------------------------------------------------------------------------- /docs/deployment/oci.rst: -------------------------------------------------------------------------------- 1 | Placeholder for future `Open Container Initiative`_ (OCI) 2 | 3 | .. _Open Container Initiative: https://opencontainers.org/ 4 | -------------------------------------------------------------------------------- /docs/highlighttest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Writes highlighttest.rst: the same sample (HF) emitted once per code-block language listed in names; presumably used to compare how each highlighter renders Kestrel. 3 | names = ["apl", "arduino", "basemake", "bash", "sh", "ksh", "zsh", "csh", "fish", "shell", "coffeescript", "cu", "ecl", "elixir", "fancy", "felix", "flo", "fortran", "freefem", "gsql", "icon", "idl", "pan"] 4 | # Raw string so the backslashes in the sample (the Windows path) are written out verbatim. 5 | HF = r""" 6 | 7 | # a single (process) node graph pattern 8 | proc1 = GET process FROM stixshifter://edp1 9 | WHERE name = "powershell.exe" AND pid = 1234 10 | AND binary_ref.name = "powershell.exe" 11 | LAST 5 MIN 12 | 13 | # a single (network-traffic) node graph pattern 14 | # this pattern is equivalent to `dst_port IN (80, 443)` 15 | netflow1 = GET network-traffic FROM stixshifter://gateway1 16 | WHERE dst_port = 80 OR dst_port = 443 17 | AND dst_ref.value = "192.168.1.1" 18 | START 2022-01-01T00:00:00Z STOP 2022-01-02T00:00:00Z 19 | 20 | # a single (file) node graph pattern 21 | minikatz = GET file FROM stixshifter://edp1 22 | WHERE name = "C:\ProgramData\p.exe" 23 | OR hashes.MD5 IN ( 
"1a4fe4413a92d478625d97b7df1bd0cf" 24 | , "b6ff8f31007a3629a3c4be8999001ec9" 25 | , "e8994399f1656e58f72443b8861ce5d1" 26 | , "9ae602fddb5d2f9b63c5eb6aad0a2612" 27 | ) 28 | START "2022-01-01T00:00:00Z" STOP t"2022-01-02T00:00:00Z" 29 | 30 | # a single (user-account) node graph pattern 31 | users = GET user-account FROM stixshifter://authlogs 32 | WHERE (user_id = 1001 AND account_login = "Tracy") 33 | OR user_id = 0 34 | OR (user_id = 1003 AND is_privileged = true) 35 | OR (account_login = "JJ" AND is_privileged = true) 36 | 37 | APPLY python://sef ON users 38 | 39 | u = users WHERE name = "asdf" 40 | 41 | 42 | """ 43 | # Overwrites highlighttest.rst in the current working directory on every run. 44 | with open("highlighttest.rst", "w") as ht: 45 | ht.write("""============== 46 | Highlight Test 47 | ============== 48 | 49 | """) 50 | for name in names: 51 | header = f"{name}\n\n.. code-block:: {name}" 52 | ht.write(header) 53 | ht.write(HF) 54 | -------------------------------------------------------------------------------- /docs/images/cg1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/cg1.png -------------------------------------------------------------------------------- /docs/images/cg2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/cg2.png -------------------------------------------------------------------------------- /docs/images/ecgp_centered_graph_illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/ecgp_centered_graph_illustration.png -------------------------------------------------------------------------------- 
/docs/images/ecgp_full_illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/ecgp_full_illustration.png -------------------------------------------------------------------------------- /docs/images/ecgp_single_node_illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/ecgp_single_node_illustration.png -------------------------------------------------------------------------------- /docs/images/entityrelation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/entityrelation.png -------------------------------------------------------------------------------- /docs/images/huntflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/huntflow.png -------------------------------------------------------------------------------- /docs/images/huntstep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/huntstep.png -------------------------------------------------------------------------------- /docs/images/interfaces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/interfaces.png 
-------------------------------------------------------------------------------- /docs/images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/overview.png -------------------------------------------------------------------------------- /docs/images/tutorial/analytics_pinip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/analytics_pinip.png -------------------------------------------------------------------------------- /docs/images/tutorial/datasource_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/datasource_list.png -------------------------------------------------------------------------------- /docs/images/tutorial/find_command.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/find_command.png -------------------------------------------------------------------------------- /docs/images/tutorial/first_get.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/first_get.png -------------------------------------------------------------------------------- /docs/images/tutorial/jupyter_helloworld_hunt.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/jupyter_helloworld_hunt.png -------------------------------------------------------------------------------- /docs/images/tutorial/jupyter_helloworld_strech.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/jupyter_helloworld_strech.png -------------------------------------------------------------------------------- /docs/images/tutorial/param_stix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/param_stix.png -------------------------------------------------------------------------------- /docs/images/tutorial/pattern_web_exploit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/pattern_web_exploit.png -------------------------------------------------------------------------------- /docs/images/tutorial/start_kernel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/start_kernel.png -------------------------------------------------------------------------------- /docs/images/tutorial/ttp_exploit_matching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/docs/images/tutorial/ttp_exploit_matching.png 
-------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | =============================== 2 | Kestrel Threat Hunting Language 3 | =============================== 4 | 5 | *Hunt faster, easier, and with more fun!* 6 | 7 | Kestrel threat hunting language provides an abstraction for threat hunters to 8 | focus on the high-value and composable threat hypothesis development instead of 9 | specific realization of hypothesis testing with heterogeneous data sources, 10 | threat intelligence, and public or proprietary analytics. 11 | 12 | `Kestrel GitHub repo`_ is the official portal of everything Kestrel beyond this 13 | documentation: news, demo, tutorial, sandbox, huntbooks, analytics, blogs, 14 | talks, community entrances, and more. 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | 19 | overview/index 20 | installation/index 21 | tutorial 22 | language/index 23 | configuration 24 | debug 25 | runtime 26 | deployment/index 27 | theory 28 | talks 29 | contributing 30 | authors 31 | 32 | Indices and tables 33 | ================== 34 | 35 | * :ref:`genindex` 36 | * :ref:`modindex` 37 | * :ref:`search` 38 | 39 | .. _Kestrel GitHub repo: https://github.com/opencybersecurityalliance/kestrel-lang 40 | -------------------------------------------------------------------------------- /docs/installation/analytics.rst: -------------------------------------------------------------------------------- 1 | Setup Kestrel Analytics 2 | ----------------------- 3 | 4 | Kestrel analytics are one type of hunt steps (:ref:`language/commands:APPLY`) 5 | that provide foreign language interfaces to non-Kestrel hunting modules. 
You 6 | can apply any external logic as a Kestrel analytics to 7 | 8 | - compute new attributes to one or more Kestrel variables 9 | - perform visualizations 10 | 11 | Note Kestrel treats analytics as black boxes and only cares about the input and 12 | output formats. So it is possible to wrap even proprietary software in a 13 | Kestrel analytics to be a hunt step. 14 | 15 | Kestrel Analytics Abstraction 16 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 17 | 18 | Kestrel manages analytics in a two-level abstraction: an analytics registers at 19 | a :doc:`../source/kestrel.analytics.interface`, which defines the way how a set 20 | of analytics are executed and talk to Kestrel. In other words, Kestrel manages 21 | multiple analytics interfaces at runtime, each of which manages a set of 22 | analytics with the same execution model and input/output formats. Learn more 23 | about the abstraction in :doc:`../language/interface`. 24 | 25 | Kestrel by default ships with the two most common analytics interfaces: 26 | 27 | - :doc:`../source/kestrel_analytics_python.interface` 28 | 29 | - run a Python function as an analytics 30 | - require no additional software to run 31 | - simple and easy to write a new analytics 32 | - not limited to Python logic with process spawning support 33 | 34 | - :doc:`../source/kestrel_analytics_docker.interface` 35 | 36 | - run a Docker container as an analytics 37 | - could pack any black-box logic in an analytics 38 | 39 | Kestrel Analytics Repo 40 | ~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | Community-contributed Kestrel analytics are hosted at the `kestrel-analytics 43 | repo`_, which support execution via either the Python or Docker analytics 44 | interface. Currently there are Kestrel analytics for IP enrichment, threat 45 | intelligence enrichment, machine learning inference, plotting, complex 46 | visualization, clustering, suspicious process scoring, and log4shell 47 | deobfuscation. 
48 | 49 | Clone the `kestrel-analytics repo`_ to start using existing open-sourced analytics: 50 | 51 | .. code-block:: console 52 | 53 | $ git clone https://github.com/opencybersecurityalliance/kestrel-analytics.git 54 | 55 | Setup Python Analytics 56 | ~~~~~~~~~~~~~~~~~~~~~~ 57 | 58 | The Python analytics interface calls a Kestrel analytics directly in Python, so 59 | the interface is natively supported without any additional software. However, 60 | you need to make sure the analytics function you are using is executable, e.g., 61 | all dependencies for the analytics have been installed. 62 | 63 | To setup an analytics via the Python interface, you only need to tell Kestrel 64 | where the analytics module/function is: specifying analytics profiles at 65 | ``~/.config/kestrel/pythonanalytics.yaml``. You can follow the `Kestrel 66 | analytics example profile`_ in the `kestrel-analytics repo`_. To learn more 67 | including how to write your own Python analytics, visit 68 | :doc:`../source/kestrel_analytics_python.interface`. 69 | 70 | 71 | Setup Docker Analytics 72 | ~~~~~~~~~~~~~~~~~~~~~~ 73 | 74 | To setup a Kestrel Docker analytics, you need to have `docker`_ installed, and 75 | then build the docker container for that analytics. For example, to build a 76 | docker container for the `Pin IP`_ analytics, go to its source code, download 77 | ``GeoLite2-City.mmdb`` as instructed in README, and run the command: 78 | 79 | .. code-block:: console 80 | 81 | $ docker build -t kestrel-analytics-pinip . 82 | 83 | To learn more about how to write and run a Kestrel analytics through the Docker 84 | interface, visit :doc:`../source/kestrel_analytics_docker.interface` and our blog 85 | `Building Your Own Kestrel Analytics`_. 86 | 87 | What's to Do Next 88 | ~~~~~~~~~~~~~~~~~ 89 | 90 | - :ref:`tutorial:Run an Analytics` 91 | - :ref:`language/commands:APPLY` 92 | 93 | .. _kestrel-analytics repo: https://github.com/opencybersecurityalliance/kestrel-analytics 94 | .. 
_Kestrel analytics example profile: https://github.com/opencybersecurityalliance/kestrel-analytics/blob/release/pythonanalytics_sample.yaml 95 | .. _docker: https://www.docker.com/ 96 | .. _Building Your Own Kestrel Analytics: https://opencybersecurityalliance.org/posts/kestrel-custom-analytics/ 97 | .. _Pin IP: https://github.com/opencybersecurityalliance/kestrel-analytics/tree/release/analytics/piniponmap 98 | -------------------------------------------------------------------------------- /docs/installation/datasource.rst: -------------------------------------------------------------------------------- 1 | Connect to Data Sources 2 | ----------------------- 3 | 4 | Data sources, e.g., an EDR, a SIEM, a firewall, provide raw or processed data 5 | for hunting. Kestrel hunt steps such as :ref:`language/commands:GET` and 6 | :ref:`language/commands:FIND` generate code or queries to retrieve data, e.g., 7 | system logs or alerts, from data sources. 8 | 9 | Kestrel Data Source Abstraction 10 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Kestrel manages data sources in a two-level abstraction: a data source 13 | registers at a :doc:`../source/kestrel.datasource.interface`, which defines the 14 | way how a set of data sources are queried and ingested into Kestrel. In other 15 | words, Kestrel manages multiple data source interfaces at runtime, each of 16 | which manages a set of data sources with the same query method and ingestion 17 | procedure. Learn more about the abstraction in :doc:`../language/interface`. 
18 | 19 | Kestrel by default ships with the two most common data source interfaces: 20 | 21 | - :doc:`../source/kestrel_datasource_stixshifter.interface` 22 | 23 | - leverage `STIX-shifter`_ as a federated search layer 24 | - talk to more than a dozen different data sources 25 | 26 | - :doc:`../source/kestrel_datasource_stixbundle.interface` 27 | 28 | - use canned STIX bundle data for demo or development 29 | 30 | Setup STIX-shifter Data Source 31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | 33 | Once you get credentials for a data source, you need to tell Kestrel how to use 34 | them to connect. In other words, you need to create a profile for each data 35 | source. The profile: 36 | 37 | - names the data source to refer to in a huntbook, 38 | - specifies which `STIX-shifter connector`_ to use, 39 | - specifies how to connect to the data source, 40 | - gives additional configuration if needed for data source access. 41 | 42 | Check :doc:`../source/kestrel_datasource_stixshifter.interface` for details and 43 | examples of adding data source profiles. 44 | 45 | .. _STIX-shifter connector: https://github.com/opencybersecurityalliance/stix-shifter/blob/develop/OVERVIEW.md#available-connectors 46 | .. _STIX-shifter: https://github.com/opencybersecurityalliance/stix-shifter 47 | -------------------------------------------------------------------------------- /docs/installation/index.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | Installation And Setup 3 | ====================== 4 | 5 | Kestrel utilizes computing resources and interacts with the world in three ways: 6 | 7 | #. Huntflow organization and execution (core Kestrel compiler/interpreter/runtime) 8 | 9 | #. Data retrieval (graph pattern matching, relation resolution, etc.) 10 | 11 | #. Entity enrichment and extensible analytics (Kestrel analytics) 12 | 13 | Accordingly, to install and set up Kestrel: 14 | 15 | #. 
:doc:`Install the Kestrel runtime with a front-end of your choice <runtime>` 16 | Right after this step, you will be able to play with the 17 | :ref:`tutorial:Hello World Hunt`. However, this Kestrel environment does 18 | not have connections to any data sources or Kestrel analytics. 19 | 20 | #. :doc:`Configure data sources to use <datasource>` 21 | Kestrel ships with two data source interfaces 22 | (:doc:`../source/kestrel_datasource_stixshifter.interface` and 23 | :doc:`../source/kestrel_datasource_stixbundle.interface`). However, Kestrel 24 | does not know what data sources you have. You need to tell Kestrel where 25 | your data sources are and how to connect to them. This is done through data 26 | source configuration, especially :ref:`installation/datasource:Setup 27 | STIX-shifter Data Source`. 28 | 29 | #. :doc:`Set up Kestrel analytics <analytics>` 30 | Kestrel ships with two analytics interfaces by default 31 | (:doc:`../source/kestrel_analytics_python.interface` and 32 | :doc:`../source/kestrel_analytics_docker.interface`). You need to :ref:`get 33 | analytics <installation/analytics:Kestrel Analytics Repo>` and register them 34 | under any of the interfaces, e.g., adding configuration to the 35 | :doc:`../source/kestrel_analytics_python.interface`. 36 | 37 | Detailed subsections: 38 | 39 | .. toctree:: 40 | :maxdepth: 2 41 | 42 | runtime 43 | datasource 44 | analytics 45 | -------------------------------------------------------------------------------- /docs/language/index.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | Language Specification 3 | ====================== 4 | 5 | .. 
toctree:: 6 | :maxdepth: 2 7 | 8 | tac 9 | eav 10 | ecgp 11 | commands 12 | interface 13 | -------------------------------------------------------------------------------- /docs/overview/businesslogic.rst: -------------------------------------------------------------------------------- 1 | Threat hunting activities can be summarized by asking and answering two types of 2 | questions: 3 | 4 | - What to hunt? 5 | 6 | - What is the threat hypothesis? 7 | - What is the next step? 8 | - What threat intelligence should be added? 9 | - What machine learning models fit? 10 | 11 | - How to hunt? 12 | 13 | - How to query this EDR? 14 | - How to extract the field for the next query? 15 | - How to enrich this data? 16 | - How to plug in this machine learning model? 17 | 18 | Any threat hunting activity involves both types of questions and the answers 19 | to both questions contain domain-specific knowledge. However, the types of domain 20 | knowledge regarding these two types of questions are not the same. The answers 21 | to the *what* contain the domain knowledge that is highly creative, mostly 22 | abstract, and largely reusable from one hunt to another, while the answers to the 23 | *how* guide the realization of the *what* and are replaced from one hunting 24 | platform to another. 25 | 26 | To not repeat ourselves, we need to identify and split the *what* and *how* for 27 | all hunting steps and flows, and answer them separately -- the *what* will be 28 | reused in different parts of a hunt or different hunts, while the *how* will be 29 | developed to instantiate *what* regarding their different environments. 30 | 31 | With the understanding of the two types of domain knowledge involved in threat 32 | hunting, we can start to reuse domain knowledge regarding the questions of 33 | *what* and not repeat ourselves, yet we still need to answer the tremendous 34 | number of mundane questions of *how*, which are hunting platform-specific and 35 | not repeatable. 
Can we go further? 36 | -------------------------------------------------------------------------------- /docs/overview/hunting.rst: -------------------------------------------------------------------------------- 1 | Cyberthreat hunting is the planning and developing of threat discovery 2 | procedures against new and customized advanced persistent threats (APT). 3 | Cyberthreat hunting is comprised of several activities such as: 4 | 5 | #. Understanding the security measurements in the target environment. 6 | #. Thinking about potential threats escaping existing defenses. 7 | #. Obtaining useful observations from system and network activities. 8 | #. Developing threat hypotheses. 9 | #. Revising threat hypotheses iteratively with the last two steps. 10 | #. Confirming new threats. 11 | 12 | Threat hunters create customized intrusion detection system (IDS) instances 13 | every day with a combination of data source queries, complex data processing, 14 | machine learning, threat intelligence enrichment, proprietary detection logic, 15 | and more. Threat hunters take advantage of scripting languages, spreadsheets, 16 | whiteboards, and other tools to plan and execute their hunts. In traditional 17 | cyberthreat hunting, many pieces of hunts are written against specific data 18 | sources and data types, which makes the domain knowledge in them not reusable, 19 | and hunters need to express the same knowledge again and again for different 20 | hunts. 21 | 22 | *That's slow and tedious!* 23 | 24 | Can we improve it? 25 | -------------------------------------------------------------------------------- /docs/overview/index.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | What is Kestrel? 3 | ================ 4 | 5 | Cyberthreat Hunting 6 | =================== 7 | 8 | .. include:: hunting.rst 9 | 10 | Do Not Repeat Yourself 11 | ====================== 12 | 13 | .. 
include:: notrepeat.rst 14 | 15 | Business Logic + Execution 16 | ========================== 17 | 18 | .. include:: businesslogic.rst 19 | 20 | Human-Machine Symbiosis 21 | ======================= 22 | 23 | .. include:: symbiosis.rst 24 | 25 | Kestrel in a Nutshell 26 | ===================== 27 | 28 | .. include:: nutshell.rst 29 | 30 | Runtime Packages 31 | ================ 32 | 33 | .. include:: packages.rst 34 | -------------------------------------------------------------------------------- /docs/overview/notrepeat.rst: -------------------------------------------------------------------------------- 1 | - **Don't** Repeatedly write a Tactics, Techniques and Procedures (TTP) pattern 2 | in different endpoint detection and response (EDR) query languages. 3 | 4 | - **Do** Express all patterns in a common language so that it can be compiled to 5 | different EDR queries and Security Information and Event Management (SIEM) 6 | APIs. 7 | 8 | - **Don't** Repeatedly write dependent hunting steps such as getting child 9 | processes for suspicious processes against various record/log formats in 10 | different parts of a hunt. 11 | 12 | - **Do** Express flows of hunting steps in a common means that can be reused 13 | and re-executed at different parts of a hunt or even in different hunts. 14 | 15 | - **Don't** Repeatedly write different execution-environment adapters for an 16 | implemented domain-specific detection module or a proprietary detection box. 17 | 18 | - **Do** Express analytics execution with uniform input/output schema and 19 | encapsulating existing analytics to operate in a reusable manner. 20 | 21 | Reading carefully, you will find the examples of repeats are actually not 22 | literally repeating. Each repeat is a little different from its 23 | siblings due to their different execution environments. We need to take it a 24 | little bit further to find what is repeated and how to not repeat ourselves. 
25 | -------------------------------------------------------------------------------- /docs/overview/nutshell.rst: -------------------------------------------------------------------------------- 1 | Kestrel provides a layer of abstraction to stop the repetition involved in 2 | cyberthreat hunting. 3 | 4 | .. image:: ../images/overview.png 5 | :width: 100% 6 | :alt: Kestrel overview. 7 | 8 | - **Kestrel language**: a threat hunting language for a human to express *what to 9 | hunt*. 10 | 11 | - expressing the knowledge of *what* in patterns, analytics, and hunt flows. 12 | - composing reusable hunting flows from individual hunting steps. 13 | - reasoning with human-friendly entity-based data representation abstraction. 14 | - thinking across heterogeneous data and threat intelligence sources. 15 | - applying existing public and proprietary detection logic as analytic hunt steps. 16 | - reusing and sharing individual hunting steps, hunt-flow, and entire hunt books. 17 | 18 | - **Kestrel runtime**: a machine interpreter that deals with *how to hunt*. 19 | 20 | - compiling the *what* against specific hunting platform instructions. 21 | - executing the compiled code locally and remotely. 22 | - assembling raw logs and records into entities for entity-based reasoning. 23 | - caching intermediate data and related records for fast response. 24 | - prefetching related logs and records for link construction between entities. 25 | - defining extensible interfaces for data sources and analytics execution. 26 | -------------------------------------------------------------------------------- /docs/overview/packages.rst: -------------------------------------------------------------------------------- 1 | The entire Kestrel runtime consists of the following Python packages: 2 | 3 | - ``kestrel`` (repo: `kestrel-lang`_): The interpreter including parser, 4 | session management, code generation, data source and analytics interface 5 | managers, and a command-line front-end. 
6 | 7 | - ``firepit`` (repo: `firepit`_): The Kestrel internal data storage ingesting, 8 | processing, storing, caching, and linking data with Kestrel variables. 9 | 10 | - ``kestrel_datasource_stixshifter`` (repo: `kestrel-lang`_): The STIX-Shifter 11 | data source interface for managing data sources via STIX-Shifter. 12 | 13 | - ``kestrel_datasource_stixbundle`` (repo: `kestrel-lang`_): The data source 14 | interface for ingesting static telemetry data that is already sealed in STIX 15 | bundles. 16 | 17 | - ``kestrel_analytics_python`` (repo: `kestrel-lang`_): The analytics interface 18 | that calls analytics in Python. 19 | 20 | - ``kestrel_analytics_docker`` (repo: `kestrel-lang`_): The analytics interface 21 | that executes analytics in docker containers. 22 | 23 | - ``kestrel_jupyter_kernel`` (repo: `kestrel-jupyter`_): The Kestrel Jupyter 24 | Notebook kernel to use Kestrel in a Jupyter notebook. 25 | 26 | - ``kestrel_ipython`` (repo: `kestrel-jupyter`_): The iPython *magic command* 27 | realization for writing native Kestrel in iPython. 28 | 29 | .. _kestrel-lang: http://github.com/opencybersecurityalliance/kestrel-lang 30 | .. _firepit: http://github.com/opencybersecurityalliance/firepit 31 | .. _kestrel-jupyter: http://github.com/opencybersecurityalliance/kestrel-jupyter 32 | -------------------------------------------------------------------------------- /docs/overview/symbiosis.rst: -------------------------------------------------------------------------------- 1 | In traditional threat hunting, hunters answer both questions of *what to hunt* 2 | and *how to hunt*. While there is no doubt that human intelligence and creativity are 3 | the irreplaceable secret sauce of asking and answering the questions of the 4 | *what*, it is a waste of time to manually answer most questions of 5 | the *how*, which is just a translation between the knowledge in *what* and 6 | execution instructions specified by different hunting platforms. 
7 | 8 | We know that machines are good at solving translation problems with well-defined 9 | grammars fast. 10 | 11 | Why not create an *efficient cyberthreat hunting symbiosis* with humans and 12 | machines to ask and answer different types of hunting questions and enjoy their 13 | strengths and values? 14 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | docutils==0.17.1 2 | sphinx==5.3.0 3 | sphinx-rtd-theme==1.1.1 4 | sphinx-design==0.3.0 5 | -------------------------------------------------------------------------------- /docs/runtime.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | Runtime API 3 | =========== 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | source/kestrel.session 9 | source/kestrel.datasource.interface 10 | source/kestrel.datasource.retstruct 11 | source/kestrel_datasource_stixshifter.interface 12 | source/kestrel_datasource_stixbundle.interface 13 | source/kestrel.analytics.interface 14 | source/kestrel_analytics_docker.interface 15 | source/kestrel_analytics_python.interface 16 | -------------------------------------------------------------------------------- /docs/source/kestrel.analytics.interface.rst: -------------------------------------------------------------------------------- 1 | Kestrel Analytics Interface 2 | =========================== 3 | 4 | .. automodule:: kestrel.analytics.interface 5 | -------------------------------------------------------------------------------- /docs/source/kestrel.datasource.interface.rst: -------------------------------------------------------------------------------- 1 | Kestrel Data Source Interface 2 | ============================= 3 | 4 | .. 
automodule:: kestrel.datasource.interface 5 | -------------------------------------------------------------------------------- /docs/source/kestrel.datasource.retstruct.rst: -------------------------------------------------------------------------------- 1 | Kestrel Data Source ReturnStruct 2 | ================================ 3 | 4 | .. automodule:: kestrel.datasource.retstruct 5 | -------------------------------------------------------------------------------- /docs/source/kestrel.session.rst: -------------------------------------------------------------------------------- 1 | Kestrel Session 2 | =============== 3 | 4 | .. automodule:: kestrel.session 5 | -------------------------------------------------------------------------------- /docs/source/kestrel_analytics_docker.interface.rst: -------------------------------------------------------------------------------- 1 | Docker Analytics Interface 2 | ========================== 3 | 4 | .. automodule:: kestrel_analytics_docker.interface 5 | -------------------------------------------------------------------------------- /docs/source/kestrel_analytics_python.interface.rst: -------------------------------------------------------------------------------- 1 | Python Analytics Interface 2 | ========================== 3 | 4 | .. automodule:: kestrel_analytics_python.interface 5 | -------------------------------------------------------------------------------- /docs/source/kestrel_datasource_stixbundle.interface.rst: -------------------------------------------------------------------------------- 1 | STIX bundle Data Source Interface 2 | ================================== 3 | 4 | .. 
automodule:: kestrel_datasource_stixbundle.interface 5 | -------------------------------------------------------------------------------- /docs/source/kestrel_datasource_stixshifter.interface.rst: -------------------------------------------------------------------------------- 1 | STIX-shifter Data Source Interface 2 | ================================== 3 | 4 | .. automodule:: kestrel_datasource_stixshifter.interface 5 | -------------------------------------------------------------------------------- /docs/talks.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Talks and Demos 3 | =============== 4 | 5 | 2022 6 | ==== 7 | 8 | Kestrel was demoed at Black Hat USA 2022 in session `Streamlining and 9 | Automating Threat Hunting With Kestrel`_. The session is a blue team event 10 | composed of (i) TTP pattern matching, (ii) control/data-flow tracking of the 11 | cross-host threat, (iii) applying analytics, and (iv) automation with OpenC2. 12 | The session playback is available at `Kestrel Black Hat 2022 recording`_, and 13 | the `Black Hat 22 Kestrel Blue Team Lab`_ is released for everyone to play. 14 | 15 | Kestrel was invited to `Cybersecurity Automation Workshop 2022`_ and showcased 16 | automated hunting with `OpenC2`_. In the demo, a system issued OpenC2 commands 17 | to investigate multiple entities using a library of templated Kestrel 18 | huntbooks, and `SBOM`_ was used in one of the exploited process investigations. 19 | 20 | Kestrel was discussed at `SC eSummit on Threat Hunting & Offense 21 | Security`_ in an interview session `The ABCs of Kestrel: How the threat-hunting 22 | language enables efficiencies & interoperability`_. The session discussed 23 | the history, mission, key idea, community, and stories of Kestrel for 24 | threat hunters, enterprise executives, and security researchers. 
25 | 26 | 2021 27 | ==== 28 | 29 | Kestrel was demoed at `Infosec Jupyterthon 2021`_ in session: `Reason Cyber 30 | Campaigns With Kestrel`_. The live hunting demo explained the basics of Kestrel 31 | throughout the discovery of the hybrid cloud APT campaign developed for our 32 | Black Hat Europe 2021 session. 33 | 34 | Kestrel, together with `STIX-shifter`_, `Elastic`_, and `SysFlow`_ constitute 35 | the *open hunting stack* demoed at Black Hat Europe 2021: `An Open Stack for 36 | Threat Hunting in Hybrid Cloud With Connected Observability`_. A supply chain 37 | attack variant across a hybrid cloud (two clouds and on-premises machines) was 38 | hunted in the arsenal session. 39 | 40 | Kestrel was further introduced to the threat hunting community at `SANS Threat 41 | Hunting Summit 2021`_ in session `Compose Your Hunts With Reusable Knowledge 42 | and Share Your Huntbook With the Community`_ to facilitate huntbook 43 | composition, sharing, and reuse---from simple single hunt step demos (TTP 44 | pattern matching, provenance tracking, and data visualization analytics) to 45 | complex comprehensive hunt flow composition. 46 | 47 | Kestrel was debuted at RSA Conference 2021: `The Game of Cyber Threat Hunting: 48 | The Return of the Fun`_ with the goal of an :ref:`overview/index:Human-Machine 49 | Symbiosis`, its key design concepts :ref:`language/tac:Entity-Based Reasoning` 50 | and :ref:`language/tac:Composable Hunt Flow`, and a `small-enterprise APT 51 | hunting demo`_ with TTP pattern matching, cross-host provenance tracking, 52 | TI-enrichment, machine learning analytics, and more. 53 | 54 | 55 | 56 | .. _The Game of Cyber Threat Hunting\: The Return of the Fun: https://www.rsaconference.com/Library/presentation/USA/2021/The%20Game%20of%20Cyber%20Threat%20Hunting%20The%20Return%20of%20the%20Fun 57 | .. _small-enterprise APT hunting demo: https://www.youtube.com/watch?v=tASFWZfD7l8 58 | 59 | .. 
_SANS Threat Hunting Summit 2021: https://www.sans.org/blog/a-visual-summary-of-sans-threat-hunting-summit-2021/ 60 | .. _Compose Your Hunts With Reusable Knowledge and Share Your Huntbook With the Community: https://www.youtube.com/watch?v=gyY5DAWLwT0 61 | 62 | .. _STIX-shifter: https://github.com/opencybersecurityalliance/stix-shifter 63 | .. _Elastic: https://www.elastic.co/ 64 | .. _SysFlow: https://github.com/sysflow-telemetry 65 | .. _An Open Stack for Threat Hunting in Hybrid Cloud With Connected Observability: https://www.blackhat.com/eu-21/arsenal/schedule/index.html#an-open-stack-for-threat-hunting-in-hybrid-cloud-with-connected-observability-25112 66 | 67 | .. _Infosec Jupyterthon 2021: https://infosecjupyterthon.com/2021/agenda.html 68 | .. _Reason Cyber Campaigns With Kestrel: https://www.youtube.com/embed/nMnHBnYfIaI?start=20557&end=22695 69 | 70 | .. _SC eSummit on Threat Hunting & Offense Security: https://www.scmagazine.com/esummit/automating-the-hunt-for-advanced-threats 71 | .. _The ABCs of Kestrel\: How the threat-hunting language enables efficiencies & interoperability: https://www.scmagazine.com/esummit/automating-the-hunt-for-advanced-threats 72 | 73 | .. _Cybersecurity Automation Workshop 2022: http://www.cybersecurityautomationworkshop.org/ 74 | .. _OpenC2: https://openc2.org/ 75 | .. _SBOM: https://www.cisa.gov/sbom 76 | 77 | .. _Streamlining and Automating Threat Hunting With Kestrel: https://www.blackhat.com/us-22/arsenal/schedule/index.html#streamlining-and-automating-threat-hunting-with-kestrel-28014 78 | .. _Kestrel Black Hat 2022 recording: https://www.youtube.com/watch?v=tf1VLIpFefs 79 | .. 
_Black Hat 22 Kestrel Blue Team Lab: https://mybinder.org/v2/gh/opencybersecurityalliance/black-hat-us-2022/HEAD?filepath=demo 80 | -------------------------------------------------------------------------------- /logo/README.md: -------------------------------------------------------------------------------- 1 | #### Font 2 | 3 | Google Fonts: Actor 4 | -------------------------------------------------------------------------------- /logo/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/logo/logo.png -------------------------------------------------------------------------------- /logo/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | 8 | 9 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /logo/logo_w_text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/logo/logo_w_text.png -------------------------------------------------------------------------------- /logo/logo_w_text_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/logo/logo_w_text_white.png -------------------------------------------------------------------------------- /logo/logo_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/logo/logo_white.png -------------------------------------------------------------------------------- /logo/logo_white.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | 8 | 9 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /logo/svg2png.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # https://www.npmjs.com/package/svgexport 4 | 5 | for svgfile in *.svg 6 | do 7 | pngfile="${svgfile%.svg}.png" 8 | svgexport $svgfile $pngfile 100% 4x 9 | done 10 | -------------------------------------------------------------------------------- /mapping-examples/security-datasets/GoldenSAML/GoldenSAML_AADAuditEvents.yaml: -------------------------------------------------------------------------------- 1 | # SecurityDatasets GoldenSAML AADAuditEvents.json to OCSF mapping 2 | 3 | 4 | time: TimeGenerated 5 | 6 | 7 | # endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint 8 | device: &endpoint 9 | uid: TenantId 10 | 11 | 12 | # https://schema.ocsf.io/1.1.0/objects/http_request 13 | http_request: 14 | user_agent: UserAgent 15 | 16 | 17 | # https://schema.ocsf.io/1.1.0/objects/managed_entity 18 | entity: 19 | uid: 20 | - targetId 21 | - ModifiedApplicationObjectId 22 | type: 23 | - targetType 24 | - Type 25 | name: 26 | - targetDisplayName 27 | - ModifiedApplication 28 | data: TargetResources 29 | 30 | 31 | actor: 32 | user: 33 | endpoint: *endpoint 34 | uid: InitiatedBy.user.id 35 | name: InitiatedBy.user.userPrincipalName 36 | 37 | 38 | # https://schema.ocsf.io/1.2.0/classes/user_access 39 | privileges: Permissions 40 | 41 | 42 | type_uid: 43 | native_field: OperationName 44 | native_value: 45 | 300403: # Entity Management: Update 46 | - "Update application – Certificates and secrets management " 47 | - "Update application" 48 | 300501: # User Access Management: Assign Privileges 49 | - "Add delegated permission grant" 50 | 51 | type_name: OperationName 52 | 53 | status_id: 54 | - native_field: Result 55 | 
native_value: 56 | 1: # Success 57 | - "success" 58 | 2: # Failure 59 | - "failure" 60 | -------------------------------------------------------------------------------- /mapping-examples/security-datasets/GoldenSAML/GoldenSAML_Microsoft365DefenderEvents.yaml: -------------------------------------------------------------------------------- 1 | # SecurityDatasets GoldenSAML Microsoft365DefenderEvents.json to OCSF mapping 2 | 3 | 4 | time: Timestamp 5 | 6 | 7 | # endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint 8 | device: &endpoint 9 | hostname: DeviceName 10 | uid: DeviceId 11 | 12 | 13 | file: &file 14 | endpoint: *endpoint 15 | name: FileName 16 | 17 | 18 | # https://schema.ocsf.io/1.1.0/objects/process 19 | process: 20 | endpoint: *endpoint 21 | cmd_line: ProcessCommandLine 22 | pid: ProcessId 23 | uid: ProcessId 24 | hash: 25 | md5: MD5 26 | sha1: SHA1 27 | sha256: SHA256 28 | 29 | 30 | actor: 31 | process: 32 | endpoint: *endpoint 33 | cmd_line: InitiatingProcessCommandLine 34 | pid: InitiatingProcessId 35 | uid: InitiatingProcessId 36 | parent_process: 37 | endpoint: *endpoint 38 | pid: InitiatingProcessParentId 39 | uid: InitiatingProcessParentId 40 | file: 41 | name: InitiatingProcessParentFileName 42 | file: 43 | name: InitiatingProcessFileName 44 | path: InitiatingProcessFolderPath 45 | hash: 46 | md5: InitiatingProcessMD5 47 | sha1: InitiatingProcessSHA1 48 | sha256: InitiatingProcessSHA256 49 | parent_folder: 50 | native_field: InitiatingProcessFolderPath 51 | native_op: LIKE 52 | native_value: winpath_startswith 53 | ocsf_value: dirname 54 | user: 55 | endpoint: *endpoint 56 | uid: 57 | - InitiatingProcessAccountSid 58 | - AccountSid 59 | name: 60 | - InitiatingProcessAccountName 61 | - AccountName 62 | domain: 63 | - InitiatingProcessAccountDomain 64 | - AccountDomain 65 | 66 | 67 | # src_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint 68 | src_endpoint: 69 | ip: IPAddress 70 | port: Port 71 | 72 | 73 | # dst_endpoint: see 
https://schema.ocsf.io/1.1.0/objects/endpoint 74 | dst_endpoint: 75 | hostname: DestinationDeviceName 76 | ip: DestinationIPAddress 77 | port: DestinationPort 78 | 79 | 80 | # https://schema.ocsf.io/1.1.0/objects/user 81 | user: 82 | endpoint: *endpoint 83 | uid: AccountObjectId 84 | name: AccountName 85 | domain: AccountDomain 86 | 87 | 88 | # https://schema.ocsf.io/1.1.0/objects/http_request 89 | http_request: 90 | user_agent: UserAgent 91 | 92 | 93 | # https://schema.ocsf.io/1.1.0/objects/query_info 94 | query_info: 95 | uid: ReportId 96 | attr_list: AdditionalFields.AttributeList 97 | search_filter: AdditionalFields.SearchFilter 98 | 99 | 100 | # https://schema.ocsf.io/1.1.0/objects/managed_entity 101 | entity: 102 | uid: ReportId 103 | data: ActivityObjects 104 | 105 | 106 | # https://schema.ocsf.io/1.2.0/classes/user_access 107 | privileges: Permissions 108 | 109 | 110 | # https://schema.ocsf.io/1.1.0/classes/base_event 111 | # Base Event 112 | type_uid: 113 | native_field: ActionType 114 | native_value: 115 | 300403: # Entity Management: Update 116 | - "MailItemsAccessed" 117 | 300501: # User Access Management: Assign Privileges 118 | - "Add delegated permission grant." 
119 | 600504: # Datastore Activity: Query 120 | - "LdapSearch" 121 | 600599: # Datastore Activity: Other 122 | - "Directory Services replication" 123 | 124 | type_name: ActionType 125 | 126 | status_id: 127 | - native_field: RawEventData.ResultStatus 128 | native_value: 129 | 1: # Success 130 | - "Succeeded" 131 | 2: # Failure 132 | - "Failed" 133 | -------------------------------------------------------------------------------- /mapping-examples/security-datasets/GoldenSAML/GoldenSAML_OfficeActivityEvents.yaml: -------------------------------------------------------------------------------- 1 | # SecurityDatasets GoldenSAML OfficeActivityEvents.json to OCSF mapping 2 | 3 | 4 | time: TimeGenerated 5 | 6 | 7 | # endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint 8 | device: &endpoint 9 | uid: TenantId 10 | 11 | 12 | # src_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint 13 | src_endpoint: 14 | ip: Client_IPAddress 15 | port: Client_Port 16 | 17 | 18 | # https://schema.ocsf.io/1.1.0/objects/managed_entity 19 | entity: 20 | uid: MailboxGuid 21 | type: Type 22 | data: Folders 23 | 24 | 25 | actor: 26 | user: 27 | endpoint: *endpoint 28 | uid: LogonUserSid 29 | name: UserId 30 | 31 | 32 | type_uid: 33 | native_field: Operation 34 | native_value: 35 | 300403: # Entity Management: Update 36 | - "MailItemsAccessed" 37 | 38 | type_name: Operation 39 | 40 | status_id: 41 | - native_field: ResultStatus 42 | native_value: 43 | 1: # Success 44 | - "Succeeded" 45 | 2: # Failure 46 | - "Failed" 47 | -------------------------------------------------------------------------------- /mapping-examples/security-datasets/GoldenSAML/README.md: -------------------------------------------------------------------------------- 1 | # Golden SAML AD FS Mail Access 2 | 3 | This directory contains an example Kestrel data model mapping for the _Security Datasets_ Golden SAML dataset (https://securitydatasets.com/notebooks/compound/GoldenSAMLADFSMailAccess.html). 
4 | 5 | The dataset is in turn based on Microsoft's SimuLand Golden SAML Lab Guide (https://simulandlabs.com/labs/GoldenSAML/README.html) 6 | 7 | ## Setup 8 | 9 | ### Data Ingestion 10 | 11 | ``` 12 | kestrel-tool mkdb --db sqlite:///golden_saml.db --table WindowsEvents WindowsEvents.json 13 | ``` 14 | 15 | ### Data Source Configuration 16 | 17 | As an example, if using a datasource compatible with SQLAlchemy, add the connection info to your `sqlalchemy.yaml` file (e.g. `~/.config/kestrel/sqlalchemy.yaml`): 18 | 19 | ``` 20 | connections: 21 | goldensaml: 22 | url: sqlite:////home/user/datasources/golden_saml.db 23 | table_creation_permission: true 24 | ``` 25 | 26 | In the same file, add datasources for each table/file in the dataset: 27 | ``` 28 | datasources: 29 | WindowsEvents: 30 | connection: goldensaml 31 | table: WindowsEvents 32 | timestamp: TimeGenerated 33 | timestamp_format: "%Y-%m-%d %H:%M:%S.%fZ" 34 | data_model_map: "/home/user/.config/kestrel/GoldenSAML_WindowsEvents.yaml" 35 | ``` 36 | 37 | Copy the example data model maps from this directory into your Kestrel config directory (e.g. 
`~/.config/kestrel`) 38 | 39 | -------------------------------------------------------------------------------- /packages/kestrel_core/README.rst: -------------------------------------------------------------------------------- 1 | ../../README.rst -------------------------------------------------------------------------------- /packages/kestrel_core/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 68.2.2", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "kestrel_core" 7 | version = "2.0.0b" 8 | description = "Kestrel Threat Hunting Language" 9 | readme = "README.rst" 10 | requires-python = ">=3.8" 11 | license = {text = "Apache 2.0 License"} 12 | maintainers = [ 13 | {name = "Xiaokui Shu", email = "xiaokui.shu@ibm.com"}, 14 | {name = "Paul Coccoli", email = "pcoccoli@us.ibm.com"}, 15 | ] 16 | keywords = [ 17 | "kestrel", 18 | "language", 19 | "DSL", 20 | "cybersecurity", 21 | "threat hunting", 22 | "huntflow", 23 | "entity", 24 | ] 25 | classifiers = [ 26 | "Topic :: Security", 27 | "Operating System :: OS Independent", 28 | "Development Status :: 4 - Beta", 29 | "Programming Language :: Python :: 3", 30 | ] 31 | 32 | dependencies = [ 33 | "typeguard>=4.3.0", 34 | "pyyaml>=6.0.1", 35 | "lark>=1.1.9", 36 | "pandas>=2.0.3", # any higher version drops Python 3.8 support 37 | "pyarrow>=17.0.0", 38 | "mashumaro>=3.13.1", 39 | "networkx>=3.1", # any higher version drops Python 3.8 support 40 | "SQLAlchemy>=2.0.31", 41 | ] 42 | 43 | [project.optional-dependencies] 44 | dev = [ 45 | "black", 46 | ] 47 | test = [ 48 | "pytest", 49 | ] 50 | 51 | [project.urls] 52 | Homepage = "https://github.com/opencybersecurityalliance/kestrel-lang" 53 | Documentation = "https://kestrel.readthedocs.io/" 54 | Repository = "https://github.com/opencybersecurityalliance/kestrel-lang.git" 55 | 56 | [project.scripts] 57 | kestrel = "kestrel.cli:kestrel" 58 | 
ikestrel = "kestrel.cli:ikestrel" 59 | 60 | [tool.setuptools.packages.find] 61 | where = ["src"] 62 | 63 | [tool.setuptools.package-data] 64 | "*" = ["*.lark", "*.yaml", "*.csv"] 65 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/__future__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from typeguard import typechecked 4 | 5 | """Entrance to invoke any backward compatibility patch 6 | 7 | This module is for developers to quickly locate backward compatibility patches 8 | in Kestrel code and remove them over time. 9 | """ 10 | 11 | 12 | @typechecked 13 | def is_python_older_than_minor_version(minor: int) -> bool: 14 | return sys.version_info.minor < minor 15 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/__init__.py: -------------------------------------------------------------------------------- 1 | from kestrel.session import Session 2 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/analytics/__init__.py: -------------------------------------------------------------------------------- 1 | from .interface import PythonAnalyticsInterface 2 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/analytics/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from kestrel.config.utils import CONFIG_DIR_DEFAULT, load_user_config 4 | from kestrel.exceptions import InvalidAnalytics 5 | 6 | PROFILE_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "pythonanalytics.yaml" 7 | PROFILE_PATH_ENV_VAR = "KESTREL_PYTHON_ANALYTICS_CONFIG" 8 | 9 | _logger = logging.getLogger(__name__) 10 | 11 | 12 | def load_profiles(): 13 | config = load_user_config(PROFILE_PATH_ENV_VAR, PROFILE_PATH_DEFAULT) 14 | if 
config and "profiles" in config: 15 | _logger.debug(f"python analytics profiles found in config file") 16 | profiles = config["profiles"] 17 | else: 18 | _logger.info("no python analytics config with profiles found") 19 | profiles = {} 20 | _logger.debug(f"profiles loaded: {profiles}") 21 | return profiles 22 | 23 | 24 | def get_profile(profile_name, profiles): 25 | if profile_name not in profiles: 26 | raise InvalidAnalytics( 27 | profile_name, 28 | "python", 29 | f"no {profile_name} configuration found", 30 | ) 31 | else: 32 | profile = profiles[profile_name] 33 | _logger.debug(f"profile to use: {profile}") 34 | if "module" not in profile: 35 | raise InvalidAnalytics( 36 | profile_name, 37 | "python", 38 | f"no {profile_name} module defined", 39 | ) 40 | else: 41 | module_name = profile["module"] 42 | if "func" not in profile: 43 | raise InvalidAnalytics( 44 | profile_name, 45 | "python", 46 | f"no {profile_name} func defined", 47 | ) 48 | else: 49 | func_name = profile["func"] 50 | 51 | return module_name, func_name 52 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/cache/__init__.py: -------------------------------------------------------------------------------- 1 | from kestrel.cache.base import AbstractCache 2 | from kestrel.cache.inmemory import InMemoryCache 3 | from kestrel.cache.sql import SqlCache 4 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/cache/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import abstractmethod 4 | from typing import Iterable, List, MutableMapping 5 | from uuid import UUID 6 | 7 | from kestrel.config.internal import CACHE_INTERFACE_IDENTIFIER, CACHE_STORAGE_IDENTIFIER 8 | from kestrel.interface import DatasourceInterface 9 | from pandas import DataFrame 10 | 11 | 12 | class 
AbstractCache(DatasourceInterface, MutableMapping): 13 | """Base class for Kestrel cache""" 14 | 15 | @staticmethod 16 | def schemes() -> Iterable[str]: 17 | return [CACHE_INTERFACE_IDENTIFIER] 18 | 19 | def get_datasources(self) -> List[str]: 20 | return [] 21 | 22 | def get_storage_of_datasource(self, datasource: str) -> str: 23 | return CACHE_STORAGE_IDENTIFIER 24 | 25 | @abstractmethod 26 | def __del__(self): 27 | """Delete the cache and release memory/disk space""" 28 | ... 29 | 30 | @abstractmethod 31 | def __getitem__(self, instruction_id: UUID) -> DataFrame: 32 | """Get the dataframe for the cached instruction 33 | 34 | This method will automatically support `uuid in cache` 35 | 36 | Parameters: 37 | instruction_id: id of the instruction 38 | 39 | Returns: 40 | dataframe of the given (likely Variable) instruction 41 | """ 42 | ... 43 | 44 | @abstractmethod 45 | def __setitem__(self, instruction_id: UUID, data: DataFrame): 46 | """Store the dataframe of an instruction into cache 47 | 48 | Parameters: 49 | 50 | instruction_id: id of the instruction 51 | 52 | data: data associated with the instruction 53 | """ 54 | ... 55 | 56 | @abstractmethod 57 | def __delitem__(self, instruction_id: UUID): 58 | """Delete cached item 59 | 60 | Parameters: 61 | instruction_id: id of the instruction 62 | """ 63 | ... 64 | 65 | @abstractmethod 66 | def get_virtual_copy(self) -> AbstractCache: 67 | """Create a virtual cache object from this cache 68 | 69 | This method needs to reimplement __del__, __getitem__, __setitem__, 70 | __delitem__ to not actually hit the store media, e.g., SQLite. 71 | 72 | The virtual cache is useful for the implementation of the Explain() 73 | instruction, pretending the dependent graphs are evaluated, so the 74 | evaluation can continue towards the Return() instruction. 75 | 76 | Because Python invokes special methods from class methods, replacing 77 | the __getitem__, __setitem__, and __delitem__ in the object does not 78 | help. 
@typechecked
class InMemoryCache(AbstractCache):
    """Kestrel cache that keeps every cached DataFrame in a Python dict

    ``self.cache_catalog`` maps an instruction id to a storage key (the hex
    form of the id), and ``self.cache`` maps that storage key to the DataFrame.
    """

    def __init__(
        self,
        initial_cache: Optional[Mapping[UUID, DataFrame]] = None,
        session_id: Optional[UUID] = None,
    ):
        """Create the cache, optionally pre-populated

        Parameters:
            initial_cache: instruction id to DataFrame entries to store at creation
            session_id: id of the session, forwarded to the interface base class
        """
        # session_id must be passed by keyword: the first positional parameter
        # of AbstractInterface.__init__() is the serialized cache catalog
        super().__init__(session_id=session_id)
        self.cache: MutableMapping[str, DataFrame] = {}

        if initial_cache is not None:
            # update() will call __setitem__() internally
            self.update(initial_cache)

    def __del__(self):
        # __init__() may have failed before self.cache was assigned; a
        # finalizer should not raise in that case
        self.__dict__.pop("cache", None)

    def __getitem__(self, instruction_id: UUID) -> DataFrame:
        return self.cache[self.cache_catalog[instruction_id]]

    def __delitem__(self, instruction_id: UUID):
        del self.cache[self.cache_catalog[instruction_id]]
        del self.cache_catalog[instruction_id]

    def __setitem__(
        self,
        instruction_id: UUID,
        data: DataFrame,
    ):
        # the storage key is the hex form of the instruction id
        self.cache_catalog[instruction_id] = instruction_id.hex
        self.cache[self.cache_catalog[instruction_id]] = data

    def get_virtual_copy(self) -> AbstractCache:
        """Return a copy whose catalog and storage dicts are independent

        The object is shallow-copied, then ``cache_catalog`` and ``cache`` are
        each shallow-copied, so adding or deleting entries in the copy does
        not change this cache. The DataFrames themselves are shared.
        """
        v = copy(self)
        v.cache_catalog = copy(self.cache_catalog)
        v.cache = copy(self.cache)
        return v

    def evaluate_graph(
        self,
        graph: IRGraphEvaluable,
        cache: MutableMapping[UUID, Any],
        instructions_to_evaluate: Optional[Iterable[Instruction]] = None,
    ) -> Mapping[UUID, DataFrame]:
        """Evaluate instructions in the graph and cache their DataFrames

        Parameters:
            graph: the graph to evaluate
            cache: not used by this implementation; results are stored in ``self``
            instructions_to_evaluate: instructions to evaluate; the sink nodes
                of the graph if not specified or empty

        Returns:
            Mapping from instruction id to the evaluated DataFrame
        """
        mapping = {}
        if not instructions_to_evaluate:
            instructions_to_evaluate = graph.get_sink_nodes()
        for instruction in instructions_to_evaluate:
            df = self._evaluate_instruction_in_graph(graph, instruction)
            self[instruction.id] = df
            mapping[instruction.id] = df
        return mapping

    def explain_graph(
        self,
        graph: IRGraphEvaluable,
        instructions_to_explain: Optional[Iterable[Instruction]] = None,
    ) -> Mapping[UUID, GraphletExplanation]:
        """Explain instructions in the graph without evaluating them

        Parameters:
            graph: the graph to explain
            instructions_to_explain: instructions to explain; the sink nodes of
                the graph if not specified or empty

        Returns:
            Mapping from instruction id to a GraphletExplanation holding the
            serialized dependent subgraph and an empty "DataFrame" native query
        """
        mapping = {}
        if not instructions_to_explain:
            instructions_to_explain = graph.get_sink_nodes()
        for instruction in instructions_to_explain:
            dep_graph = graph.duplicate_dependent_subgraph_of_node(instruction)
            graph_dict = dep_graph.to_dict()
            query = NativeQuery("DataFrame", "")
            mapping[instruction.id] = GraphletExplanation(graph_dict, query)
        return mapping

    def _evaluate_instruction_in_graph(
        self, graph: IRGraphEvaluable, instruction: Instruction
    ) -> DataFrame:
        """Recursively evaluate one instruction, reusing cached results

        Raises:
            NotImplementedError: the instruction is neither cached, a
                SourceInstruction, nor a TransformingInstruction
        """
        if instruction.id in self:
            df = self[instruction.id]
        elif isinstance(instruction, SourceInstruction):
            df = evaluate_source_instruction(instruction)
        elif isinstance(instruction, TransformingInstruction):
            trunk, r2n = graph.get_trunk_n_branches(instruction)
            df = self._evaluate_instruction_in_graph(graph, trunk)
            if isinstance(instruction, (Return, Explain)):
                # pass-through: the result is the trunk's DataFrame
                pass
            elif isinstance(instruction, Variable):
                self[instruction.id] = df
            else:
                if isinstance(instruction, Filter):
                    # replace each ReferenceValue with a list of values
                    instruction.resolve_references(
                        lambda x: self._evaluate_instruction_in_graph(graph, r2n[x])
                    )
                df = evaluate_transforming_instruction(instruction, df)
        else:
            raise NotImplementedError(f"Unknown instruction type: {instruction}")
        return df
class IKestrel(cmd.Cmd):
    """Interactive Kestrel shell built on ``cmd.Cmd``

    Every input line is executed in the given session; tab completion is
    answered from the command list and the session.
    """

    prompt = "> "

    def __init__(self, session: Session):
        self.session = session
        self.buf = ""
        super().__init__()

    def default(self, line: str):
        """Execute the line in the session and print outputs or the Kestrel error"""
        try:
            display_outputs(self.session.execute(line))
        except KestrelError as err:
            print(err)

    def completenames(self, text, *ignored):
        """Complete the first word from commands and session variable names"""
        typed, _begin, _finish = ignored

        if typed.isupper():
            # Probably a command?
            return [name for name in CMDS if name.startswith(typed)]

        # Try all commands and vars
        candidates = [name for name in CMDS if name.lower().startswith(typed)]
        candidates.extend(
            var for var in self.session.get_variable_names() if var.startswith(typed)
        )
        return candidates

    def completedefault(self, *ignored):
        """Complete later words by asking the session for suffixes"""
        _text, typed, begin, finish = ignored
        suffixes = self.session.do_complete(typed, finish)
        # prepend what is already typed from the completion start position
        already_typed = typed[begin:]
        return [already_typed + tail for tail in suffixes]

    def do_EOF(self, _line: str):
        """Print a newline and return True so the command loop stops"""
        print()
        return True
default values 2 | language: 3 | default_sort_order: "desc" 4 | 5 | # debug mode 6 | debug: false 7 | 8 | # default identifier attribute(s) of an entity across all datasource interfaces 9 | # always provide a list as identifiers even it is a single identifier 10 | # if multiple attributes are specified, logic AND will be added in between 11 | entity_identifier: 12 | endpoint: 13 | - uid 14 | file: # "hashes[?algorithm_id == 3]" # sha256 15 | - name 16 | - endpoint.uid 17 | entity: 18 | - uid 19 | group: 20 | - uid 21 | process: 22 | - uid 23 | - endpoint.uid 24 | network_endpoint: 25 | - ip 26 | - port 27 | certificate: 28 | - serial_number 29 | user: 30 | - uid 31 | email: 32 | - uid 33 | query_info: 34 | - uid 35 | managed_entity: 36 | - uid 37 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/config/relations/entity.csv: -------------------------------------------------------------------------------- 1 | OutputType,InputType,Relation,OutputProjection,InputSpecifier,type_uid 2 | process,process,CREATED,process.parent_process,process,100701 3 | user,process,OWNED,process.user,process, 4 | user,managed_entity,CREATED,actor.user,entity,300401 5 | user,managed_entity,READ,actor.user,entity,300402 6 | user,managed_entity,UPDATED,actor.user,entity,300403 7 | user,managed_entity,DELETED,actor.user,entity,300404 8 | process,managed_entity,CREATED,actor.process,entity,300401 9 | process,managed_entity,READ,actor.process,entity,300402 10 | process,managed_entity,UPDATED,actor.process,entity,300403 11 | process,managed_entity,DELETED,actor.process,entity,300404 12 | process,query_info,CREATED,actor.process,query_info,300401 13 | user,query_info,CREATED,actor.user,query_info,300401 14 | process,file,LOADED,process,process.file, 15 | process,file,CREATED,actor.process,file,100101 16 | process,file,READ,actor.process,file,100102 17 | process,file,UPDATED,actor.process,file,100103 18 | 
process,file,DELETED,actor.process,file,100104 19 | user,file,CREATED,actor.user,file,100101 20 | user,file,READ,actor.user,file,100102 21 | user,file,UPDATED,actor.user,file,100103 22 | user,file,DELETED,actor.user,file,100104 23 | network_endpoint,email,CREATED,src_endpoint,email, 24 | network_endpoint,email,ACCEPTED,dst_endpoint,email, 25 | process,reg_key,CREATED,actor.process,reg_key,20100101 26 | process,reg_key,READ,actor.process,reg_key,20100102 27 | process,reg_key,UPDATED,actor.process,reg_key,20100103 28 | process,reg_key,DELETED,actor.process,reg_key,20100104 29 | process,reg_value,READ,actor.process,reg_value,20100201 30 | process,reg_value,CREATED,actor.process,reg_value,20100202 31 | process,reg_value,UPDATED,actor.process,reg_value,20100203 32 | process,reg_value,DELETED,actor.process,reg_value,20100204 33 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/config/relations/event.csv: -------------------------------------------------------------------------------- 1 | OutputType,Relation,OutputProjection 2 | endpoint,RESPONDED,device 3 | process,ORIGINATED,actor.process 4 | process,RESPONDED,process 5 | file,RESPONDED,file 6 | managed_entity,RESPONDED,entity 7 | network_endpoint,ORIGINATED,src_endpoint 8 | network_endpoint,RESPONDED,dst_endpoint 9 | reg_key,RESPONDED,reg_key 10 | reg_value,RESPONDED,reg_value 11 | user,ORIGINATED,actor.user 12 | user,RESPONDED,user 13 | query_info,RESPONDED,query_info 14 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/config/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from typing import List, Mapping, Union 5 | 6 | import pandas 7 | import yaml 8 | from kestrel.exceptions import ( 9 | InvalidKestrelConfig, 10 | InvalidKestrelRelationTable, 11 | InvalidYamlInConfig, 12 | ) 
@typechecked
def load_leaf_yaml(config: Mapping, path_dir: str) -> Mapping:
    """Replace every string value ending in ``.yaml`` with the content of that file

    Nested mappings are processed recursively; all other values are copied
    unchanged into the returned mapping.

    Parameters:
        config: the (possibly nested) configuration mapping
        path_dir: directory used to resolve relative YAML file paths

    Returns:
        A new mapping with the referenced YAML files loaded in place

    Raises:
        InvalidYamlInConfig: a referenced YAML file cannot be opened or parsed
    """
    new = {}
    for k, v in config.items():
        if isinstance(v, Mapping):
            new[k] = load_leaf_yaml(v, path_dir)
        elif isinstance(v, str) and v.endswith(".yaml"):
            # absolute paths are used as given; relative ones are resolved
            # against the directory of the referring config file
            yaml_path = v if os.path.isabs(v) else os.path.join(path_dir, v)
            try:
                with open(yaml_path, "r") as fp:
                    new[k] = yaml.safe_load(fp.read())
            except Exception as e:
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate; chaining keeps the root cause visible
                raise InvalidYamlInConfig(v) from e
        else:
            new[k] = v
    return new
@typechecked
def load_relation_configs(table_name: str) -> pandas.DataFrame:
    """Load relation tables

    Parameters:
        table_name: "entity" (entity-to-entity relation) or "event" (entity-to-event relation)

    Returns:
        Relation table in DataFrame, which has column names

    Raises:
        InvalidKestrelRelationTable: no relation table is found for
            ``table_name``, or the table cannot be read as CSV
    """
    filepaths = list(
        list_folder_files("kestrel.config", "relations", table_name, "csv")
    )
    if not filepaths:
        # without this guard, filepaths[0] raises IndexError a second time
        # inside the exception handler below
        raise InvalidKestrelRelationTable(table_name)
    if len(filepaths) > 1:
        _logger.error("More than one relation table found; will return the first one")
    try:
        table = pandas.read_csv(filepaths[0])
    except Exception as e:
        raise InvalidKestrelRelationTable(filepaths[0]) from e
    return table
@dataclass
class GraphletExplanation(DataClassJSONMixin):
    """Explanation of one graphlet: a serialized graph plus its associated action"""

    # serialized IRGraph (a mapping)
    graph: Mapping
    # either a NativeQuery (data source query) or an AnalyticOperation
    action: Union[NativeQuery, AnalyticOperation]
InvalidKestrelConfig(KestrelError): 42 | pass 43 | 44 | 45 | class MissingEntityIdentifierInConfig(KestrelError): 46 | pass 47 | 48 | 49 | class InvalidKestrelRelationTable(KestrelError): 50 | pass 51 | 52 | 53 | class UnsupportedObjectRelation(KestrelError): 54 | pass 55 | 56 | 57 | class DuplicatedRelationMapping(KestrelError): 58 | pass 59 | 60 | 61 | class VariableNotFound(KestrelError): 62 | pass 63 | 64 | 65 | class SourceNotFound(KestrelError): 66 | pass 67 | 68 | 69 | class ReferenceNotFound(KestrelError): 70 | pass 71 | 72 | 73 | class DataSourceNotFound(KestrelError): 74 | pass 75 | 76 | 77 | class DuplicatedVariable(KestrelError): 78 | pass 79 | 80 | 81 | class DuplicatedReference(KestrelError): 82 | pass 83 | 84 | 85 | class DuplicatedDataSource(KestrelError): 86 | pass 87 | 88 | 89 | class DuplicatedSingletonInstruction(KestrelError): 90 | pass 91 | 92 | 93 | class MultiInterfacesInGraph(KestrelError): 94 | pass 95 | 96 | 97 | class MultiSourcesInGraph(KestrelError): 98 | pass 99 | 100 | 101 | class LargerThanOneIndegreeInstruction(KestrelError): 102 | pass 103 | 104 | 105 | class DanglingReferenceInFilter(KestrelError): 106 | pass 107 | 108 | 109 | class DanglingFilter(KestrelError): 110 | pass 111 | 112 | 113 | class DuplicatedReferenceInFilter(KestrelError): 114 | pass 115 | 116 | 117 | class MissingReferenceInFilter(KestrelError): 118 | pass 119 | 120 | 121 | class InvalidSerializedDatasourceInterfaceCacheCatalog(KestrelError): 122 | pass 123 | 124 | 125 | class InevaluableInstruction(KestrelError): 126 | pass 127 | 128 | 129 | class MappingParseError(KestrelError): 130 | pass 131 | 132 | 133 | class InterfaceNotFound(KestrelError): 134 | pass 135 | 136 | 137 | class IRGraphMissingNode(KestrelError): 138 | pass 139 | 140 | 141 | class InterfaceNotConfigured(KestrelError): 142 | pass 143 | 144 | 145 | class InvalidInterfaceImplementation(KestrelError): 146 | pass 147 | 148 | 149 | class ConflictingInterfaceScheme(KestrelError): 150 | pass 151 | 152 
| 153 | class DataSourceError(KestrelError): 154 | pass 155 | 156 | 157 | class UnsupportedOperatorError(KestrelError): 158 | """The data source doesn't support this operator""" 159 | 160 | pass 161 | 162 | 163 | class IncompleteDataMapping(KestrelError): 164 | pass 165 | 166 | 167 | class InvalidAnalytics(KestrelError): 168 | pass 169 | 170 | 171 | class InvalidAnalyticsArgumentCount(KestrelError): 172 | pass 173 | 174 | 175 | class InvalidAnalyticsInterfaceImplementation(KestrelError): 176 | pass 177 | 178 | 179 | class InvalidAnalyticsOutput(KestrelError): 180 | pass 181 | 182 | 183 | class AnalyticsError(KestrelError): 184 | pass 185 | 186 | 187 | class SourceSchemaNotFound(KestrelError): 188 | pass 189 | 190 | 191 | class InvalidProjectEntityFromEntity(KestrelError): 192 | pass 193 | 194 | 195 | class EntityNotFound(KestrelError): 196 | pass 197 | 198 | 199 | class InvalidMappingWithMultipleIdentifierFields(KestrelError): 200 | pass 201 | 202 | 203 | class InvalidTransformerInMapping(KestrelError): 204 | pass 205 | 206 | 207 | class InvalidAttributes(KestrelError): 208 | pass 209 | -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/frontend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/packages/kestrel_core/src/kestrel/frontend/__init__.py -------------------------------------------------------------------------------- /packages/kestrel_core/src/kestrel/frontend/parser.py: -------------------------------------------------------------------------------- 1 | # parse Kestrel syntax, apply frontend mapping, transform to IR 2 | 3 | import logging 4 | from itertools import chain 5 | from typing import Iterable 6 | 7 | import lark 8 | import yaml 9 | from kestrel.config.utils import get_all_relations, load_relation_configs 10 | from 
@typechecked
def get_keywords(including_relations: bool = True):
    """List Kestrel keywords in both lower case and upper case

    Keywords are the alphanumeric terminals of the Kestrel grammar, plus the
    relation names from the relation tables when requested.

    Parameters:
        including_relations: whether to append the relation names

    Returns:
        List with the lower-case form of every keyword followed by the
        upper-case form of every keyword
    """
    grammar = load_data_file("kestrel.frontend", "kestrel.lark")
    parser = lark.Lark(grammar, parser="lalr")
    keywords = [
        t.pattern.value for t in parser.terminals if t.pattern.value.isalnum()
    ]
    # honor the flag: relations are only looked up and added when asked for
    if including_relations:
        keywords += get_all_relations()
    return [k.lower() for k in keywords] + [k.upper() for k in keywords]
@typechecked
def parse_kestrel_and_update_irgraph(
    stmts: str, irgraph: IRGraph, entity_identifier_map: dict
) -> Iterable[Return]:
    """Parse a Kestrel code block and update the given IRGraph

    Parameters:
        stmts: Kestrel code block (statements)
        irgraph: existing IRGraph (used for reference resolution; will be updated)
        entity_identifier_map: identifiers for each entity, required by FIND

    Returns:
        List of Return instructions in the current code block
    """
    grammar = load_data_file("kestrel.frontend", "kestrel.lark")

    # the transformer turns the parse tree into IR while parsing
    ir_transformer = _KestrelT(
        irgraph,
        get_frontend_mapping("fields", True),
        get_frontend_mapping("types"),
        get_relation_table("entity"),
        get_relation_table("event"),
        entity_identifier_map,
    )

    kestrel_parser = lark.Lark(grammar, parser="lalr", transformer=ir_transformer)
    return kestrel_parser.parse(stmts)
class AbstractInterface(ABC):
    """Abstract class for datasource/analytics interface

    Concepts:

    - Think an interface as a type of datalakes

    - Think a storage as a datalake

    - Think a datasource as a table in the datalake

    Attributes:

        session_id: the optional information to derive table name in datalake

        cache_catalog: map a cached item (instruction.id) to datalake table/view name
    """

    def __init__(
        self,
        serialized_cache_catalog: Optional[str] = None,
        session_id: Optional[UUID] = None,
    ):
        """Initialize the interface, optionally restoring a cache catalog

        Parameters:

            serialized_cache_catalog: JSON string as produced by `cache_catalog_to_json()`

            session_id: the optional information to derive table name in datalake

        Raises:

            InvalidSerializedDatasourceInterfaceCacheCatalog: the serialized catalog cannot be decoded
        """
        self.session_id = session_id
        self.cache_catalog: MutableMapping[UUID, str] = {}

        if serialized_cache_catalog:
            try:
                loaded = json.loads(serialized_cache_catalog)
                # JSON object keys are always strings; turn them back into
                # UUIDs so lookups by instruction.id work after a reload
                self.cache_catalog = {UUID(k): v for k, v in loaded.items()}
            except Exception as e:
                raise InvalidSerializedDatasourceInterfaceCacheCatalog() from e

    # Python 3.13 will drop chain of @classmethod and @property
    # use @staticmethod instead (cannot make it a property)
    @staticmethod
    @abstractmethod
    def schemes() -> Iterable[str]:
        """The schemes to specify the interface

        Each scheme should be defined as ``("_"|LETTER) ("_"|LETTER|DIGIT)*``
        """
        ...

    @abstractmethod
    def get_datasources(self) -> List[str]:
        """Get the list of datasource names registered at this interface"""
        ...

    @abstractmethod
    def get_storage_of_datasource(self, datasource: str) -> str:
        """Get the storage name of a given datasource"""
        ...

    @abstractmethod
    def store(
        self,
        instruction_id: UUID,
        data: DataFrame,
    ):
        """Create a new table in the datalake from a dataframe

        The name of the table is a function of instruction_id (and session_id)
        in case there are conflicting tables in the datalake.

        The function can be implemented as a hashtable. If the hash collides
        with an existing hash, figure out whether the existing hash/table is
        used by the current interface and session. If yes, then replace; if
        not, then generate a new random value and record in self.cache_catalog.

        This method will update self.cache_catalog.

        Parameters:

            instruction_id: the key to be placed in `self.cache_catalog`

            data: the dataframe to store
        """
        ...

    @abstractmethod
    def evaluate_graph(
        self,
        graph: IRGraphEvaluable,
        cache: MutableMapping[UUID, Any],
        instructions_to_evaluate: Optional[Iterable[Instruction]] = None,
    ) -> Mapping[UUID, DataFrame]:
        """Evaluate the IRGraph

        Parameters:

            graph: The evaluable IRGraph

            cache: NOTE(review): undocumented; presumably the session cache keyed by instruction id -- confirm

            instructions_to_evaluate: instructions to evaluate and return; by default, it will be all Return instructions in the graph

        Returns:

            DataFrames for each instruction in instructions_to_evaluate.
        """
        ...

    @abstractmethod
    def explain_graph(
        self,
        graph: IRGraphEvaluable,
        instructions_to_explain: Optional[Iterable[Instruction]] = None,
    ) -> Mapping[UUID, GraphletExplanation]:
        """Explain how to evaluate the IRGraph

        Parameters:

            graph: The evaluable IRGraph

            instructions_to_explain: instructions to explain and return; by default, it will be all Return instructions in the graph

        Returns:

            GraphletExplanation (a Kestrel Display object) for each instruction in instructions_to_explain.
        """
        ...

    def cache_catalog_to_json(self) -> str:
        """Serialize the cache catalog to a JSON string"""
        # json.dumps() rejects UUID keys, so serialize each key as a string
        return json.dumps({str(k): v for k, v in self.cache_catalog.items()})
# basically a scheme to interface mapping
@typechecked
class InterfaceManager(Mapping):
    """Read-only mapping from an interface scheme to the interface serving it

    Attributes:

        interfaces: the loaded interface instances; the cache interface (if
        any) is in this list but is hidden from iteration and `len()`
    """

    def __init__(self, init_interfaces: Iterable[AbstractInterface] = ()):
        """Load every discoverable interface in addition to `init_interfaces`

        Parameters:

            init_interfaces: already-instantiated interfaces to register first

        Interface classes whose constructor raises `InterfaceNotConfigured`
        are skipped with a debug log entry.
        """
        interface_classes = _load_interface_classes()
        self.interfaces = list(init_interfaces)  # copy/recreate the list
        for iface_cls in interface_classes:
            try:
                iface = iface_cls()
            except InterfaceNotConfigured:
                _logger.debug(f"Interface {iface_cls.__name__} not configured; ignored")
                continue
            _logger.debug(f"Initialize interface {iface_cls.__name__}")
            self.interfaces.append(iface)

    def __getitem__(self, scheme: str) -> AbstractInterface:
        """Return the first loaded interface that declares `scheme`

        Raises:

            InterfaceNotFound: no loaded interface declares `scheme`
        """
        # NOTE(review): Mapping.__contains__ and Mapping.get() only swallow
        # KeyError; confirm InterfaceNotFound derives from it if `in`/`get`
        # are expected to work on this class
        for interface in self.interfaces:
            if scheme in interface.schemes():
                return interface
        raise InterfaceNotFound(f"no interface loaded for scheme {scheme}")

    def __iter__(self) -> Iterable[str]:
        """Iterate over all schemes except the cache interface identifier"""
        return filter(
            lambda x: x != CACHE_INTERFACE_IDENTIFIER,
            itertools.chain(*[i.schemes() for i in self.interfaces]),
        )

    def __len__(self) -> int:
        """Count the schemes yielded by `__iter__`"""
        return sum(1 for _ in self)

    def copy_with_virtual_cache(self) -> InterfaceManager:
        """Return a shallow copy whose cache interface is a virtual copy"""
        im = copy(self)
        # shallow copy refers to the same list, so create/copy a new one
        im.interfaces = copy(im.interfaces)
        # now swap in virtual cache
        cache = im[CACHE_INTERFACE_IDENTIFIER]
        im.interfaces.remove(cache)
        im.interfaces.append(cache.get_virtual_copy())
        return im

    def del_cache(self):
        """Unregister the cache interface from this manager"""
        cache = self[CACHE_INTERFACE_IDENTIFIER]
        self.interfaces.remove(cache)

    def schemes(self, interface_type: type) -> Iterable[str]:
        """Iterate over the schemes of interfaces that are `interface_type` instances

        The cache interface identifier is excluded.
        """
        return filter(
            lambda x: x != CACHE_INTERFACE_IDENTIFIER,
            itertools.chain(
                *[i.schemes() for i in self.interfaces if isinstance(i, interface_type)]
            ),
        )

    def list_datasources_from_scheme(self, scheme: str) -> Iterable[str]:
        """Return the datasources of the interface that serves `scheme`"""
        return self[scheme].get_datasources()
@typechecked
def _guard_scheme_conflict(
    new_interface: Type[AbstractInterface],
    interfaces: Iterable[Type[AbstractInterface]],
):
    """Refuse `new_interface` if it shares a scheme with a known interface

    Parameters:

        new_interface: the interface class about to be registered

        interfaces: the interface classes registered so far

    Raises:

        ConflictingInterfaceScheme: a scheme is declared by both classes
    """
    for known in interfaces:
        # product() walks the pairs in the same order as two nested loops
        scheme_pairs = itertools.product(new_interface.schemes(), known.schemes())
        for scheme_new, scheme_old in scheme_pairs:
            if scheme_new != scheme_old:
                continue
            raise ConflictingInterfaceScheme(
                f"scheme: {scheme_new} conflicting between {new_interface.__name__} and {known.__name__}"
            )
class Path:
    """File path wrapper that follows either Windows or POSIX conventions"""

    def __init__(self, raw_path: str):
        """Pick the path flavour from the separators found in `raw_path`"""
        # Any backslash means Windows. A forward slash without a backslash
        # means POSIX. With no separator at all we have to guess, and the
        # guess is Windows.  TODO: more advanced detection?
        looks_posix = "\\" not in raw_path and "/" in raw_path
        flavour = PurePosixPath if looks_posix else PureWindowsPath
        self._path = flavour(raw_path)

    def basename(self) -> str:
        """Return the final path component, without leading directories"""
        final_component = self._path.name
        return str(final_component)

    def dirname(self) -> str:
        """Return everything but the final component ("." for a single component)"""
        parent_dir = self._path.parent
        return str(parent_dir)
@transformer
def to_int(value) -> int:
    """Ensure `value` is an int

    `value` is first converted with `int(value)`. If that raises
    `ValueError` it is retried as a hexadecimal string (e.g. ``"0x4d2"``).
    If the hexadecimal attempt fails as well, -1 is returned.
    """
    try:
        return int(value)
    except ValueError:
        # Maybe it's a hexadecimal string?
        try:
            return int(value, 16)
        except (TypeError, ValueError):
            # ValueError: not hexadecimal either
            # TypeError: int() with an explicit base only accepts strings
            # (reached e.g. for float NaN, which fails int() with ValueError)
            return -1
def get_type_from_projection(proj) -> str:
    """Look up the type registered for projection `proj`

    The module-level `type_mapping` cache is filled on first use from every
    YAML file in the "types" folder of the mapping module. A projection
    without an entry yields the fallback string.
    """
    if not type_mapping:
        # the cache is only mutated in place, never rebound
        for yaml_path in list_folder_files(MAPPING_MODULE, "types", extension="yaml"):
            with open(yaml_path, "r") as stream:
                type_mapping.update(yaml.safe_load(stream))
    # NOTE: the fallback spelling is kept as-is; callers may compare against it
    return type_mapping.get(proj, "unkown_entity")
@typechecked
def load_data_file(package_name: str, file_name: str) -> str:
    """Return the text content of `file_name` shipped inside `package_name`

    Parameters:

        package_name: importable name of the package that holds the file

        file_name: name of the data file inside the package

    Raises:

        AttributeError: raised while reading the resource on an interpreter
        that is not handled by the legacy fallback below
    """
    try:
        # resources.files() is introduced in Python 3.9
        content = resources.files(package_name).joinpath(file_name).read_text()
    except AttributeError:
        if not is_python_older_than_minor_version(9):
            # Not the "resources.files() is missing" situation: propagate the
            # real error instead of failing later on an unbound `content`
            raise
        # Python 3.8; deprecation warning forward
        content = get_data(package_name, file_name).decode("utf-8")

    return content
@typechecked
def unescape_quoted_string(s: str) -> str:
    """Strip the quotes from a string literal and resolve its escape sequences

    Parameters:

        s: the literal including its quotes; a leading ``r`` marks a raw
        literal, whose content is returned verbatim

    Returns:

        The content between the quotes. For non-raw literals, backslash
        escapes (``\\n``, ``\\x41``, ``\\u00e9``, ...) are decoded.
    """
    if s.startswith("r"):
        # raw literal: drop the `r` prefix and both quotes, keep backslashes
        return s[2:-1]
    else:
        # The unicode_escape codec decodes its input as Latin-1, so encoding
        # with UTF-8 first would turn every non-ASCII character into mojibake.
        # Encode as Latin-1 instead and let backslashreplace express anything
        # outside Latin-1 as an escape that the decoder restores.
        return s[1:-1].encode("latin-1", "backslashreplace").decode("unicode_escape")
_logger = logging.getLogger(__name__)


def do_something(df: DataFrame, **kwargs):
    """Pseudo-analytic: add one column per keyword argument and return `df`"""
    _logger.debug("python analytics: run pseudo-analytic")
    for column in kwargs:
        df[column] = kwargs[column]
    return df


def do_something_no_annotations(df):
    """Pseudo-analytic configured through the `name` and `value` env vars"""
    _logger.debug("python analytics: run pseudo-analytic with env vars")
    column = os.getenv("name", "new_column")
    df[column] = int(os.getenv("value", 0))
    return df


def do_something_env(df: DataFrame):
    """Annotated entry point that delegates to `do_something_no_annotations`"""
    result = do_something_no_annotations(df)
    return result
def test_empty_env_var_in_config(tmp_path, monkeypatch):
    """A reference to an unset environment variable is left untouched"""
    test_config = """---
credentials:
  username: ${TEST_USER}
  password: ${TEST_PASSWORD}
testattr:
  cache_directory_prefix: $I_DONT_EXIST
"""
    # a per-test directory instead of a hard-coded /tmp path keeps the test
    # portable and safe to run concurrently
    config_path = tmp_path / "config.yaml"
    # monkeypatch restores the environment after the test
    monkeypatch.setenv("TEST_USER", "test-user")
    monkeypatch.setenv("TEST_PASSWORD", "test-password")
    monkeypatch.setenv("KESTREL_CONFIG", str(config_path))
    monkeypatch.setenv("KESTREL_CACHE_DIRECTORY_PREFIX", "Kestrel2.0-")
    # the assertion below only holds if the variable really is unset
    monkeypatch.delenv("I_DONT_EXIST", raising=False)
    with open(config_path, "w") as fp:
        fp.write(test_config)
    config = load_kestrel_config()
    assert config["credentials"]["username"] == "test-user"
    assert config["credentials"]["password"] == "test-password"
    assert config["testattr"]["cache_directory_prefix"] == "$I_DONT_EXIST"
"""--- 65 | credentials: 66 | username: ${TEST_USER} 67 | password: ${TEST_PASSWORD} 68 | loadtest: 69 | xyz: 70 | abc: abc.yaml 71 | """ 72 | os.environ["TEST_USER"] = "test-user" 73 | os.environ["TEST_PASSWORD"] = "test-password" 74 | os.environ["KESTREL_CONFIG"] = os.path.join(tmp_path, "config.yaml") 75 | with open(os.getenv("KESTREL_CONFIG"), "w") as fp: 76 | fp.write(test_config) 77 | with open(os.path.join(tmp_path, "abc.yaml"), "w") as fp: 78 | fp.write("test: fake-value") 79 | config = load_kestrel_config() 80 | assert config["credentials"]["username"] == "test-user" 81 | assert config["credentials"]["password"] == "test-password" 82 | assert config["loadtest"]["xyz"]["abc"]["test"] == "fake-value" 83 | -------------------------------------------------------------------------------- /packages/kestrel_core/tests/test_interface_datasource_codegen_dataframe.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas import DataFrame 3 | 4 | from kestrel.frontend.parser import parse_kestrel_and_update_irgraph 5 | from kestrel.interface.codegen.dataframe import ( 6 | evaluate_source_instruction, evaluate_transforming_instruction) 7 | from kestrel.ir.graph import IRGraph 8 | from kestrel.ir.instructions import Construct, Limit, ProjectAttrs, Variable 9 | from kestrel.interface.codegen.utils import variable_attributes_to_dataframe 10 | 11 | 12 | def test_evaluate_Construct(): 13 | data = [ {"name": "cmd.exe", "pid": 123} 14 | , {"name": "explorer.exe", "pid": 99} 15 | , {"name": "firefox.exe", "pid": 201} 16 | , {"name": "chrome.exe", "pid": 205} 17 | ] 18 | ins = Construct(data, "process") 19 | df = evaluate_source_instruction(ins) 20 | assert df.equals(DataFrame(data)) 21 | 22 | 23 | def test_non_exist_eval(): 24 | with pytest.raises(NotImplementedError): 25 | evaluate_transforming_instruction(Variable("asdf", "foo", "bar"), DataFrame()) 26 | 27 | 28 | def test_evaluate_Limit(): 29 | data = [ {"name": 
def test_evaluate_Construct_Filter_ProjectAttrs():
    """Evaluate Construct, Filter and ProjectAttrs instructions parsed from Kestrel"""
    stmt = r"""
proclist = NEW process [ {"name": "cmd.exe", "pid": 123}
                       , {"name": "explorer.exe", "pid": 99}
                       , {"name": "firefox.exe", "pid": 201}
                       , {"name": "chrome.exe", "pid": 205}
                       ]
browsers = proclist WHERE name = 'firefox.exe' OR name = 'chrome.exe'
DISP browsers ATTR name, pid
p2 = proclist WHERE pid > 100
p3 = proclist WHERE name LIKE "c%.exe"
p4 = proclist WHERE name MATCHES r"^c\w{2}\.exe"
"""
    graph = IRGraph()
    parse_kestrel_and_update_irgraph(stmt, graph, {})
    c = graph.get_nodes_by_type(Construct)[0]
    df0 = evaluate_source_instruction(c)
    assert df0.to_dict("records") == [
        {"name": "cmd.exe", "pid": 123},
        {"name": "explorer.exe", "pid": 99},
        {"name": "firefox.exe", "pid": 201},
        {"name": "chrome.exe", "pid": 205},
    ]

    browsers = graph.get_variable("browsers")
    ft = next(graph.predecessors(browsers))
    dfx = evaluate_transforming_instruction(ft, df0)
    assert dfx.to_dict("records") == [
        {"name": "firefox.exe", "pid": 201},
        {"name": "chrome.exe", "pid": 205},
    ]
    proj = next(graph.successors(browsers))
    dfy = evaluate_transforming_instruction(proj, dfx)
    # check the projection result itself (dfy), not the filter result again
    assert dfy.to_dict("records") == [
        {"name": "firefox.exe", "pid": 201},
        {"name": "chrome.exe", "pid": 205},
    ]

    ft = next(graph.predecessors(graph.get_variable("p2")))
    dfx = evaluate_transforming_instruction(ft, df0)
    assert dfx.to_dict("records") == [
        {"name": "cmd.exe", "pid": 123},
        {"name": "firefox.exe", "pid": 201},
        {"name": "chrome.exe", "pid": 205},
    ]

    ft = next(graph.predecessors(graph.get_variable("p3")))
    dfx = evaluate_transforming_instruction(ft, df0)
    assert dfx.to_dict("records") == [
        {"name": "cmd.exe", "pid": 123},
        {"name": "chrome.exe", "pid": 205},
    ]

    ft = next(graph.predecessors(graph.get_variable("p4")))
    dfx = evaluate_transforming_instruction(ft, df0)
    assert dfx.to_dict("records") == [{"name": "cmd.exe", "pid": 123}]
| def _remove_nl(s): 24 | return s.replace('\n', '') 25 | 26 | 27 | @pytest.mark.parametrize( 28 | "iseq, sql", [ 29 | # Try a simple filter 30 | ([Filter(IntComparison('foo', NumCompOp.GE, 0))], 31 | "SELECT DISTINCT * FROM my_table WHERE foo >= ?"), 32 | # Try a simple filter with sorting 33 | ([Filter(IntComparison('foo', NumCompOp.GE, 0)), Sort('bar')], 34 | "SELECT DISTINCT * FROM my_table WHERE foo >= ? ORDER BY bar DESC"), 35 | # Simple filter plus time range 36 | ([Filter(IntComparison('foo', NumCompOp.GE, 0), timerange=TimeRange(_dt('2023-12-06T08:17:00Z'), _dt('2023-12-07T08:17:00Z')))], 37 | "SELECT DISTINCT * FROM my_table WHERE foo >= ? AND timestamp >= ? AND timestamp < ?"), 38 | # sqlalchemy's sqlite dialect seems to always add the offset 39 | ([Limit(3), ProjectAttrs(['foo', 'bar', 'baz']), Filter(StrComparison('foo', StrCompOp.EQ, 'abc'))], 40 | "SELECT DISTINCT foo, bar, baz FROM my_table WHERE foo = ? LIMIT ? OFFSET ?"), 41 | # Same as above but reverse order 42 | ([Filter(StrComparison('foo', StrCompOp.EQ, 'abc')), ProjectAttrs(['foo', 'bar', 'baz']), Limit(3)], 43 | "SELECT DISTINCT foo, bar, baz FROM my_table WHERE foo = ? LIMIT ? OFFSET ?"), 44 | ([Filter(ListComparison('foo', ListOp.NIN, ['abc', 'def']))], 45 | "SELECT DISTINCT * FROM my_table WHERE (foo NOT IN (__[POSTCOMPILE_foo_1]))"), # POSTCOMPILE is some SQLAlchemy-ism 46 | ([Filter(StrComparison('foo', StrCompOp.MATCHES, '.*abc.*'))], 47 | "SELECT DISTINCT * FROM my_table WHERE foo REGEXP ?"), 48 | ([Filter(StrComparison('foo', StrCompOp.NMATCHES, '.*abc.*'))], 49 | "SELECT DISTINCT * FROM my_table WHERE foo NOT REGEXP ?"), 50 | ([Filter(MultiComp(ExpOp.OR, [IntComparison('foo', NumCompOp.EQ, 1), IntComparison('bar', NumCompOp.EQ, 1)]))], 51 | "SELECT DISTINCT * FROM my_table WHERE foo = ? OR bar = ?"), 52 | ([Filter(MultiComp(ExpOp.AND, [IntComparison('foo', NumCompOp.EQ, 1), IntComparison('bar', NumCompOp.EQ, 1)]))], 53 | "SELECT DISTINCT * FROM my_table WHERE foo = ? 
AND bar = ?"), 54 | ([Limit(1000), Offset(2000)], 55 | "SELECT DISTINCT * FROM my_table LIMIT ? OFFSET ?"), 56 | ] 57 | ) 58 | def test_sql_translator(iseq, sql): 59 | trans = SqlTranslator(sqlalchemy.dialects.sqlite.dialect(), "my_table", ["foo", "bar", "baz"], None, None, _time2string, "timestamp") 60 | for i in iseq: 61 | trans.add_instruction(i) 62 | result = trans.result() 63 | assert _remove_nl(str(result)) == sql 64 | -------------------------------------------------------------------------------- /packages/kestrel_core/tests/test_ir_instructions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas import DataFrame 3 | 4 | from kestrel.exceptions import InvalidDataSource, InvalidSeralizedInstruction 5 | from kestrel.ir.instructions import (CACHE_INTERFACE_IDENTIFIER, Construct, 6 | DataSource, Variable, 7 | get_instruction_class, 8 | instruction_from_dict, 9 | instruction_from_json) 10 | 11 | 12 | def test_instruction_post_init(): 13 | v = Variable("asdf", "foo", "bar") 14 | j = v.to_dict() 15 | assert "id" in j 16 | assert "instruction" in j 17 | assert j["instruction"] == "Variable" 18 | 19 | 20 | def test_stable_id(): 21 | v = Variable("asdf", "foo", "bar") 22 | _id = v.id 23 | v.name = "qwer" 24 | assert v.id == _id 25 | 26 | 27 | def test_stable_hash(): 28 | s = DataSource("stixshifter://abc") 29 | h1 = hash(s) 30 | s.datasource = "abcd" 31 | h2 = hash(s) 32 | assert h1 == h2 33 | 34 | 35 | def test_eq(): 36 | s1 = DataSource("stixshifter://abc") 37 | s2 = DataSource("stixshifter://abc") 38 | s3 = instruction_from_dict(s1.to_dict()) 39 | assert s1 != s2 40 | assert s1 == s3 41 | 42 | 43 | def test_get_instruction_class(): 44 | cls = get_instruction_class("Variable") 45 | v = cls("asdf", "foo", "bar") 46 | assert cls == Variable 47 | assert isinstance(v, Variable) 48 | 49 | 50 | def test_add_source(): 51 | s = DataSource("stixshifter://abc") 52 | j = s.to_dict() 53 | assert j["interface"] 
== "stixshifter" 54 | assert j["datasource"] == "abc" 55 | assert "id" in j 56 | assert "instruction" in j 57 | assert "uri" not in j 58 | assert "default_interface" not in j 59 | 60 | x = DataSource("abc", "stixshifter") 61 | assert x.interface == "stixshifter" 62 | assert x.datasource == "abc" 63 | 64 | with pytest.raises(InvalidDataSource): 65 | DataSource("sss://eee://ccc") 66 | 67 | with pytest.raises(InvalidDataSource): 68 | DataSource("sss") 69 | 70 | 71 | def test_construct(): 72 | data = [ {"name": "cmd.exe", "pid": 123} 73 | , {"name": "explorer.exe", "pid": 99} 74 | , {"name": "firefox.exe", "pid": 201} 75 | , {"name": "chrome.exe", "pid": 205} 76 | ] 77 | c = Construct(data) 78 | assert c.data.equals(DataFrame(data)) 79 | assert c.interface == CACHE_INTERFACE_IDENTIFIER 80 | 81 | 82 | def test_instruction_from_dict(): 83 | v = Variable("asdf", "foo", "bar") 84 | d = v.to_dict() 85 | w = instruction_from_dict(d) 86 | assert w == v 87 | 88 | del d["id"] 89 | with pytest.raises(InvalidSeralizedInstruction): 90 | instruction_from_dict(d) 91 | 92 | 93 | def test_instruction_from_json(): 94 | v = Variable("asdf", "foo", "bar") 95 | j = v.to_json() 96 | w = instruction_from_json(j) 97 | assert w == v 98 | -------------------------------------------------------------------------------- /packages/kestrel_core/tests/test_mapping_transformers.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | 4 | from kestrel.mapping.transformers import (run_transformer, 5 | run_transformer_on_series) 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "transform, value, expected", [ 10 | ("dirname", "/tmp", "/"), 11 | ("basename", "/tmp", "tmp"), 12 | ("dirname", "/usr/local/bin/thing", "/usr/local/bin"), 13 | ("basename", "/usr/local/bin/thing", "thing"), 14 | ("posixpath_startswith", "/var", "/var/%"), 15 | ("posixpath_endswith", "tmp.sh", r"%/tmp.sh"), 16 | ("dirname", r"C:\Windows\System32\cmd.exe", 
def test_run_series_basename():
    """Apply the ``basename`` transformer to a Series of Windows paths."""
    # Maps each input path to the basename the transformer should produce.
    expected_by_path = {
        r"C:\Windows\System32\cmd.exe": "cmd.exe",
        r"C:\TMP": "TMP",
    }
    paths = pd.Series(list(expected_by_path))
    basenames = run_transformer_on_series("basename", paths)
    assert list(basenames) == list(expected_by_path.values())
@dataclass
class Auth:
    """Username/password pair taken from a connection's ``auth`` section.

    Attributes:
        username: account name used for the connection.
        password: secret for that account; never included in ``repr()``.
    """

    username: str
    password: str

    def __repr__(self) -> str:
        # The dataclass-generated repr would print the password verbatim, so
        # any log line or traceback showing a config object would leak it.
        # Defining __repr__ here makes @dataclass keep this masked version.
        return (
            f"{type(self).__name__}"
            f"(username={self.username!r}, password='********')"
        )
@dataclass
class Connection(DataClassJSONMixin):
    """One named entry of the ``connections`` section of the profile.

    Attributes:
        url: URL of the OpenSearch instance.
        auth: credentials; accepted either as an ``Auth`` instance or as a
            mapping with ``username``/``password`` keys (the raw YAML form).
        verify_certs: defaults to ``True``; presumably toggles TLS
            certificate verification -- confirm against the interface code.
    """

    url: str
    auth: Auth
    verify_certs: bool = True

    def __post_init__(self):
        # The raw config supplies ``auth`` as a plain mapping, which is
        # promoted to ``Auth`` here.  An already-built ``Auth`` must be left
        # alone: unpacking it with ``**`` would raise TypeError.
        if not isinstance(self.auth, Auth):
            self.auth = Auth(**self.auth)
5 | connections: 6 | localhost: # this is the connection "name" 7 | url: https://localhost:9200 8 | verify_certs: false # For trusted, self-signed certs only! 9 | auth: 10 | username: admin 11 | password: admin 12 | my_opensearch: # this is a second connection "name" 13 | url: https://opensearch.example.com:9200/ 14 | auth: 15 | username: hunter 16 | password: password 17 | 18 | # An OpenSearch instance can have many indexes. For each index 19 | # pattern that will be available as a Kestrel datasource, create a 20 | # named entry here that references a connection name from above. You 21 | # can also specify some metadata about the indexes matched by the 22 | # index pattern, such as the field name to be used as the timestamp, 23 | # the format of that timestamp, and (optionally) a filename containing 24 | # a kestrel data model mapping, in YAML format. If no mapping is 25 | # specified, that index will default to Kestrel's built-in Elastic 26 | # Common Schema (ECS) data model mapping. 27 | datasources: 28 | winlogbeat-mordor: # this is the name you would use in your hunt 29 | connection: localhost # this is the "localhost" connection above 30 | index_pattern: winlogbeat-mordor 31 | timestamp: "@timestamp" 32 | timestamp_format: "%Y-%m-%dT%H:%M:%S.%fZ" 33 | data_model_map: "custom_mapping.yaml" 34 | sysmon-events: 35 | connection: my_opensearch 36 | index_pattern: sysmon-* # Search ALL indexes matching this pattern 37 | timestamp: "@timestamp" 38 | timestamp_format: "%Y-%m-%dT%H:%M:%S.%fZ" 39 | firewall-events: 40 | connection: my_opensearch 41 | index_pattern: firewall-* 42 | timestamp: "EventTime" 43 | timestamp_format: "%Y-%m-%d %H:%M:%S.%fZ" 44 | data_model_map: "another_custom_mapping.yaml" 45 | -------------------------------------------------------------------------------- /packages/kestrel_interface_opensearch/tests/__init__.py: -------------------------------------------------------------------------------- 
def test_load_config(tmp_path):
    """Round-trip an OpenSearch profile through ``load_config``.

    Writes a profile and a custom mapping file into ``tmp_path``, points the
    profile environment variable at it, and checks that connections,
    datasources and the referenced mapping are loaded.
    """
    config = {
        "connections": {
            "localhost": {
                "url": "https://localhost:9200",
                "verify_certs": False,
                "auth": {
                    "username": "admin",
                    "password": "admin",
                }
            },
            "some-cloud-thing": {
                "url": "https://www.example.com:9200",
                "verify_certs": True,
                "auth": {
                    "username": "hunter",
                    "password": "super_secret",
                }
            }
        },
        "datasources": {
            "some_ds": {
                "connection": "some-cloud-thing",
                "index_pattern": "logs-*",
                "timestamp": "@timestamp",
                "timestamp_format": "%Y-%m-%d %H:%M:%S.%f",
                "data_model_map": "mapping.yaml"
            }
        }
    }
    map_file = tmp_path / "mapping.yaml"
    with open(map_file, 'w') as fp:
        fp.write("some.field: other.field\n")
    config_file = tmp_path / "opensearch.yaml"
    with open(config_file, 'w') as fp:
        yaml.dump(config, fp)

    # Point the loader at the temporary profile, then put the environment
    # back so the path (which pytest deletes later) cannot leak into tests
    # that run afterwards.
    previous = os.environ.get(PROFILE_PATH_ENV_VAR)
    os.environ[PROFILE_PATH_ENV_VAR] = str(config_file)
    try:
        read_config = load_config()
    finally:
        if previous is None:
            os.environ.pop(PROFILE_PATH_ENV_VAR, None)
        else:
            os.environ[PROFILE_PATH_ENV_VAR] = previous

    conn: Connection = read_config.connections["localhost"]
    assert conn.url == config["connections"]["localhost"]["url"]
    assert read_config.connections["localhost"].url == config["connections"]["localhost"]["url"]
    assert read_config.datasources["some_ds"].index_pattern == config["datasources"]["some_ds"]["index_pattern"]
    assert read_config.datasources["some_ds"].data_model_map["some.field"] == "other.field"
-------------------------------------------------------------------------------- /packages/kestrel_interface_opensearch/tests/test_ossql.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import pytest 4 | from dateutil import parser 5 | from kestrel_interface_opensearch.ossql import OpenSearchTranslator 6 | 7 | from kestrel.exceptions import UnsupportedOperatorError 8 | from kestrel.ir.filter import (ExpOp, IntComparison, ListComparison, ListOp, 9 | MultiComp, NumCompOp, StrComparison, StrCompOp, 10 | TimeRange) 11 | from kestrel.ir.instructions import (Filter, Limit, Offset, ProjectAttrs, 12 | ProjectEntity, Sort) 13 | 14 | TIMEFMT = '%Y-%m-%dT%H:%M:%S.%fZ' 15 | 16 | 17 | # A much-simplified test mapping 18 | data_model_map = { 19 | "process": { 20 | "cmd_line": "CommandLine", 21 | "file": { 22 | "path": "Image", 23 | # "name": [ 24 | # { 25 | # "native_field": "Image", 26 | # "native_value": "basename", 27 | # "ocsf_op": "LIKE", 28 | # "ocsf_value": "endswith" 29 | # } 30 | # ] 31 | }, 32 | "pid": "ProcessId", 33 | "parent_process": { 34 | "pid": "ParentProcessId", 35 | }, 36 | }, 37 | } 38 | 39 | schema = { 40 | "CommandLine": "text", 41 | "Image": "text", 42 | "ProcessId": "text", 43 | "ParentProcessId": "text", 44 | } 45 | 46 | 47 | def _dt(timestr: str) -> datetime: 48 | return parser.parse(timestr) 49 | 50 | 51 | def _remove_nl(s): 52 | return s.replace('\n', '') 53 | 54 | 55 | @pytest.mark.parametrize( 56 | "iseq, sql", [ 57 | # Try a simple filter 58 | ([Filter(IntComparison('foo', NumCompOp.GE, 0))], 59 | "SELECT {} FROM my_table WHERE `foo` >= 0"), 60 | # Try a simple filter with sorting 61 | ([Filter(IntComparison('foo', NumCompOp.GE, 0)), Sort('bar')], 62 | "SELECT {} FROM my_table WHERE `foo` >= 0 ORDER BY `bar` DESC"), 63 | # Simple filter plus time range 64 | ([Filter(IntComparison('foo', NumCompOp.GE, 0), timerange=TimeRange(_dt('2023-12-06T08:17:00Z'), 
_dt('2023-12-07T08:17:00Z')))], 65 | "SELECT {} FROM my_table WHERE `foo` >= 0 AND `timestamp` >= '2023-12-06T08:17:00.000000Z' AND `timestamp` < '2023-12-07T08:17:00.000000Z'"), 66 | # Add a limit and projection 67 | ([Limit(3), ProjectAttrs(['foo', 'bar', 'baz']), Filter(StrComparison('foo', StrCompOp.EQ, 'abc'))], 68 | "SELECT `foo`, `bar`, `baz` FROM my_table WHERE `foo` = 'abc' LIMIT 3"), 69 | # Same as above but reverse order 70 | ([Filter(StrComparison('foo', StrCompOp.EQ, 'abc')), ProjectAttrs(['foo', 'bar', 'baz']), Limit(3)], 71 | "SELECT `foo`, `bar`, `baz` FROM my_table WHERE `foo` = 'abc' LIMIT 3"), 72 | ([Filter(ListComparison('foo', ListOp.NIN, ['abc', 'def']))], 73 | "SELECT {} FROM my_table WHERE `foo` NOT IN ('abc', 'def')"), 74 | ([Filter(MultiComp(ExpOp.OR, [IntComparison('foo', NumCompOp.EQ, 1), IntComparison('bar', NumCompOp.EQ, 1)]))], 75 | "SELECT {} FROM my_table WHERE `foo` = 1 OR `bar` = 1"), 76 | ([Filter(MultiComp(ExpOp.AND, [IntComparison('foo', NumCompOp.EQ, 1), IntComparison('bar', NumCompOp.EQ, 1)]))], 77 | "SELECT {} FROM my_table WHERE `foo` = 1 AND `bar` = 1"), 78 | ([Limit(1000), Offset(2000)], 79 | "SELECT {} FROM my_table LIMIT 2000, 1000"), 80 | # Test entity projection 81 | ([Limit(3), Filter(StrComparison('cmd_line', StrCompOp.EQ, 'foo bar')), ProjectEntity('process', 'process')], 82 | "SELECT {} FROM my_table WHERE `CommandLine` = 'foo bar' LIMIT 3"), 83 | ] 84 | ) 85 | def test_opensearch_translator(iseq, sql): 86 | if ProjectEntity in {type(i) for i in iseq}: 87 | cols = '`CommandLine` AS `cmd_line`, `Image` AS `file.path`, `ProcessId` AS `pid`, `ParentProcessId` AS `parent_process.pid`' 88 | else: 89 | cols = '`CommandLine` AS `process.cmd_line`, `Image` AS `process.file.path`, `ProcessId` AS `process.pid`, `ParentProcessId` AS `process.parent_process.pid`' 90 | trans = OpenSearchTranslator(TIMEFMT, "timestamp", "my_table", data_model_map, schema) 91 | for i in iseq: 92 | trans.add_instruction(i) 93 | result = 
@pytest.mark.parametrize(
    "instruction",
    [
        Filter(StrComparison('foo', regex_op, '.*abc.*'))
        for regex_op in (StrCompOp.MATCHES, StrCompOp.NMATCHES)
    ],
)
def test_opensearch_translator_unsupported(instruction):
    """Filters using MATCHES / NMATCHES must raise UnsupportedOperatorError."""
    translator = OpenSearchTranslator(
        TIMEFMT, "timestamp", "my_table", data_model_map, schema
    )
    # The error may surface when the instruction is added or when the query
    # is rendered, so both calls sit inside the ``raises`` block.
    with pytest.raises(UnsupportedOperatorError):
        translator.add_instruction(instruction)
        translator.result()
"https://kestrel.readthedocs.io/" 35 | Repository = "https://github.com/opencybersecurityalliance/kestrel-lang.git" 36 | -------------------------------------------------------------------------------- /packages/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/__init__.py: -------------------------------------------------------------------------------- 1 | from kestrel_interface_sqlalchemy.interface import SQLAlchemyInterface 2 | -------------------------------------------------------------------------------- /packages/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dataclasses import dataclass 3 | from typing import Dict, Optional 4 | 5 | from kestrel.config.utils import ( 6 | CONFIG_DIR_DEFAULT, 7 | load_kestrel_config, 8 | load_user_config, 9 | ) 10 | from kestrel.exceptions import InterfaceNotConfigured 11 | from kestrel.mapping.data_model import ( 12 | check_entity_identifier_existence_in_mapping, 13 | load_default_mapping, 14 | ) 15 | from mashumaro.mixins.json import DataClassJSONMixin 16 | 17 | PROFILE_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "sqlalchemy.yaml" 18 | PROFILE_PATH_ENV_VAR = "KESTREL_SQLALCHEMY_CONFIG" 19 | 20 | _logger = logging.getLogger(__name__) 21 | 22 | 23 | @dataclass 24 | class Connection(DataClassJSONMixin): 25 | url: str # SQLAlchemy "connection URL" or "connection string" 26 | table_creation_permission: bool # whether we are allowed to create a table 27 | 28 | 29 | @dataclass 30 | class DataSource(DataClassJSONMixin): 31 | connection: str 32 | table: str 33 | timestamp: Optional[str] 34 | timestamp_format: Optional[str] 35 | data_model_map: Optional[Dict] = None 36 | entity_identifier: Optional[Dict] = None 37 | 38 | def __post_init__(self): 39 | if not self.data_model_map: 40 | # Default to the built-in ECS mapping 41 | self.data_model_map = load_default_mapping("ecs") # FIXME: need a default? 
def load_config():
    """Load the SQLAlchemy interface profile and build a ``Config`` from it.

    The raw profile comes from ``load_user_config``, looked up through
    ``PROFILE_PATH_ENV_VAR`` and ``PROFILE_PATH_DEFAULT``.

    Returns:
        Config: the parsed connections and datasources.

    Raises:
        InterfaceNotConfigured: if building ``Config`` raises ``TypeError``
            (for instance when the loaded profile lacks required keys).  The
            original ``TypeError`` is attached as ``__cause__`` so the real
            reason stays visible in the traceback.
    """
    try:
        return Config(**load_user_config(PROFILE_PATH_ENV_VAR, PROFILE_PATH_DEFAULT))
    except TypeError as err:
        raise InterfaceNotConfigured() from err
@typechecked
class SQLAlchemyTranslator(SqlTranslator):
    """``SqlTranslator`` that can start from a native table or from a subquery.

    A ``NativeTable`` supplies every constructor argument directly.  A
    ``SubQuery`` wraps an upstream translator, whose query becomes a named
    CTE used as the FROM object.
    """

    def __init__(
        self,
        obj: Union[NativeTable, SubQuery],
    ):
        if isinstance(obj, NativeTable):
            init_args = (
                obj.dialect,
                obj.table_name,
                obj.table_schema,
                None,  # no projection base field for a raw table
                obj.data_model_map,
                obj.timefmt,
                obj.timestamp,
            )
        elif isinstance(obj, SubQuery):
            upstream = obj.translator
            init_args = (
                upstream.dialect,
                upstream.query.cte(name=obj.name),
                upstream.projected_schema,
                upstream.projection_base_field,
                upstream.data_mapping,
                None,  # timefmt is not passed on for a subquery
                None,  # timestamp is not passed on for a subquery
            )
        else:
            raise NotImplementedError("Type not defined in argument")

        super().__init__(*init_args)
B,Windows,ping -a 192.168.5.1,ping,5010,4411ac0e-88ec-4096-99a0-6c8e690a531a,C:\WINDOWS\System32\ping.exe,user9@win.com,user9,"powershell ""c:\temp\hello world.ps1""",powershell.exe,1025,b7c095df-4ad2-461f-a0eb-40a9d8856556 8 | 2024-07-09T00:10:52.000000Z,Launch,EDRy,1.1rc2,8710288b,Host B,Windows,pathping 8.8.8.8,pathping,5018,fc50e0b6-512d-4e70-9fd0-710722cd0e49,C:\WINDOWS\System32\pathping.exe,user9@win.com,user9,"powershell ""c:\temp\hello world.ps1""",powershell.exe,1025,b7c095df-4ad2-461f-a0eb-40a9d8856556 9 | 2024-07-09T00:10:55.000020Z,Launch,EDRy,1.1rc2,8710288b,Host B,Windows,ipconfig,ipconfig,6008,4e466946-9da0-4bf2-8897-81da260d3fdd,C:\WINDOWS\System32\ipconfig.exe,user9@win.com,user9,"powershell ""c:\temp\hello world.ps1""",powershell.exe,1025,b7c095df-4ad2-461f-a0eb-40a9d8856556 10 | 2024-07-09T00:10:58.000001Z,Launch,EDRy,1.1rc2,8710288b,Host B,Windows,netstat -a,netstat,1235,44043065-2940-494a-b504-034e94b11f33,C:\WINDOWS\System32\netstat.exe,user9@win.com,user9,"powershell ""c:\temp\hello world.ps1""",powershell.exe,1025,b7c095df-4ad2-461f-a0eb-40a9d8856556 11 | -------------------------------------------------------------------------------- /packages/kestrel_interface_sqlalchemy/tests/result_interface_find_entity_to_entity.txt: -------------------------------------------------------------------------------- 1 | WITH evs AS 2 | (SELECT DISTINCT "host.id" AS "device.uid", "host.id" AS "file.endpoint.uid", "host.id" AS "process.endpoint.uid", "host.id" AS "process.file.endpoint.uid", "host.id" AS "process.parent_process.endpoint.uid", "host.id" AS "process.parent_process.file.endpoint.uid", "host.id" AS "process.parent_process.user.endpoint.uid", "host.id" AS "process.user.endpoint.uid", "host.id" AS "reg_key.endpoint.uid", "host.id" AS "reg_value.endpoint.uid", "host.id" AS "actor.process.endpoint.uid", "host.id" AS "actor.process.file.endpoint.uid", "host.id" AS "actor.user.endpoint.uid", "host.name" AS "device.name", "host.name" AS 
"file.endpoint.name", "host.name" AS "process.endpoint.name", "host.name" AS "process.file.endpoint.name", "host.name" AS "process.parent_process.endpoint.name", "host.name" AS "process.parent_process.file.endpoint.name", "host.name" AS "process.parent_process.user.endpoint.name", "host.name" AS "process.user.endpoint.name", "host.name" AS "reg_key.endpoint.name", "host.name" AS "reg_value.endpoint.name", "host.name" AS "actor.process.endpoint.name", "host.name" AS "actor.process.file.endpoint.name", "host.name" AS "actor.user.endpoint.name", "os.name" AS "device.os", "os.name" AS "file.endpoint.os", "os.name" AS "process.endpoint.os", "os.name" AS "process.file.endpoint.os", "os.name" AS "process.parent_process.endpoint.os", "os.name" AS "process.parent_process.file.endpoint.os", "os.name" AS "process.parent_process.user.endpoint.os", "os.name" AS "process.user.endpoint.os", "os.name" AS "reg_key.endpoint.os", "os.name" AS "reg_value.endpoint.os", "os.name" AS "actor.process.endpoint.os", "os.name" AS "actor.process.file.endpoint.os", "os.name" AS "actor.user.endpoint.os", "user.name" AS "user.name", "user.id" AS "user.uid", "process.command_line" AS "process.cmd_line", "process.name" AS "process.name", "process.pid" AS "process.pid", "process.entity_id" AS "process.uid", "process.executable" AS "process.file.name", "process.executable" AS "process.file.path", "process.executable" AS "process.file.parent_folder", "process.parent.command_line" AS "process.parent_process.cmd_line", "process.parent.command_line" AS "actor.process.cmd_line", "process.parent.name" AS "process.parent_process.name", "process.parent.name" AS "actor.process.name", "process.parent.pid" AS "process.parent_process.pid", "process.parent.pid" AS "actor.process.pid", "process.parent.entity_id" AS "process.parent_process.uid", "process.parent.entity_id" AS "actor.process.uid" 3 | FROM events 4 | WHERE "os.name" IN ('Linux', 'Windows')), 5 | procs AS 6 | (SELECT DISTINCT "process.endpoint.uid" AS 
"endpoint.uid", "process.file.endpoint.uid" AS "file.endpoint.uid", "process.parent_process.endpoint.uid" AS "parent_process.endpoint.uid", "process.parent_process.file.endpoint.uid" AS "parent_process.file.endpoint.uid", "process.parent_process.user.endpoint.uid" AS "parent_process.user.endpoint.uid", "process.user.endpoint.uid" AS "user.endpoint.uid", "process.endpoint.name" AS "endpoint.name", "process.file.endpoint.name" AS "file.endpoint.name", "process.parent_process.endpoint.name" AS "parent_process.endpoint.name", "process.parent_process.file.endpoint.name" AS "parent_process.file.endpoint.name", "process.parent_process.user.endpoint.name" AS "parent_process.user.endpoint.name", "process.user.endpoint.name" AS "user.endpoint.name", "process.endpoint.os" AS "endpoint.os", "process.file.endpoint.os" AS "file.endpoint.os", "process.parent_process.endpoint.os" AS "parent_process.endpoint.os", "process.parent_process.file.endpoint.os" AS "parent_process.file.endpoint.os", "process.parent_process.user.endpoint.os" AS "parent_process.user.endpoint.os", "process.user.endpoint.os" AS "user.endpoint.os", "process.cmd_line" AS cmd_line, "process.name" AS name, "process.pid" AS pid, "process.uid" AS uid, "process.file.name" AS "file.name", "process.file.path" AS "file.path", "process.file.parent_folder" AS "file.parent_folder", "process.parent_process.cmd_line" AS "parent_process.cmd_line", "process.parent_process.name" AS "parent_process.name", "process.parent_process.pid" AS "parent_process.pid", "process.parent_process.uid" AS "parent_process.uid" 7 | FROM evs 8 | WHERE "endpoint.os" = 'Linux'), 9 | parents AS 10 | (SELECT DISTINCT "host.id" AS "endpoint.uid", "host.id" AS "file.endpoint.uid", "host.id" AS "user.endpoint.uid", "host.name" AS "endpoint.name", "host.name" AS "file.endpoint.name", "host.name" AS "user.endpoint.name", "os.name" AS "endpoint.os", "os.name" AS "file.endpoint.os", "os.name" AS "user.endpoint.os", "process.parent.command_line" AS cmd_line, 
"process.parent.name" AS name, "process.parent.pid" AS pid, "process.parent.entity_id" AS uid 11 | FROM events 12 | WHERE ("process.entity_id", "host.id") IN (SELECT DISTINCT uid, "endpoint.uid" 13 | FROM procs)) 14 | SELECT DISTINCT * 15 | FROM parents 16 | -------------------------------------------------------------------------------- /packages/kestrel_interface_sqlalchemy/tests/result_interface_find_event_to_entity.txt: -------------------------------------------------------------------------------- 1 | WITH evs AS 2 | (SELECT DISTINCT "host.id" AS "device.uid", "host.id" AS "file.endpoint.uid", "host.id" AS "process.endpoint.uid", "host.id" AS "process.file.endpoint.uid", "host.id" AS "process.parent_process.endpoint.uid", "host.id" AS "process.parent_process.file.endpoint.uid", "host.id" AS "process.parent_process.user.endpoint.uid", "host.id" AS "process.user.endpoint.uid", "host.id" AS "reg_key.endpoint.uid", "host.id" AS "reg_value.endpoint.uid", "host.id" AS "actor.process.endpoint.uid", "host.id" AS "actor.process.file.endpoint.uid", "host.id" AS "actor.user.endpoint.uid", "host.name" AS "device.name", "host.name" AS "file.endpoint.name", "host.name" AS "process.endpoint.name", "host.name" AS "process.file.endpoint.name", "host.name" AS "process.parent_process.endpoint.name", "host.name" AS "process.parent_process.file.endpoint.name", "host.name" AS "process.parent_process.user.endpoint.name", "host.name" AS "process.user.endpoint.name", "host.name" AS "reg_key.endpoint.name", "host.name" AS "reg_value.endpoint.name", "host.name" AS "actor.process.endpoint.name", "host.name" AS "actor.process.file.endpoint.name", "host.name" AS "actor.user.endpoint.name", "os.name" AS "device.os", "os.name" AS "file.endpoint.os", "os.name" AS "process.endpoint.os", "os.name" AS "process.file.endpoint.os", "os.name" AS "process.parent_process.endpoint.os", "os.name" AS "process.parent_process.file.endpoint.os", "os.name" AS "process.parent_process.user.endpoint.os", 
"os.name" AS "process.user.endpoint.os", "os.name" AS "reg_key.endpoint.os", "os.name" AS "reg_value.endpoint.os", "os.name" AS "actor.process.endpoint.os", "os.name" AS "actor.process.file.endpoint.os", "os.name" AS "actor.user.endpoint.os", "user.name" AS "user.name", "user.id" AS "user.uid", "process.command_line" AS "process.cmd_line", "process.name" AS "process.name", "process.pid" AS "process.pid", "process.entity_id" AS "process.uid", "process.executable" AS "process.file.name", "process.executable" AS "process.file.path", "process.executable" AS "process.file.parent_folder", "process.parent.command_line" AS "process.parent_process.cmd_line", "process.parent.command_line" AS "actor.process.cmd_line", "process.parent.name" AS "process.parent_process.name", "process.parent.name" AS "actor.process.name", "process.parent.pid" AS "process.parent_process.pid", "process.parent.pid" AS "actor.process.pid", "process.parent.entity_id" AS "process.parent_process.uid", "process.parent.entity_id" AS "actor.process.uid" 3 | FROM events 4 | WHERE "os.name" IN ('Linux', 'Windows')), 5 | procs AS 6 | (SELECT DISTINCT "process.endpoint.uid" AS "endpoint.uid", "process.file.endpoint.uid" AS "file.endpoint.uid", "process.parent_process.endpoint.uid" AS "parent_process.endpoint.uid", "process.parent_process.file.endpoint.uid" AS "parent_process.file.endpoint.uid", "process.parent_process.user.endpoint.uid" AS "parent_process.user.endpoint.uid", "process.user.endpoint.uid" AS "user.endpoint.uid", "process.endpoint.name" AS "endpoint.name", "process.file.endpoint.name" AS "file.endpoint.name", "process.parent_process.endpoint.name" AS "parent_process.endpoint.name", "process.parent_process.file.endpoint.name" AS "parent_process.file.endpoint.name", "process.parent_process.user.endpoint.name" AS "parent_process.user.endpoint.name", "process.user.endpoint.name" AS "user.endpoint.name", "process.endpoint.os" AS "endpoint.os", "process.file.endpoint.os" AS "file.endpoint.os", 
"process.parent_process.endpoint.os" AS "parent_process.endpoint.os", "process.parent_process.file.endpoint.os" AS "parent_process.file.endpoint.os", "process.parent_process.user.endpoint.os" AS "parent_process.user.endpoint.os", "process.user.endpoint.os" AS "user.endpoint.os", "process.cmd_line" AS cmd_line, "process.name" AS name, "process.pid" AS pid, "process.uid" AS uid, "process.file.name" AS "file.name", "process.file.path" AS "file.path", "process.file.parent_folder" AS "file.parent_folder", "process.parent_process.cmd_line" AS "parent_process.cmd_line", "process.parent_process.name" AS "parent_process.name", "process.parent_process.pid" AS "parent_process.pid", "process.parent_process.uid" AS "parent_process.uid" 7 | FROM evs 8 | WHERE "endpoint.os" = 'Linux') 9 | SELECT DISTINCT * 10 | FROM procs 11 | -------------------------------------------------------------------------------- /packages/kestrel_interface_sqlalchemy/tests/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import yaml 4 | from kestrel_interface_sqlalchemy.config import (PROFILE_PATH_ENV_VAR, 5 | Connection, load_config) 6 | 7 | 8 | def test_load_config_w_default_map(tmp_path): 9 | config = { 10 | "connections": { 11 | "some-data-lake": { 12 | "url": "presto://jdoe@example.com:8889/hive", 13 | "table_creation_permission": True, 14 | } 15 | }, 16 | "datasources": { 17 | "cloud_table": { 18 | "connection": "some-data-lake", 19 | "table": "cloud_table", 20 | "timestamp": "eventTime", 21 | "timestamp_format": "%Y-%m-%d %H:%M:%S.%f", 22 | } 23 | } 24 | } 25 | config_file = tmp_path / "sqlalchemy.yaml" 26 | with open(config_file, 'w') as fp: 27 | yaml.dump(config, fp) 28 | os.environ[PROFILE_PATH_ENV_VAR] = str(config_file) 29 | read_config = load_config() 30 | assert read_config.datasources["cloud_table"].data_model_map["process"]["name"] == "process.name" 31 | 32 | 33 | def test_load_config(tmp_path): 34 | config = { 35 | 
"connections": { 36 | "localhost": { 37 | "url": "sqlite:////home/jdoe/test.db", 38 | "table_creation_permission": True, 39 | }, 40 | "some-data-lake": { 41 | "url": "presto://jdoe@example.com:8889/hive", 42 | "table_creation_permission": True, 43 | } 44 | }, 45 | "datasources": { 46 | "cloud_table": { 47 | "connection": "some-data-lake", 48 | "table": "cloud_table", 49 | "timestamp": "eventTime", 50 | "timestamp_format": "%Y-%m-%d %H:%M:%S.%f", 51 | "data_model_map": str(tmp_path / "mapping.yaml"), 52 | } 53 | } 54 | } 55 | map_file = tmp_path / "mapping.yaml" 56 | with open(map_file, 'w') as fp: 57 | fp.write("some.field: other.field\n") 58 | eid_file = tmp_path / "eid.yaml" 59 | with open(eid_file, 'w') as fp: 60 | fp.write("process: pid\n") 61 | config_file = tmp_path / "sqlalchemy.yaml" 62 | with open(config_file, 'w') as fp: 63 | yaml.dump(config, fp) 64 | os.environ[PROFILE_PATH_ENV_VAR] = str(config_file) 65 | read_config = load_config() 66 | conn: Connection = read_config.connections["localhost"] 67 | assert conn.url == config["connections"]["localhost"]["url"] 68 | assert read_config.connections["localhost"].url == config["connections"]["localhost"]["url"] 69 | assert read_config.datasources["cloud_table"].timestamp == config["datasources"]["cloud_table"]["timestamp"] 70 | assert read_config.datasources["cloud_table"].table == config["datasources"]["cloud_table"]["table"] 71 | assert read_config.datasources["cloud_table"].data_model_map["some.field"] == "other.field" 72 | -------------------------------------------------------------------------------- /packages/kestrel_jupyter/README.rst: -------------------------------------------------------------------------------- 1 | ../../README.rst -------------------------------------------------------------------------------- /packages/kestrel_jupyter/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 68.2.2", "wheel"] 
3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "kestrel_jupyter" 7 | version = "2.0.0b2" 8 | description = "Kestrel Jupyter Kernel" 9 | readme = "README.rst" 10 | requires-python = ">=3.8" 11 | license = {text = "Apache 2.0 License"} 12 | maintainers = [ 13 | {name = "Xiaokui Shu", email = "xiaokui.shu@ibm.com"}, 14 | {name = "Paul Coccoli", email = "pcoccoli@us.ibm.com"}, 15 | ] 16 | keywords = [ 17 | "kestrel", 18 | "Jupyter", 19 | "kernel", 20 | ] 21 | classifiers = [ 22 | "Topic :: Security", 23 | "Operating System :: OS Independent", 24 | "Development Status :: 4 - Beta", 25 | "Programming Language :: Python :: 3", 26 | ] 27 | 28 | dependencies = [ 29 | "kestrel_core==2.0.0b", 30 | "kestrel_interface_opensearch==2.0.0b", 31 | "kestrel_interface_sqlalchemy==2.0.0b", 32 | "kestrel_tool==2.0.0b", 33 | "jupyterlab-server", 34 | "jupyterlab", 35 | "jupyter_client", 36 | "nbclassic", 37 | "sqlparse==0.5.1", 38 | "pygments==2.18.0", 39 | "matplotlib==3.9.1", 40 | ] 41 | 42 | [project.optional-dependencies] 43 | test = [ 44 | "pytest", 45 | ] 46 | 47 | [project.urls] 48 | Homepage = "https://github.com/opencybersecurityalliance/kestrel-lang" 49 | Documentation = "https://kestrel.readthedocs.io/" 50 | Repository = "https://github.com/opencybersecurityalliance/kestrel-lang.git" 51 | 52 | [project.scripts] 53 | kestrel_jupyter_setup = "kestrel_jupyter_kernel.setup:run" 54 | 55 | [tool.setuptools.packages.find] 56 | where = ["src"] 57 | 58 | [tool.setuptools.package-data] 59 | "*" = ["*.js"] 60 | -------------------------------------------------------------------------------- /packages/kestrel_jupyter/src/kestrel_ipython/__init__.py: -------------------------------------------------------------------------------- 1 | import kestrel_ipython.magic 2 | -------------------------------------------------------------------------------- /packages/kestrel_jupyter/src/kestrel_ipython/magic.py: 
@magics_class
class KestrelMagic(Magics):
    """IPython line/cell magic that forwards Kestrel code to a ``Session``."""

    def __init__(self, shell=None, config=None, user_magics=None, **traits):
        super().__init__(shell=shell, config=config, user_magics=user_magics, **traits)
        # No session yet: it is created by "session init" or by the first query.
        self.session = None

    def __check_magic(self, line="", cell=None):
        """
        Recognize the non-Kestrel ``session init [true|false]`` command.

        When ``line`` matches, a fresh ``Session`` is created (its single
        argument is True only if the optional flag is "true",
        case-insensitively) and True is returned.  Otherwise nothing happens
        and False is returned.  ``cell`` is accepted but not inspected.
        """
        # regex is a simple hack
        pattern = r"^\s*(session)\s+(init)\s*(true|false)?\s*$"
        matched = re.match(pattern, line, re.IGNORECASE)
        if matched is None:
            return False
        flag = matched.groups()[2]
        stderr = flag is not None and flag.lower() == "true"
        self.session = Session(stderr)
        return True

    @line_cell_magic
    def kestrel(self, line="", cell=None):
        """
        session init [true / false]
        """
        if self.__check_magic(line, cell):
            # The init command has been consumed; it must not be executed
            # as Kestrel code below.
            if line:
                line = ""
            if cell is None:
                return

        if self.session is None:
            self.session = Session()

        if not line and cell is None:
            sys.stderr.write("Need to provide a Kestrel query to execute")
            return None

        if cell is None:
            return self.session.execute(line)

        sys.stderr.write(repr(cell))
        if line:
            # Line content runs first; only the cell's result is returned.
            self.session.execute(line)
        return self.session.execute(cell)
/packages/kestrel_jupyter/src/kestrel_jupyter_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | from kestrel_jupyter_kernel.kernel import KestrelKernel 2 | -------------------------------------------------------------------------------- /packages/kestrel_jupyter/src/kestrel_jupyter_kernel/__main__.py: -------------------------------------------------------------------------------- 1 | from ipykernel.kernelapp import IPKernelApp 2 | from kestrel_jupyter_kernel import KestrelKernel 3 | 4 | if __name__ == "__main__": 5 | IPKernelApp.launch_instance(kernel_class=KestrelKernel) 6 | -------------------------------------------------------------------------------- /packages/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opencybersecurityalliance/kestrel-lang/01fd5fa446e1106d5553e8f66f90f05c8aea3f36/packages/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/__init__.py -------------------------------------------------------------------------------- /packages/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/kestrel_template.js: -------------------------------------------------------------------------------- 1 | (function(mod) { 2 | if (typeof exports == "object" && typeof module == "object") // CommonJS 3 | mod(require("../../lib/codemirror")); 4 | else if (typeof define == "function" && define.amd) // AMD 5 | define(["../../lib/codemirror"], mod); 6 | else // Plain browser env 7 | mod(CodeMirror); 8 | })(function(CodeMirror) { 9 | "use strict"; 10 | 11 | CodeMirror.defineMode("kestrel", function() { 12 | 13 | function switchState(source, setState, f) { 14 | setState(f); 15 | return f(source, setState); 16 | } 17 | 18 | var smallRE = /[a-z_]/; 19 | var largeRE = /[A-Z]/; 20 | var digitRE = /[0-9]/; 21 | var hexitRE = /[0-9A-Fa-f]/; 22 | var octitRE = /[0-7]/; 23 | var idRE = 
/[a-z_A-Z0-9\']/; 24 | var typeRE = /[a-zA-Z0-9-]/; 25 | var symbolRE = /[-!#$%&*+.\/<=>?@\\^|~:]/; 26 | var specialRE = /[(),;[\]`{}]/; 27 | var whiteCharRE = /[ \t\v\f]/; // newlines are handled in tokenizer 28 | var isoTimestamp = /[0-9:.\-TZ]/; 29 | 30 | function normal() { 31 | return function (source, setState) { 32 | if (source.eatWhile(whiteCharRE)) { 33 | return null; 34 | } 35 | 36 | var ch = source.next(); 37 | 38 | if (ch == '#') { 39 | source.skipToEnd(); 40 | return "comment"; 41 | } 42 | 43 | if (ch == '\'') { 44 | return switchState(source, setState, stringLiteral); 45 | } 46 | 47 | if (ch == 't') { 48 | if (source.eat('\'')) { 49 | source.eatWhile(isoTimestamp); 50 | if (source.eat('\'')) { 51 | return "string-2"; 52 | } 53 | } 54 | } 55 | 56 | if (typeRE.test(source)) { 57 | source.eatWhile(typeRE); 58 | return "type"; 59 | } 60 | 61 | if (largeRE.test(ch)) { 62 | source.eatWhile(idRE); 63 | return "error"; 64 | } 65 | 66 | if (smallRE.test(ch)) { 67 | source.eatWhile(idRE); 68 | return "variable"; 69 | } 70 | 71 | if (digitRE.test(ch)) { 72 | if (ch == '0') { 73 | if (source.eat(/[xX]/)) { 74 | source.eatWhile(hexitRE); // should require at least 1 75 | return "integer"; 76 | } 77 | if (source.eat(/[oO]/)) { 78 | source.eatWhile(octitRE); // should require at least 1 79 | return "number"; 80 | } 81 | } 82 | source.eatWhile(digitRE); 83 | var t = "number"; 84 | if (source.eat('.')) { 85 | t = "number"; 86 | source.eatWhile(digitRE); // should require at least 1 87 | } 88 | if (source.eat(/[eE]/)) { 89 | t = "number"; 90 | source.eat(/[-+]/); 91 | source.eatWhile(digitRE); // should require at least 1 92 | } 93 | return t; 94 | } 95 | 96 | if (symbolRE.test(ch)) { 97 | if (ch == '#') { 98 | source.skipToEnd(); 99 | return "comment"; 100 | } 101 | } 102 | 103 | return "error"; 104 | } 105 | } 106 | 107 | function stringLiteral(source, setState) { 108 | while (!source.eol()) { 109 | var ch = source.next(); 110 | if (ch == '\'') { 111 | 
setState(normal()); 112 | return "string"; 113 | } 114 | // escape handling: need to test correctness 115 | //if (ch == '\\') { 116 | // if (source.eat('\'')) source.next(); 117 | //} 118 | } 119 | setState(normal()); 120 | return "error"; 121 | } 122 | 123 | var wellKnownWords = (function() { 124 | var wkw = {}; 125 | 126 | var keywords = <<>>; 127 | 128 | for (var i = keywords.length; i--;) 129 | wkw[keywords[i]] = "keyword"; 130 | 131 | var ops = ["IN", "NOT", "LIKE", "MATCHES", "ISSUBSET", "in", "not", "like", "matches", "isubset", "=", "!=", "<", ">", "<=", ">=",]; 132 | 133 | for (var i = ops.length; i--;) 134 | wkw[ops[i]] = "operator"; 135 | 136 | return wkw; 137 | })(); 138 | 139 | return { 140 | startState: function () { return { f: normal() }; }, 141 | copyState: function (s) { return { f: s.f }; }, 142 | 143 | token: function(stream, state) { 144 | var t = state.f(stream, function(s) { state.f = s; }); 145 | var w = stream.current(); 146 | return (wellKnownWords.hasOwnProperty(w)) ? 
def update_codemirror_mode():
    """Install or refresh the Kestrel CodeMirror mode file for each frontend.

    For every path returned by ``_get_codemirror_file_paths`` the file is
    rewritten only when its content differs from the freshly generated
    source.  The operation is best-effort: files that cannot be read or
    written because of missing permissions are skipped silently.
    """
    codemirror_file_paths = _get_codemirror_file_paths()
    if not codemirror_file_paths:
        # Nothing to update, so do not generate the mode source at all.
        return

    # The generated source does not depend on the target path: build it once.
    src_latest = _instantiate_codemirror_mode_src()

    for codemirror_file_path in codemirror_file_paths:
        src_current = ""
        if os.path.isfile(codemirror_file_path):
            try:
                with open(codemirror_file_path) as fp:
                    src_current = fp.read()
            except PermissionError:
                pass

        if src_latest != src_current:
            try:
                with open(codemirror_file_path, "w") as fp:
                    fp.write(src_latest)
            except PermissionError:
                pass


################################################################
#                       Private Functions
################################################################


def _get_codemirror_file_paths():
    """Return the ``kestrel.js`` target path for each usable frontend package.

    The ``notebook`` and ``nbclassic`` packages are probed for a CodeMirror
    ``mode`` directory.  A ``kestrel`` sub-directory is created when missing;
    if it cannot be created the package is skipped, so every returned path
    has an existing parent directory.
    """
    paths = []
    for pkg_path in (notebook.__path__[0], nbclassic.__path__[0]):
        codemirror_dir = os.path.join(pkg_path, "static/components/codemirror/mode")
        if not os.path.isdir(codemirror_dir):
            continue
        kestrel_dir = os.path.join(codemirror_dir, "kestrel")
        if not os.path.isdir(kestrel_dir):
            try:
                os.mkdir(kestrel_dir)
            except PermissionError:
                # Without the directory a later write would fail with
                # FileNotFoundError, so do not report this path.
                continue
        paths.append(os.path.join(kestrel_dir, "kestrel.js"))
    return paths


def _instantiate_codemirror_mode_src():
    """Return the CodeMirror mode source with the keyword list filled in.

    The keyword list from ``kestrel.frontend.parser.get_keywords`` is
    serialized as JSON and substituted for the placeholder token in the
    packaged ``kestrel_template.js``.
    """
    # Import the submodule explicitly: a bare ``import kestrel`` does not
    # guarantee that ``kestrel.frontend.parser`` has been loaded.
    from kestrel.frontend.parser import get_keywords

    keywords = json.dumps(get_keywords())
    codemirror_src = pkgutil.get_data(__name__, "kestrel_template.js").decode("utf-8")
    codemirror_src = codemirror_src.replace("<<>>", keywords)
    return codemirror_src
def gen_label_mapping(g: IRGraph) -> Mapping[Instruction, str]:
    """Build the node-to-label table used when drawing an IR graph.

    Labels are chosen per node type: a ``Variable`` uses its name, a
    ``Construct`` the first four hex digits of its id, a ``DataSource`` its
    ``datasource`` attribute, and any other instruction its upper-cased
    ``instruction`` attribute wrapped in square brackets.
    """

    def label_of(node):
        if isinstance(node, Variable):
            return node.name
        if isinstance(node, Construct):
            return node.id.hex[:4]
        if isinstance(node, DataSource):
            return node.datasource
        return f"[{node.instruction.upper()}]"

    return {node: label_of(node) for node in g}
Nothing Found :-(
" 41 | else: 42 | yield escaped_df.to_html(index=False, na_rep="") 43 | elif isinstance(d, GraphExplanation): 44 | for graphlet in d.graphlets: 45 | graph = IRGraph(graphlet.graph) 46 | yield f"
INTERFACE: {graphlet.graph['interface']}; STORE: {graphlet.graph['store']}
" 47 | 48 | fig_side_length = min(10, ceil(sqrt(len(graph))) + 1) 49 | plt.figure(figsize=(fig_side_length, fig_side_length)) 50 | nx.draw( 51 | graph, 52 | with_labels=True, 53 | labels=gen_label_mapping(graph), 54 | font_size=8, 55 | node_size=260, 56 | node_color="#bfdff5", 57 | ) 58 | fig_buffer = BytesIO() 59 | plt.savefig(fig_buffer, format="png") 60 | img_base64 = base64.b64encode(fig_buffer.getvalue()).decode("utf-8") 61 | img_tag = f'' 62 | yield img_tag 63 | 64 | if isinstance(graphlet.action, NativeQuery): 65 | native_query = graphlet.action 66 | language = native_query.language 67 | query = native_query.statement 68 | if language == "SQL": 69 | lexer = SqlLexer() 70 | query = sqlparse.format(query, reindent=True, keyword_case="upper") 71 | elif language == "KQL": 72 | lexer = KustoLexer() 73 | else: 74 | lexer = guess_lexer(query) 75 | query = highlight(query, lexer, HtmlFormatter()) 76 | style = "" 77 | yield style + query 78 | elif isinstance(graphlet.action, AnalyticOperation): 79 | analytic_operation = graphlet.action 80 | data = { 81 | "Analytics": [analytic_operation.operation], 82 | } 83 | yield DataFrame(data).to_html(index=False) 84 | -------------------------------------------------------------------------------- /packages/kestrel_jupyter/src/kestrel_jupyter_kernel/kernel.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ipykernel.kernelbase import Kernel 4 | from kestrel.session import Session 5 | from kestrel_jupyter_kernel.display import to_html_blocks 6 | 7 | _logger = logging.getLogger(__name__) 8 | 9 | 10 | class KestrelKernel(Kernel): 11 | implementation = "kestrel" 12 | implementation_version = "2.0" 13 | language = "kestrel" 14 | language_version = "2.0" 15 | # https://jupyter-client.readthedocs.io/en/stable/messaging.html#msging-kernel-info 16 | language_info = {"name": "kestrel", "file_extension": ".hf"} 17 | banner = "Kestrel" 18 | 19 | def __init__(self, **kwargs): 
20 | super().__init__(**kwargs) 21 | self.kestrel_session = Session() 22 | 23 | def do_complete(self, code, cursor_pos): 24 | return { 25 | "matches": self.kestrel_session.do_complete(code, cursor_pos), 26 | "cursor_end": cursor_pos, 27 | "cursor_start": cursor_pos, 28 | "metadata": {}, 29 | "status": "ok", 30 | } 31 | 32 | def do_execute( 33 | self, code, silent, store_history=True, user_expressions=None, allow_stdin=False 34 | ): 35 | if not silent: 36 | try: 37 | for result in self.kestrel_session.execute_to_generate(code): 38 | for html in to_html_blocks(result): 39 | self.send_response( 40 | self.iopub_socket, 41 | "display_data", 42 | {"data": {"text/html": html}, "metadata": {}}, 43 | ) 44 | # how to clear output (if needed in the future): 45 | # self.send_response(self.iopub_socket, "clear_output") 46 | 47 | except Exception as e: 48 | _logger.error("Exception occurred", exc_info=True) 49 | error = f"{e.__class__.__name__}: {e}" 50 | self.send_response( 51 | self.iopub_socket, "stream", {"name": "stderr", "text": error} 52 | ) 53 | 54 | return { 55 | "status": "ok", 56 | "execution_count": self.execution_count, 57 | "payload": [], 58 | "user_expressions": {}, 59 | } 60 | -------------------------------------------------------------------------------- /packages/kestrel_jupyter/src/kestrel_jupyter_kernel/setup.py: -------------------------------------------------------------------------------- 1 | ################################################################ 2 | # Setup Kestrel Jupyter Kernel 3 | # 4 | # This module setups the Kestrel Jupyter kernel: 5 | # 1. install the kernel to Jupyter environment (local env) 6 | # 2. generate codemirror mode for Kestrel based on the 7 | # installed kestrel Python package for syntax highlighting 8 | # 3. 
_KERNEL_SPEC = {
    "argv": ["python3", "-m", "kestrel_jupyter_kernel", "-f", "{connection_file}"],
    "display_name": "Kestrel",
    "language": "kestrel",
}


def install_kernelspec():
    """Register the Kestrel kernel spec as a user-level Jupyter kernel.

    ``kernel.json`` is written into a temporary directory, which is then
    handed to ``KernelSpecManager.install_kernel_spec`` under the name
    "kestrel".
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        spec_dir = os.path.join(scratch_dir, "kestrel_kernel")
        os.mkdir(spec_dir)
        with open(os.path.join(spec_dir, "kernel.json"), "w") as spec_file:
            spec_file.write(json.dumps(_KERNEL_SPEC))

        KernelSpecManager().install_kernel_spec(spec_dir, "kestrel", user=True)


def run():
    """Entry point: install the kernel, then the syntax highlighting."""
    print("Setup Kestrel Jupyter Kernel")
    steps = (
        (" Install new Jupyter kernel ...", install_kernelspec),
        # generate and install kestrel codemirror mode
        (" Compute and install syntax highlighting ...", update_codemirror_mode),
    )
    for banner, action in steps:
        print(banner, end=" ")
        action()
        print("done")
jupyter_client.kernelspec import KernelSpecManager 2 | from kestrel_jupyter_kernel.setup import install_kernelspec 3 | 4 | 5 | def test_kernel_install(): 6 | m = KernelSpecManager() 7 | ks = m.get_all_specs() 8 | if "kestrel" in ks: 9 | m.remove_kernel_spec("kestrel") 10 | 11 | install_kernelspec() 12 | assert "kestrel" in m.get_all_specs() 13 | -------------------------------------------------------------------------------- /packages/kestrel_jupyter/tests/test_notebook_syntax_gen.py: -------------------------------------------------------------------------------- 1 | from os.path import exists 2 | 3 | from kestrel_jupyter_kernel.codemirror.setup import ( 4 | _get_codemirror_file_paths, update_codemirror_mode) 5 | 6 | 7 | def test_notebook_syntax_gen(): 8 | js_paths = _get_codemirror_file_paths() 9 | update_codemirror_mode() 10 | for js_path in js_paths: 11 | assert exists(js_path) 12 | -------------------------------------------------------------------------------- /packages/kestrel_tool/README.rst: -------------------------------------------------------------------------------- 1 | ../../README.rst -------------------------------------------------------------------------------- /packages/kestrel_tool/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 68.2.2", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "kestrel_tool" 7 | version = "2.0.0b" 8 | description = "Kestrel Threat Hunting Language CLI Multi-tool" 9 | readme = "README.rst" 10 | requires-python = ">=3.8" 11 | license = {text = "Apache 2.0 License"} 12 | maintainers = [ 13 | {name = "Xiaokui Shu", email = "xiaokui.shu@ibm.com"}, 14 | {name = "Paul Coccoli", email = "pcoccoli@us.ibm.com"}, 15 | ] 16 | keywords = [ 17 | "kestrel", 18 | "cybersecurity", 19 | "threat hunting", 20 | ] 21 | classifiers = [ 22 | "Topic :: Security", 23 | "Operating System :: OS Independent", 24 | 
"Development Status :: 4 - Beta", 25 | "Programming Language :: Python :: 3", 26 | ] 27 | 28 | dependencies = [ 29 | "kestrel_core>=2.0.0b", 30 | "typer>=0.12.3", 31 | ] 32 | 33 | [project.urls] 34 | Homepage = "https://github.com/opencybersecurityalliance/kestrel-lang" 35 | Documentation = "https://kestrel.readthedocs.io/" 36 | Repository = "https://github.com/opencybersecurityalliance/kestrel-lang.git" 37 | 38 | [project.scripts] 39 | kestrel-tool = "kestrel_tool.main:app" 40 | 41 | [tool.setuptools.packages.find] 42 | where = ["src"] 43 | -------------------------------------------------------------------------------- /packages/kestrel_tool/src/kestrel_tool/main.py: -------------------------------------------------------------------------------- 1 | import typer 2 | from kestrel_tool import mkdb 3 | 4 | app = typer.Typer() 5 | app.command()(mkdb.mkdb) 6 | 7 | 8 | @app.command() 9 | def test(): 10 | """Temp placeholder until we have more commands""" 11 | pass 12 | 13 | 14 | if __name__ == "__main__": 15 | app() 16 | -------------------------------------------------------------------------------- /packages/kestrel_tool/src/kestrel_tool/mkdb.py: -------------------------------------------------------------------------------- 1 | """ mkdb: turn JSON logs into SQLAlchemy DBs (e.g. sqlite3)""" 2 | 3 | import json 4 | import re 5 | 6 | import pandas as pd 7 | import sqlalchemy 8 | import typer 9 | 10 | RE_UUID = re.compile( 11 | "^%?{?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})}?$" 12 | ) 13 | 14 | 15 | def _normalize_event(event: dict) -> dict: 16 | # We could definitely optimize this 17 | if "tags" in event: 18 | # Blue Team Village CTF data (cribl?) 19 | del event["tags"] 20 | if "process" in event: 21 | # SecurityDatasets.com GoldenSAML WindowsEvents: case inconsistency? 22 | event["Process"] = event["process"] 23 | del event["process"] 24 | if "ProcessID" in event: 25 | # The case of the final 'd' seems to vary for Windows events! 
26 | event["ProcessId"] = event["ProcessID"] 27 | del event["ProcessID"] 28 | if "NewProcessId" in event: 29 | event["ParentProcessId"] = event["ProcessId"] 30 | event["ProcessId"] = event["NewProcessId"] 31 | del event["NewProcessId"] 32 | event["ParentImage"] = event["ParentProcessName"] 33 | del event["ParentProcessName"] 34 | event["Image"] = event["NewProcessName"] 35 | del event["NewProcessName"] 36 | if "IpPort" in event: 37 | # normalize `IpPort` in auth log into generic source port 38 | event["SourcePort"] = event["IpPort"] 39 | del event["IpPort"] 40 | if "IpAddress" in event: 41 | # normalize `IpAddress` in auth log into generic source address 42 | event["SourceAddress"] = event["IpAddress"] 43 | del event["IpAddress"] 44 | 45 | for k, v in event.items(): 46 | if isinstance(v, str): 47 | try: 48 | event[k] = json.loads(v) 49 | except json.JSONDecodeError: 50 | pass # maybe it's NOT JSON 51 | 52 | for k in list(event): 53 | if k.endswith("_string"): 54 | base_key = k[:-7] 55 | if base_key not in event or not event[base_key]: 56 | event[base_key] = event[k] 57 | del event[k] 58 | if k.endswith("_long"): 59 | base_key = k[:-5] 60 | if base_key not in event or not event[base_key]: 61 | event[base_key] = int(event[k]) 62 | del event[k] 63 | 64 | return event 65 | 66 | 67 | def _read_events(filename: str) -> pd.DataFrame: 68 | """Read JSON lines from `filename` and return a DataFrame""" 69 | events = [] 70 | with open(filename, "r") as fp: 71 | for line in fp: 72 | event = json.loads(line) 73 | event = _normalize_event(event) 74 | events.append(event) 75 | return pd.json_normalize(events) 76 | 77 | 78 | def _update_cell(value): 79 | """Replace a cell value""" 80 | # dump list/dict 81 | if isinstance(value, (list, dict)): 82 | return json.dumps(value) 83 | 84 | # extract UUID 85 | if isinstance(value, str): 86 | matched = RE_UUID.match(value) 87 | if matched: 88 | return matched.group(1) 89 | 90 | # do nothing 91 | return value 92 | 93 | 94 | def 
def _integerize_columns(df):
    """Convert, in place, every column that can be cast to nullable Int64.

    Columns whose values cannot be cast are left untouched.  Returns None.
    """
    for col in df.columns:
        try:
            df[col] = df[col].astype(pd.Int64Dtype())
        except Exception:
            # Best-effort: a column that is not integer-like keeps its
            # dtype.  Unlike a bare ``except``, this lets KeyboardInterrupt
            # and SystemExit propagate.
            continue


def mkdb(
    db: str = typer.Option("sqlite:///events.db", help="Database connection string"),
    table: str = typer.Option("events", help="Table name"),
    filename: str = typer.Argument(..., help="File with JSON lines"),
):
    """Load JSON-lines events from FILENAME into a database table."""
    # basic normalize to DataFrame
    df = _read_events(filename)

    # post-processing values
    df = df.map(_update_cell)

    # convert values to integer if possible
    _integerize_columns(df)

    # write to db
    engine = sqlalchemy.create_engine(db)
    with engine.connect() as conn:
        df.to_sql(table, conn, index=False)