├── .dockerignore ├── .env ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── push_hub.yml │ └── test_code.yml ├── .gitignore ├── CONTRIBUTING.md ├── INSTALL.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── RELEASES.md ├── _config.yml ├── deploy └── docker │ ├── daemon.Dockerfile │ ├── dev.daemon.Dockerfile │ ├── dev.frontend.Dockerfile │ ├── dev.web.Dockerfile │ └── web.Dockerfile ├── docker-compose.dev.yml ├── docker-compose.yml ├── docs ├── README.md ├── api.md ├── client-urls.png ├── components.md ├── config-example.png ├── configuration.md ├── database.md ├── distribured-rev.graphml ├── distribured-rev.png ├── distributed.graphml ├── distributed.md ├── distributed.png ├── goodyara.md ├── how-to │ ├── install-native.md │ ├── integrate-with-s3.md │ └── release-a-new-version.md ├── indexed-datasets.png ├── indexing.md ├── indexing.png ├── interface-v1.4.gif ├── mquery-flowchart.png ├── mquery-web-ui.gif ├── new-client.png ├── new-realm.png ├── new-roles.png ├── new-user.png ├── plugin-config.png ├── plugins.md ├── query-window.png ├── recent-jobs.png ├── redis.md ├── security.md ├── swagger.png ├── users.md ├── utils │ ├── README.md │ ├── compactall.md │ ├── index.md │ ├── mquery.md │ ├── nanobench.md │ └── s3index.md └── yara.md ├── pyproject.toml ├── pyrightconfig.json ├── requirements.plain.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── src ├── .dockerignore ├── __init__.py ├── alembic.ini ├── app.py ├── config.py ├── daemon.py ├── db.py ├── e2etests ├── Dockerfile ├── README.md ├── __init__.py ├── requirements.txt └── test_api.py ├── lib ├── __init__.py ├── ursadb.py └── yaraparse.py ├── metadata.py ├── migrations ├── __init__.py ├── env.py ├── script.py.mako └── versions │ ├── 6b495d5a4855_cascade_jobagent_match_and_del_removed_status_create_enum_jobstatus.py │ ├── 702d19cfa063_add_queuedfile.py │ ├── cbbba858deb0_init.py │ ├── dbb81bd4d47f_add_jobagent.py │ └── f623e1057b00_added_context_column_into_match_table.py ├── models ├── __init__.py ├── agentgroup.py ├── configentry.py ├── job.py ├── jobagent.py ├── match.py └── queuedfile.py ├── mqueryfront ├── .gitignore ├── index.html ├── package.json ├── public │ ├── favicon.ico │ └── manifest.json ├── src │ ├── App.css │ ├── App.js │ ├── App.test.js │ ├── Navigation.js │ ├── about │ │ └── AboutPage.js │ ├── api.js │ ├── auth │ │ └── AuthPage.js │ ├── components │ │ ├── ActionCancel.js │ │ ├── ActionCopyToClipboard.js │ │ ├── ActionDownload.js │ │ ├── ActionRemove.js │ │ ├── ActionShowMatchContext.js │ │ ├── ErrorBoundary.js │ │ ├── ErrorPage.js │ │ ├── FilterIcon.js │ │ ├── FilteringTableHeader.js │ │ ├── FilteringThead.js │ │ ├── FilteringTitle.js │ │ ├── LoadingPage.js │ │ ├── QueryProgressBar.js │ │ ├── QueryTimer.js │ │ └── WarningPage.js │ ├── config │ │ ├── ConfigEntries.js │ │ └── ConfigPage.js │ ├── index.css │ ├── index.js │ ├── indexFiles │ │ ├── IndexClearQueueButton.js │ │ ├── IndexClearedPage.js │ │ ├── IndexMultiSelect.js │ │ ├── IndexPage.js │ │ ├── IndexProgressBar.js │ │ └── IndexSuccessPage.js │ ├── logo.svg │ ├── query │ │ ├── QueryEditParseNav.js │ │ ├── QueryField.js │ │ ├── QueryLayoutManager.js │ │ ├── QueryMatches.js │ │ ├── QueryMatchesItem.js │ │ ├── QueryMonaco.js │ │ ├── QueryNavigation.js │ │ ├── QueryPage.js │ │ ├── QueryParseStatus.js │ │ ├── QueryResultsStatus.js │ │ ├── QuerySearchNav.js │ │ ├── QuerySubmitNav.js │ │ └── yara-lang.js │ ├── recent │ │ ├── RecentPage.js │ │ ├── SearchJobItem.js │ │ └── SearchJobs.js │ ├── 
setupProxy.js │ ├── status │ │ ├── BackendStatus.js │ │ ├── DatabaseTopology.js │ │ ├── StatusPage.js │ │ └── VersionStatus.js │ └── utils.js ├── vite.config.js └── yarn.lock ├── plugins ├── __init__.py ├── archive.py ├── blacklist.py ├── cuckoo_analysis.py ├── cuckoo_binaries.py ├── example_plugin.py ├── example_typed_config_plugin.py ├── mwdb_uploads.py ├── requirements-mwdb_uploads.txt └── s3_plugin.py ├── schema.py ├── scripts ├── __init__.py └── mquery-daemon ├── tasks.py ├── tests ├── Dockerfile ├── README.md ├── requirements.txt ├── test_ursadb.py ├── test_yaraparse.py └── yararules │ ├── README.md │ ├── generate_yaraparse_result_files.py │ ├── test_corpus_yara_rule.py │ └── testdata │ ├── anonymous_strings.txt │ ├── anonymous_strings.yar │ ├── apt_mal_dns_hijacking_campaign_aa19_024a.txt │ ├── apt_mal_dns_hijacking_campaign_aa19_024a.yar │ ├── base64_strings.txt │ ├── base64_strings.yar │ ├── conditions.txt │ ├── conditions.yar │ ├── count_strings.txt │ ├── count_strings.yar │ ├── data_position.txt │ ├── data_position.yar │ ├── executable_entry_point.txt │ ├── executable_entry_point.yar │ ├── hex_alternatives.txt │ ├── hex_alternatives.yar │ ├── hex_alternatives_wildcards.txt │ ├── hex_alternatives_wildcards.yar │ ├── hex_jumps.txt │ ├── hex_jumps.yar │ ├── hex_simple_rule.txt │ ├── hex_simple_rule.yar │ ├── hex_wild_cards.txt │ ├── hex_wild_cards.yar │ ├── iter_over_str_occurrences.txt │ ├── iter_over_str_occurrences.yar │ ├── or_corner_case.yar.txt │ ├── or_corner_case.yar.yar │ ├── parse_exception_example.txt │ ├── parse_exception_example.yar │ ├── private_strings.txt │ ├── private_strings.yar │ ├── referencing_rules.txt │ ├── referencing_rules.yar │ ├── referencing_rules_global.txt │ ├── referencing_rules_global.yar │ ├── referencing_rules_private.txt │ ├── referencing_rules_private.yar │ ├── regex_complex.yar.txt │ ├── regex_complex.yar.yar │ ├── regex_escapes.txt │ ├── regex_escapes.yar │ ├── regex_simple.yar.txt │ ├── regex_simple.yar.yar │ ├── regular_expressions.txt │ ├── regular_expressions.yar │ ├── rule_metadata.txt │ ├── rule_metadata.yar │ ├── rule_tags.txt │ ├── rule_tags.yar │ ├── string_offsets.txt │ ├── string_offsets.yar │ ├── strings_sets.txt │ ├── strings_sets.yar │ ├── text_string.txt │ ├── text_string.yar │ ├── text_string_case_insensitive.txt │ ├── text_string_case_insensitive.yar │ ├── text_string_case_insensitive_wide.txt │ ├── text_string_case_insensitive_wide.yar │ ├── text_string_case_insensitive_wide_ascii.txt │ ├── text_string_case_insensitive_wide_ascii.yar │ ├── text_string_full_word.txt │ ├── text_string_full_word.yar │ ├── wide_ascii_character_string.txt │ ├── wide_ascii_character_string.yar │ ├── wide_character_string.txt │ ├── wide_character_string.yar │ ├── xor_range_strings.txt │ ├── xor_range_strings.yar │ ├── xor_strings.txt │ ├── xor_strings.yar │ ├── xor_wide_ascii_strings.txt │ └── xor_wide_ascii_strings.yar ├── util.py └── utils ├── compactall.py ├── index.py ├── mquery.py ├── nanobench.py └── s3index.py /.dockerignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | venv/ 3 | config.py 4 | .vscode 5 | .idea 6 | .mypy_cache 7 | samples/ 8 | index/ 9 | e2e-state 10 | mquery.ini 11 | src/mqueryfront/dist 12 | src/mqueryfront/node_modules 13 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # This file is only relevant for docker-compose deployments. 
2 | 3 | # Directory where your samples are stored. By default you have to copy them 4 | # to the ./samples subdirectory in this repository. 5 | SAMPLES_DIR=./samples 6 | # Directory where the index files should be saved. By default the ./index 7 | # subdirectory in this repository. 8 | INDEX_DIR=./index 9 | # Directory where the postgres data should be stored. 10 | POSTGRES_DIR=./postgres 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Environment information** 8 | * Mquery version (from the /status page): 9 | * Ursadb version (from the /status page): 10 | * Installation method: 11 | - [ ] Generic docker compose 12 | - [ ] Dev docker compose 13 | - [ ] Native (from source) 14 | - [ ] Other (please explain) 15 | 16 | **Reproduction Steps** 17 | 18 | 19 | 20 | 21 | **Expected behaviour** 22 | 23 | 24 | 25 | 26 | **Actual behaviour** 27 | 28 | 29 | 30 | 31 | **Screenshots** 32 | 33 | 34 | 35 | 36 | **Additional context** 37 | 38 | 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest your idea 4 | 5 | --- 6 | 7 | **Feature Category** 8 | 9 | - [ ] Correctness 10 | - [ ] User Interface / User Experience 11 | - [ ] Performance 12 | - [ ] Other (please explain) 13 | 14 | **Describe the problem** 15 | 16 | 17 | 18 | **Describe the solution you'd like** 19 | 20 | 21 | 22 | **Describe alternatives you've considered** 23 | 24 | 25 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | **Your checklist for this pull request** 5 | - [ ] I've read the [contributing guideline](https://github.com/CERT-Polska/mquery/blob/master/CONTRIBUTING.md).
6 | - [ ] I've tested my changes by building and running mquery, and testing changed functionality (if applicable) 7 | - [ ] I've added automated tests for my change (if applicable, optional) 8 | - [ ] I've updated documentation to reflect my change (if applicable) 9 | 10 | **What is the current behaviour?** 11 | 12 | 13 | **What is the new behaviour?** 14 | 15 | 16 | **Test plan** 17 | 18 | 19 | 21 | 22 | **Closing issues** 23 | 24 | 25 | 26 | fixes #issuenumber 27 | -------------------------------------------------------------------------------- /.github/workflows/push_hub.yml: -------------------------------------------------------------------------------- 1 | name: Build and push docker images 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | build_web: 11 | name: Build image 12 | runs-on: ubuntu-latest 13 | env: 14 | DOCKER_BUILDKIT: 1 15 | steps: 16 | - name: Check out repository 17 | uses: actions/checkout@v2 18 | - name: Build and push the image 19 | uses: docker/build-push-action@v1.1.0 20 | with: 21 | username: ${{ secrets.DOCKER_USERNAME }} 22 | password: ${{ secrets.DOCKER_PASSWORD }} 23 | dockerfile: ./deploy/docker/web.Dockerfile 24 | repository: mqueryci/mquery-web 25 | tags: ${{ github.sha }} 26 | push: ${{ github.event_name == 'push' }} 27 | build_daemon: 28 | name: Build image 29 | runs-on: ubuntu-latest 30 | env: 31 | DOCKER_BUILDKIT: 1 32 | steps: 33 | - name: Check out repository 34 | uses: actions/checkout@v2 35 | - name: Build and push the image 36 | uses: docker/build-push-action@v1.1.0 37 | with: 38 | username: ${{ secrets.DOCKER_USERNAME }} 39 | password: ${{ secrets.DOCKER_PASSWORD }} 40 | dockerfile: ./deploy/docker/daemon.Dockerfile 41 | repository: mqueryci/mquery-daemon 42 | tags: ${{ github.sha }} 43 | push: ${{ github.event_name == 'push' }} 44 | -------------------------------------------------------------------------------- /.github/workflows/test_code.yml: -------------------------------------------------------------------------------- 1 | name: "Code testing" 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | expression_blacklist: 11 | name: expression blacklist 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - run: git fetch origin master 16 | - name: No "console.log" please 17 | run: git diff origin/master -- "*.js" | grep "^[+][^+]" | grep -v "noqa" | grep "console.log" || exit 0 && exit 1 18 | test_python_types: 19 | name: python mypy 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python 24 | uses: actions/setup-python@v1 25 | with: 26 | python-version: '3.10' 27 | - name: install mypy==1.2.0 28 | run: pip3 install mypy==1.2.0 29 | - name: install requirements 30 | run: pip3 install -r requirements.txt 31 | - name: run mypy on main files 32 | run: mypy src/app.py src/daemon.py 33 | - name: run mypy on tests 34 | run: MYPYPATH=src/ mypy src/tests/ 35 | - name: run mypy on utils 36 | run: MYPYPATH=src/ mypy src/utils/ 37 | test_python_style: 38 | name: python flake8 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v2 42 | - name: Setup Python 43 | uses: actions/setup-python@v1 44 | with: 45 | python-version: '3.10' 46 | - name: install flake8==6.0.0 47 | run: pip3 install flake8==6.0.0 48 | - name: run flake8 49 | run: flake8 src 50 | test_python_lint: 51 | name: python black 52 | runs-on: ubuntu-latest 53 | steps: 54 | - uses: actions/checkout@v2 55 
| - name: Setup Python 56 | uses: actions/setup-python@v1 57 | with: 58 | python-version: '3.10' 59 | - name: install black 60 | run: pip3 install black==22.3.0 61 | - name: run black 62 | run: black --check "src" 63 | test_js_style: 64 | name: js prettier 65 | runs-on: ubuntu-latest 66 | env: 67 | working-directory: src/mqueryfront 68 | steps: 69 | - uses: actions/checkout@v2 70 | - name: Setup nodejs 71 | uses: actions/setup-node@v1 72 | with: 73 | node-version: '12.x' 74 | - name: install prettier 75 | run: npm install -g prettier@2.0.4 76 | - name: run prettier 77 | run: prettier --tab-width=4 --check "src/**/*.js" 78 | test_js_build: 79 | name: npm build 80 | runs-on: ubuntu-latest 81 | env: 82 | working-directory: src/mqueryfront 83 | steps: 84 | - name: Setup nodejs 85 | uses: actions/setup-node@v1 86 | with: 87 | node-version: '12.x' 88 | - name: install dependencies 89 | run: yarn install 90 | - name: build 91 | run: npm run build 92 | test_unit: 93 | name: unit tests 94 | runs-on: ubuntu-latest 95 | steps: 96 | - uses: actions/checkout@v2 97 | with: 98 | submodules: recursive 99 | - name: build test image 100 | run: docker build -t mquery_tests:latest -f src/tests/Dockerfile . 101 | - name: run unit tests 102 | run: docker run mquery_tests 103 | test_e2e: 104 | name: e2e tests 105 | runs-on: ubuntu-latest 106 | steps: 107 | - uses: actions/checkout@v2 108 | with: 109 | submodules: recursive 110 | - name: build test image 111 | run: docker build -t mquery_tests:latest -f src/e2etests/Dockerfile . 112 | - name: run web with docker compose 113 | run: docker compose up --build -d web --wait 114 | - name: init the database 115 | run: docker compose exec -it -w /usr/src/app/src/ web alembic upgrade head 116 | - name: run the rest of the code 117 | run: docker compose up -d 118 | - name: run e2e tests 119 | run: docker run --net mquery_default -v $(readlink -f ./samples):/mnt/samples mquery_tests 120 | - name: get run logs 121 | if: always() 122 | run: docker compose logs 123 | - name: stop docker compose 124 | if: always() 125 | run: docker compose down 126 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | venv/ 3 | .vscode 4 | .idea 5 | .mypy_cache 6 | samples/ 7 | index/ 8 | .env 9 | mquery.ini 10 | mquery.egg-info/ 11 | package-lock.json 12 | build/ 13 | postgres/ 14 | node_modules/ 15 | dist/ 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribute to mquery 2 | 3 | ## How to start? 4 | 5 | Great, so you want to join the development! 6 | 7 | First, [set up a development environment](INSTALL.md#docker-compose-dev). 8 | Since you're going to write new code, use the `docker-compose.dev.yml` method. 9 | 10 | If everything went right, the system should be accessible at `http://localhost:80`. 11 | 12 | ## Development workflow 13 | 14 | We use a standard [github fork workflow]( 15 | https://gist.github.com/Chaser324/ce0505fbed06b947d962). 16 | 17 | 1. Fork the repository. 18 | 19 | 2. Create a new branch. The name does not matter, but the recommended format 20 | is `feature/xxx` or `fix/yyy`. 21 | 22 | 3. Work on your changes! 23 | 24 | 4. If possible, add a test or two to the `src/tests/` directory. You can run 25 | them with: 26 | 27 | ```bash 28 | $ docker build -t mquery_tests -f ./src/tests/Dockerfile .
29 | $ docker run mquery_tests 30 | ``` 31 | 32 | 5. We run many code formatters and linters on the code to ensure expected 33 | code quality. Your code will be checked automatically when you submit your 34 | pull request, but you can also run the checks locally to speed up review: 35 | 36 | - **Important:** we use [black](https://pypi.org/project/black/) for Python: 37 | 38 | ```bash 39 | $ pip3 install black==22.3.0 40 | $ black src/ 41 | ``` 42 | 43 | - **Important:** we use [prettier](https://prettier.io/) for Javascript/React: 44 | 45 | ```bash 46 | $ npm install -g prettier@2.0.4 47 | $ prettier --write src/mqueryfront/ 48 | ``` 49 | 50 | - Verify that there are no type errors with [mypy](http://mypy-lang.org/): 51 | 52 | ```bash 53 | $ pip install mypy==1.2.0 54 | $ mypy src 55 | ``` 56 | 57 | - Find other style issues with [flake8](https://flake8.pycqa.org): 58 | 59 | ```bash 60 | $ pip install flake8==6.0.0 61 | $ flake8 src 62 | ``` 63 | 64 | (Lifehack: you can also plug them into your editor as an on-save action). 65 | 66 | You don't have to do this for every PR, but docstrings in this project 67 | were also formatted using: 68 | 69 | ```bash 70 | pydocstringformatter --summary-quotes-same-line --max-summary-lines 10 --max-line-length=79 --no-split-summary-body -w src/ 71 | ``` 72 | 73 | 6. When you feel like you're done, commit the files: 74 | 75 | ```bash 76 | $ git add -A 77 | $ git status # check if included files match your expectations 78 | $ git diff --cached # check the diff for forgotten debug prints etc 79 | $ git commit # commit the changes (don't forget to add a commit message) 80 | ``` 81 | 82 | 7. Push changes to your fork: 83 | 84 | ``` 85 | $ git push origin [your_branch_name] 86 | ``` 87 | 88 | 8. Create a pull request with your changes from the GitHub interface and 89 | wait for review. 90 | 91 | That's it! Thank you very much, we appreciate your help. 92 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # How to install mquery 2 | 3 | Supported installation and deployment methods: 4 | 5 | - [docker-compose.yml](#docker-compose) 6 | - [docker-compose.dev.yml](#docker-compose-dev) 7 | - [bare metal](#bare-metal) 8 | - [kubernetes](#kubernetes) 9 | 10 | ## Docker compose 11 | 12 | Quick build & run with [docker compose](https://docs.docker.com/compose/). 13 | 14 | ``` 15 | git clone https://github.com/CERT-Polska/mquery.git 16 | cd mquery 17 | mkdir samples 18 | # now set SAMPLES_DIR to a directory with your files, and INDEX_DIR to an 19 | # empty directory for database files to live in. By default the database will 20 | # expect files in the ./samples directory, and keep the index in ./index. 21 | vim .env 22 | docker compose up --scale daemon=3 # this will take a while 23 | ``` 24 | 25 | - Good for testing mquery and production deployments on a single server 26 | - Poor for development 27 | 28 | ## Docker compose (dev) 29 | 30 | A docker compose setup dedicated to developers. 31 | 32 | ``` 33 | git clone https://github.com/CERT-Polska/mquery.git 34 | cd mquery 35 | # now set SAMPLES_DIR to a directory with your files, and INDEX_DIR to an 36 | # empty directory for database files to live in. By default the database will 37 | # expect files in the ./samples directory, and keep the index in ./index. 38 | vim .env 39 | docker compose -f docker-compose.dev.yml up # this will take a while 40 | ``` 41 | 42 | - Good for development - all file changes will be picked up automatically.
43 | - Poor for production 44 | 45 | ## Bare metal 46 | 47 | - Read [How to: Install mquery natively (without docker)](how-to/install-native.md) 48 | 49 | ## Kubernetes 50 | 51 | Not strictly supported, but production ready - it's used internally in a 52 | few places, including CERT.PL. 53 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src/mqueryfront/dist 2 | include src/alembic.ini 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mquery: Blazingly fast Yara queries for malware analysts 2 | 3 | Ever had trouble searching for malware samples? Mquery is an 4 | analyst-friendly web GUI to look through your digital warehouse. 5 | 6 | It can be used to search through terabytes of malware in the blink of an eye: 7 | 8 |  9 | 10 | Under the hood we use our [UrsaDB](https://github.com/CERT-Polska/ursadb) to 11 | accelerate yara queries with ngrams. 12 | 13 | ## Demo 14 | 15 | A public instance will be created soon - stay tuned... 16 | 17 | ## Quickstart 18 | 19 | ### 1. Install and start 20 | 21 | The easiest way to do this is with `docker compose`: 22 | 23 | ``` 24 | git clone https://github.com/CERT-Polska/mquery.git 25 | cd mquery 26 | vim .env # optional - change samples and index directory locations 27 | docker compose up --scale daemon=3 # building the images will take a while 28 | ``` 29 | 30 | The web interface should be available at `http://localhost`. 31 | 32 |  33 | 34 | *(For more installation options, see the [installation manual](./INSTALL.md)).* 35 | 36 | ### 2. Add the files 37 | 38 | Put some files in the `SAMPLES_DIR` (by default `./samples` in the repository, 39 | configurable with a variable in the `.env` file). 40 | 41 | ### 3. Index your collection 42 | 43 | Launch ursacli in docker: 44 | 45 | ```shell 46 | docker compose exec ursadb ursacli 47 | [2023-06-14 17:20:24.940] [info] Connecting to tcp://localhost:9281 48 | [2023-06-14 17:20:24.942] [info] Connected to UrsaDB v1.5.1+98421d7 (connection id: 006B8B46B6) 49 | ursadb> 50 | ``` 51 | 52 | Index the samples with n-grams of your choosing (this may take a while!) 53 | 54 | ```shell 55 | ursadb> index "/mnt/samples" with [gram3, text4, wide8, hash4]; 56 | [2023-06-14 17:29:27.672] [info] Working... 1% (109 / 8218) 57 | [2023-06-14 17:29:28.674] [info] Working... 1% (125 / 8218) 58 | ... 59 | [2023-06-14 17:37:40.265] [info] Working... 99% (8217 / 8218) 60 | [2023-06-14 17:37:41.266] [info] Working... 99% (8217 / 8218) 61 | { 62 | "result": { 63 | "status": "ok" 64 | }, 65 | "type": "ok" 66 | } 67 | ``` 68 | 69 | 70 | This will scan the samples directory for all new files and index them. You can 71 | monitor the progress in the `tasks` window on the left: 72 | 73 |  74 | 75 | You have to repeat this process every time you want to add new files! 76 | 77 | After indexing is over, you will notice new datasets: 78 | 79 |  80 | 81 | This is a good and easy way to start, but if you have a big collection you are 82 | strongly encouraged to read the [indexing page](./docs/indexing.md) in the manual. 83 | 84 | ### 4. Test it 85 | 86 | Now your files should be searchable - insert any Yara rule into the search 87 | window and click `Query`.
Just for demonstration, I've indexed the source code 88 | of this application and tested this Yara rule: 89 | 90 | ``` 91 | rule mquery_exceptions { 92 | strings: $a = "Exception" 93 | condition: all of them 94 | } 95 | ``` 96 | 97 |  98 | 99 | ## Learn more 100 | 101 | See the [documentation](./docs/README.md) to learn more. It's probably a good idea 102 | if you plan a bigger deployment. 103 | 104 | You can also read the hosted version here: 105 | [cert-polska.github.io/mquery/docs](https://cert-polska.github.io/mquery/docs). 106 | 107 | ## Installation 108 | 109 | See the 110 | [installation instruction](./INSTALL.md). 111 | 112 | ## Contributing 113 | 114 | If you want to contribute, see our dedicated 115 | [documentation for contributors](./CONTRIBUTING.md). 116 | 117 | ## Changelog 118 | 119 | Learn how the project has changed by reading our 120 | [release log](./RELEASES.md). 121 | 122 | ## Contact 123 | 124 | If you have any problems, bugs or feature requests related to mquery, you're 125 | encouraged to create a GitHub issue. 126 | 127 | You can chat about this project on Discord: 128 | 129 | [](https://discord.gg/3FcP6GQNzd) 130 | 131 | If you have questions unsuitable for GitHub or Discord, you can email CERT.PL 132 | (info@cert.pl) directly. 133 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-hacker -------------------------------------------------------------------------------- /deploy/docker/daemon.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | RUN apt update; apt install -y cmake 4 | 5 | # mquery and plugin requirements 6 | COPY requirements.txt src/plugins/requirements-*.txt /tmp/ 7 | RUN ls /tmp/requirements*.txt | xargs -i,, pip --no-cache-dir install -r ,, 8 | 9 | COPY requirements.txt setup.py MANIFEST.in /app/ 10 | COPY src /app/src/ 11 | RUN pip install /app 12 | 13 | ENTRYPOINT ["mquery-daemon"] 14 | -------------------------------------------------------------------------------- /deploy/docker/dev.daemon.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | WORKDIR /usr/src/app/src 4 | 5 | RUN apt update; apt install -y cmake 6 | 7 | # mquery and plugin requirements 8 | COPY requirements.txt src/plugins/requirements-*.txt /tmp/ 9 | RUN ls /tmp/requirements*.txt | xargs -i,, pip --no-cache-dir install -r ,, 10 | RUN pip install watchdog 11 | 12 | CMD pip install -e /usr/src/app && watchmedo auto-restart --pattern=*.py --recursive -- mquery-daemon 13 | -------------------------------------------------------------------------------- /deploy/docker/dev.frontend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18 AS build 2 | 3 | RUN npm install -g serve 4 | COPY src/mqueryfront /app 5 | WORKDIR /app 6 | RUN yarn install --legacy-peer-deps 7 | CMD ["npm", "start"] 8 | -------------------------------------------------------------------------------- /deploy/docker/dev.web.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | WORKDIR /usr/src/app/src 4 | 5 | RUN apt update; apt install -y cmake 6 | 7 | # mquery and plugin requirements 8 | COPY requirements.txt src/plugins/requirements-*.txt /tmp/ 9 | RUN ls /tmp/requirements*.txt | xargs -i,, pip --no-cache-dir
install -r ,, 10 | 11 | CMD pip install -e /usr/src/app && uvicorn mquery.app:app --host 0.0.0.0 --port 5000 --reload 12 | -------------------------------------------------------------------------------- /deploy/docker/web.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18 AS build 2 | 3 | RUN npm install -g serve 4 | COPY src/mqueryfront /app 5 | WORKDIR /app 6 | RUN yarn install --legacy-peer-deps && npm run build 7 | 8 | FROM python:3.10 9 | 10 | RUN apt update; apt install -y cmake 11 | 12 | # mquery and plugin requirements 13 | COPY requirements.txt src/plugins/requirements-*.txt /tmp/ 14 | RUN ls /tmp/requirements*.txt | xargs -i,, pip --no-cache-dir install -r ,, 15 | 16 | COPY requirements.txt setup.py MANIFEST.in /usr/src/app/ 17 | COPY src /usr/src/app/src/ 18 | COPY --from=build "/app/dist" "/usr/src/app/src/mqueryfront/dist" 19 | RUN pip3 install /usr/src/app 20 | CMD ["uvicorn", "mquery.app:app", "--host", "0.0.0.0", "--port", "5000"] 21 | -------------------------------------------------------------------------------- /docker-compose.dev.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | dev-frontend: 4 | build: 5 | context: . 6 | dockerfile: deploy/docker/dev.frontend.Dockerfile 7 | ports: 8 | - "80:3000" 9 | links: 10 | - dev-web 11 | volumes: 12 | - ./src/mqueryfront/src:/app/src 13 | depends_on: 14 | - "dev-web" 15 | dev-web: 16 | build: 17 | context: . 18 | dockerfile: deploy/docker/dev.web.Dockerfile 19 | ports: 20 | - "5000:5000" 21 | depends_on: 22 | redis: 23 | condition: service_started 24 | postgres: 25 | condition: service_healthy 26 | volumes: 27 | - "${SAMPLES_DIR}:/mnt/samples" 28 | - .:/usr/src/app 29 | environment: 30 | - "REDIS_HOST=redis" 31 | - "MQUERY_BACKEND=tcp://ursadb:9281" 32 | - "DATABASE_URL=postgresql://postgres:password@postgres:5432/mquery" 33 | healthcheck: 34 | test: ["CMD-SHELL", "curl --fail http://localhost:5000/api/server || exit 1"] 35 | interval: 5s 36 | timeout: 5s 37 | retries: 30 38 | dev-daemon: 39 | build: 40 | context: . 
41 | dockerfile: deploy/docker/dev.daemon.Dockerfile 42 | links: 43 | - redis 44 | - ursadb 45 | - postgres 46 | volumes: 47 | - "${SAMPLES_DIR}:/mnt/samples" 48 | - .:/usr/src/app 49 | depends_on: 50 | dev-web: 51 | condition: service_healthy 52 | redis: 53 | condition: service_started 54 | ursadb: 55 | condition: service_started 56 | postgres: 57 | condition: service_healthy 58 | environment: 59 | - "REDIS_HOST=redis" 60 | - "MQUERY_BACKEND=tcp://ursadb:9281" 61 | - "DATABASE_URL=postgresql://postgres:password@postgres:5432/mquery" 62 | ursadb: 63 | image: mqueryci/ursadb:v1.5.1 64 | ports: 65 | - "9281:9281" 66 | volumes: 67 | - "${SAMPLES_DIR}:/mnt/samples" 68 | - "${INDEX_DIR}:/var/lib/ursadb" 69 | user: "0:0" 70 | keycloak: 71 | image: quay.io/keycloak/keycloak:15.1.0 72 | ports: 73 | - "8080:8080" 74 | environment: 75 | - KEYCLOAK_USER=admin 76 | - KEYCLOAK_PASSWORD=admin 77 | - DB_VENDOR=h2 78 | redis: 79 | image: redis 80 | postgres: 81 | image: postgres 82 | restart: always 83 | volumes: 84 | - ${POSTGRES_DIR}:/var/lib/postgresql/data 85 | environment: 86 | - POSTGRES_PASSWORD=password 87 | - POSTGRES_DB=mquery 88 | healthcheck: 89 | test: ["CMD-SHELL", "pg_isready -U postgres"] 90 | interval: 5s 91 | timeout: 5s 92 | retries: 5 93 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | web: 4 | restart: always 5 | build: 6 | context: . 7 | dockerfile: deploy/docker/web.Dockerfile 8 | ports: 9 | - "80:5000" 10 | depends_on: 11 | redis: 12 | condition: service_started 13 | postgres: 14 | condition: service_healthy 15 | volumes: 16 | - "${SAMPLES_DIR}:/mnt/samples" 17 | environment: 18 | - "REDIS_HOST=redis" 19 | - "MQUERY_BACKEND=tcp://ursadb:9281" 20 | - "DATABASE_URL=postgresql://postgres:password@postgres:5432/mquery" 21 | healthcheck: 22 | test: ["CMD-SHELL", "curl --fail http://localhost:5000/api/server || exit 1"] 23 | interval: 5s 24 | timeout: 5s 25 | retries: 30 26 | daemon: 27 | restart: always 28 | build: 29 | context: . 30 | dockerfile: deploy/docker/daemon.Dockerfile 31 | links: 32 | - redis 33 | - ursadb 34 | - postgres 35 | volumes: 36 | - "${SAMPLES_DIR}:/mnt/samples" 37 | depends_on: 38 | web: 39 | condition: service_healthy 40 | redis: 41 | condition: service_started 42 | ursadb: 43 | condition: service_started 44 | postgres: 45 | condition: service_healthy 46 | environment: 47 | - "REDIS_HOST=redis" 48 | - "MQUERY_BACKEND=tcp://ursadb:9281" 49 | - "DATABASE_URL=postgresql://postgres:password@postgres:5432/mquery" 50 | ursadb: 51 | restart: always 52 | image: mqueryci/ursadb:v1.5.1 53 | ports: 54 | - "127.0.0.1:9281:9281" 55 | volumes: 56 | - "${SAMPLES_DIR}:/mnt/samples" 57 | - "${INDEX_DIR}:/var/lib/ursadb" 58 | user: "0:0" 59 | redis: 60 | restart: always 61 | image: redis 62 | postgres: 63 | image: postgres 64 | restart: always 65 | volumes: 66 | - ${POSTGRES_DIR}:/var/lib/postgresql/data 67 | environment: 68 | - POSTGRES_PASSWORD=password 69 | - POSTGRES_DB=mquery 70 | healthcheck: 71 | test: ["CMD-SHELL", "pg_isready -U postgres"] 72 | interval: 5s 73 | timeout: 5s 74 | retries: 5 75 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # mquery documentation 2 | 3 | ## User guide 4 | 5 | - [Installation](../INSTALL.md): Installation instruction. 
6 | - [Configuration](./configuration.md): Additional configuration options. 7 | - [Components](./components.md): More detailed description of mquery components. 8 | - [Indexing](./indexing.md): Indexing files is one of the most important things in 9 | mquery. In simple cases it can be solved without leaving the web UI, but 10 | many things will require a more advanced approach. Read this if you need to 11 | index a considerable number of files. 12 | - [How to write good yara rules](./goodyara.md): How to write YARA rules that 13 | will work well in mquery. 14 | - [Yara support and limitations](./yara.md): Explains how mquery 15 | accelerates queries, what will, and what won't work. 16 | - [Utility scripts](./utils): Mquery ships with a few useful scripts. 17 | Here you can find documentation for them. 18 | - [For future contributors](../CONTRIBUTING.md): How to contribute. 19 | 20 | ## How to... 21 | 22 | - [Install mquery natively (without docker)](how-to/install-native.md) 23 | - [Integrate mquery with s3](how-to/integrate-with-s3.md) 24 | 25 | 26 | ## Relevant [ursadb's documentation](https://cert-polska.github.io/ursadb) 27 | 28 | Ursadb is the backend doing the heavy lifting for mquery. If you need to work with large 29 | datasets, it's a very useful read. It is also a prerequisite for understanding 30 | many things in mquery. 31 | 32 | - [Index types](https://cert-polska.github.io/ursadb/docs/indextypes.html): Picking 33 | the index types you need is an important decision that's hard to change later. 34 | - [Datasets](https://cert-polska.github.io/ursadb/docs/datasets.html): Introduction to 35 | datasets. 36 | - [Performance and limits](https://cert-polska.github.io/ursadb/docs/limits.html): 37 | Read in case you're not sure if Ursadb can handle your collection. 38 | - [On-disk format](https://cert-polska.github.io/ursadb/docs/ondiskformat.html): 39 | The Ursadb index format is relatively simple - reading this may be useful for 40 | advanced users. 41 | 42 | ## Advanced topics 43 | 44 | Relevant for people who want to run mquery in production or on a bigger scale. 45 | 46 | - [Security](./security.md): Security considerations for hardening your mquery instance. 47 | - [Distributed mquery](./distributed.md): For users that want to run mquery on 48 | more than one machine. 49 | - [On-disk format](./ondiskformat.md): Read if you want to understand ursadb's on 50 | disk format (spoiler: many files are just JSON and can be inspected with vim). 51 | - [Plugin system](./plugins.md): For filtering, processing and tagging files. 52 | - [Database format](./database.md): Information about the data stored in the database. 53 | - [Redis applications](./redis.md): Of historical interest; redis is used only for [rq](https://python-rq.org/) now. 54 | - [User management](./users.md): Control and manage access to your mquery instance. 55 | - [API](./api.md): Mquery exposes a simple API that you may use for your automation. 56 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | Launch mquery and browse to `/docs`. 4 | 5 |  6 | 7 | Mquery has a stable API that you can use to automate your work. It also 8 | has internal API endpoints that are used by the website - you can use them, 9 | but they may change in the future without warning.
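For a quick test from the command line, you can call the stable query endpoint directly with `curl` (a minimal sketch using the same `/api/query` endpoint and `query_hash` response field as the example script below; adjust the server address to your deployment):

```bash
# Submit a yara rule; the JSON response contains the job id ("query_hash").
curl -s -X POST "http://localhost/api/query" \
    -H "Content-Type: application/json" \
    -d '{"method": "query", "raw_yara": "rule t { strings: $a = \"Exception\" condition: $a }", "taint": null}'
```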
10 | 11 | ## Mquery API example 12 | 13 | The [mquery](https://github.com/CERT-Polska/mquery/utils/mquery.py) script 14 | is a good starting point for your scripts. It uses only stable endpoints. 15 | The interesting part of the script is: 16 | 17 | ```python 18 | #!/usr/bin/python3 19 | 20 | import time 21 | import requests 22 | 23 | mquery_server = "http://localhost" # hardcoded to localhost 24 | 25 | yara_rule = """ 26 | rule test { 27 | strings: $a = "Exception" 28 | condition: $a 29 | } 30 | """ # hardcoded yara rule 31 | 32 | job_id = requests.post( 33 | f"{mquery_server}/api/query", 34 | json={ 35 | "method": "query", 36 | "raw_yara": yara_rule, 37 | "taint": None, 38 | }, 39 | ).json()["query_hash"] 40 | 41 | offset = 0 42 | while True: 43 | out = requests.get( 44 | f"{mquery_server}/api/matches/{job_id}?offset={offset}&limit=50" 45 | ).json() 46 | 47 | for match in out["matches"]: 48 | file_path = match["file"] 49 | sha256 = match["meta"]["sha256"]["display_text"] 50 | print(sha256) 51 | with open(sha256, "wb") as outf: 52 | content = requests.get( 53 | f"{mquery_server}/api/download", 54 | {"job_id": job_id, "ordinal": offset, "file_path": file_path}, 55 | ).content 56 | outf.write(content) 57 | offset += 1 58 | 59 | if out["job"]["status"] in ["cancelled", "failed", "done", "removed"]: 60 | break 61 | 62 | time.sleep(1.0) 63 | ``` 64 | 65 | ## Ursadb API example 66 | 67 | Many things that are not exposed by mquery can be done using the underlying 68 | Ursadb API. Just remember that you shouldn't allow unauthenticated access to it, 69 | because a malicious user can use the API to index and query arbitrary files on the 70 | server's drive. 71 | 72 | See [ursadb's syntax documentation](https://cert-polska.github.io/ursadb/docs/syntax.html) 73 | to learn more.
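For a feel of what such commands look like, a short interactive session might be (illustrative only - `select`, `topology` and `compact` are the same commands used by the utilities shown below and elsewhere in this documentation):

```bash
# Open an interactive client against the dockerized ursadb instance.
docker compose exec ursadb ursacli
# Then, at the ursadb> prompt:
#   select "abc" & "qwe";   - find files containing both strings
#   topology;               - list datasets and their indexes
#   compact smart;          - merge compatible datasets
```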
75 | 76 | [`compactall`](https://github.com/CERT-Polska/mquery/blob/master/src/utils/compactall.py) 77 | is a very simple example of this type of integration: 78 | 79 | ```python 80 | ursa = UrsaDb("tcp://localhost:9281") 81 | last_datasets = None 82 | while True: 83 | datasets = set( 84 | ursa.execute_command("topology;")["result"]["datasets"].keys() 85 | ) 86 | logging.info("%s datasets left.", len(datasets)) 87 | if datasets == last_datasets: 88 | # Nothing can be compacted anymore 89 | break 90 | 91 | start = time.time() 92 | ursa.execute_command("compact smart;") 93 | end = time.time() 94 | logging.info("Compacting took %s seconds...", (end - start)) 95 | last_datasets = datasets 96 | ``` 97 | 98 | Where the `Ursadb` object is just a very thin wrapper around zeromq: 99 | 100 | ```python 101 | def make_socket(self, recv_timeout: int = 2000) -> zmq.Socket: 102 | context = zmq.Context() 103 | socket = context.socket(zmq.REQ) 104 | socket.setsockopt(zmq.LINGER, 0) 105 | socket.setsockopt(zmq.RCVTIMEO, recv_timeout) 106 | socket.connect(self.backend) 107 | return socket 108 | 109 | def execute_command(self, command: str) -> Json: 110 | socket = self.make_socket(recv_timeout=-1) 111 | socket.send_string(command) 112 | response = socket.recv_string() 113 | socket.close() 114 | return json.loads(response) 115 | ``` 116 | -------------------------------------------------------------------------------- /docs/client-urls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/client-urls.png -------------------------------------------------------------------------------- /docs/components.md: -------------------------------------------------------------------------------- 1 | # components 2 | 3 | There are four main independent components in mquery deployments: 4 | 5 | - web frontend 6 | - daemons (also called "agents" or "workers") 7 | - UrsaDB (backend database) 8 | - Redis 9 | 10 | In a normal situation, there is one web frontend, one shared Redis database, and 11 | for every UrsaDB instance, there is a group of one or more agents dedicated to 12 | it: 13 | 14 |  15 | 16 | In most small-to-medium sized deployments there is only one ursadb instance, and 17 | all workers are assigned to it. 18 | 19 | More complex configurations are possible, for example, consider this deployment 20 | with an internal and an external sample index: 21 | 22 |  23 | 24 | ### Web frontend (mquery) 25 | 26 | Mquery is a standard web application written in Python (using the 27 | FastAPI framework). 28 | 29 | It talks with Redis directly and schedules tasks for the workers. 30 | 31 | For some administrative tasks (like checking ongoing tasks) it also sends requests 32 | to UrsaDB directly. 33 | 34 | ### Mquery daemon (agent) 35 | 36 | The workhorse of the entire setup. There must be at least one daemon for 37 | every UrsaDB instance. The daemon's tasks include querying the assigned UrsaDB 38 | instance for samples and running YARA rules on candidate samples. 39 | 40 | ### Redis 41 | 42 | It's a shared database that's used for several purposes, the main one being communication between the daemon and mquery. 43 | It is also used as a task queue for jobs scheduled for agents, and for persistent 44 | storage of job results. Finally, it's utilized to store 45 | plugin configuration and the job cache for agents.
To sum up, it's pretty overloaded 46 | and used to store everything, acting as the main database of the project. 47 | 48 | ### UrsaDB 49 | 50 | Ursadb is a [separate project](https://github.com/CERT-Polska/ursadb), used in 51 | mquery as a backend database to optimise YARA rules. Ursadb itself has no 52 | understanding of YARA syntax, so all rules are first transpiled by mquery to 53 | a simpler Ursadb syntax before a query. 54 | -------------------------------------------------------------------------------- /docs/config-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/config-example.png -------------------------------------------------------------------------------- /docs/database.md: -------------------------------------------------------------------------------- 1 | # How the data is stored in the database 2 | 3 | Currently, a Postgres database is used to keep the entities used by mquery. 4 | 5 | With the default docker configuration, you can connect to the database 6 | using the following one-liner: 7 | 8 | ``` 9 | sudo docker compose exec postgres psql -U postgres --dbname mquery 10 | ``` 11 | 12 | The following tables are defined: 13 | 14 | ### Job table (`job`) 15 | 16 | Jobs are stored in the `job` table. 17 | 18 | Every job has an ID, which is a random 12-character string like 2OV8UP4DUOWK (the 19 | same string that is visible in URLs like http://mquery.net/query/2OV8UP4DUOWK). 20 | 21 | Possible job statuses are: 22 | 23 | * "new" - Completely new job. 24 | * "inprogress" - Job that is in progress. 25 | * "done" - Job that has finished. 26 | * "cancelled" - Job was cancelled by the user or failed. 27 | * "removed" - Job is hidden in the UI (TODO: remove this status in the future). 28 | 29 | ### Job agent table (`jobagent`) 30 | 31 | It is a simple mapping between job_id and agent_id. Additionally, it keeps track 32 | of how many tasks are still in progress for a given agent assigned to this job. 33 | 34 | ### Match table (`match`) 35 | 36 | Matches represent files matched to a job. 37 | 38 | Every match represents a single yara rule match (along with optional attributes 39 | from plugins). 40 | 41 | ### AgentGroup table (`agentgroup`) 42 | 43 | When scheduling jobs, mquery needs to know how many agent groups are 44 | waiting for tasks. In most cases there is only one, but in a distributed environment 45 | there may be more. 46 | 47 | ### Configuration table (`configentry`) 48 | 49 | Represented by the models.configentry.ConfigEntry class. 50 | 51 | For example, `plugin:TestPlugin` will store configuration for `TestPlugin` as a 52 | dictionary. All plugins can expose their own arbitrary config options. 53 | 54 | As a special case, `plugin:Mquery` keeps the configuration of mquery itself.
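For example, to eyeball recent jobs and their statuses from the command line (a minimal sketch; it relies only on the `id` and `status` columns implied by the description above - check `src/models/job.py` for the full schema):

```bash
# Reuse the psql one-liner from above, with an inline query.
sudo docker compose exec postgres psql -U postgres --dbname mquery \
    -c "SELECT id, status FROM job LIMIT 10;"
```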
55 | -------------------------------------------------------------------------------- /docs/distribured-rev.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/distribured-rev.png -------------------------------------------------------------------------------- /docs/distributed.md: -------------------------------------------------------------------------------- 1 | # Distributed mquery 2 | 3 | It's possible to use mquery in a distributed way: 4 | 5 |  6 | 7 | Every agent will talk with its dedicated Ursadb instance, queries will 8 | be run on all servers in parallel, and the results will be merged. 9 | 10 | In fact, the default stock configuration is really "distributed", just with 11 | a single agent running on the same machine. 12 | 13 | It's also possible to do it "in reverse" - a single Ursadb instance can 14 | be connected to multiple mquery servers: 15 | 16 |  17 | 18 | All of mquery's core functionality works in this setup, but there are some 19 | deployment problems. For example, the web interface assumes that the samples are stored 20 | (or mounted) at the same location as on the workers. If that's not the case, 21 | it can be corrected with custom [plugins](./plugins.md). 22 | -------------------------------------------------------------------------------- /docs/distributed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/distributed.png -------------------------------------------------------------------------------- /docs/how-to/release-a-new-version.md: -------------------------------------------------------------------------------- 1 | # How to release a new mquery version 2 | 3 | I never remember the exact steps, so let's document this once and for all. 4 | 5 | This should be automated someday.
6 | 7 | * [ ] Bump the version 8 | * [ ] pick a new version, make sure you follow semver 9 | * [ ] Update `setup.py` 10 | * [ ] Update `src/utils.py` 11 | * [ ] `git checkout -b release/v1.6.0; git push origin release/v1.6.0` 12 | * [ ] Create a PR and merge it 13 | * [ ] Push a new git tag 14 | * [ ] `git tag v1.6.0` 15 | * [ ] `git push origin v1.6.0` 16 | * [ ] Publish a release on github 17 | * [ ] Build the package 18 | * [ ] `python3 setup.py build` 19 | * [ ] `python3 setup.py sdist` 20 | * [ ] Test the package 21 | * [ ] Publish the pypi package 22 | * [ ] `twine upload dist/mquery-1.6.0.tar.gz` 23 | -------------------------------------------------------------------------------- /docs/indexed-datasets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/indexed-datasets.png -------------------------------------------------------------------------------- /docs/indexing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/indexing.png -------------------------------------------------------------------------------- /docs/interface-v1.4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/interface-v1.4.gif -------------------------------------------------------------------------------- /docs/mquery-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/mquery-flowchart.png -------------------------------------------------------------------------------- /docs/mquery-web-ui.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/mquery-web-ui.gif -------------------------------------------------------------------------------- /docs/new-client.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/new-client.png -------------------------------------------------------------------------------- /docs/new-realm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/new-realm.png -------------------------------------------------------------------------------- /docs/new-roles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/new-roles.png -------------------------------------------------------------------------------- /docs/new-user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/new-user.png -------------------------------------------------------------------------------- /docs/plugin-config.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/plugin-config.png -------------------------------------------------------------------------------- /docs/query-window.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/query-window.png -------------------------------------------------------------------------------- /docs/recent-jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/recent-jobs.png -------------------------------------------------------------------------------- /docs/redis.md: -------------------------------------------------------------------------------- 1 | # How the data is stored in redis 2 | 3 | In older mquery versions, data used to be stored in Redis. In mquery 4 | version 1.4.0 the data was migrated to PostgreSQL - see [database](./database.md). 5 | 6 | Please note that all this is 100% internal, and shouldn't be relied on. 7 | The data format in redis can and does change between mquery releases. 8 | 9 | You can use `redis-cli` to connect to redis. With the default docker compose configuration, 10 | use `docker compose exec redis redis-cli`. 11 | 12 | Redis command documentation is pretty good and available at https://redis.io/commands/. 13 | 14 | ### Rq objects (`rq:*`) 15 | 16 | Objects used internally by https://python-rq.org/, the task scheduler used by mquery. 17 | 18 | You can browse them using tools from https://python-rq.org/docs/monitoring/. 19 | -------------------------------------------------------------------------------- /docs/security.md: -------------------------------------------------------------------------------- 1 | # security 2 | 3 | ## Secure Deployment 4 | 5 | There are multiple components necessary to have a working mquery instance. 6 | Some of them require special care from a security standpoint. 7 | 8 | ### Mquery 9 | 10 | Mquery is a standard web application written in Python. By default, everyone has permission to do everything. 11 | This default configuration is unsuitable for bigger organisations or public instances. 12 | In such cases, it's recommended to enable user accounts (see [users.md](./users.md)), 13 | and disallow anonymous users or at least don't give them admin rights. 14 | 15 | ### Mquery daemon (agent) 16 | 17 | No special considerations. Every daemon process must have network 18 | access to Redis and UrsaDB. 19 | 20 | ### Redis 21 | 22 | Mquery web and daemon must have network access to Redis. No other access to 23 | the Redis database is necessary. There is no support for securing Redis 24 | with a password in the current version, so network isolation is 25 | the only way to prevent attacks. Most importantly, Redis must not 26 | be available from the public network. 27 | 28 | ### Ursadb 29 | 30 | Mquery daemons must have network access to their respective ursadb instances. 31 | Similarly to Redis, it's best to restrict network access to the UrsaDB instance. The Ursadb protocol does not take malicious actors into account, and 32 | unauthenticated users can, for example, remove indexed data from the database, 33 | or cause a denial of service. 34 | 35 | In the provided docker compose files, the UrsaDB user is overridden to root by 36 | default. This is for
For production instances 38 | consider running ursadb with the default user (`ursa`, UID 1000). This means 39 | that the shared index volume must be writable by UID 1000, and samples must 40 | be readable by UID 1000. 41 | 42 | ## How to report a vulnerability 43 | 44 | There is no dedicated email for reporting a security vulnerability. Please reach out 45 | to cert@cert.pl or one of the maintainers directly. If the vulnerability is not 46 | critical, the best way to report is via a GitHub issue. 47 | -------------------------------------------------------------------------------- /docs/swagger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/swagger.png -------------------------------------------------------------------------------- /docs/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utilities 2 | 3 | Mquery ships with a few scripts to automate common tasks: 4 | 5 | - [mquery.py](./mquery.md) - Automate yara hunts and download of the results. 6 | - [index.py](./index.md) - Can be used to index large amounts of data in a 7 | reliable way. 8 | - [compactall.py](./compactall.md) - Will compact datasets in the Ursadb instance 9 | as long, as there's anything left to compact. 10 | - [nanobench.py](./nanobench.md) - A small helper script, used by the developers 11 | to benchmark Ursadb performance on a given machine and with a given configuration. 12 | - [s3index.py](./s3index.md) - Helper script for indexing samples from S3. 13 | It serves as a demonstration, not a best practice. In the current version 14 | it suffers from a performance problems, so may not be suitable for big 15 | deployments. 16 | -------------------------------------------------------------------------------- /docs/utils/compactall.md: -------------------------------------------------------------------------------- 1 | # compactall.py 2 | 3 | Will compact datasets in the Ursadb instance as long, as there's anything 4 | left to compact. 5 | 6 | ## Usage 7 | 8 | ``` 9 | $ python3 -m utils.compactall --help 10 | usage: compactall.py [-h] [--ursadb URSADB] [--mode {smart,all}] 11 | 12 | Keep the database lean. 13 | 14 | optional arguments: 15 | -h, --help show this help message and exit 16 | --ursadb URSADB URL of the ursadb instance. 17 | --mode {smart,all} Compacting mode. Force (all) or optimise for time 18 | (smart). 19 | 20 | ``` 21 | 22 | ## Example 23 | 24 | This script is very easy to use - it only needs an url of the Ursadb instance, 25 | for example `tcp://127.0.0.1:9281` (which is the default). 26 | 27 | ``` 28 | python3 -m utils.compactall --ursadb tcp://127.0.0.1:9281 29 | ``` 30 | 31 | It will start issuing merging compatible datasets with the `compact` command, 32 | and will only stop when: 33 | - There are no more compatible datasets that can be merged; 34 | - There are compatible datasets, but they can't be merged because resulting 35 | dataset would exceed size maximum configured in Ursadb. 36 | 37 | Running this script periodically probably can't help (but it may put a lot of 38 | load on the disk, so should be run when the db is not used heavily). 39 | 40 | ## Caveats 41 | 42 | This script can be stopped with Ctrl+C at any point, but the last issued 43 | command will continue running (the database will finish compacting the datasets 44 | that it started). 
45 | -------------------------------------------------------------------------------- /docs/utils/index.md: -------------------------------------------------------------------------------- 1 | # index.py 2 | 3 | Can be used to index large amounts of data in a reliable way. 4 | 5 | ## Usage 6 | 7 | ``` 8 | $ python3 -m utils.index --help 9 | usage: index.py [-h] [--mode {prepare,index,prepare-and-index}] 10 | [--ursadb URSADB] [--workdir WORKDIR] [--batch BATCH] 11 | [--path PATH] [--path-mount PATH_MOUNT] 12 | [--max-file-size-mb MAX_FILE_SIZE_MB] 13 | [--type {gram3,text4,hash4,wide8}] [--tag TAGS] 14 | [--workers WORKERS] [--working-datasets WORKING_DATASETS] 15 | 16 | Reindex local files. 17 | 18 | optional arguments: 19 | -h, --help show this help message and exit 20 | --mode {prepare,index,prepare-and-index} 21 | Mode of operation. Only prepare batches, index them, 22 | or both. 23 | --ursadb URSADB URL of the ursadb instance. 24 | --workdir WORKDIR Path to a working directory. 25 | --batch BATCH Size of indexing batch. 26 | --path PATH Path of samples to be indexed. 27 | --path-mount PATH_MOUNT 28 | Path to the samples to be indexed, as seen by ursadb 29 | (if different). 30 | --max-file-size-mb MAX_FILE_SIZE_MB 31 | Maximum file size, in MB, to index. 128 By default. 32 | --type {gram3,text4,hash4,wide8} 33 | Index types. By default [gram3, text4, wide8, hash4] 34 | --tag TAGS Additional tags for indexed datasets. 35 | --workers WORKERS Number of parallel indexing jobs. 36 | --working-datasets WORKING_DATASETS 37 | Numer of working datasets (uses sane value by 38 | default). 39 | ``` 40 | 41 | ## Example 42 | 43 | 44 | Probably the most complex script shipped with mquery. See the 45 | [indexing](../indexing.md) guide for a complete tutorial. Basic usage is 46 | relatively simple though. To index files with ursadb running natively, run: 47 | 48 | ``` 49 | $ python3 -m utils.index --workdir /tmp/work --path ../samples --path-mount /mnt/samples 50 | ERROR:root:Can't connect to ursadb instance at tcp://localhost:9281 51 | INFO:root:Prepare.1: load all indexed files into memory. 52 | INFO:root:Prepare.2: find all new files. 53 | INFO:root:Prepare.3: Got 1 files in 1 batches to index. 54 | INFO:root:Index.1: Determine compacting threshold. 55 | INFO:root:Index.1: Compact threshold = 84. 56 | INFO:root:Index.2: Find prepared batches. 57 | INFO:root:Index.2: Got 1 batches to run. 58 | INFO:root:Index.3: Run index commands with 2 workers. 59 | INFO:root:Index.4: Batch /tmp/work/batch_0000000000.txt done [1/1]. 60 | INFO:root:Index.5: Unlinking the workdir. 61 | INFO:root:Indexing finished. Consider compacting the database now 62 | ``` 63 | 64 | ## Caveats 65 | 66 | This script can be stopped with Ctrl+C at any point, but the last started 67 | indexing batch will continue. 68 | 69 | Don't set the `--workers` parameter too high! It can cause OOM crashes. 70 | -------------------------------------------------------------------------------- /docs/utils/mquery.md: -------------------------------------------------------------------------------- 1 | # mquery.py 2 | 3 | Automate yara hunts and download of the results.
4 | 5 | ## Usage 6 | 7 | ``` 8 | $ python3 utils/mquery.py --help 9 | usage: mquery.py [-h] (--yara YARA | --job JOB) [--mquery MQUERY] 10 | [--print-filenames] [--print-matches] [--save SAVE] 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | --yara YARA Yara rule to use for query 15 | --job JOB Job ID to print or download 16 | --mquery MQUERY Change mquery server address 17 | --print-filenames Also print filenames 18 | --print-matches Also print matched rules 19 | --save SAVE Download samples and save to the provided directory 20 | ``` 21 | 22 | ## Example 23 | 24 | You can provide either a yara rule (`--yara` parameter) or an existing job ID 25 | (`--job` parameter). The script will then create a new job or download an existing 26 | one, and return all the results as a list of hashes, optionally with filenames 27 | (`--print-filenames`) and matched rules (`--print-matches`). There is also an 28 | option to save samples to a local directory (with `--save DIRNAME`). 29 | 30 | For example, to start a new job: 31 | 32 | ``` 33 | $ python3 utils/mquery.py --yara rule.yar 34 | 89b27295b3ed353e38ab67c1d21d44578461413249d28d960f1c6fb4195dbb1b 35 | dacdab7b47f0788b20d33a44500cd3396d47894f37e32d0bd54aa2dbb4e5eed0 36 | 387e6f8912fb8ded6bca4d16c464bc186ad03759529b7ba8b19a54b590c13ab1 37 | 98b7b3faab88ff62720af747195156a3694131aa2fd760753ff48b044da310d4 38 | fcc7183658c7a6f92a580e3ea4ee8f3987b58a4fec08a0a826f5aee2226cda53 39 | ed04594b5bae61d40b8da8c81d9a0cf1b4aba44144f06cca674e0ea98d691dd5 40 | 442e658f0adaf384170cddc735d86cb3d5d6f5a6932af77d4080a88551790b53 41 | b2695a80ce56561577ee5b7f31f4b3119782e4b45fad599b33c153acf202a129 42 | 0abae63ce933d3f458cd710302a800a87b67bb643a5917098ec97a820dd7232f 43 | 4cfda945446db1d2d65fcce3de5322c679ce1b26c3205fb76f2d05ed19d86bf5 44 | ``` 45 | 46 | Use an existing job ID, print more information, and save files locally: 47 | 48 | ``` 49 | $ python3 utils/mquery.py --job H3PAW4YF68T0 --print-matches --save test 50 | 89b27295b3ed353e38ab67c1d21d44578461413249d28d960f1c6fb4195dbb1b test 51 | dacdab7b47f0788b20d33a44500cd3396d47894f37e32d0bd54aa2dbb4e5eed0 test 52 | 387e6f8912fb8ded6bca4d16c464bc186ad03759529b7ba8b19a54b590c13ab1 test 53 | 98b7b3faab88ff62720af747195156a3694131aa2fd760753ff48b044da310d4 test 54 | fcc7183658c7a6f92a580e3ea4ee8f3987b58a4fec08a0a826f5aee2226cda53 test 55 | ed04594b5bae61d40b8da8c81d9a0cf1b4aba44144f06cca674e0ea98d691dd5 test 56 | 442e658f0adaf384170cddc735d86cb3d5d6f5a6932af77d4080a88551790b53 test 57 | b2695a80ce56561577ee5b7f31f4b3119782e4b45fad599b33c153acf202a129 test 58 | 0abae63ce933d3f458cd710302a800a87b67bb643a5917098ec97a820dd7232f test 59 | 4cfda945446db1d2d65fcce3de5322c679ce1b26c3205fb76f2d05ed19d86bf5 test 60 | 61 | $ ls test | wc -l 62 | 10 63 | ``` 64 | -------------------------------------------------------------------------------- /docs/utils/nanobench.md: -------------------------------------------------------------------------------- 1 | # nanobench.py 2 | 3 | Run performance tests on a local ursadb instance. 4 | 5 | ## Usage 6 | 7 | ``` 8 | $ python3 utils/nanobench.py --help 9 | usage: nanobench.py [-h] [--ursadb URSADB] [--level {nano,mini,heavyduty}] 10 | 11 | Simple benchmark utility. 12 | 13 | optional arguments: 14 | -h, --help show this help message and exit 15 | --ursadb URSADB URL of the ursadb instance. 16 | --level {nano,mini,heavyduty} 17 | How hard should the tests be.
18 | 19 | ``` 20 | 21 | ## Example 22 | 23 | ``` 24 | $ python3 utils/nanobench.py 25 | select "abc"; average 10.954 files: 110 26 | select "abcdefgh"; average 2.150 files: 0 27 | select "abc" & "qwe" & "zxc"; average 1.060 files: 0 28 | select "abc" | "qwe" | "zxc"; average 6.789 files: 285 29 | select min 1 of ("abc", "qwe", "zxc"); average 1.128 files: 285 30 | ... 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/utils/s3index.md: -------------------------------------------------------------------------------- 1 | # s3index.py 2 | 3 | Can be used to index files from S3. 4 | 5 | This script was created to accompany the [S3 integration guide](../how-to/integrate-with-s3.md). It will temporarily download files from S3 and index 6 | them with ursadb. After indexing, the local copies of the samples are deleted. 7 | 8 | WARNING: this script is still in development, and its usage may change 9 | in the future. 10 | 11 | ## Usage 12 | 13 | ``` 14 | $ python3 -m utils.s3index --help 15 | usage: s3index.py [-h] [--mode {prepare,index,prepare-and-index}] [--ursadb URSADB] --s3-url S3_URL --s3-secret-key S3_SECRET_KEY --s3-access-key S3_ACCESS_KEY --s3-bucket S3_BUCKET [--s3-secure S3_SECURE] 16 | [--workdir WORKDIR] [--batch BATCH] [--type {gram3,text4,hash4,wide8}] [--tag TAGS] [--workers WORKERS] [--working-datasets WORKING_DATASETS] 17 | 18 | Index files from s3. 19 | 20 | options: 21 | -h, --help show this help message and exit 22 | --mode {prepare,index,prepare-and-index} 23 | Mode of operation. Only prepare batches, index them, or both. 24 | --ursadb URSADB URL of the ursadb instance. 25 | --s3-url S3_URL S3 server url. 26 | --s3-secret-key S3_SECRET_KEY 27 | Secret key. 28 | --s3-access-key S3_ACCESS_KEY 29 | Access key. 30 | --s3-bucket S3_BUCKET 31 | Bucket name. 32 | --s3-secure S3_SECURE 33 | Use https (1 or 0)?. 34 | --workdir WORKDIR Path to a working directory. 35 | --batch BATCH Size of indexing batch. 36 | --type {gram3,text4,hash4,wide8} 37 | Index types. By default [gram3, text4, wide8, hash4] 38 | --tag TAGS Additional tags for indexed datasets. 39 | --workers WORKERS Number of parallel indexing jobs. 40 | --working-datasets WORKING_DATASETS 41 | Numer of working datasets (uses sane value by default).
42 | ``` 43 | 44 | ## Example 45 | 46 | Only `--workdir` and the S3-related parameters are required: 47 | 48 | ```shell 49 | $ python3 -m utils.s3index \ 50 | --workdir /root/mquery_tmp \ 51 | --s3-url localhost:9000 \ 52 | --s3-secret-key YOUR-SECRET-KEY \ 53 | --s3-access-key YOUR-ACCESS-KEY \ 54 | --s3-bucket mquery \ 55 | --s3-secure 0 56 | ``` 57 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | exclude = 'venv/,mqueryfront/' 3 | line-length = 79 4 | target_version = ['py36'] 5 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "src" 4 | ], 5 | "pythonVersion": "3.10", 6 | "pythonPlatform": "Linux", 7 | "executionEnvironments": [{ 8 | "root": "src" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /requirements.plain.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | pydantic 3 | pyzmq 4 | redis 5 | requests 6 | typing-extensions 7 | uvicorn 8 | yara-python 9 | yaramod 10 | cachetools 11 | pyjwt[crypto] 12 | typed-config 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==1.11.1 2 | annotated-types==0.7.0 3 | anyio==4.6.0 4 | asgiref==3.4.1 5 | cachetools==5.5.0 6 | certifi==2024.8.30 7 | cffi==1.17.1 8 | charset-normalizer==3.3.2 9 | click==8.1.7 10 | cryptography==43.0.1 11 | Deprecated==1.2.13 12 | fastapi==0.115.0 13 | h11==0.14.0 14 | idna==3.10 15 | psycopg2==2.9.9 16 | pycparser==2.22 17 | pydantic==1.10.18 18 | pydantic_core==2.23.4 19 | PyJWT[crypto]==2.9.0 20 | pyzmq==26.2.0 21 | redis==5.0.8 22 | requests==2.32.2 23 | rq==1.16.2 24 | sniffio==1.3.1 25 | sqlmodel==0.0.11 26 | starlette==0.38.6 27 | typed-config==2.0.3 28 | types-requests==2.32.0.20240914 29 | typing_extensions==4.12.2 30 | urllib3==2.2.3 31 | uvicorn==0.30.6 32 | wrapt==1.16.0 33 | yara-python==4.5.1 34 | yaramod==3.23.0 35 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E501,W503,E203 3 | exclude = mqueryfront/ 4 | 5 | [mypy] 6 | python_version = 3.10 7 | 8 | [mypy-yaramod.*] 9 | ignore_missing_imports = True 10 | 11 | [mypy-zmq.*] 12 | ignore_missing_imports = True 13 | 14 | [mypy-uvicorn.*] 15 | ignore_missing_imports = True 16 | 17 | [mypy-mwdblib.*] 18 | ignore_missing_imports = True 19 | 20 | [mypy-cachetools.*] 21 | ignore_missing_imports = True 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | setup( 6 | name="mquery", 7 | version="1.6.0", 8 | description="Blazingly fast Yara queries for malware analysts", 9 | packages=[ 10 | "mquery", 11 | "mquery.lib", 12 | "mquery.plugins", 13 | "mquery.models", 14 | "mquery.migrations", 15 | "mquery.migrations.versions", 16 | ], 17 | package_dir={"mquery": "src"}, 18 | include_package_data=True, 19 | install_requires=open("requirements.txt").read().splitlines(), 20 |
scripts=[ 21 | "src/scripts/mquery-daemon", 22 | ], 23 | classifiers=[ 24 | "Programming Language :: Python", 25 | "Operating System :: OS Independent", 26 | ], 27 | ) 28 | -------------------------------------------------------------------------------- /src/.dockerignore: -------------------------------------------------------------------------------- 1 | .pytest_cache 2 | .mypy_cache 3 | __pycache__ 4 | mqueryfront/ 5 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/__init__.py -------------------------------------------------------------------------------- /src/alembic.ini: -------------------------------------------------------------------------------- 1 | [alembic] 2 | script_location = %(here)s/migrations 3 | prepend_sys_path = . 4 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects. 5 | 6 | [post_write_hooks] 7 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 8 | # hooks = black 9 | # black.type = console_scripts 10 | # black.entrypoint = black 11 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 12 | 13 | # Logging configuration 14 | [loggers] 15 | keys = root,sqlalchemy,alembic 16 | 17 | [handlers] 18 | keys = console 19 | 20 | [formatters] 21 | keys = generic 22 | 23 | [logger_root] 24 | level = WARN 25 | handlers = console 26 | qualname = 27 | 28 | [logger_sqlalchemy] 29 | level = WARN 30 | handlers = 31 | qualname = sqlalchemy.engine 32 | 33 | [logger_alembic] 34 | level = INFO 35 | handlers = 36 | qualname = alembic 37 | 38 | [handler_console] 39 | class = StreamHandler 40 | args = (sys.stderr,) 41 | level = NOTSET 42 | formatter = generic 43 | 44 | [formatter_generic] 45 | format = %(levelname)-5.5s [%(name)s] %(message)s 46 | datefmt = %H:%M:%S 47 | -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | from typedconfig import Config, key, section, group_key # type: ignore 2 | from typedconfig.source import EnvironmentConfigSource, IniFileConfigSource # type: ignore 3 | import os 4 | 5 | 6 | @section("redis") 7 | class RedisConfig(Config): 8 | # Hostname of a configured redis instance. 9 | host = key(cast=str, required=False, default="localhost") 10 | # Port of a configured redis instance. 11 | port = key(cast=int, required=False, default=6379) 12 | 13 | 14 | @section("database") 15 | class DatabaseConfig(Config): 16 | # URL of a configured sql database. 17 | url = key( 18 | cast=str, required=False, default="postgresql://localhost:5432/mquery" 19 | ) 20 | 21 | 22 | @section("rq") 23 | class RqConfig(Config): 24 | # Timeout value for rq jobs. 25 | job_timeout = key(cast=int, required=False, default=300) 26 | 27 | 28 | @section("mquery") 29 | class MqueryConfig(Config): 30 | # URL to a UrsaDB instance. 31 | backend = key(cast=str, required=False, default="tcp://127.0.0.1:9281") 32 | # List of plugin specifications separated by comma, for example 33 | # "plugins.archive:GzipPlugin, plugins.custom:CustomPlugin" 34 | plugins = key(cast=str, required=False, default="") 35 | # Maximum number of yara-scanned files per query (0 means no limit). 
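    # For example: set "yara_limit = 10000" in the [mquery] section of
    # mquery.ini (10000 is just an example value, not a recommendation).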
36 | yara_limit = key(cast=int, required=False, default=0) 37 | # HTML code to be displayed on the about page. 38 | about = key(cast=str, required=False, default="") 39 | 40 | 41 | class AppConfig(Config): 42 | redis = group_key(RedisConfig) 43 | database = group_key(DatabaseConfig) 44 | rq = group_key(RqConfig) 45 | mquery = group_key(MqueryConfig) 46 | 47 | 48 | def _config_sources(): 49 | return [ 50 | EnvironmentConfigSource(), 51 | IniFileConfigSource("mquery.ini", must_exist=False), 52 | IniFileConfigSource( 53 | os.path.expanduser("~/.config/mquery/mquery.ini"), must_exist=False 54 | ), 55 | IniFileConfigSource("/etc/mquery/mquery.ini", must_exist=False), 56 | ] 57 | 58 | 59 | app_config = AppConfig(sources=_config_sources()) 60 | -------------------------------------------------------------------------------- /src/daemon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | from multiprocessing import Process 4 | import logging 5 | from redis import Redis 6 | from rq import Connection, Worker  # type: ignore 7 | 8 | from .util import setup_logging 9 | from . import tasks 10 | from .config import app_config 11 | 12 | 13 | def start_worker(args: argparse.Namespace, process_index: int) -> None: 14 | setup_logging() 15 | logging.info( 16 | "Agent [%s] running (process %s)...", args.group_id, process_index 17 | ) 18 | 19 | with Connection(Redis(app_config.redis.host, app_config.redis.port)): 20 | w = Worker([args.group_id]) 21 | w.work() 22 | 23 | 24 | def main() -> None: 25 | """Spawns a new agent process. Use argv if you want to use a different 26 | group_id (it's `default` by default). 27 | """ 28 | 29 | parser = argparse.ArgumentParser(description="Start mquery daemon.") 30 | parser.add_argument( 31 | "group_id", 32 | help="Name of the agent group to join", 33 | nargs="?", 34 | default="default", 35 | ) 36 | parser.add_argument( 37 | "--scale", 38 | type=int, 39 | help="Specifies the number of concurrent processes to use.", 40 | default=1, 41 | ) 42 | 43 | args = parser.parse_args() 44 | 45 | # Initial registration of the worker group. 46 | # The goal is to make the web UI aware of this worker and its configuration. 47 | tasks.make_agent(args.group_id).register() 48 | 49 | if args.scale > 1: 50 | children = [ 51 | Process(target=start_worker, args=(args, i)) 52 | for i in range(args.scale) 53 | ] 54 | for child in children: 55 | child.start() 56 | for child in children: 57 | child.join() 58 | else: 59 | start_worker(args, 0) 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /src/e2etests/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | RUN apt update; apt install -y cmake 3 | RUN pip3 install pytest==7.1.2 4 | COPY requirements.txt /app/requirements.txt 5 | RUN pip3 install -r /app/requirements.txt 6 | COPY . /app/ 7 | RUN pip3 install /app/ 8 | WORKDIR /app 9 | CMD ["python", "-m", "pytest", "--log-cli-level=INFO", "/app/src/e2etests/"] 10 | -------------------------------------------------------------------------------- /src/e2etests/README.md: -------------------------------------------------------------------------------- 1 | # E2E tests 2 | 3 | Slow test suite, used as a sanity test for mquery matching capabilities. 4 | Hopefully it won't allow us to merge a completely broken version.
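In practice the checks boil down to "talk to the running mquery instance over HTTP and assert on the result". A simplified sketch of the shape of such a test (the service URL and endpoint path here are assumptions for illustration, not verbatim code from `test_api.py`):

```python
# Simplified e2e-style sanity check against a running mquery instance.
# The real suite lives in src/e2etests/test_api.py; the URL and endpoint
# path below are assumptions, not verbatim test code.
import requests

MQUERY_URL = "http://web:5000"  # assumed docker-compose service address


def test_can_submit_a_yara_rule() -> None:
    rule = 'rule e2e_smoke { strings: $a = "e2e-marker" condition: $a }'
    response = requests.post(
        f"{MQUERY_URL}/api/query",
        json={"method": "query", "raw_yara": rule, "taints": []},
    )
    assert response.status_code == 200, response.text
```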
5 | 6 | They are automatically built and run on every commit in the CI pipeline, 7 | so you don't have to run them yourself. But if you want to test locally, run (from the main directory of mquery): 8 | 9 | ```bash 10 | $ rm -r e2e-state 11 | $ docker compose -f docker-compose.e2etests-local.yml up --build --exit-code-from e2etests-local 12 | ``` 13 | -------------------------------------------------------------------------------- /src/e2etests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/e2etests/__init__.py -------------------------------------------------------------------------------- /src/e2etests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest==6.2.5 2 | pytest-timeout==1.3.4 3 | requests==2.23.0 4 | -------------------------------------------------------------------------------- /src/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/lib/__init__.py -------------------------------------------------------------------------------- /src/lib/ursadb.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import zmq  # type: ignore 4 | from typing import Dict, Any, List, Optional 5 | 6 | 7 | Json = Dict[str, Any] 8 | 9 | 10 | class PopResult: 11 | def __init__( 12 | self, 13 | was_locked: bool, 14 | files: List[str], 15 | iterator_pos: int, 16 | total_files: int, 17 | ) -> None: 18 | self.was_locked = was_locked 19 | self.files = files 20 | self.iterator_pos = iterator_pos 21 | self.total_files = total_files 22 | 23 | @property 24 | def iterator_empty(self) -> bool: 25 | """Is it safe to remove the iterator after this operation?""" 26 | if self.was_locked: 27 | return False 28 | return self.iterator_pos >= self.total_files 29 | 30 | def __str__(self) -> str: 31 | """Pretty-print iterator showing all important information.""" 32 | tag = "[locked] " if self.was_locked else "" 33 | pos = f"{self.iterator_pos}/{self.total_files}" 34 | return f"iterator {tag}with {len(self.files)} files ({pos})" 35 | 36 | 37 | class UrsaDb: 38 | def __init__(self, backend: str) -> None: 39 | self.backend = backend 40 | 41 | def __execute(self, command: str, recv_timeout: int = 2000) -> Json: 42 | context = zmq.Context() 43 | socket = context.socket(zmq.REQ) 44 | try: 45 | socket.setsockopt(zmq.LINGER, 0) 46 | socket.setsockopt(zmq.RCVTIMEO, recv_timeout) 47 | socket.connect(self.backend) 48 | socket.send_string(command) 49 | return json.loads(socket.recv_string()) 50 | finally: 51 | socket.close() 52 | 53 | def query( 54 | self, 55 | query: str, 56 | taints: List[str] | None = None, 57 | dataset: Optional[str] = None, 58 | ) -> Json: 59 | command = "select " 60 | if taints: 61 | taints_str = '", "'.join(taints) 62 | taints_whole_str = f'["{taints_str}"]' 63 | command += f"with taints {taints_whole_str} " 64 | if dataset: 65 | command += f'with datasets ["{dataset}"] ' 66 | command += f"into iterator {query};" 67 | 68 | start = time.perf_counter() 69 | res = self.__execute(command, recv_timeout=-1) 70 | end = time.perf_counter() 71 | 72 | if "error" in res: 73 | error = res.get("error", {}).get("message", "(no message)") 74 | return {"error": f"ursadb failed: {error}"} 75 | 76 | return { 77 | "time": (end - start), 78
| "iterator": res["result"]["iterator"], 79 | "file_count": res["result"]["file_count"], 80 | } 81 | 82 | def pop(self, iterator: str, count: int) -> PopResult: 83 | res = self.__execute(f'iterator "{iterator}" pop {count};', -1) 84 | 85 | if "error" in res: 86 | if res["error"].get("retry", False): 87 | # iterator locked, try again in a sec 88 | return PopResult(True, [], 0, 0) 89 | # return empty file set - this will clear the job from the db! 90 | return PopResult(False, [], 0, 0) 91 | 92 | res = res["result"] 93 | iterator_pos = res["iterator_position"] 94 | total_files = res["total_files"] 95 | return PopResult(False, res["files"], iterator_pos, total_files) 96 | 97 | def status(self) -> Json: 98 | return self.__execute("status;") 99 | 100 | def topology(self) -> Json: 101 | return self.__execute("topology;") 102 | 103 | def execute_command(self, command: str) -> Json: 104 | return self.__execute(command, -1) 105 | -------------------------------------------------------------------------------- /src/metadata.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import Any, Dict, Optional 3 | 4 | from .db import Database 5 | 6 | DEFAULT_CACHE_EXPIRE_TIME = 60 * 60 * 12 7 | 8 | Metadata = Dict[str, Any] 9 | MetadataPluginConfig = Dict[str, str] 10 | 11 | 12 | class MetadataPlugin(ABC): 13 | # Can extract() results be cached? Currently unused. 14 | cacheable: bool = False 15 | # Overrides default cache expire time 16 | cache_expire_time: int = DEFAULT_CACHE_EXPIRE_TIME 17 | # Configuration keys required by plugin with description as a value 18 | config_fields: Dict[str, str] = {} 19 | # can this plugin be used for prefiltering mwdb results? 20 | is_filter = False 21 | # can this plugin be used for extracting metadata? 22 | is_extractor = False 23 | 24 | def __init__(self, db: Database, config: MetadataPluginConfig) -> None: 25 | self.db = db 26 | for key in self.config_fields.keys(): 27 | if key not in config or not config[key]: 28 | raise KeyError( 29 | f"Required configuration key '{key}' is not set" 30 | ) 31 | 32 | @classmethod 33 | def get_name(cls) -> str: 34 | return cls.__name__ 35 | 36 | def identify(self, matched_fname: str) -> Optional[str]: 37 | """Returns file unique identifier based on matched path. 38 | 39 | Intended to be overridden by plugin. 40 | """ 41 | return matched_fname 42 | 43 | def run(self, matched_fname: str, current_meta: Metadata) -> Metadata: 44 | """Extracts metadata and updates cache. This method can only be run if 45 | the plugin sets `is_extractor` to True. 46 | 47 | :param matched_fname: Filename of the processed file 48 | :param current_meta: Metadata that will be updated 49 | :return: New metadata 50 | """ 51 | identifier = self.identify(matched_fname) 52 | if identifier is None: 53 | return {} 54 | 55 | return self.extract(identifier, matched_fname, current_meta) 56 | 57 | def filter(self, matched_fname: str, file_path: str) -> Optional[str]: 58 | """Checks if the file is a good candidate for further processing, 59 | and fix the file path if necessary. 60 | :param matched_fname: Original file path coming from ursadb 61 | :param file_path: Current path to the file contents 62 | :return: New path to a file (may be the same path). None if the file 63 | should be discarded. 64 | """ 65 | raise NotImplementedError 66 | 67 | def cleanup(self) -> None: 68 | """Optionally, clean up after the plugin, for example remove any 69 | temporary files. 
Called after processing a single batch of files. 70 | """ 71 | pass 72 | 73 | def extract( 74 | self, identifier: str, matched_fname: str, current_meta: Metadata 75 | ) -> Metadata: 76 | """Extracts metadata for matched path. 77 | 78 | Intended to be overridden by plugin, if is_extractor is True. 79 | 80 | :param identifier: File identifier returned by overridable 81 | :py:meth:`MetadataPlugin.identify` method 82 | :param matched_fname: Matched file path 83 | :param current_meta: Metadata extracted so far by dependencies 84 | :return: Metadata object. If you can't extract metadata for the current file, 85 | return an empty dict. 86 | """ 87 | raise NotImplementedError 88 | -------------------------------------------------------------------------------- /src/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/migrations/__init__.py -------------------------------------------------------------------------------- /src/migrations/env.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import create_engine 2 | from alembic import context 3 | from sqlmodel import SQLModel 4 | from logging.config import fileConfig 5 | 6 | from mquery.config import app_config  # type: ignore 7 | from mquery.models.agentgroup import AgentGroup  # type: ignore # noqa 8 | from mquery.models.configentry import ConfigEntry  # type: ignore # noqa 9 | from mquery.models.job import Job  # type: ignore # noqa 10 | from mquery.models.match import Match  # type: ignore # noqa 11 | from mquery.models.queuedfile import QueuedFile  # type: ignore # noqa 12 | 13 | 14 | target_metadata = SQLModel.metadata 15 | 16 | 17 | if context.config.config_file_name is not None: 18 | fileConfig(context.config.config_file_name) 19 | 20 | 21 | def run_migrations_online() -> None: 22 | connectable = create_engine(app_config.database.url) 23 | with connectable.connect() as connection: 24 | context.configure( 25 | connection=connection, target_metadata=target_metadata 26 | ) 27 | with context.begin_transaction(): 28 | context.run_migrations() 29 | 30 | 31 | run_migrations_online() 32 | -------------------------------------------------------------------------------- /src/migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | Revision ID: ${up_revision} 3 | Revises: ${down_revision | comma,n} 4 | Create Date: ${create_date} 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | import sqlmodel 9 | ${imports if imports else ""} 10 | 11 | # revision identifiers, used by Alembic.
12 | revision = ${repr(up_revision)} 13 | down_revision = ${repr(down_revision)} 14 | branch_labels = ${repr(branch_labels)} 15 | depends_on = ${repr(depends_on)} 16 | 17 | 18 | def upgrade() -> None: 19 | ${upgrades if upgrades else "pass"} 20 | 21 | 22 | def downgrade() -> None: 23 | ${downgrades if downgrades else "pass"} 24 | -------------------------------------------------------------------------------- /src/migrations/versions/6b495d5a4855_cascade_jobagent_match_and_del_removed_status_create_enum_jobstatus.py: -------------------------------------------------------------------------------- 1 | """add jobstatus 2 | Revision ID: 6b495d5a4855 3 | Revises: dbb81bd4d47f 4 | Create Date: 2024-10-15 08:17:30.036531 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "6b495d5a4855" 12 | down_revision = "dbb81bd4d47f" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | job_status = sa.Enum( 17 | "done", "new", "cancelled", "processing", name="jobstatus" 18 | ) 19 | 20 | 21 | def upgrade() -> None: 22 | op.drop_constraint("jobagent_job_id_fkey", "jobagent", type_="foreignkey") 23 | op.create_foreign_key( 24 | constraint_name="jobagent_job_id_fkey", 25 | source_table="jobagent", 26 | referent_table="job", 27 | local_cols=["job_id"], 28 | remote_cols=["internal_id"], 29 | ondelete="CASCADE", 30 | ) 31 | 32 | op.drop_constraint("match_job_id_fkey", "match", type_="foreignkey") 33 | op.create_foreign_key( 34 | constraint_name="match_job_id_fkey", 35 | source_table="match", 36 | referent_table="job", 37 | local_cols=["job_id"], 38 | remote_cols=["internal_id"], 39 | ondelete="CASCADE", 40 | ) 41 | 42 | op.execute("DELETE FROM job WHERE status = 'removed';") 43 | 44 | job_status.create(op.get_bind()) 45 | op.alter_column( 46 | "job", 47 | "status", 48 | existing_type=sa.VARCHAR(), 49 | type_=job_status, 50 | postgresql_using="status::jobstatus", 51 | nullable=True, 52 | ) 53 | 54 | 55 | def downgrade() -> None: 56 | op.alter_column( 57 | "job", 58 | "status", 59 | existing_type=job_status, 60 | type_=sa.VARCHAR(), 61 | nullable=False, 62 | ) 63 | 64 | op.execute("DROP TYPE IF EXISTS jobstatus") 65 | 66 | op.drop_constraint("jobagent_job_id_fkey", "jobagent", type_="foreignkey") 67 | op.create_foreign_key( 68 | constraint_name="jobagent_job_id_fkey", 69 | source_table="jobagent", 70 | referent_table="job", 71 | local_cols=["job_id"], 72 | remote_cols=["internal_id"], 73 | ) 74 | 75 | op.drop_constraint("match_job_id_fkey", "match", type_="foreignkey") 76 | op.create_foreign_key( 77 | constraint_name="match_job_id_fkey", 78 | source_table="match", 79 | referent_table="job", 80 | local_cols=["job_id"], 81 | remote_cols=["internal_id"], 82 | ) 83 | -------------------------------------------------------------------------------- /src/migrations/versions/702d19cfa063_add_queuedfile.py: -------------------------------------------------------------------------------- 1 | """add queuedfile 2 | Revision ID: 702d19cfa063 3 | Revises: 6b495d5a4855 4 | Create Date: 2024-11-18 22:13:51.562315 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | import sqlmodel 9 | 10 | 11 | # revision identifiers, used by Alembic. 12 | revision = "702d19cfa063" 13 | down_revision = "6b495d5a4855" 14 | branch_labels = None 15 | depends_on = None 16 | 17 | 18 | def upgrade() -> None: 19 | # ### commands auto generated by Alembic - please adjust! 
### 20 | op.create_table( 21 | "queuedfile", 22 | sa.Column("index_types", sa.ARRAY(sa.String()), nullable=False), 23 | sa.Column("tags", sa.ARRAY(sa.String()), nullable=False), 24 | sa.Column("id", sa.Integer(), nullable=False), 25 | sa.Column( 26 | "ursadb_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False 27 | ), 28 | sa.Column("path", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 29 | sa.Column("created_at", sa.DateTime(), nullable=False), 30 | sa.PrimaryKeyConstraint("id"), 31 | ) 32 | # ### end Alembic commands ### 33 | 34 | 35 | def downgrade() -> None: 36 | # ### commands auto generated by Alembic - please adjust! ### 37 | op.drop_table("queuedfile") 38 | # ### end Alembic commands ### 39 | -------------------------------------------------------------------------------- /src/migrations/versions/cbbba858deb0_init.py: -------------------------------------------------------------------------------- 1 | """Init 2 | Revision ID: cbbba858deb0 3 | Revises: 4 | Create Date: 2024-02-15 16:52:45.261139. 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | import sqlmodel 9 | 10 | 11 | revision = "cbbba858deb0" 12 | down_revision = None 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade() -> None: 18 | op.create_table( 19 | "agentgroup", 20 | sa.Column("plugins_spec", sa.JSON(), nullable=True), 21 | sa.Column("active_plugins", sa.ARRAY(sa.String()), nullable=True), 22 | sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 23 | sa.Column( 24 | "ursadb_url", sqlmodel.sql.sqltypes.AutoString(), nullable=False 25 | ), 26 | sa.Column("id", sa.Integer(), nullable=False), 27 | sa.PrimaryKeyConstraint("id"), 28 | ) 29 | op.create_table( 30 | "configentry", 31 | sa.Column("id", sa.Integer(), nullable=False), 32 | sa.Column( 33 | "plugin", sqlmodel.sql.sqltypes.AutoString(), nullable=False 34 | ), 35 | sa.Column("key", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 36 | sa.Column("value", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 37 | sa.PrimaryKeyConstraint("id"), 38 | ) 39 | op.create_table( 40 | "job", 41 | sa.Column("taints", sa.ARRAY(sa.String()), nullable=True), 42 | sa.Column("id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 43 | sa.Column( 44 | "status", sqlmodel.sql.sqltypes.AutoString(), nullable=False 45 | ), 46 | sa.Column("error", sqlmodel.sql.sqltypes.AutoString(), nullable=True), 47 | sa.Column( 48 | "rule_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False 49 | ), 50 | sa.Column( 51 | "rule_author", sqlmodel.sql.sqltypes.AutoString(), nullable=False 52 | ), 53 | sa.Column( 54 | "raw_yara", sqlmodel.sql.sqltypes.AutoString(), nullable=False 55 | ), 56 | sa.Column("submitted", sa.Integer(), nullable=False), 57 | sa.Column("finished", sa.Integer(), nullable=True), 58 | sa.Column("files_limit", sa.Integer(), nullable=False), 59 | sa.Column( 60 | "reference", sqlmodel.sql.sqltypes.AutoString(), nullable=False 61 | ), 62 | sa.Column("files_processed", sa.Integer(), nullable=False), 63 | sa.Column("files_matched", sa.Integer(), nullable=False), 64 | sa.Column("files_in_progress", sa.Integer(), nullable=False), 65 | sa.Column("total_files", sa.Integer(), nullable=False), 66 | sa.Column("files_errored", sa.Integer(), nullable=False), 67 | sa.Column("datasets_left", sa.Integer(), nullable=False), 68 | sa.Column("total_datasets", sa.Integer(), nullable=False), 69 | sa.Column("agents_left", sa.Integer(), nullable=False), 70 | sa.Column("internal_id", sa.Integer(), nullable=False), 71 | 
sa.PrimaryKeyConstraint("internal_id"), 72 | ) 73 | op.create_table( 74 | "match", 75 | sa.Column("meta", sa.JSON(), nullable=True), 76 | sa.Column("matches", sa.ARRAY(sa.String()), nullable=True), 77 | sa.Column("id", sa.Integer(), nullable=False), 78 | sa.Column("job_id", sa.Integer(), nullable=False), 79 | sa.Column("file", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 80 | sa.ForeignKeyConstraint( 81 | ["job_id"], 82 | ["job.internal_id"], 83 | ), 84 | sa.PrimaryKeyConstraint("id"), 85 | ) 86 | 87 | 88 | def downgrade() -> None: 89 | op.drop_table("match") 90 | op.drop_table("job") 91 | op.drop_table("configentry") 92 | op.drop_table("agentgroup") 93 | -------------------------------------------------------------------------------- /src/migrations/versions/dbb81bd4d47f_add_jobagent.py: -------------------------------------------------------------------------------- 1 | """add jobagent 2 | Revision ID: dbb81bd4d47f 3 | Revises: cbbba858deb0 4 | Create Date: 2024-05-29 13:13:03.980030 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "dbb81bd4d47f" 12 | down_revision = "cbbba858deb0" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade() -> None: 18 | # ### commands auto generated by Alembic - please adjust! ### 19 | op.create_table( 20 | "jobagent", 21 | sa.Column("id", sa.Integer(), nullable=False), 22 | sa.Column("task_in_progress", sa.Integer(), nullable=False), 23 | sa.Column("job_id", sa.Integer(), nullable=False), 24 | sa.Column("agent_id", sa.Integer(), nullable=False), 25 | sa.ForeignKeyConstraint( 26 | ["agent_id"], 27 | ["agentgroup.id"], 28 | ), 29 | sa.ForeignKeyConstraint( 30 | ["job_id"], 31 | ["job.internal_id"], 32 | ), 33 | sa.PrimaryKeyConstraint("id"), 34 | ) 35 | # ### end Alembic commands ### 36 | 37 | 38 | def downgrade() -> None: 39 | # ### commands auto generated by Alembic - please adjust! ### 40 | op.drop_table("jobagent") 41 | # ### end Alembic commands ### 42 | -------------------------------------------------------------------------------- /src/migrations/versions/f623e1057b00_added_context_column_into_match_table.py: -------------------------------------------------------------------------------- 1 | """Added context column into match table 2 | Revision ID: f623e1057b00 3 | Revises: 702d19cfa063 4 | Create Date: 2024-11-13 15:14:14.618258 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "f623e1057b00" 12 | down_revision = "702d19cfa063" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade() -> None: 18 | # ### commands auto generated by Alembic - please adjust! ### 19 | op.add_column("match", sa.Column("context", sa.JSON(), nullable=False)) 20 | # ### end Alembic commands ### 21 | 22 | 23 | def downgrade() -> None: 24 | # ### commands auto generated by Alembic - please adjust!
### 25 | op.drop_column("match", "context") 26 | # ### end Alembic commands ### 27 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/agentgroup.py: -------------------------------------------------------------------------------- 1 | from sqlmodel import SQLModel, Field, Column, ARRAY, String, JSON, Relationship 2 | from typing import Union, List, Dict 3 | from ..models.jobagent import JobAgent 4 | 5 | 6 | class AgentGroupView(SQLModel): 7 | name: str 8 | ursadb_url: str 9 | plugins_spec: Dict[str, Dict[str, str]] = Field(sa_column=Column(JSON)) 10 | active_plugins: List[str] = Field(sa_column=Column(ARRAY(String))) 11 | 12 | 13 | class AgentGroup(AgentGroupView, table=True): 14 | """Agent group is a group of processes working on a single 15 | file group, with a shared storage, and a single backing ursadb. 16 | """ 17 | 18 | id: Union[int, None] = Field(default=None, primary_key=True) 19 | jobs: List["JobAgent"] = Relationship(back_populates="agent") 20 | -------------------------------------------------------------------------------- /src/models/configentry.py: -------------------------------------------------------------------------------- 1 | from sqlmodel import Field, SQLModel 2 | from typing import Union 3 | 4 | 5 | class ConfigEntry(SQLModel, table=True): 6 | id: Union[int, None] = Field(default=None, primary_key=True) 7 | plugin: str 8 | key: str 9 | value: str 10 | -------------------------------------------------------------------------------- /src/models/job.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | from sqlalchemy.dialects import postgresql 4 | 5 | from sqlmodel import SQLModel, Field, ARRAY, String, Column, Relationship 6 | from typing import Optional, List, Union, TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from ..models.match import Match 10 | from ..models.jobagent import JobAgent 11 | 12 | 13 | class JobStatus(enum.Enum): 14 | done = "done" 15 | new = "new" 16 | cancelled = "cancelled" 17 | processing = "processing" 18 | 19 | 20 | class JobView(SQLModel): 21 | """Public fields of mquery jobs.""" 22 | 23 | __table_args__ = {"extend_existing": True} 24 | 25 | id: str 26 | status: JobStatus = Field(sa_column=Column(postgresql.ENUM(JobStatus, name="jobstatus"))) # type: ignore 27 | error: Optional[str] 28 | rule_name: str 29 | rule_author: str 30 | raw_yara: str 31 | submitted: int 32 | finished: Optional[int] 33 | files_limit: int 34 | reference: str 35 | files_processed: int 36 | files_matched: int 37 | files_in_progress: int 38 | total_files: int 39 | files_errored: int 40 | taints: List[str] = Field(sa_column=Column(ARRAY(String))) 41 | datasets_left: int 42 | total_datasets: int 43 | agents_left: int 44 | 45 | class Config: 46 | arbitrary_types_allowed = True 47 | 48 | 49 | class Job(JobView, table=True): 50 | """Job object in the database. 
Internal ID is an implementation detail.""" 51 | 52 | internal_id: Union[int, None] = Field(default=None, primary_key=True) 53 | 54 | matches: List["Match"] = Relationship(back_populates="job") 55 | agents: List["JobAgent"] = Relationship(back_populates="job") 56 | -------------------------------------------------------------------------------- /src/models/jobagent.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import ForeignKey 2 | from sqlmodel import SQLModel, Field, Relationship, Column 3 | from typing import Union, TYPE_CHECKING 4 | 5 | if TYPE_CHECKING: 6 | from ..models.job import Job 7 | from ..models.agentgroup import AgentGroup 8 | 9 | 10 | class JobAgent(SQLModel, table=True): 11 | """Information about a job relating to a specific agent group.""" 12 | 13 | id: Union[int, None] = Field(default=None, primary_key=True) 14 | task_in_progress: int 15 | 16 | job_id: int = Field( 17 | sa_column=Column( 18 | ForeignKey("job.internal_id", ondelete="CASCADE"), nullable=False 19 | ), 20 | ) 21 | job: "Job" = Relationship(back_populates="agents") 22 | 23 | agent_id: int = Field(foreign_key="agentgroup.id") 24 | agent: "AgentGroup" = Relationship(back_populates="jobs") 25 | -------------------------------------------------------------------------------- /src/models/match.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import ForeignKey 2 | from sqlmodel import SQLModel, Field, ARRAY, String, Column, JSON, Relationship 3 | from typing import List, Union, Dict, Any 4 | 5 | from ..models.job import Job 6 | 7 | 8 | class Match(SQLModel, table=True): 9 | """Represents a file matched to a job, along with related metadata.""" 10 | 11 | id: Union[int, None] = Field(default=None, primary_key=True) 12 | # A file path on one of the daemons 13 | file: str 14 | # A metadata dictionary - contains various tags added by plugins 15 | meta: Dict[str, Any] = Field(sa_column=Column(JSON)) 16 | # A list of yara rules matched to this file 17 | matches: List[str] = Field(sa_column=Column(ARRAY(String))) 18 | 19 | job_id: int = Field( 20 | sa_column=Column( 21 | ForeignKey("job.internal_id", ondelete="CASCADE"), nullable=False 22 | ) 23 | ) 24 | job: Job = Relationship(back_populates="matches") 25 | context: Dict[str, Dict[str, Dict[str, str]]] = Field( 26 | sa_column=Column(JSON, nullable=False) 27 | ) 28 | -------------------------------------------------------------------------------- /src/models/queuedfile.py: -------------------------------------------------------------------------------- 1 | from sqlmodel import SQLModel, Field, ARRAY, String, Column 2 | from typing import Union, List 3 | from datetime import datetime 4 | 5 | 6 | class QueuedFile(SQLModel, table=True): 7 | """Represents a file that is waiting to be indexed.""" 8 | 9 | id: Union[int, None] = Field(default=None, primary_key=True) 10 | 11 | # ID of the ursadb ("agent group") this file belongs to. 12 | ursadb_id: str 13 | 14 | # A file path that should be indexed. This path should be 15 | # valid on the Ursadb with ID `ursadb_id` (or there should be a plugin 16 | # that knows how to process this path to get a valid file). 17 | path: str 18 | 19 | # Time when this file was added. 20 | created_at: datetime = Field( 21 | default_factory=datetime.utcnow, 22 | ) 23 | 24 | # Desired index types for this file (valid values include ["gram3", 25 | # "text4", "hash4" and "wide8"], a database enum feels like overkill).
26 | index_types: List[str] = Field( 27 | sa_column=Column(ARRAY(String), nullable=False) 28 | ) 29 | 30 | # Desired tags for this file. Warning - overusing tags will have a big 31 | # negative impact on performance; it's best to keep to a few tags at most. 32 | tags: List[str] = Field(sa_column=Column(ARRAY(String), nullable=False)) 33 | -------------------------------------------------------------------------------- /src/mqueryfront/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | 6 | # testing 7 | /coverage 8 | 9 | # production 10 | /build 11 | 12 | # misc 13 | .DS_Store 14 | .env.local 15 | .env.development.local 16 | .env.test.local 17 | .env.production.local 18 | 19 | npm-debug.log* 20 | yarn-debug.log* 21 | yarn-error.log* 22 | -------------------------------------------------------------------------------- /src/mqueryfront/index.html: --------------------------------------------------------------------------------
[The contents of /src/mqueryfront/index.html and the frontend sources that follow it were not recoverable from this dump - their HTML/JSX markup was stripped during extraction. The only surviving fragments show a recent-jobs table with the column headers "ID", "Connection", "Request" and "Progress", and a status table with the column headers "Component" and "Version".]