├── .dockerignore ├── .env ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── push_hub.yml │ └── test_code.yml ├── .gitignore ├── CONTRIBUTING.md ├── INSTALL.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── RELEASES.md ├── _config.yml ├── deploy └── docker │ ├── daemon.Dockerfile │ ├── dev.daemon.Dockerfile │ ├── dev.frontend.Dockerfile │ ├── dev.web.Dockerfile │ └── web.Dockerfile ├── docker-compose.dev.yml ├── docker-compose.yml ├── docs ├── README.md ├── api.md ├── client-urls.png ├── components.md ├── config-example.png ├── configuration.md ├── database.md ├── distribured-rev.graphml ├── distribured-rev.png ├── distributed.graphml ├── distributed.md ├── distributed.png ├── goodyara.md ├── how-to │ ├── install-native.md │ ├── integrate-with-s3.md │ └── release-a-new-version.md ├── indexed-datasets.png ├── indexing.md ├── indexing.png ├── interface-v1.4.gif ├── mquery-flowchart.png ├── mquery-web-ui.gif ├── new-client.png ├── new-realm.png ├── new-roles.png ├── new-user.png ├── plugin-config.png ├── plugins.md ├── query-window.png ├── recent-jobs.png ├── redis.md ├── security.md ├── swagger.png ├── users.md ├── utils │ ├── README.md │ ├── compactall.md │ ├── index.md │ ├── mquery.md │ ├── nanobench.md │ └── s3index.md └── yara.md ├── pyproject.toml ├── pyrightconfig.json ├── requirements.plain.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── src ├── .dockerignore ├── __init__.py ├── alembic.ini ├── app.py ├── config.py ├── daemon.py ├── db.py ├── e2etests ├── Dockerfile ├── README.md ├── __init__.py ├── requirements.txt └── test_api.py ├── lib ├── __init__.py ├── ursadb.py └── yaraparse.py ├── metadata.py ├── migrations ├── __init__.py ├── env.py ├── script.py.mako └── versions │ ├── 6b495d5a4855_cascade_jobagent_match_and_del_removed_status_create_enum_jobstatus.py │ ├── 702d19cfa063_add_queuedfile.py │ ├── cbbba858deb0_init.py │ ├── dbb81bd4d47f_add_jobagent.py │ └── 
f623e1057b00_added_context_column_into_match_table.py ├── models ├── __init__.py ├── agentgroup.py ├── configentry.py ├── job.py ├── jobagent.py ├── match.py └── queuedfile.py ├── mqueryfront ├── .gitignore ├── index.html ├── package.json ├── public │ ├── favicon.ico │ └── manifest.json ├── src │ ├── App.css │ ├── App.js │ ├── App.test.js │ ├── Navigation.js │ ├── about │ │ └── AboutPage.js │ ├── api.js │ ├── auth │ │ └── AuthPage.js │ ├── components │ │ ├── ActionCancel.js │ │ ├── ActionCopyToClipboard.js │ │ ├── ActionDownload.js │ │ ├── ActionRemove.js │ │ ├── ActionShowMatchContext.js │ │ ├── ErrorBoundary.js │ │ ├── ErrorPage.js │ │ ├── FilterIcon.js │ │ ├── FilteringTableHeader.js │ │ ├── FilteringThead.js │ │ ├── FilteringTitle.js │ │ ├── LoadingPage.js │ │ ├── QueryProgressBar.js │ │ ├── QueryTimer.js │ │ └── WarningPage.js │ ├── config │ │ ├── ConfigEntries.js │ │ └── ConfigPage.js │ ├── index.css │ ├── index.js │ ├── indexFiles │ │ ├── IndexClearQueueButton.js │ │ ├── IndexClearedPage.js │ │ ├── IndexMultiSelect.js │ │ ├── IndexPage.js │ │ ├── IndexProgressBar.js │ │ └── IndexSuccessPage.js │ ├── logo.svg │ ├── query │ │ ├── QueryEditParseNav.js │ │ ├── QueryField.js │ │ ├── QueryLayoutManager.js │ │ ├── QueryMatches.js │ │ ├── QueryMatchesItem.js │ │ ├── QueryMonaco.js │ │ ├── QueryNavigation.js │ │ ├── QueryPage.js │ │ ├── QueryParseStatus.js │ │ ├── QueryResultsStatus.js │ │ ├── QuerySearchNav.js │ │ ├── QuerySubmitNav.js │ │ └── yara-lang.js │ ├── recent │ │ ├── RecentPage.js │ │ ├── SearchJobItem.js │ │ └── SearchJobs.js │ ├── setupProxy.js │ ├── status │ │ ├── BackendStatus.js │ │ ├── DatabaseTopology.js │ │ ├── StatusPage.js │ │ └── VersionStatus.js │ └── utils.js ├── vite.config.js └── yarn.lock ├── plugins ├── __init__.py ├── archive.py ├── blacklist.py ├── cuckoo_analysis.py ├── cuckoo_binaries.py ├── example_plugin.py ├── example_typed_config_plugin.py ├── mwdb_uploads.py ├── requirements-mwdb_uploads.txt └── s3_plugin.py ├── schema.py ├── 
scripts ├── __init__.py └── mquery-daemon ├── tasks.py ├── tests ├── Dockerfile ├── README.md ├── requirements.txt ├── test_ursadb.py ├── test_yaraparse.py └── yararules │ ├── README.md │ ├── generate_yaraparse_result_files.py │ ├── test_corpus_yara_rule.py │ └── testdata │ ├── anonymous_strings.txt │ ├── anonymous_strings.yar │ ├── apt_mal_dns_hijacking_campaign_aa19_024a.txt │ ├── apt_mal_dns_hijacking_campaign_aa19_024a.yar │ ├── base64_strings.txt │ ├── base64_strings.yar │ ├── conditions.txt │ ├── conditions.yar │ ├── count_strings.txt │ ├── count_strings.yar │ ├── data_position.txt │ ├── data_position.yar │ ├── executable_entry_point.txt │ ├── executable_entry_point.yar │ ├── hex_alternatives.txt │ ├── hex_alternatives.yar │ ├── hex_alternatives_wildcards.txt │ ├── hex_alternatives_wildcards.yar │ ├── hex_jumps.txt │ ├── hex_jumps.yar │ ├── hex_simple_rule.txt │ ├── hex_simple_rule.yar │ ├── hex_wild_cards.txt │ ├── hex_wild_cards.yar │ ├── iter_over_str_occurrences.txt │ ├── iter_over_str_occurrences.yar │ ├── or_corner_case.yar.txt │ ├── or_corner_case.yar.yar │ ├── parse_exception_example.txt │ ├── parse_exception_example.yar │ ├── private_strings.txt │ ├── private_strings.yar │ ├── referencing_rules.txt │ ├── referencing_rules.yar │ ├── referencing_rules_global.txt │ ├── referencing_rules_global.yar │ ├── referencing_rules_private.txt │ ├── referencing_rules_private.yar │ ├── regex_complex.yar.txt │ ├── regex_complex.yar.yar │ ├── regex_escapes.txt │ ├── regex_escapes.yar │ ├── regex_simple.yar.txt │ ├── regex_simple.yar.yar │ ├── regular_expressions.txt │ ├── regular_expressions.yar │ ├── rule_metadata.txt │ ├── rule_metadata.yar │ ├── rule_tags.txt │ ├── rule_tags.yar │ ├── string_offsets.txt │ ├── string_offsets.yar │ ├── strings_sets.txt │ ├── strings_sets.yar │ ├── text_string.txt │ ├── text_string.yar │ ├── text_string_case_insensitive.txt │ ├── text_string_case_insensitive.yar │ ├── text_string_case_insensitive_wide.txt │ ├── 
text_string_case_insensitive_wide.yar │ ├── text_string_case_insensitive_wide_ascii.txt │ ├── text_string_case_insensitive_wide_ascii.yar │ ├── text_string_full_word.txt │ ├── text_string_full_word.yar │ ├── wide_ascii_character_string.txt │ ├── wide_ascii_character_string.yar │ ├── wide_character_string.txt │ ├── wide_character_string.yar │ ├── xor_range_strings.txt │ ├── xor_range_strings.yar │ ├── xor_strings.txt │ ├── xor_strings.yar │ ├── xor_wide_ascii_strings.txt │ └── xor_wide_ascii_strings.yar ├── util.py └── utils ├── compactall.py ├── index.py ├── mquery.py ├── nanobench.py └── s3index.py /.dockerignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | venv/ 3 | config.py 4 | .vscode 5 | .idea 6 | .mypy_cache 7 | samples/ 8 | index/ 9 | e2e-state 10 | mquery.ini 11 | src/mqueryfront/dist 12 | src/mqueryfront/node_modules 13 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # This file is only relevant for docker-compose deployments. 2 | 3 | # Directory where your samples are stored. By default you have to copy them 4 | # to ./samples subdirectory in this repository. 5 | SAMPLES_DIR=./samples 6 | # Directory where the index files should be saved. By default ./index 7 | # subdirectory in this repository. 
8 | INDEX_DIR=./index 9 | # Directory where the postgres data should be stored 10 | POSTGRES_DIR=./postgres 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Environment information** 8 | * Mquery version (from the /status page): 9 | * Ursadb version (from the /status page): 10 | * Installation method: 11 | - [ ] Generic docker compose 12 | - [ ] Dev docker compose 13 | - [ ] Native (from source) 14 | - [ ] Other (please explain) 15 | 16 | **Reproduction Steps** 17 | 18 | 19 | 20 | 21 | **Expected behaviour** 22 | 23 | 24 | 25 | 26 | **Actual behaviour the bug** 27 | 28 | 29 | 30 | 31 | **Screenshots** 32 | 33 | 34 | 35 | 36 | **Additional context** 37 | 38 | 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest your idea 4 | 5 | --- 6 | 7 | **Feature Category** 8 | 9 | - [ ] Correctness 10 | - [ ] User Interface / User Experience 11 | - [ ] Performance 12 | - [ ] Other (please explain) 13 | 14 | **Describe the problem** 15 | 16 | 17 | 18 | **Describe the solution you'd like** 19 | 20 | 21 | 22 | **Describe alternatives you've considered** 23 | 24 | 25 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | **Your checklist for this pull request** 5 | - [ ] I've read the [contributing guideline](https://github.com/CERT-Polska/mquery/blob/master/CONTRIBUTING.md). 
6 | - [ ] I've tested my changes by building and running mquery, and testing changed functionality (if applicable) 7 | - [ ] I've added automated tests for my change (if applicable, optional) 8 | - [ ] I've updated documentation to reflect my change (if applicable) 9 | 10 | **What is the current behaviour?** 11 | 12 | 13 | **What is the new behaviour?** 14 | 15 | 16 | **Test plan** 17 | 18 | 19 | 21 | 22 | **Closing issues** 23 | 24 | 25 | 26 | fixes #issuenumber 27 | -------------------------------------------------------------------------------- /.github/workflows/push_hub.yml: -------------------------------------------------------------------------------- 1 | name: Build and push docker images 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | build_web: 11 | name: Build image 12 | runs-on: ubuntu-latest 13 | env: 14 | DOCKER_BUILDKIT: 1 15 | steps: 16 | - name: Check out repository 17 | uses: actions/checkout@v2 18 | - name: Build and push the image 19 | uses: docker/build-push-action@v1.1.0 20 | with: 21 | username: ${{ secrets.DOCKER_USERNAME }} 22 | password: ${{ secrets.DOCKER_PASSWORD }} 23 | dockerfile: ./deploy/docker/web.Dockerfile 24 | repository: mqueryci/mquery-web 25 | tags: ${{ github.sha }} 26 | push: ${{ github.event_name == 'push' }} 27 | build_daemon: 28 | name: Build image 29 | runs-on: ubuntu-latest 30 | env: 31 | DOCKER_BUILDKIT: 1 32 | steps: 33 | - name: Check out repository 34 | uses: actions/checkout@v2 35 | - name: Build and push the image 36 | uses: docker/build-push-action@v1.1.0 37 | with: 38 | username: ${{ secrets.DOCKER_USERNAME }} 39 | password: ${{ secrets.DOCKER_PASSWORD }} 40 | dockerfile: ./deploy/docker/daemon.Dockerfile 41 | repository: mqueryci/mquery-daemon 42 | tags: ${{ github.sha }} 43 | push: ${{ github.event_name == 'push' }} 44 | -------------------------------------------------------------------------------- /.github/workflows/test_code.yml: 
-------------------------------------------------------------------------------- 1 | name: "Code testing" 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | expression_blacklist: 11 | name: expression blacklist 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - run: git fetch origin master 16 | - name: No "console.log" please 17 | run: git diff origin/master -- "*.js" | grep "^[+][^+]" | grep -v "noqa" | grep "console.log" || exit 0 && exit 1 18 | test_python_types: 19 | name: python mypy 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python 24 | uses: actions/setup-python@v1 25 | with: 26 | python-version: '3.10' 27 | - name: install mypy==1.2.0 28 | run: pip3 install mypy==1.2.0 29 | - name: install requirements 30 | run: pip3 install -r requirements.txt 31 | - name: run mypy on main files 32 | run: mypy src/app.py src/daemon.py 33 | - name: run mypy on tests 34 | run: MYPYPATH=src/ mypy src/tests/ 35 | - name: run mypy on utils 36 | run: MYPYPATH=src/ mypy src/utils/ 37 | test_python_style: 38 | name: python flake8 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v2 42 | - name: Setup Python 43 | uses: actions/setup-python@v1 44 | with: 45 | python-version: '3.10' 46 | - name: install flake8==6.0.0 47 | run: pip3 install flake8==6.0.0 48 | - name: run flake8 49 | run: flake8 src 50 | test_python_lint: 51 | name: python black 52 | runs-on: ubuntu-latest 53 | steps: 54 | - uses: actions/checkout@v2 55 | - name: Setup Python 56 | uses: actions/setup-python@v1 57 | with: 58 | python-version: '3.10' 59 | - name: install black 60 | run: pip3 install black==22.3.0 61 | - name: run black 62 | run: black --check "src" 63 | test_js_style: 64 | name: js prettier 65 | runs-on: ubuntu-latest 66 | env: 67 | working-directory: src/mqueryfront 68 | steps: 69 | - uses: actions/checkout@v2 70 | - name: Setup nodejs 71 | uses: 
actions/setup-node@v1 72 | with: 73 | node-version: '12.x' 74 | - name: install prettier 75 | run: npm install -g prettier@2.0.4 76 | - name: run prettier 77 | run: prettier --tab-width=4 --check "src/**/*.js" 78 | test_js_build: 79 | name: npm build 80 | runs-on: ubuntu-latest 81 | env: 82 | working-directory: src/mqueryfront 83 | steps: 84 | - name: Setup nodejs 85 | uses: actions/setup-node@v1 86 | with: 87 | node-version: '12.x' 88 | - name: install dependencies 89 | run: yarn install 90 | - name: build 91 | run: npm build 92 | test_unit: 93 | name: unit tests 94 | runs-on: ubuntu-latest 95 | steps: 96 | - uses: actions/checkout@v2 97 | with: 98 | submodules: recursive 99 | - name: build test image 100 | run: docker build -t mquery_tests:latest -f src/tests/Dockerfile . 101 | - name: run unit tests 102 | run: docker run mquery_tests 103 | test_e2e: 104 | name: e2e tests 105 | runs-on: ubuntu-latest 106 | steps: 107 | - uses: actions/checkout@v2 108 | with: 109 | submodules: recursive 110 | - name: build test image 111 | run: docker build -t mquery_tests:latest -f src/e2etests/Dockerfile . 
112 | - name: run web with docker compose 113 | run: docker compose up --build -d web --wait 114 | - name: init the database 115 | run: docker compose exec -it -w /usr/src/app/src/ web alembic upgrade head 116 | - name: run the rest of the code 117 | run: docker compose up -d 118 | - name: run e2e tests 119 | run: docker run --net mquery_default -v $(readlink -f ./samples):/mnt/samples mquery_tests 120 | - name: get run logs 121 | if: always() 122 | run: docker compose logs 123 | - name: stop docker compose 124 | if: always() 125 | run: docker compose down 126 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | venv/ 3 | .vscode 4 | .idea 5 | .mypy_cache 6 | samples/ 7 | index/ 8 | .env 9 | mquery.ini 10 | mquery.egg-info/ 11 | package-lock.json 12 | build/ 13 | postgres/ 14 | node_modules/ 15 | dist/ 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribute to mquery 2 | 3 | ## How to start? 4 | 5 | Great, so you want to join the development! 6 | 7 | First, [set up a development environment](INSTALL.md#docker-compose-dev). 8 | Since you're going to write new code, use the `docker-compose.dev.yml` method. 9 | 10 | If everything went right, the system should be accessible at `http://localhost:80`. 11 | 12 | ## Development workflow 13 | 14 | We use a standard [github fork workflow]( 15 | https://gist.github.com/Chaser324/ce0505fbed06b947d962). 16 | 17 | 1. Fork the repository. 18 | 19 | 2. Create a new branch. The name does not matter, but the recommended format 20 | is `feature/xxx` or `fix/yyy`. 21 | 22 | 3. Work on your changes! 23 | 24 | 4. If possible, add a test or two to the `src/tests/` directory. 
You can run 25 | them with: 26 | 27 | ```bash 28 | $ docker build -t mquery_tests -f ./src/tests/Dockerfile . 29 | $ docker run mquery_tests 30 | ``` 31 | 32 | 5. We run many code formatters and linters on the code to ensure expected 33 | code quality. Your code will be checked automatically when you submit your 34 | pull request, but you can also run the checks locally to speed-up review: 35 | 36 | - **Important:** we use [black](https://pypi.org/project/black/) for Python: 37 | 38 | ```bash 39 | $ pip3 install black==22.3.0 40 | $ black src/ 41 | ``` 42 | 43 | - Important: we use [prettier](httpss://prettier.io/) for Javascript/React: 44 | 45 | ```bash 46 | $ npm install -g prettier@2.0.4 47 | $ prettier --write src/mqueryfront/ 48 | ``` 49 | 50 | - Verify that there are no type errors with [mypy](http://mypy-lang.org/): 51 | 52 | ```bash 53 | $ pip install mypy==1.2.0 54 | $ mypy src 55 | ``` 56 | 57 | - Find other style issues with [flake8](https://flake8.pycqa.org): 58 | 59 | ```bash 60 | $ pip install flake8==6.0.0 61 | $ flake8 src 62 | ``` 63 | 64 | (Lifehack: you can also plug them into your editor as on-save action). 65 | 66 | You don't have to do this for every PR, but docstrings in this projects 67 | were also formatted using: 68 | 69 | ```bash 70 | pydocstringformatter --summary-quotes-same-line --max-summary-lines 10 --max-line-length=79 --no-split-summary-body -w src/ 71 | ``` 72 | 73 | 6. When you feel like you're done, commit the files: 74 | 75 | ```bash 76 | $ git add -A 77 | $ git status # check if included files match your expectations 78 | $ git diff --cached # check the diff for forgotten debug prints etc 79 | $ git commit # commit the changes (don't forget to add a commit message) 80 | ``` 81 | 82 | 7. Push changes to your fork: 83 | 84 | ``` 85 | $ git push origin [your_branch_name] 86 | ``` 87 | 88 | 8. Create a pull request with your changes from the GitHub interface and 89 | wait for review. 90 | 91 | That's it! 
Thank you very much, we appreciate your help.
53 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src/mqueryfront/dist 2 | include src/alembic.ini 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mquery: Blazingly fast Yara queries for malware analysts 2 | 3 | Ever had trouble searching for malware samples? Mquery is an 4 | analyst-friendly web GUI to look through your digital warehouse. 5 | 6 | It can be used to search through terabytes of malware in a blink of an eye: 7 | 8 | ![mquery web GUI](docs/interface-v1.4.gif) 9 | 10 | Under the hood we use our [UrsaDB](https://github.com/CERT-Polska/ursadb), to 11 | accelerate yara queries with ngrams. 12 | 13 | ## Demo 14 | 15 | Public instance will be created soon, stay tuned... 16 | 17 | ## Quickstart 18 | 19 | ### 1. Install and start 20 | 21 | The easiest way to do this is with `docker compose`: 22 | 23 | ``` 24 | git clone https://github.com/CERT-Polska/mquery.git 25 | cd mquery 26 | vim .env # optional - change samples and index directory locations 27 | docker compose up --scale daemon=3 # building the images will take a while 28 | ``` 29 | 30 | The web interface should be available at `http://localhost`. 31 | 32 | ![](./docs/recent-jobs.png) 33 | 34 | *(For more installation options see the [installation manual](./INSTALL.md) ).* 35 | 36 | ### 2. Add the files 37 | 38 | Put some files in the `SAMPLES_DIR` (by default `./samples` in the repository, 39 | configurable with variable in the `.env` file). 40 | 41 | ### 3. 
Index your collection 42 | 43 | Launch ursacli in docker: 44 | 45 | ```shell 46 | docker compose exec ursadb ursacli 47 | [2023-06-14 17:20:24.940] [info] Connecting to tcp://localhost:9281 48 | [2023-06-14 17:20:24.942] [info] Connected to UrsaDB v1.5.1+98421d7 (connection id: 006B8B46B6) 49 | ursadb> 50 | ``` 51 | 52 | Index the samples with n-grams of your choosing (this may take a while!) 53 | 54 | ```shell 55 | ursadb> index "/mnt/samples" with [gram3, text4, wide8, hash4]; 56 | [2023-06-14 17:29:27.672] [info] Working... 1% (109 / 8218) 57 | [2023-06-14 17:29:28.674] [info] Working... 1% (125 / 8218) 58 | ... 59 | [2023-06-14 17:37:40.265] [info] Working... 99% (8217 / 8218) 60 | [2023-06-14 17:37:41.266] [info] Working... 99% (8217 / 8218) 61 | { 62 | "result": { 63 | "status": "ok" 64 | }, 65 | "type": "ok" 66 | } 67 | ``` 68 | 69 | 70 | This will scan samples directory for all new files and index them. You can 71 | monitor the progress in the `tasks` window on the left: 72 | 73 | ![](./docs/indexing.png) 74 | 75 | You have to repeat this process every time you want to add new files! 76 | 77 | After indexing is over, you will notice new datasets: 78 | 79 | ![](./docs/indexed-datasets.png) 80 | 81 | This is a good and easy way to start, but if you have a big collection you are 82 | strongly encouraged to read [indexing page](./docs/indexing.md) in the manual. 83 | 84 | ### 4. Test it 85 | 86 | Now your files should be searchable - insert any Yara rule into the search 87 | window and click `Query`. Just for demonstration, I've indexed the source code 88 | of this application and tested this Yara rule: 89 | 90 | ``` 91 | rule mquery_exceptions { 92 | strings: $a = "Exception" 93 | condition: all of them 94 | } 95 | ``` 96 | 97 | ![](./docs/query-window.png) 98 | 99 | ## Learn more 100 | 101 | See the [documentation](./docs/README.md) to learn more. Probably a good idea 102 | if you plan a bigger deployment. 
103 | 104 | You can also read the hosted version here: 105 | [cert-polska.github.io/mquery/docs](https://cert-polska.github.io/mquery/docs). 106 | 107 | ## Installation 108 | 109 | See the 110 | [installation instruction](./INSTALL.md). 111 | 112 | ## Contributing 113 | 114 | If you want to contribute, see our dedicated 115 | [documentation for contributors](./CONTRIBUTING.md). 116 | 117 | ## Changelog 118 | 119 | Learn how the project has changed by reading our 120 | [release log](./RELEASES.md). 121 | 122 | ## Contact 123 | 124 | If you have any problems, bugs or feature requests related to mquery, you're 125 | encouraged to create a GitHub issue. 126 | 127 | You can chat about this project on Discord: 128 | 129 | [![](https://dcbadge.vercel.app/api/server/3FcP6GQNzd)](https://discord.gg/3FcP6GQNzd) 130 | 131 | If you have questions unsuitable for Github or discord, you can email CERT.PL 132 | (info@cert.pl) directly. 133 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-hacker -------------------------------------------------------------------------------- /deploy/docker/daemon.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | RUN apt update; apt install -y cmake 4 | 5 | # mquery and plugin requirements 6 | COPY requirements.txt src/plugins/requirements-*.txt /tmp/ 7 | RUN ls /tmp/requirements*.txt | xargs -i,, pip --no-cache-dir install -r ,, 8 | 9 | COPY requirements.txt setup.py MANIFEST.in /app/ 10 | COPY src /app/src/ 11 | RUN pip install /app 12 | 13 | ENTRYPOINT ["mquery-daemon"] 14 | -------------------------------------------------------------------------------- /deploy/docker/dev.daemon.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | WORKDIR /usr/src/app/src 4 
| 5 | RUN apt update; apt install -y cmake 6 | 7 | # mquery and plugin requirements 8 | COPY requirements.txt src/plugins/requirements-*.txt /tmp/ 9 | RUN ls /tmp/requirements*.txt | xargs -i,, pip --no-cache-dir install -r ,, 10 | RUN pip install watchdog 11 | 12 | CMD pip install -e /usr/src/app && watchmedo auto-restart --pattern=*.py --recursive -- mquery-daemon 13 | -------------------------------------------------------------------------------- /deploy/docker/dev.frontend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18 AS build 2 | 3 | RUN npm install -g serve 4 | COPY src/mqueryfront /app 5 | WORKDIR /app 6 | RUN yarn install --legacy-peer-deps 7 | CMD ["npm", "start"] 8 | -------------------------------------------------------------------------------- /deploy/docker/dev.web.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | WORKDIR /usr/src/app/src 4 | 5 | RUN apt update; apt install -y cmake 6 | 7 | # mquery and plugin requirements 8 | COPY requirements.txt src/plugins/requirements-*.txt /tmp/ 9 | RUN ls /tmp/requirements*.txt | xargs -i,, pip --no-cache-dir install -r ,, 10 | 11 | CMD pip install -e /usr/src/app && uvicorn mquery.app:app --host 0.0.0.0 --port 5000 --reload 12 | -------------------------------------------------------------------------------- /deploy/docker/web.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18 AS build 2 | 3 | RUN npm install -g serve 4 | COPY src/mqueryfront /app 5 | WORKDIR /app 6 | RUN yarn install --legacy-peer-deps && npm run build 7 | 8 | FROM python:3.10 9 | 10 | RUN apt update; apt install -y cmake 11 | 12 | # mquery and plugin requirements 13 | COPY requirements.txt src/plugins/requirements-*.txt /tmp/ 14 | RUN ls /tmp/requirements*.txt | xargs -i,, pip --no-cache-dir install -r ,, 15 | 16 | COPY requirements.txt setup.py 
MANIFEST.in /usr/src/app/ 17 | COPY src /usr/src/app/src/ 18 | COPY --from=build "/app/dist" "/usr/src/app/src/mqueryfront/dist" 19 | RUN pip3 install /usr/src/app 20 | CMD ["uvicorn", "mquery.app:app", "--host", "0.0.0.0", "--port", "5000"] 21 | -------------------------------------------------------------------------------- /docker-compose.dev.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | dev-frontend: 4 | build: 5 | context: . 6 | dockerfile: deploy/docker/dev.frontend.Dockerfile 7 | ports: 8 | - "80:3000" 9 | links: 10 | - dev-web 11 | volumes: 12 | - ./src/mqueryfront/src:/app/src 13 | depends_on: 14 | - "dev-web" 15 | dev-web: 16 | build: 17 | context: . 18 | dockerfile: deploy/docker/dev.web.Dockerfile 19 | ports: 20 | - "5000:5000" 21 | depends_on: 22 | redis: 23 | condition: service_started 24 | postgres: 25 | condition: service_healthy 26 | volumes: 27 | - "${SAMPLES_DIR}:/mnt/samples" 28 | - .:/usr/src/app 29 | environment: 30 | - "REDIS_HOST=redis" 31 | - "MQUERY_BACKEND=tcp://ursadb:9281" 32 | - "DATABASE_URL=postgresql://postgres:password@postgres:5432/mquery" 33 | healthcheck: 34 | test: ["CMD-SHELL", "curl --fail http://localhost:5000/api/server || exit 1"] 35 | interval: 5s 36 | timeout: 5s 37 | retries: 30 38 | dev-daemon: 39 | build: 40 | context: . 
41 | dockerfile: deploy/docker/dev.daemon.Dockerfile 42 | links: 43 | - redis 44 | - ursadb 45 | - postgres 46 | volumes: 47 | - "${SAMPLES_DIR}:/mnt/samples" 48 | - .:/usr/src/app 49 | depends_on: 50 | dev-web: 51 | condition: service_healthy 52 | redis: 53 | condition: service_started 54 | ursadb: 55 | condition: service_started 56 | postgres: 57 | condition: service_healthy 58 | environment: 59 | - "REDIS_HOST=redis" 60 | - "MQUERY_BACKEND=tcp://ursadb:9281" 61 | - "DATABASE_URL=postgresql://postgres:password@postgres:5432/mquery" 62 | ursadb: 63 | image: mqueryci/ursadb:v1.5.1 64 | ports: 65 | - "9281:9281" 66 | volumes: 67 | - "${SAMPLES_DIR}:/mnt/samples" 68 | - "${INDEX_DIR}:/var/lib/ursadb" 69 | user: "0:0" 70 | keycloak: 71 | image: quay.io/keycloak/keycloak:15.1.0 72 | ports: 73 | - "8080:8080" 74 | environment: 75 | - KEYCLOAK_USER=admin 76 | - KEYCLOAK_PASSWORD=admin 77 | - DB_VENDOR=h2 78 | redis: 79 | image: redis 80 | postgres: 81 | image: postgres 82 | restart: always 83 | volumes: 84 | - ${POSTGRES_DIR}:/var/lib/postgresql/data 85 | environment: 86 | - POSTGRES_PASSWORD=password 87 | - POSTGRES_DB=mquery 88 | healthcheck: 89 | test: ["CMD-SHELL", "pg_isready -U postgres"] 90 | interval: 5s 91 | timeout: 5s 92 | retries: 5 93 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | web: 4 | restart: always 5 | build: 6 | context: . 
7 | dockerfile: deploy/docker/web.Dockerfile 8 | ports: 9 | - "80:5000" 10 | depends_on: 11 | redis: 12 | condition: service_started 13 | postgres: 14 | condition: service_healthy 15 | volumes: 16 | - "${SAMPLES_DIR}:/mnt/samples" 17 | environment: 18 | - "REDIS_HOST=redis" 19 | - "MQUERY_BACKEND=tcp://ursadb:9281" 20 | - "DATABASE_URL=postgresql://postgres:password@postgres:5432/mquery" 21 | healthcheck: 22 | test: ["CMD-SHELL", "curl --fail http://localhost:5000/api/server || exit 1"] 23 | interval: 5s 24 | timeout: 5s 25 | retries: 30 26 | daemon: 27 | restart: always 28 | build: 29 | context: . 30 | dockerfile: deploy/docker/daemon.Dockerfile 31 | links: 32 | - redis 33 | - ursadb 34 | - postgres 35 | volumes: 36 | - "${SAMPLES_DIR}:/mnt/samples" 37 | depends_on: 38 | web: 39 | condition: service_healthy 40 | redis: 41 | condition: service_started 42 | ursadb: 43 | condition: service_started 44 | postgres: 45 | condition: service_healthy 46 | environment: 47 | - "REDIS_HOST=redis" 48 | - "MQUERY_BACKEND=tcp://ursadb:9281" 49 | - "DATABASE_URL=postgresql://postgres:password@postgres:5432/mquery" 50 | ursadb: 51 | restart: always 52 | image: mqueryci/ursadb:v1.5.1 53 | ports: 54 | - "127.0.0.1:9281:9281" 55 | volumes: 56 | - "${SAMPLES_DIR}:/mnt/samples" 57 | - "${INDEX_DIR}:/var/lib/ursadb" 58 | user: "0:0" 59 | redis: 60 | restart: always 61 | image: redis 62 | postgres: 63 | image: postgres 64 | restart: always 65 | volumes: 66 | - ${POSTGRES_DIR}:/var/lib/postgresql/data 67 | environment: 68 | - POSTGRES_PASSWORD=password 69 | - POSTGRES_DB=mquery 70 | healthcheck: 71 | test: ["CMD-SHELL", "pg_isready -U postgres"] 72 | interval: 5s 73 | timeout: 5s 74 | retries: 5 75 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # mquery documentation 2 | 3 | ## User guide 4 | 5 | - [Installation](../INSTALL.md): Installation instruction. 
6 | - [Configuration](./configuration.md): Additional configuration options. 7 | - [Components](./components.md): More detailed description of mquery components. 8 | - [Indexing](./indexing.md): Indexing files is one of the most important things in 9 | mquery. In simple cases it can be solved without leaving the web UI, but 10 | many things will require more advanced approach. Read this if you need to 11 | index a considerable number of files. 12 | - [How to write good yara rules](./goodyara.md): How to write YARA rules that 13 | will work well in mquery. 14 | - [Yara support and limitations](./yara.md): Explains how mquery 15 | accelerates queries, what will, and what won't work. 16 | - [Utility scripts](./utils): Mquery ships with a few useful scripts. 17 | Here you can find documentation for them. 18 | - [For future contributors](../CONTRIBUTING.md): How to contribute. 19 | 20 | ## How to... 21 | 22 | - [Install mquery natively (without docker)](how-to/install-native.md) 23 | - [Integrate mquery with s3](how-to/integrate-with-s3.md) 24 | 25 | 26 | ## Relevant [ursadb's documentation](https://cert-polska.github.io/ursadb) 27 | 28 | Ursadb is the backend doing the heavy lifting for mquery. If you need to work with large 29 | datasets, it's a very useful read. It is also a prerequisite for understanding 30 | many things in mquery. 31 | 32 | - [Index types](https://cert-polska.github.io/ursadb/docs/indextypes.html): Picking 33 | index types you need is an important decision that's hard to change later. 34 | - [Datasets](https://cert-polska.github.io/ursadb/docs/datasets.html): Introduction to 35 | datasets. 36 | - [Performance and limits](https://cert-polska.github.io/ursadb/docs/limits.html): 37 | Read in case you're not sure if Ursadb can handle your collection. 38 | - [On-disk format](https://cert-polska.github.io/ursadb/docs/ondiskformat.html): 39 | Ursadb index format is relatively simple - reading this may be useful for 40 | advanced users. 
41 | 42 | ## Advanced topics 43 | 44 | Relevant for people who want to run mquery in production or on a bigger scale. 45 | 46 | - [Security](./security.md): Security considerations for hardening your mquery instance. 47 | - [Distributed mquery](./distributed.md): For users that want to run mquery on 48 | more than one machine. 49 | - [On-disk format](./ondiskformat.md): Read if you want to understand ursadb's on 50 | disk format (spoiler: many files are just JSON and can be inspected with vim). 51 | - [Plugin system](./plugins.md): For filtering, processing and tagging files. 52 | - [Database format](./database.md): Information about the data stored in the database. 53 | - [Redis applications](./redis.md): Of historical interest, redis is used only for [rq](https://python-rq.org/) now. 54 | - [User management](./users.md): Control and manage access to your mquery instance. 55 | - [API](./api.md): Mquery exposes a simple API that you may use for your automation. 56 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | Launch mquery and browse to `/docs`. 4 | 5 | ![](./swagger.png?raw=true) 6 | 7 | Mquery has a stable API that you can use to automate your work. It also 8 | has internal API endpoints that are used by the website - you can use them, 9 | but they may change in the future without warning. 10 | 11 | ## Mquery API example 12 | 13 | The [mquery](https://github.com/CERT-Polska/mquery/blob/master/src/utils/mquery.py) script 14 | is a good starting point for your scripts. It uses only stable endpoints.
15 | The interesting part of the script is: 16 | 17 | ```python 18 | #!/usr/bin/python3 19 | 20 | import time 21 | import requests 22 | 23 | mquery_server = "http://localhost" # hardcoded to localhost 24 | 25 | yara_rule = """ 26 | rule test { 27 | strings: $a = "Exception" 28 | condition: $a 29 | } 30 | """ # hardcoded yara rule 31 | 32 | job_id = requests.post( 33 | f"{mquery_server}/api/query", 34 | json={ 35 | "method": "query", 36 | "raw_yara": yara_rule, 37 | "taint": None, 38 | "method": "query", 39 | }, 40 | ).json()["query_hash"] 41 | 42 | offset = 0 43 | while True: 44 | out = requests.get( 45 | f"{mquery_server}/api/matches/{job_id}?offset={offset}&limit=50" 46 | ).json() 47 | 48 | for match in out["matches"]: 49 | file_path = match["file"] 50 | sha256 = match["meta"]["sha256"]["display_text"] 51 | print(sha256) 52 | with open(sha256, "wb") as outf: 53 | content = requests.get( 54 | f"{mquery_server}/api/download", 55 | {"job_id": job_id, "ordinal": offset, "file_path": file_path,}, 56 | ).content 57 | outf.write(content) 58 | offset += 1 59 | 60 | if out["job"]["status"] in ["cancelled", "failed", "done", "removed"]: 61 | break 62 | 63 | time.sleep(1.0) 64 | ``` 65 | 66 | ## Ursadb API example 67 | 68 | Many things that are not exposed by mquery can be done using the underlying 69 | Ursadb's API. Just remember that you shouldn't allow unauthenticated access to it, 70 | because a malicious user can use the API to index and query arbitrary files on the 71 | server's drive. 72 | 73 | See [ursadb's syntax documentation](https://cert-polska.github.io/ursadb/docs/syntax.html) 74 | to learn more. 
75 | 76 | [`compactall`](https://github.com/CERT-Polska/mquery/blob/master/src/utils/compactall.py) 77 | is a very simple example of this type of integration: 78 | 79 | ```python 80 | ursa = UrsaDb("tcp://localhost:9281") 81 | last_datasets = None 82 | while True: 83 | datasets = set( 84 | ursa.execute_command("topology;")["result"]["datasets"].keys() 85 | ) 86 | logging.info("%s datasets left.", len(datasets)) 87 | if datasets == last_datasets: 88 | # Nothing can be compacted anymore 89 | break 90 | 91 | start = time.time() 92 | ursa.execute_command(f"compact smart;") 93 | end = time.time() 94 | logging.info("Compacting took %s seconds...", (end - start)) 95 | last_datasets = datasets 96 | ``` 97 | 98 | Where the `Ursadb` object is just a very thin wrapper around zeromq: 99 | 100 | ```python 101 | def make_socket(self, recv_timeout: int = 2000) -> zmq.Context: 102 | context = zmq.Context() 103 | socket = context.socket(zmq.REQ) 104 | socket.setsockopt(zmq.LINGER, 0) 105 | socket.setsockopt(zmq.RCVTIMEO, recv_timeout) 106 | socket.connect(self.backend) 107 | return socket 108 | 109 | def execute_command(self, command: str) -> Json: 110 | socket = self.make_socket(recv_timeout=-1) 111 | socket.send_string(command) 112 | response = socket.recv_string() 113 | socket.close() 114 | return json.loads(response) 115 | ``` 116 | -------------------------------------------------------------------------------- /docs/client-urls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/client-urls.png -------------------------------------------------------------------------------- /docs/components.md: -------------------------------------------------------------------------------- 1 | # components 2 | 3 | There are four main independent components in mquery deployments: 4 | 5 | - web frontend 6 | - daemons (also called "agents" or "workers") 7 | - 
UrsaDB (backend database) 8 | - Redis 9 | 10 | In a normal situation, there is one web frontend, one shared Redis database, and 11 | for every UrsaDB instance, there is a group of one or more agents dedicated to 12 | it: 13 | 14 | ![](./distributed.png) 15 | 16 | In most small-to-medium sized deployments there is only one ursadb instance, and 17 | all workers are assigned to it. 18 | 19 | More complex configurations are possible, for example, consider this deployment 20 | with internal and external sample index: 21 | 22 | ![](./distribured-rev.png) 23 | 24 | ### Web frontend (mquery) 25 | 26 | Mquery is a standard web application written in Python (using the 27 | Fastapi framework). 28 | 29 | It talks with Redis directly, and schedules tasks for the workers. 30 | 31 | For some administrative tasks (like checking ongoing tasks) it also sends requests 32 | to UrsaDB directly. 33 | 34 | ### Mquery daemon (agent) 35 | 36 | The workhorse of the entire setup. There must be at least one daemon for 37 | every UrsaDB instance. Daemon's tasks include querying the assigned UrsaDB 38 | instance for samples and running YARA rules on candidate samples. 39 | 40 | ### Redis 41 | 42 | It's a shared database that's used for several purposes. The main one being communication between daemon and mquery. 43 | It is also used as a task queue for jobs scheduled for agents. And it's also 44 | used for persistent storage of job results. Finally, it's utilized to store 45 | plugin configuration and job cache for agents. To sum up, it's pretty overloaded 46 | and used to store everything as the main database of the project. 47 | 48 | ### UrsaDB 49 | 50 | Ursadb is a [separate project](https://github.com/CERT-Polska/ursadb), used in 51 | mquery as a backend database to optimise YARA rules. Ursadb itself has no 52 | understanding of YARA syntax, so all rules are first transpiled by mquery to 53 | a simpler Ursadb syntax before a query. 
54 | -------------------------------------------------------------------------------- /docs/config-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/config-example.png -------------------------------------------------------------------------------- /docs/database.md: -------------------------------------------------------------------------------- 1 | # How the data is stored in the database 2 | 3 | Currently, Postgres database is used to keep entities used by mquery. 4 | 5 | With the default docker configuration, you can connect to the database 6 | using the following oneliner: 7 | 8 | ``` 9 | sudo docker compose exec postgres psql -U postgres --dbname mquery 10 | ``` 11 | 12 | The following tables are defined: 13 | 14 | ### Job table (`job`) 15 | 16 | Jobs are stored in the `job` table. 17 | 18 | Every job has an ID, which is a random 12 character string like 2OV8UP4DUOWK (the 19 | same string that is visible in urls like http://mquery.net/query/2OV8UP4DUOWK). 20 | 21 | Possible job statuses are: 22 | 23 | * "new" - Completely new job. 24 | * "inprogress" - Job that is in progress. 25 | * "done" - Job that was finished. 26 | * "cancelled" - Job was cancelled by the user or failed. 27 | * "removed" - Job is hidden in the UI (TODO: remove this status in the future) 28 | 29 | ### Job agent table (`jobagent`) 30 | 31 | It is a simple mapping between job_id and agent_id. Additionally, it keeps track 32 | of how many tasks are still in progress for a given agent assigned to this job. 33 | 34 | ### Match table (`match`) 35 | 36 | Matches represent files matched to a job. 37 | 38 | Every match represents a single yara rule match (along with optional attributes 39 | from plugins). 40 | 41 | ### AgentGroup table (`agentgroup`) 42 | 43 | When scheduling jobs, mquery needs to know how many agent groups are 44 | waiting for tasks.
In most cases there is only one, but in distributed environment 45 | there may be more. 46 | 47 | ### Configuration table (`configentry`) 48 | 49 | Represented by models.configentry.ConfigEntry class. 50 | 51 | For example, `plugin:TestPlugin` will store configuration for `TestPlugin` as a 52 | dictionary. All plugins can expose their own arbitrary config options. 53 | 54 | As a special case `plugin:Mquery` keeps configuration of the mquery itself. 55 | -------------------------------------------------------------------------------- /docs/distribured-rev.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/distribured-rev.png -------------------------------------------------------------------------------- /docs/distributed.md: -------------------------------------------------------------------------------- 1 | # Distributed mquery 2 | 3 | It's possible to use mquery in a distributed way: 4 | 5 | ![](./distributed.png) 6 | 7 | Every agent will talk with its dedicated Ursadb instance, queries will 8 | be run on all servers in parallel and results will be merged. 9 | 10 | In fact, the default stock configuration is really "distributed", just with 11 | a single agent running on the same machine. 12 | 13 | It's also possible to do it "in reverse" - a single Ursadb instance can 14 | be connected to multiple mquery servers: 15 | 16 | ![](./distribured-rev.png) 17 | 18 | All of mquery's core functionality works in this setup, but there are some 19 | deployment problems. For example, web interface assumes that the samples are stored 20 | (or mounted) at the same location as in the workers. If that's not the case, 21 | it can be corrected with custom [plugins](./plugins.md). 
22 | -------------------------------------------------------------------------------- /docs/distributed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/distributed.png -------------------------------------------------------------------------------- /docs/how-to/release-a-new-version.md: -------------------------------------------------------------------------------- 1 | # How to release a new mquery version 2 | 3 | I never remember the exact steps, so let's document this once and for all. 4 | 5 | This should be automated someday. 6 | 7 | * [ ] Bump the version 8 | * [ ] pick a new version, make sure you follow semver 9 | * [ ] Update `setup.py` 10 | * [ ] Update `src/utils.py` 11 | * [ ] `git checkout -b release/v1.6.0; git push origin release/v1.6.0` 12 | * [ ] Create a PR and merge it 13 | * [ ] Push a new git tag 14 | * [ ] `git tag v1.6.0` 15 | * [ ] `git push origin v1.6.0` 16 | * [ ] Publish a release on github 17 | * [ ] Build the package 18 | * [ ] `python3 setup.py build` 19 | * [ ] `python3 setup.py sdist` 20 | * [ ] Test the package 21 | * [ ] Publish the pypi package 22 | * [ ] `twine upload dist/mquery-1.6.0.tar.gz` 23 | -------------------------------------------------------------------------------- /docs/indexed-datasets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/indexed-datasets.png -------------------------------------------------------------------------------- /docs/indexing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/indexing.png -------------------------------------------------------------------------------- /docs/interface-v1.4.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/interface-v1.4.gif -------------------------------------------------------------------------------- /docs/mquery-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/mquery-flowchart.png -------------------------------------------------------------------------------- /docs/mquery-web-ui.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/mquery-web-ui.gif -------------------------------------------------------------------------------- /docs/new-client.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/new-client.png -------------------------------------------------------------------------------- /docs/new-realm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/new-realm.png -------------------------------------------------------------------------------- /docs/new-roles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/new-roles.png -------------------------------------------------------------------------------- /docs/new-user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/new-user.png 
-------------------------------------------------------------------------------- /docs/plugin-config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/plugin-config.png -------------------------------------------------------------------------------- /docs/query-window.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/query-window.png -------------------------------------------------------------------------------- /docs/recent-jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/recent-jobs.png -------------------------------------------------------------------------------- /docs/redis.md: -------------------------------------------------------------------------------- 1 | # How the data is stored in redis 2 | 3 | In the older mquery versions, data used to be stored in Redis. In mquery 4 | version 1.4.0 the data was migrated to a postgresql - see [database](./database.md). 5 | 6 | Please note that all this is 100% internal, and shouldn't be relied on. 7 | Data format in redis can and does change between mquery releases. 8 | 9 | You can use `redis-cli` to connect to redis. With the default docker compose configuration, 10 | use `docker compose exec redis redis-cli`. 11 | 12 | Redis command documentation is pretty good and available at https://redis.io/commands/. 13 | 14 | ### Rq objects (`rq:*`) 15 | 16 | Objects used internally by https://python-rq.org/, task scheduler used by mquery. 17 | 18 | You can browse them using tools from https://python-rq.org/docs/monitoring/. 
19 | -------------------------------------------------------------------------------- /docs/security.md: -------------------------------------------------------------------------------- 1 | # security 2 | 3 | ## Secure Deployment 4 | 5 | There are multiple components necessary to have a working mquery instance. 6 | Some of them require special care from a security standpoint. 7 | 8 | ### Mquery 9 | 10 | Mquery is a standard web application written in Python. By default, everyone has permission to do everything. 11 | This default configuration is unsuitable for bigger organisations or public instances. 12 | In such cases, it's recommended to enable user accounts (see [users.md](./users.md)), 13 | and disallow anonymous users or at least don't give them admin rights. 14 | 15 | ### Mquery daemon (agent) 16 | 17 | No special considerations. Every daemon process must have network 18 | access to Redis and UrsaDB. 19 | 20 | ### Redis 21 | 22 | Mquery web and daemon must have network access to Redis. No other access to 23 | the Redis database is necessary. There is no support for securing Redis 24 | with a password in the current version, so network isolation is 25 | the only way to prevent attacks. Most importantly, Redis must not 26 | be available from the public network. 27 | 28 | ### Ursadb 29 | 30 | Mquery daemons must have network access to their respective ursadb instances. 31 | Similarly to Redis, it's best to restrict network access to the UrsaDB instance. Ursadb protocol does not take malicious actors into account, and 32 | unauthenticated users can, for example, remove indexed data from the database, 33 | or cause a denial of service. 34 | 35 | In the provided docker compose files, the UrsaDB user is overridden to root by 36 | default. This is for 37 | backwards compatibility, and to simplify deployment. For production instances 38 | consider running ursadb with the default user (`ursa`, UID 1000). 
This means 39 | that the shared index volume must be writable by UID 1000, and samples must 40 | be readable by UID 1000. 41 | 42 | ## How to report a vulnerability 43 | 44 | There is no dedicated email for reporting a security vulnerability. Please reach out 45 | to cert@cert.pl or one of the maintainers directly. If the vulnerability is not 46 | critical, the best way to report is via a GitHub issue. 47 | -------------------------------------------------------------------------------- /docs/swagger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/docs/swagger.png -------------------------------------------------------------------------------- /docs/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utilities 2 | 3 | Mquery ships with a few scripts to automate common tasks: 4 | 5 | - [mquery.py](./mquery.md) - Automate yara hunts and download of the results. 6 | - [index.py](./index.md) - Can be used to index large amounts of data in a 7 | reliable way. 8 | - [compactall.py](./compactall.md) - Will compact datasets in the Ursadb instance 9 | as long as there's anything left to compact. 10 | - [nanobench.py](./nanobench.md) - A small helper script, used by the developers 11 | to benchmark Ursadb performance on a given machine and with a given configuration. 12 | - [s3index.py](./s3index.md) - Helper script for indexing samples from S3. 13 | It serves as a demonstration, not a best practice. In the current version 14 | it suffers from performance problems, so it may not be suitable for big 15 | deployments.
16 | -------------------------------------------------------------------------------- /docs/utils/compactall.md: -------------------------------------------------------------------------------- 1 | # compactall.py 2 | 3 | Will compact datasets in the Ursadb instance as long as there's anything 4 | left to compact. 5 | 6 | ## Usage 7 | 8 | ``` 9 | $ python3 -m utils.compactall --help 10 | usage: compactall.py [-h] [--ursadb URSADB] [--mode {smart,all}] 11 | 12 | Keep the database lean. 13 | 14 | optional arguments: 15 | -h, --help show this help message and exit 16 | --ursadb URSADB URL of the ursadb instance. 17 | --mode {smart,all} Compacting mode. Force (all) or optimise for time 18 | (smart). 19 | 20 | ``` 21 | 22 | ## Example 23 | 24 | This script is very easy to use - it only needs a URL of the Ursadb instance, 25 | for example `tcp://127.0.0.1:9281` (which is the default). 26 | 27 | ``` 28 | python3 -m utils.compactall --ursadb tcp://127.0.0.1:9281 29 | ``` 30 | 31 | It will start merging compatible datasets with the `compact` command, 32 | and will only stop when: 33 | - There are no more compatible datasets that can be merged; 34 | - There are compatible datasets, but they can't be merged because the resulting 35 | dataset would exceed the size maximum configured in Ursadb. 36 | 37 | Running this script periodically probably can't hurt (but it may put a lot of 38 | load on the disk, so should be run when the db is not used heavily). 39 | 40 | ## Caveats 41 | 42 | This script can be stopped with Ctrl+C at any point, but the last issued 43 | command will continue running (the database will finish compacting the datasets 44 | that it started). 45 | -------------------------------------------------------------------------------- /docs/utils/index.md: -------------------------------------------------------------------------------- 1 | # index.py 2 | 3 | Can be used to index large amounts of data in a reliable way.
4 | 5 | ## Usage 6 | 7 | ``` 8 | $ python3 -m utils.index --help 9 | usage: index.py [-h] [--mode {prepare,index,prepare-and-index}] 10 | [--ursadb URSADB] [--workdir WORKDIR] [--batch BATCH] 11 | [--path PATH] [--path-mount PATH_MOUNT] 12 | [--max-file-size-mb MAX_FILE_SIZE_MB] 13 | [--type {gram3,text4,hash4,wide8}] [--tag TAGS] 14 | [--workers WORKERS] [--working-datasets WORKING_DATASETS] 15 | 16 | Reindex local files. 17 | 18 | optional arguments: 19 | -h, --help show this help message and exit 20 | --mode {prepare,index,prepare-and-index} 21 | Mode of operation. Only prepare batches, index them, 22 | or both. 23 | --ursadb URSADB URL of the ursadb instance. 24 | --workdir WORKDIR Path to a working directory. 25 | --batch BATCH Size of indexing batch. 26 | --path PATH Path of samples to be indexed. 27 | --path-mount PATH_MOUNT 28 | Path to the samples to be indexed, as seen by ursadb 29 | (if different). 30 | --max-file-size-mb MAX_FILE_SIZE_MB 31 | Maximum file size, in MB, to index. 128 By default. 32 | --type {gram3,text4,hash4,wide8} 33 | Index types. By default [gram3, text4, wide8, hash4] 34 | --tag TAGS Additional tags for indexed datasets. 35 | --workers WORKERS Number of parallel indexing jobs. 36 | --working-datasets WORKING_DATASETS 37 | Numer of working datasets (uses sane value by 38 | default). 39 | ``` 40 | 41 | ## Example 42 | 43 | 44 | Probably the most complex script shipped with mquery. See the 45 | [indexing](../indexing.md) guide for a complete tutorial. Basic usage is 46 | relatively simple though. To index files with ursadb running natively, run: 47 | 48 | ``` 49 | $ python3 -m utils.index --workdir /tmp/work --path ../samples --path-mount /mnt/samples 50 | ERROR:root:Can't connect to ursadb instance at tcp://localhost:9281 51 | INFO:root:Prepare.1: load all indexed files into memory. 52 | INFO:root:Prepare.2: find all new files. 53 | INFO:root:Prepare.3: Got 1 files in 1 batches to index.
54 | INFO:root:Index.1: Determine compacting threshold. 55 | INFO:root:Index.1: Compact threshold = 84. 56 | INFO:root:Index.2: Find prepared batches. 57 | INFO:root:Index.2: Got 1 batches to run. 58 | INFO:root:Index.3: Run index commands with 2 workers. 59 | INFO:root:Index.4: Batch /tmp/work/batch_0000000000.txt done [1/1]. 60 | INFO:root:Index.5: Unlinking the workdir. 61 | INFO:root:Indexing finished. Consider compacting the database now 62 | ``` 63 | 64 | ## Caveats 65 | 66 | This script can be stopped with Ctrl+C at any point, but the last started 67 | indexing batch will continue. 68 | 69 | Don't set the `--workers` parameter to a number too big! It can cause OOM crashes. 70 | -------------------------------------------------------------------------------- /docs/utils/mquery.md: -------------------------------------------------------------------------------- 1 | # mquery.py 2 | 3 | Automate yara hunts and download of the results. 4 | 5 | ## Usage 6 | 7 | ``` 8 | $ python3 utils/mquery.py --help 9 | usage: mquery.py [-h] (--yara YARA | --job JOB) [--mquery MQUERY] 10 | [--print-filenames] [--print-matches] [--save SAVE] 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | --yara YARA Yara rule to use for query 15 | --job JOB Job ID to print or download 16 | --mquery MQUERY Change mquery server address 17 | --print-filenames Also print filenames 18 | --print-matches Also print matched rules 19 | --save SAVE Download samples and save to the provided directory 20 | ``` 21 | 22 | ## Example 23 | 24 | You can provide either a yara rule (`--yara` parameter) or existing job id 25 | (`--job` parameter). The script will then create a new job or download existing 26 | one, and return all the results as a list of hashes, optionally with filenames 27 | (`--print-filenames`) and matched rules (`--print-matches`). There is also an 28 | option to save samples to a local directory (with `--save DIRNAME`).
29 | 30 | For example, to start a new job: 31 | 32 | ``` 33 | $ python3 utils/mquery.py --yara rule.yar 34 | 89b27295b3ed353e38ab67c1d21d44578461413249d28d960f1c6fb4195dbb1b 35 | dacdab7b47f0788b20d33a44500cd3396d47894f37e32d0bd54aa2dbb4e5eed0 36 | 387e6f8912fb8ded6bca4d16c464bc186ad03759529b7ba8b19a54b590c13ab1 37 | 98b7b3faab88ff62720af747195156a3694131aa2fd760753ff48b044da310d4 38 | fcc7183658c7a6f92a580e3ea4ee8f3987b58a4fec08a0a826f5aee2226cda53 39 | ed04594b5bae61d40b8da8c81d9a0cf1b4aba44144f06cca674e0ea98d691dd5 40 | 442e658f0adaf384170cddc735d86cb3d5d6f5a6932af77d4080a88551790b53 41 | b2695a80ce56561577ee5b7f31f4b3119782e4b45fad599b33c153acf202a129 42 | 0abae63ce933d3f458cd710302a800a87b67bb643a5917098ec97a820dd7232f 43 | 4cfda945446db1d2d65fcce3de5322c679ce1b26c3205fb76f2d05ed19d86bf5 44 | ``` 45 | 46 | Use existing job ID, print more information, and save files locally: 47 | 48 | ``` 49 | $ python3 utils/mquery.py --job H3PAW4YF68T0 --print-matches --save test 50 | 89b27295b3ed353e38ab67c1d21d44578461413249d28d960f1c6fb4195dbb1b test 51 | dacdab7b47f0788b20d33a44500cd3396d47894f37e32d0bd54aa2dbb4e5eed0 test 52 | 387e6f8912fb8ded6bca4d16c464bc186ad03759529b7ba8b19a54b590c13ab1 test 53 | 98b7b3faab88ff62720af747195156a3694131aa2fd760753ff48b044da310d4 test 54 | fcc7183658c7a6f92a580e3ea4ee8f3987b58a4fec08a0a826f5aee2226cda53 test 55 | ed04594b5bae61d40b8da8c81d9a0cf1b4aba44144f06cca674e0ea98d691dd5 test 56 | 442e658f0adaf384170cddc735d86cb3d5d6f5a6932af77d4080a88551790b53 test 57 | b2695a80ce56561577ee5b7f31f4b3119782e4b45fad599b33c153acf202a129 test 58 | 0abae63ce933d3f458cd710302a800a87b67bb643a5917098ec97a820dd7232f test 59 | 4cfda945446db1d2d65fcce3de5322c679ce1b26c3205fb76f2d05ed19d86bf5 test 60 | 61 | $ ls test | wc -l 62 | 10 63 | ``` 64 | -------------------------------------------------------------------------------- /docs/utils/nanobench.md: -------------------------------------------------------------------------------- 1 | # nanobench.py 2 | 3 | 
Run performance tests on a local ursadb instance 4 | 5 | ## Usage 6 | 7 | ``` 8 | $ python3 utils/nanobench.py --help 9 | usage: nanobench.py [-h] [--ursadb URSADB] [--level {nano,mini,heavyduty}] 10 | 11 | Simple benchmark utility. 12 | 13 | optional arguments: 14 | -h, --help show this help message and exit 15 | --ursadb URSADB URL of the ursadb instance. 16 | --level {nano,mini,heavyduty} 17 | How hard should the tests be. 18 | 19 | ``` 20 | 21 | ## Example 22 | 23 | ``` 24 | $ python3 utils/nanobench.py 25 | select "abc"; average 10.954 files: 110 26 | select "abcdefgh"; average 2.150 files: 0 27 | select "abc" & "qwe" & "zxc"; average 1.060 files: 0 28 | select "abc" | "qwe" | "zxc"; average 6.789 files: 285 29 | select min 1 of ("abc", "qwe", "zxc"); average 1.128 files: 285 30 | ... 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/utils/s3index.md: -------------------------------------------------------------------------------- 1 | # s3index.py 2 | 3 | Can be used to index files from S3. 4 | 5 | This script was created to accompany the [S3 integration guide](../how-to/integrate-with-s3.md). It will download files from S3 temporarily and index 6 | them with ursadb. After indexing local copies of samples are deleted. 7 | 8 | WARNING: this script is still in the development, and usage may change 9 | in the future. 10 | 11 | ## Usage 12 | 13 | ``` 14 | $ python3 -m utils.s3index --help 15 | usage: s3index.py [-h] [--mode {prepare,index,prepare-and-index}] [--ursadb URSADB] --s3-url S3_URL --s3-secret-key S3_SECRET_KEY --s3-access-key S3_ACCESS_KEY --s3-bucket S3_BUCKET [--s3-secure S3_SECURE] 16 | [--workdir WORKDIR] [--batch BATCH] [--type {gram3,text4,hash4,wide8}] [--tag TAGS] [--workers WORKERS] [--working-datasets WORKING_DATASETS] 17 | 18 | Index files from s3. 19 | 20 | options: 21 | -h, --help show this help message and exit 22 | --mode {prepare,index,prepare-and-index} 23 | Mode of operation. 
Only prepare batches, index them, or both. 24 | --ursadb URSADB URL of the ursadb instance. 25 | --s3-url S3_URL S3 server url. 26 | --s3-secret-key S3_SECRET_KEY 27 | Secret key. 28 | --s3-access-key S3_ACCESS_KEY 29 | Access key. 30 | --s3-bucket S3_BUCKET 31 | Bucket name. 32 | --s3-secure S3_SECURE 33 | Use https (1 or 0)?. 34 | --workdir WORKDIR Path to a working directory. 35 | --batch BATCH Size of indexing batch. 36 | --type {gram3,text4,hash4,wide8} 37 | Index types. By default [gram3, text4, wide8, hash4] 38 | --tag TAGS Additional tags for indexed datasets. 39 | --workers WORKERS Number of parallel indexing jobs. 40 | --working-datasets WORKING_DATASETS 41 | Numer of working datasets (uses sane value by default). 42 | ``` 43 | 44 | ## Example 45 | 46 | Only --workdir and S3-related parameters are required: 47 | 48 | ```shell 49 | $ python3 -m utils.s3index \ 50 | --workdir /root/mquery_tmp \ 51 | --s3-url localhost:9000 \ 52 | --s3-secret-key YOUR-SECRET-KEY \ 53 | --s3-access-key YOUR-ACCESS-KEY \ 54 | --s3-bucket mquery \ 55 | --s3-secure 0 56 | ``` 57 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | exclude = 'venv/,mqueryfront/' 3 | line-length = 79 4 | target_version = ['py36'] 5 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "src" 4 | ], 5 | "pythonVersion": "3.10", 6 | "pythonPlatform": "Linux", 7 | "executionEnvironments": [{ 8 | "root": "src" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /requirements.plain.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | pydantic 3 | pyzmq 4 | redis 5 | requests 6 | typing-extensions 7 | 
uvicorn 8 | yara-python 9 | yaramod 10 | cachetools 11 | pyjwt[crypto] 12 | typed-config 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==1.11.1 2 | annotated-types==0.7.0 3 | anyio==4.6.0 4 | asgiref==3.4.1 5 | cachetools==5.5.0 6 | certifi==2024.8.30 7 | cffi==1.17.1 8 | charset-normalizer==3.3.2 9 | click==8.1.7 10 | cryptography==43.0.1 11 | Deprecated==1.2.13 12 | fastapi==0.115.0 13 | h11==0.14.0 14 | idna==3.10 15 | psycopg2==2.9.9 16 | pycparser==2.22 17 | pydantic==1.10.18 18 | pydantic_core==2.23.4 19 | PyJWT[crypto]==2.9.0 20 | pyzmq==26.2.0 21 | redis==5.0.8 22 | requests==2.32.2 23 | rq==1.16.2 24 | sniffio==1.3.1 25 | sqlmodel==0.0.11 26 | starlette==0.38.6 27 | typed-config==2.0.3 28 | types-requests==2.32.0.20240914 29 | typing_extensions==4.12.2 30 | urllib3==2.2.3 31 | uvicorn==0.30.6 32 | wrapt==1.16.0 33 | yara-python==4.5.1 34 | yaramod==3.23.0 35 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E501,W503,E203 3 | exclude = mqueryfront/ 4 | 5 | [mypy] 6 | python_version = 3.10 7 | 8 | [mypy-yaramod.*] 9 | ignore_missing_imports = True 10 | 11 | [mypy-zmq.*] 12 | ignore_missing_imports = True 13 | 14 | [mypy-uvicorn.*] 15 | ignore_missing_imports = True 16 | 17 | [mypy-mwdblib.*] 18 | ignore_missing_imports = True 19 | 20 | [mypy-cachetools.*] 21 | ignore_missing_imports = True 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | setup( 6 | name="mquery", 7 | version="1.6.0", 8 | description="Blazingly fast Yara queries for malware analysts", 9 | 
packages=[ 10 | "mquery", 11 | "mquery.lib", 12 | "mquery.plugins", 13 | "mquery.models", 14 | "mquery.migrations", 15 | "mquery.migrations.versions", 16 | ], 17 | package_dir={"mquery": "src"}, 18 | include_package_data=True, 19 | install_requires=open("requirements.txt").read().splitlines(), 20 | scripts=[ 21 | "src/scripts/mquery-daemon", 22 | ], 23 | classifiers=[ 24 | "Programming Language :: Python", 25 | "Operating System :: OS Independent", 26 | ], 27 | ) 28 | -------------------------------------------------------------------------------- /src/.dockerignore: -------------------------------------------------------------------------------- 1 | .pytest_cache 2 | .mypy_cache 3 | __pycache__ 4 | mqueryfront/ 5 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/__init__.py -------------------------------------------------------------------------------- /src/alembic.ini: -------------------------------------------------------------------------------- 1 | [alembic] 2 | script_location = %(here)s/migrations 3 | prepend_sys_path = . 4 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects. 
5 | 6 | [post_write_hooks] 7 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 8 | # hooks = black 9 | # black.type = console_scripts 10 | # black.entrypoint = black 11 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 12 | 13 | # Logging configuration 14 | [loggers] 15 | keys = root,sqlalchemy,alembic 16 | 17 | [handlers] 18 | keys = console 19 | 20 | [formatters] 21 | keys = generic 22 | 23 | [logger_root] 24 | level = WARN 25 | handlers = console 26 | qualname = 27 | 28 | [logger_sqlalchemy] 29 | level = WARN 30 | handlers = 31 | qualname = sqlalchemy.engine 32 | 33 | [logger_alembic] 34 | level = INFO 35 | handlers = 36 | qualname = alembic 37 | 38 | [handler_console] 39 | class = StreamHandler 40 | args = (sys.stderr,) 41 | level = NOTSET 42 | formatter = generic 43 | 44 | [formatter_generic] 45 | format = %(levelname)-5.5s [%(name)s] %(message)s 46 | datefmt = %H:%M:%S 47 | -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | from typedconfig import Config, key, section, group_key # type: ignore 2 | from typedconfig.source import EnvironmentConfigSource, IniFileConfigSource # type: ignore 3 | import os 4 | 5 | 6 | @section("redis") 7 | class RedisConfig(Config): 8 | # Hostname of a configured redis instance. 9 | host = key(cast=str, required=False, default="localhost") 10 | # Port of a configured redis instance. 11 | port = key(cast=int, required=False, default=6379) 12 | 13 | 14 | @section("database") 15 | class DatabaseConfig(Config): 16 | # URL of a configured sql database. 17 | url = key( 18 | cast=str, required=False, default="postgresql://localhost:5432/mquery" 19 | ) 20 | 21 | 22 | @section("rq") 23 | class RqConfig(Config): 24 | # Timeout value for rq jobs. 
25 | job_timeout = key(cast=int, required=False, default=300) 26 | 27 | 28 | @section("mquery") 29 | class MqueryConfig(Config): 30 | # URL to a UrsaDB instance. 31 | backend = key(cast=str, required=False, default="tcp://127.0.0.1:9281") 32 | # List of plugin specifications separated by comma, for example 33 | # "plugins.archive:GzipPlugin, plugins.custom:CustomPlugin" 34 | plugins = key(cast=str, required=False, default="") 35 | # Maximum number of yara-scanned files per query (0 means no limit). 36 | yara_limit = key(cast=int, required=False, default=0) 37 | # Html code to be displayed on the about page. 38 | about = key(cast=str, required=False, default="") 39 | 40 | 41 | class AppConfig(Config): 42 | redis = group_key(RedisConfig) 43 | database = group_key(DatabaseConfig) 44 | rq = group_key(RqConfig) 45 | mquery = group_key(MqueryConfig) 46 | 47 | 48 | def _config_sources(): 49 | return [ 50 | EnvironmentConfigSource(), 51 | IniFileConfigSource("mquery.ini", must_exist=False), 52 | IniFileConfigSource( 53 | os.path.expanduser("~/.config/mquery/mquery.ini"), must_exist=False 54 | ), 55 | IniFileConfigSource("/etc/mquery/mquery.ini", must_exist=False), 56 | ] 57 | 58 | 59 | app_config = AppConfig(sources=_config_sources()) 60 | -------------------------------------------------------------------------------- /src/daemon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | from multiprocessing import Process 4 | import logging 5 | from redis import Redis 6 | from rq import Connection, Worker # type: ignore 7 | 8 | from .util import setup_logging 9 | from . 
import tasks 10 | from .config import app_config 11 | 12 | 13 | def start_worker(args: argparse.Namespace, process_index: int) -> None: 14 | setup_logging() 15 | logging.info( 16 | "Agent [%s] running (process %s)...", args.group_id, process_index 17 | ) 18 | 19 | with Connection(Redis(app_config.redis.host, app_config.redis.port)): 20 | w = Worker([args.group_id]) 21 | w.work() 22 | 23 | 24 | def main() -> None: 25 | """Spawns a new agent process. Use argv if you want to use a different 26 | group_id (it's `default` by default). 27 | """ 28 | 29 | parser = argparse.ArgumentParser(description="Start mquery daemon.") 30 | parser.add_argument( 31 | "group_id", 32 | help="Name of the agent group to join to", 33 | nargs="?", 34 | default="default", 35 | ) 36 | parser.add_argument( 37 | "--scale", 38 | type=int, 39 | help="Specifies the number of concurrent processes to use.", 40 | default=1, 41 | ) 42 | 43 | args = parser.parse_args() 44 | 45 | # Initial registration of the worker group. 46 | # The goal is to make the web UI aware of this worker and its configuration. 47 | tasks.make_agent(args.group_id).register() 48 | 49 | if args.scale > 1: 50 | children = [ 51 | Process(target=start_worker, args=(args, i)) 52 | for i in range(args.scale) 53 | ] 54 | for child in children: 55 | child.start() 56 | for child in children: 57 | child.join() 58 | else: 59 | start_worker(args, 0) 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /src/e2etests/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | RUN apt update; apt install -y cmake 3 | RUN pip3 install pytest==7.1.2 4 | COPY requirements.txt /app/requirements.txt 5 | RUN pip3 install -r /app/requirements.txt 6 | COPY . 
/app/ 7 | RUN pip3 install /app/ 8 | WORKDIR /app 9 | CMD ["python", "-m", "pytest", "--log-cli-level=INFO", "/app/src/e2etests/"] 10 | -------------------------------------------------------------------------------- /src/e2etests/README.md: -------------------------------------------------------------------------------- 1 | # E2E tests 2 | 3 | Slow test suite, used as a sanity test for mquery matching capabilities. 4 | Hopefully it won't allow us to merge a completely broken version. 5 | 6 | They are automatically built and ran on every commit in the CI pipeline, 7 | so you don't have to. But if you want to test locally, run (from the main directory of mquery): 8 | 9 | ```bash 10 | $ rm -r e2e-state 11 | $ docker compose -f docker-compose.e2etests-local.yml up --build --exit-code-from e2etests-local 12 | ``` 13 | -------------------------------------------------------------------------------- /src/e2etests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/e2etests/__init__.py -------------------------------------------------------------------------------- /src/e2etests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest==6.2.5 2 | pytest-timeout==1.3.4 3 | requests==2.23.0 4 | -------------------------------------------------------------------------------- /src/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/lib/__init__.py -------------------------------------------------------------------------------- /src/lib/ursadb.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import zmq # type: ignore 4 | from typing import Dict, Any, List, Optional 5 | 6 | 7 | 
Json = Dict[str, Any] 8 | 9 | 10 | class PopResult: 11 | def __init__( 12 | self, 13 | was_locked: bool, 14 | files: List[str], 15 | iterator_pos: int, 16 | total_files: int, 17 | ) -> None: 18 | self.was_locked = was_locked 19 | self.files = files 20 | self.iterator_pos = iterator_pos 21 | self.total_files = total_files 22 | 23 | @property 24 | def iterator_empty(self) -> bool: 25 | """Is it safe to remove the iterator after this operation?""" 26 | if self.was_locked: 27 | return False 28 | return self.iterator_pos >= self.total_files 29 | 30 | def __str__(self) -> str: 31 | """Pretty-print iterator showing all important information.""" 32 | tag = "[locked] " if self.was_locked else "" 33 | pos = f"{self.iterator_pos}/{self.total_files}" 34 | return f"iterator {tag}with {len(self.files)} files ({pos})" 35 | 36 | 37 | class UrsaDb: 38 | def __init__(self, backend: str) -> None: 39 | self.backend = backend 40 | 41 | def __execute(self, command: str, recv_timeout: int = 2000) -> Json: 42 | context = zmq.Context() 43 | try: 44 | socket = context.socket(zmq.REQ) 45 | socket.setsockopt(zmq.LINGER, 0) 46 | socket.setsockopt(zmq.RCVTIMEO, recv_timeout) 47 | socket.connect(self.backend) 48 | socket.send_string(command) 49 | return json.loads(socket.recv_string()) 50 | finally: 51 | socket.close() 52 | 53 | def query( 54 | self, 55 | query: str, 56 | taints: List[str] | None = None, 57 | dataset: Optional[str] = None, 58 | ) -> Json: 59 | command = "select " 60 | if taints: 61 | taints_str = '", "'.join(taints) 62 | taints_whole_str = f'["{taints_str}"]' 63 | command += f"with taints {taints_whole_str} " 64 | if dataset: 65 | command += f'with datasets ["{dataset}"] ' 66 | command += f"into iterator {query};" 67 | 68 | start = time.perf_counter() 69 | res = self.__execute(command, recv_timeout=-1) 70 | end = time.perf_counter() 71 | 72 | if "error" in res: 73 | error = res.get("error", {}).get("message", "(no message)") 74 | return {"error": f"ursadb failed: {error}"} 75 | 
76 | return { 77 | "time": (end - start), 78 | "iterator": res["result"]["iterator"], 79 | "file_count": res["result"]["file_count"], 80 | } 81 | 82 | def pop(self, iterator: str, count: int) -> PopResult: 83 | res = self.__execute(f'iterator "{iterator}" pop {count};', -1) 84 | 85 | if "error" in res: 86 | if res["error"].get("retry", False): 87 | # iterator locked, try again in a sec 88 | return PopResult(True, [], 0, 0) 89 | # return empty file set - this will clear the job from the db! 90 | return PopResult(False, [], 0, 0) 91 | 92 | res = res["result"] 93 | iterator_pos = res["iterator_position"] 94 | total_files = res["total_files"] 95 | return PopResult(False, res["files"], iterator_pos, total_files) 96 | 97 | def status(self) -> Json: 98 | return self.__execute("status;") 99 | 100 | def topology(self) -> Json: 101 | return self.__execute("topology;") 102 | 103 | def execute_command(self, command: str) -> Json: 104 | return self.__execute(command, -1) 105 | -------------------------------------------------------------------------------- /src/metadata.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import Any, Dict, Optional 3 | 4 | from .db import Database 5 | 6 | DEFAULT_CACHE_EXPIRE_TIME = 60 * 60 * 12 7 | 8 | Metadata = Dict[str, Any] 9 | MetadataPluginConfig = Dict[str, str] 10 | 11 | 12 | class MetadataPlugin(ABC): 13 | # Can extract() results be cached? Currently unused. 14 | cacheable: bool = False 15 | # Overrides default cache expire time 16 | cache_expire_time: int = DEFAULT_CACHE_EXPIRE_TIME 17 | # Configuration keys required by plugin with description as a value 18 | config_fields: Dict[str, str] = {} 19 | # can this plugin be used for prefiltering mwdb results? 20 | is_filter = False 21 | # can this plugin be used for extracting metadata? 
22 | is_extractor = False 23 | 24 | def __init__(self, db: Database, config: MetadataPluginConfig) -> None: 25 | self.db = db 26 | for key in self.config_fields.keys(): 27 | if key not in config or not config[key]: 28 | raise KeyError( 29 | f"Required configuration key '{key}' is not set" 30 | ) 31 | 32 | @classmethod 33 | def get_name(cls) -> str: 34 | return cls.__name__ 35 | 36 | def identify(self, matched_fname: str) -> Optional[str]: 37 | """Returns file unique identifier based on matched path. 38 | 39 | Intended to be overridden by plugin. 40 | """ 41 | return matched_fname 42 | 43 | def run(self, matched_fname: str, current_meta: Metadata) -> Metadata: 44 | """Extracts metadata and updates cache. This method can only be run if 45 | the plugin sets `is_extractor` to True. 46 | 47 | :param matched_fname: Filename of the processed file 48 | :param current_meta: Metadata that will be updated 49 | :return: New metadata 50 | """ 51 | identifier = self.identify(matched_fname) 52 | if identifier is None: 53 | return {} 54 | 55 | return self.extract(identifier, matched_fname, current_meta) 56 | 57 | def filter(self, matched_fname: str, file_path: str) -> Optional[str]: 58 | """Checks if the file is a good candidate for further processing, 59 | and fix the file path if necessary. 60 | :param matched_fname: Original file path coming from ursadb 61 | :param file_path: Current path to the file contents 62 | :return: New path to a file (may be the same path). None if the file 63 | should be discarded. 64 | """ 65 | raise NotImplementedError 66 | 67 | def cleanup(self) -> None: 68 | """Optionally, clean up after the plugin, for example remove any 69 | temporary files. Called after processing a single batch of files. 70 | """ 71 | pass 72 | 73 | def extract( 74 | self, identifier: str, matched_fname: str, current_meta: Metadata 75 | ) -> Metadata: 76 | """Extracts metadata for matched path. 77 | 78 | Intended to be overridden by plugin, if is_extractor is True. 
79 | 80 | :param identifier: File identifier returned by overridable 81 | :py:meth:`MetadataPlugin.identify` method 82 | :param matched_fname: Matched file path 83 | :param current_meta: Metadata extracted so far by dependencies 84 | :return: Metadata object. If you can't extract metadata for current file, 85 | return empty dict. 86 | """ 87 | raise NotImplementedError 88 | -------------------------------------------------------------------------------- /src/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/migrations/__init__.py -------------------------------------------------------------------------------- /src/migrations/env.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import create_engine 2 | from alembic import context 3 | from sqlmodel import SQLModel 4 | from logging.config import fileConfig 5 | 6 | from mquery.config import app_config # type: ignore 7 | from mquery.models.agentgroup import AgentGroup # type: ignore # noqa 8 | from mquery.models.configentry import ConfigEntry # type: ignore # noqa 9 | from mquery.models.job import Job # type: ignore # noqa 10 | from mquery.models.match import Match # type: ignore # noqa 11 | from mquery.models.queuedfile import QueuedFile # type: ignore # noqa 12 | 13 | 14 | target_metadata = SQLModel.metadata 15 | 16 | 17 | if context.config.config_file_name is not None: 18 | fileConfig(context.config.config_file_name) 19 | 20 | 21 | def run_migrations_online() -> None: 22 | connectable = create_engine(app_config.database.url) 23 | with connectable.connect() as connection: 24 | context.configure( 25 | connection=connection, target_metadata=target_metadata 26 | ) 27 | with context.begin_transaction(): 28 | context.run_migrations() 29 | 30 | 31 | run_migrations_online() 32 | 
-------------------------------------------------------------------------------- /src/migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | Revision ID: ${up_revision} 3 | Revises: ${down_revision | comma,n} 4 | Create Date: ${create_date} 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | import sqlmodel 9 | ${imports if imports else ""} 10 | 11 | # revision identifiers, used by Alembic. 12 | revision = ${repr(up_revision)} 13 | down_revision = ${repr(down_revision)} 14 | branch_labels = ${repr(branch_labels)} 15 | depends_on = ${repr(depends_on)} 16 | 17 | 18 | def upgrade() -> None: 19 | ${upgrades if upgrades else "pass"} 20 | 21 | 22 | def downgrade() -> None: 23 | ${downgrades if downgrades else "pass"} 24 | -------------------------------------------------------------------------------- /src/migrations/versions/6b495d5a4855_cascade_jobagent_match_and_del_removed_status_create_enum_jobstatus.py: -------------------------------------------------------------------------------- 1 | """add jobstatus 2 | Revision ID: 6b495d5a4855 3 | Revises: dbb81bd4d47f 4 | Create Date: 2024-10-15 08:17:30.036531 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | 9 | 10 | # revision identifiers, used by Alembic. 
11 | revision = "6b495d5a4855" 12 | down_revision = "dbb81bd4d47f" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | job_status = sa.Enum( 17 | "done", "new", "cancelled", "processing", name="jobstatus" 18 | ) 19 | 20 | 21 | def upgrade() -> None: 22 | op.drop_constraint("jobagent_job_id_fkey", "jobagent", type_="foreignkey") 23 | op.create_foreign_key( 24 | constraint_name="jobagent_job_id_fkey", 25 | source_table="jobagent", 26 | referent_table="job", 27 | local_cols=["job_id"], 28 | remote_cols=["internal_id"], 29 | ondelete="CASCADE", 30 | ) 31 | 32 | op.drop_constraint("match_job_id_fkey", "match", type_="foreignkey") 33 | op.create_foreign_key( 34 | constraint_name="match_job_id_fkey", 35 | source_table="match", 36 | referent_table="job", 37 | local_cols=["job_id"], 38 | remote_cols=["internal_id"], 39 | ondelete="CASCADE", 40 | ) 41 | 42 | op.execute("DELETE FROM job WHERE status = 'removed';") 43 | 44 | job_status.create(op.get_bind()) 45 | op.alter_column( 46 | "job", 47 | "status", 48 | existing_type=sa.VARCHAR(), 49 | type_=job_status, 50 | postgresql_using="status::jobstatus", 51 | nullable=True, 52 | ) 53 | 54 | 55 | def downgrade() -> None: 56 | op.alter_column( 57 | "job", 58 | "status", 59 | existing_type=job_status, 60 | type_=sa.VARCHAR(), 61 | nullable=False, 62 | ) 63 | 64 | op.execute("DROP TYPE IF EXISTS jobstatus") 65 | 66 | op.drop_constraint("jobagent_job_id_fkey", "jobagent", type_="foreignkey") 67 | op.create_foreign_key( 68 | constraint_name="jobagent_job_id_fkey", 69 | source_table="jobagent", 70 | referent_table="job", 71 | local_cols=["job_id"], 72 | remote_cols=["internal_id"], 73 | ) 74 | 75 | op.drop_constraint("match_job_id_fkey", "match", type_="foreignkey") 76 | op.create_foreign_key( 77 | constraint_name="match_job_id_fkey", 78 | source_table="match", 79 | referent_table="job", 80 | local_cols=["job_id"], 81 | remote_cols=["internal_id"], 82 | ) 83 | 
-------------------------------------------------------------------------------- /src/migrations/versions/702d19cfa063_add_queuedfile.py: -------------------------------------------------------------------------------- 1 | """add queuedfile 2 | Revision ID: 702d19cfa063 3 | Revises: 6b495d5a4855 4 | Create Date: 2024-11-18 22:13:51.562315 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | import sqlmodel 9 | 10 | 11 | # revision identifiers, used by Alembic. 12 | revision = "702d19cfa063" 13 | down_revision = "6b495d5a4855" 14 | branch_labels = None 15 | depends_on = None 16 | 17 | 18 | def upgrade() -> None: 19 | # ### commands auto generated by Alembic - please adjust! ### 20 | op.create_table( 21 | "queuedfile", 22 | sa.Column("index_types", sa.ARRAY(sa.String()), nullable=False), 23 | sa.Column("tags", sa.ARRAY(sa.String()), nullable=False), 24 | sa.Column("id", sa.Integer(), nullable=False), 25 | sa.Column( 26 | "ursadb_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False 27 | ), 28 | sa.Column("path", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 29 | sa.Column("created_at", sa.DateTime(), nullable=False), 30 | sa.PrimaryKeyConstraint("id"), 31 | ) 32 | # ### end Alembic commands ### 33 | 34 | 35 | def downgrade() -> None: 36 | # ### commands auto generated by Alembic - please adjust! ### 37 | op.drop_table("queuedfile") 38 | # ### end Alembic commands ### 39 | -------------------------------------------------------------------------------- /src/migrations/versions/cbbba858deb0_init.py: -------------------------------------------------------------------------------- 1 | """Init 2 | Revision ID: cbbba858deb0 3 | Revises: 4 | Create Date: 2024-02-15 16:52:45.261139. 
5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | import sqlmodel 9 | 10 | 11 | revision = "cbbba858deb0" 12 | down_revision = None 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade() -> None: 18 | op.create_table( 19 | "agentgroup", 20 | sa.Column("plugins_spec", sa.JSON(), nullable=True), 21 | sa.Column("active_plugins", sa.ARRAY(sa.String()), nullable=True), 22 | sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 23 | sa.Column( 24 | "ursadb_url", sqlmodel.sql.sqltypes.AutoString(), nullable=False 25 | ), 26 | sa.Column("id", sa.Integer(), nullable=False), 27 | sa.PrimaryKeyConstraint("id"), 28 | ) 29 | op.create_table( 30 | "configentry", 31 | sa.Column("id", sa.Integer(), nullable=False), 32 | sa.Column( 33 | "plugin", sqlmodel.sql.sqltypes.AutoString(), nullable=False 34 | ), 35 | sa.Column("key", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 36 | sa.Column("value", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 37 | sa.PrimaryKeyConstraint("id"), 38 | ) 39 | op.create_table( 40 | "job", 41 | sa.Column("taints", sa.ARRAY(sa.String()), nullable=True), 42 | sa.Column("id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 43 | sa.Column( 44 | "status", sqlmodel.sql.sqltypes.AutoString(), nullable=False 45 | ), 46 | sa.Column("error", sqlmodel.sql.sqltypes.AutoString(), nullable=True), 47 | sa.Column( 48 | "rule_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False 49 | ), 50 | sa.Column( 51 | "rule_author", sqlmodel.sql.sqltypes.AutoString(), nullable=False 52 | ), 53 | sa.Column( 54 | "raw_yara", sqlmodel.sql.sqltypes.AutoString(), nullable=False 55 | ), 56 | sa.Column("submitted", sa.Integer(), nullable=False), 57 | sa.Column("finished", sa.Integer(), nullable=True), 58 | sa.Column("files_limit", sa.Integer(), nullable=False), 59 | sa.Column( 60 | "reference", sqlmodel.sql.sqltypes.AutoString(), nullable=False 61 | ), 62 | sa.Column("files_processed", sa.Integer(), 
nullable=False), 63 | sa.Column("files_matched", sa.Integer(), nullable=False), 64 | sa.Column("files_in_progress", sa.Integer(), nullable=False), 65 | sa.Column("total_files", sa.Integer(), nullable=False), 66 | sa.Column("files_errored", sa.Integer(), nullable=False), 67 | sa.Column("datasets_left", sa.Integer(), nullable=False), 68 | sa.Column("total_datasets", sa.Integer(), nullable=False), 69 | sa.Column("agents_left", sa.Integer(), nullable=False), 70 | sa.Column("internal_id", sa.Integer(), nullable=False), 71 | sa.PrimaryKeyConstraint("internal_id"), 72 | ) 73 | op.create_table( 74 | "match", 75 | sa.Column("meta", sa.JSON(), nullable=True), 76 | sa.Column("matches", sa.ARRAY(sa.String()), nullable=True), 77 | sa.Column("id", sa.Integer(), nullable=False), 78 | sa.Column("job_id", sa.Integer(), nullable=False), 79 | sa.Column("file", sqlmodel.sql.sqltypes.AutoString(), nullable=False), 80 | sa.ForeignKeyConstraint( 81 | ["job_id"], 82 | ["job.internal_id"], 83 | ), 84 | sa.PrimaryKeyConstraint("id"), 85 | ) 86 | 87 | 88 | def downgrade() -> None: 89 | op.drop_table("match") 90 | op.drop_table("job") 91 | op.drop_table("configentry") 92 | op.drop_table("agentgroup") 93 | -------------------------------------------------------------------------------- /src/migrations/versions/dbb81bd4d47f_add_jobagent.py: -------------------------------------------------------------------------------- 1 | """add jobagent 2 | Revision ID: dbb81bd4d47f 3 | Revises: cbbba858deb0 4 | Create Date: 2024-05-29 13:13:03.980030 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "dbb81bd4d47f" 12 | down_revision = "cbbba858deb0" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade() -> None: 18 | # ### commands auto generated by Alembic - please adjust! 
### 19 | op.create_table( 20 | "jobagent", 21 | sa.Column("id", sa.Integer(), nullable=False), 22 | sa.Column("task_in_progress", sa.Integer(), nullable=False), 23 | sa.Column("job_id", sa.Integer(), nullable=False), 24 | sa.Column("agent_id", sa.Integer(), nullable=False), 25 | sa.ForeignKeyConstraint( 26 | ["agent_id"], 27 | ["agentgroup.id"], 28 | ), 29 | sa.ForeignKeyConstraint( 30 | ["job_id"], 31 | ["job.internal_id"], 32 | ), 33 | sa.PrimaryKeyConstraint("id"), 34 | ) 35 | # ### end Alembic commands ### 36 | 37 | 38 | def downgrade() -> None: 39 | # ### commands auto generated by Alembic - please adjust! ### 40 | op.drop_table("jobagent") 41 | # ### end Alembic commands ### 42 | -------------------------------------------------------------------------------- /src/migrations/versions/f623e1057b00_added_context_column_into_match_table.py: -------------------------------------------------------------------------------- 1 | """Added context column into match table 2 | Revision ID: f623e1057b00 3 | Revises: 6b495d5a4855 4 | Create Date: 2024-11-13 15:14:14.618258 5 | """ 6 | from alembic import op 7 | import sqlalchemy as sa 8 | 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "f623e1057b00" 12 | down_revision = "702d19cfa063" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade() -> None: 18 | # ### commands auto generated by Alembic - please adjust! ### 19 | op.add_column("match", sa.Column("context", sa.JSON(), nullable=False)) 20 | # ### end Alembic commands ### 21 | 22 | 23 | def downgrade() -> None: 24 | # ### commands auto generated by Alembic - please adjust! 
### 25 | op.drop_column("match", "context") 26 | # ### end Alembic commands ### 27 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/agentgroup.py: -------------------------------------------------------------------------------- 1 | from sqlmodel import SQLModel, Field, Column, ARRAY, String, JSON, Relationship 2 | from typing import Union, List, Dict 3 | from ..models.jobagent import JobAgent 4 | 5 | 6 | class AgentGroupView(SQLModel): 7 | name: str 8 | ursadb_url: str 9 | plugins_spec: Dict[str, Dict[str, str]] = Field(sa_column=Column(JSON)) 10 | active_plugins: List[str] = Field(sa_column=Column(ARRAY(String))) 11 | 12 | 13 | class AgentGroup(AgentGroupView, table=True): 14 | """Agent group is a group of processes working on a single 15 | file group, with a shared storage, and a single backing ursadb. 
16 | """ 17 | 18 | id: Union[int, None] = Field(default=None, primary_key=True) 19 | jobs: List["JobAgent"] = Relationship(back_populates="agent") 20 | -------------------------------------------------------------------------------- /src/models/configentry.py: -------------------------------------------------------------------------------- 1 | from sqlmodel import Field, SQLModel 2 | from typing import Union 3 | 4 | 5 | class ConfigEntry(SQLModel, table=True): 6 | id: Union[int, None] = Field(default=None, primary_key=True) 7 | plugin: str 8 | key: str 9 | value: str 10 | -------------------------------------------------------------------------------- /src/models/job.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | from sqlalchemy.dialects import postgresql 4 | 5 | from sqlmodel import SQLModel, Field, ARRAY, String, Column, Relationship 6 | from typing import Optional, List, Union, TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from ..models.match import Match 10 | from ..models.jobagent import JobAgent 11 | 12 | 13 | class JobStatus(enum.Enum): 14 | done = "done" 15 | new = "new" 16 | cancelled = "cancelled" 17 | processing = "processing" 18 | 19 | 20 | class JobView(SQLModel): 21 | """Public fields of mquery jobs.""" 22 | 23 | __table_args__ = {"extend_existing": True} 24 | 25 | id: str 26 | status: JobStatus = Field(sa_column=Column(postgresql.ENUM(JobStatus, name="jobstatus"))) # type: ignore 27 | error: Optional[str] 28 | rule_name: str 29 | rule_author: str 30 | raw_yara: str 31 | submitted: int 32 | finished: Optional[int] 33 | files_limit: int 34 | reference: str 35 | files_processed: int 36 | files_matched: int 37 | files_in_progress: int 38 | total_files: int 39 | files_errored: int 40 | taints: List[str] = Field(sa_column=Column(ARRAY(String))) 41 | datasets_left: int 42 | total_datasets: int 43 | agents_left: int 44 | 45 | class Config: 46 | arbitrary_types_allowed = True 47 | 48 | 49 | class 
Job(JobView, table=True): 50 | """Job object in the database. Internal ID is an implementation detail.""" 51 | 52 | internal_id: Union[int, None] = Field(default=None, primary_key=True) 53 | 54 | matches: List["Match"] = Relationship(back_populates="job") 55 | agents: List["JobAgent"] = Relationship(back_populates="job") 56 | -------------------------------------------------------------------------------- /src/models/jobagent.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import ForeignKey 2 | from sqlmodel import SQLModel, Field, Relationship, Column 3 | from typing import Union, TYPE_CHECKING 4 | 5 | if TYPE_CHECKING: 6 | from ..models.match import Job 7 | from ..models.agentgroup import AgentGroup 8 | 9 | 10 | class JobAgent(SQLModel, table=True): 11 | """Information about job relating to a specific agent group.""" 12 | 13 | id: Union[int, None] = Field(default=None, primary_key=True) 14 | task_in_progress: int 15 | 16 | job_id: int = Field( 17 | sa_column=Column( 18 | ForeignKey("job.internal_id", ondelete="CASCADE"), nullable=False 19 | ), 20 | ) 21 | job: "Job" = Relationship(back_populates="agents") 22 | 23 | agent_id: int = Field(foreign_key="agentgroup.id") 24 | agent: "AgentGroup" = Relationship(back_populates="jobs") 25 | -------------------------------------------------------------------------------- /src/models/match.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import ForeignKey 2 | from sqlmodel import SQLModel, Field, ARRAY, String, Column, JSON, Relationship 3 | from typing import List, Union, Dict, Any 4 | 5 | from ..models.job import Job 6 | 7 | 8 | class Match(SQLModel, table=True): 9 | """Represents a file matched to a job, along with a related metadata.""" 10 | 11 | id: Union[int, None] = Field(default=None, primary_key=True) 12 | # A file path on one of the daemons 13 | file: str 14 | # A metadata dictionary - contains various 
tags added by plugins 15 | meta: Dict[str, Any] = Field(sa_column=Column(JSON)) 16 | # A list of yara rules matched to this file 17 | matches: List[str] = Field(sa_column=Column(ARRAY(String))) 18 | 19 | job_id: int = Field( 20 | sa_column=Column( 21 | ForeignKey("job.internal_id", ondelete="CASCADE"), nullable=False 22 | ) 23 | ) 24 | job: Job = Relationship(back_populates="matches") 25 | context: Dict[str, Dict[str, Dict[str, str]]] = Field( 26 | sa_column=Column(JSON, nullable=False) 27 | ) 28 | -------------------------------------------------------------------------------- /src/models/queuedfile.py: -------------------------------------------------------------------------------- 1 | from sqlmodel import SQLModel, Field, ARRAY, String, Column 2 | from typing import Union, List 3 | from datetime import datetime 4 | 5 | 6 | class QueuedFile(SQLModel, table=True): 7 | """Represents a file that is waiting to be indexed.""" 8 | 9 | id: Union[int, None] = Field(default=None, primary_key=True) 10 | 11 | # ID of the ursadb ("agent group") this file belongs to. 12 | ursadb_id: str 13 | 14 | # A file path that should be indexed. This path should be 15 | # valid on the Ursadb with ID `ursadb_id` (or there should be a plugin 16 | # that knows how to process this path to get a valid file). 17 | path: str 18 | 19 | # Time when this file was added. 20 | created_at: datetime = Field( 21 | default_factory=datetime.utcnow, 22 | ) 23 | 24 | # Desired index types for this file (valid values include ["gram3", 25 | # "text4", "hash4" and "wide8"], database enum feels like an overkill). 26 | index_types: List[str] = Field( 27 | sa_column=Column(ARRAY(String), nullable=False) 28 | ) 29 | 30 | # Desired tags for this file. Warning - overusing tags will have a big 31 | # negative impact on performance, it's best to keep to a few tags at most. 
32 | tags: List[str] = Field(sa_column=Column(ARRAY(String), nullable=False)) 33 | -------------------------------------------------------------------------------- /src/mqueryfront/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | 6 | # testing 7 | /coverage 8 | 9 | # production 10 | /build 11 | 12 | # misc 13 | .DS_Store 14 | .env.local 15 | .env.development.local 16 | .env.test.local 17 | .env.production.local 18 | 19 | npm-debug.log* 20 | yarn-debug.log* 21 | yarn-error.log* 22 | -------------------------------------------------------------------------------- /src/mqueryfront/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 9 | 10 | 14 | 15 | 16 | mquery | CERT.PL>_ 17 | 18 | 19 | 22 |
23 | 24 | 34 | 35 | -------------------------------------------------------------------------------- /src/mqueryfront/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mqueryfront", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@babel/runtime": "^7.25.6", 7 | "@fortawesome/fontawesome-svg-core": "^6.6.0", 8 | "@fortawesome/free-solid-svg-icons": "^6.6.0", 9 | "@fortawesome/react-fontawesome": "^0.2.2", 10 | "@monaco-editor/react": "^4.6.0", 11 | "@popperjs/core": "^2.11.8", 12 | "@vitejs/plugin-react": "^4.3.1", 13 | "axios": "^1.7.7", 14 | "bootstrap": "^5.3.3", 15 | "filesize": "^10.1.6", 16 | "font-awesome": "^4.7.0", 17 | "http-proxy-middleware": "^3.0.2", 18 | "jquery": "^3.7.1", 19 | "monaco-editor": "^0.52.0", 20 | "path-browserify": "^1.0.1", 21 | "prettier": "^3.3.3", 22 | "prop-types": "^15.8.1", 23 | "react": "^18.3.1", 24 | "react-copy-to-clipboard": "^5.1.0", 25 | "react-dom": "^18.3.1", 26 | "react-draggable": "^4.4.6", 27 | "react-html-parser": "^2.0.2", 28 | "react-router-dom": "^6.26.2", 29 | "react-select": "^5.8.1", 30 | "replace-js-pagination": "^1.0.5", 31 | "vite": "4.5.5", 32 | "vite-plugin-svgr": "3.3.0", 33 | "webpack-dev-server": "^5.1.0" 34 | }, 35 | "devDependencies": {}, 36 | "scripts": { 37 | "start": "vite --host", 38 | "build": "vite build", 39 | "preview": "vite preview" 40 | }, 41 | "prettier": { 42 | "tabWidth": 4 43 | }, 44 | "browserslist": { 45 | "production": [ 46 | ">0.2%", 47 | "not dead", 48 | "not op_mini all" 49 | ], 50 | "development": [ 51 | "last 1 chrome version", 52 | "last 1 firefox version", 53 | "last 1 safari version" 54 | ] 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/mqueryfront/public/favicon.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CERT-Polska/mquery/ac4716ee405c46bbbbe61a0a0a1d77e90aa81d8c/src/mqueryfront/public/favicon.ico -------------------------------------------------------------------------------- /src/mqueryfront/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | } 10 | ], 11 | "start_url": "./index.html", 12 | "display": "standalone", 13 | "theme_color": "#000000", 14 | "background_color": "#ffffff" 15 | } 16 | -------------------------------------------------------------------------------- /src/mqueryfront/src/App.css: -------------------------------------------------------------------------------- 1 | .mquery-yara-input { 2 | height: calc(100vh - 150px) !important; 3 | font-family: monospace; 4 | } 5 | 6 | .mquery-scroll-matches { 7 | max-height: calc(100vh - 235px); 8 | overflow-y: scroll; 9 | } 10 | 11 | .mquery-scroll-matches td { 12 | word-break: break-all; 13 | font-family: monospace; 14 | } 15 | 16 | .mquery-scroll-matches td i { 17 | visibility: hidden; 18 | } 19 | 20 | .mquery-scroll-matches td:hover i { 21 | visibility: visible; 22 | } 23 | 24 | .mquery-scroll-matches td i:hover { 25 | color: var(--blue); 26 | } 27 | 28 | .copyable-item { 29 | cursor: pointer; 30 | } 31 | 32 | .is-collapsed { 33 | -webkit-transform: translateX(-100%); 34 | transform: translateX(-100%); 35 | position: absolute; 36 | } 37 | 38 | .accordion-toggle { 39 | cursor: pointer; 40 | position: relative; 41 | } 42 | 43 | .accordion-toggle::after { 44 | content: "\f107"; 45 | color: rgb(134, 134, 134); 46 | padding: 1rem; 47 | right: 0px; 48 | position: absolute; 49 | font-family: "FontAwesome"; 50 | } 51 | 52 | .accordion-toggle[aria-expanded="true"]::after { 53 | content: "\f106"; 54 | } 55 | 56 | .table-topology tbody 
tr:nth-child(4n + 1) { 57 | background: rgba(0, 0, 0, 0.05); 58 | } 59 | 60 | .table-topology tbody tr:nth-child(2n) { 61 | background: rgba(0, 0, 0, 0.02); 62 | } 63 | 64 | .glyphMargin { 65 | background: rgb(255, 134, 134); 66 | width: 5px !important; 67 | } 68 | 69 | .contentError { 70 | background: #ffc7c7; 71 | } 72 | 73 | .monaco-container { 74 | padding: 5px; 75 | box-sizing: border-box; 76 | -moz-box-sizing: border-box; 77 | -webkit-box-sizing: border-box; 78 | border: 2px solid rgb(238, 238, 238); 79 | } 80 | 81 | .dropdown i:hover, 82 | .dropdown.show i { 83 | cursor: pointer; 84 | color: var(--blue) !important; 85 | } 86 | 87 | .cursor-pointer { 88 | cursor: pointer; 89 | } 90 | 91 | .modal-container { 92 | position: absolute; 93 | offset-distance: 10px; 94 | z-index: auto; 95 | right: 5vw; 96 | } 97 | 98 | .modal-container-index-page { 99 | position: fixed; 100 | top: 50%; 101 | left: 50%; 102 | transform: translate(-50%, -50%); 103 | } 104 | 105 | .modal-block { 106 | position: relative; 107 | block-size: "fit-content"; 108 | right: 5vw; 109 | } 110 | 111 | .modal-dialog { 112 | margin: 0; 113 | } 114 | 115 | .modal-header:hover { 116 | cursor: grab; 117 | } 118 | 119 | .modal-table { 120 | overflow-y: scroll; 121 | max-height: 50vh; 122 | } 123 | 124 | .index-form-wrapper { 125 | display: grid; 126 | grid-auto-flow: row; 127 | grid-row-gap: 10px; 128 | } 129 | 130 | .index-links-wrapper { 131 | display: grid; 132 | grid-auto-flow: row; 133 | grid-row-gap: 3px; 134 | } 135 | 136 | .index-navlink { 137 | font-size: large; 138 | display: grid; 139 | grid-auto-flow: row; 140 | grid-row-gap: 10px; 141 | cursor: pointer; 142 | width: fit-content; 143 | } 144 | -------------------------------------------------------------------------------- /src/mqueryfront/src/App.js: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from "react"; 2 | import { Routes, Route } from "react-router-dom"; 3 
| import Navigation from "./Navigation"; 4 | import QueryPage from "./query/QueryPage"; 5 | import RecentPage from "./recent/RecentPage"; 6 | import StatusPage from "./status/StatusPage"; 7 | import ConfigPage from "./config/ConfigPage"; 8 | import AboutPage from "./about/AboutPage"; 9 | import AuthPage from "./auth/AuthPage"; 10 | import api, { parseJWT } from "./api"; 11 | import "./App.css"; 12 | import IndexPage from "./indexFiles/IndexPage"; 13 | 14 | function getCurrentTokenOrNull() { 15 | // This function handles missing and corrupted token in the same way. 16 | try { 17 | return parseJWT(localStorage.getItem("rawToken")); 18 | } catch { 19 | return null; 20 | } 21 | } 22 | 23 | function App() { 24 | const [config, setConfig] = useState(null); 25 | 26 | useEffect(() => { 27 | api.get("/server").then((response) => { 28 | setConfig(response.data); 29 | }); 30 | }, []); 31 | 32 | const login = (rawToken) => { 33 | localStorage.setItem("rawToken", rawToken); 34 | window.location.href = "/"; 35 | }; 36 | 37 | const logout = () => { 38 | localStorage.removeItem("rawToken"); 39 | if (config !== null) { 40 | const logout_url = new URL(config["openid_url"] + "/logout"); 41 | logout_url.searchParams.append( 42 | "redirect_uri", 43 | window.location.origin 44 | ); 45 | window.location.href = logout_url; 46 | } else { 47 | // Shouldn't happen, but reload just in case. 48 | window.location.href = "/"; 49 | } 50 | }; 51 | 52 | const token = getCurrentTokenOrNull(); 53 | 54 | return ( 55 |
56 | 57 | 58 | } /> 59 | } /> 60 | } /> 61 | } /> 62 | } /> 63 | } 67 | /> 68 | } 72 | /> 73 | } /> 74 | 75 |
76 | ); 77 | } 78 | 79 | export default App; 80 | -------------------------------------------------------------------------------- /src/mqueryfront/src/App.test.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ReactDOM from "react-dom"; 3 | import App from "./App"; 4 | 5 | it("renders without crashing", () => { 6 | const div = document.createElement("div"); 7 | ReactDOM.render(, div); 8 | ReactDOM.unmountComponentAtNode(div); 9 | }); 10 | -------------------------------------------------------------------------------- /src/mqueryfront/src/about/AboutPage.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | const AboutPage = (props) => { 4 | const aboutHtml = props.config ? props.config.about : null; 5 | return ( 6 |
7 |
8 |
9 |

About this instance

10 |

14 |

15 |
16 |
17 | ); 18 | }; 19 | 20 | export default AboutPage; 21 | -------------------------------------------------------------------------------- /src/mqueryfront/src/api.js: -------------------------------------------------------------------------------- 1 | import axios from "axios"; 2 | 3 | export const api_url = "/api"; 4 | 5 | export function parseJWT(token) { 6 | const base64Url = token.split(".")[1]; 7 | const base64 = base64Url.replace("-", "+").replace("_", "/"); 8 | return JSON.parse(atob(base64)); 9 | } 10 | 11 | function request(method, path, payload, params) { 12 | const rawToken = localStorage.getItem("rawToken"); 13 | const headers = rawToken ? { Authorization: `Bearer ${rawToken}` } : {}; 14 | return axios 15 | .request(path, { 16 | method: method, 17 | data: payload, 18 | params: params, 19 | headers: headers, 20 | }) 21 | .catch((error) => { 22 | if (error.response.status === 401) { 23 | window.location = "/auth"; 24 | } 25 | throw error; 26 | }); 27 | } 28 | 29 | function post(path, payload) { 30 | return request("post", `${api_url}${path}`, payload); 31 | } 32 | 33 | function get(path, params) { 34 | return request("get", `${api_url}${path}`, {}, params); 35 | } 36 | 37 | function delete_(path) { 38 | return request("delete", `${api_url}${path}`, {}); 39 | } 40 | 41 | export default { 42 | post: post, 43 | get: get, 44 | delete: delete_, 45 | }; 46 | -------------------------------------------------------------------------------- /src/mqueryfront/src/auth/AuthPage.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import ErrorBoundary from "../components/ErrorBoundary"; 3 | import axios from "axios"; 4 | import { isAuthEnabled, openidLoginUrl } from "../utils"; 5 | 6 | class AuthPage extends Component { 7 | constructor(props) { 8 | super(props); 9 | 10 | this.state = { 11 | error: null, 12 | }; 13 | } 14 | 15 | componentDidUpdate(prevProps) { 16 | if (this.props.config 
=== prevProps.config || !this.props.config) { 17 | return; 18 | } 19 | let authEnabled = isAuthEnabled(this.props.config); 20 | if (!authEnabled) { 21 | this.setState({ error: "OIDC not configured" }); 22 | return; 23 | } 24 | const queryString = window.location.search; 25 | const urlParams = new URLSearchParams(queryString); 26 | const code = urlParams.get("code"); 27 | 28 | if (code === null) { 29 | window.location = openidLoginUrl(this.props.config); 30 | } 31 | 32 | const params = new URLSearchParams(); 33 | params.append("grant_type", "authorization_code"); 34 | params.append("code", code); 35 | params.append("client_id", this.props.config["openid_client_id"]); 36 | params.append("redirect_uri", window.location.origin + "/auth"); 37 | axios 38 | .post(this.props.config["openid_url"] + "/token", params) 39 | .then((response) => { 40 | this.props.login(response.data["access_token"]); 41 | }) 42 | .catch((error) => { 43 | this.setState({ error: error }); 44 | }); 45 | } 46 | 47 | render() { 48 | const message = this.state.username 49 | ? `Logged in as ${JSON.stringify(this.state.username)}` 50 | : "Logging in..."; 51 | return ( 52 | 53 |
54 |

{message}

55 |
56 |
57 | ); 58 | } 59 | } 60 | 61 | export default AuthPage; 62 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/ActionCancel.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; 3 | import { faMinusCircle } from "@fortawesome/free-solid-svg-icons"; 4 | 5 | const ActionCancel = (props) => ( 6 | 15 | ); 16 | 17 | export default ActionCancel; 18 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/ActionCopyToClipboard.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; 3 | import { faCopy } from "@fortawesome/free-solid-svg-icons"; 4 | import { CopyToClipboard } from "react-copy-to-clipboard"; 5 | 6 | const ActionCopyToClipboard = (props) => ( 7 | 8 | 9 | 10 | 11 | 12 | ); 13 | 14 | export default ActionCopyToClipboard; 15 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/ActionDownload.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; 3 | import { faFileDownload } from "@fortawesome/free-solid-svg-icons"; 4 | 5 | const ActionDownload = (props) => ( 6 | 12 | 13 | 14 | ); 15 | 16 | export default ActionDownload; 17 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/ActionRemove.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; 3 | import { faTrashAlt } from 
"@fortawesome/free-solid-svg-icons"; 4 | 5 | const ActionRemove = (props) => ( 6 | 15 | ); 16 | 17 | export default ActionRemove; 18 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/ErrorBoundary.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | 3 | class ErrorBoundary extends Component { 4 | constructor(props) { 5 | super(props); 6 | 7 | let error = props.error ? props.error : null; 8 | this.state = { error: error }; 9 | } 10 | 11 | componentDidUpdate(prevProps, prevState, snapshot) { 12 | if (this.props.error !== this.state.error) { 13 | this.setState({ error: this.props.error }); 14 | } 15 | } 16 | 17 | componentDidCatch(error, info) { 18 | this.setState({ error: error }); 19 | } 20 | 21 | render() { 22 | if (this.state.error) { 23 | return ( 24 |
25 |
26 | {this.state.error.toString()} 27 |
28 |
29 | ); 30 | } 31 | 32 | return this.props.children; 33 | } 34 | } 35 | 36 | export default ErrorBoundary; 37 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/ErrorPage.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | const ErrorPage = (props) => ( 4 |
5 |

Error occurred

6 | {props.error} 7 |
8 | ); 9 | 10 | export default ErrorPage; 11 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/FilterIcon.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; 3 | import { faFilter } from "@fortawesome/free-solid-svg-icons"; 4 | 5 | const FilterIcon = (props) => ( 6 | 7 | 8 | 9 | ); 10 | 11 | export default FilterIcon; 12 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/FilteringTableHeader.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import FilteringThead from "./FilteringThead"; 3 | 4 | const FilteringTableHeader = (props) => { 5 | const head = props.head.map((el, index) => { 6 | const filterData = { 7 | attributeName: el.attributeName, 8 | valueList: el.valueList, 9 | onClick: props.onClick, 10 | }; 11 | 12 | return ( 13 | 19 | ); 20 | }); 21 | 22 | return ( 23 | 24 | {head} 25 | 26 | ); 27 | }; 28 | 29 | export default FilteringTableHeader; 30 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/FilteringThead.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import FilterIcon from "./FilterIcon"; 3 | 4 | const FilteringThead = (props) => { 5 | let activeColumn = false; 6 | let icon = null; 7 | 8 | if ( 9 | props.currentFilter && 10 | props.currentFilter.name === props.filterData.attributeName 11 | ) { 12 | activeColumn = true; 13 | icon = ( 14 | 15 | 16 | 17 | ); 18 | } 19 | 20 | let thContent; 21 | if (props.filterData && props.filterData.valueList) { 22 | const list = props.filterData.valueList.map((el, index) => { 23 | let activeItem = activeColumn && props.currentFilter.value === el; 24 | const 
itemStyle = "font-weight-" + (activeItem ? "bold" : "normal"); 25 | 26 | return ( 27 | 40 | ); 41 | }); 42 | 43 | thContent = ( 44 |
45 | 53 |
{list}
54 |
55 | ); 56 | } else thContent = props.title; 57 | 58 | return ( 59 | {thContent} 60 | ); 61 | }; 62 | 63 | export default FilteringThead; 64 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/FilteringTitle.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import FilterIcon from "./FilterIcon"; 3 | 4 | const FilteringTitle = (props) => { 5 | const { title, filterValue } = props; 6 | 7 | const filter = filterValue && ; 8 | return ( 9 |
10 |
11 |

{title}

12 |
13 | {filter} 14 | {filterValue} 15 |
16 |
17 | ); 18 | }; 19 | 20 | export default FilteringTitle; 21 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/LoadingPage.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; 3 | import { faSpinner } from "@fortawesome/free-solid-svg-icons"; 4 | 5 | const LoadingPage = () => ( 6 |

7 | 8 | Loading... 9 |

10 | ); 11 | 12 | export default LoadingPage; 13 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/QueryTimer.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | 3 | class QueryTimer extends Component { 4 | constructor(props) { 5 | super(props); 6 | this.state = { currentTime: 0 }; 7 | } 8 | 9 | tick() { 10 | this.setState({ 11 | currentTime: Math.floor(Date.now() / 1000), 12 | }); 13 | } 14 | 15 | componentDidMount() { 16 | this.interval = setInterval(() => this.tick(), 1000); 17 | } 18 | 19 | componentWillUnmount() { 20 | clearInterval(this.interval); 21 | } 22 | 23 | getRenderTime(seconds) { 24 | let minutes; 25 | if (seconds >= 60) { 26 | minutes = Math.floor(seconds / 60); 27 | seconds = seconds % 60; 28 | } 29 | 30 | return minutes ? ( 31 | 32 | {minutes}m {seconds}s 33 | 34 | ) : ( 35 | seconds >= 0 && {seconds}s 36 | ); 37 | } 38 | 39 | render() { 40 | if (!this.props.job.submitted) { 41 | return null; 42 | } 43 | 44 | if (this.props.isFinished) { 45 | const duration = this.props.job.finished - this.props.job.submitted; 46 | return Duration: {this.getRenderTime(duration)}; 47 | } 48 | 49 | let durationSec; 50 | if (this.props.duration) { 51 | durationSec = this.state.currentTime - this.props.job.submitted; 52 | } 53 | 54 | let countDownSec; 55 | if (this.props.job.files_processed > 0 && this.props.countDown) { 56 | let processedFiles = 57 | this.props.job.total_files / this.props.job.files_processed; 58 | let processedTime = 59 | this.state.currentTime - this.props.job.submitted; 60 | countDownSec = Math.round( 61 | processedFiles * processedTime - processedTime 62 | ); 63 | } 64 | 65 | if (this.props.duration && this.props.countDown) { 66 | return ( 67 | 68 | {this.getRenderTime(durationSec)} (~ 69 | {this.getRenderTime(countDownSec)} left) 70 | 71 | ); 72 | } else if (this.props.duration) { 73 | return 
{this.getRenderTime(durationSec)}; 74 | } else if (this.props.countDown) { 75 | return {this.getRenderTime(countDownSec)}; 76 | } 77 | } 78 | } 79 | export default QueryTimer; 80 | -------------------------------------------------------------------------------- /src/mqueryfront/src/components/WarningPage.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | const WarningPage = (props) => ( 4 |
5 |

Warning

6 | {props.msg} 7 | {props.dismissable && ( 8 |
16 | ); 17 | 18 | export default WarningPage; 19 | -------------------------------------------------------------------------------- /src/mqueryfront/src/config/ConfigPage.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import ErrorBoundary from "../components/ErrorBoundary"; 3 | import ConfigEntryList from "./ConfigEntries"; 4 | import api from "../api"; 5 | 6 | class ConfigPage extends Component { 7 | constructor(props) { 8 | super(props); 9 | 10 | this.state = { 11 | config: [], 12 | error: null, 13 | }; 14 | } 15 | 16 | componentDidMount() { 17 | api.get("/config") 18 | .then((response) => { 19 | this.setState({ config: response.data }); 20 | }) 21 | .catch((error) => { 22 | this.setState({ error: error }); 23 | }); 24 | } 25 | 26 | render() { 27 | return ( 28 | 29 |
30 |

Config

31 | 32 |
33 |
34 | ); 35 | } 36 | } 37 | 38 | export default ConfigPage; 39 | -------------------------------------------------------------------------------- /src/mqueryfront/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | padding: 0; 4 | font-family: sans-serif; 5 | } 6 | -------------------------------------------------------------------------------- /src/mqueryfront/src/index.js: -------------------------------------------------------------------------------- 1 | import "bootstrap/dist/css/bootstrap.min.css"; 2 | import "font-awesome/css/font-awesome.css"; 3 | import "bootstrap/dist/js/bootstrap.bundle.min"; 4 | import React from "react"; 5 | import ReactDOM from "react-dom"; 6 | import { BrowserRouter } from "react-router-dom"; 7 | import "./index.css"; 8 | import App from "./App"; 9 | 10 | ReactDOM.render( 11 | 12 | 13 | , 14 | document.getElementById("root") 15 | ); 16 | -------------------------------------------------------------------------------- /src/mqueryfront/src/indexFiles/IndexClearQueueButton.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import api from "../api"; 3 | 4 | class IndexClearQueueButton extends Component { 5 | render() { 6 | return ( 7 | 11 | 17 | 18 | ); 19 | } 20 | } 21 | 22 | export default IndexClearQueueButton; 23 | -------------------------------------------------------------------------------- /src/mqueryfront/src/indexFiles/IndexClearedPage.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | const IndexClearedPage = (props) => ( 4 |
5 |

Cleared!

6 | {props.msg} 7 |
15 | ); 16 | 17 | export default IndexClearedPage; 18 | -------------------------------------------------------------------------------- /src/mqueryfront/src/indexFiles/IndexMultiSelect.js: -------------------------------------------------------------------------------- 1 | import { Component } from "react"; 2 | import Select from "react-select"; 3 | 4 | class IndexMultiselect extends Component { 5 | get optionsList() { 6 | return this.props.options.map((obj) => ({ 7 | label: obj, 8 | value: obj, 9 | })); 10 | } 11 | 12 | render() { 13 | return ( 14 | 28 | ); 29 | }; 30 | 31 | export default QuerySearchNav; 32 | -------------------------------------------------------------------------------- /src/mqueryfront/src/query/QuerySubmitNav.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | const QuerySubmitNav = (props) => { 4 | const { onClick, forceMode } = props; 5 | 6 | const label = forceMode ? "Force query (may be very slow!)" : "Query"; 7 | const style = forceMode ? "btn-danger" : "btn-success"; 8 | 9 | return ( 10 | 17 | ); 18 | }; 19 | 20 | export default QuerySubmitNav; 21 | -------------------------------------------------------------------------------- /src/mqueryfront/src/recent/SearchJobItem.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Link } from "react-router-dom"; 3 | import ActionRemove from "../components/ActionRemove"; 4 | import ActionCancel from "../components/ActionCancel"; 5 | import QueryProgressBar from "../components/QueryProgressBar"; 6 | import { isStatusFinished } from "../utils"; 7 | 8 | export const SearchJobItemEmpty = () => { 9 | return ( 10 | 11 | 12 |
13 |   14 |
15 |

16 |   17 |

18 | 19 | 20 | 21 | 22 | 23 | ); 24 | }; 25 | 26 | const SearchJobItem = (props) => { 27 | const { job, onRemove, onCancel } = props; 28 | const { id, status, submitted, rule_name } = job; 29 | const rule_author = props.job.rule_author 30 | ? props.job.rule_author 31 | : "(no author)"; 32 | const isFinished = isStatusFinished(status); 33 | const submittedDate = new Date(submitted * 1000).toISOString(); 34 | const actionBtn = isFinished ? ( 35 | 36 | ) : ( 37 | 38 | ); 39 | 40 | return ( 41 | 42 | 43 |
44 |
48 | 52 | {rule_name} 53 | 54 |
55 |
56 |

57 | {submittedDate} 58 |

59 | 60 | {rule_author} 61 | 62 | 63 | 64 | {actionBtn} 65 | 66 | ); 67 | }; 68 | 69 | export default SearchJobItem; 70 | -------------------------------------------------------------------------------- /src/mqueryfront/src/recent/SearchJobs.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import FilteringTableHeader from "../components/FilteringTableHeader"; 3 | import FilteringTitle from "../components/FilteringTitle"; 4 | import Pagination from "replace-js-pagination"; 5 | import SearchJobItem, { SearchJobItemEmpty } from "./SearchJobItem"; 6 | 7 | const SearchJobs = (props) => { 8 | const { 9 | jobs, 10 | head, 11 | filter, 12 | onCancel, 13 | onRemove, 14 | onFilter, 15 | pagination, 16 | } = props; 17 | 18 | const filterValue = filter ? filter.value : null; 19 | 20 | const backendJobRows = jobs.map((job) => ( 21 | onRemove(job.id)} 25 | onCancel={() => onCancel(job.id)} 26 | /> 27 | )); 28 | 29 | // make table itemsCountPerPage size 30 | while (backendJobRows.length < pagination.itemsCountPerPage) { 31 | backendJobRows.push(); 32 | } 33 | 34 | return ( 35 |
36 |
37 | 38 | 39 | 44 | {backendJobRows} 45 |
46 |
47 | 56 |
57 |
58 |
59 | ); 60 | }; 61 | 62 | export default SearchJobs; 63 | -------------------------------------------------------------------------------- /src/mqueryfront/src/setupProxy.js: -------------------------------------------------------------------------------- 1 | const { createProxyMiddleware } = require("http-proxy-middleware"); 2 | 3 | module.exports = function (app) { 4 | app.use(createProxyMiddleware("/api", { target: "http://dev-web:5000/" })); 5 | app.use(createProxyMiddleware("/docs", { target: "http://dev-web:5000/" })); 6 | app.use( 7 | createProxyMiddleware("/openapi.json", { 8 | target: "http://dev-web:5000/", 9 | }) 10 | ); 11 | }; 12 | -------------------------------------------------------------------------------- /src/mqueryfront/src/status/BackendStatus.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | 3 | class BackendJobRow extends Component { 4 | render() { 5 | let shortRequest = this.props.request; 6 | if (shortRequest.length > 200) { 7 | let prefix = shortRequest.substring(0, 140); 8 | let suffix = shortRequest.substring(shortRequest.length - 60, 60); 9 | shortRequest = prefix + " (...) " + suffix; 10 | } 11 | return ( 12 | 13 | {this.props.id} 14 | {this.props.connection_id} 15 | 16 | {shortRequest} 17 | 18 | 19 | {this.props.work_done} / {this.props.work_estimated} 20 | 21 | 22 | ); 23 | } 24 | } 25 | 26 | class AgentStatus extends Component { 27 | render() { 28 | const backendJobRows = this.props.tasks.map((task) => ( 29 | 30 | )); 31 | 32 | let badge = null; 33 | if (!this.props.alive) { 34 | badge = ( 35 | offline 36 | ); 37 | } 38 | 39 | return ( 40 |
41 |

46 | Agent: {this.props.name} {badge} 47 |

48 |
49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | {backendJobRows} 59 |
IDConnectionRequestProgress
60 |
61 |
62 | ); 63 | } 64 | } 65 | 66 | class BackendStatus extends Component { 67 | render() { 68 | const agentRows = this.props.agents.map((agent) => ( 69 | 76 | )); 77 | 78 | return
{agentRows}
; 79 | } 80 | } 81 | 82 | export default BackendStatus; 83 | -------------------------------------------------------------------------------- /src/mqueryfront/src/status/StatusPage.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import ErrorBoundary from "../components/ErrorBoundary"; 3 | import BackendStatus from "./BackendStatus"; 4 | import DatabaseTopology from "./DatabaseTopology"; 5 | import VersionStatus from "./VersionStatus"; 6 | import api from "../api"; 7 | import WarningPage from "../components/WarningPage"; 8 | import { Link } from "react-router-dom"; 9 | 10 | class StatusPage extends Component { 11 | constructor(props) { 12 | super(props); 13 | 14 | this.state = { 15 | backend: { 16 | agents: [], 17 | components: [], 18 | }, 19 | error: null, 20 | ursaIDs: [], 21 | }; 22 | } 23 | 24 | componentDidMount() { 25 | api.get("/backend") 26 | .then((response) => { 27 | this.setState({ 28 | backend: response.data, 29 | ursaIDs: response.data.agents.map( 30 | (agent) => agent.spec.name 31 | ), // TODO: collect from endpoint 32 | }); 33 | }) 34 | .catch((error) => { 35 | this.setState({ error: error }); 36 | }); 37 | this._ismounted = true; 38 | } 39 | 40 | getAgentsUrsaURLDuplicatesWarning(agentgroups) { 41 | var ursaURLS = agentgroups.map((agent) => agent.spec.ursadb_url); 42 | var duplicateURLS = ursaURLS.filter( 43 | (url, index) => ursaURLS.indexOf(url) !== index 44 | ); 45 | if (!duplicateURLS.length) { 46 | return null; 47 | } 48 | return `At least two agents share the same UrsaDB URL(s): \ 49 | ${duplicateURLS.join( 50 | ", " 51 | )}. Something might be wrong with backend configuration.`; 52 | } 53 | 54 | getNoAgentsWarning(agentgroups) { 55 | if (agentgroups.length) { 56 | return null; 57 | } 58 | return "There are no connected agents! 
Check your backend configuration."; 59 | } 60 | 61 | render() { 62 | const ursaURLWarning = this.getAgentsUrsaURLDuplicatesWarning( 63 | this.state.backend.agents 64 | ); 65 | const noAgentsWarning = this.getNoAgentsWarning( 66 | this.state.backend.agents 67 | ); 68 | return ( 69 | 70 |
71 | {this._ismounted && ursaURLWarning && ( 72 | 73 | )} 74 |

Status

75 |
76 |
77 | 80 | {this._ismounted && noAgentsWarning ? ( 81 | 82 | ) : ( 83 | 86 | )} 87 |
88 |
89 | 90 |
91 |
92 |
93 |
94 | ); 95 | } 96 | } 97 | 98 | export default StatusPage; 99 | -------------------------------------------------------------------------------- /src/mqueryfront/src/status/VersionStatus.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import { Link } from "react-router-dom"; 3 | 4 | class VersionStatus extends Component { 5 | render() { 6 | function makeButton(component) { 7 | const match = component.match(/^ursadb \(([^)]+)\)$/); 8 | if (!match) { 9 | return component; 10 | } 11 | const ursadb_id = match[1]; 12 | return ( 13 | 14 | {component} 15 | 16 | ); 17 | } 18 | 19 | let rows = Object.keys(this.props.components).map((component) => ( 20 | 21 | {makeButton(component)} 22 | {this.props.components[component]} 23 | 24 | )); 25 | 26 | return ( 27 |
28 |

System Version

29 |
30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | {rows} 38 |
ComponentVersion
39 |
40 |
41 | ); 42 | } 43 | } 44 | 45 | export default VersionStatus; 46 | -------------------------------------------------------------------------------- /src/mqueryfront/src/utils.js: -------------------------------------------------------------------------------- 1 | export const isStatusFinished = (status) => 2 | ["done", "cancelled"].includes(status); 3 | 4 | const statusClassMap = { 5 | done: "success", 6 | new: "info", 7 | processing: "info", 8 | cancelled: "danger", 9 | }; 10 | 11 | export const isAuthEnabled = (config) => 12 | config && config["auth_enabled"] && config["auth_enabled"] !== "false"; 13 | 14 | export const openidLoginUrl = (config) => { 15 | if (config["openid_url"] === null || config["openid_client_id"] === null) { 16 | // Defensive programming - config keys can be null. 17 | return "#"; 18 | } 19 | const login_url = new URL(config["openid_url"] + "/auth"); 20 | login_url.searchParams.append("client_id", config["openid_client_id"]); 21 | login_url.searchParams.append("response_type", "code"); 22 | login_url.searchParams.append( 23 | "redirect_uri", 24 | window.location.origin + "/auth" 25 | ); 26 | return login_url; 27 | }; 28 | -------------------------------------------------------------------------------- /src/mqueryfront/vite.config.js: -------------------------------------------------------------------------------- 1 | import { defineConfig } from "vite"; 2 | import react from "@vitejs/plugin-react"; 3 | import svgr from "vite-plugin-svgr"; 4 | const fs = require("fs").promises; 5 | 6 | export default defineConfig({ 7 | base: "/", 8 | plugins: [react(), svgr({ svgrOptions: {} })], 9 | server: { 10 | port: 3000, 11 | proxy: { 12 | "/api": { target: "http://dev-web:5000/" }, 13 | "/docs": { target: "http://dev-web:5000/" }, 14 | "/openapi.json": { target: "http://dev-web:5000/" }, 15 | }, 16 | }, 17 | esbuild: { 18 | loader: "jsx", 19 | include: /src\/.*\.jsx?$/, 20 | exclude: [], 21 | }, 22 | optimizeDeps: { 23 | esbuildOptions: { 24 | loader: 
{ 25 | ".js": "jsx", 26 | }, 27 | plugins: [ 28 | { 29 | name: "load-js-files-as-jsx", 30 | setup(build) { 31 | build.onLoad( 32 | { filter: /src\/.*\.js$/ }, 33 | async (args) => { 34 | return { 35 | loader: "jsx", 36 | contents: await fs.readFile( 37 | args.path, 38 | "utf8" 39 | ), 40 | }; 41 | } 42 | ); 43 | }, 44 | }, 45 | ], 46 | }, 47 | }, 48 | }); 49 | -------------------------------------------------------------------------------- /src/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import List, Type, Optional 2 | from importlib import import_module 3 | import logging 4 | 5 | from ..metadata import MetadataPlugin 6 | from ..db import Database 7 | 8 | 9 | def parse_plugin_list(plugins: str) -> List[str]: 10 | """Parses and validates a plugin list into a list of non-empty components 11 | divided by `,`. 12 | 13 | >>> parse_plugin_list("plugins.Test:A, plugins.Other:A") 14 | ["plugins.Test:A", "plugins.Other:A"] 15 | 16 | >>> parse_plugin_list("") 17 | [] 18 | 19 | :param plugins: String with a list of comma separated plugins 20 | :return: List of plugins, with no unnecessary spaces. 
21 | """ 22 | result = [] 23 | for desc in plugins.split(","): 24 | desc = desc.strip() 25 | if not desc: 26 | continue 27 | assert ":" in desc 28 | result.append(desc) 29 | return result 30 | 31 | 32 | def load_plugins(specs: List[str]) -> List[Type[MetadataPlugin]]: 33 | result = [] 34 | for spec in specs: 35 | module, classname = spec.split(":") 36 | moduleobj = import_module(module) 37 | result.append(getattr(moduleobj, classname)) 38 | return result 39 | 40 | 41 | class PluginManager: 42 | def __init__(self, spec: str, db: Database) -> None: 43 | self.plugin_classes = load_plugins(parse_plugin_list(spec)) 44 | 45 | active_plugins = [] 46 | for plugin_class in self.plugin_classes: 47 | plugin_name = plugin_class.get_name() 48 | plugin_config = db.get_plugin_config(plugin_name) 49 | try: 50 | active_plugins.append(plugin_class(db, plugin_config)) 51 | logging.info("Loaded plugin %s", plugin_name) 52 | except Exception: 53 | logging.exception("Failed to load %s plugin", plugin_name) 54 | self.active_plugins = active_plugins 55 | 56 | def filter(self, orig_name: str) -> Optional[str]: 57 | """Runs all available filter plugins on the provided file. 58 | Returns new file path, or None. User should call cleanup() later. 59 | """ 60 | current_path = orig_name 61 | for plugin in self.active_plugins: 62 | if not plugin.is_filter: 63 | continue 64 | 65 | new_path = plugin.filter(orig_name, current_path) 66 | if not new_path: 67 | return None 68 | 69 | current_path = new_path 70 | 71 | return current_path 72 | 73 | def cleanup(self) -> None: 74 | """Clean up all plugin state. Worth stressing that plugins are *not* thread 75 | safe, and running filter() and cleanup() from different threads will cause 76 | problems. Running a plugin multiple times before the cleanup should be ok. 
77 | """ 78 | for plugin in self.active_plugins: 79 | plugin.cleanup() 80 | -------------------------------------------------------------------------------- /src/plugins/archive.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, IO 2 | import gzip 3 | import shutil 4 | import tempfile 5 | 6 | from ..db import Database 7 | from ..metadata import MetadataPlugin, MetadataPluginConfig 8 | 9 | 10 | class GzipPlugin(MetadataPlugin): 11 | """Can be used to automatically extract gzip contents before running 12 | Yara on them. This plugin will look for all files that end with .gz, 13 | and add extract them to disk before further processing. 14 | """ 15 | 16 | is_filter = True 17 | 18 | def __init__(self, db: Database, config: MetadataPluginConfig) -> None: 19 | super().__init__(db, config) 20 | self.tmpfiles: List[IO[bytes]] = [] 21 | 22 | def filter(self, orig_name: str, file_path: str) -> Optional[str]: 23 | if orig_name.endswith(".gz"): 24 | tmp = tempfile.NamedTemporaryFile() 25 | self.tmpfiles.append(tmp) 26 | with gzip.open(file_path, "rb") as f_in: 27 | with open(tmp.name, "wb") as f_out: 28 | shutil.copyfileobj(f_in, f_out) 29 | return tmp.name 30 | 31 | return file_path 32 | 33 | def clean(self): 34 | for tmp in self.tmpfiles: 35 | tmp.close() 36 | self.tmpfiles = [] 37 | -------------------------------------------------------------------------------- /src/plugins/blacklist.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Optional 3 | 4 | from ..metadata import MetadataPlugin, MetadataPluginConfig 5 | from ..db import Database 6 | 7 | 8 | class RegexBlacklistPlugin(MetadataPlugin): 9 | """Can be used to ignore files with filenames matching a certain 10 | pattern. For example, to ignore all pcap files, set blacklist_pattern 11 | to "[.]pcap$". 
12 | """ 13 | 14 | is_filter = True 15 | config_fields = { 16 | "blacklist_pattern": "Regular expression for files that should be ignored", 17 | } 18 | 19 | def __init__(self, db: Database, config: MetadataPluginConfig) -> None: 20 | super().__init__(db, config) 21 | self.blacklist_pattern = config["blacklist_pattern"] 22 | 23 | def filter(self, orig_name: str, file_path: str) -> Optional[str]: 24 | if re.search(self.blacklist_pattern, orig_name): 25 | return None 26 | return file_path 27 | -------------------------------------------------------------------------------- /src/plugins/cuckoo_analysis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from typing import Optional 4 | 5 | from ..db import Database 6 | from ..metadata import Metadata, MetadataPlugin, MetadataPluginConfig 7 | 8 | 9 | class CuckooAnalysisMetadata(MetadataPlugin): 10 | cacheable = True 11 | is_extractor = True 12 | config_fields = {"path": "Root of cuckoo analysis directory."} 13 | 14 | def __init__(self, db: Database, config: MetadataPluginConfig) -> None: 15 | super().__init__(db, config) 16 | self.path = config["path"] 17 | 18 | def identify(self, matched_fname: str) -> Optional[str]: 19 | m = re.search(r"analyses/([0-9]+)/", matched_fname) 20 | if not m: 21 | return None 22 | return m.group(1) 23 | 24 | def extract( 25 | self, identifier: str, matched_fname: str, current_meta: Metadata 26 | ) -> Metadata: 27 | try: 28 | target = os.readlink(self.path + "{}/binary".format(identifier)) 29 | except OSError: 30 | return {} 31 | 32 | binary_hash = target.split("/")[-1] 33 | 34 | obj = { 35 | "cuckoo_hash": {"value": binary_hash}, 36 | "cuckoo_analysis": { 37 | "display_text": "cuckoo:{}".format(identifier), 38 | "value": identifier, 39 | }, 40 | } 41 | return obj 42 | -------------------------------------------------------------------------------- /src/plugins/cuckoo_binaries.py: 
# --- src/plugins/cuckoo_binaries.py ---

import re
from typing import Optional

from ..metadata import Metadata, MetadataPlugin


class CuckooBinariesMetadata(MetadataPlugin):
    """Extracts the sample hash from cuckoo ``.../binaries/<hash>`` paths."""

    is_extractor = True

    def identify(self, matched_fname: str) -> Optional[str]:
        match = re.search(r"/binaries/([a-f0-9]+)$", matched_fname)
        return match.group(1) if match else None

    def extract(
        self, identifier: str, matched_fname: str, current_meta: Metadata
    ) -> Metadata:
        # The path component itself is the sample hash.
        return {"cuckoo_hash": {"value": identifier}}


# --- src/plugins/example_plugin.py ---

from ..db import Database
from ..metadata import Metadata, MetadataPlugin, MetadataPluginConfig


class ExampleTagPlugin(MetadataPlugin):
    """A minimal (almost) example of an extractor plugin: tags every
    processed file with the configured tag and URL.
    """

    cacheable = True
    is_extractor = True
    config_fields = {
        "tag": "Everything will be tagged using that tag",
        "tag_url": "Tag URL e.g. http://google.com?q={tag}",
    }

    def __init__(self, db: Database, config: MetadataPluginConfig) -> None:
        super().__init__(db, config)
        self.tag = config["tag"]
        self.tag_url = config["tag_url"]

    def extract(
        self, identifier: str, matched_fname: str, current_meta: Metadata
    ) -> Metadata:
        tag_entry = {"display_text": self.tag, "url": self.tag_url}
        return {"example_tag": tag_entry}


# --- src/plugins/example_typed_config_plugin.py ---

"""Plugin that serves as an example how to use existing typed-config
machinery to configure your own plugins.
"""

from typedconfig import Config, key, section

from ..db import Database
from ..metadata import Metadata, MetadataPlugin, MetadataPluginConfig
from ..config import app_config


@section("plugin.example")
class ExamplePluginConfig(Config):
    """Plugin configuration."""

    tag = key(cast=str)
    tag_url = key(cast=str)


# You will need to add this to your config file (or use env vars):
#
# [plugin.example]
# tag=kot
# tag_url=http://google.com


class ExamplePluginWithTypedConfig(MetadataPlugin):
    """Demonstrates configuring a plugin through typed-config and the
    mquery config file; behaves exactly like ExampleTagPlugin otherwise.
    """

    is_extractor = True

    def __init__(self, db: Database, config: MetadataPluginConfig) -> None:
        super().__init__(db, config)
        typed_config = ExamplePluginConfig(provider=app_config.provider)
        self.tag = typed_config.tag
        self.tag_url = typed_config.tag_url

    def extract(
        self, identifier: str, matched_fname: str, current_meta: Metadata
    ) -> Metadata:
        tag_entry = {"display_text": self.tag, "url": self.tag_url}
        return {"example_tag": tag_entry}


# --- src/plugins/mwdb_uploads.py ---

import re
import urllib.parse
from typing import Optional
from mwdblib import Malwarecage  # type: ignore

from ..db import Database
from ..metadata import Metadata, MetadataPlugin, MetadataPluginConfig


class MalwarecageUploadsMetadata(MetadataPlugin):
    """Links matched samples back to a Malwarecage (mwdb) instance and
    mirrors their tags as queryable metadata.
    """

    cacheable = False
    is_extractor = True
    config_fields = {
        "mwdb_url": "URL to the Malwarecage instance (e.g. https://mwdb.cert.pl/)",
        "mwdb_api_url": "API URL to the Malwarecage instance (e.g. https://mwdb.cert.pl/api/)",
        "mwdb_api_token": "API key for 'mquery' user in Malwarecage (base64-encoded, starts with ey...)",
    }

    def __init__(self, db: Database, config: MetadataPluginConfig) -> None:
        super().__init__(db, config)
        self.mwdb = Malwarecage(
            api_url=config["mwdb_api_url"], api_key=config["mwdb_api_token"]
        )
        self.mwdb_url = config["mwdb_url"]

    def identify(self, matched_fname: str) -> Optional[str]:
        # '/uploads' Malwarecage directory format, e.g.
        # /mnt/samples/9/d/c/5/9dc571ae13a62954155999cae9cecc4f0689e2ba...
        match = re.search(
            r"/([a-f0-9])/([a-f0-9])/([a-f0-9])/([a-f0-9])/(\1\2\3\4[a-f0-9]+)$",
            matched_fname,
        )
        return match.group(5) if match else None

    def extract(
        self, identifier: str, matched_fname: str, current_meta: Metadata
    ) -> Metadata:
        metadata: Metadata = {}
        sample = self.mwdb.query(identifier, raise_not_found=False)
        if not sample:
            return metadata

        for tag in sample.tags:
            query = urllib.parse.urlencode({"q": f'tag:"{tag}"'})
            # Add queryable metadata for each tag from Malwarecage.
            metadata[f"mwdb_tag_{tag}"] = {
                "display_text": tag,
                "url": f"{self.mwdb_url}?{query}",
            }

        # Add metadata with link to sample in Malwarecage instance.
        metadata["mwdb_analysis"] = {
            "display_text": "mwdb",
            "url": f"{self.mwdb_url}sample/{identifier}",
        }

        # Add metakey with the job identifier.
        sample.add_metakey("mquery", current_meta["job"])
        return metadata


# --- src/plugins/requirements-mwdb_uploads.txt ---
# mwdblib==3.2.1
# --- src/plugins/s3_plugin.py ---

from typing import Optional, List, IO
import os
import shutil
import tempfile
from minio import Minio  # type: ignore

from ..db import Database
from ..metadata import MetadataPlugin, MetadataPluginConfig


class S3Plugin(MetadataPlugin):
    """Can be used to download files from minio prior to running yara.
    Names of the files in configured bucket must be equal to basenames
    (filenames without paths) of matched files.
    """

    is_filter = True
    config_fields = {
        "s3_url": "Url of the S3 server.",
        "s3_bucket": "Bucket where the samples are stored.",
        "s3_access_key": "S3 access key.",
        "s3_secret_key": "S3 secret key.",
        "s3_secure": "Use https? Set to 'true' or 'false'.",
    }

    def __init__(self, db: Database, config: MetadataPluginConfig) -> None:
        super().__init__(db, config)
        # Temporary files downloaded for the current batch; closed in cleanup().
        self.tmpfiles: List[IO[bytes]] = []

        assert config["s3_secure"] in ["true", "false"]
        self.minio = Minio(
            config["s3_url"],
            config["s3_access_key"],
            config["s3_secret_key"],
            secure=config["s3_secure"] == "true",
        )
        self.bucket = config["s3_bucket"]

    def filter(self, orig_name: str, file_path: str) -> Optional[str]:
        if orig_name != file_path:
            # We override the file. It doesn't make sense to use other
            # content-modifying filters before the s3 plugin.
            raise RuntimeError("S3 plugin should be the first filter")

        name = os.path.basename(orig_name)
        tmp = tempfile.NamedTemporaryFile()
        self.tmpfiles.append(tmp)

        response = self.minio.get_object(self.bucket, name)
        try:
            with open(tmp.name, "wb") as f_out:
                shutil.copyfileobj(response, f_out)
        finally:
            # urllib3 responses must be both closed and released back to
            # the connection pool.
            response.close()
            response.release_conn()
        return tmp.name

    def cleanup(self) -> None:
        # PluginManager.cleanup() invokes plugin.cleanup(); this method used
        # to be named clean(), which - unless the MetadataPlugin base class
        # forwards cleanup() to clean() (TODO: confirm against metadata.py) -
        # was never invoked and leaked the downloaded temporary files.
        for tmp in self.tmpfiles:
            tmp.close()
        self.tmpfiles = []

    # Backward-compatible alias for any caller that used the old name.
    clean = cleanup


# --- src/schema.py ---
# Pydantic request/response models shared by the API layer.

from datetime import datetime
from enum import Enum
from typing import List, Dict, Optional, Sequence, Literal
from pydantic import BaseModel, Field  # type: ignore
from .models.job import JobView
from .models.agentgroup import AgentGroupView


class JobsSchema(BaseModel):
    jobs: Sequence[JobView]


class ConfigSchema(BaseModel):
    plugin: str
    key: str
    value: str
    description: str


class TaskSchema(BaseModel):
    connection_id: str
    epoch_ms: int
    id: str
    request: str
    work_done: int
    work_estimated: int


class RequestQueryMethod(str, Enum):
    query = "query"
    parse = "parse"


class RequestConfigEdit(BaseModel):
    plugin: str
    key: str
    value: str


class QueryRequestSchema(BaseModel):
    raw_yara: str
    taints: Optional[List[str]]
    method: str
    files_limit: Optional[int]
    reference: Optional[str]  # arbitrary data specified by the user
    # default_factory avoids sharing a single mutable default list
    # (behavior-equivalent to Field([]), but explicit and idiomatic).
    required_plugins: List[str] = Field(default_factory=list)
    force_slow_queries: bool = False


class QueryResponseSchema(BaseModel):
    query_hash: str


class ParseResponseSchema(BaseModel):
    rule_name: str
    rule_author: str
    is_global: bool
    is_private: bool
    is_degenerate: bool
    parsed: str


class MatchesSchema(BaseModel):
    job: JobView
    matches: List[Dict]


class StatusSchema(BaseModel):
    status: str


class UserSettingsSchema(BaseModel):
    can_register: bool
    plugin_name: str


class UserInfoSchema(BaseModel):
    id: int
    name: str


class UserAuthSchema(BaseModel):
    username: str
    password: str


class AgentSchema(BaseModel):
    name: str
    alive: bool
    tasks: List
    spec: AgentGroupView


class BackendStatusSchema(BaseModel):
    agents: List[AgentSchema]
    components: Dict[str, str]


class BackendStatusDatasetsSchema(BaseModel):
    datasets: Dict


class ServerSchema(BaseModel):
    version: str
    auth_enabled: Optional[str]
    openid_url: Optional[str]
    openid_client_id: Optional[str]
    about: str


class FileToQueueSchema(BaseModel):
    path: str
    index_types: List[Literal["gram3", "text4", "hash4", "wide8"]]
    tags: List[str]


class QueueStatusDatabaseSchema(BaseModel):
    size: int
    oldest_file: Optional[datetime]
    newest_file: Optional[datetime]


class QueueStatusSchema(QueueStatusDatabaseSchema):
    ursadb_id: str


# --- src/scripts/mquery-daemon (console entry point, preserved verbatim) ---
# #!/usr/bin/env python
#
# import mquery.daemon
#
# mquery.daemon.main()
-------------------------------------------------------------------------------- /src/tests/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | RUN apt update; apt install -y cmake 3 | COPY src/tests/requirements.txt /app/requirements.txt 4 | RUN pip3 install -r /app/requirements.txt 5 | COPY src/ /app/ 6 | WORKDIR /app 7 | CMD ["python", "-m", "pytest", "--log-cli-level=INFO", "tests/"] 8 | -------------------------------------------------------------------------------- /src/tests/README.md: -------------------------------------------------------------------------------- 1 | # Unit tests 2 | 3 | Small tests that should execute quickly. It may be useful to run them before 4 | every commit. 5 | 6 | To build and run, execute the following: 7 | 8 | ```bash 9 | $ docker build -t mquery_tests -f ./src/tests/Dockerfile . 10 | $ docker run mquery_tests 11 | ``` 12 | -------------------------------------------------------------------------------- /src/tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest==7.1.2 2 | pytest-timeout==1.3.4 3 | pyzmq==24.0.1 4 | yaramod==3.8.0 5 | -------------------------------------------------------------------------------- /src/tests/test_ursadb.py: -------------------------------------------------------------------------------- 1 | """Unit-tests for the ursadb library.""" 2 | 3 | import sys 4 | import pytest # type: ignore 5 | import json 6 | import zmq # type: ignore 7 | import threading 8 | from typing import Dict, Any 9 | 10 | sys.path = [".."] + sys.path 11 | from lib.ursadb import UrsaDb # noqa 12 | 13 | 14 | class UrsadbTestContext: 15 | def __init__(self, socket: zmq.Socket, ursadb: UrsaDb): 16 | self.socket = socket 17 | self.ursadb = ursadb 18 | 19 | def expect(self, request: str, response: Dict[str, Any]): 20 | def server_side(): 21 | assert self.socket.recv_string() == request 22 | 
self.socket.send_string(json.dumps(response)) 23 | 24 | threading.Thread(target=server_side).start() 25 | 26 | 27 | @pytest.fixture(scope="session", autouse=True) 28 | def db_context(request): 29 | IPC = "ipc:///tmp/ursadb-test" 30 | context = zmq.Context() 31 | socket = context.socket(zmq.REP) 32 | socket.bind(IPC) 33 | return UrsadbTestContext(socket, UrsaDb(IPC)) 34 | 35 | 36 | def test_successful_iterator_pop(db_context: UrsadbTestContext): 37 | db_context.expect( 38 | 'iterator "iter_id" pop 3;', 39 | { 40 | "result": { 41 | "files": ["hmm", "xyz", "www"], 42 | "iterator_position": 3, 43 | "total_files": 100, 44 | } 45 | }, 46 | ) 47 | 48 | result = db_context.ursadb.pop("iter_id", 3) 49 | assert not result.iterator_empty 50 | assert result.files == ["hmm", "xyz", "www"] 51 | assert not result.was_locked 52 | 53 | 54 | def test_incomplete_iterator_pop(db_context: UrsadbTestContext): 55 | db_context.expect( 56 | 'iterator "iter_id" pop 3;', 57 | { 58 | "result": { 59 | "files": ["hmm"], 60 | "iterator_position": 3, 61 | "total_files": 100, 62 | } 63 | }, 64 | ) 65 | 66 | result = db_context.ursadb.pop("iter_id", 3) 67 | assert not result.iterator_empty 68 | assert result.files == ["hmm"] 69 | assert not result.was_locked 70 | 71 | 72 | def test_iterator_pop_error(db_context: UrsadbTestContext): 73 | db_context.expect( 74 | 'iterator "iter_id" pop 3;', 75 | {"error": {"message": "something didn't work"}}, 76 | ) 77 | 78 | result = db_context.ursadb.pop("iter_id", 3) 79 | assert result.iterator_empty 80 | assert result.files == [] 81 | assert not result.was_locked 82 | 83 | 84 | def test_locked_iterator(db_context: UrsadbTestContext): 85 | db_context.expect( 86 | 'iterator "iter_id" pop 3;', 87 | {"error": {"message": "something didn't work", "retry": True}}, 88 | ) 89 | 90 | result = db_context.ursadb.pop("iter_id", 3) 91 | assert not result.iterator_empty 92 | assert result.files == [] 93 | assert result.was_locked 94 | 
-------------------------------------------------------------------------------- /src/tests/test_yaraparse.py: -------------------------------------------------------------------------------- 1 | """Unit tests for yaraparse.""" 2 | 3 | from lib.yaraparse import ursify_hex, ursify_plain_string, parse_yara 4 | import yaramod 5 | 6 | 7 | def test_literal(): 8 | hex_str = "3F2504E0" 9 | result = ursify_hex(hex_str) 10 | 11 | assert result.query == "({3f2504e0})" 12 | 13 | 14 | def test_literal_wildcard(): 15 | hex_str = "3F25??04E0" 16 | result = ursify_hex(hex_str) 17 | 18 | assert result.query == "({3f25} & {04e0})" 19 | 20 | 21 | def test_literal_alternative(): 22 | hex_str = "11(22|33)44" 23 | result = ursify_hex(hex_str) 24 | 25 | assert result.query == "({11} & {44})" 26 | 27 | 28 | def test_literal_to_hex(): 29 | rule = yaramod.YaraRuleBuilder().with_plain_string("$str", "abc").get() 30 | 31 | new_file = yaramod.YaraFileBuilder() 32 | yara_file = new_file.with_rule(rule).get() 33 | 34 | ascii_str = yara_file.rules[0].strings[0] 35 | result = ursify_plain_string(ascii_str.pure_text, is_ascii=True) 36 | 37 | assert result.query == "{616263}" 38 | 39 | 40 | def rule_to_query(rule): 41 | result = parse_yara(rule) 42 | (rule,) = result 43 | parsed = rule.parse() 44 | return parsed.query 45 | 46 | 47 | def test_condition_gt(): 48 | query = rule_to_query( 49 | """ 50 | rule test { 51 | strings: 52 | $x = "test" 53 | condition: 54 | #x > 1 55 | }""" 56 | ) 57 | assert query == "{74657374}" 58 | 59 | 60 | def test_condition_lt(): 61 | query = rule_to_query( 62 | """ 63 | rule test { 64 | strings: 65 | $x = "test" 66 | condition: 67 | 1 < #x 68 | }""" 69 | ) 70 | assert query == "{74657374}" 71 | 72 | 73 | def test_condition_ge(): 74 | query = rule_to_query( 75 | """ 76 | rule test { 77 | strings: 78 | $x = "test" 79 | condition: 80 | #x >= 1 81 | }""" 82 | ) 83 | assert query == "{74657374}" 84 | 85 | 86 | def test_condition_gt_reversed(): 87 | query = rule_to_query( 88 
| """ 89 | rule test { 90 | strings: 91 | $x = "test" 92 | condition: 93 | 1 < #x 94 | }""" 95 | ) 96 | assert query == "{74657374}" 97 | 98 | 99 | def test_condition_eq(): 100 | query = rule_to_query( 101 | """ 102 | rule test { 103 | strings: 104 | $x = "test" 105 | condition: 106 | #x == 1 107 | }""" 108 | ) 109 | assert query == "{74657374}" 110 | 111 | 112 | def test_condition_eq_rev(): 113 | query = rule_to_query( 114 | """ 115 | rule test { 116 | strings: 117 | $x = "test" 118 | condition: 119 | 1 == #x 120 | }""" 121 | ) 122 | assert query == "{74657374}" 123 | 124 | 125 | def test_condition_eq0(): 126 | query = rule_to_query( 127 | """ 128 | rule test { 129 | strings: 130 | $x = "test" 131 | condition: 132 | #x == 0 133 | }""" 134 | ) 135 | assert query == "{}" 136 | 137 | 138 | def test_condition_eq_syms(): 139 | query = rule_to_query( 140 | """ 141 | rule test { 142 | strings: 143 | $x = "test" 144 | $y = "welp" 145 | condition: 146 | #x == #y 147 | }""" 148 | ) 149 | assert query == "{}" 150 | -------------------------------------------------------------------------------- /src/tests/yararules/README.md: -------------------------------------------------------------------------------- 1 | Generation of regression tests files for Yara rules 2 | ======= 3 | 4 | Test data should be formed in pairs of Yara rule file and result txt file: 5 | 6 | E.g.: hex_simple_rule and hex_simple_rule.txt 7 | 8 | **Generation of txt files from yara rule files in testdata dir:** 9 | 10 | When in src of main mquery directory: 11 | 12 | ``` 13 | python3 -m tests.yararules.generate_yaraparse_result_files 14 | ``` 15 | 16 | **Generation of txt file with file name:** 17 | 18 | When in src of main mquery directory: 19 | 20 | ``` 21 | python3 -m tests.yararules.generate_yaraparse_result_files {file_name} 22 | ``` 23 | 24 | \* In {} give name of the file you want to generate txt file from. 25 | 26 | All result files will be generated in tests/yararules/testdata directory. 
27 | 28 | Notice: To run the generation script yaramod needs to be installed. 29 | -------------------------------------------------------------------------------- /src/tests/yararules/generate_yaraparse_result_files.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from lib.yaraparse import parse_yara 4 | from lib.yaraparse import combine_rules 5 | 6 | current_path = os.path.abspath(os.path.dirname(__file__)) 7 | testdir = current_path + "/testdata/" 8 | 9 | 10 | def main() -> None: 11 | parser = argparse.ArgumentParser( 12 | description="Generate result files or file from yara rule file." 13 | ) 14 | 15 | parser.add_argument("file_name", nargs="?", help="File name", default="") 16 | 17 | args = parser.parse_args() 18 | 19 | if args.file_name: 20 | with open(testdir + args.file_name) as f: 21 | data = f.read() 22 | 23 | result_txt = testdir + args.file_name + ".txt" 24 | write_rules_to_file(data, result_txt) 25 | 26 | else: 27 | yara_files = [f for f in os.listdir(testdir) if ".txt" not in f] 28 | 29 | for file in yara_files: 30 | with open(testdir + file) as f: 31 | data = f.read() 32 | 33 | result_txt = testdir + file + ".txt" 34 | write_rules_to_file(data, result_txt) 35 | 36 | 37 | def write_rules_to_file(data, result_txt): 38 | rules = [] 39 | try: 40 | rules = parse_yara(data) 41 | with open(result_txt, "w") as fp: 42 | fp.write(combine_rules(rules).query + "\n") 43 | except Exception as e: 44 | with open(result_txt, "w") as fp: 45 | fp.write(str(e) + "\n") 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /src/tests/yararules/test_corpus_yara_rule.py: -------------------------------------------------------------------------------- 1 | """Yara rule test corpus.""" 2 | 3 | import unittest 4 | from pathlib import Path 5 | from lib.yaraparse import combine_rules 6 | from lib.yaraparse import 
parse_yara 7 | 8 | testdir = Path(__file__).parent / "testdata" 9 | 10 | 11 | class TestYaraRules(unittest.TestCase): 12 | def test_regression(self) -> None: 13 | for yara_path in testdir.glob("*.yar"): 14 | self.assert_query(yara_path, yara_path.with_suffix(".txt")) 15 | 16 | def assert_query(self, yara_path: Path, results_path: Path) -> None: 17 | expected_data = results_path.read_text() 18 | try: 19 | rules = parse_yara(yara_path.read_text()) 20 | self.assertEqual(expected_data, combine_rules(rules).query + "\n") 21 | except Exception as e: 22 | self.assertEqual(expected_data, str(e) + "\n") 23 | 24 | 25 | if __name__ == "__main__": 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/anonymous_strings.txt: -------------------------------------------------------------------------------- 1 | (((min 1 of ({64756d6d7931}, {64756d6d7932})))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/anonymous_strings.yar: -------------------------------------------------------------------------------- 1 | rule AnonymousStrings 2 | { 3 | strings: 4 | $ = "dummy1" 5 | $ = "dummy2" 6 | condition: 7 | 1 of them 8 | } 9 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/apt_mal_dns_hijacking_campaign_aa19_024a.txt: -------------------------------------------------------------------------------- 1 | (((min 2 of ({2f436c69656e742f4c6f67696e3f69643d}, {4d6f7a696c6c612f352e30202857696e646f7773204e5420362e313b2054726964656e742f372e303b2072763a31312e3029206c696b65204765636b6f}, {2e5c436f6e6669677572652e747874}, {436f6e74656e742d446973706f736974696f6e3a20666f726d2d646174613b206e616d653d2266696c6573223b2066696c656e616d653d22}, {436f6e74656e742d446973706f736974696f6e3a20666f726d2d646174613b206e616d653d227478747322})))) 2 | 
-------------------------------------------------------------------------------- /src/tests/yararules/testdata/apt_mal_dns_hijacking_campaign_aa19_024a.yar: -------------------------------------------------------------------------------- 1 | rule APT_MAL_DNS_Hijacking_Campaign_AA19_024A { 2 | meta: 3 | description = "Detects malware used in DNS Hijackign campaign" 4 | author = "Florian Roth" 5 | reference = "https://www.us-cert.gov/ncas/alerts/AA19-024A" 6 | date = "2019-01-25" 7 | hash1 = "2010f38ef300be4349e7bc287e720b1ecec678cacbf0ea0556bcf765f6e073ec" 8 | hash2 = "45a9edb24d4174592c69d9d37a534a518fbe2a88d3817fc0cc739e455883b8ff" 9 | strings: 10 | $s2 = "/Client/Login?id=" fullword ascii 11 | $s3 = "Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko" fullword ascii 12 | $s4 = ".\\Configure.txt" fullword ascii 13 | $s5 = "Content-Disposition: form-data; name=\"files\"; filename=\"" fullword ascii 14 | $s6 = "Content-Disposition: form-data; name=\"txts\"" fullword ascii 15 | condition: 16 | uint16(0) == 0x5a4d and filesize < 1000KB and 2 of them 17 | } 18 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/base64_strings.txt: -------------------------------------------------------------------------------- 1 | (({546869732070726f6772616d2063616e6e6f74})) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/base64_strings.yar: -------------------------------------------------------------------------------- 1 | rule Base64Example1 2 | { 3 | strings: 4 | $a = "This program cannot" base64 5 | condition: 6 | $a 7 | } 8 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/conditions.txt: -------------------------------------------------------------------------------- 1 | (((({7465787431} | {7465787432}) & ({7465787433} | {7465787434})))) 2 | 
-------------------------------------------------------------------------------- /src/tests/yararules/testdata/conditions.yar: -------------------------------------------------------------------------------- 1 | rule Example 2 | { 3 | strings: 4 | $a = "text1" 5 | $b = "text2" 6 | $c = "text3" 7 | $d = "text4" 8 | condition: 9 | ($a or $b) and ($c or $d) 10 | } 11 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/count_strings.txt: -------------------------------------------------------------------------------- 1 | ((({64756d6d7931} & {64756d6d7932}))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/count_strings.yar: -------------------------------------------------------------------------------- 1 | rule CountExample 2 | { 3 | strings: 4 | $a = "dummy1" 5 | $b = "dummy2" 6 | $c = "dummy3" 7 | condition: 8 | #a == 6 and #b > 10 and #c < 10 9 | } 10 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/data_position.txt: -------------------------------------------------------------------------------- 1 | (({})) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/data_position.yar: -------------------------------------------------------------------------------- 1 | rule IsPE 2 | { 3 | condition: 4 | // MZ signature at offset 0 and ... 5 | uint16(0) == 0x5A4D and 6 | // ... 
PE signature at offset stored in MZ header at 0x3C 7 | uint32(uint32(0x3C)) == 0x00004550 8 | } 9 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/executable_entry_point.txt: -------------------------------------------------------------------------------- 1 | ((({e800000000}) | ({9c5066a1} & {0066a9} & {580f85}))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/executable_entry_point.yar: -------------------------------------------------------------------------------- 1 | import "pe" 2 | 3 | rule EntryPointExample1 4 | { 5 | strings: 6 | $a = { E8 00 00 00 00 } 7 | condition: 8 | $a at pe.entry_point 9 | } 10 | rule EntryPointExample2 11 | { 12 | strings: 13 | $a = { 9C 50 66 A1 ?? ?? ?? 00 66 A9 ?? ?? 58 0F 85 } 14 | condition: 15 | $a in (pe.entry_point..pe.entry_point + 10) 16 | } 17 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_alternatives.txt: -------------------------------------------------------------------------------- 1 | ((({f423} & {45}))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_alternatives.yar: -------------------------------------------------------------------------------- 1 | rule AlternativesExample1 2 | { 3 | strings: 4 | $hex_string = { F4 23 ( 62 B4 | 56 ) 45 } 5 | 6 | condition: 7 | $hex_string 8 | } 9 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_alternatives_wildcards.txt: -------------------------------------------------------------------------------- 1 | ((({f423} & {45}))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_alternatives_wildcards.yar: 
-------------------------------------------------------------------------------- 1 | rule AlternativesExample2 2 | { 3 | strings: 4 | $hex_string = { F4 23 ( 62 B4 | 56 | 45 ?? 67 ) 45 } 5 | condition: 6 | $hex_string 7 | } 8 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_jumps.txt: -------------------------------------------------------------------------------- 1 | ((({f423} & {62b4}))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_jumps.yar: -------------------------------------------------------------------------------- 1 | rule JumpExample 2 | { 3 | strings: 4 | $hex_string = { F4 23 [4-6] 62 B4 } 5 | 6 | condition: 7 | $hex_string 8 | } 9 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_simple_rule.txt: -------------------------------------------------------------------------------- 1 | ((({746578742068657265} | ({e234a1c823fb})))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_simple_rule.yar: -------------------------------------------------------------------------------- 1 | rule ExampleRule 2 | { 3 | strings: 4 | $my_text_string = "text here" 5 | $my_hex_string = { E2 34 A1 C8 23 FB } 6 | 7 | condition: 8 | $my_text_string or $my_hex_string 9 | } 10 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_wild_cards.txt: -------------------------------------------------------------------------------- 1 | ((({e234} & {c8} & {fb}))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/hex_wild_cards.yar: -------------------------------------------------------------------------------- 1 | rule WildcardExample 2 | { 3 | strings: 4 | $hex_string = { E2 34 ?? C8 A? 
FB } 5 | 6 | condition: 7 | $hex_string 8 | } 9 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/iter_over_str_occurrences.txt: -------------------------------------------------------------------------------- 1 | (({})) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/iter_over_str_occurrences.yar: -------------------------------------------------------------------------------- 1 | rule Occurrences 2 | { 3 | strings: 4 | $a = "dummy1" 5 | $b = "dummy2" 6 | condition: 7 | for all i in (1,2,3) : ( @a[i] + 10 == @b[i] ) 8 | } 9 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/or_corner_case.yar.txt: -------------------------------------------------------------------------------- 1 | (({})) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/or_corner_case.yar.yar: -------------------------------------------------------------------------------- 1 | rule or_corner_case 2 | { 3 | meta: 4 | description = "presents the problem with discarding other branch from *or* expressions" 5 | date = "2020-05-19" 6 | strings: 7 | $a = "this is a legit string" 8 | $b = /[a-f]{10}/ 9 | condition: 10 | $a or $b 11 | } -------------------------------------------------------------------------------- /src/tests/yararules/testdata/parse_exception_example.txt: -------------------------------------------------------------------------------- 1 | (((min 2 of (({020000}), ({ffff68747470}))))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/parse_exception_example.yar: -------------------------------------------------------------------------------- 1 | rule parse_exception_example { 2 | strings: 3 | $xor_key_size = { ((BB)|(68))??020000} 4 | $c2 = { FF FF 68 74 74 70 } 5 | 
condition: 6 | all of them 7 | } 8 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/private_strings.txt: -------------------------------------------------------------------------------- 1 | (({68656c6c6f})) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/private_strings.yar: -------------------------------------------------------------------------------- 1 | rule PrivateStringExample 2 | { 3 | strings: 4 | $text_string = "hello" private 5 | condition: 6 | $text_string 7 | } 8 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/referencing_rules.txt: -------------------------------------------------------------------------------- 1 | (({64756d6d7931} | ({64756d6d7932} & {64756d6d7931}))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/referencing_rules.yar: -------------------------------------------------------------------------------- 1 | rule Rule1 2 | { 3 | strings: 4 | $a = "dummy1" 5 | condition: 6 | $a 7 | } 8 | rule Rule2 9 | { 10 | strings: 11 | $a = "dummy2" 12 | condition: 13 | $a and Rule1 14 | } 15 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/referencing_rules_global.txt: -------------------------------------------------------------------------------- 1 | (({64756d6d7931} | ({64756d6d7932} & {64756d6d7931})) & {676c6f62616c}) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/referencing_rules_global.yar: -------------------------------------------------------------------------------- 1 | global rule Rule3 2 | { 3 | strings: 4 | $a = "global" 5 | condition: 6 | $a 7 | } 8 | 9 | rule Rule1 10 | { 11 | strings: 12 | $a = "dummy1" 13 | condition: 14 | $a 15 | } 16 | rule 
Rule2 17 | { 18 | strings: 19 | $a = "dummy2" 20 | condition: 21 | $a and Rule1 22 | } 23 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/referencing_rules_private.txt: -------------------------------------------------------------------------------- 1 | ((({64756d6d7932} & {64756d6d7931}))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/referencing_rules_private.yar: -------------------------------------------------------------------------------- 1 | private rule Rule1 2 | { 3 | strings: 4 | $a = "dummy1" 5 | condition: 6 | $a 7 | } 8 | rule Rule2 9 | { 10 | strings: 11 | $a = "dummy2" 12 | condition: 13 | $a and Rule1 14 | } 15 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/regex_complex.yar.yar: -------------------------------------------------------------------------------- 1 | rule VirtualBox_Detection : AntiVM 2 | { 3 | meta: 4 | description = "Looks for VirtualBox presence" 5 | author = "Cuckoo project" 6 | strings: 7 | $virtualbox1 = "VBoxHook.dll" nocase wide ascii 8 | $virtualbox2 = "VBoxService" nocase wide ascii 9 | $virtualbox3 = "VBoxTray" nocase wide ascii 10 | $virtualbox4 = "VBoxMouse" nocase wide ascii 11 | $virtualbox5 = "VBoxGuest" nocase wide ascii 12 | $virtualbox6 = "VBoxSF" nocase wide ascii 13 | $virtualbox7 = "VBoxGuestAdditions" nocase wide ascii 14 | $virtualbox8 = "VBOX HARDDISK" nocase wide ascii 15 | $virtualbox9 = "vboxservice" nocase wide ascii 16 | $virtualbox10 = "vboxtray" nocase wide ascii 17 | 18 | // MAC addresses 19 | $virtualbox_mac_1a = "08-00-27" 20 | $virtualbox_mac_1b = "08:00:27" 21 | $virtualbox_mac_1c = "080027" 22 | 23 | // PCI Vendor IDs, from Hacking Team's leak 24 | $virtualbox_vid_1 = "VEN_80EE" nocase wide ascii 25 | 26 | // Registry keys 27 | $virtualbox_reg_1 = "SOFTWARE\\Oracle\\VirtualBox Guest Additions" nocase 
wide ascii 28 | $virtualbox_reg_2 = /HARDWARE\\ACPI\\(DSDT|FADT|RSDT)\\VBOX__/ nocase wide ascii 29 | 30 | // Other 31 | $virtualbox_files = /C:\\Windows\\System32\\drivers\\vbox.{15}\.(sys|dll)/ nocase wide ascii 32 | $virtualbox_services = "System\\ControlSet001\\Services\\VBox[A-Za-z]+" nocase wide ascii 33 | $virtualbox_pipe = /\\\\.\\pipe\\(VBoxTrayIPC|VBoxMiniRdDN)/ nocase wide ascii 34 | $virtualbox_window = /VBoxTrayToolWnd(Class)?/ nocase wide ascii 35 | condition: 36 | any of them 37 | } 38 | 39 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/regex_escapes.txt: -------------------------------------------------------------------------------- 1 | (((min 3 of (((({22 (74|54) (77|57) (69|49) (73|53) (74|54) 22 0a}) & {5c (64|44) (6f|4f) 20 (74|54) (68|48) (65|45) 09}) | (({22 00 (74|54) 00 (77|57) 00 (69|49) 00 (73|53) 00 (74|54) 00 22 00 0a 00}) & {5c 00 (64|44) 00 (6f|4f) 00 20 00 (74|54) 00 (68|48) 00 (65|45) 00 09 00})), ({0a090d0c07}), (({73706c6974} & {737472696e67})))))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/regex_escapes.yar: -------------------------------------------------------------------------------- 1 | rule RegexEscapeExample 2 | { 3 | strings: 4 | $escape1 = /\\D\x6f\x20the\t(\"twist\"\n)/ nocase wide ascii 5 | $escape2 = /\n\t\r\f\a/ 6 | $escape3 = /\Bsplit\w\W\s\S\d\Dstring\b/ 7 | condition: 8 | all of them 9 | } 10 | 11 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/regex_simple.yar.txt: -------------------------------------------------------------------------------- 1 | (((((({47494638} & {61}) | ({ffd8ff} & {00104a464946})) | ({89504e470d0a1a0a})) & {3c3f706870}))) 2 | -------------------------------------------------------------------------------- /src/tests/yararules/testdata/regex_simple.yar.yar: 
-------------------------------------------------------------------------------- 1 | /* 2 | Finds PHP code in JP(E)Gs, GIFs, PNGs. 3 | Magic numbers via Wikipedia. 4 | */ 5 | rule php_in_image 6 | { 7 | meta: 8 | author = "Vlad https://github.com/vlad-s" 9 | date = "2016/07/18" 10 | description = "Finds image files w/ PHP code in images" 11 | strings: 12 | $gif = /^GIF8[79]a/ 13 | $jfif = { ff d8 ff e? 00 10 4a 46 49 46 } 14 | $png = { 89 50 4e 47 0d 0a 1a 0a } 15 | 16 | $php_tag = " None: 11 | logging.basicConfig( 12 | level=logging.INFO, format=LOG_FORMAT, datefmt=LOG_DATEFMT 13 | ) 14 | 15 | 16 | def mquery_version() -> str: 17 | return "1.6.0" 18 | 19 | 20 | def make_sha256_tag(filename: str) -> Dict[str, Any]: 21 | sha256_hash = hashlib.sha256() 22 | with open(filename, "rb") as f: 23 | for byte_block in iter(lambda: f.read(4096), b""): 24 | sha256_hash.update(byte_block) 25 | return {"display_text": sha256_hash.hexdigest(), "hidden": True} 26 | -------------------------------------------------------------------------------- /src/utils/compactall.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lib.ursadb import UrsaDb 3 | import time 4 | import argparse 5 | 6 | 7 | def main() -> None: 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | parser = argparse.ArgumentParser(description="Keep the database lean.") 11 | parser.add_argument( 12 | "--ursadb", 13 | help="URL of the ursadb instance.", 14 | default="tcp://localhost:9281", 15 | ) 16 | parser.add_argument( 17 | "--mode", 18 | help="Compacting mode. 
Force (all) or optimise for time (smart).", 19 | default="smart", 20 | choices=["smart", "all"], 21 | ) 22 | 23 | args = parser.parse_args() 24 | ursa = UrsaDb(args.ursadb) 25 | stage = 0 26 | last_datasets = None 27 | while True: 28 | datasets = set( 29 | ursa.execute_command("topology;")["result"]["datasets"].keys() 30 | ) 31 | if last_datasets: 32 | removed = list(last_datasets - datasets) 33 | created = list(datasets - last_datasets) 34 | logging.info("%s => %s", removed, created) 35 | logging.info("Stage %s: %s datasets left.", stage, len(datasets)) 36 | if last_datasets and datasets == last_datasets: 37 | logging.info("Finally, a fixed point! Returning...") 38 | return 39 | 40 | start = time.time() 41 | ursa.execute_command(f"compact {args.mode};") 42 | end = time.time() 43 | logging.info("Compacting took %s seconds...", (end - start)) 44 | stage += 1 45 | last_datasets = datasets 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | --------------------------------------------------------------------------------