├── assemblyline
│ ├── py.typed
│ ├── run
│ │ ├── __init__.py
│ │ └── pubsub_reader.py
│ ├── common
│ │ ├── __init__.py
│ │ ├── version.py
│ │ ├── null.py
│ │ ├── importing.py
│ │ ├── file.py
│ │ ├── uid.py
│ │ ├── frequency.pyx
│ │ ├── logformat.py
│ │ ├── chunk.py
│ │ ├── threading.py
│ │ ├── memory_zip.py
│ │ ├── hexdump.py
│ │ ├── path.py
│ │ ├── signaturing.py
│ │ ├── lucene.lark
│ │ ├── exceptions.py
│ │ ├── codec.py
│ │ ├── banner.py
│ │ ├── metrics.py
│ │ ├── digests.py
│ │ ├── entropy.py
│ │ └── constants.py
│ ├── remote
│ │ ├── __init__.py
│ │ └── datatypes
│ │ │ ├── queues
│ │ │ │ ├── __init__.py
│ │ │ │ ├── multi.py
│ │ │ │ ├── comms.py
│ │ │ │ └── named.py
│ │ │ ├── lock.py
│ │ │ ├── counters.py
│ │ │ ├── cache.py
│ │ │ ├── daily_quota_tracker.py
│ │ │ ├── user_quota_tracker.py
│ │ │ └── set.py
│ ├── datastore
│ │ ├── __init__.py
│ │ ├── support
│ │ │ └── __init__.py
│ │ ├── exceptions.py
│ │ └── bulk.py
│ ├── odm
│ │ ├── models
│ │ │ ├── __init__.py
│ │ │ ├── ontology
│ │ │ │ ├── filetypes
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── __init__.py
│ │ │ │ ├── results
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── http.py
│ │ │ │ │ ├── antivirus.py
│ │ │ │ │ ├── sandbox.py
│ │ │ │ │ ├── process.py
│ │ │ │ │ └── signature.py
│ │ │ │ └── file.py
│ │ │ ├── cached_file.py
│ │ │ ├── emptyresult.py
│ │ │ ├── submission_tree.py
│ │ │ ├── filescore.py
│ │ │ ├── statistics.py
│ │ │ ├── submission_summary.py
│ │ │ ├── user_favorites.py
│ │ │ ├── heuristic.py
│ │ │ ├── apikey.py
│ │ │ ├── signature.py
│ │ │ ├── workflow.py
│ │ │ ├── error.py
│ │ │ ├── user_settings.py
│ │ │ ├── retrohunt.py
│ │ │ └── safelist.py
│ │ ├── messages
│ │ │ ├── __init__.py
│ │ │ ├── alert.py
│ │ │ ├── metrics.py
│ │ │ ├── changes.py
│ │ │ ├── scaler_heartbeat.py
│ │ │ ├── vacuum_heartbeat.py
│ │ │ ├── elastic_heartbeat.py
│ │ │ ├── service_timing_heartbeat.py
│ │ │ ├── dispatching.py
│ │ │ ├── scaler_status_heartbeat.py
│ │ │ ├── retrohunt_heartbeat.py
│ │ │ ├── alerter_heartbeat.py
│ │ │ ├── archive_heartbeat.py
│ │ │ ├── expiry_heartbeat.py
│ │ │ ├── service_heartbeat.py
│ │ │ ├── dispatcher_heartbeat.py
│ │ │ └── submission.py
│ │ ├── common.py
│ │ ├── __init__.py
│ │ └── random_data
│ │ │ └── create_test_data.py
│ ├── filestore
│ │ └── transport
│ │ │ ├── __init__.py
│ │ │ └── base.py
│ ├── __init__.py
│ └── datasource
│ │ ├── __init__.py
│ │ ├── common.py
│ │ ├── alert.py
│ │ └── al.py
├── dev
│ ├── core
│ │ ├── config
│ │ │ ├── classification.yml
│ │ │ ├── config.yml
│ │ │ └── certs
│ │ │ │ └── tls.crt
│ │ ├── .env
│ │ └── docker-compose-sca-upd.yml
│ ├── hauntedhouse
│ │ ├── config
│ │ │ ├── ingest.json
│ │ │ ├── worker.json
│ │ │ └── core.json
│ │ └── docker-compose.yaml
│ └── depends
│ │ ├── config
│ │ │ ├── apm-server.docker.yml
│ │ │ ├── filebeat_policy.json
│ │ │ ├── metricbeat_policy.json
│ │ │ ├── kibana.docker.yml
│ │ │ ├── filebeat.docker.yml
│ │ │ └── metricbeat.docker.yml
│ │ └── docker-compose-minimal.yml
├── test
│ ├── id_file_base
│ │ ├── text.txt
│ │ ├── json.json
│ │ ├── gzip.gz
│ │ ├── jpg.jpg
│ │ ├── pdf.pdf
│ │ ├── png.png
│ │ ├── excel.xls
│ │ ├── word.docx
│ │ ├── powerpoint.pptx
│ │ ├── html.html
│ │ ├── javascript.js
│ │ ├── id_file_base.json
│ │ ├── xml.xml
│ │ ├── powershell.ps1
│ │ └── calendar.ics
│ ├── requirements.txt
│ ├── test_exceptions.py
│ ├── docker-compose.yml
│ ├── test_cachestore.py
│ ├── classification.yml
│ ├── key.pem
│ ├── test_path.py
│ ├── conftest.py
│ ├── test_isotime.py
│ ├── test_identify.py
│ ├── test_datasource.py
│ ├── test_postprocess.py
│ ├── test_metrics.py
│ └── test_regexes.py
├── MANIFEST.in
├── pyproject.toml
├── setup.cfg
├── .github
│ └── dependabot.yml
├── docker
│ ├── minio
│ │ └── Dockerfile
│ ├── nginx-ssl-frontend
│ │ ├── http_redirect.conf
│ │ ├── Dockerfile
│ │ └── minimal.template
│ ├── nginx-ssl-frontend:mui5
│ │ ├── http_redirect.conf
│ │ └── Dockerfile
│ ├── push_containers.sh
│ ├── build_containers.sh
│ ├── al_dev
│ │ └── Dockerfile
│ ├── local_dev.Dockerfile
│ └── al_management
│ │ ├── Dockerfile
│ │ └── pipeline.Dockerfile
├── .dockerignore
├── pipelines
│ └── config.yml
├── incremental.Dockerfile
├── .vscode
│ └── settings.json
├── LICENCE.md
├── .gitignore
├── CONTRIBUTING.md
├── external
│ └── generate_tlds.py
└── Dockerfile
/assemblyline/py.typed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/run/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/remote/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/datastore/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/datastore/support/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/filestore/transport/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/queues/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dev/core/config/classification.yml:
--------------------------------------------------------------------------------
1 | enforce: true
--------------------------------------------------------------------------------
/test/id_file_base/text.txt:
--------------------------------------------------------------------------------
1 | this is a text file
--------------------------------------------------------------------------------
/dev/core/.env:
--------------------------------------------------------------------------------
1 | PRIVATE_REGISTRY=172.17.0.1:32000/
2 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | exclude test/*
2 | exclude bitbucket-pipelines.yml
3 |
--------------------------------------------------------------------------------
/test/id_file_base/json.json:
--------------------------------------------------------------------------------
1 | {
2 | "a": 1,
3 | "b": 2,
4 | "c": 3,
5 | "d": 4
6 | }
--------------------------------------------------------------------------------
/test/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | retrying
3 | pytest-mock
4 | pyftpdlib
5 | pyopenssl==23.3.0
6 |
--------------------------------------------------------------------------------
/assemblyline/common/version.py:
--------------------------------------------------------------------------------
1 | FRAMEWORK_VERSION = 4
2 | SYSTEM_VERSION = 6
3 | BUILD_MINOR = 0
4 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "cython",
4 | "setuptools",
5 | "wheel"
6 | ]
7 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/filetypes/__init__.py:
--------------------------------------------------------------------------------
1 | from assemblyline.odm.models.ontology.filetypes.pe import PE
2 |
--------------------------------------------------------------------------------
/test/id_file_base/gzip.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/gzip.gz
--------------------------------------------------------------------------------
/test/id_file_base/jpg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/jpg.jpg
--------------------------------------------------------------------------------
/test/id_file_base/pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/pdf.pdf
--------------------------------------------------------------------------------
/test/id_file_base/png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/png.png
--------------------------------------------------------------------------------
/test/id_file_base/excel.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/excel.xls
--------------------------------------------------------------------------------
/test/id_file_base/word.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/word.docx
--------------------------------------------------------------------------------
/assemblyline/odm/messages/__init__.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | class PerformanceTimer(odm.Float):
5 | pass
6 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/__init__.py:
--------------------------------------------------------------------------------
1 | from assemblyline.odm.models.ontology.ontology import ResultOntology, ODM_VERSION
2 |
--------------------------------------------------------------------------------
/test/id_file_base/powerpoint.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/powerpoint.pptx
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [tool:pytest]
2 | testpaths = test
3 |
4 | # addopts = --cov=assemblyline --cov-report html
5 |
6 | [coverage:run]
7 |
8 | omit = test/*
9 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: pip
4 | directory: "/"
5 | schedule:
6 | interval: daily
7 | time: "10:00"
8 | open-pull-requests-limit: 10
9 |
--------------------------------------------------------------------------------
/docker/minio/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM minio/minio
2 |
3 | # This has to be pre-configured as Azure Pipelines doesn't support a way of running the command on container creation
4 | CMD ["server", "/data"]
5 |
--------------------------------------------------------------------------------
/test/id_file_base/html.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html lang="en">
3 | <head>
4 |     <meta charset="UTF-8">
5 |     <title>Title</title>
6 | </head>
7 | <body>
8 |
9 | </body>
10 | </html>
--------------------------------------------------------------------------------
/test/id_file_base/javascript.js:
--------------------------------------------------------------------------------
1 | var someVar = 0;
2 | var anotherVar = 0;
3 |
4 | function blah()
5 |
6 | {
7 | console.log("We did it!");
8 | }
9 |
10 | function blahblah() {
11 | console.log("hey hey!");
12 | }
--------------------------------------------------------------------------------
/docker/nginx-ssl-frontend/http_redirect.conf:
--------------------------------------------------------------------------------
1 | server {
2 | server_name _;
3 | listen 80;
4 | listen [::]:80;
5 |
6 | location / {
7 | return 301 https://$host;
8 | }
9 | }
--------------------------------------------------------------------------------
/docker/nginx-ssl-frontend:mui5/http_redirect.conf:
--------------------------------------------------------------------------------
1 | server {
2 | server_name _;
3 | listen 80;
4 | listen [::]:80;
5 |
6 | location / {
7 | return 301 https://$host;
8 | }
9 | }
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | Dockerfile
2 | .idea
3 | .git
4 |
5 | pipelines
6 | venv
7 | env
8 | test
9 | tests
10 | exemples
11 | docs
12 |
13 | pip-log.txt
14 | pip-delete-this-directory.txt
15 | .tox
16 | .coverage
17 | .coverage.*
18 | .cache
19 | nosetests.xml
20 | coverage.xml
21 | *,cover
22 | *.log
23 |
--------------------------------------------------------------------------------
/docker/push_containers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 |
3 | # Push core containers
4 | docker push cccs/minio --all-tags
5 | docker push cccs/nginx-ssl-frontend --all-tags
6 |
7 | # Push dev containers
8 | docker push cccs/assemblyline_dev --all-tags
9 | docker push cccs/assemblyline_management --all-tags
10 |
--------------------------------------------------------------------------------
/assemblyline/common/null.py:
--------------------------------------------------------------------------------
1 | # TODO: are we still using this?
2 |
3 | """
4 | Dummy functions and values used to substitute for dynamic loaded methods that
5 | have no interesting implementation by default.
6 | """
7 |
8 |
9 | def always_false(*_, **__):
10 | return False
11 |
12 |
13 | whitelist = []
14 |
--------------------------------------------------------------------------------
/assemblyline/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | __version__ = "4.0.0.dev0"
4 | _package_version_path = os.path.join(os.path.dirname(__file__), 'VERSION')
5 | if os.path.exists(_package_version_path):
6 | with open(_package_version_path) as _package_version_file:
7 | __version__ = _package_version_file.read().strip()
8 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/cached_file.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model(index=True, store=True, description="CachedFile Model")
5 | class CachedFile(odm.Model):
6 | component = odm.Keyword(description="Name of component which created the file")
7 | expiry_ts = odm.Date(store=False, description="Expiry timestamp")
8 |
--------------------------------------------------------------------------------
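A minimal sketch of instantiating an ODM model such as CachedFile above from a plain dict. The dict-based constructor is assumed from the usage pattern elsewhere in this codebase, and the field values are made up for illustration:

from assemblyline.odm.models.cached_file import CachedFile

# Field names come from the model above; the values are illustrative only.
entry = CachedFile({
    "component": "test_component",
    "expiry_ts": "2024-01-01T00:00:00.000000Z",
})
print(entry.component)  # test_component

--------------------------------------------------------------------------------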
/dev/hauntedhouse/config/ingest.json:
--------------------------------------------------------------------------------
1 | {
2 | "hauntedhouse_api_key": "insecure-ingest-key",
3 | "hauntedhouse_url": "https://hauntedhouse:4443",
4 | "assemblyline_url": "http://al_ui:5000",
5 | "assemblyline_user": "admin",
6 | "assemblyline_api_key": "AL_KEY",
7 | "allow_disabled_access": true,
8 | "write_path": "/data/",
9 | "trust_all": true
10 | }
--------------------------------------------------------------------------------
/assemblyline/odm/models/emptyresult.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model(index=True, store=True, description="Model for Empty Results")
5 | class EmptyResult(odm.Model):
6 | # Empty results are going to be an abstract construct
7 | # Only a record of the key is saved for caching purposes
8 | expiry_ts = odm.Date(store=False, description="Expiry timestamp")
9 |
--------------------------------------------------------------------------------
/assemblyline/odm/common.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model()
5 | class Resources(odm.Model):
6 | cpu_usage = odm.Float()
7 | disk_usage_free = odm.Integer()
8 | disk_usage_percent = odm.Float()
9 | mem_usage = odm.Float()
10 |
11 |
12 | @odm.model()
13 | class HostInfo(odm.Model):
14 | host = odm.Keyword()
15 | ip = odm.Keyword()
16 | mac_address = odm.Keyword()
17 |
--------------------------------------------------------------------------------
/pipelines/config.yml:
--------------------------------------------------------------------------------
1 | filestore:
2 | cache:
3 | - file:///var/cache/assemblyline/
4 | storage:
5 | - file:///var/cache/assemblyline/
6 | core:
7 | redis:
8 | nonpersistent:
9 | host: localhost
10 | port: 6379
11 | persistent:
12 | host: localhost
13 | port: 6379
14 | metrics:
15 | export_interval: 1
16 | datastore:
17 | hosts: ["http://elastic:devpass@localhost:9200"]
18 | archive:
19 | enabled: true
20 |
--------------------------------------------------------------------------------
/assemblyline/common/importing.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import sys
3 |
4 |
5 | def load_module_by_path(name: str, lookup_path=None):
6 | if lookup_path and lookup_path not in sys.path:
7 | sys.path.append(lookup_path)
8 |
9 | module_path, _sep, module_attribute_name = name.rpartition('.')
10 | module = sys.modules.get(module_path, None)
11 | if not module:
12 | module = importlib.import_module(module_path)
13 | return getattr(module, module_attribute_name)
14 |
--------------------------------------------------------------------------------
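A usage sketch for load_module_by_path: given a dotted path whose last component is an attribute name, it imports the module and returns that attribute. The plugin module and lookup path here are hypothetical:

from assemblyline.common.importing import load_module_by_path

# Hypothetical plugin layout: /opt/plugins/my_plugins/handlers.py defining
# a process_file() function. The last dotted component is treated as the
# attribute, everything before it as the module to import.
process_file = load_module_by_path("my_plugins.handlers.process_file", lookup_path="/opt/plugins")
result = process_file()

--------------------------------------------------------------------------------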
/assemblyline/datasource/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines an interface for hash searching.
3 |
4 | Given a file hash, try to generate a quick description of the file.
5 |
6 | This is extended and used by several services. To expose a service's
7 | datasource specialization, it can be added to the 'datasources' seed key.
8 |
9 | The assemblyline core comes with implementations for searching all results (`al.py`)
10 | or the alert streams (`alert.py`). The base class/interface is defined in `common.py`.
11 | """
12 |
--------------------------------------------------------------------------------
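A minimal sketch of the extension pattern this docstring describes, using only the Datasource base class from common.py; the subclass and its return shape are illustrative, not a shipped implementation:

from assemblyline.datasource.common import Datasource


class StaticDatasource(Datasource):  # hypothetical example
    def __init__(self, log, **kw):
        super().__init__(log, **kw)
        # A tiny static lookup table standing in for a real backend.
        self.known = {"d41d8cd98f00b204e9800998ecf8427e": "empty file"}

    def parse(self, result, **kw):
        # Turn a raw lookup result into a quick description of the file.
        return {"description": result} if result else None

--------------------------------------------------------------------------------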
/dev/depends/config/apm-server.docker.yml:
--------------------------------------------------------------------------------
1 | apm-server:
2 | host: "0.0.0.0:8200"
3 | kibana:
4 | enabled: true
5 | host: kibana:5601
6 | path: kibana
7 | protocol: http
8 | username: elastic
9 | password: devpass
10 |
11 | logging:
12 | level: warning
13 | json: true
14 |
15 | output.elasticsearch:
16 | hosts: ["http://elasticsearch:9200"]
17 | username: elastic
18 | password: devpass
19 |
20 | # Remove the following for 8.x
21 | setup.template.settings.index:
22 | number_of_shards: 1
23 | number_of_replicas: 0
24 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/results/__init__.py:
--------------------------------------------------------------------------------
1 | from assemblyline.odm.models.ontology.results.antivirus import Antivirus
2 | from assemblyline.odm.models.ontology.results.malware_config import MalwareConfig
3 | from assemblyline.odm.models.ontology.results.process import Process
4 | from assemblyline.odm.models.ontology.results.sandbox import Sandbox
5 | from assemblyline.odm.models.ontology.results.signature import Signature
6 | from assemblyline.odm.models.ontology.results.network import NetworkConnection
7 | from assemblyline.odm.models.ontology.results.http import HTTP
8 |
--------------------------------------------------------------------------------
/test/id_file_base/id_file_base.json:
--------------------------------------------------------------------------------
1 | {
2 | "text.txt": "text/plain",
3 | "excel.xls": "document/office/excel",
4 | "powerpoint.pptx": "document/office/powerpoint",
5 | "word.docx": "document/office/word",
6 | "png.png": "image/png",
7 | "pdf.pdf": "document/pdf",
8 | "html.html": "code/html",
9 | "xml.xml": "code/xml",
10 | "calendar.ics": "text/calendar",
11 | "gzip.gz": "archive/gzip",
12 | "powershell.ps1": "code/ps1",
13 | "jpg.jpg": "image/jpg",
14 | "json.json": "text/json",
15 | "javascript.js": "code/javascript"
16 | }
17 |
--------------------------------------------------------------------------------
/docker/build_containers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 |
3 | # Build core containers
4 | (cd nginx-ssl-frontend && docker build -t cccs/nginx-ssl-frontend .)
5 | (cd nginx-ssl-frontend:mui5 && docker build -t cccs/nginx-ssl-frontend:mui5 .)
6 | (cd minio && docker build -t cccs/minio .)
7 |
8 | # Build default dev containers
9 | (cd ../.. && docker build --no-cache -f assemblyline-base/docker/al_dev/Dockerfile -t cccs/assemblyline_dev:latest -t cccs/assemblyline_dev:4.6.1 .)
10 | (cd ../.. && docker build --no-cache -f assemblyline-base/docker/al_management/Dockerfile -t cccs/assemblyline_management:latest -t cccs/assemblyline_management:4.6.1 .)
11 |
--------------------------------------------------------------------------------
/test/id_file_base/xml.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <students>
3 |     <student>
4 |         <lastname>Takahashi</lastname>
5 |         <firstname>Jesse</firstname>
6 |         <age>19</age>
7 |         <major>Literature</major>
8 |         <gpa>3.8</gpa>
9 |         <year>Freshman</year>
10 |     </student>
11 |     <student>
12 |         <lastname>Nguyen</lastname>
13 |         <firstname>May</firstname>
14 |         <age>23</age>
15 |         <major>Chemistry</major>
16 |         <gpa>3.5</gpa>
17 |         <year>Senior</year>
18 |     </student>
19 | </students>
--------------------------------------------------------------------------------
/assemblyline/odm/messages/alert.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.odm.models.alert import Alert
3 |
4 | MSG_TYPES = {"AlertCreated", "AlertUpdated"}
5 | LOADER_CLASS = "assemblyline.odm.messages.alert.AlertMessage"
6 |
7 |
8 | @odm.model(description="Model of Alert Message")
9 | class AlertMessage(odm.Model):
10 | msg = odm.Compound(Alert, description="Message of alert")
11 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for messages")
12 | msg_type = odm.Enum(values=MSG_TYPES, default="AlertCreated", description="Type of Message")
13 | sender = odm.Keyword(description="Sender of message")
14 |
--------------------------------------------------------------------------------
/assemblyline/common/file.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | import yaml
3 |
4 | from assemblyline.common.identify import CUSTOM_URI_ID
5 |
6 |
7 | def make_uri_file(directory: str, uri: str, params=None) -> str:
8 | with tempfile.NamedTemporaryFile(dir=directory, delete=False, mode="w") as out:
9 | out.write(CUSTOM_URI_ID)
10 | yaml.dump({"uri": uri}, out)
11 | if params:
12 | yaml.dump(params, out)
13 | return out.name
14 |
15 |
16 | def normalize_uri_file(directory: str, filename: str) -> str:
17 | with open(filename, "r") as f:
18 | data = yaml.safe_load(f)
19 | uri = data.pop("uri")
20 | return make_uri_file(directory, uri, data)
21 |
--------------------------------------------------------------------------------
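A quick sketch of make_uri_file in use; the URI and params values are illustrative. The helper writes the CUSTOM_URI_ID marker followed by YAML into a file inside `directory` and returns the file's path:

import tempfile

from assemblyline.common.file import make_uri_file

with tempfile.TemporaryDirectory() as directory:
    # Creates a file starting with the URI marker, followed by YAML for
    # the URI and any extra parameters.
    path = make_uri_file(directory, "https://example.com/sample.bin", params={"headers": {"User-Agent": "demo"}})
    with open(path) as f:
        print(f.read())

--------------------------------------------------------------------------------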
/dev/depends/config/filebeat_policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "policy": {
3 | "phases": {
4 | "hot": {
5 | "min_age": "0ms",
6 | "actions": {
7 | "rollover": {
8 | "max_age": "1d",
9 | "max_size": "20gb"
10 | },
11 | "set_priority": {
12 | "priority": 100
13 | }
14 | }
15 | },
16 | "warm": {
17 | "actions": {
18 | "readonly": {},
19 | "set_priority": {
20 | "priority": 50
21 | }
22 | }
23 | },
24 | "delete": {
25 | "min_age": "3d",
26 | "actions": {
27 | "delete": {}
28 | }
29 | }
30 | }
31 | }
32 | }
--------------------------------------------------------------------------------
/assemblyline/odm/models/submission_tree.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 | Classification = forge.get_classification()
4 |
5 |
6 | @odm.model(index=True, store=False, description="Submission Tree Model")
7 | class SubmissionTree(odm.Model):
8 | classification = odm.Classification(default=Classification.UNRESTRICTED, description="Classification of the cache")
9 | filtered = odm.Boolean(default=False, description="Has this cache entry been filtered?")
10 | expiry_ts = odm.Date(description="Expiry timestamp")
11 | supplementary = odm.Text(index=False, description="Tree of supplementary files")
12 | tree = odm.Text(index=False, description="File tree cache")
13 |
--------------------------------------------------------------------------------
/dev/depends/config/metricbeat_policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "policy": {
3 | "phases": {
4 | "hot": {
5 | "min_age": "0ms",
6 | "actions": {
7 | "rollover": {
8 | "max_age": "1d",
9 | "max_size": "5gb"
10 | },
11 | "set_priority": {
12 | "priority": 100
13 | }
14 | }
15 | },
16 | "warm": {
17 | "actions": {
18 | "readonly": {},
19 | "set_priority": {
20 | "priority": 50
21 | }
22 | }
23 | },
24 | "delete": {
25 | "min_age": "4d",
26 | "actions": {
27 | "delete": {}
28 | }
29 | }
30 | }
31 | }
32 | }
--------------------------------------------------------------------------------
/assemblyline/odm/models/filescore.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model(index=False, store=False, description="Model of Scoring related to a File")
5 | class FileScore(odm.Model):
6 | psid = odm.Optional(odm.UUID(), description="Parent submission ID of the associated submission")
7 | expiry_ts = odm.Date(index=True, description="Expiry timestamp, used for garbage collection")
8 | score = odm.Integer(description="Maximum score for the associated submission")
9 | errors = odm.Integer(description="Number of errors that occurred during the previous analysis")
10 | sid = odm.UUID(description="ID of the associated submission")
11 | time = odm.Float(description="Epoch time at which the FileScore entry was created")
12 |
--------------------------------------------------------------------------------
/assemblyline/datastore/exceptions.py:
--------------------------------------------------------------------------------
1 | from typing import Iterable
2 |
3 |
4 | class DataStoreException(Exception):
5 | pass
6 |
7 |
8 | class SearchException(Exception):
9 | pass
10 |
11 |
12 | class SearchDepthException(Exception):
13 | pass
14 |
15 |
16 | class ILMException(Exception):
17 | pass
18 |
19 |
20 | class VersionConflictException(Exception):
21 | pass
22 |
23 |
24 | class UnsupportedElasticVersion(Exception):
25 | pass
26 |
27 |
28 | class ArchiveDisabled(Exception):
29 | pass
30 |
31 |
32 | class MultiKeyError(KeyError):
33 | def __init__(self, keys: Iterable[str], partial_output):
34 | super().__init__(str(keys))
35 | self.keys = set(keys)
36 | self.partial_output = partial_output
37 |
--------------------------------------------------------------------------------
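A short sketch of how MultiKeyError is meant to be consumed: a multi-key lookup that resolves only some keys can surface both the missing keys and the partial result it did retrieve. The key names and data are made up:

from assemblyline.datastore.exceptions import MultiKeyError

found = {"key_a": {"score": 10}}
try:
    # Hypothetical failure from a multi-get where key_b and key_c are missing.
    raise MultiKeyError(["key_b", "key_c"], partial_output=found)
except MultiKeyError as err:
    print(err.keys)            # {'key_b', 'key_c'}
    print(err.partial_output)  # {'key_a': {'score': 10}}

--------------------------------------------------------------------------------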
/assemblyline/odm/models/statistics.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model(index=True, store=True, description="Statistics Model")
5 | class Statistics(odm.Model):
6 | count = odm.Integer(default=0, description="Count of statistical hits")
7 | min = odm.Integer(default=0, description="Minimum value of all statistical hits")
8 | max = odm.Integer(default=0, description="Maximum value of all statistical hits")
9 | avg = odm.Integer(default=0, description="Average of all statistical hits")
10 | sum = odm.Integer(default=0, description="Sum of all statistical hits")
11 | first_hit = odm.Optional(odm.Date(), description="Date of first hit of statistic")
12 | last_hit = odm.Optional(odm.Date(), description="Date of last hit of statistic")
13 |
--------------------------------------------------------------------------------
/assemblyline/common/uid.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import uuid
3 |
4 | import baseconv
5 |
6 | TINY = 8
7 | SHORT = 16
8 | MEDIUM = NORMAL = 32
9 | LONG = 64
10 |
11 |
12 | def get_random_id() -> str:
13 | return baseconv.base62.encode(uuid.uuid4().int)
14 |
15 |
16 | def get_id_from_data(data, prefix=None, length=MEDIUM):
17 | possible_len = [TINY, SHORT, MEDIUM, LONG]
18 | if length not in possible_len:
19 | raise ValueError(f"Invalid hash length of {length}. Possible values are: {str(possible_len)}.")
20 | sha256_hash = hashlib.sha256(str(data).encode()).hexdigest()[:length]
21 | _hash = baseconv.base62.encode(int(sha256_hash, 16))
22 |
23 | if isinstance(prefix, str):
24 | _hash = f"{prefix}_{_hash}"
25 |
26 | return _hash
27 |
--------------------------------------------------------------------------------
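A usage sketch for the ID helpers above: get_random_id is non-deterministic, while get_id_from_data always maps the same input (and optional prefix) to the same base62 ID:

from assemblyline.common.uid import LONG, TINY, get_id_from_data, get_random_id

print(get_random_id())                         # random base62 encoding of a UUID4
print(get_id_from_data("hello", length=TINY))  # deterministic: same input, same short ID
print(get_id_from_data("hello", prefix="file", length=LONG))  # 'file_' + long ID

--------------------------------------------------------------------------------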
/test/id_file_base/powershell.ps1:
--------------------------------------------------------------------------------
1 | # From https://docs.microsoft.com/en-us/powershell/scripting/samples/sorting-objects?view=powershell-7.1
2 | Get-ChildItem |
3 | Sort-Object -Property LastWriteTime, Name |
4 | Format-Table -Property LastWriteTime, Name
5 | Get-ChildItem |
6 | Sort-Object -Property LastWriteTime, Name -Descending |
7 | Format-Table -Property LastWriteTime, Name
8 | Get-ChildItem |
9 | Sort-Object -Property @{ Expression = 'LastWriteTime'; Descending = $true },
10 | @{ Expression = 'Name'; Ascending = $true } |
11 | Format-Table -Property LastWriteTime, Name
12 | Get-ChildItem |
13 | Sort-Object -Property @{ Expression = { $_.LastWriteTime - $_.CreationTime }; Descending = $true } |
14 | Format-Table -Property LastWriteTime, CreationTime
--------------------------------------------------------------------------------
/assemblyline/run/pubsub_reader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 |
5 | from assemblyline.remote.datatypes.queues.comms import CommsQueue
6 | from pprint import pprint
7 |
8 |
9 | if __name__ == "__main__":
10 | queue_name = None
11 | if len(sys.argv) > 1:
12 | queue_name = sys.argv[1]
13 |
14 | if queue_name is None:
15 | print("\nERROR: You must specify a queue name.\n\npubsub_reader.py [queue_name]")
16 | exit(1)
17 |
18 | print(f"Listening for messages on '{queue_name}' queue.")
19 |
20 | q = CommsQueue(queue_name)
21 |
22 | try:
23 | while True:
24 | for msg in q.listen():
25 | pprint(msg)
26 | except KeyboardInterrupt:
27 | print('Exiting')
28 | finally:
29 | q.close()
30 |
--------------------------------------------------------------------------------
/dev/hauntedhouse/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: "2.4"
2 |
3 | services:
4 | hauntedhouse:
5 | image: cccs/hauntedhouse:0.0.14
6 | volumes:
7 | - ${PATH_REWRITE:-.}/config/core.json:/config/core.json:ro
8 | - haunted-house-data:/data/
9 | command: haunted-house server --config /config/core.json
10 | environment:
11 | RUST_LOG: "haunted_house=info"
12 |
13 | hauntedhouse-worker:
14 | image: cccs/hauntedhouse:0.0.14
15 | volumes:
16 | - ${PATH_REWRITE:-.}/config/worker.json:/config/worker.json:ro
17 | command: haunted-house worker --config /config/worker.json
18 | environment:
19 | RUST_LOG: "haunted_house=info"
20 |
21 | hauntedhouse-ingest:
22 | image: cccs/hauntedhouse:ingest-0.0.9
23 | volumes:
24 | - ${PATH_REWRITE:-.}/config/ingest.json:/config/ingest.json:ro
25 | command: python -m hauntedhouse.ingest /config/ingest.json
26 |
27 |
--------------------------------------------------------------------------------
/assemblyline/common/frequency.pyx:
--------------------------------------------------------------------------------
1 | # cython: language_level=3
2 |
3 | # noinspection PyUnresolvedReferences
4 | from libc.string cimport memset
5 |
6 | def counts(b, c, d=None):
7 | if d is None:
8 | d = {}
9 | cdef long long t[256]
10 | cdef unsigned char* s = b
11 | cdef int l = c
12 | cdef int i = 0
13 |
14 | memset(t, 0, 256 * sizeof(long long))
15 |
16 | for k, v in d.items():
17 | t[k] = v
18 |
19 | while i < l:
20 | t[s[i]] += 1
21 | i += 1
22 |
23 | return {i: t[i] for i in range(256) if t[i]}
24 |
25 | def counts_old(s, d=None):
26 | if d is None:
27 | d = {}
28 | cdef int i
29 | cdef int t[256]
30 |
31 | memset(t, 0, 256 * sizeof(int))
32 |
33 | for k, v in d.items():
34 | t[k] = v
35 |
36 | for c in s:
37 | t[ord(c)] += 1
38 |
39 | return {i: t[i] for i in range(256) if t[i]}
40 |
--------------------------------------------------------------------------------
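A usage sketch, assuming the Cython extension has been compiled: counts() takes a bytes buffer and its length and returns a byte-value histogram, optionally seeded from a prior result so counts can be accumulated across chunks:

from assemblyline.common.frequency import counts  # requires the built extension

data = b"hello"
print(counts(data, len(data)))  # {101: 1, 104: 1, 108: 2, 111: 1}

# Accumulate across chunks by passing the previous histogram back in.
first = counts(b"he", 2)
print(counts(b"llo", 3, first))  # same totals as the single call above

--------------------------------------------------------------------------------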
/assemblyline/odm/models/ontology/results/http.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.odm.models.ontology.file import File
3 |
4 |
5 | @odm.model(index=False, store=False, description="")
6 | class HTTPRedirect(odm.Model):
7 | from_url = odm.Keyword(description="")
8 | to_url = odm.Keyword(description="")
9 |
10 |
11 | @odm.model(index=False, store=False, description="HTTP Task")
12 | class HTTP(odm.Model):
13 | response_code = odm.Integer(description="The status code of the main page")
14 | redirection_url = odm.Optional(odm.Keyword(), description="The final page of the requested url")
15 | redirects = odm.Optional(odm.List(odm.Compound(HTTPRedirect)), description="List of Redirects")
16 | favicon = odm.Optional(odm.Compound(File), description="The file information of the main favicon")
17 | title = odm.Optional(odm.Keyword(), description="The title of the main page after any redirection")
18 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/submission_summary.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 | Classification = forge.get_classification()
4 |
5 |
6 | @odm.model(index=False, store=False, description="Submission Summary Model")
7 | class SubmissionSummary(odm.Model):
8 | classification = odm.Classification(default=Classification.UNRESTRICTED, description="Classification of the cache")
9 | filtered = odm.Boolean(default=False, description="Has this cache entry been filtered?")
10 | expiry_ts = odm.Date(index=True, description="Expiry timestamp")
11 | tags = odm.Text(description="Tags cache")
12 | attack_matrix = odm.Text(description="ATT&CK Matrix cache")
13 | heuristics = odm.Text(description="Heuristics cache")
14 | heuristic_sections = odm.Text(description="All sections mapping to the heuristics")
15 | heuristic_name_map = odm.Text(description="Map of heuristic names to IDs")
16 |
--------------------------------------------------------------------------------
/test/id_file_base/calendar.ics:
--------------------------------------------------------------------------------
1 | BEGIN:VCALENDAR
2 | VERSION:2.0
3 | CALSCALE:GREGORIAN
4 | BEGIN:VEVENT
5 | SUMMARY:Access-A-Ride Pickup
6 | DTSTART;TZID=America/New_York:20130802T103400
7 | DTEND;TZID=America/New_York:20130802T110400
8 | LOCATION:1000 Broadway Ave.\, Brooklyn
9 | DESCRIPTION: Access-A-Ride to 900 Jay St.\, Brooklyn
10 | STATUS:CONFIRMED
11 | SEQUENCE:3
12 | BEGIN:VALARM
13 | TRIGGER:-PT10M
14 | DESCRIPTION:Pickup Reminder
15 | ACTION:DISPLAY
16 | END:VALARM
17 | END:VEVENT
18 | BEGIN:VEVENT
19 | SUMMARY:Access-A-Ride Pickup
20 | DTSTART;TZID=America/New_York:20130802T200000
21 | DTEND;TZID=America/New_York:20130802T203000
22 | LOCATION:900 Jay St.\, Brooklyn
23 | DESCRIPTION: Access-A-Ride to 1000 Broadway Ave.\, Brooklyn
24 | STATUS:CONFIRMED
25 | SEQUENCE:3
26 | BEGIN:VALARM
27 | TRIGGER:-PT10M
28 | DESCRIPTION:Pickup Reminder
29 | ACTION:DISPLAY
30 | END:VALARM
31 | END:VEVENT
32 | END:VCALENDAR
--------------------------------------------------------------------------------
/test/test_exceptions.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from assemblyline.common.exceptions import Chain, ChainAll, ChainException
4 |
5 |
6 | class CustomError(ChainException):
7 | pass
8 |
9 |
10 | @Chain(CustomError)
11 | def fail_function(message):
12 | raise Exception(message)
13 |
14 |
15 | @ChainAll(CustomError)
16 | class FailClass:
17 | def fail_method(self):
18 | raise Exception()
19 |
20 | @staticmethod
21 | def static_fail_method():
22 | raise Exception()
23 |
24 |
25 | def test_exception_chaining():
26 | with pytest.raises(CustomError) as error_info:
27 | fail_function('abc123')
28 | assert isinstance(error_info.value.cause, Exception)
29 | assert error_info.value.cause.args[0] == 'abc123'
30 |
31 | with pytest.raises(CustomError):
32 | FailClass().fail_method()
33 |
34 | with pytest.raises(CustomError):
35 | FailClass.static_fail_method()
36 |
37 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/metrics.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"MetricsCounter"}
4 | LOADER_CLASS = "assemblyline.odm.messages.metrics.MetricsMessage"
5 |
6 |
7 | @odm.model(description="Metrics Model")
8 | class Metrics(odm.Model):
9 | host = odm.Keyword(description="Host that generated metric")
10 | type = odm.Keyword(description="Type of metric")
11 | name = odm.Keyword(description="Metric name")
12 | metrics = odm.Mapping(odm.Integer(), description="Metric value")
13 |
14 |
15 | @odm.model(description="Model of Metric Message")
16 | class MetricsMessage(odm.Model):
17 | msg = odm.Compound(Metrics, description="Metrics message")
18 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
19 | msg_type = odm.Enum(values=MSG_TYPES, default="MetricsCounter", description="Type of message")
20 | sender = odm.Keyword(description="Sender of message")
21 |
--------------------------------------------------------------------------------
/test/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | services:
4 | sftp:
5 | image: linuxserver/openssh-server
6 | environment:
7 | - SUDO_ACCESS=false
8 | - PASSWORD_ACCESS=true
9 | - USER_PASSWORD=password
10 | - USER_NAME=user
11 | - LOG_STDOUT=true
12 | ports:
13 | - "2222:2222"
14 |
15 | minio:
16 | image: minio/minio
17 | environment:
18 | MINIO_ROOT_USER: al_storage_key
19 | MINIO_ROOT_PASSWORD: Ch@ngeTh!sPa33w0rd
20 | ports:
21 | - "9000:9000"
22 | command: server /data
23 |
24 | elasticsearch:
25 | image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
26 | environment:
27 | - xpack.security.enabled=true
28 | - discovery.type=single-node
29 | - logger.level=WARN
30 | - "ES_JAVA_OPTS=-Xms1024m -Xmx1024m"
31 | - ELASTIC_PASSWORD=devpass
32 | ports:
33 | - "9200:9200"
34 |
35 | redis:
36 | image: redis
37 | ports:
38 | - "6379:6379"
39 |
--------------------------------------------------------------------------------
/assemblyline/common/logformat.py:
--------------------------------------------------------------------------------
1 |
2 | hostname = 'unknownhost'
3 | # noinspection PyBroadException
4 | try:
5 | from assemblyline.common.net import get_hostname
6 | hostname = get_hostname()
7 | except Exception: # pylint:disable=W0702
8 | pass
9 |
10 | ip = 'x.x.x.x'
11 | # noinspection PyBroadException
12 | try:
13 | from assemblyline.common.net import get_hostip
14 | ip = get_hostip()
15 | except Exception: # pylint:disable=W0702
16 | pass
17 |
18 | AL_SYSLOG_FORMAT = f'{ip} AL %(levelname)8s %(process)5d %(name)40s | %(message)s'
19 | AL_LOG_FORMAT = f'%(asctime)-16s %(levelname)8s {hostname} %(process)d %(name)40s | %(message)s'
20 | AL_JSON_FORMAT = f'{{' \
21 | f'"@timestamp": "%(asctime)s", ' \
22 | f'"event": {{ "module": "assemblyline", "dataset": "%(name)s" }}, ' \
23 | f'"host": {{ "ip": "{ip}", "hostname": "{hostname}" }}, ' \
24 | f'"log": {{ "level": "%(levelname)s", "logger": "%(name)s" }}, ' \
25 | f'"process": {{ "pid": "%(process)d" }}, ' \
26 | f'"message": %(message)s}}'
27 |
--------------------------------------------------------------------------------
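A minimal sketch wiring one of the shared format strings into the standard logging module; the logger name is arbitrary:

import logging

from assemblyline.common.logformat import AL_LOG_FORMAT

logging.basicConfig(level=logging.INFO, format=AL_LOG_FORMAT)
logging.getLogger("assemblyline.demo").info("formatted like other AL components")

--------------------------------------------------------------------------------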
/assemblyline/remote/datatypes/queues/multi.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from assemblyline.remote.datatypes import get_client, retry_call
4 |
5 |
6 | class MultiQueue(object):
7 | def __init__(self, host=None, port=None, private=False):
8 | self.c = get_client(host, port, private)
9 |
10 | def delete(self, name):
11 | retry_call(self.c.delete, name)
12 |
13 | def length(self, name):
14 | return retry_call(self.c.llen, name)
15 |
16 | def pop(self, name, blocking=True, timeout=0):
17 | if blocking:
18 | response = retry_call(self.c.blpop, name, timeout)
19 | else:
20 | response = retry_call(self.c.lpop, name)
21 |
22 | if not response:
23 | return response
24 |
25 | if blocking:
26 | return json.loads(response[1])
27 | else:
28 | return json.loads(response)
29 |
30 | def push(self, name, *messages):
31 | for message in messages:
32 | retry_call(self.c.rpush, name, json.dumps(message))
33 |
--------------------------------------------------------------------------------
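A usage sketch for MultiQueue, assuming a reachable Redis instance (the host/port values are illustrative); messages are JSON-serialized on push and deserialized on pop:

from assemblyline.remote.datatypes.queues.multi import MultiQueue

mq = MultiQueue(host="localhost", port=6379)
mq.push("demo-queue", {"job": 1}, {"job": 2})
print(mq.length("demo-queue"))               # 2
print(mq.pop("demo-queue", blocking=False))  # {'job': 1}
mq.delete("demo-queue")

--------------------------------------------------------------------------------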
/docker/nginx-ssl-frontend/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nginx AS builder
2 |
3 | RUN apt-get update
4 | RUN apt-get install openssl
5 | RUN openssl req -nodes -x509 -newkey rsa:4096 -keyout /etc/ssl/nginx.key -out /etc/ssl/nginx.crt -days 3650 -subj "/C=CA/ST=Ontario/L=Ottawa/O=CCCS/CN=assemblyline.local"
6 |
7 | FROM nginx
8 |
9 | ENV DOLLAR $
10 | ENV FQDN localhost
11 | ENV MAX_BODY_SIZE 100M
12 | ENV FRONTEND_HOST al_frontend
13 | ENV SOCKET_HOST al_socketio
14 | ENV UI_HOST al_ui
15 | ENV KIBANA_HOST kibana
16 | ENV TEMPLATE full
17 | ENV ACCESS_LOG off
18 | ENV ERROR_LOG /dev/stderr
19 | ENV ERROR_LEVEL notice
20 | ENV READ_TIMEOUT 60s
21 | ENV CONNECT_TIMEOUT 60s
22 | ENV SEND_TIMEOUT 60s
23 |
24 | COPY http_redirect.conf /etc/nginx/conf.d/
25 | COPY full.template /opt/
26 | COPY minimal.template /opt/
27 |
28 | COPY --from=builder /etc/ssl/ /etc/ssl/
29 |
30 | EXPOSE 443
31 | EXPOSE 80
32 |
33 | CMD /bin/bash -c "envsubst < /opt/$TEMPLATE.template > /etc/nginx/conf.d/default.conf && cat /etc/nginx/conf.d/default.conf && exec nginx -g 'daemon off;'"
34 |
--------------------------------------------------------------------------------
/docker/nginx-ssl-frontend:mui5/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nginx AS builder
2 |
3 | RUN apt-get update
4 | RUN apt-get install openssl
5 | RUN openssl req -nodes -x509 -newkey rsa:4096 -keyout /etc/ssl/nginx.key -out /etc/ssl/nginx.crt -days 3650 -subj "/C=CA/ST=Ontario/L=Ottawa/O=CCCS/CN=assemblyline.local"
6 |
7 | FROM nginx
8 |
9 | ENV DOLLAR $
10 | ENV FQDN localhost
11 | ENV MAX_BODY_SIZE 100M
12 | ENV FRONTEND_HOST al_frontend
13 | ENV SOCKET_HOST al_socketio
14 | ENV UI_HOST al_ui
15 | ENV KIBANA_HOST kibana
16 | ENV TEMPLATE full
17 | ENV ACCESS_LOG off
18 | ENV ERROR_LOG /dev/stderr
19 | ENV ERROR_LEVEL notice
20 | ENV READ_TIMEOUT 60s
21 | ENV CONNECT_TIMEOUT 60s
22 | ENV SEND_TIMEOUT 60s
23 |
24 | COPY http_redirect.conf /etc/nginx/conf.d/
25 | COPY full.template /opt/
26 | COPY minimal.template /opt/
27 |
28 | COPY --from=builder /etc/ssl/ /etc/ssl/
29 |
30 | EXPOSE 443
31 | EXPOSE 80
32 |
33 | CMD /bin/bash -c "envsubst < /opt/$TEMPLATE.template > /etc/nginx/conf.d/default.conf && cat /etc/nginx/conf.d/default.conf && exec nginx -g 'daemon off;'"
34 |
--------------------------------------------------------------------------------
/dev/core/config/config.yml:
--------------------------------------------------------------------------------
1 | core:
2 | alerter:
3 | delay: 0
4 | metrics:
5 | apm_server:
6 | server_url: http://apm_server:8200/
7 | elasticsearch:
8 | hosts: [http://elastic:devpass@elasticsearch:9200]
9 | redis:
10 | host: redis
11 | redis:
12 | nonpersistent:
13 | host: redis
14 | persistent:
15 | host: redis
16 | port: 6379
17 |
18 | datastore:
19 | hosts: [http://elastic:devpass@elasticsearch:9200]
20 |
21 | filestore:
22 | cache:
23 | [
24 | "s3://al_storage_key:Ch@ngeTh!sPa33w0rd@minio:9000?s3_bucket=al-cache&use_ssl=False",
25 | ]
26 | storage:
27 | [
28 | "s3://al_storage_key:Ch@ngeTh!sPa33w0rd@minio:9000?s3_bucket=al-storage&use_ssl=False",
29 | ]
30 |
31 | logging:
32 | log_to_console: true
33 | log_to_file: false
34 | log_to_syslog: false
35 |
36 | services:
37 | preferred_update_channel: dev
38 | allow_insecure_registry: true
39 | image_variables:
40 | PRIVATE_REGISTRY: $PRIVATE_REGISTRY
41 |
42 | ui:
43 | enforce_quota: false
44 |
--------------------------------------------------------------------------------
/dev/depends/config/kibana.docker.yml:
--------------------------------------------------------------------------------
1 | elasticsearch.hosts: ["http://elasticsearch:9200"]
2 |
3 | elasticsearch.password: kibanapass
4 | elasticsearch.username: kibana_system
5 |
6 | logging.root.level: warn
7 |
8 | server.basePath: /kibana
9 | server.publicBaseUrl: https://localhost/kibana
10 | server.rewriteBasePath: true
11 | server.name: kibana
12 | server.host: "0.0.0.0"
13 | xpack.reporting.roles.enabled: false
14 | xpack.reporting.encryptionKey: ThisIsSomeRandomKeyThatYouShouldDefinitelyChange!
15 | xpack.reporting.kibanaServer.hostname: localhost
16 | xpack.security.encryptionKey: ThisIsSomeRandomKeyThatYouShouldDefinitelyChange!
17 | xpack.encryptedSavedObjects.encryptionKey: ThisIsSomeRandomKeyThatYouShouldDefinitelyChange!
18 | xpack.fleet.packages:
19 | - name: apm
20 | version: latest
21 | xpack.fleet.agentPolicies:
22 | - name: APM Server
23 | id: apm
24 | is_managed: false
25 | namespace: default
26 | package_policies:
27 | - name: apm_server
28 | id: default-apm-server
29 | package:
30 | name: apm
31 |
--------------------------------------------------------------------------------
/dev/hauntedhouse/config/worker.json:
--------------------------------------------------------------------------------
1 | {
2 | "api_token": "insecure-worker-key",
3 | "file_cache": {
4 | "Directory": {
5 | "path": "/tmp/files",
6 | "size": "100Gi"
7 | }
8 | },
9 | "blob_cache": {
10 | "Directory": {
11 | "path": "/tmp/blobs",
12 | "size": "100Gi"
13 | }
14 | },
15 | "files": {
16 | "S3": {
17 | "access_key_id": "al_storage_key",
18 | "secret_access_key": "Ch@ngeTh!sPa33w0rd",
19 | "endpoint_url": "http;//minio:9000",
20 | "region_name": "local",
21 | "bucket": "al-storage",
22 | "no_tls_verify": true
23 | }
24 | },
25 | "blobs": {
26 | "S3": {
27 | "access_key_id": "al_storage_key",
28 | "secret_access_key": "Ch@ngeTh!sPa33w0rd",
29 | "endpoint_url": "http;//minio:9000",
30 | "region_name": "local",
31 | "bucket": "retrohunt-storage",
32 | "no_tls_verify": true
33 | }
34 | },
35 | "bind_address": "0.0.0.0:4443",
36 | "tls": null,
37 | "server_address": "https://hauntedhouse:4443",
38 | "server_tls": "AllowAll"
39 | }
--------------------------------------------------------------------------------
/assemblyline/common/chunk.py:
--------------------------------------------------------------------------------
1 | """Sequence manipulation methods used in parsing raw datastore output."""
2 | from __future__ import annotations
3 | from typing import Sequence, Generator, TypeVar, overload
4 |
5 | _T = TypeVar('_T')
6 |
7 |
8 | @overload
9 | def chunk(items: bytes, n: int) -> Generator[bytes, None, None]:
10 | ...
11 |
12 |
13 | @overload
14 | def chunk(items: str, n: int) -> Generator[str, None, None]:
15 | ...
16 |
17 |
18 | @overload
19 | def chunk(items: Sequence[_T], n: int) -> Generator[Sequence[_T], None, None]:
20 | ...
21 |
22 |
23 | def chunk(items, n: int):
24 | """ Yield n-sized chunks from list.
25 |
26 | >>> list(chunk([1,2,3,4,5,6,7], 2))
27 | [[1, 2], [3, 4], [5, 6], [7]]
28 | """
29 | for i in range(0, len(items), n):
30 | yield items[i:i+n]
31 |
32 |
33 | def chunked_list(items: Sequence[_T], n: int) -> list[Sequence[_T]]:
34 | """ Create a list of n-sized chunks from list.
35 |
36 | >>> chunked_list([1,2,3,4,5,6,7], 2)
37 | [[1, 2], [3, 4], [5, 6], [7]]
38 | """
39 | return list(chunk(items, n))
40 |
--------------------------------------------------------------------------------
/incremental.Dockerfile:
--------------------------------------------------------------------------------
1 | # This dockerfile only includes the minimal steps to build a package onto
2 | # a periodic root image
3 | ARG build_image
4 | ARG base
5 | ARG tag
6 | FROM $build_image AS builder
7 | ARG version
8 |
9 | # Install assemblyline base (setup.py is just a file we know exists so the command
10 | # won't fail if dist isn't there. The dist* copies in any dist directory only if it exists.)
11 | COPY setup.py dist* dist/
12 | RUN pip install --no-cache-dir --no-warn-script-location -f dist/ -U --user assemblyline==$version && rm -rf ~/.cache/pip
13 |
14 | FROM $base:$tag
15 | ARG version
16 | ARG version_tag=${version}
17 |
18 | # Install assemblyline base
19 | COPY --chown=assemblyline:assemblyline --from=builder /root/.local /var/lib/assemblyline/.local
20 | ENV PATH=/var/lib/assemblyline/.local/bin:$PATH
21 | ENV PYTHONPATH=/var/lib/assemblyline/.local/lib/python3.11/site-packages
22 | ENV ASSEMBLYLINE_VERSION=${version}
23 | ENV ASSEMBLYLINE_IMAGE_TAG=${version_tag}
24 |
25 | # Switch to assemblyline user
26 | USER assemblyline
27 | WORKDIR /var/lib/assemblyline
28 | CMD /bin/bash
29 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/changes.py:
--------------------------------------------------------------------------------
1 | """
2 | Messages about configuration changes internal to assemblyline.
3 |
4 | Uses standard library
5 | """
6 | from __future__ import annotations
7 | import enum
8 | import json
9 | from dataclasses import asdict, dataclass
10 |
11 |
12 | class Operation(enum.IntEnum):
13 | Added = 1
14 | Removed = 2
15 | Modified = 3
16 | Incompatible = 4
17 |
18 |
19 | @dataclass
20 | class ServiceChange:
21 | name: str
22 | operation: Operation
23 |
24 | @staticmethod
25 | def serialize(obj: ServiceChange) -> str:
26 | return json.dumps(asdict(obj))
27 |
28 | @staticmethod
29 | def deserialize(data: str) -> ServiceChange:
30 | return ServiceChange(**json.loads(data))
31 |
32 |
33 | @dataclass
34 | class SignatureChange:
35 | signature_id: str
36 | signature_type: str
37 | source: str
38 | operation: Operation
39 |
40 | @staticmethod
41 | def serialize(obj: SignatureChange) -> str:
42 | return json.dumps(asdict(obj))
43 |
44 | @staticmethod
45 | def deserialize(data: str) -> SignatureChange:
46 | return SignatureChange(**json.loads(data))
47 |
--------------------------------------------------------------------------------
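A round-trip sketch for the dataclass messages above (the service name is illustrative); note that deserialize leaves operation as a plain int, which still compares equal to the IntEnum member:

from assemblyline.odm.messages.changes import Operation, ServiceChange

change = ServiceChange(name="Extract", operation=Operation.Modified)
wire = ServiceChange.serialize(change)   # '{"name": "Extract", "operation": 3}'
restored = ServiceChange.deserialize(wire)
assert restored == change                # IntEnum == int, so the dataclasses match

--------------------------------------------------------------------------------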
/assemblyline/common/threading.py:
--------------------------------------------------------------------------------
1 | import elasticapm
2 |
3 | from concurrent.futures import ThreadPoolExecutor
4 | from elasticapm.traces import execution_context
5 |
6 |
7 | def apm_monitored(fn, *args, **kwargs):
8 | with elasticapm.capture_span(fn.__name__, "threadpool"):
9 | return fn(*args, **kwargs)
10 |
11 |
12 | class APMAwareThreadPoolExecutor(ThreadPoolExecutor):
13 | def __init__(self, *args, **kwargs):
14 | # If an APM server is defined we will get the current transaction
15 | self.apm_transaction = execution_context.get_transaction()
16 |
17 | # You are not allowed to pass your own initializer; it is reserved for APM setup
18 | kwargs.pop("initializer", None)
19 |
20 | super().__init__(initializer=self._set_apm_transaction, *args, **kwargs)
21 |
22 | def _set_apm_transaction(self):
23 | # Make sure the context is set in each thread
24 | if self.apm_transaction is not None:
25 | execution_context.set_transaction(self.apm_transaction)
26 |
27 | # Change the submit function so all subfunctions are monitored
28 | def submit(self, fn, /, *args, **kwargs):
29 | return super().submit(apm_monitored, fn, *args, **kwargs)
30 |
--------------------------------------------------------------------------------
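A usage sketch: the executor behaves like a regular ThreadPoolExecutor; when no APM transaction is active, the capture_span wrapper is effectively a no-op:

from assemblyline.common.threading import APMAwareThreadPoolExecutor

with APMAwareThreadPoolExecutor(max_workers=4) as pool:
    futures = [pool.submit(pow, 2, n) for n in range(8)]
    print([f.result() for f in futures])  # [1, 2, 4, 8, 16, 32, 64, 128]

--------------------------------------------------------------------------------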
/assemblyline/odm/messages/scaler_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"ScalerHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.scaler_heartbeat.ScalerMessage"
5 |
6 |
7 | @odm.model(description="Metrics")
8 | class Metrics(odm.Model):
9 | memory_free = odm.Float(description="Amount of free memory")
10 | cpu_free = odm.Float(description="Amount of free CPU")
11 | memory_total = odm.Float(description="Amount of total memory")
12 | cpu_total = odm.Float(description="Amount of total CPU")
13 |
14 |
15 | @odm.model(description="Heartbeat Model")
16 | class Heartbeat(odm.Model):
17 | instances = odm.Integer(description="Number of instances")
18 | metrics = odm.Compound(Metrics, description="Metrics")
19 |
20 |
21 | @odm.model(description="Model of Scaler Heartbeat Message")
22 | class ScalerMessage(odm.Model):
23 | msg = odm.Compound(Heartbeat, description="Heartbeat message")
24 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class of message")
25 | msg_type = odm.Enum(values=MSG_TYPES, default="ScalerHeartbeat", description="Type of message")
26 | sender = odm.Keyword(description="Sender of message")
27 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "editor.codeActionsOnSave": {
3 | "source.organizeImports": "explicit"
4 | },
5 | "editor.formatOnSave": true,
6 | "editor.rulers": [
7 | 120
8 | ],
9 | "editor.tabSize": 4,
10 | "editor.wordWrap": "wordWrapColumn",
11 | "editor.wordWrapColumn": 120,
12 | "files.insertFinalNewline": true,
13 | "files.trimFinalNewlines": true,
14 | "files.trimTrailingWhitespace": true,
15 | "isort.args": [
16 | "-l",
17 | "120",
18 | "--profile=black",
19 | // "--src=${workspaceFolder}"
20 | ],
21 | "python.formatting.autopep8Args": [
22 | "--max-line-length",
23 | "120",
24 | "--experimental"
25 | ],
26 | "python.formatting.provider": "autopep8",
27 | "python.formatting.blackArgs": [
28 | "--line-length=120"
29 | ],
30 | "python.linting.enabled": true,
31 | "python.linting.flake8Enabled": true,
32 | "python.linting.flake8Args": [
33 | "--max-line-length=120",
34 | //Added the ignore of E203 for now : https://github.com/PyCQA/pycodestyle/issues/373
35 | "--ignore=E203,W503"
36 | ],
37 | "python.linting.pylintEnabled": false,
38 | }
39 |
--------------------------------------------------------------------------------
/test/test_cachestore.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from assemblyline.common import forge
4 |
5 |
6 | KEY = "test1"
7 | DATA = b"THIS IS WHAT I'LL SAVE INTO THE CACHE STORE..."
8 | COMPONENT = "test_component"
9 |
10 |
11 | @pytest.fixture(scope='module')
12 | def cachestore(datastore_connection):
13 | cachestore = forge.get_cachestore(COMPONENT, datastore=datastore_connection)
14 | cachestore.datastore.cached_file.delete_by_query("id:*")
15 | cachestore.save(KEY, DATA)
16 | cachestore.datastore.cached_file.commit()
17 |
18 | return cachestore
19 |
20 |
21 | def test_expiry_field(cachestore):
22 | assert cachestore.datastore.cached_file.search("expiry_ts:*", as_obj=False)['total'] == 1
23 |
24 |
25 | def test_db_cache_entry(cachestore):
26 | key = f"{cachestore.component}_{KEY}"
27 | assert cachestore.datastore.cached_file.get(key, as_obj=False)['component'] == COMPONENT
28 |
29 |
30 | def test_cache_data(cachestore):
31 | assert cachestore.get(KEY) == DATA
32 |
33 |
34 | def test_cache_cleanup(cachestore):
35 | cachestore.delete(KEY)
36 | cachestore.datastore.cached_file.commit()
37 |
38 | assert cachestore.get(KEY) is None
39 | assert cachestore.datastore.cached_file.get(KEY, as_obj=False) is None
40 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/vacuum_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"VacuumHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.vacuum_heartbeat.VacuumMessage"
5 |
6 |
7 | @odm.model(description="Vacuum Stats")
8 | class Metrics(odm.Model):
9 | ingested = odm.Integer(description="Files ingested")
10 | # protocol = odm.Mapping(odm.Integer())
11 | safelist = odm.Integer(description="Files safelisted")
 12 |     errors = odm.Integer(description="Number of errors")
 13 |     skipped = odm.Integer(description="Files skipped")
14 |
15 |
16 | @odm.model(description="Heartbeat Model")
17 | class Heartbeat(odm.Model):
18 | # instances = odm.Integer(description="Number of instances")
19 | metrics = odm.Compound(Metrics, description="Vacuum metrics")
20 | # queues = odm.Compound(Metrics, description="Vacuum queues")
21 |
22 |
23 | @odm.model(description="Model of Vacuum Heartbeat Message")
24 | class VacuumMessage(odm.Model):
25 | msg = odm.Compound(Heartbeat, description="Hearbeat message")
26 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
27 | msg_type = odm.Enum(values=MSG_TYPES, default="VacuumHeartbeat", description="Type of message")
28 | sender = odm.Keyword(description="Sender of message")
29 |
--------------------------------------------------------------------------------
/assemblyline/common/memory_zip.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import io
4 | import zipfile
5 | from typing import Union
6 |
7 |
8 | class InMemoryZip(object):
9 | def __init__(self):
10 | # Create the in-memory file-like object
11 | self.in_memory_zip = io.BytesIO()
12 |
13 | def append(self, filename_in_zip: str, file_contents: Union[str, bytes]) -> InMemoryZip:
14 | """
15 | Appends a file with name filename_in_zip and contents of
16 | file_contents to the in-memory zip.
17 | """
 18 |
 19 |         # Get a handle to the in-memory zip in append mode; closing the
 20 |         # handle is what flushes the zip's central directory to the buffer
 21 |         with zipfile.ZipFile(self.in_memory_zip, "a", zipfile.ZIP_DEFLATED, False) as zf:
 22 |             # Write the file to the in-memory zip
 23 |             zf.writestr(filename_in_zip, file_contents)
 24 |
 25 |             # Mark the files as having been created on Windows so that
 26 |             # Unix permissions are not inferred as 0000
 27 |             for zfile in zf.filelist:
 28 |                 zfile.create_system = 0
 29 |
 30 |         return self
31 |
32 | def read(self) -> bytes:
33 | """
 34 |         Returns the raw bytes of the in-memory zip archive.
35 | """
36 |
37 | self.in_memory_zip.seek(0)
38 | return self.in_memory_zip.read()
39 |
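A minimal usage sketch: append() returns self, so calls chain, and read() yields the raw bytes of a complete archive. The file names, contents, and output path are made up:

from assemblyline.common.memory_zip import InMemoryZip

imz = InMemoryZip()
imz.append("hello.txt", "Hello, world!").append("data.bin", b"\x00\x01\x02")

# read() returns bytes for a zip archive that can be stored or sent as-is
with open("/tmp/example.zip", "wb") as out:
    out.write(imz.read())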
--------------------------------------------------------------------------------
/assemblyline/datasource/common.py:
--------------------------------------------------------------------------------
1 | import re
2 | HASH_RE = r'^[0-9a-fA-F]{32,64}$'
3 | HASH_PATTERN = re.compile(HASH_RE)
4 |
5 |
6 | class DatasourceException(Exception):
7 | pass
8 |
9 |
10 | def hash_type(value):
11 | if HASH_PATTERN.match(value):
12 | return {
13 | 32: "md5", 40: "sha1", 64: "sha256"
14 | }.get(len(value), "invalid")
15 | else:
16 | return "invalid"
17 |
18 |
19 | # noinspection PyUnusedLocal
20 | class Datasource(object):
21 | @staticmethod
22 | def hash_type(value):
23 | return hash_type(value)
24 |
25 | # Subclasses should implement the following methods.
26 | def __init__(self, log, **kw): # pylint: disable=W0613
27 | self.log = log
28 |
29 | def parse(self, result, **kw): # pylint: disable=W0613
30 | pass
31 |
32 | def query(self, value, **kw): # pylint: disable=W0613
33 | pass
34 |
35 |
36 | # noinspection PyMethodMayBeStatic,PyUnusedLocal
37 | class Null(object):
38 | def __init__(self, e=None):
39 | self.e = e
40 |
41 | def parse(self, result, **kw): # pylint: disable=W0613
42 | return []
43 |
44 | def query(self, value, **kw): # pylint: disable=W0613
45 | if self.e:
46 | raise self.e # pylint: disable=E0702
47 |
48 | return []
49 |
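The length-based dispatch in hash_type() is easy to check interactively; a short sketch (the first two values are the well-known digests of the empty string):

from assemblyline.datasource.common import hash_type

assert hash_type("d41d8cd98f00b204e9800998ecf8427e") == "md5"           # 32 hex chars
assert hash_type("da39a3ee5e6b4b0d3255bfef95601890afd80709") == "sha1"  # 40 hex chars
assert hash_type("not-a-hash") == "invalid"                             # fails HASH_PATTERN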
--------------------------------------------------------------------------------
/assemblyline/odm/messages/elastic_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"ElasticHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.elastic_heartbeat.ElasticMessage"
5 |
6 |
7 | @odm.model(description="Information about an elasticsearch shard")
8 | class IndexData(odm.Model):
9 | name = odm.keyword()
10 | shard_size = odm.integer()
11 |
12 |
13 | @odm.model(description="Heartbeat Model for Elasticsearch")
14 | class Heartbeat(odm.Model):
15 | instances = odm.Integer(description="Number of Elasticsearch instances with assigned shards")
16 | unassigned_shards = odm.Integer(description="Number of unassigned shards in the cluster")
17 | request_time = odm.Float(description="Time to load shard metrics")
18 | shard_sizes = odm.sequence(odm.compound(IndexData), description="Information about each index")
19 |
20 |
21 | @odm.model(description="Model of Elasticsearch Heartbeat Message")
22 | class ElasticMessage(odm.Model):
23 | msg = odm.Compound(Heartbeat, description="Heartbeat message for elasticsearch")
24 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
25 | msg_type = odm.Enum(values=MSG_TYPES, default="ElasticHeartbeat", description="Type of message")
26 | sender = odm.Keyword(description="Sender of message")
27 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/user_favorites.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 | Classification = forge.get_classification()
4 |
5 |
6 | @odm.model(index=False, store=False, description="Abstract Model of Favorite")
7 | class Favorite(odm.Model):
8 | created_by = odm.Keyword(description="Who created the favorite")
9 | classification = odm.Classification(is_user_classification=True, copyto="__text__",
10 | default=Classification.UNRESTRICTED,
11 | description="Classification of the favorite")
12 | name = odm.Keyword(description="Name of the favorite")
13 | query = odm.Keyword(description="Query for the favorite")
14 |
15 |
16 | @odm.model(index=False, store=False, description="Model of User Favorites")
17 | class UserFavorites(odm.Model):
18 | alert = odm.List(odm.Compound(Favorite), default=[], description="Alert page favorites")
19 | error = odm.List(odm.Compound(Favorite), default=[], description="Error page favorites")
20 | search = odm.List(odm.Compound(Favorite), default=[], description="Search page favorites")
21 | signature = odm.List(odm.Compound(Favorite), default=[], description="Signature page favorites")
22 | submission = odm.List(odm.Compound(Favorite), default=[], description="Submission page favorites")
23 |
--------------------------------------------------------------------------------
/docker/al_dev/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11-slim-bookworm
2 |
  3 | # Set up environment variables
4 | ENV PYTHONPATH /opt/alv4/assemblyline-base:/opt/alv4/assemblyline-core:/opt/alv4/assemblyline-service-server:/opt/alv4/assemblyline-service-client:/opt/alv4/assemblyline_client:/opt/alv4/assemblyline-ui
5 |
6 | # Upgrade packages
7 | RUN apt-get update && apt-get -yy upgrade && rm -rf /var/lib/apt/lists/*
8 |
  9 | # SSDEEP package requirements
10 | RUN apt-get update && apt-get install -yy build-essential libssl-dev libffi-dev libfuzzy-dev libldap2-dev libsasl2-dev libmagic1 zip 7zip && rm -rf /var/lib/apt/lists/*
11 |
12 | # Python packages requirements
13 | RUN pip install --no-warn-script-location --no-cache-dir \
14 | assemblyline[test] \
15 | assemblyline-core \
16 | assemblyline-ui \
17 | assemblyline-service-server \
18 | debugpy \
19 | && pip uninstall -y \
20 | assemblyline \
21 | assemblyline-core \
22 | assemblyline-ui \
23 | assemblyline-service-server \
24 | && rm -rf ~/.cache/pip
25 |
26 |
27 | # Create Assemblyline source directory
28 | RUN mkdir -p /etc/assemblyline
29 | RUN mkdir -p /var/cache/assemblyline
30 | RUN mkdir -p /var/lib/assemblyline
31 | RUN mkdir -p /var/lib/assemblyline/flowjs
32 | RUN mkdir -p /var/lib/assemblyline/bundling
33 | RUN mkdir -p /var/log/assemblyline
34 | RUN mkdir -p /opt/alv4
35 | WORKDIR /opt/alv4
36 |
37 | CMD pip list
38 |
--------------------------------------------------------------------------------
/test/classification.yml:
--------------------------------------------------------------------------------
1 | enforce: true
2 | groups:
3 | - aliases: [DEPTS, ANY]
4 | description: Users of department 1.
5 | name: DEPARTMENT 1
6 | short_name: D1
7 | solitary_display_name: ANY
8 | - aliases: [DEPTS]
9 | description: Users of department 2.
10 | name: DEPARTMENT 2
11 | short_name: D2
12 | levels:
13 | - aliases: []
14 | css: {banner: alert-default, label: label-default, text: text-muted}
15 | description: No restrictions applied to data.
16 | lvl: 100
17 | name: UNRESTRICTED
18 | short_name: U
19 | - aliases: [CLASSIFIED, DO NOT LOOK]
20 | css: {banner: alert-info, label: label-primary, text: text-primary}
21 | description: Data restricted to a certain few...
22 | lvl: 200
23 | name: RESTRICTED
24 | short_name: R
25 | required:
26 | - aliases: []
27 | description: Gotta be a super user to see this!
28 | name: SUPER USER
29 | require_lvl: 200
30 | short_name: SU
31 | - aliases: [GOD]
32 | description: Gotta be an administrator to see this!
33 | name: ADMIN
34 | short_name: ADM
35 | restricted: R//GOD//ANY
36 | subgroups:
37 | - aliases: []
 38 |   description: Users of group 1 (which are part of department 1).
39 | limited_to_group: D1
40 | name: GROUP 1
41 | require_group: D1
42 | short_name: G1
43 | - aliases: []
44 | description: Users of group 2 (can be part of any department).
45 | name: GROUP 2
46 | short_name: G2
47 | unrestricted: U
--------------------------------------------------------------------------------
/LICENCE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Crown Copyright, Government of Canada (Canadian Centre for Cyber Security / Communications Security Establishment)
4 |
5 | Copyright title to all 3rd party software distributed with Assemblyline (AL) is held by the respective copyright holders as noted in those files. Users are asked to read the 3rd Party Licenses referenced with those assets.
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8 |
9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10 |
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
12 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/heuristic.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 | from assemblyline.odm.models.statistics import Statistics
4 |
5 | Classification = forge.get_classification()
6 |
7 |
8 | @odm.model(index=True, store=True, description="Model of Service Heuristics")
9 | class Heuristic(odm.Model):
10 | attack_id = odm.List(odm.Keyword(copyto="__text__"), default=[], description="List of all associated ATT&CK IDs")
11 | classification = odm.Classification(default=Classification.UNRESTRICTED,
12 | description="Classification of the heuristic")
13 | description = odm.Text(copyto="__text__", description="Description of the heuristic")
14 | filetype = odm.Keyword(copyto="__text__", description="What type of files does this heuristic target?")
15 | heur_id = odm.Keyword(copyto="__text__", description="ID of the Heuristic")
16 | name = odm.Keyword(copyto="__text__", description="Name of the heuristic")
17 | score = odm.Integer(description="Default score of the heuristic")
18 | signature_score_map = odm.Mapping(odm.Integer(), default={},
19 | description="Score of signatures for this heuristic")
20 | stats = odm.Compound(Statistics, default={}, description="Statistics related to the Heuristic")
21 | max_score = odm.Optional(odm.Integer(), description="Maximum score for heuristic")
22 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/service_timing_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.odm.messages import PerformanceTimer
3 |
4 | MSG_TYPES = {"ServiceTimingHeartbeat"}
5 | LOADER_CLASS = "assemblyline.odm.messages.service_heartbeat.ServiceTimingMessage"
6 |
7 |
8 | @odm.model(description="Timing Metrics")
9 | class Metrics(odm.Model):
10 | execution = PerformanceTimer(description="Excution time")
11 | execution_count = odm.Integer(description="Number of executes")
12 | idle = PerformanceTimer(description="Idle time")
13 | idle_count = odm.Integer(description="Number of idles")
14 |
15 |
16 | @odm.model(description="Hearbeat Model")
17 | class Heartbeat(odm.Model):
18 | instances = odm.Integer(description="Number of instances")
19 | metrics = odm.Compound(Metrics, description="Metrics")
20 | queue = odm.Integer(description="Queue size")
21 | service_name = odm.Keyword(description="Name of service")
22 |
23 |
24 | @odm.model(description="Model of Service Timing Heartbeat Message")
25 | class ServiceTimingMessage(odm.Model):
26 | msg = odm.Compound(Heartbeat, description="Heartbeat message")
27 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
28 | msg_type = odm.Enum(values=MSG_TYPES, default="ServiceTimingHeartbeat", description="Type of message")
29 | sender = odm.Keyword(description="Sender of message")
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # IDE files
10 | .pydevproject
11 | .python-version
12 | .idea
13 | */.mypy_cache/*
14 |
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | share/python-wheels/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | MANIFEST
34 | VERSION
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .nox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | *.py,cover
57 | .hypothesis/
58 | .pytest_cache/
59 | cover/
60 |
61 | # Translations
62 | *.mo
63 | *.pot
64 |
65 | # PyBuilder
66 | .pybuilder/
67 | target/
68 |
69 | # Jupyter Notebook
70 | .ipynb_checkpoints
71 |
72 | # IPython
73 | profile_default/
74 | ipython_config.py
75 |
76 | # Environments
77 | .env
78 | .venv
79 | env/
80 | venv/
81 | ENV/
82 | env.bak/
83 | venv.bak/
84 |
85 | # Cython debug symbols
86 | cython_debug/
87 | assemblyline/common/frequency.c
88 |
89 | # MacOS
90 | .DS_Store
91 |
--------------------------------------------------------------------------------
/dev/hauntedhouse/config/core.json:
--------------------------------------------------------------------------------
1 | {
2 | "authentication": {
3 | "static_keys": [
4 | {
5 | "key": "insecure-worker-key",
6 | "roles": [
7 | "Worker"
8 | ]
9 | },
10 | {
11 | "key": "insecure-search-key",
12 | "roles": [
13 | "Search"
14 | ]
15 | },
16 | {
17 | "key": "insecure-ingest-key",
18 | "roles": [
19 | "Ingest"
20 | ]
21 | }
22 | ]
23 | },
24 | "database": {
25 | "SQLite": {
26 | "path": "/data/sqlite/staging.db"
27 | }
28 | },
29 | "core": {
30 | "batch_limit_seconds": 60,
31 | "batch_limit_size": 500
32 | },
33 | "cache": {
34 | "Directory": {
35 | "path": "/tmp/",
36 | "size": "100Gi"
37 | }
38 | },
39 | "files": {
40 | "S3": {
41 | "access_key_id": "al_storage_key",
42 | "secret_access_key": "Ch@ngeTh!sPa33w0rd",
43 | "endpoint_url": "http;//minio:9000",
44 | "region_name": "local",
45 | "bucket": "al-storage",
46 | "no_tls_verify": true
47 | }
48 | },
49 | "blobs": {
50 | "S3": {
51 | "access_key_id": "al_storage_key",
52 | "secret_access_key": "Ch@ngeTh!sPa33w0rd",
53 | "endpoint_url": "http;//minio:9000",
54 | "region_name": "local",
55 | "bucket": "retrohunt-storage",
56 | "no_tls_verify": true
57 | }
58 | },
59 | "bind_address": "0.0.0.0:4443",
60 | "tls": null
61 | }
62 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/dispatching.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model(description="These are messages sent by dispatcher on the watch queue")
5 | class WatchQueueMessage(odm.Model):
6 | cache_key = odm.Optional(odm.Keyword(), description="Cache key")
7 | status = odm.Enum(values=['FAIL', 'OK', 'START', 'STOP'], description="Watch statuses")
8 |
9 |
10 | CREATE_WATCH = 'CREATE_WATCH'
11 | LIST_OUTSTANDING = 'LIST_OUTSTANDING'
12 | UPDATE_BAD_SID = 'UPDATE_BAD_SID'
13 |
14 |
15 | @odm.model(description="Create Watch Message")
16 | class CreateWatch(odm.Model):
17 | queue_name: str = odm.Keyword(description="Name of queue")
18 | submission: str = odm.Keyword(description="Submission ID")
19 |
20 |
21 | @odm.model(description="List Outstanding Message")
22 | class ListOutstanding(odm.Model):
23 | response_queue: str = odm.Keyword(description="Response queue")
24 | submission: str = odm.Keyword(description="Submission ID")
25 |
26 |
27 | MESSAGE_CLASSES = {
28 | CREATE_WATCH: CreateWatch,
29 | LIST_OUTSTANDING: ListOutstanding,
30 | UPDATE_BAD_SID: str
31 | }
32 |
33 |
34 | @odm.model(description="Model of Dispatcher Command Message")
35 | class DispatcherCommandMessage(odm.Model):
36 | kind: str = odm.Enum(values=list(MESSAGE_CLASSES.keys()), description="Kind of message")
37 | payload_data = odm.Any(description="Message payload")
38 |
39 | def payload(self):
40 | return MESSAGE_CLASSES[self.kind](self.payload_data)
41 |
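A minimal sketch of the command round trip, assuming the usual assemblyline odm dict construction; the queue and submission IDs are made up. The kind field selects the model class that payload() rebuilds from the untyped payload_data:

from assemblyline.odm.messages.dispatching import CREATE_WATCH, DispatcherCommandMessage

command = DispatcherCommandMessage({
    "kind": CREATE_WATCH,
    "payload_data": {"queue_name": "watch-1234", "submission": "sid-1234"},
})
watch = command.payload()  # rebuilt as a CreateWatch model via MESSAGE_CLASSES
print(watch.queue_name, watch.submission)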
--------------------------------------------------------------------------------
/assemblyline/odm/models/apikey.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 | from assemblyline.common.str_utils import StringTable
4 | from assemblyline.odm.models.user import ACL_MAP, USER_ROLES
5 |
6 |
7 | APIKEY_ID_DELIMETER = "+"
8 | APIKEY_ID_FORMAT = "{}"+ APIKEY_ID_DELIMETER + "{}"
9 | FORBIDDEN_APIKEY_CHARACTERS = '[+@!#$%^&*()<>?/\|}{~:]'
10 |
11 |
12 |
13 | @odm.model(index=True, store=True, description="Model of Apikey")
14 | class Apikey(odm.Model):
15 | acl = odm.List(odm.Enum(values=ACL_MAP.keys()), description="Access Control List for the API key")
16 | password = odm.Keyword(description="BCrypt hash of the password for the apikey")
17 | roles = odm.List(odm.Enum(values=USER_ROLES), default=[], description="List of roles tied to the API key")
18 | uname = odm.Keyword(copyto="__text__", description="Username")
19 | key_name = odm.Keyword(copyto="__text__", description="Name of the key")
20 | creation_date = odm.Date(default="NOW", description="The date this API key is created.")
21 | expiry_ts = odm.Optional(odm.Date(), description="Expiry timestamp.")
 22 |     last_used = odm.Optional(odm.Date(), description="The last time this API key was used.")
23 |
 24 | def get_apikey_id(keyname: str, uname: str):
25 | return APIKEY_ID_FORMAT.format(keyname, uname)
26 |
27 | def split_apikey_id(key_id: str):
28 | data = key_id.split(APIKEY_ID_DELIMETER)
29 | username = data[1]
30 | keyname = data[0]
31 |
32 | return keyname, username
33 |
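Because "+" is in FORBIDDEN_APIKEY_CHARACTERS, it can never appear in a key name, so splitting on the delimiter is unambiguous. A short sketch with made-up names:

from assemblyline.odm.models.apikey import get_apikey_id, split_apikey_id

key_id = get_apikey_id("mykey", "admin")     # -> "mykey+admin"
keyname, username = split_apikey_id(key_id)  # -> ("mykey", "admin")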
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Assemblyline contributing guide
2 |
3 | This guide covers the basics of how to contribute to the Assemblyline project.
4 |
5 | Python code should follow the PEP8 guidelines defined here: [PEP8 Guidelines](https://www.python.org/dev/peps/pep-0008/).
6 |
  7 | ## Tell us what you want to build/fix
8 | Before you start coding anything you should connect with the Assemblyline community via the [Assemblyline Discord server](https://discord.gg/GUAy9wErNu) and/or the [central Assemblyline GitHub project](https://github.com/CybercentreCanada/assemblyline/issues) to make sure no one else is working on the same thing and that whatever you are going to build still fits with the vision of the system.
9 |
10 | ## Git workflow
11 |
 12 | - Fork the repo to your own account
13 | - Checkout and pull the latest commits from the master branch
14 | - Make a branch
15 | - Work in any way you like and make sure your changes actually work
 16 | - When you're satisfied with your changes, create a pull request against the main assemblyline repo
17 |
18 | #### Transfer your service repo
19 | If you've worked on a new service that you want to be included in the default service selection you'll have to transfer the repo into our control.
20 |
21 | #### You are not allowed to merge:
22 |
23 | Even if you try to merge in your pull request, you will be denied. Only a few people in our team are allowed to merge code into our repositories.
24 |
25 | We check for new pull requests every day and will merge them in once they have been approved by someone in our team.
26 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/scaler_status_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"ScalerStatusHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.scaler_status_heartbeat.ScalerStatusMessage"
5 |
6 |
7 | @odm.model(description="Service Status Model")
8 | class Status(odm.Model):
9 | running = odm.Integer(description="Number of instances running")
10 | target = odm.Integer(description="Target scaling for service")
11 | minimum = odm.Integer(description="Minimum number of instances")
12 | maximum = odm.Integer(description="Maximum number of instances")
13 | dynamic_maximum = odm.Integer(description="Dynamic maximum number of instances")
14 | queue = odm.Integer(description="Service queue")
15 | pressure = odm.Float(description="Service pressure")
16 | duty_cycle = odm.Float(description="Duty Cycle")
17 |
18 |
19 | @odm.model(description="Hearbeat Model")
20 | class Heartbeat(odm.Model):
21 | service_name = odm.Keyword(description="Name of service")
22 | metrics = odm.Compound(Status, description="Status of service")
23 |
24 |
25 | @odm.model(description="Model of Scaler's Status Heartbeat Message")
26 | class ScalerStatusMessage(odm.Model):
27 | msg = odm.Compound(Heartbeat, description="Heartbeat message")
28 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
29 | msg_type = odm.Enum(values=MSG_TYPES, default="ScalerStatusHeartbeat", description="Type of message")
30 | sender = odm.Keyword(description="Sender of message")
31 |
--------------------------------------------------------------------------------
/dev/depends/config/filebeat.docker.yml:
--------------------------------------------------------------------------------
1 | filebeat.inputs:
2 | - type: container
3 | format: docker
4 | paths:
5 | - '/var/lib/docker/containers/*/*.log'
6 | stream: "all"
7 | json:
8 | keys_under_root: true
9 | message_key: message
10 | ignore_decoding_error: true
11 | processors:
12 | - rename:
13 | fields:
14 | - from: "error"
15 | to: "error.message"
16 | ignore_missing: true
17 | - script:
18 | lang: javascript
19 | id: log_level
20 | source: >
21 | function process(event) {
22 | var value = event.Get("log.level");
23 | if (value === null){
24 | value = "INFO"
25 | }
26 | else if (value.toLowerCase() == "warn"){
27 | value = "WARNING"
28 | }
29 | else if (value.toLowerCase() == "err"){
30 | value = "ERROR"
31 | }
32 | event.Put("log.level", value.toUpperCase());
33 | }
34 |
35 | logging:
36 | level: warning
37 | json: true
38 |
39 | processors:
40 | - add_cloud_metadata: ~
41 | - add_docker_metadata: ~
42 |
43 |
44 | output.elasticsearch:
45 | hosts: 'elasticsearch:9200'
46 | username: elastic
47 | password: devpass
48 |
49 | setup.template.settings:
50 | index.number_of_shards: 1
51 | index.number_of_replicas: 0
52 | setup.ilm:
53 | enabled: true
54 | policy_file: /usr/share/filebeat/filebeat_policy.json
55 |
--------------------------------------------------------------------------------
/assemblyline/common/hexdump.py:
--------------------------------------------------------------------------------
1 | import binascii
2 |
3 | from assemblyline.common.chunk import chunk
4 |
5 | FILTER = b''.join([bytes([x]) if x in range(32, 127) else b'.' for x in range(256)])
6 |
7 |
8 | def dump(binary: bytes, size: int = 2, sep: bytes = b" ") -> bytes:
9 | hexstr = binascii.hexlify(binary)
10 | return sep.join(chunk(hexstr, size))
11 |
12 |
13 | def load(hexstr: bytes) -> bytes:
14 | return binascii.unhexlify(hexstr)
15 |
16 |
17 | def hexdump(binary: bytes, length: int = 16, indent: str = "", indent_size: int = 0, newline: str = '\n',
18 | prefix_offset: int = 0) -> str:
19 | """
20 | Create a string buffer that shows the given data in hexdump format.
21 |
 22 |     binary -> source buffer
 23 |     length = 16 -> number of bytes per line
 24 |     indent = "" -> indentation before each line
 25 |     indent_size = 0 -> number of times to repeat that indentation
 26 |     newline = "\n" -> string used as the line separator; prefix_offset = 0 -> offset added to the printed addresses
27 |
28 | Example of output:
29 | 00000000: 48 54 54 50 2F 31 2E 31 20 34 30 34 20 4E 6F 74 HTTP/1.1 404 Not
30 | 00000010: 20 46 6F 75 6E 64 0D 0A 43 6F 6E 74 Found..Cont
31 | ...
32 | """
33 | generator = chunk(binary, length)
34 | line_frmt = "%%s%%08X: %%-%ss %%s" % ((length * 3) - 1)
35 |
36 | out = [line_frmt % (indent * indent_size, prefix_offset + (addr * length), dump(d).decode(),
37 | d.translate(FILTER).decode())
38 | for addr, d in enumerate(generator)]
39 | return newline.join(out)
40 |
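A quick usage sketch; the expected output in the comments follows the format string above (spacing approximate):

from assemblyline.common.hexdump import hexdump

print(hexdump(b"HTTP/1.1 404 Not Found\r\n"))
# 00000000: 48 54 54 50 2F 31 2E 31 20 34 30 34 20 4E 6F 74  HTTP/1.1 404 Not
# 00000010: 20 46 6F 75 6E 64 0D 0A                           Found..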
--------------------------------------------------------------------------------
/assemblyline/odm/messages/retrohunt_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"RetrohuntHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.retrohunt_heartbeat.RetrohuntMessage"
5 |
6 |
7 | @odm.model(description="Heartbeat Model for retrohunt")
8 | class Heartbeat(odm.Model):
9 | instances = odm.Integer(description="Number of retrohunt workers")
10 | request_time = odm.Optional(odm.Float(description="Time to load metrics"))
11 | pending_files = odm.integer(description="Files not yet available for searching")
12 | ingested_last_minute = odm.integer(description="Files ingested in last minute")
13 | worker_storage_available = odm.integer(description="Free storage for most depleted worker")
14 | total_storage_available = odm.integer(description="Free storage across workers")
15 | active_searches = odm.integer(description="Number of currently running searches")
16 | last_minute_cpu = odm.Float(description="Last minute cpu load across all workers")
17 | total_memory_used = odm.Float(description="Estimated current memory use across all workers")
18 |
19 |
20 | @odm.model(description="Model of retrohunt heartbeat message")
21 | class RetrohuntMessage(odm.Model):
22 | msg = odm.Compound(Heartbeat, description="Heartbeat message for retrohunt")
23 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
24 | msg_type = odm.Enum(values=MSG_TYPES, default="RetrohuntHeartbeat", description="Type of message")
25 | sender = odm.Keyword(description="Sender of message")
26 |
--------------------------------------------------------------------------------
/dev/depends/config/metricbeat.docker.yml:
--------------------------------------------------------------------------------
1 | logging:
2 | level: warning
3 | json: true
4 |
5 | metricbeat.modules:
6 | - module: system
7 | metricsets:
8 | - cpu
9 | - load
10 | - memory
11 | - network
12 | - process
13 | - process_summary
14 | - uptime
15 | - socket_summary
16 | - diskio
17 | - fsstat
18 | - socket
19 | enabled: true
20 | period: 10s
21 | processes: ['.*']
22 |
23 | # Configure the metric types that are included by these metricsets.
24 | cpu.metrics: ["percentages"] # The other available options are normalized_percentages and ticks.
25 | core.metrics: ["percentages"]
26 | - module: redis
27 | metricsets:
28 | - "info"
29 | - "keyspace"
30 | period: 10s
31 | hosts: ["redis:6379"]
32 | - module: docker
33 | metricsets:
34 | - "container"
35 | - "cpu"
36 | - "diskio"
37 | - "event"
38 | - "healthcheck"
39 | - "info"
40 | - "memory"
41 | - "network"
42 | hosts: ["unix:///var/run/docker.sock"]
43 | period: 10s
44 | enabled: true
45 | processors:
46 | - add_docker_metadata: ~
47 |
48 | output.console:
49 | enabled: false
50 |
51 | output.elasticsearch:
52 | hosts: 'elasticsearch:9200'
53 | username: elastic
54 | password: devpass
55 |
56 | processors:
57 | - add_cloud_metadata: ~
58 |
59 | setup.template.settings:
60 | index.number_of_shards: 1
61 | index.number_of_replicas: 0
62 | setup.ilm:
63 | enabled: true
64 | policy_file: /usr/share/metricbeat/metricbeat_policy.json
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/lock.py:
--------------------------------------------------------------------------------
1 |
2 | from assemblyline.common.uid import get_random_id
3 | from assemblyline.remote.datatypes import get_client, retry_call
4 |
5 | lock_acquire_script = """
6 | local lock_holder = ARGV[1]
7 | local uuid = ARGV[2]
8 | local timeout = ARGV[3]
9 | if redis.call('setnx', lock_holder, uuid) == 1 then
10 | redis.call('expire', lock_holder, timeout)
11 | return true
12 | end
13 | return false
14 | """
15 |
16 | lock_release_script = """
17 | local lock_holder = ARGV[1]
18 | local lock_release = ARGV[2]
19 | local uuid = ARGV[3]
20 | if redis.call('get', lock_holder) == uuid then
21 | redis.call('del', lock_holder)
22 | redis.call('rpush', lock_release, uuid)
23 | redis.call('expire', lock_release, 1)
24 | end
25 | """
26 |
27 |
28 | class Lock(object):
29 | def __init__(self, name, timeout, host=None, port=None):
30 | self.uuid = get_random_id()
31 | self.c = get_client(host, port, False)
32 | self.lock_release = '-'.join(('lock', str(timeout), name, 'released'))
33 | self.lock_holder = '-'.join(('lock', str(timeout), name, 'holder'))
34 | self.timeout = timeout
35 | self._acquire = self.c.register_script(lock_acquire_script)
36 | self._release = self.c.register_script(lock_release_script)
37 |
38 | def __enter__(self):
39 | while not retry_call(self._acquire, args=[self.lock_holder, self.uuid, self.timeout]):
40 | retry_call(self.c.blpop, self.lock_release, 1)
41 |
42 | def __exit__(self, unused1, unused2, unused3):
43 | retry_call(self._release, args=[self.lock_holder, self.lock_release, self.uuid])
44 |
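Usage is via the context manager; a sketch with hypothetical Redis connection details and a hypothetical critical section. Acquisition loops on the setnx script, blocking on the release list instead of busy-waiting, and the key's expiry bounds how long a crashed holder can block others:

from assemblyline.remote.datatypes.lock import Lock

with Lock("shared-resource", timeout=30, host="localhost", port=6379):
    do_exclusive_work()  # only one holder of "shared-resource" runs at a time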
--------------------------------------------------------------------------------
/assemblyline/odm/messages/alerter_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"AlerterHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.alerter_heartbeat.AlerterMessage"
5 |
6 |
7 | @odm.model(description="Alerter Queues")
8 | class Queues(odm.Model):
9 | alert = odm.Integer(description="Number of alerts in queue")
10 | alert_retry = odm.Integer(description="Number of alerts in retry queue")
11 |
12 |
13 | @odm.model(description="Alerter Metrics")
14 | class Metrics(odm.Model):
15 | created = odm.Integer(description="Number of alerts created")
16 | error = odm.Integer(description="Number of alerts with errors")
17 | received = odm.Integer(description="Number of alerts received")
18 | updated = odm.Integer(description="Number of alerts updated")
19 | wait = odm.Integer(description="Number of alerts waiting for submission to complete")
20 |
21 |
22 | @odm.model(description="Heartbeat Model for Alerter")
23 | class Heartbeat(odm.Model):
24 | instances = odm.Integer(description="Number of Alerter instances")
25 | metrics = odm.Compound(Metrics, description="Alert metrics")
26 | queues = odm.Compound(Queues, description="Alert queues")
27 |
28 |
29 | @odm.model(description="Model of Alerter Heartbeat Message")
30 | class AlerterMessage(odm.Model):
31 | msg = odm.Compound(Heartbeat, description="Heartbeat message from Alerter")
32 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
33 | msg_type = odm.Enum(values=MSG_TYPES, default="AlerterHeartbeat", description="Type of message")
34 | sender = odm.Keyword(description="Sender of message")
35 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/archive_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"ArchiveHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.archive_heartbeat.ArchiveMessage"
5 |
6 |
7 | @odm.model(description="Archive Metrics")
8 | class Metrics(odm.Model):
9 | # Indices metrics
10 | file = odm.Integer(description="Number of files archived")
11 | result = odm.Integer(description="Number of results archived")
12 | submission = odm.Integer(description="Number of submissions archived")
13 | # Messaging metrics
14 | received = odm.Integer(description="Number of received archive messages")
15 | exception = odm.Integer(description="Number of exceptions during archiving")
16 | invalid = odm.Integer(description="Number of invalid archive type errors during archiving")
17 | not_found = odm.Integer(description="Number of submission not found failures during archiving")
18 |
19 |
20 | @odm.model(description="Archive Heartbeat Model")
21 | class Heartbeat(odm.Model):
22 | instances = odm.Integer(description="Number of instances")
23 | metrics = odm.Compound(Metrics, description="Archive metrics")
24 | queued = odm.Integer(description="Number of documents to be archived")
25 |
26 |
27 | @odm.model(description="Model for Archive Heartbeat Messages")
28 | class ArchiveMessage(odm.Model):
29 | msg = odm.Compound(Heartbeat, description="Heartbeat message")
30 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
31 | msg_type = odm.Enum(values=MSG_TYPES, default="ArchiveHeartbeat", description="Message type")
32 | sender = odm.Keyword(description="Sender of message")
33 |
--------------------------------------------------------------------------------
/assemblyline/datasource/alert.py:
--------------------------------------------------------------------------------
1 | from assemblyline.common import forge
2 | from assemblyline.datasource.common import Datasource
3 |
4 | Classification = forge.get_classification()
5 |
6 |
7 | class Alert(Datasource):
8 | def __init__(self, log, **kw):
9 | super(Alert, self).__init__(log, **kw)
10 | self.datastore = forge.get_datastore()
11 |
12 | def parse(self, results, **kw):
13 | return results
14 |
15 | def query(self, value, **kw):
16 | hash_type = self.hash_type(value)
17 |
18 | query = "file.%s:%s OR file.%s:%s" % (
19 | hash_type, value.lower(), hash_type, value.upper()
20 | )
21 |
22 | res = self.datastore.alert.search(query, rows=5, sort="al.score desc",
23 | access_control=kw['access_control'], as_obj=False)
24 |
25 | count = res['total']
26 | if count <= 0:
27 | return []
28 |
29 | data = []
30 | item = {
31 | "confirmed": False,
32 | "data": data,
33 | "description": "Alerted on %s times" % str(count),
34 | "malicious": False,
35 | }
36 |
37 | for r in res['items']:
38 | score = r['al']['score']
39 | if score >= 500:
40 | item['malicious'] = True
41 | if score >= 2000 or score <= -100:
42 | item['confirmed'] = True
43 |
44 | data.append({
45 | "classification": r['classification'],
46 | "date": r['reporting_ts'],
47 | "id": r['id'],
48 | "score": r['al']['score'],
49 | })
50 |
51 | return [item]
52 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/file.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.odm.models.ontology.filetypes import PE
3 |
4 |
5 | @odm.model(description="File Characteristics")
6 | class File(odm.Model):
7 | # Common information
8 | md5 = odm.MD5(description="MD5 of file")
9 | sha1 = odm.SHA1(description="SHA1 of file")
10 | sha256 = odm.SHA256(description="SHA256 of file")
11 | type = odm.Optional(odm.Keyword(description="Type of file as identified by Assemblyline"))
12 | size = odm.Integer(description="Size of the file in bytes")
13 | names = odm.Optional(odm.List(odm.Text()), description="Known filenames associated to file")
14 | parent = odm.Optional(odm.SHA256(), description="Absolute parent of file relative to submission")
15 |
16 | # Specialized information (List from Tagging.File)
17 | # apk = odm.Optional(odm.Compound(APK), description="APK File Properties")
18 | # jar = odm.Optional(odm.Compound(JAR), description="JAR File Properties")
19 | # img = odm.Optional(odm.Compound(IMG), description="Image File Properties")
20 | # ole = odm.Optional(odm.Compound(OLE), description="OLE File Properties")
21 | pe = odm.Optional(odm.Compound(PE), description="Properties related to PE")
22 | # pdf = odm.Optional(odm.Compound(PDF), description="PDF File Properties")
23 | # plist = odm.Optional(odm.Compound(PList), description="PList File Properties")
24 | # powershell = odm.Optional(odm.Compound(PowerShell), description="PowerShell File Properties")
25 | # shortcut = odm.Optional(odm.Compound(Shortcut), description="Shortcut File Properties")
26 | # swf = odm.Optional(odm.Compound(SWF), description="SWF File Properties")
27 |
--------------------------------------------------------------------------------
/assemblyline/common/path.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | import os
3 | import string
4 | import sys
5 | from typing import Optional
6 |
7 |
8 | def modulepath(modulename: str) -> str:
9 | m = sys.modules[modulename]
10 | f = getattr(m, '__file__', None)
11 | if not f:
12 | return os.path.abspath(os.getcwd())
13 | return os.path.dirname(os.path.abspath(f))
14 |
15 |
16 | def splitpath(path: str, sep: Optional[str] = None) -> list:
17 | """ Split the path into a list of items """
18 | return list(filter(len, path.split(sep or os.path.sep)))
19 |
20 |
21 | def strip_path_inclusion(path: str, base: str) -> str:
22 | path = path.replace("\\", os.path.sep).replace("/", os.path.sep)
23 | return path if os.path.abspath(os.path.join(base, path)).startswith(base) else os.path.basename(path)
24 |
25 |
26 | ASCII_NUMBERS = list(range(48, 58))
27 | ASCII_UPPER_CASE_LETTERS = list(range(65, 91))
28 | ASCII_LOWER_CASE_LETTERS = list(range(97, 123))
29 | ASCII_OTHER = [45, 46, 92] # "-", ".", and "\"
30 |
 31 | # Create a list that contains all of the valid characters that
 32 | # are allowed to appear in a Universal Naming Convention (UNC) path.
33 | VALID_UNC_CHARS = [chr(x) for x in ASCII_LOWER_CASE_LETTERS +
34 | ASCII_UPPER_CASE_LETTERS + ASCII_NUMBERS + ASCII_OTHER]
35 |
36 |
37 | def is_unc_legal(path: str) -> bool:
38 | """Determine whether or not a given string representing a Windows file path is legal
39 | or not as per the Unified Naming Convention (UNC) specifications."""
40 | if len(path) <= 0:
41 | return False
42 |
43 | for char in path:
44 | if char not in VALID_UNC_CHARS:
45 | return False
46 | return True
47 |
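A quick sketch of the two path helpers; the inputs are made up, and the first call assumes a POSIX path separator:

from assemblyline.common.path import is_unc_legal, strip_path_inclusion

strip_path_inclusion("../../etc/passwd", "/home/al-user")  # -> "passwd"
is_unc_legal(r"\\server\share\file.txt")                   # -> True
is_unc_legal("has a space")                                # -> False (space is not a valid UNC char)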
--------------------------------------------------------------------------------
/assemblyline/common/signaturing.py:
--------------------------------------------------------------------------------
1 | # TODO: Are we still using this?
2 |
3 | import re
4 |
5 |
6 | _operators = {
7 | 'in': lambda args: lambda x: x in args,
8 | 'not in': lambda args: lambda x: x not in args,
9 | 'regexp': lambda args: re.compile(*args).match,
10 | }
11 |
12 |
13 | def _transform(condition):
14 | if isinstance(condition, str):
15 | args = [condition]
16 | func = 'regexp'
17 | else:
18 | args = list(condition[1:])
19 | func = condition[0]
20 |
21 | return _operators[func](args)
22 |
23 |
24 | # noinspection PyBroadException
25 | def _call(cache, data, func, key):
26 | try:
27 | value = cache.get(key, None)
28 | if not value:
29 | cache[key] = value = data.get(key)
30 | if not callable(func):
31 | func = _transform(func)
32 | return {key: value} if func(value) else {}
33 | except Exception: # pylint: disable=W0702
34 | return {}
35 |
36 |
37 | def _match(cache, data, sig):
38 | summary = {}
39 | results = [
 40 |         _call(cache, data, f, k) for k, f in sig['conditions'].items()
41 | ]
42 | if all(results):
43 | [summary.update(r) for r in results]
44 | return summary
45 |
46 |
47 | def _matches(data, sigs):
48 | cache = {}
49 | unknown = 0
50 | for sig in sigs:
51 | result = _match(cache, data, sig)
52 | if result:
53 | name = sig.get('name', None)
54 | if not name:
55 | unknown += 1
56 | name = "unknown%d" % unknown
57 | yield name, result
58 | return
59 |
60 |
61 | def drop(whitelist, data):
62 | return next(_matches(data, whitelist), ("", {}))
63 |
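A sketch of the matching behaviour with a made-up signature: a bare string condition is compiled as a regex over the named field, and a full match yields the signature name plus the matched fields:

from assemblyline.common.signaturing import drop

sigs = [{
    "name": "local_traffic",
    "conditions": {"dst_ip": r"127\.0\.0\.1"},
}]

name, summary = drop(sigs, {"dst_ip": "127.0.0.1", "dst_port": 80})
# name -> "local_traffic", summary -> {"dst_ip": "127.0.0.1"}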
--------------------------------------------------------------------------------
/dev/depends/docker-compose-minimal.yml:
--------------------------------------------------------------------------------
1 | version: "2.4"
2 |
3 | services:
  4 |   # Dependencies
5 | minio:
6 | image: minio/minio
7 | environment:
8 | MINIO_ROOT_USER: al_storage_key
9 | MINIO_ROOT_PASSWORD: Ch@ngeTh!sPa33w0rd
10 | ports:
11 | - "9000:9000"
12 | command: server /data
13 |
14 | elasticsearch:
15 | image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
16 | environment:
17 | - xpack.security.enabled=true
18 | - discovery.type=single-node
19 | - logger.level=WARN
20 | - "ELASTIC_PASSWORD=devpass"
21 | - "ES_JAVA_OPTS=-Xms1024m -Xmx1024m"
22 | - "cluster.routing.allocation.disk.watermark.low=10gb"
23 | - "cluster.routing.allocation.disk.watermark.high=5gb"
24 | - "cluster.routing.allocation.disk.watermark.flood_stage=1gb"
25 | ports:
26 | - "9200:9200"
27 | healthcheck:
28 | test:
29 | [
30 | "CMD-SHELL",
31 | "curl --silent --fail -u elastic:$$ELASTIC_PASSWORD localhost:9200/_cluster/health || exit 1",
32 | ]
33 | interval: 30s
34 | timeout: 30s
35 | retries: 3
36 |
37 | redis:
38 | image: redis
39 | ports:
40 | - "6379:6379"
41 | - "6380:6379"
42 | healthcheck:
43 | test: ["CMD", "redis-cli", "ping"]
44 | interval: 30s
45 | timeout: 10s
46 | retries: 3
47 |
48 | nginx:
49 | image: cccs/nginx-ssl-frontend:mui5
50 | ports:
51 | - "80:80"
52 | - "443:443"
53 | environment:
54 | - FRONTEND_HOST=172.17.0.1
55 | - UI_HOST=172.17.0.1
56 | - SOCKET_HOST=172.17.0.1
57 | - FQDN=localhost
58 | - MAX_BODY_SIZE=100M
59 | - TEMPLATE=minimal
60 |
61 | networks:
62 | default:
63 | name: external
64 |
--------------------------------------------------------------------------------
/assemblyline/common/lucene.lark:
--------------------------------------------------------------------------------
1 | %import common.ESCAPED_STRING
2 | %import common.CNAME
3 | %import common.DIGIT
4 | %import common.LETTER
5 | %ignore WHITESPACE
6 |
7 | WHITESPACE: " "
8 |
9 | start: expression
10 |
11 | expression: or_expr
12 |
13 | or_expr: and_expr ("OR" and_expr)*
14 | and_expr: not_expr ("AND" not_expr)*
15 | not_expr: NOT_OPERATOR? atom
16 |
17 | NOT_OPERATOR: "NOT"
18 |
19 | atom: field
20 | | term
21 | | "(" expression ")"
22 |
23 | term: PREFIX_OPERATOR? (phrase_term | SIMPLE_TERM)
24 | field_term: PREFIX_OPERATOR? (phrase_term | SIMPLE_TERM)
25 |
26 | PREFIX_OPERATOR: "-" | "+" | ">=" | "<=" | ">" | "<"
27 |
28 | SIMPLE_TERM: ("\\+" | "\\-" | "\\&" | "\\&&" | "\\|" | "\\||" | "\\!" | "\\(" | "\\)" | "\\{"
29 | | "\\}" | "\\[" | "\\]" | "\\^" | "\\\"" | "\\~" | "\\*" | "\\ "
30 | | "\\?" | "\\:" | "\\\\" | "*" | "?" | DIGIT | "_" | "-" | LETTER)+
31 |
32 | phrase_term: ESCAPED_STRING
33 |
34 | field: FIELD_LABEL ":" field_value
35 |
36 | FIELD_LABEL: CNAME ["." CNAME]*
37 |
38 | field_value: range
39 | | field_term
40 | | REGEX_TERM
41 | | "(" field_expression ")"
42 |
43 | REGEX_TERM: /\/([^\/]|(\\\/))*\//
44 |
45 | range: RANGE_START first_range_term "TO" second_range_term RANGE_END
46 | RANGE_START: "[" | "{"
47 | RANGE_END: "]" | "}"
48 |
49 |
50 | field_expression: field_or_expr
51 | field_or_expr: field_and_expr ("OR" field_and_expr)*
52 | field_and_expr: field_not_expr ("AND" field_not_expr)*
53 | field_not_expr: NOT_OPERATOR? field_atom
54 | field_atom: field_term
55 | | "(" field_expression ")"
56 |
57 | first_range_term: RANGE_WILD | QUOTED_RANGE | FIRST_RANGE
58 | second_range_term: RANGE_WILD | QUOTED_RANGE | SECOND_RANGE
59 | QUOTED_RANGE: ESCAPED_STRING
60 | FIRST_RANGE: /[^ ]+/
61 | SECOND_RANGE: /[^\]\}]+/
62 | RANGE_WILD: "*"
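A minimal sketch of loading this grammar with the lark package; the grammar path and the queries are illustrative:

from lark import Lark

with open("assemblyline/common/lucene.lark") as grammar_file:
    parser = Lark(grammar_file.read(), start="start")

print(parser.parse("hello AND world").pretty())     # bare terms joined by AND
print(parser.parse("file.sha256:abc123").pretty())  # fielded term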
--------------------------------------------------------------------------------
/test/key.pem:
--------------------------------------------------------------------------------
1 | -----BEGIN PRIVATE KEY-----
2 | MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBANALE6wPhlNJ+aLt
3 | AX4HL43lI16CDs/9MOlX2a8/4Bp+cXv7sdPPJ6MTbqvrhpnA14cVgHl1LRu2OGxm
4 | etV7MwRQbJZ5NtVMLjKrU9wcYod7B9ZKUel4Jgbjk0CtO2txYbi9gNOkWaxwcmNF
5 | BfKIsaGOQkB5vzK7mnf7dO9ALYE/AgMBAAECgYA9rrsTbbru4OUCGHEz05+W25RE
6 | Bh2sLy6cUK67Fh403L56+yI7YZUn9a//iyJqXdHJPGfOGx7Xs4xBH5VVzGRQXo7i
7 | t6HOsB/oDwOTt5JKImJ+0JY6cn2MhWbsNY+oPJppe7CRoUKURHZY61+WDi8zT1mR
8 | Qrfo3jDgg6cX3zZwcQJBAP/wy8S2LN24okziCfssyF3WHb1Pkvc0/ITQle2+gQTZ
9 | YyF1H+2xGJOF3/wi19sE2bQuXigg0Ou+lyR1z3cFnRcCQQDQF2+AjB2mFrPqZ9Md
10 | qnP4GUrKT574CsHy5G0OniHSFrauKRCBjEwm4RXRm9lfs/RWA81/s7RTFWCJUq9m
11 | hmYZAkEAtK2PnAGjMK7b3Hyh4TAfDqdN/UvEi0FbloMNpHUc7YhtQ7xEWu7vU41p
12 | rrwGN/Z3nYwyKg/ojNPSLQoB+Jr85wJAZjPcc8pdlYF5BBvSOLPLGYNylELe1PyT
13 | nXRLi+5mtgSp3IgWr0n07POH/9cHwFVmIAjmGV5tppDNRSTzOOuxoQJBAKBKAMJm
14 | a64VkrqR1xkm9PYeUbNV8X28USnsPkw4I2shHHmwMwj+Vyo10IC0XtDto7ZrVAM9
15 | v5XYnKwRopUnj9c=
16 | -----END PRIVATE KEY-----
17 | -----BEGIN CERTIFICATE-----
18 | MIICWjCCAcOgAwIBAgIUQJONlWz9w+fbJgb/CmPv7Mj5wT0wDQYJKoZIhvcNAQEL
19 | BQAwPzELMAkGA1UEBhMCQ0ExEDAOBgNVBAgMB09udGFyaW8xDzANBgNVBAcMBk90
20 | dGF3YTENMAsGA1UECgwEQ0NDUzAeFw0yMTA3MjkxNzU1MzBaFw0zMTA3MjcxNzU1
21 | MzBaMD8xCzAJBgNVBAYTAkNBMRAwDgYDVQQIDAdPbnRhcmlvMQ8wDQYDVQQHDAZP
22 | dHRhd2ExDTALBgNVBAoMBENDQ1MwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGB
23 | ANALE6wPhlNJ+aLtAX4HL43lI16CDs/9MOlX2a8/4Bp+cXv7sdPPJ6MTbqvrhpnA
24 | 14cVgHl1LRu2OGxmetV7MwRQbJZ5NtVMLjKrU9wcYod7B9ZKUel4Jgbjk0CtO2tx
25 | Ybi9gNOkWaxwcmNFBfKIsaGOQkB5vzK7mnf7dO9ALYE/AgMBAAGjUzBRMB0GA1Ud
26 | DgQWBBTpHO34t3bWXUt0+eR9M/7KiGnEnzAfBgNVHSMEGDAWgBTpHO34t3bWXUt0
27 | +eR9M/7KiGnEnzAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4GBAChY
28 | fK7P81aqFQeWimgKD1AE/uzVToHCEcOryUl7VrQkHjToFyzeuXcUF/+n4pjyss8r
29 | mLmZolYrwuQ95UpEsNc0j/uVODFPxztjQYwi25UZS4YUSCxgufulanuaWIm4TdEs
30 | Mxt9/sQFrE0FZ6xivB27BiKEqmP+Q8g7yeZYOS4w
31 | -----END CERTIFICATE-----
32 |
--------------------------------------------------------------------------------
/test/test_path.py:
--------------------------------------------------------------------------------
1 | from assemblyline.common import path
2 |
3 |
4 | def test_strip_path_injection_linux():
5 | test_str = 'filename'
6 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename'
7 |
8 | test_str = 'foldername/filename'
9 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'foldername/filename'
10 |
11 | test_str = '.filename'
12 | assert path.strip_path_inclusion(test_str, "/home/al-user") == '.filename'
13 |
14 | test_str = '.foldername/filename'
15 | assert path.strip_path_inclusion(test_str, "/home/al-user") == '.foldername/filename'
16 |
17 | test_str = './foldername/filename'
18 | assert path.strip_path_inclusion(test_str, "/home/al-user") == './foldername/filename'
19 |
20 | test_str = '/foldername/filename'
21 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename'
22 |
23 | test_str = '../foldername/filename'
24 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename'
25 |
26 | test_str = '../../../../foldername/filename'
27 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename'
28 |
29 | test_str = '.././//./..//../../../foldername/filename'
30 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename'
31 |
32 | test_str = '////./..//../../../foldername/filename'
33 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename'
34 |
35 | test_str = 'realfolder/../../../foldername/filename'
36 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename'
37 |
38 | test_str = '..foldername/..filename'
39 | assert path.strip_path_inclusion(test_str, "/home/al-user") == '..foldername/..filename'
40 |
41 | test_str = '.././//./..//../../../foldername/../../././//../filename'
42 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename'
43 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/expiry_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"ExpiryHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.expiry_heartbeat.ExpiryMessage"
5 |
6 |
7 | @odm.model(description="Expiry Stats")
8 | class Metrics(odm.Model):
9 | alert = odm.Integer(description="Number of alerts")
10 | badlist = odm.Integer(description="Number of badlisted items")
11 | cached_file = odm.Integer(description="Number of cached files")
12 | emptyresult = odm.Integer(description="Number of empty results")
13 | error = odm.Integer(description="Number of errors")
14 | file = odm.Integer(description="Number of files")
15 | filescore = odm.Integer(description="Number of filscores")
16 | result = odm.Integer(description="Number of results")
17 | retrohunt_hit = odm.Integer(description="Number of retrohunt hits")
18 | safelist = odm.Integer(description="Number of safelisted items")
19 | submission = odm.Integer(description="Number of submissions")
20 | submission_tree = odm.Integer(description="Number of submission trees")
21 | submission_summary = odm.Integer(description="Number of submission summaries")
22 |
23 |
24 | @odm.model(description="Heartbeat Model")
25 | class Heartbeat(odm.Model):
26 | instances = odm.Integer(description="Number of instances")
27 | metrics = odm.Compound(Metrics, description="Expiry metrics")
28 | queues = odm.Compound(Metrics, description="Expiry queues")
29 |
30 |
31 | @odm.model(description="Model of Expiry Heartbeat Message")
32 | class ExpiryMessage(odm.Model):
33 | msg = odm.Compound(Heartbeat, description="Hearbeat message")
34 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
35 | msg_type = odm.Enum(values=MSG_TYPES, default="ExpiryHeartbeat", description="Type of message")
36 | sender = odm.Keyword(description="Sender of message")
37 |
--------------------------------------------------------------------------------
/docker/local_dev.Dockerfile:
--------------------------------------------------------------------------------
1 | # NOTE: to build this container you must be in a directory where assemblyline-base, assemblyline-ui,
2 | # assemblyline-core, assemblyline-service-server and assemblyline-service-client code is checked out
3 | FROM python:3.11-slim-bookworm
4 |
5 | # Upgrade packages
6 | RUN apt-get update && apt-get -yy upgrade && rm -rf /var/lib/apt/lists/*
7 |
  8 | # SSDEEP package requirements
9 | RUN apt-get update -yy \
10 | && apt-get install -yy build-essential libffi-dev libfuzzy-dev libldap2-dev libsasl2-dev libmagic1 libssl-dev \
11 | && rm -rf /var/lib/apt/lists/*
12 |
13 | # Create Assemblyline source directory
14 | RUN mkdir -p /etc/assemblyline
15 | RUN mkdir -p /var/cache/assemblyline
16 | RUN mkdir -p /var/lib/assemblyline
17 | RUN mkdir -p /var/lib/assemblyline/flowjs
18 | RUN mkdir -p /var/lib/assemblyline/bundling
19 | RUN mkdir -p /var/log/assemblyline
20 | RUN mkdir -p /opt/alv4
21 | WORKDIR /opt/alv4
22 |
 23 | # Set up environment variables
24 | ENV PYTHONPATH /opt/alv4/assemblyline-base:/opt/alv4/assemblyline-core:/opt/alv4/assemblyline-service-server:/opt/alv4/assemblyline-service-client:/opt/alv4/assemblyline_client:/opt/alv4/assemblyline-ui
25 |
26 | RUN pip install --upgrade pip
27 | RUN pip install debugpy
28 |
29 | COPY assemblyline-base assemblyline-base
30 | RUN pip install --no-warn-script-location -e ./assemblyline-base[test]
31 |
32 | COPY assemblyline-core assemblyline-core
33 | RUN pip install --no-warn-script-location -e ./assemblyline-core[test]
34 |
35 | COPY assemblyline-ui assemblyline-ui
36 | RUN pip install --no-warn-script-location -e ./assemblyline-ui[test,socketio]
37 |
38 | COPY assemblyline_client assemblyline_client
39 | RUN pip install --no-warn-script-location -e ./assemblyline_client[test]
40 |
41 | RUN pip uninstall -y assemblyline
42 | RUN pip uninstall -y assemblyline_core
43 | RUN pip uninstall -y assemblyline_ui
44 | RUN pip uninstall -y assemblyline_client
45 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/signature.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 | from assemblyline.odm.models.statistics import Statistics
4 |
5 | Classification = forge.get_classification()
6 |
7 | DEPLOYED_STATUSES = ['DEPLOYED', 'NOISY', 'DISABLED']
8 | DRAFT_STATUSES = ['STAGING', 'TESTING']
9 | STALE_STATUSES = ['INVALID']
10 |
11 | RULE_STATUSES = DEPLOYED_STATUSES + DRAFT_STATUSES + STALE_STATUSES
12 |
13 |
14 | @odm.model(index=True, store=True)
15 | class Signature(odm.Model):
16 | classification = odm.Classification(store=True, default=Classification.UNRESTRICTED, description="Security classification assigned to the signature based on its contents and context.")
17 | data = odm.Text(copyto="__text__", store=False)
18 | last_modified = odm.Date(default="NOW", description="Notes the last modification timestamp of the signature.")
19 | name = odm.Keyword(copyto="__text__", description="Name of the signature.")
20 | order = odm.Integer(default=1, store=False, deprecation="no longer used in v4")
21 | revision = odm.Keyword(default="1", description="")
22 | signature_id = odm.Optional(odm.Keyword(), description="ID associated with the signature.")
23 | source = odm.Keyword(description="Source or author of the signature.")
24 | state_change_date = odm.Optional(odm.Date(store=False), description="Date the signature's state was last changed.")
25 | state_change_user = odm.Optional(odm.Keyword(store=False), description="User who last changed the signature's state.")
26 | stats = odm.Compound(Statistics, default={}, description="Stats associated with count, average, min, max, and sum of various signature metrics.")
27 | status = odm.Enum(values=RULE_STATUSES, copyto="__text__", description="The current state of the signature (i.e. NOISY, DISABLED, DEPLOYED, etc.).")
28 | type = odm.Keyword(copyto="__text__", description="The service type that the signature is associated with.")
29 |
30 |
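A minimal usage sketch (not part of the file above), assuming a default classification engine; the rule body, source name, and type are invented:

    from assemblyline.odm.models.signature import DRAFT_STATUSES, Signature

    sig = Signature({
        "data": "rule demo { condition: true }",  # hypothetical YARA rule body
        "name": "demo_rule",
        "source": "example_source",
        "status": "TESTING",   # must be one of RULE_STATUSES
        "type": "yara",
    })
    assert sig.status in DRAFT_STATUSES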
--------------------------------------------------------------------------------
/assemblyline/odm/messages/service_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"ServiceHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.service_heartbeat.ServiceMessage"
5 |
6 |
7 | @odm.model(description="Service Metrics")
8 | class Metrics(odm.Model):
9 | cache_hit = odm.Integer(description="Number of cache hits")
10 | cache_miss = odm.Integer(description="Number of cache misses")
11 | cache_skipped = odm.Integer(description="Number of cache skips")
12 | execute = odm.Integer(description="Number of service executes")
13 | fail_recoverable = odm.Integer(description="Number of recoverable fails")
14 | fail_nonrecoverable = odm.Integer(description="Number of non-recoverable fails")
15 | scored = odm.Integer(description="Number of tasks scored")
16 | not_scored = odm.Integer(description="Number of tasks not scored")
17 |
18 |
19 | @odm.model(description="Service Activity")
20 | class Activity(odm.Model):
21 | busy = odm.Integer(description="Number of busy instances")
22 | idle = odm.Integer(description="Number of idle instances")
23 |
24 |
25 | @odm.model(description="Heartbeat Model")
26 | class Heartbeat(odm.Model):
27 | activity = odm.Compound(Activity, description="Service activity")
28 | instances = odm.Integer(description="Service instances")
29 | metrics = odm.Compound(Metrics, description="Service metrics")
30 | queue = odm.Integer(description="Service queue")
31 | service_name = odm.Keyword(description="Service name")
32 |
33 |
34 | @odm.model(description="Model of Service Heartbeat Message")
35 | class ServiceMessage(odm.Model):
36 | msg = odm.Compound(Heartbeat, description="Heartbeat message")
37 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
38 | msg_type = odm.Enum(values=MSG_TYPES, default="ServiceHeartbeat", description="Type of message")
39 | sender = odm.Keyword(description="Sender of message")
40 |
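An illustrative sketch of building this message from plain dictionaries; all numbers are made up, and the model defaults fill in msg_type and msg_loader:

    from assemblyline.odm.messages.service_heartbeat import ServiceMessage

    hb = ServiceMessage({
        "msg": {
            "activity": {"busy": 2, "idle": 1},
            "instances": 3,
            "metrics": {
                "cache_hit": 10, "cache_miss": 4, "cache_skipped": 0,
                "execute": 14, "fail_recoverable": 1, "fail_nonrecoverable": 0,
                "scored": 12, "not_scored": 2,
            },
            "queue": 5,
            "service_name": "Extract",
        },
        "sender": "service_server",
    })
    assert hb.msg_type == "ServiceHeartbeat"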
--------------------------------------------------------------------------------
/dev/core/docker-compose-sca-upd.yml:
--------------------------------------------------------------------------------
1 | version: "2.4"
2 |
3 | services:
4 | al_scaler:
5 | image: cccs/assemblyline_dev:4.6.1
6 | env_file:
7 | - .env
8 | environment:
9 | DOCKER_CONFIGURATION_PATH: /mount/service_config/
10 | DOCKER_CONFIGURATION_VOLUME: service_config
11 | AL_CORE_NETWORK: external
12 | volumes:
13 | - type: volume
14 | source: service_config
15 | target: /mount/service_config/
16 | read_only: false
17 | - ${PATH_REWRITE:-.}/config/:/etc/assemblyline/
18 | - ${ROOT_REWRITE:-../../..}/:/opt/alv4/
19 |       - /var/run/docker.sock:/var/run/docker.sock # NOTE: this container has access to the docker socket (effectively root)
20 | command: python3 /opt/alv4/assemblyline-core/assemblyline_core/scaler/run_scaler.py
21 | healthcheck:
22 | test:
23 | [
24 | "CMD",
25 | "bash",
26 | "-c",
27 | "if [[ ! `find /tmp/heartbeat -newermt '-30 seconds'` ]]; then false; fi",
28 | ]
29 |
30 | al_updater:
31 | image: cccs/assemblyline_dev:4.6.1
32 | env_file:
33 | - .env
34 | environment:
35 | AL_CORE_NETWORK: external
36 | CONTAINER_CHECK_INTERVAL: 5
37 | UPDATE_CHECK_INTERVAL: 5
38 | volumes:
39 | - ${PATH_REWRITE:-.}/config/:/etc/assemblyline/
40 | - ${ROOT_REWRITE:-../../..}/:/opt/alv4/
41 |       - /var/run/docker.sock:/var/run/docker.sock # NOTE: this container has access to the docker socket (effectively root)
42 | command: python3 /opt/alv4/assemblyline-core/assemblyline_core/updater/run_updater.py
43 | healthcheck:
44 | test:
45 | [
46 | "CMD",
47 | "bash",
48 | "-c",
49 | "if [[ ! `find /tmp/heartbeat -newermt '-30 seconds'` ]]; then false; fi",
50 | ]
51 |
52 | networks:
53 | default:
54 | external: true
55 | name: external
56 |
57 | volumes:
58 | service_config:
59 | name: service_config
60 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/results/antivirus.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.odm.models.ontology.results.process import ObjectID
3 | from assemblyline.common.dict_utils import get_dict_fingerprint_hash
4 |
5 | OID_PARTS = ['engine_name', 'virus_name']
6 | TAG_PARTS = ['engine_name', 'virus_name']
7 |
8 |
9 | @odm.model(description="Antivirus Ontology Model")
10 | class Antivirus(odm.Model):
11 | objectid = odm.Compound(ObjectID, description="The object ID of the antivirus object")
12 | engine_name = odm.Keyword(description="Name of antivirus engine")
13 | engine_version = odm.Optional(odm.Keyword(), description="Version of antivirus engine")
14 | engine_definition_version = odm.Optional(odm.Keyword(), description="Version of definition set")
15 | virus_name = odm.Optional(odm.Keyword(), description="The name of the virus")
16 | # What category does the verdict fall under?
17 | category = odm.Optional(odm.Enum(['type-unsupported',
18 | 'undetected',
19 | 'failure',
20 | 'suspicious',
21 | 'malicious']),
22 | description="What category does the verdict fall under?
"
23 | "- `type-unsupported`: File sent to antivirus is unsupported
"
24 | "- `undetected`: File not detected by antivirus
"
25 | "- `failure`: Antivirus failed during detection
"
26 | "- `suspicious`: Antivirus deems suspicious
"
27 | "- `malicious`: Antivirus deems malicious
")
28 |
29 | def get_oid(data: dict):
30 | return f"antivirus_{get_dict_fingerprint_hash({key: data.get(key) for key in OID_PARTS})}"
31 |
32 | def get_tag(data: dict):
33 | return ".".join([data.get(key) for key in TAG_PARTS if data.get(key)])
34 |
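A short sketch of the helper functions above; the engine and virus names are invented:

    from assemblyline.odm.models.ontology.results.antivirus import Antivirus

    data = {"engine_name": "DemoAV", "virus_name": "Eicar-Test-Signature"}
    oid = Antivirus.get_oid(data)  # "antivirus_<fingerprint of engine/virus names>"
    tag = Antivirus.get_tag(data)  # "DemoAV.Eicar-Test-Signature"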
--------------------------------------------------------------------------------
/assemblyline/odm/models/workflow.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 |
4 | Classification = forge.get_classification()
5 |
6 |
7 | PRIORITIES = {None, "LOW", "MEDIUM", "HIGH", "CRITICAL"}
8 | STATUSES = {None, "MALICIOUS", "NON-MALICIOUS", "ASSESS", "TRIAGE"}
9 |
10 |
11 | @odm.model(index=True, store=True, description="Model of Workflow")
12 | class Workflow(odm.Model):
13 | classification = odm.Classification(copyto="__text__", default=Classification.UNRESTRICTED,
14 | description="Classification of the workflow")
15 | creation_date = odm.Date(default="NOW", description="Creation date of the workflow")
16 | creator = odm.Keyword(description="UID of the creator of the workflow")
17 | edited_by = odm.Keyword(description="UID of the last user to edit the workflow")
18 | enabled = odm.Boolean(default=True, description="Is this workflow enabled?")
19 | first_seen = odm.Optional(odm.Date(), description="Date of first hit on workflow")
20 | hit_count = odm.Integer(default=0, description="Number of times there was a workflow hit")
21 | labels = odm.List(odm.Keyword(), copyto="__text__", default=[], description="Labels applied by the workflow")
22 | last_edit = odm.Date(default="NOW", description="Date of last edit on workflow")
23 | last_seen = odm.Optional(odm.Date(), description="Date of last hit on workflow")
24 | name = odm.Keyword(copyto="__text__", description="Name of the workflow")
25 |     origin = odm.Optional(odm.Keyword(), description="Where did this workflow originate from?")
26 | priority = odm.Optional(odm.Enum(copyto="__text__", values=PRIORITIES),
27 | description="Priority applied by the workflow")
28 | query = odm.Keyword(description="Query that the workflow runs")
29 | status = odm.Optional(odm.Enum(copyto="__text__", values=STATUSES), description="Status applied by the workflow")
30 | workflow_id = odm.Optional(odm.UUID(), description="ID of the workflow")
31 |
--------------------------------------------------------------------------------
/dev/core/config/certs/tls.crt:
--------------------------------------------------------------------------------
1 | -----BEGIN CERTIFICATE-----
2 | MIIFazCCA1OgAwIBAgIUTxaWQFirgQeVWS/UZzROzKunuz4wDQYJKoZIhvcNAQEL
3 | BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM
4 | GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yMzA0MjAyMDA1MDNaFw0yNDA0
5 | MTkyMDA1MDNaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
6 | HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggIiMA0GCSqGSIb3DQEB
7 | AQUAA4ICDwAwggIKAoICAQC4tiGvsQH33LePuPWr7lq1Km6/GFF1bdPQsgKAwdTe
8 | YHxTRu0yqMhaNTQbJp53nw/KPqfquraoTfS7wUC4miQwweIYtv+IhWIXSMTPtFui
9 | HIbl43ibtl1+y93ARqSvkuRIUdToVYsshH9HbcFzFECPWtOQS3hifocDzfc53lkI
10 | jRFtdQ9++O812RXuX/i3fLKfJ1WbEQUJMbRyPny0wmWG6x4s4d3tjY4PQ/hGJ8DZ
11 | cKvJROWnztHRzcSpEoGWskPxjxZ2MATDFLC8pvpgFDS4ZgKiOUvSqEq6npFJK3g6
12 | bRlG6S3tZp/hzhDXVFVBsqKv//FuBjBVB4MyFQgWIsCcfTeb0NF6FVZpAVgGdysF
13 | enLW83pHxbgcFYe24i3aRybU0gjwj/qm6I0RGeMM9jc/lAttskIi0Qf84ZGqFwiP
14 | 7Gp+dtftiAz5TzdGuYtVGTt6IRzjyuOo+8UqVGbcOAJ5t4foym2mwVshm2mOhx2d
15 | Gc/f+Bj/Qfmtrzo1uM8wrgcHzI1CjwrMi7m5eC5VehpxZeJPgIgrNW9Y7P0E2Yb+
16 | qAGDy/Nz7V97rN6u7waszI4DnUS1TTvTMniOlQNgXeGAPhuYcDnvUDngxbnG9eUh
17 | sult2peYX1Be5V1jnykGBi0XG0NU0JekOr/CtGjmCh9o2dt04b7Uy4g4oZGREsxz
18 | bwIDAQABo1MwUTAdBgNVHQ4EFgQUBHuYSPOJSxY2s/bl18Hald2+zE4wHwYDVR0j
19 | BBgwFoAUBHuYSPOJSxY2s/bl18Hald2+zE4wDwYDVR0TAQH/BAUwAwEB/zANBgkq
20 | hkiG9w0BAQsFAAOCAgEAUrzPFpJhjMpzeCCiqiMOTqQkFC07N3Oj6EbqWeoK6NgM
21 | cqtzNvkedkQ+DV18/CTGjgS8bNHUJIA0mWWHa8aezuZANM1wiYmKHwDzYckzPZB0
22 | WIj63BVIzXvGVMANk/Wbnfuyvnkd84FzaIPa1T0tmIJkBikX93IMPPhwUFJ509wU
23 | HceZq8QJbX6KSrDxNQ+bmLMhDM7OyV8CE5VYO63DDCccN1++g7FESN1ZzgfydbUe
24 | 82Up6iGpvaf2xbaHSn3pNbwZmalQqN0sE+9FwgGlWqOZN80qU0VwdmJA1kevxwcB
25 | Y8Uh6j9KNTPRg9Gl5dCg7P0fWsUzoa7DUtpFQO8qm0h5KMacxfZ1J6ySf5viFLWz
26 | ze6J1aFgM8LDNrSQGWIuULLFIACiv3Ct5DgkCISWiBjFG/em2dPQpG+MNSJg7NI+
27 | 5R9uQPc2GU7vd9geg+SnNj74Hj7KRQdP+2iJsFbGvYL56ZoUWeaqAth/qtkZA5WX
28 | Q9YdabSvztarj0ZIua9MFwYmnJBb1gwNglTuGJKXxGh1j5FH4zMqIG7rppM8Vj67
29 | VYx8J4sVcjgBd2Q+Qz0n4sI1iaSmqdTSd9g0/craTpFDyp/zNqyy5yMgYT0WRBE9
30 | n2Z4EjFPkGOrcOTac0PV0akQXhgqDMti1t27wUJOtcXLSwPKpU3UJ2FfCPoqZgI=
31 | -----END CERTIFICATE-----
32 |
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/queues/comms.py:
--------------------------------------------------------------------------------
1 | import json
2 | import redis
3 |
4 | from assemblyline.remote.datatypes import get_client, retry_call, log, decode
5 |
6 |
7 | class CommsQueue(object):
8 | def __init__(self, names, host=None, port=None, private=False):
9 | self.c = get_client(host, port, private)
10 | self.p = retry_call(self.c.pubsub)
11 | if not isinstance(names, list):
12 | names = [names]
13 | self.names = names
14 | self._connected = False
15 |
16 | def __enter__(self):
17 | return self
18 |
19 | def __exit__(self, exc_type, exc_val, exc_tb):
20 | retry_call(self.p.unsubscribe)
21 |
22 | def _connect(self):
23 | if not self._connected:
24 | retry_call(self.p.subscribe, self.names)
25 | self._connected = True
26 |
27 | def close(self):
28 | retry_call(self.p.close)
29 |
30 | def listen(self, blocking=True):
31 | retried = False
32 | while True:
33 | self._connect()
34 | try:
35 | if blocking:
36 | i = self.p.listen()
37 | v = next(i)
38 | else:
39 | v = self.p.get_message()
40 | if v is None:
41 | yield None
42 | continue
43 |
44 | if isinstance(v, dict) and v.get('type', None) == 'message':
45 | data = decode(v.get('data', 'null'))
46 | yield data
47 | except redis.ConnectionError:
48 | log.warning('No connection to Redis, reconnecting...')
49 | self._connected = False
50 | retried = True
51 | finally:
52 | if self._connected and retried:
53 | log.info('Reconnected to Redis!')
54 | retried = False
55 |
56 | def publish(self, message):
57 | for name in self.names:
58 | retry_call(self.c.publish, name, json.dumps(message))
59 |
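A minimal publish/subscribe round-trip sketch, assuming a Redis server reachable at get_client's defaults. Redis pub/sub drops messages published before a subscription exists, so the generator is pumped once (which subscribes) before publishing:

    from assemblyline.remote.datatypes.queues.comms import CommsQueue

    with CommsQueue('status') as q:
        messages = q.listen(blocking=False)
        next(messages)              # subscribes; yields None at first
        q.publish({'state': 'ok'})
        for msg in messages:
            if msg is not None:
                print(msg)          # -> {'state': 'ok'}
                break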
--------------------------------------------------------------------------------
/test/conftest.py:
--------------------------------------------------------------------------------
1 | """
2 | Pytest configuration file, setup global pytest fixtures and functions here.
3 | """
4 | import os
5 |
6 | from assemblyline.common import forge
7 | from assemblyline.datastore.helper import AssemblylineDatastore
8 | from assemblyline.datastore.store import ESStore, ESCollection
9 | from redis.exceptions import ConnectionError
10 |
11 | import pytest
12 | original_skip = pytest.skip
13 |
14 | # Check if we are in an unattended build environment where skips won't be noticed
15 | IN_CI_ENVIRONMENT = any(indicator in os.environ for indicator in
16 | ['CI', 'BITBUCKET_BUILD_NUMBER', 'AGENT_JOBSTATUS'])
17 |
18 |
19 | def skip_or_fail(message):
20 | """Skip or fail the current test, based on the environment"""
21 | if IN_CI_ENVIRONMENT:
22 | pytest.fail(message)
23 | else:
24 | original_skip(message)
25 |
26 |
27 | # Replace the built in skip function with our own
28 | pytest.skip = skip_or_fail
29 |
30 |
31 | @pytest.fixture(scope='session')
32 | def config():
33 | return forge.get_config()
34 |
35 |
36 | @pytest.fixture(scope='module')
37 | def filestore(config):
38 | try:
39 | return forge.get_filestore(config, connection_attempts=1)
40 | except ConnectionError as err:
41 | pytest.skip(str(err))
42 |
43 |
44 | @pytest.fixture(scope='module')
45 | def datastore_connection(config):
46 | ESCollection.MAX_RETRY_BACKOFF = 0.5
47 | store = ESStore(config.datastore.hosts)
48 | ret_val = store.ping()
49 | if not ret_val:
50 | pytest.skip("Could not connect to datastore")
51 |
52 | return AssemblylineDatastore(store)
53 |
54 |
55 | @pytest.fixture(scope='session')
56 | def redis_connection():
57 | from assemblyline.remote.datatypes import get_client
58 | c = get_client(None, None, False)
59 | try:
60 | ret_val = c.ping()
61 | if ret_val:
62 | return c
63 | except ConnectionError:
64 | pass
65 |
66 | return pytest.skip("Connection to the Redis server failed. This test cannot be performed...")
67 |
--------------------------------------------------------------------------------
/external/generate_tlds.py:
--------------------------------------------------------------------------------
1 | import os
2 | import requests
3 |
4 |
5 | def get_tlds(url):
6 | comments = []
7 | tlds = []
8 |
9 | response = requests.get(url)
10 | for line in response.text.splitlines():
11 | if not line:
12 | continue
13 | if line.startswith('#'):
14 | comments.append(line)
15 | else:
16 | tlds.append(line)
17 |
18 | return comments, tlds
19 |
20 |
21 | def get_special_tlds(url):
22 | response = requests.get(url)
23 | # Ignore first line from CSV and return list of domains without the period suffix
24 | return [line.split(',', 1)[0][:-1].upper() for line in response.text.splitlines()[1:]]
25 |
26 |
27 | if __name__ == "__main__":
28 | tlds_url = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'
29 | tlds_location = "../assemblyline/common/net_static.py"
30 | if not os.path.exists(tlds_location):
31 | print("Could not find net_static.py file. Make sure you run this script "
32 |               "in its home directory, otherwise this won't work.")
33 | exit(1)
34 |
35 | comments, tlds = get_tlds(tlds_url)
36 | comments_lines = '\n'.join(comments)
37 | tlds_lines = '",\n "'.join(tlds)
38 |
39 | special_tlds_url = "https://www.iana.org/assignments/special-use-domain-names/special-use-domain.csv"
40 | special_tlds = get_special_tlds(special_tlds_url)
41 | special_tlds_lines = '",\n "'.join(special_tlds)
42 |
43 | with open(tlds_location, "w") as tlds_fh:
44 | tlds_fh.write("# This file is generated using generate_tlds.py script\n"
45 | "# DO NOT EDIT! Re-run the script instead...\n\n"
46 | f"# Top level domains from: {tlds_url}\n"
47 | f"{comments_lines}\n"
48 | f"TLDS_ALPHA_BY_DOMAIN = {{\n \"{tlds_lines}\"\n}}\n\n"
49 | f"# Special-use TLDs from: {special_tlds_url}\n"
50 | f"TLDS_SPECIAL_BY_DOMAIN = {{\n \"{special_tlds_lines}\"\n}}")
51 |
52 | print(f"TLDS list file written into: {tlds_location}")
53 | print("You can now commit the new net_static.py file to your git.")
54 |
--------------------------------------------------------------------------------
/assemblyline/common/exceptions.py:
--------------------------------------------------------------------------------
1 | from inspect import getmembers, isfunction
2 | from sys import exc_info
3 | from traceback import format_tb
4 |
5 |
6 | class ChainException(Exception):
7 | def __init__(self, message, cause=None):
8 | Exception.__init__(self, message)
9 | self.cause = cause
10 |
11 |
12 | class NonRecoverableError(ChainException):
13 | pass
14 |
15 |
16 | class RecoverableError(ChainException):
17 | pass
18 |
19 |
20 | class ConfigException(Exception):
21 | pass
22 |
23 |
24 | class Chain(object):
25 | """
26 | This class can be used as a decorator to override the type of exceptions returned by a function
27 | """
28 |
29 | def __init__(self, exception):
30 | self.exception = exception
31 |
32 | def __call__(self, original):
33 | def wrapper(*args, **kwargs):
34 | try:
35 | return original(*args, **kwargs)
36 | except Exception as e:
37 | wrapped = self.exception(str(e), e)
38 | raise wrapped.with_traceback(exc_info()[2])
39 |
40 | wrapper.__name__ = original.__name__
41 | wrapper.__doc__ = original.__doc__
42 | wrapper.__dict__.update(original.__dict__)
43 |
44 | return wrapper
45 |
46 | def execute(self, func, *args, **kwargs):
47 | try:
48 | return func(*args, **kwargs)
49 | except Exception as e:
50 | wrapped = self.exception(str(e), e)
51 | raise wrapped.with_traceback(exc_info()[2])
52 |
53 |
54 | class ChainAll:
55 | """
56 | This class can be used as a decorator to override the type of exceptions returned by every method of a class
57 | """
58 |
59 | def __init__(self, exception):
60 | self.exception = Chain(exception)
61 |
62 | def __call__(self, cls):
63 | """We can use an instance of this class as a decorator."""
64 | for method in getmembers(cls, predicate=isfunction):
65 | setattr(cls, method[0], self.exception(method[1]))
66 |
67 | return cls
68 |
69 |
70 | def get_stacktrace_info(ex: Exception) -> str:
71 | return ''.join(format_tb(exc_info()[2]) + [': '.join((ex.__class__.__name__, str(ex)))])
72 |
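An illustrative use of the Chain decorator above: any exception raised in the wrapped function is re-raised as the requested type, keeping the original exception as `cause` and preserving the traceback:

    from assemblyline.common.exceptions import Chain, RecoverableError

    @Chain(RecoverableError)
    def flaky():
        raise ValueError("underlying problem")

    try:
        flaky()
    except RecoverableError as err:
        assert isinstance(err.cause, ValueError)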
--------------------------------------------------------------------------------
/assemblyline/odm/models/error.py:
--------------------------------------------------------------------------------
1 |
2 | from assemblyline import odm
3 | from assemblyline.common.caching import generate_conf_key
4 |
5 | STATUSES = {"FAIL_NONRECOVERABLE", "FAIL_RECOVERABLE"}
6 | ERROR_TYPES = {
7 | "UNKNOWN": 0,
8 | "EXCEPTION": 1,
9 | "MAX DEPTH REACHED": 10,
10 | "MAX FILES REACHED": 11,
11 | "MAX RETRY REACHED": 12,
12 | "SERVICE BUSY": 20,
13 | "SERVICE DOWN": 21,
14 | "TASK PRE-EMPTED": 30
15 | }
16 |
17 |
18 | @odm.model(index=True, store=True, description="Error Response from a Service")
19 | class Response(odm.Model):
20 | message = odm.Text(copyto="__text__", description="Error message")
21 | service_debug_info = odm.Optional(odm.Keyword(), description="Information about where the service was processed")
22 | service_name = odm.Keyword(copyto="__text__", description="Service Name")
23 | service_tool_version = odm.Optional(odm.Keyword(copyto="__text__"), description="Service Tool Version")
24 | service_version = odm.Keyword(description="Service Version")
25 | status = odm.Enum(values=STATUSES, description="Status of error produced by service")
26 |
27 |
28 | @odm.model(index=True, store=True, description="Error Model used by Error Viewer")
29 | class Error(odm.Model):
30 | archive_ts = odm.Optional(odm.Date(description="Time at which the error was archived"))
31 | created = odm.Date(default="NOW", description="Error creation timestamp")
32 | expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry timestamp")
33 | response: Response = odm.Compound(Response, description="Response from the service")
34 | sha256 = odm.SHA256(copyto="__text__", description="SHA256 of file related to service error")
35 | type = odm.Enum(values=list(ERROR_TYPES.keys()), default="EXCEPTION", description="Type of error")
36 |
37 | def build_key(self, service_tool_version=None, task=None):
38 | key_list = [
39 | self.sha256,
40 | self.response.service_name.replace('.', '_'),
41 | f"v{self.response.service_version.replace('.', '_')}",
42 | f"c{generate_conf_key(service_tool_version=service_tool_version, task=task)}",
43 | f"e{ERROR_TYPES.get(self.type, 0)}"]
44 |
45 | return '.'.join(key_list)
46 |
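A hedged sketch of how build_key() composes the document key; the hash and service details below are invented:

    from assemblyline.odm.models.error import Error

    err = Error({
        "response": {
            "message": "something went wrong",
            "service_name": "Extract",
            "service_version": "4.0.1",
            "status": "FAIL_RECOVERABLE",
        },
        "sha256": "a" * 64,
    })
    # "<sha256>.Extract.v4_0_1.c<conf hash>.e1" ("e1" = EXCEPTION, the default type)
    print(err.build_key())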
--------------------------------------------------------------------------------
/test/test_isotime.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from assemblyline.common.isotime import (
4 | LOCAL_FMT_WITH_MS,
5 | ensure_time_format,
6 | epoch_to_iso,
7 | epoch_to_local,
8 | epoch_to_local_with_ms,
9 | iso_to_epoch,
10 | local_to_epoch,
11 | local_to_local_with_ms,
12 | local_with_ms_to_epoch,
13 | now,
14 | now_as_iso,
15 | now_as_local,
16 | )
17 |
18 |
19 | def test_isotime_iso():
20 | iso_date = now_as_iso()
21 | iso_format = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}Z')
22 |
23 | assert isinstance(iso_date, str)
24 | assert iso_format.match(iso_date)
25 | assert epoch_to_iso(iso_to_epoch(iso_date)) == iso_date
26 | assert iso_date == epoch_to_iso(local_with_ms_to_epoch(epoch_to_local_with_ms(local_to_epoch(epoch_to_local(iso_to_epoch(iso_date))))))
27 |
28 |
29 | def test_isotime_local():
30 | local_date = now_as_local()
31 | local_format = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}.*')
32 |
33 | assert isinstance(local_date, str)
34 | assert local_format.match(local_date)
35 | assert epoch_to_local(local_to_epoch(local_date)) == local_date
36 | assert epoch_to_local_with_ms(local_with_ms_to_epoch(local_date)) == local_date
37 | assert local_date == epoch_to_local(iso_to_epoch(epoch_to_iso(local_to_epoch(local_date))))
38 |
39 |
40 | def test_isotime_epoch():
41 | epoch_date = now(200)
42 | assert epoch_date == local_to_epoch(epoch_to_local(epoch_date))
43 | assert epoch_date == local_with_ms_to_epoch(epoch_to_local_with_ms(epoch_date))
44 | assert epoch_date == iso_to_epoch(epoch_to_iso(epoch_date))
45 |
46 | assert isinstance(epoch_date, float)
47 |
48 |
49 | def test_isotime_rounding_error():
50 | for t in ["2020-01-29 18:41:25.758416", "2020-01-29 18:41:25.127600"]:
51 | epoch = local_to_epoch(t)
52 | local = epoch_to_local(epoch)
53 | assert local == t
54 |
55 | def test_local_to_local_with_ms():
56 | local_date = now_as_local()
57 | assert local_to_local_with_ms(local_date) == local_date[:-3]
58 |
59 | def test_ensure_time_format():
60 | local_date = now_as_local()
61 | assert ensure_time_format(local_date, LOCAL_FMT_WITH_MS)
62 |
--------------------------------------------------------------------------------
/assemblyline/odm/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | import typing
3 |
4 | from assemblyline.odm.base import *
5 |
6 | # Imports that have the same effect as some part of the one above so that
7 | # type checking can use this file properly.
8 | from assemblyline.odm.base import Keyword, Optional, Boolean, Integer, List, Compound, Mapping, \
9 | Date, Long, Enum, Wildcard
10 | from datetime import datetime
11 |
12 | _InnerType = typing.TypeVar("_InnerType")
13 |
14 | """
15 | Helper functions to invoke ODM types without requiring type annotations.
16 |
17 | These can be used like the type objects they wrap, but will provide better hints to type checking tools.
18 | """
19 |
20 |
21 | def description(text):
22 | def _fn(obj):
23 | obj.description = text
24 | return _fn
25 |
26 |
27 | def keyword(*args, **kwargs) -> str:
28 | return typing.cast(str, Keyword(*args, **kwargs))
29 |
30 |
31 | def wildcard(*args, **kwargs) -> str:
32 | return typing.cast(str, Wildcard(*args, **kwargs))
33 |
34 |
35 | def date(*args, **kwargs) -> datetime:
36 | return typing.cast(datetime, Date(*args, **kwargs))
37 |
38 |
39 | def optional(child_type: _InnerType, **kwargs) -> typing.Optional[_InnerType]:
40 | return typing.cast(typing.Optional[_InnerType], Optional(child_type, **kwargs))
41 |
42 |
43 | def boolean(*args, **kwargs) -> bool:
44 | return typing.cast(bool, Boolean(*args, **kwargs))
45 |
46 |
47 | def integer(*args, **kwargs) -> int:
48 | return typing.cast(int, Integer(*args, **kwargs))
49 |
50 |
51 | def long(*args, **kwargs) -> int:
52 | return typing.cast(int, Long(*args, **kwargs))
53 |
54 |
55 | def sequence(child_type: _InnerType, **kwargs) -> list[_InnerType]:
56 | return typing.cast(list[_InnerType], List(child_type, **kwargs))
57 |
58 |
59 | def mapping(child_type: _InnerType, **kwargs) -> dict[str, _InnerType]:
60 | return typing.cast(dict[str, _InnerType], Mapping(child_type, **kwargs))
61 |
62 |
63 | def compound(child_type: typing.Callable[..., _InnerType], **kwargs) -> _InnerType:
64 | return typing.cast(_InnerType, Compound(child_type, **kwargs))
65 |
66 |
67 | def enum(values: typing.Iterable[str], **kwargs) -> str:
68 | return typing.cast(str, Enum(values, **kwargs))
69 |
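A small sketch of why these wrappers exist: the model reads the same as with the raw field classes, but a type checker now sees plain `str`/`int` attributes instead of field objects. The model itself is hypothetical:

    from assemblyline import odm

    @odm.model(index=True, store=True)
    class Example(odm.Model):
        name = odm.keyword(copyto="__text__")  # type checker sees: str
        count = odm.integer(default=0)         # type checker sees: int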
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/counters.py:
--------------------------------------------------------------------------------
1 |
2 | from redis.exceptions import ConnectionError
3 |
4 | from assemblyline.remote.datatypes import get_client, retry_call, now_as_iso
5 | from assemblyline.remote.datatypes.hash import Hash
6 |
7 |
8 | class Counters(object):
9 | def __init__(self, prefix="counter", host=None, port=None, track_counters=False):
10 | self.c = get_client(host, port, False)
11 | self.prefix = prefix
12 | if track_counters:
13 | self.tracker = Hash("c-tracker-%s" % prefix, host=host, port=port)
14 | else:
15 | self.tracker = None
16 |
17 | def __enter__(self):
18 | return self
19 |
20 | def __exit__(self, exc_type, exc_val, exc_tb):
21 | self.delete()
22 |
23 | def inc(self, name, value=1, track_id=None):
24 | if self.tracker:
25 | self.tracker.add(track_id or name, now_as_iso())
26 | return retry_call(self.c.incr, "%s-%s" % (self.prefix, name), value)
27 |
28 | def dec(self, name, value=1, track_id=None):
29 | if self.tracker:
30 | self.tracker.pop(str(track_id or name))
31 | return retry_call(self.c.decr, "%s-%s" % (self.prefix, name), value)
32 |
33 | def get_queues_sizes(self):
34 | out = {}
35 | for queue in retry_call(self.c.keys, "%s-*" % self.prefix):
36 | queue_size = int(retry_call(self.c.get, queue))
37 | out[queue] = queue_size
38 |
39 | return {k.decode('utf-8'): v for k, v in out.items()}
40 |
41 | def get_queues(self):
42 | return [k.decode('utf-8') for k in retry_call(self.c.keys, "%s-*" % self.prefix)]
43 |
44 | def ready(self):
45 | try:
46 | self.c.ping()
47 | except ConnectionError:
48 | return False
49 |
50 | return True
51 |
52 | def reset_queues(self):
53 | if self.tracker:
54 | self.tracker.delete()
55 | for queue in retry_call(self.c.keys, "%s-*" % self.prefix):
56 | retry_call(self.c.set, queue, "0")
57 |
58 | def delete(self):
59 | if self.tracker:
60 | self.tracker.delete()
61 | for queue in retry_call(self.c.keys, "%s-*" % self.prefix):
62 | retry_call(self.c.delete, queue)
63 |
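A minimal usage sketch, assuming a Redis server reachable at get_client's defaults; the prefix and counter names are invented:

    from assemblyline.remote.datatypes.counters import Counters

    with Counters(prefix="demo-counter") as counters:
        counters.inc("ingested")            # demo-counter-ingested == 1
        counters.inc("ingested", value=4)   # demo-counter-ingested == 5
        print(counters.get_queues_sizes())  # {'demo-counter-ingested': 5}
    # leaving the `with` block deletes every demo-counter-* key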
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/cache.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from redis.exceptions import ConnectionError
4 |
5 | from assemblyline.common.uid import get_id_from_data
6 | from assemblyline.remote.datatypes import get_client, retry_call
7 |
8 | DEFAULT_TTL = 60 * 60 # 1 Hour
9 |
10 |
11 | class Cache(object):
12 | def __init__(self, prefix="al_cache", separator="-", host=None, port=None, ttl=DEFAULT_TTL):
13 | self.c = get_client(host, port, False)
14 | self.prefix = prefix + separator
15 |         self.ttl = ttl
16 |
17 | def __enter__(self):
18 | return self
19 |
20 | def _get_key(self, name):
21 | return f"{self.prefix}-{name}"
22 |
23 | def clear(self, key=None):
24 |         # Clear all items belonging to this cache
25 | if key:
26 | retry_call(self.c.delete, f"{self.prefix}{key}")
27 | else:
28 | for queue in retry_call(self.c.keys, f"{self.prefix}*"):
29 | retry_call(self.c.delete, queue)
30 |
31 | def create_key(self, *args):
32 | key_str = "-".join([str(x) for x in args])
33 | return get_id_from_data(key_str)
34 |
35 | def get(self, key, ttl=None, reset=True):
36 | # Get the key name
37 | cache_name = self._get_key(key)
38 |
39 | # Get the value from the cache
40 | item = retry_call(self.c.get, cache_name)
41 | if not item:
42 | return item
43 |
44 | if reset:
45 | # Reset the cache while we're still using it
46 | retry_call(self.c.expire, cache_name, ttl or self.ttl)
47 |
48 | return json.loads(item)
49 |
50 | def list(self):
51 | for key in retry_call(self.c.keys, f"{self.prefix}*"):
52 | yield json.loads(retry_call(self.c.get, key))
53 |
54 | def ready(self):
55 | try:
56 | self.c.ping()
57 | except ConnectionError:
58 | return False
59 |
60 | return True
61 |
62 | def set(self, key, value, ttl=None):
63 | # Get the key name
64 | cache_name = self._get_key(key)
65 |
66 | # Set the value and the expiry for the name
67 | retry_call(self.c.set, cache_name, json.dumps(value))
68 | retry_call(self.c.expire, cache_name, ttl or self.ttl)
69 |
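A minimal usage sketch, assuming a reachable Redis. Values must be JSON serializable because set()/get() round-trip through json; the key parts are invented:

    from assemblyline.remote.datatypes.cache import Cache

    cache = Cache(prefix="al_cache")
    key = cache.create_key("user", "alice")  # stable ID built from the parts
    cache.set(key, {"quota": 25}, ttl=300)
    print(cache.get(key))                    # {'quota': 25}; also refreshes the TTL
    cache.clear(key)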
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/daily_quota_tracker.py:
--------------------------------------------------------------------------------
1 | from assemblyline.common.isotime import now_as_iso
2 | from assemblyline.remote.datatypes import get_client, retry_call
3 |
4 |
5 | class DailyQuotaTracker(object):
6 | def __init__(self, redis=None, host=None, port=None, private=False):
7 | self.c = redis or get_client(host, port, private)
8 | self.ttl = 60*60*24
9 |
10 | def _counter_name(self, user, type):
11 | return f"DAILY-QUOTA-{now_as_iso()[:10]}-{user}-{type}"
12 |
13 | def _decrement(self, user, type):
14 | counter = self._counter_name(user, type)
15 | with self.c.pipeline() as pipe:
16 | pipe.decr(counter)
17 | pipe.expire(counter, self.ttl, nx=True)
18 |
19 | val, _ = retry_call(pipe.execute)
20 |
21 | return val
22 |
23 | def decrement_api(self, user):
24 | return self._decrement(user, 'api')
25 |
26 | def decrement_submission(self, user):
27 | return self._decrement(user, 'submission')
28 |
29 | def _increment(self, user, type):
30 | counter = self._counter_name(user, type)
31 | with self.c.pipeline() as pipe:
32 | pipe.incr(counter)
33 | pipe.expire(counter, self.ttl, nx=True)
34 |
35 | val, _ = retry_call(pipe.execute)
36 |
37 | return val
38 |
39 | def increment_api(self, user):
40 | return self._increment(user, 'api')
41 |
42 | def increment_submission(self, user):
43 | return self._increment(user, 'submission')
44 |
45 | def _get(self, user, type):
46 | counter = self._counter_name(user, type)
47 | return retry_call(self.c.get, counter) or 0
48 |
49 | def get_api(self, user):
50 | return int(self._get(user, 'api'))
51 |
52 | def get_submission(self, user):
53 | return int(self._get(user, 'submission'))
54 |
55 | def _reset(self, user, type):
56 | counter = self._counter_name(user, type)
57 | with self.c.pipeline() as pipe:
58 | pipe.set(counter, 0)
59 | pipe.expire(counter, self.ttl, nx=True)
60 |
61 |         retry_call(pipe.execute)
62 |
63 | def reset_api(self, user):
64 | self._reset(user, "api")
65 |
66 | def reset_submission(self, user):
67 | self._reset(user, "submission")
68 |
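A sketch of the daily quota flow, assuming a reachable Redis. The counter key embeds today's date, so counts roll over naturally each day and expire after 24 hours; the user name and limit are invented:

    from assemblyline.remote.datatypes.daily_quota_tracker import DailyQuotaTracker

    tracker = DailyQuotaTracker()
    used = tracker.increment_api("alice")  # 1 on the first call of the day
    if used > 100:                         # hypothetical daily API limit
        tracker.decrement_api("alice")     # roll back and reject the request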
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11-slim-bookworm AS base
2 |
3 | # Upgrade packages
4 | RUN apt-get update && apt-get -yy upgrade && rm -rf /var/lib/apt/lists/*
5 |
6 | # Get required apt packages
7 | RUN apt-get update && apt-get install -yy libffi8 libfuzzy2 libmagic1 && rm -rf /var/lib/apt/lists/*
8 |
9 | # Make sure root account is locked so 'su' commands fail all the time
10 | RUN passwd -l root
11 |
12 | FROM base AS builder
13 | ARG version
14 | ARG version_tag=${version}
15 |
16 | # Get required apt packages
17 | RUN apt-get update \
18 | && apt-get install -yy build-essential libffi-dev libfuzzy-dev \
19 | && rm -rf /var/lib/apt/lists/*
20 |
21 | # Install assemblyline base (setup.py is a file we know exists, so the COPY command
22 | # won't fail if dist isn't there; the dist* glob copies any dist directory only if it exists.)
23 | COPY setup.py dist* dist/
24 | RUN pip install --no-cache-dir --no-warn-script-location -f dist/ --user assemblyline==$version && rm -rf ~/.cache/pip
25 | RUN chmod 750 /root/.local/lib/python3.11/site-packages
26 |
27 | FROM base
28 |
29 | # Add assemblyline user
30 | RUN useradd -b /var/lib -U -m assemblyline
31 |
32 | # Create assemblyline config directory
33 | RUN mkdir -p /etc/assemblyline
34 | RUN chmod 750 /etc/assemblyline
35 | RUN chown root:assemblyline /etc/assemblyline
36 |
37 | # Create assemblyline cache directory
38 | RUN mkdir -p /var/cache/assemblyline
39 | RUN chmod 770 /var/cache/assemblyline
40 | RUN chown assemblyline:assemblyline /var/cache/assemblyline
41 |
42 | # Create assemblyline home directory
43 | RUN mkdir -p /var/lib/assemblyline
44 | RUN chmod 750 /var/lib/assemblyline
45 | RUN chown assemblyline:assemblyline /var/lib/assemblyline
46 |
47 | # Create assemblyline log directory
48 | RUN mkdir -p /var/log/assemblyline
49 | RUN chmod 770 /var/log/assemblyline
50 | RUN chown assemblyline:assemblyline /var/log/assemblyline
51 |
52 | # Install assemblyline base
53 | COPY --chown=assemblyline:assemblyline --from=builder /root/.local /var/lib/assemblyline/.local
54 | ENV PATH=/var/lib/assemblyline/.local/bin:$PATH
55 | ENV PYTHONPATH=/var/lib/assemblyline/.local/lib/python3.11/site-packages
56 | ENV ASSEMBLYLINE_VERSION=${version}
57 | ENV ASSEMBLYLINE_IMAGE_TAG=${version_tag}
58 |
59 | # Switch to assemblyline user
60 | USER assemblyline
61 | WORKDIR /var/lib/assemblyline
62 | CMD /bin/bash
63 |
--------------------------------------------------------------------------------
/test/test_identify.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import pytest
4 |
5 | from cart import unpack_file
6 | from json import loads
7 | from pathlib import Path
8 |
9 | from assemblyline.common import forge
10 |
11 | SAMPLES_LOCATION = os.environ.get("SAMPLES_LOCATION", None)
12 |
13 |
14 | def test_id_file_base():
15 | with forge.get_identify(use_cache=False) as identify:
16 | tests_dir = os.path.dirname(__file__)
17 | id_file_base = "id_file_base"
18 | file_base_dir = os.path.join(tests_dir, id_file_base)
19 | map_file = "id_file_base.json"
20 | map_path = os.path.join(file_base_dir, map_file)
21 | with open(map_path, "r") as f:
22 | contents = f.read()
23 | json_contents = loads(contents)
24 | for _, _, files in os.walk(file_base_dir):
25 | for file_name in files:
26 | if file_name == map_file:
27 | continue
28 |
29 | file_path = os.path.join(file_base_dir, file_name)
30 | data = identify.fileinfo(file_path, generate_hashes=False)
31 | actual_value = data.get("type", "")
32 | expected_value = json_contents[file_name]
33 | assert actual_value == expected_value
34 |
35 |
36 | def get_ids(filepath):
37 | if not isinstance(filepath, (str, bytes, os.PathLike)):
38 | return "skipped"
39 | return "-".join(split_sample(filepath))
40 |
41 |
42 | def split_sample(filepath):
43 |     target_file = os.path.join("/tmp", os.path.basename(filepath).removesuffix(".cart"))
44 | identify_result = str(filepath.relative_to(Path(SAMPLES_LOCATION)).parent)
45 | return (target_file, identify_result)
46 |
47 |
48 | @pytest.fixture()
49 | def sample(request):
50 | target_file, identify_result = split_sample(request.param)
51 | try:
52 | unpack_file(request.param, target_file)
53 | yield (target_file, identify_result)
54 | finally:
55 | if target_file:
56 | os.unlink(target_file)
57 |
58 |
59 | if SAMPLES_LOCATION:
60 | @pytest.mark.parametrize("sample", Path(SAMPLES_LOCATION).rglob("*.cart"), ids=get_ids, indirect=True)
61 | def test_identify_samples(sample):
62 | with forge.get_identify(use_cache=False) as identify:
63 | assert identify.fileinfo(sample[0], generate_hashes=False)["type"] == sample[1]
64 |
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/user_quota_tracker.py:
--------------------------------------------------------------------------------
1 | import redis
2 | from assemblyline.remote.datatypes import get_client, retry_call
3 |
4 | begin_script = """
5 | local t = redis.call('time')
6 | local key = tonumber(t[1] .. string.format("%06d", t[2]))
7 |
8 | local name = ARGV[1]
9 | local max = tonumber(ARGV[2])
10 | local timeout = tonumber(ARGV[3] .. "000000")
11 |
12 | redis.call('zremrangebyscore', name, 0, key - timeout)
13 | if redis.call('zcard', name) < max then
14 | redis.call('zadd', name, key, key)
15 | return true
16 | else
17 | return false
18 | end
19 | """
20 |
21 |
22 | class UserQuotaTracker(object):
23 | def __init__(self, prefix, timeout=120, redis=None, host=None, port=None, private=False):
24 | self.c = redis or get_client(host, port, private)
25 | self.bs = self.c.register_script(begin_script)
26 | self.prefix = prefix
27 | self.timeout = timeout
28 |
29 | def _queue_name(self, user):
30 | return f"{self.prefix}-{user}"
31 |
32 | def begin(self, user, max_quota):
33 | try:
34 | return retry_call(self.bs, args=[self._queue_name(user), max_quota, self.timeout]) == 1
35 | except redis.exceptions.ResponseError as er:
36 | # TODO: This is a failsafe for upgrade purposes could be removed in a future version
37 | if "WRONGTYPE" in str(er):
38 | retry_call(self.c.delete, self._queue_name(user))
39 | return retry_call(self.bs, args=[self._queue_name(user), max_quota, self.timeout]) == 1
40 | else:
41 | raise
42 |
43 | def end(self, user):
44 | """When only one item is requested, blocking is is possible."""
45 | try:
46 | retry_call(self.c.zpopmin, self._queue_name(user))
47 | except redis.exceptions.ResponseError as er:
48 | # TODO: This is a failsafe for upgrade purposes could be removed in a future version
49 | if "WRONGTYPE" in str(er):
50 | retry_call(self.c.delete, self._queue_name(user))
51 | retry_call(self.c.zpopmin, self._queue_name(user))
52 | else:
53 | raise
54 |
55 | def reset(self, user):
56 | retry_call(self.c.delete, self._queue_name(user))
57 |
58 | def get_count(self, user):
59 | return retry_call(self.c.zcard, self._queue_name(user))
60 |
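A concurrency-quota sketch, assuming a reachable Redis: begin() adds a timestamped entry to a per-user sorted set and refuses once max_quota live entries exist, end() releases the oldest slot, and the Lua script evicts entries older than `timeout` so crashed clients cannot hold slots forever. The prefix and user are invented:

    from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker

    quota = UserQuotaTracker("demo-quota", timeout=60)
    if quota.begin("alice", max_quota=2):
        try:
            ...  # do the quota-limited work
        finally:
            quota.end("alice")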
--------------------------------------------------------------------------------
/assemblyline/common/codec.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 |
4 | from cart import is_cart, pack_stream, unpack_stream
5 | from cart.cart import _unpack_header
6 | from assemblyline.common.dict_utils import flatten
7 | from assemblyline.common.file import normalize_uri_file
8 |
9 |
10 | def decode_file(original_path, fileinfo, identify):
11 | extracted_path = None
12 | hdr = {}
13 | with open(original_path, 'rb') as original_file:
14 | if is_cart(original_file.read(256)):
15 | original_file.seek(0)
16 |
17 | _, hdr, _ = _unpack_header(original_file)
18 | al_type = flatten(hdr).get('al.type', None)
19 | if not al_type:
20 | original_file.seek(0)
21 |
22 | extracted_fd, extracted_path = tempfile.mkstemp()
23 | extracted_file = os.fdopen(extracted_fd, 'wb')
24 |
25 | cart_extracted = False
26 | try:
27 | hdr, _ = unpack_stream(original_file, extracted_file)
28 | cart_extracted = True
29 |
30 | except Exception:
31 | extracted_path = None
32 | hdr = {}
33 | fileinfo['type'] = 'corrupted/cart'
34 |
35 | finally:
36 | extracted_file.close()
37 |
38 | if cart_extracted and extracted_path:
39 | fileinfo = identify.fileinfo(extracted_path)
40 | elif fileinfo['type'].startswith("uri/"):
41 |         dir_path = tempfile.mkdtemp()  # Like the mkstemp() call above, this is not cleaned up after execution
42 | extracted_path = normalize_uri_file(dir_path, original_path)
43 | fileinfo = identify.fileinfo(extracted_path)
44 |
45 | return extracted_path, fileinfo, hdr
46 |
47 |
48 | def encode_file(input_path, name, metadata=None):
49 | if metadata is None:
50 | metadata = {}
51 |
52 | _, output_path = tempfile.mkstemp()
53 |
54 | with open(output_path, 'wb') as oh:
55 | with open(input_path, 'rb') as ih:
56 | data = ih.read(64)
57 | if not is_cart(data):
58 | ih.seek(0)
59 | metadata.update({'name': name})
60 | pack_stream(ih, oh, metadata)
61 | return output_path, f"{name}.cart"
62 | else:
63 | return input_path, name
64 |
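A hedged sketch of encode_file(): non-CaRT input is packed into a new temporary CaRT file and the name gains a .cart suffix, while input that is already CaRT is returned untouched. The paths and metadata are invented:

    from assemblyline.common.codec import encode_file

    out_path, out_name = encode_file("/tmp/sample.bin", "sample.bin",
                                     metadata={"source": "example"})
    # -> (path of a new mkstemp() file, "sample.bin.cart") for non-CaRT input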
--------------------------------------------------------------------------------
/assemblyline/odm/models/user_settings.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 | from assemblyline.odm.models.config import SubmissionProfileParams
4 |
5 | Classification = forge.get_classification()
6 |
7 | ENCODINGS = {"cart", "raw", "zip"}
8 | VIEWS = {"report", "details"}
9 |
10 |
11 | @odm.model(index=False, store=False, description="Model of User Settings")
12 | class UserSettings(odm.Model):
13 | download_encoding = odm.Enum(values=ENCODINGS, default="cart",
14 | description="Default download encoding when downloading files")
15 | default_external_sources = odm.List(odm.Keyword(), default=[],
16 | description="List of sha256 sources to check by default")
17 | default_zip_password = odm.Text(
18 | default="infected",
19 | description="Default user-defined password for creating password protected ZIPs when downloading files"
20 | )
21 | default_metadata = odm.Mapping(odm.Text(), default={}, description="Default metadata to add to submissions")
22 | executive_summary = odm.Boolean(default=True, description="Should executive summary sections be shown?")
23 |     expand_min_score = odm.Integer(default=500, description="Auto-expand sections when the score is bigger than this")
24 | preferred_submission_profile = odm.Optional(odm.Text(), description="Preferred submission profile")
25 | submission_profiles = odm.Mapping(odm.Compound(SubmissionProfileParams), default={},
26 | description="Default submission profile settings")
27 | submission_view = odm.Enum(values=VIEWS, default="report", description="Default view for completed submissions")
28 |
29 |
30 | DEFAULT_USER_PROFILE_SETTINGS = {
31 | "download_encoding": "cart",
32 | "default_external_sources": [],
33 | "default_zip_password": "infected",
34 | "executive_summary": True,
35 | "expand_min_score": 500,
36 | "submission_view": "report",
37 | "default_metadata": {}
38 | }
39 |
40 | DEFAULT_SUBMISSION_PROFILE_SETTINGS = {
41 | "classification": Classification.UNRESTRICTED,
42 | "deep_scan": False,
43 | "generate_alert": False,
44 | "ignore_cache": False,
45 | "ignore_recursion_prevention": False,
46 | "ignore_filtering": False,
47 | "priority": 1000,
48 | "service_spec": {},
49 | "services": {},
50 | "ttl": 30
51 | }
52 |
--------------------------------------------------------------------------------
/test/test_datasource.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import pytest
4 |
5 | from assemblyline.datasource.al import AL
6 | from assemblyline.datasource.alert import Alert
7 | from assemblyline.common import forge
8 | from assemblyline.odm.random_data import wipe_alerts, create_alerts, wipe_submissions, create_submission, NullLogger
9 |
10 |
11 | @pytest.fixture(scope="module")
12 | def fs():
13 | return forge.get_filestore()
14 |
15 |
16 | def purge_alert(ds, fs):
17 | wipe_alerts(ds)
18 | wipe_submissions(ds, fs)
19 |
20 |
21 | @pytest.fixture(scope="module")
22 | def datastore(request, datastore_connection, fs):
23 | create_alerts(datastore_connection, alert_count=1)
24 | create_submission(datastore_connection, fs)
25 |
26 | request.addfinalizer(lambda: purge_alert(datastore_connection, fs))
27 | return datastore_connection
28 |
29 |
30 | # noinspection PyUnusedLocal
31 | def test_al_source(datastore):
32 | submission_id = random.choice(datastore.submission.search("id:*", fl="id", rows=1, as_obj=False)['items'])['id']
33 | submission = datastore.submission.get(submission_id)
34 | al_datasource = AL(NullLogger())
35 | resp = al_datasource.query(submission.files[0].sha256, access_control=None)
36 | for res in resp:
37 | score = res['data']['score']
38 | if score >= 2000:
39 | assert res['malicious']
40 | assert res['confirmed']
41 | elif 1000 <= score < 2000:
42 | assert res['malicious']
43 | assert not res['confirmed']
44 | else:
45 | assert not res['malicious']
46 |
47 |
48 | # noinspection PyUnusedLocal
49 | def test_alert_source(datastore):
50 | alert_id = random.choice(datastore.alert.search("id:*", fl="id", rows=1, as_obj=False)['items'])['id']
51 | alert = datastore.alert.get(alert_id)
52 |
53 | alert_datasource = Alert(NullLogger())
54 | resp = alert_datasource.query(alert.file.sha256, access_control=None)
55 | for res in resp:
56 | score = None
57 | for item in res['data']:
58 | if score is None or item['score'] > score:
59 | score = item['score']
60 |
61 | if score >= 2000:
62 | assert res['malicious']
63 | assert res['confirmed']
64 | elif 500 <= score < 2000:
65 | assert res['malicious']
66 | assert not res['confirmed']
67 | else:
68 | assert not res['malicious']
69 |
--------------------------------------------------------------------------------
/assemblyline/datasource/al.py:
--------------------------------------------------------------------------------
1 | from assemblyline.common import forge
2 | from assemblyline.datasource.common import Datasource
3 |
4 | Classification = forge.get_classification()
5 |
6 |
7 | class AL(Datasource):
8 | def __init__(self, log, **kw):
9 | super(AL, self).__init__(log, **kw)
10 | self.datastore = forge.get_datastore()
11 |
12 | def parse(self, results, **kw):
13 | return results
14 |
15 | def query(self, value, **kw):
16 | results = []
17 |
18 | hash_type = self.hash_type(value)
19 |
20 | query = "%s:%s OR %s:%s" % (
21 | hash_type, value.lower(), hash_type, value.upper()
22 | )
23 |
24 | res = self.datastore.file.search(query, rows=5, access_control=kw['access_control'], as_obj=False)
25 |
26 | for r in res['items']:
27 | score = 0
28 | score_map = {}
29 |
30 | res = self.datastore.result.grouped_search("response.service_name", f"id:{r['sha256']}*",
31 | fl="result.score,id", rows=100, sort="created desc",
32 | access_control=kw["access_control"], as_obj=False)
33 |
34 | for group in res['items']:
35 | service_name = group['value']
36 | for doc in group['items']:
37 | score_map[service_name] = doc['result']['score']
38 | score += doc['result']['score']
39 |
40 | result = {
41 | "classification": r['classification'],
42 | "confirmed": score >= 2000 or score < -499,
43 | "data": {
44 | "classification": r['classification'],
45 | "md5": r['md5'],
46 | "sha1": r['sha1'],
47 | "sha256": r['sha256'],
48 | "size": r['size'],
49 | "type": r['type'],
50 | "seen": {
51 | "count": r['seen']['count'],
52 | "last": r['seen']['last']
53 | },
54 | "score": score,
55 | "score_map": score_map
56 | },
57 | "description": "File found in AL with score of %s." % score,
58 | "malicious": score >= 1000,
59 | }
60 |
61 | results.append(result)
62 |
63 | return results
64 |
--------------------------------------------------------------------------------
/assemblyline/common/banner.py:
--------------------------------------------------------------------------------
1 | BANNER = r"""
2 | #########
3 | ###### ##############################
4 | ##### ###################### ###
5 | ##### ###################### ###
6 | ###### ############## ########
7 | ############ ########
8 | ########### --------
9 | ############ /..........\
10 | ############# /..............\
11 | ############# \..../ \..../
12 | ###### ##### |..| |..|
13 | ####### #### |..| |..|
14 | ########### ### |..| |..|
15 | ################# #### \.| |./
16 | ####################### #####
17 | ##############################
18 | #############################
19 | ##########################
20 | ########################
21 | ######################
22 | ###################
23 | .......................
24 | ........................
25 | ..........................
26 | ..........................
27 | ..........................
28 | ..........................
29 | ................................
30 | """
31 |
--------------------------------------------------------------------------------
/test/test_postprocess.py:
--------------------------------------------------------------------------------
1 | import queue
2 | import threading
3 | import http.server
4 | import json
5 |
6 | import pytest
7 | from assemblyline.common.postprocess import ActionWorker, SubmissionFilter
8 |
9 | from assemblyline.odm.models.actions import PostprocessAction, Webhook
10 | from assemblyline.odm.models.submission import Submission
11 | from assemblyline.odm.randomizer import random_minimal_obj
12 |
13 |
14 | @pytest.fixture
15 | def server():
16 | hits = queue.Queue()
17 |
18 | class TestServer(http.server.BaseHTTPRequestHandler):
19 | def do_POST(self):
20 | try:
21 | hits.put(dict(
22 | headers=self.headers,
23 | body=self.rfile.read(int(self.headers.get('Content-Length', '1')))
24 | ))
25 | self.send_response(200, 'data received')
26 | self.end_headers()
27 | except Exception as error:
28 | hits.put(error)
29 |
30 | test_server = http.server.ThreadingHTTPServer(('localhost', 0), TestServer)
31 | thread = threading.Thread(target=test_server.serve_forever, daemon=True)
32 | thread.start()
33 | try:
34 | yield f'http://localhost:{test_server.server_address[1]}', hits
35 | finally:
36 | test_server.shutdown()
37 | thread.join()
38 |
39 |
40 | def test_hook(server, config, datastore_connection, redis_connection):
41 | server_uri, server_hits = server
42 |
43 | action = PostprocessAction(dict(
44 | enabled=True,
45 | run_on_completed=True,
46 | filter="metadata.do_hello: *",
47 | webhook=Webhook(dict(
48 | uri=server_uri,
49 | headers=[dict(name='care-of', value='assemblyline')]
50 | ))
51 | ))
52 |
53 | worker = ActionWorker(cache=False, config=config, datastore=datastore_connection, redis_persist=redis_connection)
54 |
55 | worker.actions = {
56 | 'action': (SubmissionFilter(action.filter), action)
57 | }
58 |
59 | sub: Submission = random_minimal_obj(Submission)
60 | sub.metadata = dict(ok='bad')
61 | worker.process_submission(sub, tags=[])
62 |
63 | sub: Submission = random_minimal_obj(Submission)
64 | sub.metadata = dict(ok='good', do_hello='yes')
65 | worker.process_submission(sub, tags=[])
66 |
67 | obj = server_hits.get(timeout=3)
68 | assert obj['headers']['CARE-OF'] == 'assemblyline'
69 | assert json.loads(obj['body'])['submission']['metadata']['ok'] == 'good'
70 |
71 | assert server_hits.qsize() == 0
72 |
--------------------------------------------------------------------------------
/assemblyline/common/metrics.py:
--------------------------------------------------------------------------------
1 | from assemblyline.common import forge
2 | from assemblyline.odm.messages import PerformanceTimer
3 | from assemblyline.remote.datatypes import get_client
4 | from assemblyline.remote.datatypes.exporting_counter import AutoExportingCounters
5 |
6 | # Datastore tables that have an expiry, for which we want to monitor how many
7 | # documents are due for expiry but still exist.
8 | EXPIRY_METRICS = [
9 | 'alert',
10 | 'badlist',
11 | 'cached_file',
12 | 'emptyresult',
13 | 'error',
14 | 'file',
15 | 'filescore',
16 | 'result',
17 | 'retrohunt_hit',
18 | 'safelist',
19 | 'submission',
20 | 'submission_tree',
21 | 'submission_summary'
22 | ]
23 |
24 |
25 | class MetricsFactory(object):
26 | """A wrapper around what was once, multiple metrics methods.
27 |
28 | Left in place until we decide we are absolutely not switching methods again.
29 | """
30 |
31 | def __init__(self, metrics_type, schema, name=None, redis=None, config=None, export_zero=True):
32 | self.config = config or forge.get_config()
33 | self.redis = redis or get_client(
34 | self.config.core.metrics.redis.host,
35 | self.config.core.metrics.redis.port,
36 | False
37 | )
38 |
39 | # Separate out the timers and normal counters
40 | timer_schema = set()
41 | counter_schema = set()
42 |
43 | for _k, field_type in schema.fields().items():
44 | if isinstance(field_type, PerformanceTimer):
45 | timer_schema.add(_k)
46 | else:
47 | counter_schema.add(_k)
48 |
49 | for _k in timer_schema:
50 | counter_schema.discard(_k + '_count')
51 |
52 | self.type = metrics_type
53 | self.name = name or metrics_type
54 |
55 | # Initialize legacy metrics
56 | self.metrics_handler = AutoExportingCounters(
57 | self.name,
58 | redis=self.redis,
59 | config=self.config,
60 | counter_type=metrics_type,
61 | timer_names=timer_schema,
62 | counter_names=counter_schema,
63 | export_zero=export_zero
64 | )
65 | self.metrics_handler.start()
66 |
67 | def stop(self):
68 | self.metrics_handler.stop()
69 |
70 | def set(self, name, value):
71 | self.metrics_handler.set(name, value)
72 |
73 | def increment(self, name, increment_by=1):
74 | self.metrics_handler.increment(name, increment_by=increment_by)
75 |
76 | def increment_execution_time(self, name, execution_time):
77 | self.metrics_handler.increment_execution_time(name, execution_time)
78 |
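An illustrative sketch wiring MetricsFactory to the service metrics schema defined elsewhere in this repo; it assumes a valid Assemblyline config and a reachable metrics Redis, and the service name is invented:

    from assemblyline.common.metrics import MetricsFactory
    from assemblyline.odm.messages.service_heartbeat import Metrics

    counter = MetricsFactory('service', Metrics, name='Extract')
    counter.increment('execute')    # one of the counter fields in Metrics
    counter.increment('cache_hit')
    counter.stop()                  # flush and stop the export thread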
--------------------------------------------------------------------------------
/assemblyline/odm/messages/dispatcher_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.odm.messages import PerformanceTimer
3 |
4 | MSG_TYPES = {"DispatcherHeartbeat"}
5 | LOADER_CLASS = "assemblyline.odm.messages.dispatcher_heartbeat.DispatcherMessage"
6 |
7 |
8 | @odm.model(description="Queue Model")
9 | class Queues(odm.Model):
10 | ingest = odm.Integer(description="Number of submissions in ingest queue")
11 | start = odm.List(odm.Integer(), description="Number of submissions that started")
12 | result = odm.List(odm.Integer(), description="Number of results in queue")
13 | command = odm.List(odm.Integer(), description="Number of commands in queue")
14 |
15 |
16 | @odm.model(description="Inflight Model")
17 | class Inflight(odm.Model):
18 | max = odm.Integer(description="Maximum number of submissions")
19 | outstanding = odm.Integer(description="Number of outstanding submissions")
20 | per_instance = odm.List(odm.Integer(), description="Number of submissions per Dispatcher instance")
21 |
22 |
23 | @odm.model(description="Metrics Model")
24 | class Metrics(odm.Model):
25 | files_completed = odm.Integer(description="Number of files completed")
26 | submissions_completed = odm.Integer(description="Number of submissions completed")
27 | service_timeouts = odm.Integer(description="Number of service timeouts")
28 | cpu_seconds = PerformanceTimer(description="CPU time")
29 | cpu_seconds_count = odm.Integer(description="CPU count")
30 | busy_seconds = PerformanceTimer(description="Busy CPU time")
31 | busy_seconds_count = odm.Integer(description="Busy CPU count")
32 | save_queue = odm.Integer(description="Processed submissions waiting to be saved")
33 | error_queue = odm.Integer(description="Errors waiting to be saved")
34 |
35 |
36 | @odm.model(description="Heartbeat Model")
37 | class Heartbeat(odm.Model):
38 | inflight = odm.Compound(Inflight, description="Inflight submissions")
39 | instances = odm.Integer(description="Number of instances")
40 | metrics = odm.Compound(Metrics, description="Dispatcher metrics")
41 | queues = odm.Compound(Queues, description="Dispatcher queues")
42 | component = odm.Keyword(description="Component name")
43 |
44 |
45 | @odm.model(description="Model of Dispatcher Heartbeat Messages")
46 | class DispatcherMessage(odm.Model):
47 | msg = odm.Compound(Heartbeat, description="Heartbeat message")
48 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
49 | msg_type = odm.Enum(values=MSG_TYPES, default="DispatcherHeartbeat", description="Type of message")
50 | sender = odm.Keyword(description="Sender of message")
51 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/results/sandbox.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common.dict_utils import get_dict_fingerprint_hash, flatten
3 | from assemblyline.odm.models.ontology.results.process import ObjectID
4 |
5 | OID_PARTS = ['sandbox_name', 'sandbox_version', 'analysis_metadata.start_time',
6 | 'analysis_metadata.end_time', 'analysis_metadata.task_id']
7 |
8 |
9 | @odm.model(description="Sandbox Ontology Model")
10 | class Sandbox(odm.Model):
11 | @odm.model(description="The metadata of the analysis, per analysis")
12 | class AnalysisMetadata(odm.Model):
13 | @odm.model(description="The metadata regarding the machine where the analysis took place")
14 | class MachineMetadata(odm.Model):
15 | ip = odm.Optional(odm.IP(), description="The IP of the machine used for analysis")
16 | hypervisor = odm.Optional(odm.Keyword(), description="The hypervisor of the machine used for analysis")
17 | hostname = odm.Optional(odm.Keyword(), description="The name of the machine used for analysis")
18 | platform = odm.Optional(odm.Platform(), description="The platform of the machine used for analysis")
19 | version = odm.Optional(odm.Keyword(),
20 | description="The version of the operating system of the machine used for analysis")
21 | architecture = odm.Optional(odm.Processor(),
22 | description="The architecture of the machine used for analysis")
23 |
24 | task_id = odm.Optional(odm.Keyword(), description="The ID used for identifying the analysis task")
25 | start_time = odm.Date(description="The start time of the analysis")
26 | end_time = odm.Optional(odm.Date(), description="The end time of the analysis")
27 | routing = odm.Optional(odm.Keyword(),
28 | description="The routing used in the sandbox setup (Spoofed, Internet, Tor, VPN)")
29 |         machine_metadata = odm.Optional(odm.Compound(MachineMetadata), description="The metadata of the machine used for the analysis")
30 | window_size = odm.Optional(odm.Keyword(), description="The resolution used for the analysis")
31 |
32 | objectid = odm.Compound(ObjectID, description="The object ID of the sandbox object")
33 |
34 | analysis_metadata = odm.Compound(AnalysisMetadata, description="Metadata for the analysis")
35 | sandbox_name = odm.Keyword(description="The name of the sandbox")
36 | sandbox_version = odm.Optional(odm.Keyword(), description="The version of the sandbox")
37 |
38 | def get_oid(data: dict):
39 | return f"sandbox_{get_dict_fingerprint_hash({key: flatten(data).get(key) for key in OID_PARTS})}"
40 |
41 | def get_tag(data: dict):
42 | return data['sandbox_name']
43 |
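The `get_oid` helper relies on `flatten()` to expose the nested `analysis_metadata` block under dotted keys so the `OID_PARTS` lookups resolve. A small sketch with invented values, assuming `flatten()` produces dot-separated keys as the dotted names in `OID_PARTS` imply:

```python
from assemblyline.common.dict_utils import flatten
from assemblyline.odm.models.ontology.results.sandbox import Sandbox

analysis = {
    "sandbox_name": "CAPE",
    "sandbox_version": "2.0",
    "analysis_metadata": {
        "task_id": "123",
        "start_time": "2023-01-01T00:00:00Z",
        "end_time": "2023-01-01T00:05:00Z",
    },
}

# flatten() exposes nested values under dotted keys like 'analysis_metadata.task_id'
print(flatten(analysis)["analysis_metadata.task_id"])  # 123
print(Sandbox.get_oid(analysis))  # sandbox_<fingerprint>, stable for equal inputs
```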
--------------------------------------------------------------------------------
/assemblyline/odm/messages/submission.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict, Optional as Opt
2 | from assemblyline import odm
3 | from assemblyline.odm.models.submission import SubmissionParams, File, Submission as DatabaseSubmission
4 |
5 | MSG_TYPES = {"SubmissionIngested", "SubmissionReceived", "SubmissionStarted", "SubmissionCompleted"}
6 | LOADER_CLASS = "assemblyline.odm.messages.submission.SubmissionMessage"
7 |
8 |
9 | @odm.model(index=True, store=True, description="Notification Model")
10 | class Notification(odm.Model):
11 | queue = odm.Optional(odm.Keyword(), description="Queue to publish the completion message")
12 | threshold = odm.Optional(odm.Integer(), description="Notify only if this score threshold is met")
13 |
14 |
15 | @odm.model(description="Submission Model")
16 | class Submission(odm.Model):
17 | sid = odm.UUID(description="Submission ID to use")
18 | time = odm.Date(default="NOW", description="Message time")
19 | files: List[File] = odm.List(odm.Compound(File), default=[], description="File block")
20 | metadata: Dict[str, str] = odm.FlatMapping(odm.MetadataValue(), default={}, description="Metadata submitted with the file")
21 | notification: Notification = odm.Compound(Notification, default={}, description="Notification queue parameters")
22 | params: SubmissionParams = odm.Compound(SubmissionParams, description="Parameters of the submission")
23 | scan_key: Opt[str] = odm.Optional(odm.Keyword())
24 | file_tree = odm.Any(default={}, description="File tree of the files in this submission")
25 |     file_infos = odm.Mapping(odm.Any(), default={}, description="Map of each file's SHA256 to its file information")
26 | errors = odm.List(odm.Keyword(), default=[], description="List of error keys")
27 | results = odm.Mapping(odm.Any(), default={}, description="Result key value mapping")
28 |
29 |
30 | def from_datastore_submission(submission: DatabaseSubmission):
31 | """
32 | A helper to convert between database model version of Submission
33 | and the message version of Submission.
34 | """
35 | return Submission(
36 | {
37 | "sid": submission.sid,
38 | "files": submission.files,
39 | "metadata": submission.metadata,
40 | "params": submission.params,
41 | "scan_key": submission.scan_key,
42 | }
43 | )
44 |
45 |
46 | @odm.model(description="Model of Submission Message")
47 | class SubmissionMessage(odm.Model):
48 | msg = odm.Compound(Submission, description="Body of the message")
49 | msg_loader = odm.Enum(
50 | values={LOADER_CLASS}, default=LOADER_CLASS, description="Class to use to load the message as an object"
51 |     )
52 | msg_type = odm.Enum(values=MSG_TYPES, description="Type of message")
53 | sender = odm.Keyword(description="Sender of the message")
54 |
--------------------------------------------------------------------------------
/docker/al_management/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11-slim-bookworm
2 |
3 | # Make sure root account is locked so 'su' commands fail all the time
4 | RUN passwd -l root
5 |
6 | # Upgrade packages
7 | RUN apt-get update && apt-get -yy upgrade && rm -rf /var/lib/apt/lists/*
8 |
9 | # Get required apt packages
10 | RUN apt-get update && apt-get install -yy build-essential libssl-dev libffi-dev libfuzzy-dev libldap2-dev libsasl2-dev libmagic1 && rm -rf /var/lib/apt/lists/*
11 |
12 | # Add assemblyline user
13 | RUN useradd -s /bin/bash -b /var/lib -U -m assemblyline
14 |
15 | # Create assemblyline config directory
16 | RUN mkdir -p /etc/assemblyline
17 | RUN chmod 750 /etc/assemblyline
18 | RUN chown root:assemblyline /etc/assemblyline
19 |
20 | # Create assemblyline cache directory
21 | RUN mkdir -p /var/cache/assemblyline
22 | RUN chmod 770 /var/cache/assemblyline
23 | RUN chown assemblyline:assemblyline /var/cache/assemblyline
24 |
25 | # Create assemblyline home directory
26 | RUN mkdir -p /var/lib/assemblyline
27 | RUN chmod 770 /var/lib/assemblyline
28 | RUN chown assemblyline:assemblyline /var/lib/assemblyline
29 |
30 | # Create assemblyline log directory
31 | RUN mkdir -p /var/log/assemblyline
32 | RUN chmod 770 /var/log/assemblyline
33 | RUN chown assemblyline:assemblyline /var/log/assemblyline
34 |
35 | # Switch to assemblyline user
36 | USER assemblyline
37 |
38 | # Create the assemblyline venv
39 | RUN python -m venv /var/lib/assemblyline/venv
40 |
41 | # Install packages in the venv
42 | RUN /bin/bash -c "source /var/lib/assemblyline/venv/bin/activate && pip install --no-warn-script-location --no-cache-dir --upgrade pip wheel && pip install --no-warn-script-location --no-cache-dir assemblyline assemblyline_core assemblyline_ui assemblyline-client ipython jupyter"
43 |
44 | # Setup venv when bash is launched
45 | RUN echo "source /var/lib/assemblyline/venv/bin/activate" >> /var/lib/assemblyline/.bashrc
46 |
47 | RUN mkdir -p /var/lib/assemblyline/jupyter
48 | RUN mkdir -p /var/lib/assemblyline/.jupyter
49 | RUN touch /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
50 | RUN echo 'import os' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
51 | RUN echo 'from jupyter_server.auth import passwd' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
52 | RUN echo 'c.NotebookApp.password = passwd(os.getenv("NB_PASSWORD", "devpass"))' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
53 | RUN echo 'c.NotebookApp.allow_remote_access = True' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
54 | RUN echo 'c.NotebookApp.base_url = "/notebook/"' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
55 |
56 | WORKDIR /var/lib/assemblyline
57 |
58 | CMD /bin/bash -c "source /var/lib/assemblyline/venv/bin/activate && (cd /var/lib/assemblyline/jupyter && jupyter notebook -y --no-browser --ip=*)"
59 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/retrohunt.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | INDEX_CATEGORIES = [
5 | 'hot',
6 | 'archive',
7 | 'hot_and_archive',
8 | ]
9 |
10 |
11 | @odm.model(index=True, store=True, description="A search run on stored files.")
12 | class Retrohunt(odm.Model):
13 | # Metadata
14 |     indices = odm.Enum(INDEX_CATEGORIES, default='hot_and_archive',
15 | description="Defines the indices used for this retrohunt job")
16 | classification = odm.Classification(description="Classification for this retrohunt job")
17 | search_classification = odm.ClassificationString(description="Maximum classification of results in the search")
18 | creator = odm.keyword(copyto="__text__", description="User who created this retrohunt job")
19 | description = odm.Text(copyto="__text__", description="Human readable description of this retrohunt job")
20 | expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry timestamp of this retrohunt job")
21 |
22 | start_group = odm.long(description="Earliest expiry group this search will include")
23 | end_group = odm.long(description="Latest expiry group this search will include")
24 |
25 |     created_time = odm.date(description="Time at which this retrohunt job was created.")
26 |     started_time = odm.date(description="Time at which the search started.")
27 | completed_time = odm.Optional(odm.Date(store=False), description="Time that the search ended")
28 |
29 | # Search data
30 | key = odm.keyword(description="Unique code identifying this retrohunt job")
31 | raw_query = odm.keyword(store=False, description="Text of filter query derived from yara signature")
32 | yara_signature = odm.keyword(copyto="__text__", store=False, description="Text of original yara signature run")
33 |
34 | # Completion data
35 | errors = odm.sequence(odm.keyword(store=False), store=False,
36 |                           description="List of error messages that occurred during the search")
37 | warnings = odm.sequence(odm.keyword(store=False), store=False,
38 |                             description="List of warning messages that occurred during the search")
39 | finished = odm.boolean(default=False, description="Boolean that indicates if this retrohunt job is finished")
40 |     truncated = odm.boolean(default=False, description="Indicates whether the list of hits has been truncated at some limit")
41 |
42 |
43 | @odm.model(index=True, store=True, description="A hit encountered during a retrohunt search.")
44 | class RetrohuntHit(odm.Model):
45 |     key = odm.keyword(description="Unique code identifying this hit")
46 | classification = odm.Classification(description="Classification string for the retrohunt job and results list")
47 | sha256 = odm.SHA256()
48 | expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry for this entry.")
49 | search = odm.keyword()
50 |
--------------------------------------------------------------------------------
/docker/al_management/pipeline.Dockerfile:
--------------------------------------------------------------------------------
1 | ARG build_image
2 | FROM $build_image
3 | ARG version
4 | ARG version_tag=${version}
5 |
6 | ENV ASSEMBLYLINE_VERSION=${version}
7 | ENV ASSEMBLYLINE_IMAGE_TAG=${version_tag}
8 |
9 | # Make sure root account is locked so 'su' commands fail all the time
10 | RUN passwd -l root
11 |
12 | # Get required apt packages
13 | RUN apt-get update && apt-get install -yy build-essential libssl-dev libffi-dev libfuzzy-dev libldap2-dev libsasl2-dev libmagic1 && rm -rf /var/lib/apt/lists/*
14 |
15 | # Add assemblyline user
16 | RUN useradd -s /bin/bash -b /var/lib -U -m assemblyline
17 |
18 | # Create assemblyline config directory
19 | RUN mkdir -p /etc/assemblyline
20 | RUN chmod 750 /etc/assemblyline
21 | RUN chown root:assemblyline /etc/assemblyline
22 |
23 | # Create assemblyline cache directory
24 | RUN mkdir -p /var/cache/assemblyline
25 | RUN chmod 770 /var/cache/assemblyline
26 | RUN chown assemblyline:assemblyline /var/cache/assemblyline
27 |
28 | # Create assemblyline home directory
29 | RUN mkdir -p /var/lib/assemblyline
30 | RUN chmod 770 /var/lib/assemblyline
31 | RUN chown assemblyline:assemblyline /var/lib/assemblyline
32 |
33 | # Create assemblyline log directory
34 | RUN mkdir -p /var/log/assemblyline
35 | RUN chmod 770 /var/log/assemblyline
36 | RUN chown assemblyline:assemblyline /var/log/assemblyline
37 |
38 | # Switch to assemblyline user
39 | USER assemblyline
40 |
41 | # Create the assemblyline venv
42 | RUN python -m venv /var/lib/assemblyline/venv
43 |
44 | # Install packages in the venv
45 | COPY setup.py dist* dist/
46 | RUN /bin/bash -c "source /var/lib/assemblyline/venv/bin/activate && pip install --no-cache-dir --upgrade pip wheel && pip install --no-cache-dir -f dist/ assemblyline==$version assemblyline_core==$version assemblyline_ui==$version assemblyline-client ipython jupyter"
47 |
48 | # Setup venv when bash is launched
49 | RUN echo "source /var/lib/assemblyline/venv/bin/activate" >> /var/lib/assemblyline/.bashrc
50 |
51 | RUN mkdir -p /var/lib/assemblyline/jupyter
52 | RUN mkdir -p /var/lib/assemblyline/.jupyter
53 | RUN touch /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
54 | RUN echo 'import os' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
55 | RUN echo 'from notebook.auth import passwd' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
56 | RUN echo 'c.NotebookApp.password = passwd(os.getenv("NB_PASSWORD", "devpass"))' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
57 | RUN echo 'c.NotebookApp.allow_remote_access = True' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
58 | RUN echo 'c.NotebookApp.base_url = "/notebook/"' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py
59 |
60 | WORKDIR /var/lib/assemblyline
61 |
62 | CMD /bin/bash -c "source /var/lib/assemblyline/venv/bin/activate && (cd /var/lib/assemblyline/jupyter && jupyter notebook -y --no-browser --ip=*)"
63 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/results/process.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common.dict_utils import get_dict_fingerprint_hash
3 | from os import environ
4 |
5 | OID_PARTS = ['pid', 'ppid', 'image', 'command_line']
6 |
7 |
8 | @odm.model(description="Details about the characteristics used to identify an object")
9 | class ObjectID(odm.Model):
10 | tag = odm.Text(description="The normalized tag of the object")
11 | ontology_id = odm.Keyword(description="Deterministic identifier of ontology. This value should be able to be "
12 | "replicable between services that have access to similar object details, "
13 | "such that it can be used for relating objects in post-processing.")
14 | service_name = odm.Keyword(default=environ.get('AL_SERVICE_NAME', 'unknown'),
15 |                                description="Service or component that generated this object")
16 | guid = odm.Optional(odm.Text(), description="The GUID associated with the object")
17 | treeid = odm.Optional(odm.Text(), description="The hash of the tree ID")
18 | processtree = odm.Optional(odm.Keyword(), description="Human-readable tree ID (concatenation of tags)")
19 | time_observed = odm.Optional(odm.Date(), description="The time at which the object was observed")
20 | session = odm.Optional(odm.Keyword(), description="Unifying session name/ID")
21 |
22 | @odm.model(description="Details about a process")
23 | class Process(odm.Model):
24 | objectid = odm.Compound(ObjectID, description="The object ID of the process object")
25 | image = odm.Text(default="", description="The image of the process")
26 | start_time = odm.Date(description="The time of creation for the process")
27 |
28 | # Parent process details
29 | pobjectid = odm.Optional(odm.Compound(ObjectID), description="The object ID of the parent process object")
30 | pimage = odm.Optional(odm.Text(), description="The image of the parent process that spawned this process")
31 | pcommand_line = odm.Optional(odm.Text(), description="The command line that the parent process ran")
32 | ppid = odm.Optional(odm.Integer(), description="The process ID of the parent process")
33 |
34 | pid = odm.Optional(odm.Integer(), description="The process ID")
35 | command_line = odm.Optional(odm.Text(), description="The command line that the process ran")
36 | end_time = odm.Optional(odm.Date(), description="The time of termination for the process")
37 | integrity_level = odm.Optional(odm.Text(), description="The integrity level of the process")
38 | image_hash = odm.Optional(odm.Text(), description="The hash of the file run")
39 | original_file_name = odm.Optional(odm.Text(), description="The original name of the file")
40 |
41 | def get_oid(data: dict):
42 | return f"process_{get_dict_fingerprint_hash({key: data.get(key) for key in OID_PARTS})}"
43 |
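Because `get_oid` fingerprints only the `OID_PARTS` fields, identical process details always map to the same ontology ID, which is what makes correlation across services possible. A quick sketch with invented values:

```python
from assemblyline.odm.models.ontology.results.process import Process

# Invented process details covering the OID_PARTS fields
details = {"pid": 1234, "ppid": 1,
           "image": "C:\\Windows\\System32\\cmd.exe",
           "command_line": "cmd.exe /c whoami"}

# The fingerprint depends only on OID_PARTS, so equal details give equal IDs
assert Process.get_oid(details) == Process.get_oid(dict(details))
print(Process.get_oid(details))  # process_<fingerprint>
```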
--------------------------------------------------------------------------------
/test/test_metrics.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import pytest
4 |
5 | from assemblyline.common.metrics import MetricsFactory, PerformanceTimer
6 | from assemblyline import odm
7 | from assemblyline.common import forge
8 | from assemblyline.remote.datatypes.exporting_counter import export_metrics_once
9 |
10 |
11 | @odm.model()
12 | class Metrics(odm.Model):
13 | counter = odm.Integer()
14 | performance_counter = PerformanceTimer()
15 |
16 |
17 | def test_metrics_counter(redis_connection):
18 | source = MetricsFactory('test', Metrics, redis=redis_connection)
19 |
20 | channel = forge.get_metrics_sink(redis_connection)
21 | channel.listen(blocking=False)
22 |
23 | source.increment('counter', 55)
24 | source.increment_execution_time('performance_counter', 6)
25 | source.increment_execution_time('performance_counter', 6)
26 |
27 | start = time.time()
28 | read = {}
29 | for metric_message in channel.listen(blocking=False):
30 | if 'counter' in read and 'performance_counter.t' in read:
31 | break
32 |
33 | if time.time() - start > 30:
34 | pytest.fail()
35 |
36 | if metric_message is None:
37 | time.sleep(0.1)
38 | continue
39 |
40 | if metric_message['type'] == 'test':
41 | for key, value in metric_message.items():
42 | if isinstance(value, (int, float)):
43 | read[key] = read.get(key, 0) + value
44 |
45 | assert read['counter'] == 55
46 | assert read['performance_counter.t'] == 12
47 | assert read['performance_counter.c'] == 2
48 |
49 | source.stop()
50 |
51 |
52 | # FIXME: This particular test is hit-or-miss when actually ran in pipelines
53 | # def test_metrics_export(redis_connection):
54 | # channel = forge.get_metrics_sink(redis_connection)
55 |
56 | # start = time.time()
57 | # read = {}
58 | # sent = False
59 |
60 | # for metric_message in channel.listen(blocking=False):
61 | # if 'counter' in read and 'performance_counter.t' in read:
62 | # break
63 |
64 | # if sent and time.time() - start > 20:
65 | # assert False, read
66 |
67 | # if not sent:
68 | # sent = True
69 | # export_metrics_once('test', Metrics, {'counter': 99, 'performance_counter': 6}, redis=redis_connection)
70 | # # Set the start time to when the metrics should've been exported
71 | # start = time.time()
72 |
73 | # if metric_message is None:
74 | # time.sleep(0.1)
75 | # continue
76 |
77 | # if metric_message['type'] == 'test':
78 | # for key, value in metric_message.items():
79 | # if isinstance(value, (int, float)):
80 | # read[key] = read.get(key, 0) + value
81 |
82 | # assert read['counter'] == 99
83 | # assert read['performance_counter.t'] == 6
84 | # assert read['performance_counter.c'] == 1
85 |
--------------------------------------------------------------------------------
/assemblyline/common/digests.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import ssdeep
3 | import tlsh
4 | from typing import Dict
5 |
6 | from assemblyline.common import entropy
7 |
8 | DEFAULT_BLOCKSIZE = 65536
9 |
10 |
11 | # noinspection PyBroadException
12 | def get_digests_for_file(path: str, blocksize: int = DEFAULT_BLOCKSIZE, calculate_entropy: bool = True,
13 | on_first_block=lambda _b, _l, _p: {}, skip_fuzzy_hashes: bool = False) -> Dict:
14 |     """Generate digests for a file, reading only 'blocksize' bytes at a time."""
15 | bc = None
16 | if calculate_entropy:
17 | try:
18 | bc = entropy.BufferedCalculator()
19 | except Exception:
20 | pass
21 |
22 | result = {}
23 |
24 | md5 = hashlib.md5()
25 | sha1 = hashlib.sha1()
26 | sha256 = hashlib.sha256()
27 | if not skip_fuzzy_hashes:
28 | th = tlsh.Tlsh()
29 | size = 0
30 |
31 | with open(path, 'rb') as f:
32 | data = f.read(blocksize)
33 | length = len(data)
34 |
35 | if not size:
36 | result.update(on_first_block(data, length, path))
37 |
38 | while length > 0:
39 | if bc is not None:
40 | bc.update(data, length)
41 | md5.update(data)
42 | sha1.update(data)
43 | sha256.update(data)
44 | if not skip_fuzzy_hashes:
45 | th.update(data)
46 | size += length
47 |
48 | data = f.read(blocksize)
49 | length = len(data)
50 |
51 | if bc is not None:
52 | result['entropy'] = bc.entropy()
53 | else:
54 | result['entropy'] = 0
55 | result['md5'] = md5.hexdigest()
56 | result['sha1'] = sha1.hexdigest()
57 | result['sha256'] = sha256.hexdigest()
58 | result['size'] = size
59 |
60 | if not skip_fuzzy_hashes:
61 | result["ssdeep"] = ssdeep.hash_from_file(path)
62 | # Try to finalise the TLSH Hash and add it to the results
63 | try:
64 | th.final()
65 | result['tlsh'] = th.hexdigest()
66 | except Exception:
67 | pass
68 |
69 | return result
70 |
71 |
72 | def get_md5_for_file(path: str, blocksize: int = DEFAULT_BLOCKSIZE) -> str:
73 | md5 = hashlib.md5()
74 | with open(path, 'rb') as f:
75 | data = f.read(blocksize)
76 | length = len(data)
77 |
78 | while length > 0:
79 | md5.update(data)
80 | data = f.read(blocksize)
81 | length = len(data)
82 |
83 | return md5.hexdigest()
84 |
85 |
86 | def get_sha256_for_file(path: str, blocksize: int = DEFAULT_BLOCKSIZE) -> str:
87 | sha256 = hashlib.sha256()
88 | with open(path, 'rb') as f:
89 | data = f.read(blocksize)
90 | length = len(data)
91 |
92 | while length > 0:
93 | sha256.update(data)
94 | data = f.read(blocksize)
95 | length = len(data)
96 |
97 | return sha256.hexdigest()
98 |
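The `on_first_block` hook lets callers derive extra data from the first read without reopening the file; whatever dict it returns is merged into the results. A short sketch, using a hypothetical file path:

```python
from assemblyline.common.digests import get_digests_for_file

def first_block(block: bytes, length: int, path: str) -> dict:
    # Whatever this returns is merged into the digest results
    return {"magic_prefix": block[:4].hex()}

digests = get_digests_for_file("/tmp/sample.bin", on_first_block=first_block)
print(digests["sha256"], digests["size"], digests.get("magic_prefix"))
```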
--------------------------------------------------------------------------------
/assemblyline/common/entropy.py:
--------------------------------------------------------------------------------
1 | import io
2 |
3 | from math import log
4 | from typing import Tuple, List, BinaryIO, AnyStr
5 |
6 | frequency = None
7 |
8 | # The minimum partition size should be 256 bytes, as the keyspace
9 | # for a single byte is 256 values
10 | MIN_PARTITION_SIZE = 256
11 |
12 |
13 | def calculate_entropy(contents: bytes) -> float:
14 |     """ This function calculates the Shannon entropy of the contents.
15 |     It is given by the formula:
16 |         E = -SUM[v in 0..255](p(v) * log2(p(v)))
17 | """
18 | calculator = BufferedCalculator()
19 | calculator.update(contents)
20 | return calculator.entropy()
21 |
22 |
23 | def calculate_partition_entropy(fin: BinaryIO, num_partitions: int = 50) -> Tuple[float, List[float]]:
24 | """Calculate the entropy of a file and its partitions."""
25 |
26 |     # Split input into num_partitions and calculate
27 |     # partition entropy.
28 | fin.seek(0, io.SEEK_END)
29 | size = fin.tell()
30 | fin.seek(0)
31 |
32 | if size == 0:
33 | return 0, [0]
34 |
35 | # Calculate the partition size to get the desired amount of partitions but make sure those
36 | # partitions are the minimum partition size
37 | partition_size = max((size - 1)//num_partitions + 1, MIN_PARTITION_SIZE)
38 |
39 |     # If our calculated partition size is the minimum partition size, our file is likely too small, so we
40 |     # calculate an alternate partition size that makes sure all blocks are of equal size
41 | if partition_size == MIN_PARTITION_SIZE:
42 | partition_size = (size-1) // ((size-1)//partition_size + 1) + 1
43 |
44 | # Also calculate full file entropy using buffered calculator.
45 | p_entropies = []
46 | full_entropy_calculator = BufferedCalculator()
47 | for _ in range(num_partitions):
48 | partition = fin.read(partition_size)
49 | if not partition:
50 | break
51 | p_entropies.append(calculate_entropy(partition))
52 | full_entropy_calculator.update(partition)
53 | return full_entropy_calculator.entropy(), p_entropies
54 |
55 |
56 | class BufferedCalculator(object):
57 | def __init__(self):
58 | global frequency
59 | import pyximport
60 | pyximport.install()
61 | # noinspection PyUnresolvedReferences
62 | from assemblyline.common import frequency
63 |
64 | self.c = {}
65 | self.length = 0
66 |
67 | def entropy(self) -> float:
68 | if self.length == 0:
69 | return 0.0
70 |
71 | length = float(self.length)
72 |
73 | entropy = 0.0
74 | for v in self.c.values():
75 | prob = float(v) / length
76 | entropy += prob * log(prob, 2)
77 |
78 | entropy *= -1
79 |
80 | # Make sure we don't return -0.0.
81 | if not entropy:
82 | entropy = 0.0
83 |
84 | return entropy
85 |
86 | def update(self, data: AnyStr, length: int = 0):
87 | if not length:
88 | length = len(data)
89 |
90 | self.length += length
91 | self.c = frequency.counts(data, length, self.c)
92 |
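Two boundary cases make the log2 formula concrete: a single repeated byte yields zero entropy, while a uniform distribution over all 256 byte values hits the 8-bit maximum. A sketch, assuming the Cython `frequency` extension builds via pyximport:

```python
from assemblyline.common.entropy import calculate_entropy

# One repeated byte: p(v) = 1 for a single v, so E = 0
assert calculate_entropy(b"A" * 1024) == 0.0

# Every byte value equally likely: p(v) = 1/256, so E = log2(256) = 8
uniform = bytes(range(256)) * 4
assert abs(calculate_entropy(uniform) - 8.0) < 1e-9
```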
--------------------------------------------------------------------------------
/assemblyline/filestore/transport/base.py:
--------------------------------------------------------------------------------
1 | from typing import AnyStr, Iterable, Optional
2 |
3 | from assemblyline.common.exceptions import ChainException
4 |
5 |
6 | def normalize_srl_path(srl):
7 | if '/' in srl:
8 | return srl
9 |
10 | return '{0}/{1}/{2}/{3}/{4}'.format(srl[0], srl[1], srl[2], srl[3], srl)
11 |
12 |
13 | class TransportException(ChainException):
14 | """
15 | FileTransport exception base class.
16 |
17 | TransportException is a subclass of ChainException so that it can be
18 | used with the Chain and ChainAll decorators.
19 | """
20 | pass
21 |
22 |
23 | class Transport(object):
24 | """
25 | FileTransport base class.
26 |
27 | - Subclasses should override all methods.
28 |     - Except as noted, FileTransport methods do not return a value and raise
29 |       TransportException on failure.
30 |     - Methods should only raise TransportExceptions. (The decorators
31 |       Chain and ChainAll can be applied to a function/method and class,
32 |       respectively, to ensure that any exceptions raised are converted to
33 |       TransportExceptions.)
34 | """
35 |
36 | def __init__(self, normalize=normalize_srl_path):
37 | self.normalize = normalize
38 |
39 | def close(self):
40 | pass
41 |
42 | def delete(self, path: str):
43 | """
44 | Deletes the file.
45 | """
46 | raise TransportException("Not Implemented")
47 |
48 | def exists(self, path: str) -> bool:
49 | """
50 | Returns True if the path exists, False otherwise.
51 | Should work with both files and directories.
52 | """
53 | raise TransportException("Not Implemented")
54 |
55 | def makedirs(self, path: str):
56 | """
57 |         Like os.makedirs (the "super-mkdir"): create the leaf directory path and
58 | any intermediate path segments.
59 | """
60 | raise TransportException("Not Implemented")
61 |
62 | # File based functions
63 | def download(self, src_path: str, dst_path: str):
64 | """
65 | Copies the content of the filestore src_path to the local dst_path.
66 | """
67 | raise TransportException("Not Implemented")
68 |
69 | def upload(self, src_path: str, dst_path: str):
70 | """
71 |         Upload the local source file src_path to the filestore dst_path, overwriting dst_path if it already exists.
72 | """
73 | raise TransportException("Not Implemented")
74 |
75 | # Buffer based functions
76 | def get(self, path: str) -> bytes:
77 | """
78 | Returns the content of the file.
79 | """
80 | raise TransportException("Not Implemented")
81 |
82 | def put(self, dst_path: str, content: AnyStr):
83 | """
84 | Put the content of the file in memory directly to the filestore dst_path
85 | """
86 | raise TransportException("Not Implemented")
87 |
88 | def list(self, prefix: Optional[str] = None) -> Iterable[str]:
89 | """List all files in the store filtered by name prefix."""
90 | raise NotImplementedError()
91 |
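A toy subclass illustrates the intended contract. This is only a sketch: `DiskTransport` and its base path are hypothetical, and it assumes the `ChainAll` decorator named in the docstring above lives in `assemblyline.common.exceptions` and wraps every method so stray errors become TransportExceptions:

```python
import os

from assemblyline.common.exceptions import ChainAll
from assemblyline.filestore.transport.base import Transport, TransportException


@ChainAll(TransportException)  # convert any stray exception into a TransportException
class DiskTransport(Transport):
    """Hypothetical local-disk transport, for illustration only."""

    def __init__(self, base: str = "/tmp/filestore"):
        super().__init__()  # keeps the default SRL path normalization
        self.base = base

    def exists(self, path: str) -> bool:
        return os.path.exists(os.path.join(self.base, self.normalize(path)))

    def put(self, dst_path: str, content):
        dst = os.path.join(self.base, self.normalize(dst_path))
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        with open(dst, "wb" if isinstance(content, bytes) else "w") as out:
            out.write(content)

    def get(self, path: str) -> bytes:
        with open(os.path.join(self.base, self.normalize(path)), "rb") as f:
            return f.read()
```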
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/results/signature.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common.dict_utils import get_dict_fingerprint_hash
3 | from assemblyline.odm.models.result import Attack
4 | from assemblyline.odm.models.ontology.results.process import ObjectID
5 |
6 | OID_PARTS = ['name', 'type']
7 | TAG_PARTS = ['type', 'name']
8 |
9 |
10 | @odm.model(description="Attribute relating to the signature that was raised during the analysis of the task")
11 | class Attribute(odm.Model):
12 | source = odm.Compound(ObjectID, description="Object that the rule triggered on")
13 |     target = odm.Optional(odm.Compound(ObjectID), description="Object targeted by the source object")
14 | action = odm.Optional(odm.Enum(values= # Process actions
15 | ['clipboard_capture', 'create_remote_thread', 'create_stream_hash', 'dns_query', 'driver_loaded',
16 | 'file_change', 'file_creation', 'file_delete', 'image_loaded', 'network_connection', 'network_connection_linux',
17 | 'pipe_created', 'process_access', 'process_creation', 'process_creation_linux', 'process_tampering',
18 | 'process_terminated', 'raw_access_thread', 'registry_add', 'registry_delete', 'registry_event', 'registry_rename',
19 | 'registry_set', 'sysmon_error', 'sysmon_status', 'wmi_event'],
20 | description="The relation between the source and target"))
21 | meta = odm.Optional(odm.Text(), description="Metadata about the detection")
22 | event_record_id = odm.Optional(odm.Text(), description="Event Record ID (Event Logs)")
23 | domain = odm.Optional(odm.Domain(), description="Domain")
24 | uri = odm.Optional(odm.URI(), description="URI")
25 | file_hash = odm.Optional(odm.SHA256(), description="SHA256 of file")
26 |
27 |
28 | @odm.model(index=False, store=False, description="A signature that was raised during the analysis of the task")
29 | class Signature(odm.Model):
30 | objectid = odm.Compound(ObjectID, description="The object ID of the signature object")
31 |
32 | name = odm.Keyword(description="The name of the signature")
33 | type = odm.Enum(values=['CUCKOO', 'YARA', 'SIGMA', 'SURICATA'], description="Type of signature")
34 | classification = odm.ClassificationString(description="Classification of signature")
35 | attributes = odm.Optional(odm.List(odm.Compound(Attribute)), description="Attributes about the signature")
36 | attacks = odm.Optional(odm.List(odm.Compound(Attack)),
37 | description="A list of ATT&CK patterns and categories of the signature")
38 | actors = odm.Optional(odm.List(odm.Text()), description="List of actors of the signature")
39 | malware_families = odm.Optional(odm.List(odm.Text()), description="List of malware families of the signature")
40 | signature_id = odm.Optional(odm.Text(), description="ID of signature")
41 |
42 | def get_oid(data: dict):
43 | return f"signature_{get_dict_fingerprint_hash({key: data.get(key) for key in OID_PARTS})}"
44 |
45 | def get_tag(data: dict):
46 | return '.'.join([data.get(key) for key in TAG_PARTS])
47 |
--------------------------------------------------------------------------------
/assemblyline/datastore/bulk.py:
--------------------------------------------------------------------------------
1 | import json
2 | import typing
3 | from copy import deepcopy
4 |
5 |
6 | class ElasticBulkPlan(object):
7 | def __init__(self, indexes: typing.List[str], model: typing.Optional[type] = None):
8 | self.indexes = indexes
9 | self.model = model
10 | self.operations: typing.List[str] = []
11 |
12 | @property
13 | def empty(self):
14 | return len(self.operations) == 0
15 |
16 | def add_delete_operation(self, doc_id, index=None):
17 | if index:
18 | self.operations.append(json.dumps({"delete": {"_index": index, "_id": doc_id}}))
19 | else:
20 | for cur_index in self.indexes:
21 | self.operations.append(json.dumps({"delete": {"_index": cur_index, "_id": doc_id}}))
22 |
23 | def add_insert_operation(self, doc_id, doc, index=None):
24 | if self.model and isinstance(doc, self.model):
25 | saved_doc = doc.as_primitives(hidden_fields=True)
26 | elif self.model:
27 | saved_doc = self.model(doc).as_primitives(hidden_fields=True)
28 | else:
29 | if not isinstance(doc, dict):
30 | saved_doc = {'__non_doc_raw__': doc}
31 | else:
32 | saved_doc = deepcopy(doc)
33 | saved_doc['id'] = doc_id
34 |
35 | self.operations.append(json.dumps({"create": {"_index": index or self.indexes[0], "_id": doc_id}}))
36 | self.operations.append(json.dumps(saved_doc))
37 |
38 | def add_upsert_operation(self, doc_id, doc, index=None):
39 | if self.model and isinstance(doc, self.model):
40 | saved_doc = doc.as_primitives(hidden_fields=True)
41 | elif self.model:
42 | saved_doc = self.model(doc).as_primitives(hidden_fields=True)
43 | else:
44 | if not isinstance(doc, dict):
45 | saved_doc = {'__non_doc_raw__': doc}
46 | else:
47 | saved_doc = deepcopy(doc)
48 | saved_doc['id'] = doc_id
49 |
50 | self.operations.append(json.dumps({"update": {"_index": index or self.indexes[0], "_id": doc_id}}))
51 | self.operations.append(json.dumps({"doc": saved_doc, "doc_as_upsert": True}))
52 |
53 | def add_update_operation(self, doc_id, doc, index=None):
54 |
55 | if self.model and isinstance(doc, self.model):
56 | saved_doc = doc.as_primitives(hidden_fields=True)
57 | elif self.model:
58 | saved_doc = self.model(doc, mask=list(doc.keys())).as_primitives(hidden_fields=True)
59 | else:
60 | if not isinstance(doc, dict):
61 | saved_doc = {'__non_doc_raw__': doc}
62 | else:
63 | saved_doc = deepcopy(doc)
64 |
65 | if index:
66 | self.operations.append(json.dumps({"update": {"_index": index, "_id": doc_id}}))
67 | self.operations.append(json.dumps({"doc": saved_doc}))
68 | else:
69 | for cur_index in self.indexes:
70 | self.operations.append(json.dumps({"update": {"_index": cur_index, "_id": doc_id}}))
71 | self.operations.append(json.dumps({"doc": saved_doc}))
72 |
73 | def get_plan_data(self):
74 | return "\n".join(self.operations)
75 |
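The plan body is Elasticsearch bulk-API NDJSON: one action line per operation, followed by a document line for create/update operations. A minimal sketch without a model, with invented index and document values:

```python
from assemblyline.datastore.bulk import ElasticBulkPlan

plan = ElasticBulkPlan(indexes=["file"])
plan.add_insert_operation("doc1", {"sha256": "a" * 64})
plan.add_delete_operation("doc2")

print(plan.get_plan_data())
# {"create": {"_index": "file", "_id": "doc1"}}
# {"sha256": "aaa...a", "id": "doc1"}
# {"delete": {"_index": "file", "_id": "doc2"}}
```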
--------------------------------------------------------------------------------
/assemblyline/common/constants.py:
--------------------------------------------------------------------------------
1 | import enum
2 | import os
3 | from assemblyline.common.path import modulepath
4 |
5 |
6 | SUBMISSION_QUEUE = 'dispatch-submission-queue'
7 | DISPATCH_TASK_HASH = 'dispatch-active-submissions'
8 | DISPATCH_RUNNING_TASK_HASH = 'dispatch-active-tasks'
9 | SCALER_TIMEOUT_QUEUE = 'scaler-timeout-queue'
10 | CONFIG_HASH = 'al-config'
11 | POST_PROCESS_CONFIG_KEY = 'post-process-actions'
12 |
13 |
14 | # Some pure functions for building queue names (effectively appending/prepending constants to strings)
15 | def service_queue_name(service: str) -> str:
16 | """Take the name of a service, and provide the queue name to send tasks to that service."""
17 | return 'service-queue-' + service
18 |
19 |
20 | def make_watcher_list_name(sid: str) -> str:
21 | """Get the name of the list dispatcher will pull for sending out submission events."""
22 | return 'dispatch-watcher-list-' + sid
23 |
24 |
25 | def get_temporary_submission_data_name(sid: str, file_hash: str) -> str:
26 | """The HashMap used for tracking auxiliary processing data."""
27 | return '/'.join((sid, file_hash, 'temp_data'))
28 |
29 |
30 | def get_tag_set_name(sid: str, file_hash: str) -> str:
31 | """The HashSet used to track the tags for an in-process file."""
32 | return '/'.join((sid, file_hash, 'tags'))
33 |
34 |
35 | # A table storing information about the state of a service, expected type is ExpiringHash
36 | # with a default ttl of None, and the ttl set per field based on the timeouts of queries
37 | # and service operation
38 | class ServiceStatus(enum.IntEnum):
39 | Idle = 0
40 | Running = 1
41 |
42 |
43 | SERVICE_STATE_HASH = 'service-stasis-table'
44 |
45 | # A null empty accepts, accepts all. A null rejects, rejects nothing
46 | DEFAULT_SERVICE_ACCEPTS = ".*"
47 | DEFAULT_SERVICE_REJECTS = "empty|metadata/.*"
48 |
49 | # Priority used to drop tasks in the ingester
50 | DROP_PRIORITY = 0
51 |
52 | # Maximum priority that can be assigned to a submission
53 | MAX_PRIORITY = 1500
54 |
55 | # Queue priority values for each bucket in the ingester
56 | PRIORITIES = {
57 | 'low': 100, # 1 -> 100
58 | 'medium': 200, # 101 -> 200
59 | 'high': 300, # 201 -> 300
60 | 'critical': 400, # 301 -> 400
61 | 'user-low': 500, # 401 -> 500
62 | 'user-medium': 1000, # 501 -> 1000
63 | 'user-high': MAX_PRIORITY # 1001 -> 1500
64 | }
65 |
66 |
67 | # The above priority values presented as a range for consistency
68 | PRIORITY_RANGES = {}
69 | _start = DROP_PRIORITY
70 | for _end, _level in sorted((val, key) for key, val in PRIORITIES.items()):
71 | PRIORITY_RANGES[_level] = (_start + 1, _end)
72 | _start = _end
73 |
74 |
75 | # Score thresholds for determining which queue priority a reingested item
76 | # gets based on its previous score.
77 | # eg.: item with a previous score of 99 will get 'low' priority
78 | # item with a previous score of 300 will get a 'high' priority
79 | PRIORITY_THRESHOLDS = {
80 | 'critical': 500,
81 | 'high': 100,
82 | }
83 |
84 | MAGIC_RULE_PATH = os.path.join(modulepath(__name__), 'custom.magic')
85 | YARA_RULE_PATH = os.path.join(modulepath(__name__), 'custom.yara')
86 |
87 | MAX_INT = 2_147_483_647
88 |
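The loop above converts the bucket ceilings in PRIORITIES into inclusive (low, high) ranges, each starting one above the previous bucket's ceiling. For instance:

```python
from assemblyline.common.constants import PRIORITY_RANGES

print(PRIORITY_RANGES["low"])          # (1, 100)
print(PRIORITY_RANGES["user-medium"])  # (501, 1000)
print(PRIORITY_RANGES["user-high"])    # (1001, 1500)
```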
--------------------------------------------------------------------------------
/test/test_regexes.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import pytest
4 | from assemblyline.odm.base import FULL_URI, TLSH_REGEX, UNC_PATH_REGEX
5 |
6 | FULL_URI_COMP = re.compile(FULL_URI)
7 | TLSH_REGEX_COMP = re.compile(TLSH_REGEX)
8 | UNC_PATH_COMP = re.compile(UNC_PATH_REGEX)
9 |
10 |
11 | @pytest.mark.parametrize("value, ismatch", [
12 | ("blah", False),
13 | ("http://blah", False),
14 | ("http://blah.com", True),
15 | ("http://blah.com:abc", False),
16 | ("http://blah.com:123", True),
17 | ("http://blah.com:123?blah", True),
18 | ("http://blah.com:123/blah", True),
19 | ("http://blah.com:123/blah?blah", True),
20 | ("1.1.1.1", False),
21 | ("http://1.1.1.1", True),
22 | ("http://1.1.1.1:123", True),
23 | ("http://1.1.1.1:123/blah", True),
24 | ("http://1.1.1.1:123/blah?blah", True),
25 | ("net.tcp://1.1.1.1:123", True),
26 | ("net.tcp://1.1.1.1:1", True),
27 | # URI requires a scheme: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#scheme
28 | ("//1.1.1.1:1", False),
29 | # Scheme must start with A-Z: https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
30 | ("7://site.com:8080/stuff", False),
31 | ("9http://1.1.1.1/stuff", False),
32 | (".jpg-ohttp://1.1.1.1/", False),
33 | (".://site.com/?e=stuff", False),
34 | ("-urihttps://site.com/", False),
35 | ("+://site.com/", False),
36 | ])
37 | def test_full_uri_regex(value, ismatch):
38 | if ismatch:
39 | assert FULL_URI_COMP.match(value) is not None
40 | else:
41 | assert FULL_URI_COMP.match(value) is None
42 |
43 |
44 | @pytest.mark.parametrize(("value", "expected"), [
45 | ("https://example.com/@this/is/a/path", "example.com"),
46 | ("https://example.com?@query", "example.com"),
47 | ("https://example.com#@fragment", "example.com"),
48 | ])
49 | def test_full_uri_capture(value, expected):
50 | assert FULL_URI_COMP.match(value).group(2) == expected
51 |
52 |
53 | @pytest.mark.parametrize("value, ismatch", [
54 | ("T1A0F4F19BB9A15CDED5F2937AC6B293A35221FF23A357462F1498270D69202C8EA4D36F", True),
55 | ("abcdef01234567899876543210fedcba", False),
56 | ("A034F19BB7A15CDED5F2037AC6B293A35221FF23A357462F1498270D69202C8EA4D36F", True),
57 | ("034F1/9BB7A15CDED5F2037AC6B293A35221FF23A357462F1498270D69202C8EA4D36F", False),
58 | ("T1A034F19BB7A15CDEZ5F2037AC6B293A35221FF23A357462F1498270D69202C8EA4D36F", False),
59 | ("T1A034F19BB7A15CDED5F2037AC6B293A35221FF23A357462F1498270D69202C8EA4D36F2", False),
60 | ("T1a0f4f19bb9a15cded5f2937ac6b293a35221ff23a357462f1498270d69202c8ea4d36f", True),
61 | ("T1a0f4f19bb9a15cdED5F2937AC6B293A35221FF23A357462f1498270d69202c8ea4d36f", True),
62 | ("", False),
63 | ])
64 | def test_tlsh_regex(value, ismatch):
65 | if ismatch:
66 | assert TLSH_REGEX_COMP.match(value) is not None
67 | else:
68 | assert TLSH_REGEX_COMP.match(value) is None
69 |
70 |
71 | @pytest.mark.parametrize(("value", "is_match"), [
72 | (R"\\domain-segment-that-is-long.trycloudflare.com@SSL\DavWWWRoot\4ABCDEFGI", True),
73 | (R"\\127.0.0.1\c$\temp\test-file.txt", True),
74 | (R"\temp\test-file.txt", False),
75 | ])
76 | def test_unc_path_regex(value, is_match):
77 | assert is_match == bool(UNC_PATH_COMP.match(value))
78 |
79 |
--------------------------------------------------------------------------------
/docker/nginx-ssl-frontend/minimal.template:
--------------------------------------------------------------------------------
1 | error_log ${ERROR_LOG} ${ERROR_LEVEL};
2 |
3 | server {
4 | server_name ${FQDN};
5 | listen 443 ssl;
6 | charset utf-8;
7 | client_max_body_size ${MAX_BODY_SIZE};
8 |
9 | ssl_session_cache shared:SSL:20m;
10 | ssl_session_timeout 60m;
11 | ssl_prefer_server_ciphers on;
12 | ssl_ciphers ECDH+AESGCM:ECDH+AES256:ECDH+AES128:DHE+AES128:!ADH:!AECDH:!MD5;
13 | ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
14 |
15 | ssl_certificate /etc/ssl/nginx.crt;
16 | ssl_certificate_key /etc/ssl/nginx.key;
17 |
18 | server_tokens off;
19 |
20 | access_log ${ACCESS_LOG};
21 |
22 | proxy_read_timeout ${READ_TIMEOUT};
23 | proxy_connect_timeout ${CONNECT_TIMEOUT};
24 | proxy_send_timeout ${SEND_TIMEOUT};
25 |
26 | location / {
27 | try_files ${DOLLAR}uri @frontend;
28 | }
29 |
30 | location @frontend {
31 | add_header X-Frame-Options SAMEORIGIN;
32 | add_header Strict-Transport-Security "max-age=31536000; includeSubDomains";
33 |
34 | proxy_set_header X-Forwarded-For ${DOLLAR}proxy_add_x_forwarded_for;
35 | proxy_set_header X-Remote-Cert-Verified ${DOLLAR}ssl_client_verify;
36 | proxy_set_header X-Remote-DN ${DOLLAR}ssl_client_s_dn;
37 | proxy_set_header Host ${DOLLAR}http_host;
38 | proxy_set_header Scheme ${DOLLAR}scheme;
39 | proxy_set_header Server-Port ${DOLLAR}server_port;
40 |
41 | proxy_pass http://${FRONTEND_HOST}:3000;
42 | }
43 |
44 | location /socket.io/ {
45 | add_header Strict-Transport-Security "max-age=31536000; includeSubDomains";
46 |
47 | proxy_set_header X-Remote-User ${DOLLAR}remote_user;
48 | proxy_set_header X-Forwarded-For ${DOLLAR}proxy_add_x_forwarded_for;
49 | proxy_set_header Host ${DOLLAR}http_host;
50 | proxy_redirect off;
51 | proxy_buffering off;
52 | proxy_http_version 1.1;
53 | proxy_set_header Upgrade ${DOLLAR}http_upgrade;
54 | proxy_set_header Connection "upgrade";
55 |
56 | proxy_pass http://${SOCKET_HOST}:5002;
57 | }
58 |
59 | location /api/ {
60 | add_header X-Frame-Options SAMEORIGIN;
61 | add_header Strict-Transport-Security "max-age=31536000; includeSubDomains";
62 |
63 | proxy_set_header X-Forwarded-For ${DOLLAR}proxy_add_x_forwarded_for;
64 | proxy_set_header X-Remote-Cert-Verified ${DOLLAR}ssl_client_verify;
65 | proxy_set_header X-Remote-DN ${DOLLAR}ssl_client_s_dn;
66 | proxy_set_header Host ${DOLLAR}http_host;
67 | proxy_set_header Scheme ${DOLLAR}scheme;
68 | proxy_set_header Server-Port ${DOLLAR}server_port;
69 |
70 | proxy_pass http://${UI_HOST}:5000;
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/set.py:
--------------------------------------------------------------------------------
1 | import json
2 | import time
3 |
4 | from assemblyline.remote.datatypes import get_client, retry_call
5 |
6 | _drop_card_script = """
7 | local set_name = ARGV[1]
8 | local key = ARGV[2]
9 |
10 | redis.call('srem', set_name, key)
11 | return redis.call('scard', set_name)
12 | """
13 |
14 | _limited_add = """
15 | local set_name = KEYS[1]
16 | local key = ARGV[1]
17 | local limit = tonumber(ARGV[2])
18 |
19 | if redis.call('scard', set_name) < limit then
20 | redis.call('sadd', set_name, key)
21 | return true
22 | end
23 | return false
24 | """
25 |
26 |
27 | class Set(object):
28 | def __init__(self, name, host=None, port=None):
29 | self.c = get_client(host, port, False)
30 | self.name = name
31 | self._drop_card = self.c.register_script(_drop_card_script)
32 | self._limited_add = self.c.register_script(_limited_add)
33 |
34 | def __enter__(self):
35 | return self
36 |
37 | def __exit__(self, exc_type, exc_val, exc_tb):
38 | self.delete()
39 |
40 | def add(self, *values):
41 | return retry_call(self.c.sadd, self.name,
42 | *[json.dumps(v) for v in values])
43 |
44 | def limited_add(self, value, size_limit):
45 | """Add a single value to the set, but only if that wouldn't make the set grow past a given size."""
46 | return retry_call(self._limited_add, keys=[self.name], args=[json.dumps(value), size_limit])
47 |
48 | def exist(self, value):
49 | return retry_call(self.c.sismember, self.name, json.dumps(value))
50 |
51 | def length(self):
52 | return retry_call(self.c.scard, self.name)
53 |
54 | def members(self):
55 | return [json.loads(s) for s in retry_call(self.c.smembers, self.name)]
56 |
57 | def remove(self, *values):
58 | return retry_call(self.c.srem, self.name,
59 | *[json.dumps(v) for v in values])
60 |
61 | def drop(self, value):
62 |         return retry_call(self._drop_card, args=[self.name, json.dumps(value)])
63 |
64 | def random(self, num=None):
65 | ret_val = retry_call(self.c.srandmember, self.name, num)
66 | if isinstance(ret_val, list):
67 | return [json.loads(s) for s in ret_val]
68 | else:
69 | return json.loads(ret_val)
70 |
71 | def pop(self):
72 | data = retry_call(self.c.spop, self.name)
73 | return json.loads(data) if data else None
74 |
75 | def pop_all(self):
76 | return [json.loads(s) for s in retry_call(self.c.spop, self.name, self.length())]
77 |
78 | def delete(self):
79 | retry_call(self.c.delete, self.name)
80 |
81 |
82 | class ExpiringSet(Set):
83 | def __init__(self, name, ttl=86400, host=None, port=None):
84 | super(ExpiringSet, self).__init__(name, host, port)
85 | self.ttl = ttl
86 | self.last_expire_time = 0
87 |
88 | def _conditional_expire(self):
89 | if self.ttl:
90 | ctime = time.time()
91 | if ctime > self.last_expire_time + (self.ttl / 2):
92 | retry_call(self.c.expire, self.name, self.ttl)
93 | self.last_expire_time = ctime
94 |
95 | def add(self, *values):
96 | rval = super(ExpiringSet, self).add(*values)
97 | self._conditional_expire()
98 | return rval
99 |
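`limited_add` runs server-side as a Lua script, so the size check and the insert are atomic even with concurrent writers. A sketch, assuming a reachable Redis at the defaults used by `get_client`:

```python
from assemblyline.remote.datatypes.set import Set

with Set("demo-bounded-set") as bounded:
    for item in range(5):
        # The add is refused once the set already holds 3 members
        accepted = bounded.limited_add(item, size_limit=3)
        print(item, bool(accepted))
    print(len(bounded.members()))  # 3
# Leaving the 'with' block deletes the key via __exit__
```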
--------------------------------------------------------------------------------
/assemblyline/odm/models/safelist.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 |
4 | Classification = forge.get_classification()
5 | SAFEHASH_TYPES = ["file", "tag", "signature"]
6 | SOURCE_TYPES = ["user", "external"]
7 |
8 |
9 | @odm.model(index=True, store=True, description="Hashes of a safelisted file")
10 | class Hashes(odm.Model):
11 | md5 = odm.Optional(odm.MD5(copyto="__text__"), description="MD5")
12 | sha1 = odm.Optional(odm.SHA1(copyto="__text__"), description="SHA1")
13 | sha256 = odm.Optional(odm.SHA256(copyto="__text__"), description="SHA256")
14 |
15 |
16 | @odm.model(index=True, store=False, description="File Details")
17 | class File(odm.Model):
18 | name = odm.List(odm.Keyword(store=True, copyto="__text__"), default=[],
19 | description="List of names seen for that file")
20 | size = odm.Optional(odm.long(), description="Size of the file in bytes")
21 | type = odm.Optional(odm.Keyword(), description="Type of file as identified by Assemblyline")
22 |
23 |
24 | @odm.model(index=True, store=False, description="Safelist source")
25 | class Source(odm.Model):
26 | classification = odm.Classification(default=Classification.UNRESTRICTED,
27 | description="Classification of the source")
28 | name = odm.Keyword(store=True, description="Name of the source")
29 |     reason = odm.List(odm.Keyword(), description="Reasons why this source safelisted the file")
30 | type = odm.Enum(values=SOURCE_TYPES, description="Type of safelisting source")
31 |
32 |
33 | @odm.model(index=True, store=True, description="Tag associated to file")
34 | class Tag(odm.Model):
35 | type = odm.Keyword(description="Tag type")
36 | value = odm.Keyword(copyto="__text__", description="Tag value")
37 |
38 |
39 | @odm.model(index=True, store=True, description="Signature")
40 | class Signature(odm.Model):
41 | name = odm.Keyword(copyto="__text__", description="Name of the signature")
42 |
43 |
44 | @odm.model(index=True, store=True, description="Safelist Model")
45 | class Safelist(odm.Model):
46 |
47 | added = odm.Date(default="NOW", description="Date when the safelisted hash was added")
48 | classification = odm.Classification(description="Computed max classification for the safe hash")
49 | enabled = odm.Boolean(default=True, description="Is safe hash enabled or not?")
50 | expiry_ts = odm.Optional(odm.Date(), description="When does this item expire from the list?")
51 | hashes = odm.Compound(Hashes, default={}, description="List of hashes related to the safe hash")
52 | file = odm.Optional(odm.Compound(File), description="Information about the file")
53 |     sources = odm.List(odm.Compound(Source), description="List of sources that safelisted this hash")
54 | tag = odm.Optional(odm.Compound(Tag), description="Information about the tag")
55 | signature = odm.Optional(odm.Compound(Signature), description="Information about the signature")
56 | type = odm.Enum(values=SAFEHASH_TYPES, description="Type of safe hash")
57 | updated = odm.Date(default="NOW", description="Last date when sources were added to the safe hash")
58 |
59 |
60 | if __name__ == "__main__":
61 | from pprint import pprint
62 | from assemblyline.odm.randomizer import random_model_obj
63 | pprint(random_model_obj(Safelist, as_json=True))
64 |
--------------------------------------------------------------------------------
/assemblyline/odm/random_data/create_test_data.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from assemblyline.common import forge
4 | from assemblyline.odm.random_data import create_heuristics, create_users, create_services, create_signatures, \
5 | create_submission, create_alerts, create_safelists, create_workflows, wipe_all_except_users, create_badlists
6 |
7 |
8 | class PrintLogger(object):
9 | def __init__(self, indent=""):
10 | self.indent = indent
11 |
12 | def info(self, msg):
13 | print(f"{self.indent}{msg}")
14 |
15 | def warn(self, msg):
16 | print(f"{self.indent}[W] {msg}")
17 |
18 | def error(self, msg):
19 | print(f"{self.indent}[E] {msg}")
20 |
21 |
22 | def create_basic_data(log=None, ds=None, svc=True, sigs=True, safelist=True, reset=False, badlist=True):
23 | ds = ds or forge.get_datastore()
24 |
25 | if reset:
26 | log.info("Wiping all collections...")
27 | for name in ds.ds._models:
28 | collection = ds.ds.__getattr__(name)
29 | collection.wipe()
30 | log.info(f"\t{name}")
31 |
32 | log.info("\nCreating user objects...")
33 | create_users(ds, log=log)
34 |
35 | if svc:
36 | log.info("\nCreating services...")
37 | create_services(ds, log=log)
38 |
39 | if badlist:
40 | log.info("\nCreating random badlist...")
41 | create_badlists(ds, log=log)
42 |
43 | if safelist:
44 | log.info("\nCreating random safelist...")
45 | create_safelists(ds, log=log)
46 |
47 | if sigs:
48 | log.info("\nImporting test signatures...")
49 | signatures = create_signatures(ds)
50 | for s in signatures:
51 | log.info(f"\t{s}")
52 |
53 | if svc:
54 | log.info("\nCreating random heuristics...")
55 | create_heuristics(ds, log=log)
56 |
57 |
58 | def create_extra_data(log=None, ds=None, fs=None):
59 | ds = ds or forge.get_datastore()
60 | fs = fs or forge.get_filestore()
61 |
62 | log.info("\nCreating 10 Submissions...")
63 | submissions = []
64 | for _ in range(10):
65 | s = create_submission(ds, fs, log=log)
66 | submissions.append(s)
67 |
68 |     log.info("\nCreating 20 Workflows...")
69 | workflows = create_workflows(ds, log=log)
70 |
71 | log.info("\nCreating 50 Alerts...")
72 | create_alerts(ds, submission_list=submissions, log=log, workflows=workflows)
73 |
74 | log.info("\nGenerating statistics for signatures and heuristics...")
75 | ds.calculate_signature_stats()
76 | ds.calculate_heuristic_stats()
77 |
78 |
79 | if __name__ == "__main__":
80 | datastore = forge.get_datastore()
81 |     filestore = forge.get_filestore()
82 | logger = PrintLogger()
83 | if "clean" in sys.argv:
84 | # Clean up data in indices except user
85 | wipe_all_except_users(datastore, filestore)
86 |
87 | create_basic_data(log=logger, ds=datastore, svc="nosvc" not in sys.argv, sigs="nosigs" not in sys.argv,
88 | safelist="nosl" not in sys.argv, reset="reset" in sys.argv, badlist="nobl" not in sys.argv)
89 | if "full" in sys.argv:
90 | create_extra_data(log=logger, ds=datastore)
91 |
92 | if "alerts" in sys.argv:
93 | logger.info("\nCreating extra 1000 Alerts...")
94 | create_alerts(datastore, alert_count=1000, log=logger)
95 |
96 | logger.info("\nDone.")
97 |
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/queues/named.py:
--------------------------------------------------------------------------------
1 | import json
2 | import time
3 |
4 | from typing import Generic, TypeVar, Optional
5 |
6 | from assemblyline.remote.datatypes import get_client, retry_call
7 |
8 | T = TypeVar('T')
9 |
10 |
11 | class NamedQueue(Generic[T]):
12 | def __init__(self, name: str, host=None, port=None, private: bool = False, ttl: int = 0):
13 | self.c = get_client(host, port, private)
14 | self.name: str = name
15 | self.ttl: int = ttl
16 | self.last_expire_time = 0
17 |
18 | def __enter__(self):
19 | return self
20 |
21 | def __exit__(self, exc_type, exc_val, exc_tb):
22 | self.delete()
23 |
24 | def _conditional_expire(self):
25 | if self.ttl:
26 | ctime = time.time()
27 | if ctime > self.last_expire_time + (self.ttl / 2):
28 | retry_call(self.c.expire, self.name, self.ttl)
29 | self.last_expire_time = ctime
30 |
31 | def delete(self):
32 | retry_call(self.c.delete, self.name)
33 |
34 | def __len__(self):
35 | return self.length()
36 |
37 | def length(self):
38 | return retry_call(self.c.llen, self.name)
39 |
40 | def peek_next(self) -> Optional[T]:
41 | response = retry_call(self.c.lrange, self.name, 0, 0)
42 |
43 | if response:
44 | return json.loads(response[0])
45 | return None
46 |
47 | def content(self) -> list[T]:
48 | response = retry_call(self.c.lrange, self.name, 0, -1)
49 | if response:
50 | return [json.loads(resp) for resp in response]
51 | return []
52 |
53 | def pop_batch(self, size) -> list[T]:
54 | response = retry_call(self.c.lpop, self.name, size)
55 |
56 | if not response:
57 | return []
58 | return [json.loads(r) for r in response]
59 |
60 | def pop(self, blocking: bool = True, timeout: int = 0) -> Optional[T]:
61 | if blocking:
62 | response = retry_call(self.c.blpop, self.name, timeout)
63 | else:
64 | response = retry_call(self.c.lpop, self.name)
65 |
66 | if not response:
67 | return response
68 |
69 | if blocking:
70 | return json.loads(response[1])
71 | else:
72 | return json.loads(response)
73 |
74 | def push(self, *messages: T):
75 | for message in messages:
76 | retry_call(self.c.rpush, self.name, json.dumps(message))
77 | self._conditional_expire()
78 |
79 | def unpop(self, *messages: T):
80 | """Put all messages passed back at the head of the FIFO queue."""
81 | for message in messages:
82 | retry_call(self.c.lpush, self.name, json.dumps(message))
83 | self._conditional_expire()
84 |
85 |
86 | def select(*queues, **kw):
87 | timeout = kw.get('timeout', 0)
88 | if len(queues) < 1:
89 | raise TypeError('At least one queue must be specified')
90 |     if any(type(q) is not NamedQueue for q in queues):
91 | raise TypeError('Only NamedQueues supported')
92 |
93 | c = queues[0].c
94 | response = retry_call(c.blpop, [q.name for q in queues], timeout)
95 |
96 | if not response:
97 | return response
98 |
99 | return response[0].decode('utf-8'), json.loads(response[1])
100 |
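`select()` wraps BLPOP over several queues at once and reports which queue produced the message. A sketch, again assuming a reachable Redis; queue names and payloads are invented:

```python
from assemblyline.remote.datatypes.queues.named import NamedQueue, select

alerts = NamedQueue("demo-alerts")
tasks = NamedQueue("demo-tasks")
tasks.push({"job": 1})

# Blocks for up to 5 seconds across both queues, returning (queue_name, message)
queue_name, message = select(alerts, tasks, timeout=5)
print(queue_name, message)  # demo-tasks {'job': 1}
```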
--------------------------------------------------------------------------------