├── assemblyline
│   ├── py.typed
│   ├── run
│   │   ├── __init__.py
│   │   └── pubsub_reader.py
│   ├── common
│   │   ├── __init__.py
│   │   ├── version.py
│   │   ├── null.py
│   │   ├── importing.py
│   │   ├── file.py
│   │   ├── uid.py
│   │   ├── frequency.pyx
│   │   ├── logformat.py
│   │   ├── chunk.py
│   │   ├── threading.py
│   │   ├── memory_zip.py
│   │   ├── hexdump.py
│   │   ├── path.py
│   │   ├── signaturing.py
│   │   ├── lucene.lark
│   │   ├── exceptions.py
│   │   ├── codec.py
│   │   ├── banner.py
│   │   ├── metrics.py
│   │   ├── digests.py
│   │   ├── entropy.py
│   │   └── constants.py
│   ├── remote
│   │   ├── __init__.py
│   │   └── datatypes
│   │       ├── queues
│   │       │   ├── __init__.py
│   │       │   ├── multi.py
│   │       │   ├── comms.py
│   │       │   └── named.py
│   │       ├── lock.py
│   │       ├── counters.py
│   │       ├── cache.py
│   │       ├── daily_quota_tracker.py
│   │       ├── user_quota_tracker.py
│   │       └── set.py
│   ├── datastore
│   │   ├── __init__.py
│   │   ├── support
│   │   │   └── __init__.py
│   │   ├── exceptions.py
│   │   └── bulk.py
│   ├── odm
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── ontology
│   │   │   │   ├── filetypes
│   │   │   │   │   └── __init__.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── results
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── http.py
│   │   │   │   │   ├── antivirus.py
│   │   │   │   │   ├── sandbox.py
│   │   │   │   │   ├── process.py
│   │   │   │   │   └── signature.py
│   │   │   │   └── file.py
│   │   │   ├── cached_file.py
│   │   │   ├── emptyresult.py
│   │   │   ├── submission_tree.py
│   │   │   ├── filescore.py
│   │   │   ├── statistics.py
│   │   │   ├── submission_summary.py
│   │   │   ├── user_favorites.py
│   │   │   ├── heuristic.py
│   │   │   ├── apikey.py
│   │   │   ├── signature.py
│   │   │   ├── workflow.py
│   │   │   ├── error.py
│   │   │   ├── user_settings.py
│   │   │   ├── retrohunt.py
│   │   │   └── safelist.py
│   │   ├── messages
│   │   │   ├── __init__.py
│   │   │   ├── alert.py
│   │   │   ├── metrics.py
│   │   │   ├── changes.py
│   │   │   ├── scaler_heartbeat.py
│   │   │   ├── vacuum_heartbeat.py
│   │   │   ├── elastic_heartbeat.py
│   │   │   ├── service_timing_heartbeat.py
│   │   │   ├── dispatching.py
│   │   │   ├── scaler_status_heartbeat.py
│   │   │   ├── retrohunt_heartbeat.py
│   │   │   ├── alerter_heartbeat.py
│   │   │   ├── archive_heartbeat.py
│   │   │   ├── expiry_heartbeat.py
│   │   │   ├── service_heartbeat.py
│   │   │   ├── dispatcher_heartbeat.py
│   │   │   └── submission.py
│   │   ├── common.py
│   │   ├── __init__.py
│   │   └── random_data
│   │       └── create_test_data.py
│   ├── filestore
│   │   └── transport
│   │       ├── __init__.py
│   │       └── base.py
│   ├── __init__.py
│   └── datasource
│       ├── __init__.py
│       ├── common.py
│       ├── alert.py
│       └── al.py
├── dev
│   ├── core
│   │   ├── config
│   │   │   ├── classification.yml
│   │   │   ├── config.yml
│   │   │   └── certs
│   │   │       └── tls.crt
│   │   ├── .env
│   │   └── docker-compose-sca-upd.yml
│   ├── hauntedhouse
│   │   ├── config
│   │   │   ├── ingest.json
│   │   │   ├── worker.json
│   │   │   └── core.json
│   │   └── docker-compose.yaml
│   └── depends
│       ├── config
│       │   ├── apm-server.docker.yml
│       │   ├── filebeat_policy.json
│       │   ├── metricbeat_policy.json
│       │   ├── kibana.docker.yml
│       │   ├── filebeat.docker.yml
│       │   └── metricbeat.docker.yml
│       └── docker-compose-minimal.yml
├── test
│   ├── id_file_base
│   │   ├── text.txt
│   │   ├── json.json
│   │   ├── gzip.gz
│   │   ├── jpg.jpg
│   │   ├── pdf.pdf
│   │   ├── png.png
│   │   ├── excel.xls
│   │   ├── word.docx
│   │   ├── powerpoint.pptx
│   │   ├── html.html
│   │   ├── javascript.js
│   │   ├── id_file_base.json
│   │   ├── xml.xml
│   │   ├── powershell.ps1
│   │   └── calendar.ics
│   ├── requirements.txt
│   ├── test_exceptions.py
│   ├── docker-compose.yml
│   ├── test_cachestore.py
│   ├── classification.yml
│   ├── key.pem
│   ├── test_path.py
│   ├── conftest.py
│   ├── test_isotime.py
│   ├── test_identify.py
│   ├── test_datasource.py
│   ├── test_postprocess.py
│   ├── test_metrics.py
│   └── test_regexes.py
├── MANIFEST.in
├── pyproject.toml
├── setup.cfg
├── .github
│   └── dependabot.yml
├── docker
│   ├── minio
│   │   └── Dockerfile
│   ├── nginx-ssl-frontend
│   │   ├── http_redirect.conf
│   │   ├── Dockerfile
│   │   └── minimal.template
│   ├── nginx-ssl-frontend:mui5
│   │   ├── http_redirect.conf
│   │   └── Dockerfile
│   ├── push_containers.sh
│   ├── build_containers.sh
│   ├── al_dev
│   │   └── Dockerfile
│   ├── local_dev.Dockerfile
│   └── al_management
│       ├── Dockerfile
│       └── pipeline.Dockerfile
├── .dockerignore
├── pipelines
│   └── config.yml
├── incremental.Dockerfile
├── .vscode
│   └── settings.json
├── LICENCE.md
├── .gitignore
├── CONTRIBUTING.md
├── external
│   └── generate_tlds.py
└── Dockerfile

/assemblyline/py.typed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/run/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/remote/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/datastore/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/datastore/support/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/filestore/transport/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/queues/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dev/core/config/classification.yml:
--------------------------------------------------------------------------------
1 | enforce: true
--------------------------------------------------------------------------------
/test/id_file_base/text.txt:
--------------------------------------------------------------------------------
1 | this is a text file
--------------------------------------------------------------------------------
/dev/core/.env:
--------------------------------------------------------------------------------
1 | PRIVATE_REGISTRY=172.17.0.1:32000/
2 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | exclude test/*
2 | exclude bitbucket-pipelines.yml
3 |
--------------------------------------------------------------------------------
/test/id_file_base/json.json:
--------------------------------------------------------------------------------
1 | {
2 |   "a": 1,
3 |   "b": 2,
4 |   "c": 3,
5 |   "d": 4
6 | }
--------------------------------------------------------------------------------
/test/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | retrying
3 | pytest-mock
4 | pyftpdlib
5 | pyopenssl==23.3.0
6 |
--------------------------------------------------------------------------------
/assemblyline/common/version.py:
--------------------------------------------------------------------------------
1 | FRAMEWORK_VERSION = 4
2 | SYSTEM_VERSION = 6
3 | BUILD_MINOR = 0
4 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "cython",
4 |     "setuptools",
5 |     "wheel"
6 | ]
7 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/filetypes/__init__.py:
--------------------------------------------------------------------------------
1 | from assemblyline.odm.models.ontology.filetypes.pe import PE
2 |
--------------------------------------------------------------------------------
/test/id_file_base/gzip.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/gzip.gz
--------------------------------------------------------------------------------
/test/id_file_base/jpg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/jpg.jpg
--------------------------------------------------------------------------------
/test/id_file_base/pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/pdf.pdf
--------------------------------------------------------------------------------
/test/id_file_base/png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/png.png
--------------------------------------------------------------------------------
/test/id_file_base/excel.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/excel.xls
--------------------------------------------------------------------------------
/test/id_file_base/word.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/word.docx
--------------------------------------------------------------------------------
/assemblyline/odm/messages/__init__.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | class PerformanceTimer(odm.Float):
5 |     pass
6 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/__init__.py:
--------------------------------------------------------------------------------
1 | from assemblyline.odm.models.ontology.ontology import ResultOntology, ODM_VERSION
2 |
--------------------------------------------------------------------------------
/test/id_file_base/powerpoint.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/assemblyline-base/HEAD/test/id_file_base/powerpoint.pptx
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [tool:pytest]
2 | testpaths = test
3 |
4 | # addopts = --cov=assemblyline --cov-report html
5 |
6 | [coverage:run]
7 |
8 | omit = test/*
9 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: pip
4 |     directory: "/"
5 |     schedule:
6 |       interval: daily
7 |       time: "10:00"
8 |     open-pull-requests-limit: 10
9 |
--------------------------------------------------------------------------------
/docker/minio/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM minio/minio
2 |
3 | # This has to be pre-configured as Azure Pipelines doesn't support a way of running the command on container creation
4 | CMD ["server", "/data"]
5 |
--------------------------------------------------------------------------------
/test/id_file_base/html.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html lang="en">
3 | <head>
4 |     <meta charset="UTF-8">
5 |     <title>Title</title>
6 | </head>
7 | <body>
8 |
9 | </body>
10 | </html>
--------------------------------------------------------------------------------
/test/id_file_base/javascript.js:
--------------------------------------------------------------------------------
1 | var someVar = 0;
2 | var anotherVar = 0;
3 |
4 | function blah()
5 |
6 | {
7 |     console.log("We did it!");
8 | }
9 |
10 | function blahblah() {
11 |     console.log("hey hey!");
12 | }
--------------------------------------------------------------------------------
/docker/nginx-ssl-frontend/http_redirect.conf:
--------------------------------------------------------------------------------
1 | server {
2 |     server_name _;
3 |     listen 80;
4 |     listen [::]:80;
5 |
6 |     location / {
7 |         return 301 https://$host;
8 |     }
9 | }
--------------------------------------------------------------------------------
/docker/nginx-ssl-frontend:mui5/http_redirect.conf:
--------------------------------------------------------------------------------
1 | server {
2 |     server_name _;
3 |     listen 80;
4 |     listen [::]:80;
5 |
6 |     location / {
7 |         return 301 https://$host;
8 |     }
9 | }
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | Dockerfile
2 | .idea
3 | .git
4 |
5 | pipelines
6 | venv
7 | env
8 | test
9 | tests
10 | examples
11 | docs
12 |
13 | pip-log.txt
14 | pip-delete-this-directory.txt
15 | .tox
16 | .coverage
17 | .coverage.*
18 | .cache
19 | nosetests.xml
20 | coverage.xml
21 | *,cover
22 | *.log
23 |
--------------------------------------------------------------------------------
/docker/push_containers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 |
3 | # Push core containers
4 | docker push cccs/minio --all-tags
5 | docker push cccs/nginx-ssl-frontend --all-tags
6 |
7 | # Push dev containers
8 | docker push cccs/assemblyline_dev --all-tags
9 | docker push cccs/assemblyline_management --all-tags
10 |
--------------------------------------------------------------------------------
/assemblyline/common/null.py:
--------------------------------------------------------------------------------
1 | # TODO: are we still using this?
2 |
3 | """
4 | Dummy functions and values used to substitute for dynamically loaded methods that
5 | have no interesting implementation by default.
6 | """
7 |
8 |
9 | def always_false(*_, **__):
10 |     return False
11 |
12 |
13 | whitelist = []
14 |
--------------------------------------------------------------------------------
/assemblyline/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | __version__ = "4.0.0.dev0"
4 | _package_version_path = os.path.join(os.path.dirname(__file__), 'VERSION')
5 | if os.path.exists(_package_version_path):
6 |     with open(_package_version_path) as _package_version_file:
7 |         __version__ = _package_version_file.read().strip()
8 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/cached_file.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model(index=True, store=True, description="CachedFile Model")
5 | class CachedFile(odm.Model):
6 |     component = odm.Keyword(description="Name of component which created the file")
7 |     expiry_ts = odm.Date(store=False, description="Expiry timestamp")
8 |
--------------------------------------------------------------------------------
/dev/hauntedhouse/config/ingest.json:
--------------------------------------------------------------------------------
1 | {
2 |   "hauntedhouse_api_key": "insecure-ingest-key",
3 |   "hauntedhouse_url": "https://hauntedhouse:4443",
4 |   "assemblyline_url": "http://al_ui:5000",
5 |   "assemblyline_user": "admin",
6 |   "assemblyline_api_key": "AL_KEY",
7 |   "allow_disabled_access": true,
8 |   "write_path": "/data/",
9 |   "trust_all": true
10 | }
--------------------------------------------------------------------------------
/assemblyline/odm/models/emptyresult.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model(index=True, store=True, description="Model for Empty Results")
5 | class EmptyResult(odm.Model):
6 |     # Empty results are an abstract construct;
7 |     # only a record of the key is saved, for caching purposes
8 |     expiry_ts = odm.Date(store=False, description="Expiry timestamp")
9 |
--------------------------------------------------------------------------------
/assemblyline/odm/common.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model()
5 | class Resources(odm.Model):
6 |     cpu_usage = odm.Float()
7 |     disk_usage_free = odm.Integer()
8 |     disk_usage_percent = odm.Float()
9 |     mem_usage = odm.Float()
10 |
11 |
12 | @odm.model()
13 | class HostInfo(odm.Model):
14 |     host = odm.Keyword()
15 |     ip = odm.Keyword()
16 |     mac_address = odm.Keyword()
17 |
--------------------------------------------------------------------------------
/pipelines/config.yml:
--------------------------------------------------------------------------------
1 | filestore:
2 |   cache:
3 |     - file:///var/cache/assemblyline/
4 |   storage:
5 |     - file:///var/cache/assemblyline/
6 | core:
7 |   redis:
8 |     nonpersistent:
9 |       host: localhost
10 |       port: 6379
11 |     persistent:
12 |       host: localhost
13 |       port: 6379
14 |   metrics:
15 |     export_interval: 1
16 | datastore:
17 |   hosts: ["http://elastic:devpass@localhost:9200"]
18 |   archive:
19 |     enabled: true
20 |
--------------------------------------------------------------------------------
/assemblyline/common/importing.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import sys
3 |
4 |
5 | def load_module_by_path(name: str, lookup_path=None):
6 |     if lookup_path and lookup_path not in sys.path:
7 |         sys.path.append(lookup_path)
8 |
9 |     module_path, _sep, module_attribute_name = name.rpartition('.')
10 |     module = sys.modules.get(module_path, None)
11 |     if not module:
12 |         module = importlib.import_module(module_path)
13 |     return getattr(module, module_attribute_name)
14 |
--------------------------------------------------------------------------------
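A minimal usage sketch for load_module_by_path above; the standard-library target and the plugin path are arbitrary, for illustration only:

    from assemblyline.common.importing import load_module_by_path

    # "name" is a dotted "package.module.attribute" path; the attribute is returned.
    json_dumps = load_module_by_path('json.dumps')
    assert json_dumps({"a": 1}) == '{"a": 1}'

    # lookup_path lets you resolve modules that live outside sys.path:
    # plugin = load_module_by_path('myplugin.entrypoint', lookup_path='/opt/plugins')  # hypothetical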
/assemblyline/datasource/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines an interface for hash searching.
3 |
4 | Given a file hash, try to generate a quick description of the file.
5 |
6 | This is extended and used by several services. To expose a service's
7 | datasource specialization, it can be added to the 'datasources' seed key.
8 |
9 | The assemblyline core comes with an implementation for searching all results (`al.py`)
10 | or the alert stream (`alert.py`). The base class/interface is defined in `common.py`.
11 | """
12 |
--------------------------------------------------------------------------------
/dev/depends/config/apm-server.docker.yml:
--------------------------------------------------------------------------------
1 | apm-server:
2 |   host: "0.0.0.0:8200"
3 |   kibana:
4 |     enabled: true
5 |     host: kibana:5601
6 |     path: kibana
7 |     protocol: http
8 |     username: elastic
9 |     password: devpass
10 |
11 | logging:
12 |   level: warning
13 |   json: true
14 |
15 | output.elasticsearch:
16 |   hosts: ["http://elasticsearch:9200"]
17 |   username: elastic
18 |   password: devpass
19 |
20 | # Remove the following for 8.x
21 | setup.template.settings.index:
22 |   number_of_shards: 1
23 |   number_of_replicas: 0
24 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/ontology/results/__init__.py:
--------------------------------------------------------------------------------
1 | from assemblyline.odm.models.ontology.results.antivirus import Antivirus
2 | from assemblyline.odm.models.ontology.results.malware_config import MalwareConfig
3 | from assemblyline.odm.models.ontology.results.process import Process
4 | from assemblyline.odm.models.ontology.results.sandbox import Sandbox
5 | from assemblyline.odm.models.ontology.results.signature import Signature
6 | from assemblyline.odm.models.ontology.results.network import NetworkConnection
7 | from assemblyline.odm.models.ontology.results.http import HTTP
8 |
--------------------------------------------------------------------------------
/test/id_file_base/id_file_base.json:
--------------------------------------------------------------------------------
1 | {
2 |   "text.txt": "text/plain",
3 |   "excel.xls": "document/office/excel",
4 |   "powerpoint.pptx": "document/office/powerpoint",
5 |   "word.docx": "document/office/word",
6 |   "png.png": "image/png",
7 |   "pdf.pdf": "document/pdf",
8 |   "html.html": "code/html",
9 |   "xml.xml": "code/xml",
10 |   "calendar.ics": "text/calendar",
11 |   "gzip.gz": "archive/gzip",
12 |   "powershell.ps1": "code/ps1",
13 |   "jpg.jpg": "image/jpg",
14 |   "json.json": "text/json",
15 |   "javascript.js": "code/javascript"
16 | }
17 |
--------------------------------------------------------------------------------
/docker/build_containers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 |
3 | # Build core containers
4 | (cd nginx-ssl-frontend && docker build -t cccs/nginx-ssl-frontend .)
5 | (cd nginx-ssl-frontend:mui5 && docker build -t cccs/nginx-ssl-frontend:mui5 .)
6 | (cd minio && docker build -t cccs/minio .)
7 |
8 | # Build default dev containers
9 | (cd ../.. && docker build --no-cache -f assemblyline-base/docker/al_dev/Dockerfile -t cccs/assemblyline_dev:latest -t cccs/assemblyline_dev:4.6.1 .)
10 | (cd ../.. && docker build --no-cache -f assemblyline-base/docker/al_management/Dockerfile -t cccs/assemblyline_management:latest -t cccs/assemblyline_management:4.6.1 .)
11 |
--------------------------------------------------------------------------------
/test/id_file_base/xml.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <students>
3 |   <student>
4 |     <surname>Takahashi</surname>
5 |     <name>Jesse</name>
6 |     <age>19</age>
7 |     <major>Literature</major>
8 |     <gpa>3.8</gpa>
9 |     <year>Freshman</year>
10 |   </student>
11 |   <student>
12 |     <surname>Nguyen</surname>
13 |     <name>May</name>
14 |     <age>23</age>
15 |     <major>Chemistry</major>
16 |     <gpa>3.5</gpa>
17 |     <year>Senior</year>
18 |   </student>
19 | </students>
--------------------------------------------------------------------------------
/assemblyline/odm/messages/alert.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.odm.models.alert import Alert
3 |
4 | MSG_TYPES = {"AlertCreated", "AlertUpdated"}
5 | LOADER_CLASS = "assemblyline.odm.messages.alert.AlertMessage"
6 |
7 |
8 | @odm.model(description="Model of Alert Message")
9 | class AlertMessage(odm.Model):
10 |     msg = odm.Compound(Alert, description="Message of alert")
11 |     msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for messages")
12 |     msg_type = odm.Enum(values=MSG_TYPES, default="AlertCreated", description="Type of Message")
13 |     sender = odm.Keyword(description="Sender of message")
14 |
--------------------------------------------------------------------------------
/assemblyline/common/file.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | import yaml
3 |
4 | from assemblyline.common.identify import CUSTOM_URI_ID
5 |
6 |
7 | def make_uri_file(directory: str, uri: str, params=None) -> str:
8 |     with tempfile.NamedTemporaryFile(dir=directory, delete=False, mode="w") as out:
9 |         out.write(CUSTOM_URI_ID)
10 |         yaml.dump({"uri": uri}, out)
11 |         if params:
12 |             yaml.dump(params, out)
13 |     return out.name
14 |
15 |
16 | def normalize_uri_file(directory: str, filename: str) -> str:
17 |     with open(filename, "r") as f:
18 |         data = yaml.safe_load(f)
19 |     uri = data.pop("uri")
20 |     return make_uri_file(directory, uri, data)
21 |
--------------------------------------------------------------------------------
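A usage sketch for the URI-file helpers above; the params mapping is an illustrative assumption (any YAML-serializable mapping works):

    import tempfile

    from assemblyline.common.file import make_uri_file, normalize_uri_file

    directory = tempfile.mkdtemp()

    # Writes a small YAML file, tagged with CUSTOM_URI_ID, that describes the URI
    path = make_uri_file(directory, "https://example.com/sample.bin", params={"method": "GET"})

    # Re-reads an existing URI file and rewrites it in the canonical layout
    normalized = normalize_uri_file(directory, path)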
/dev/depends/config/filebeat_policy.json:
--------------------------------------------------------------------------------
1 | {
2 |   "policy": {
3 |     "phases": {
4 |       "hot": {
5 |         "min_age": "0ms",
6 |         "actions": {
7 |           "rollover": {
8 |             "max_age": "1d",
9 |             "max_size": "20gb"
10 |           },
11 |           "set_priority": {
12 |             "priority": 100
13 |           }
14 |         }
15 |       },
16 |       "warm": {
17 |         "actions": {
18 |           "readonly": {},
19 |           "set_priority": {
20 |             "priority": 50
21 |           }
22 |         }
23 |       },
24 |       "delete": {
25 |         "min_age": "3d",
26 |         "actions": {
27 |           "delete": {}
28 |         }
29 |       }
30 |     }
31 |   }
32 | }
--------------------------------------------------------------------------------
/assemblyline/odm/models/submission_tree.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 | Classification = forge.get_classification()
4 |
5 |
6 | @odm.model(index=True, store=False, description="Submission Tree Model")
7 | class SubmissionTree(odm.Model):
8 |     classification = odm.Classification(default=Classification.UNRESTRICTED, description="Classification of the cache")
9 |     filtered = odm.Boolean(default=False, description="Has this cache entry been filtered?")
10 |     expiry_ts = odm.Date(description="Expiry timestamp")
11 |     supplementary = odm.Text(index=False, description="Tree of supplementary files")
12 |     tree = odm.Text(index=False, description="File tree cache")
13 |
--------------------------------------------------------------------------------
/dev/depends/config/metricbeat_policy.json:
--------------------------------------------------------------------------------
1 | {
2 |   "policy": {
3 |     "phases": {
4 |       "hot": {
5 |         "min_age": "0ms",
6 |         "actions": {
7 |           "rollover": {
8 |             "max_age": "1d",
9 |             "max_size": "5gb"
10 |           },
11 |           "set_priority": {
12 |             "priority": 100
13 |           }
14 |         }
15 |       },
16 |       "warm": {
17 |         "actions": {
18 |           "readonly": {},
19 |           "set_priority": {
20 |             "priority": 50
21 |           }
22 |         }
23 |       },
24 |       "delete": {
25 |         "min_age": "4d",
26 |         "actions": {
27 |           "delete": {}
28 |         }
29 |       }
30 |     }
31 |   }
32 | }
--------------------------------------------------------------------------------
/assemblyline/odm/models/filescore.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model(index=False, store=False, description="Model of Scoring related to a File")
5 | class FileScore(odm.Model):
6 |     psid = odm.Optional(odm.UUID(), description="Parent submission ID of the associated submission")
7 |     expiry_ts = odm.Date(index=True, description="Expiry timestamp, used for garbage collection")
8 |     score = odm.Integer(description="Maximum score for the associated submission")
9 |     errors = odm.Integer(description="Number of errors that occurred during the previous analysis")
10 |     sid = odm.UUID(description="ID of the associated submission")
11 |     time = odm.Float(description="Epoch time at which the FileScore entry was created")
12 |
--------------------------------------------------------------------------------
/assemblyline/datastore/exceptions.py:
--------------------------------------------------------------------------------
1 | from typing import Iterable
2 |
3 |
4 | class DataStoreException(Exception):
5 |     pass
6 |
7 |
8 | class SearchException(Exception):
9 |     pass
10 |
11 |
12 | class SearchDepthException(Exception):
13 |     pass
14 |
15 |
16 | class ILMException(Exception):
17 |     pass
18 |
19 |
20 | class VersionConflictException(Exception):
21 |     pass
22 |
23 |
24 | class UnsupportedElasticVersion(Exception):
25 |     pass
26 |
27 |
28 | class ArchiveDisabled(Exception):
29 |     pass
30 |
31 |
32 | class MultiKeyError(KeyError):
33 |     def __init__(self, keys: Iterable[str], partial_output):
34 |         super().__init__(str(keys))
35 |         self.keys = set(keys)
36 |         self.partial_output = partial_output
37 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/statistics.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 |
4 | @odm.model(index=True, store=True, description="Statistics Model")
5 | class Statistics(odm.Model):
6 |     count = odm.Integer(default=0, description="Count of statistical hits")
7 |     min = odm.Integer(default=0, description="Minimum value of all statistical hits")
8 |     max = odm.Integer(default=0, description="Maximum value of all statistical hits")
9 |     avg = odm.Integer(default=0, description="Average of all statistical hits")
10 |     sum = odm.Integer(default=0, description="Sum of all statistical hits")
11 |     first_hit = odm.Optional(odm.Date(), description="Date of first hit of statistic")
12 |     last_hit = odm.Optional(odm.Date(), description="Date of last hit of statistic")
13 |
--------------------------------------------------------------------------------
/assemblyline/common/uid.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import uuid
3 |
4 | import baseconv
5 |
6 | TINY = 8
7 | SHORT = 16
8 | MEDIUM = NORMAL = 32
9 | LONG = 64
10 |
11 |
12 | def get_random_id() -> str:
13 |     return baseconv.base62.encode(uuid.uuid4().int)
14 |
15 |
16 | def get_id_from_data(data, prefix=None, length=MEDIUM):
17 |     possible_len = [TINY, SHORT, MEDIUM, LONG]
18 |     if length not in possible_len:
19 |         raise ValueError(f"Invalid hash length of {length}. Possible values are: {str(possible_len)}.")
20 |     sha256_hash = hashlib.sha256(str(data).encode()).hexdigest()[:length]
21 |     _hash = baseconv.base62.encode(int(sha256_hash, 16))
22 |
23 |     if isinstance(prefix, str):
24 |         _hash = f"{prefix}_{_hash}"
25 |
26 |     return _hash
27 |
--------------------------------------------------------------------------------
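A quick sketch of the ID helpers above; the printed values in the comments are illustrative, since IDs are base62-encoded:

    from assemblyline.common.uid import LONG, get_id_from_data, get_random_id

    print(get_random_id())                # random, e.g. '6T6kFIYFDcEHimWKYjbO2M'
    print(get_id_from_data("some data"))  # deterministic: same input, same ID
    print(get_id_from_data("some data", prefix="file", length=LONG))  # 'file_<hash>'

    # Lengths outside TINY/SHORT/MEDIUM/LONG are rejected:
    # get_id_from_data("x", length=12)  -> ValueError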
/test/id_file_base/powershell.ps1:
--------------------------------------------------------------------------------
1 | # From https://docs.microsoft.com/en-us/powershell/scripting/samples/sorting-objects?view=powershell-7.1
2 | Get-ChildItem |
3 |     Sort-Object -Property LastWriteTime, Name |
4 |     Format-Table -Property LastWriteTime, Name
5 | Get-ChildItem |
6 |     Sort-Object -Property LastWriteTime, Name -Descending |
7 |     Format-Table -Property LastWriteTime, Name
8 | Get-ChildItem |
9 |     Sort-Object -Property @{ Expression = 'LastWriteTime'; Descending = $true },
10 |         @{ Expression = 'Name'; Ascending = $true } |
11 |     Format-Table -Property LastWriteTime, Name
12 | Get-ChildItem |
13 |     Sort-Object -Property @{ Expression = { $_.LastWriteTime - $_.CreationTime }; Descending = $true } |
14 |     Format-Table -Property LastWriteTime, CreationTime
--------------------------------------------------------------------------------
/assemblyline/run/pubsub_reader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 |
5 | from assemblyline.remote.datatypes.queues.comms import CommsQueue
6 | from pprint import pprint
7 |
8 |
9 | if __name__ == "__main__":
10 |     queue_name = None
11 |     if len(sys.argv) > 1:
12 |         queue_name = sys.argv[1]
13 |
14 |     if queue_name is None:
15 |         print("\nERROR: You must specify a queue name.\n\npubsub_reader.py [queue_name]")
16 |         exit(1)
17 |
18 |     print(f"Listening for messages on '{queue_name}' queue.")
19 |
20 |     q = CommsQueue(queue_name)
21 |
22 |     try:
23 |         while True:
24 |             for msg in q.listen():
25 |                 pprint(msg)
26 |     except KeyboardInterrupt:
27 |         print('Exiting')
28 |     finally:
29 |         q.close()
30 |
--------------------------------------------------------------------------------
/dev/hauntedhouse/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: "2.4"
2 |
3 | services:
4 |   hauntedhouse:
5 |     image: cccs/hauntedhouse:0.0.14
6 |     volumes:
7 |       - ${PATH_REWRITE:-.}/config/core.json:/config/core.json:ro
8 |       - haunted-house-data:/data/
9 |     command: haunted-house server --config /config/core.json
10 |     environment:
11 |       RUST_LOG: "haunted_house=info"
12 |
13 |   hauntedhouse-worker:
14 |     image: cccs/hauntedhouse:0.0.14
15 |     volumes:
16 |       - ${PATH_REWRITE:-.}/config/worker.json:/config/worker.json:ro
17 |     command: haunted-house worker --config /config/worker.json
18 |     environment:
19 |       RUST_LOG: "haunted_house=info"
20 |
21 |   hauntedhouse-ingest:
22 |     image: cccs/hauntedhouse:ingest-0.0.9
23 |     volumes:
24 |       - ${PATH_REWRITE:-.}/config/ingest.json:/config/ingest.json:ro
25 |     command: python -m hauntedhouse.ingest /config/ingest.json
26 |
27 |
--------------------------------------------------------------------------------
/assemblyline/common/frequency.pyx:
--------------------------------------------------------------------------------
1 | # cython: language_level=3
2 |
3 | # noinspection PyUnresolvedReferences
4 | from libc.string cimport memset
5 |
6 | def counts(b, c, d=None):
7 |     if d is None:
8 |         d = {}
9 |     cdef long long t[256]
10 |     cdef unsigned char* s = b
11 |     cdef int l = c
12 |     cdef int i = 0
13 |
14 |     memset(t, 0, 256 * sizeof(long long))
15 |
16 |     for k, v in d.items():
17 |         t[k] = v
18 |
19 |     while i < l:
20 |         t[s[i]] += 1
21 |         i += 1
22 |
23 |     return {i: t[i] for i in range(256) if t[i]}
24 |
25 | def counts_old(s, d=None):
26 |     if d is None:
27 |         d = {}
28 |     cdef int i
29 |     cdef int t[256]
30 |
31 |     memset(t, 0, 256 * sizeof(int))
32 |
33 |     for k, v in d.items():
34 |         t[k] = v
35 |
36 |     for c in s:
37 |         t[ord(c)] += 1
38 |
39 |     return {i: t[i] for i in range(256) if t[i]}
40 |
--------------------------------------------------------------------------------
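frequency.pyx must be compiled by Cython before it can be imported (the package build does this); a usage sketch, assuming the compiled module is available:

    from assemblyline.common.frequency import counts

    data = b"hello world"
    table = counts(data, len(data))   # byte value -> count, e.g. {32: 1, 100: 1, ...}

    # A previous table can be passed in to accumulate counts across buffers:
    table = counts(b" again", 6, table)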
/assemblyline/odm/models/ontology/results/http.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.odm.models.ontology.file import File
3 |
4 |
5 | @odm.model(index=False, store=False, description="")
6 | class HTTPRedirect(odm.Model):
7 |     from_url = odm.Keyword(description="")
8 |     to_url = odm.Keyword(description="")
9 |
10 |
11 | @odm.model(index=False, store=False, description="HTTP Task")
12 | class HTTP(odm.Model):
13 |     response_code = odm.Integer(description="The status code of the main page")
14 |     redirection_url = odm.Optional(odm.Keyword(), description="The final page of the requested url")
15 |     redirects = odm.Optional(odm.List(odm.Compound(HTTPRedirect)), description="List of Redirects")
16 |     favicon = odm.Optional(odm.Compound(File), description="The file information of the main favicon")
17 |     title = odm.Optional(odm.Keyword(), description="The title of the main page after any redirection")
18 |
--------------------------------------------------------------------------------
/assemblyline/odm/models/submission_summary.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 | from assemblyline.common import forge
3 | Classification = forge.get_classification()
4 |
5 |
6 | @odm.model(index=False, store=False, description="Submission Summary Model")
7 | class SubmissionSummary(odm.Model):
8 |     classification = odm.Classification(default=Classification.UNRESTRICTED, description="Classification of the cache")
9 |     filtered = odm.Boolean(default=False, description="Has this cache entry been filtered?")
10 |     expiry_ts = odm.Date(index=True, description="Expiry timestamp")
11 |     tags = odm.Text(description="Tags cache")
12 |     attack_matrix = odm.Text(description="ATT&CK Matrix cache")
13 |     heuristics = odm.Text(description="Heuristics cache")
14 |     heuristic_sections = odm.Text(description="All sections mapping to the heuristics")
15 |     heuristic_name_map = odm.Text(description="Map of heuristic names to IDs")
16 |
--------------------------------------------------------------------------------
/test/id_file_base/calendar.ics:
--------------------------------------------------------------------------------
1 | BEGIN:VCALENDAR
2 | VERSION:2.0
3 | CALSCALE:GREGORIAN
4 | BEGIN:VEVENT
5 | SUMMARY:Access-A-Ride Pickup
6 | DTSTART;TZID=America/New_York:20130802T103400
7 | DTEND;TZID=America/New_York:20130802T110400
8 | LOCATION:1000 Broadway Ave.\, Brooklyn
9 | DESCRIPTION: Access-A-Ride to 900 Jay St.\, Brooklyn
10 | STATUS:CONFIRMED
11 | SEQUENCE:3
12 | BEGIN:VALARM
13 | TRIGGER:-PT10M
14 | DESCRIPTION:Pickup Reminder
15 | ACTION:DISPLAY
16 | END:VALARM
17 | END:VEVENT
18 | BEGIN:VEVENT
19 | SUMMARY:Access-A-Ride Pickup
20 | DTSTART;TZID=America/New_York:20130802T200000
21 | DTEND;TZID=America/New_York:20130802T203000
22 | LOCATION:900 Jay St.\, Brooklyn
23 | DESCRIPTION: Access-A-Ride to 1000 Broadway Ave.\, Brooklyn
24 | STATUS:CONFIRMED
25 | SEQUENCE:3
26 | BEGIN:VALARM
27 | TRIGGER:-PT10M
28 | DESCRIPTION:Pickup Reminder
29 | ACTION:DISPLAY
30 | END:VALARM
31 | END:VEVENT
32 | END:VCALENDAR
--------------------------------------------------------------------------------
/test/test_exceptions.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from assemblyline.common.exceptions import Chain, ChainAll, ChainException
4 |
5 |
6 | class CustomError(ChainException):
7 |     pass
8 |
9 |
10 | @Chain(CustomError)
11 | def fail_function(message):
12 |     raise Exception(message)
13 |
14 |
15 | @ChainAll(CustomError)
16 | class FailClass:
17 |     def fail_method(self):
18 |         raise Exception()
19 |
20 |     @staticmethod
21 |     def static_fail_method():
22 |         raise Exception()
23 |
24 |
25 | def test_exception_chaining():
26 |     with pytest.raises(CustomError) as error_info:
27 |         fail_function('abc123')
28 |     assert isinstance(error_info.value.cause, Exception)
29 |     assert error_info.value.cause.args[0] == 'abc123'
30 |
31 |     with pytest.raises(CustomError):
32 |         FailClass().fail_method()
33 |
34 |     with pytest.raises(CustomError):
35 |         FailClass.static_fail_method()
36 |
37 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/metrics.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"MetricsCounter"}
4 | LOADER_CLASS = "assemblyline.odm.messages.metrics.MetricsMessage"
5 |
6 |
7 | @odm.model(description="Metrics Model")
8 | class Metrics(odm.Model):
9 |     host = odm.Keyword(description="Host that generated metric")
10 |     type = odm.Keyword(description="Type of metric")
11 |     name = odm.Keyword(description="Metric name")
12 |     metrics = odm.Mapping(odm.Integer(), description="Metric value")
13 |
14 |
15 | @odm.model(description="Model of Metric Message")
16 | class MetricsMessage(odm.Model):
17 |     msg = odm.Compound(Metrics, description="Metrics message")
18 |     msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
19 |     msg_type = odm.Enum(values=MSG_TYPES, default="MetricsCounter", description="Type of message")
20 |     sender = odm.Keyword(description="Sender of message")
21 |
--------------------------------------------------------------------------------
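A construction sketch for the message model above, assuming odm.Model subclasses accept a plain dict of field values, as they do elsewhere in Assemblyline; all values shown are illustrative:

    from assemblyline.odm.messages.metrics import MetricsMessage

    message = MetricsMessage({
        "msg": {
            "host": "worker-01",
            "type": "ingester",
            "name": "ingester",
            "metrics": {"submitted": 10, "completed": 9},
        },
        "sender": "ingester",  # msg_loader and msg_type fall back to their defaults
    })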
/test/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | services:
4 |   sftp:
5 |     image: linuxserver/openssh-server
6 |     environment:
7 |       - SUDO_ACCESS=false
8 |       - PASSWORD_ACCESS=true
9 |       - USER_PASSWORD=password
10 |       - USER_NAME=user
11 |       - LOG_STDOUT=true
12 |     ports:
13 |       - "2222:2222"
14 |
15 |   minio:
16 |     image: minio/minio
17 |     environment:
18 |       MINIO_ROOT_USER: al_storage_key
19 |       MINIO_ROOT_PASSWORD: Ch@ngeTh!sPa33w0rd
20 |     ports:
21 |       - "9000:9000"
22 |     command: server /data
23 |
24 |   elasticsearch:
25 |     image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
26 |     environment:
27 |       - xpack.security.enabled=true
28 |       - discovery.type=single-node
29 |       - logger.level=WARN
30 |       - "ES_JAVA_OPTS=-Xms1024m -Xmx1024m"
31 |       - ELASTIC_PASSWORD=devpass
32 |     ports:
33 |       - "9200:9200"
34 |
35 |   redis:
36 |     image: redis
37 |     ports:
38 |       - "6379:6379"
39 |
--------------------------------------------------------------------------------
/assemblyline/common/logformat.py:
--------------------------------------------------------------------------------
1 |
2 | hostname = 'unknownhost'
3 | # noinspection PyBroadException
4 | try:
5 |     from assemblyline.common.net import get_hostname
6 |     hostname = get_hostname()
7 | except Exception:  # pylint:disable=W0702
8 |     pass
9 |
10 | ip = 'x.x.x.x'
11 | # noinspection PyBroadException
12 | try:
13 |     from assemblyline.common.net import get_hostip
14 |     ip = get_hostip()
15 | except Exception:  # pylint:disable=W0702
16 |     pass
17 |
18 | AL_SYSLOG_FORMAT = f'{ip} AL %(levelname)8s %(process)5d %(name)40s | %(message)s'
19 | AL_LOG_FORMAT = f'%(asctime)-16s %(levelname)8s {hostname} %(process)d %(name)40s | %(message)s'
20 | AL_JSON_FORMAT = f'{{' \
21 |                  f'"@timestamp": "%(asctime)s", ' \
22 |                  f'"event": {{ "module": "assemblyline", "dataset": "%(name)s" }}, ' \
23 |                  f'"host": {{ "ip": "{ip}", "hostname": "{hostname}" }}, ' \
24 |                  f'"log": {{ "level": "%(levelname)s", "logger": "%(name)s" }}, ' \
25 |                  f'"process": {{ "pid": "%(process)d" }}, ' \
26 |                  f'"message": %(message)s}}'
27 |
--------------------------------------------------------------------------------
/assemblyline/remote/datatypes/queues/multi.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from assemblyline.remote.datatypes import get_client, retry_call
4 |
5 |
6 | class MultiQueue(object):
7 |     def __init__(self, host=None, port=None, private=False):
8 |         self.c = get_client(host, port, private)
9 |
10 |     def delete(self, name):
11 |         retry_call(self.c.delete, name)
12 |
13 |     def length(self, name):
14 |         return retry_call(self.c.llen, name)
15 |
16 |     def pop(self, name, blocking=True, timeout=0):
17 |         if blocking:
18 |             response = retry_call(self.c.blpop, name, timeout)
19 |         else:
20 |             response = retry_call(self.c.lpop, name)
21 |
22 |         if not response:
23 |             return response
24 |
25 |         if blocking:
26 |             return json.loads(response[1])
27 |         else:
28 |             return json.loads(response)
29 |
30 |     def push(self, name, *messages):
31 |         for message in messages:
32 |             retry_call(self.c.rpush, name, json.dumps(message))
33 |
--------------------------------------------------------------------------------
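A usage sketch for MultiQueue above, assuming a Redis instance reachable on localhost; messages are JSON-serialized transparently:

    from assemblyline.remote.datatypes.queues.multi import MultiQueue

    mq = MultiQueue(host="localhost", port=6379)
    mq.push("submissions", {"sid": "abc"}, {"sid": "def"})

    print(mq.length("submissions"))                # 2
    print(mq.pop("submissions", blocking=False))   # {'sid': 'abc'} (FIFO: rpush + lpop)
    print(mq.pop("submissions", timeout=5))        # blocking pop with a 5 second timeout

    mq.delete("submissions")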
/docker/nginx-ssl-frontend/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nginx AS builder
2 |
3 | RUN apt-get update
4 | RUN apt-get install openssl
5 | RUN openssl req -nodes -x509 -newkey rsa:4096 -keyout /etc/ssl/nginx.key -out /etc/ssl/nginx.crt -days 3650 -subj "/C=CA/ST=Ontario/L=Ottawa/O=CCCS/CN=assemblyline.local"
6 |
7 | FROM nginx
8 |
9 | ENV DOLLAR $
10 | ENV FQDN localhost
11 | ENV MAX_BODY_SIZE 100M
12 | ENV FRONTEND_HOST al_frontend
13 | ENV SOCKET_HOST al_socketio
14 | ENV UI_HOST al_ui
15 | ENV KIBANA_HOST kibana
16 | ENV TEMPLATE full
17 | ENV ACCESS_LOG off
18 | ENV ERROR_LOG /dev/stderr
19 | ENV ERROR_LEVEL notice
20 | ENV READ_TIMEOUT 60s
21 | ENV CONNECT_TIMEOUT 60s
22 | ENV SEND_TIMEOUT 60s
23 |
24 | COPY http_redirect.conf /etc/nginx/conf.d/
25 | COPY full.template /opt/
26 | COPY minimal.template /opt/
27 |
28 | COPY --from=builder /etc/ssl/ /etc/ssl/
29 |
30 | EXPOSE 443
31 | EXPOSE 80
32 |
33 | CMD /bin/bash -c "envsubst < /opt/$TEMPLATE.template > /etc/nginx/conf.d/default.conf && cat /etc/nginx/conf.d/default.conf && exec nginx -g 'daemon off;'"
34 |
--------------------------------------------------------------------------------
/docker/nginx-ssl-frontend:mui5/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nginx AS builder
2 |
3 | RUN apt-get update
4 | RUN apt-get install openssl
5 | RUN openssl req -nodes -x509 -newkey rsa:4096 -keyout /etc/ssl/nginx.key -out /etc/ssl/nginx.crt -days 3650 -subj "/C=CA/ST=Ontario/L=Ottawa/O=CCCS/CN=assemblyline.local"
6 |
7 | FROM nginx
8 |
9 | ENV DOLLAR $
10 | ENV FQDN localhost
11 | ENV MAX_BODY_SIZE 100M
12 | ENV FRONTEND_HOST al_frontend
13 | ENV SOCKET_HOST al_socketio
14 | ENV UI_HOST al_ui
15 | ENV KIBANA_HOST kibana
16 | ENV TEMPLATE full
17 | ENV ACCESS_LOG off
18 | ENV ERROR_LOG /dev/stderr
19 | ENV ERROR_LEVEL notice
20 | ENV READ_TIMEOUT 60s
21 | ENV CONNECT_TIMEOUT 60s
22 | ENV SEND_TIMEOUT 60s
23 |
24 | COPY http_redirect.conf /etc/nginx/conf.d/
25 | COPY full.template /opt/
26 | COPY minimal.template /opt/
27 |
28 | COPY --from=builder /etc/ssl/ /etc/ssl/
29 |
30 | EXPOSE 443
31 | EXPOSE 80
32 |
33 | CMD /bin/bash -c "envsubst < /opt/$TEMPLATE.template > /etc/nginx/conf.d/default.conf && cat /etc/nginx/conf.d/default.conf && exec nginx -g 'daemon off;'"
34 |
--------------------------------------------------------------------------------
/dev/core/config/config.yml:
--------------------------------------------------------------------------------
1 | core:
2 |   alerter:
3 |     delay: 0
4 |   metrics:
5 |     apm_server:
6 |       server_url: http://apm_server:8200/
7 |     elasticsearch:
8 |       hosts: [http://elastic:devpass@elasticsearch:9200]
9 |     redis:
10 |       host: redis
11 |   redis:
12 |     nonpersistent:
13 |       host: redis
14 |     persistent:
15 |       host: redis
16 |       port: 6379
17 |
18 | datastore:
19 |   hosts: [http://elastic:devpass@elasticsearch:9200]
20 |
21 | filestore:
22 |   cache:
23 |     [
24 |       "s3://al_storage_key:Ch@ngeTh!sPa33w0rd@minio:9000?s3_bucket=al-cache&use_ssl=False",
25 |     ]
26 |   storage:
27 |     [
28 |       "s3://al_storage_key:Ch@ngeTh!sPa33w0rd@minio:9000?s3_bucket=al-storage&use_ssl=False",
29 |     ]
30 |
31 | logging:
32 |   log_to_console: true
33 |   log_to_file: false
34 |   log_to_syslog: false
35 |
36 | services:
37 |   preferred_update_channel: dev
38 |   allow_insecure_registry: true
39 |   image_variables:
40 |     PRIVATE_REGISTRY: $PRIVATE_REGISTRY
41 |
42 | ui:
43 |   enforce_quota: false
44 |
--------------------------------------------------------------------------------
/dev/depends/config/kibana.docker.yml:
--------------------------------------------------------------------------------
1 | elasticsearch.hosts: ["http://elasticsearch:9200"]
2 |
3 | elasticsearch.password: kibanapass
4 | elasticsearch.username: kibana_system
5 |
6 | logging.root.level: warn
7 |
8 | server.basePath: /kibana
9 | server.publicBaseUrl: https://localhost/kibana
10 | server.rewriteBasePath: true
11 | server.name: kibana
12 | server.host: "0.0.0.0"
13 | xpack.reporting.roles.enabled: false
14 | xpack.reporting.encryptionKey: ThisIsSomeRandomKeyThatYouShouldDefinitelyChange!
15 | xpack.reporting.kibanaServer.hostname: localhost
16 | xpack.security.encryptionKey: ThisIsSomeRandomKeyThatYouShouldDefinitelyChange!
17 | xpack.encryptedSavedObjects.encryptionKey: ThisIsSomeRandomKeyThatYouShouldDefinitelyChange!
18 | xpack.fleet.packages:
19 |   - name: apm
20 |     version: latest
21 | xpack.fleet.agentPolicies:
22 |   - name: APM Server
23 |     id: apm
24 |     is_managed: false
25 |     namespace: default
26 |     package_policies:
27 |       - name: apm_server
28 |         id: default-apm-server
29 |         package:
30 |           name: apm
31 |
--------------------------------------------------------------------------------
/dev/hauntedhouse/config/worker.json:
--------------------------------------------------------------------------------
1 | {
2 |   "api_token": "insecure-worker-key",
3 |   "file_cache": {
4 |     "Directory": {
5 |       "path": "/tmp/files",
6 |       "size": "100Gi"
7 |     }
8 |   },
9 |   "blob_cache": {
10 |     "Directory": {
11 |       "path": "/tmp/blobs",
12 |       "size": "100Gi"
13 |     }
14 |   },
15 |   "files": {
16 |     "S3": {
17 |       "access_key_id": "al_storage_key",
18 |       "secret_access_key": "Ch@ngeTh!sPa33w0rd",
19 |       "endpoint_url": "http://minio:9000",
20 |       "region_name": "local",
21 |       "bucket": "al-storage",
22 |       "no_tls_verify": true
23 |     }
24 |   },
25 |   "blobs": {
26 |     "S3": {
27 |       "access_key_id": "al_storage_key",
28 |       "secret_access_key": "Ch@ngeTh!sPa33w0rd",
29 |       "endpoint_url": "http://minio:9000",
30 |       "region_name": "local",
31 |       "bucket": "retrohunt-storage",
32 |       "no_tls_verify": true
33 |     }
34 |   },
35 |   "bind_address": "0.0.0.0:4443",
36 |   "tls": null,
37 |   "server_address": "https://hauntedhouse:4443",
38 |   "server_tls": "AllowAll"
39 | }
--------------------------------------------------------------------------------
/assemblyline/common/chunk.py:
--------------------------------------------------------------------------------
1 | """Sequence manipulation methods used in parsing raw datastore output."""
2 | from __future__ import annotations
3 | from typing import Sequence, Generator, TypeVar, overload
4 |
5 | _T = TypeVar('_T')
6 |
7 |
8 | @overload
9 | def chunk(items: bytes, n: int) -> Generator[bytes, None, None]:
10 |     ...
11 |
12 |
13 | @overload
14 | def chunk(items: str, n: int) -> Generator[str, None, None]:
15 |     ...
16 |
17 |
18 | @overload
19 | def chunk(items: Sequence[_T], n: int) -> Generator[Sequence[_T], None, None]:
20 |     ...
21 |
22 |
23 | def chunk(items, n: int):
24 |     """Yield n-sized chunks from the given sequence.
25 |
26 |     >>> list(chunk([1, 2, 3, 4, 5, 6, 7], 2))
27 |     [[1, 2], [3, 4], [5, 6], [7]]
28 |     """
29 |     for i in range(0, len(items), n):
30 |         yield items[i:i+n]
31 |
32 |
33 | def chunked_list(items: Sequence[_T], n: int) -> list[Sequence[_T]]:
34 |     """Create a list of n-sized chunks from the given sequence.
35 |
36 |     >>> chunked_list([1, 2, 3, 4, 5, 6, 7], 2)
37 |     [[1, 2], [3, 4], [5, 6], [7]]
38 |     """
39 |     return list(chunk(items, n))
--------------------------------------------------------------------------------
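Beyond the doctests above, chunk() preserves the input sequence type via slicing, which is the reason for the overloads; a short sketch:

    from assemblyline.common.chunk import chunk, chunked_list

    print(list(chunk("abcdef", 4)))          # ['abcd', 'ef']     (str in, str out)
    print(list(chunk(b"abcdef", 4)))         # [b'abcd', b'ef']   (bytes in, bytes out)
    print(chunked_list([1, 2, 3, 4, 5], 2))  # [[1, 2], [3, 4], [5]]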
/incremental.Dockerfile:
--------------------------------------------------------------------------------
1 | # This dockerfile only includes the minimal steps to build a package onto
2 | # a periodic root image
3 | ARG build_image
4 | ARG base
5 | ARG tag
6 | FROM $build_image AS builder
7 | ARG version
8 |
9 | # Install assemblyline base (setup.py is just a file we know exists so the command
10 | # won't fail if dist isn't there. The dist* copies in any dist directory only if it exists.)
11 | COPY setup.py dist* dist/
12 | RUN pip install --no-cache-dir --no-warn-script-location -f dist/ -U --user assemblyline==$version && rm -rf ~/.cache/pip
13 |
14 | FROM $base:$tag
15 | ARG version
16 | ARG version_tag=${version}
17 |
18 | # Install assemblyline base
19 | COPY --chown=assemblyline:assemblyline --from=builder /root/.local /var/lib/assemblyline/.local
20 | ENV PATH=/var/lib/assemblyline/.local/bin:$PATH
21 | ENV PYTHONPATH=/var/lib/assemblyline/.local/lib/python3.11/site-packages
22 | ENV ASSEMBLYLINE_VERSION=${version}
23 | ENV ASSEMBLYLINE_IMAGE_TAG=${version_tag}
24 |
25 | # Switch to assemblyline user
26 | USER assemblyline
27 | WORKDIR /var/lib/assemblyline
28 | CMD /bin/bash
29 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/changes.py:
--------------------------------------------------------------------------------
1 | """
2 | Messages about configuration changes internal to assemblyline.
3 |
4 | Uses only the standard library.
5 | """
6 | from __future__ import annotations
7 | import enum
8 | import json
9 | from dataclasses import asdict, dataclass
10 |
11 |
12 | class Operation(enum.IntEnum):
13 |     Added = 1
14 |     Removed = 2
15 |     Modified = 3
16 |     Incompatible = 4
17 |
18 |
19 | @dataclass
20 | class ServiceChange:
21 |     name: str
22 |     operation: Operation
23 |
24 |     @staticmethod
25 |     def serialize(obj: ServiceChange) -> str:
26 |         return json.dumps(asdict(obj))
27 |
28 |     @staticmethod
29 |     def deserialize(data: str) -> ServiceChange:
30 |         return ServiceChange(**json.loads(data))
31 |
32 |
33 | @dataclass
34 | class SignatureChange:
35 |     signature_id: str
36 |     signature_type: str
37 |     source: str
38 |     operation: Operation
39 |
40 |     @staticmethod
41 |     def serialize(obj: SignatureChange) -> str:
42 |         return json.dumps(asdict(obj))
43 |
44 |     @staticmethod
45 |     def deserialize(data: str) -> SignatureChange:
46 |         return SignatureChange(**json.loads(data))
47 |
--------------------------------------------------------------------------------
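A round-trip sketch for the change messages above; note that Operation is an IntEnum, so the integer that comes back from JSON still compares equal to the enum member:

    from assemblyline.odm.messages.changes import Operation, ServiceChange

    change = ServiceChange(name="Extract", operation=Operation.Modified)
    wire = ServiceChange.serialize(change)   # '{"name": "Extract", "operation": 3}'

    restored = ServiceChange.deserialize(wire)
    assert restored == change                # 3 == Operation.Modified holds for IntEnum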
/assemblyline/common/threading.py:
--------------------------------------------------------------------------------
1 | import elasticapm
2 |
3 | from concurrent.futures import ThreadPoolExecutor
4 | from elasticapm.traces import execution_context
5 |
6 |
7 | def apm_monitored(fn, *args, **kwargs):
8 |     with elasticapm.capture_span(fn.__name__, "threadpool"):
9 |         return fn(*args, **kwargs)
10 |
11 |
12 | class APMAwareThreadPoolExecutor(ThreadPoolExecutor):
13 |     def __init__(self, *args, **kwargs):
14 |         # If an APM server is defined we will get the current transaction
15 |         self.apm_transaction = execution_context.get_transaction()
16 |
17 |         # You are not allowed to use the following
18 |         kwargs.pop("initializer", None)
19 |
20 |         super().__init__(initializer=self._set_apm_transaction, *args, **kwargs)
21 |
22 |     def _set_apm_transaction(self):
23 |         # Make sure the context is set in each thread
24 |         if self.apm_transaction is not None:
25 |             execution_context.set_transaction(self.apm_transaction)
26 |
27 |     # Change the submit function so all subfunctions are monitored
28 |     def submit(self, fn, /, *args, **kwargs):
29 |         return super().submit(apm_monitored, fn, *args, **kwargs)
30 |
--------------------------------------------------------------------------------
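A usage sketch for the executor above: it is a drop-in replacement for ThreadPoolExecutor, and capture_span is assumed to be a no-op when no APM transaction is active:

    from assemblyline.common.threading import APMAwareThreadPoolExecutor

    with APMAwareThreadPoolExecutor(max_workers=4) as pool:
        # Each call runs inside an APM span named after the function and
        # inherits the transaction that was active when the pool was created.
        futures = [pool.submit(pow, 2, n) for n in range(8)]
        print([f.result() for f in futures])  # [1, 2, 4, 8, 16, 32, 64, 128]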
/assemblyline/odm/messages/scaler_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"ScalerHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.scaler_heartbeat.ScalerMessage"
5 |
6 |
7 | @odm.model(description="Metrics")
8 | class Metrics(odm.Model):
9 |     memory_free = odm.Float(description="Amount of free memory")
10 |     cpu_free = odm.Float(description="Amount of free CPU")
11 |     memory_total = odm.Float(description="Amount of total memory")
12 |     cpu_total = odm.Float(description="Amount of total CPU")
13 |
14 |
15 | @odm.model(description="Heartbeat Model")
16 | class Heartbeat(odm.Model):
17 |     instances = odm.Integer(description="Number of instances")
18 |     metrics = odm.Compound(Metrics, description="Metrics")
19 |
20 |
21 | @odm.model(description="Model of Scaler Heartbeat Message")
22 | class ScalerMessage(odm.Model):
23 |     msg = odm.Compound(Heartbeat, description="Heartbeat message")
24 |     msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class of message")
25 |     msg_type = odm.Enum(values=MSG_TYPES, default="ScalerHeartbeat", description="Type of message")
26 |     sender = odm.Keyword(description="Sender of message")
27 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "editor.codeActionsOnSave": {
3 |         "source.organizeImports": "explicit"
4 |     },
5 |     "editor.formatOnSave": true,
6 |     "editor.rulers": [
7 |         120
8 |     ],
9 |     "editor.tabSize": 4,
10 |     "editor.wordWrap": "wordWrapColumn",
11 |     "editor.wordWrapColumn": 120,
12 |     "files.insertFinalNewline": true,
13 |     "files.trimFinalNewlines": true,
14 |     "files.trimTrailingWhitespace": true,
15 |     "isort.args": [
16 |         "-l",
17 |         "120",
18 |         "--profile=black",
19 |         // "--src=${workspaceFolder}"
20 |     ],
21 |     "python.formatting.autopep8Args": [
22 |         "--max-line-length",
23 |         "120",
24 |         "--experimental"
25 |     ],
26 |     "python.formatting.provider": "autopep8",
27 |     "python.formatting.blackArgs": [
28 |         "--line-length=120"
29 |     ],
30 |     "python.linting.enabled": true,
31 |     "python.linting.flake8Enabled": true,
32 |     "python.linting.flake8Args": [
33 |         "--max-line-length=120",
34 |         // Added the ignore of E203 for now: https://github.com/PyCQA/pycodestyle/issues/373
35 |         "--ignore=E203,W503"
36 |     ],
37 |     "python.linting.pylintEnabled": false,
38 | }
39 |
--------------------------------------------------------------------------------
/test/test_cachestore.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from assemblyline.common import forge
4 |
5 |
6 | KEY = "test1"
7 | DATA = b"THIS IS WHAT I'LL SAVE INTO THE CACHE STORE..."
8 | COMPONENT = "test_component"
9 |
10 |
11 | @pytest.fixture(scope='module')
12 | def cachestore(datastore_connection):
13 |     cachestore = forge.get_cachestore(COMPONENT, datastore=datastore_connection)
14 |     cachestore.datastore.cached_file.delete_by_query("id:*")
15 |     cachestore.save(KEY, DATA)
16 |     cachestore.datastore.cached_file.commit()
17 |
18 |     return cachestore
19 |
20 |
21 | def test_expiry_field(cachestore):
22 |     assert cachestore.datastore.cached_file.search("expiry_ts:*", as_obj=False)['total'] == 1
23 |
24 |
25 | def test_db_cache_entry(cachestore):
26 |     key = f"{cachestore.component}_{KEY}"
27 |     assert cachestore.datastore.cached_file.get(key, as_obj=False)['component'] == COMPONENT
28 |
29 |
30 | def test_cache_data(cachestore):
31 |     assert cachestore.get(KEY) == DATA
32 |
33 |
34 | def test_cache_cleanup(cachestore):
35 |     cachestore.delete(KEY)
36 |     cachestore.datastore.cached_file.commit()
37 |
38 |     assert cachestore.get(KEY) is None
39 |     assert cachestore.datastore.cached_file.get(KEY, as_obj=False) is None
40 |
--------------------------------------------------------------------------------
/assemblyline/odm/messages/vacuum_heartbeat.py:
--------------------------------------------------------------------------------
1 | from assemblyline import odm
2 |
3 | MSG_TYPES = {"VacuumHeartbeat"}
4 | LOADER_CLASS = "assemblyline.odm.messages.vacuum_heartbeat.VacuumMessage"
5 |
6 |
7 | @odm.model(description="Vacuum Stats")
8 | class Metrics(odm.Model):
9 |     ingested = odm.Integer(description="Files ingested")
10 |     # protocol = odm.Mapping(odm.Integer())
11 |     safelist = odm.Integer(description="Files safelisted")
12 |     errors = odm.Integer()
13 |     skipped = odm.Integer()
14 |
15 |
16 | @odm.model(description="Heartbeat Model")
17 | class Heartbeat(odm.Model):
18 |     # instances = odm.Integer(description="Number of instances")
19 |     metrics = odm.Compound(Metrics, description="Vacuum metrics")
20 |     # queues = odm.Compound(Metrics, description="Vacuum queues")
21 |
22 |
23 | @odm.model(description="Model of Vacuum Heartbeat Message")
24 | class VacuumMessage(odm.Model):
25 |     msg = odm.Compound(Heartbeat, description="Heartbeat message")
26 |     msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message")
27 |     msg_type = odm.Enum(values=MSG_TYPES, default="VacuumHeartbeat", description="Type of message")
28 |     sender = odm.Keyword(description="Sender of message")
29 |
--------------------------------------------------------------------------------
/assemblyline/common/memory_zip.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import io
4 | import zipfile
5 | from typing import Union
6 |
7 |
8 | class InMemoryZip(object):
9 |     def __init__(self):
10 |         # Create the in-memory file-like object
11 |         self.in_memory_zip = io.BytesIO()
12 |
13 |     def append(self, filename_in_zip: str, file_contents: Union[str, bytes]) -> InMemoryZip:
14 |         """
15 |         Appends a file with name filename_in_zip and contents of
16 |         file_contents to the in-memory zip.
17 |         """
18 |
19 |         # Get a handle to the in-memory zip in append mode
20 |         zf = zipfile.ZipFile(self.in_memory_zip, "a", zipfile.ZIP_DEFLATED, False)
21 |
22 |         # Write the file to the in-memory zip
23 |         zf.writestr(filename_in_zip, file_contents)
24 |
25 |         # Mark the files as having been created on Windows so that
26 |         # Unix permissions are not inferred as 0000
27 |         for zfile in zf.filelist:
28 |             zfile.create_system = 0
29 |
30 |         return self
31 |
32 |     def read(self) -> bytes:
33 |         """
34 |         Returns the bytes of the in-memory zip.
35 |         """
36 |
37 |         self.in_memory_zip.seek(0)
38 |         return self.in_memory_zip.read()
39 |
--------------------------------------------------------------------------------
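A usage sketch for InMemoryZip above; append() returns self, so calls can be chained (the output path is illustrative):

    from assemblyline.common.memory_zip import InMemoryZip

    builder = InMemoryZip()
    builder.append("report.txt", "all clear").append("data/raw.bin", b"\x00\x01")

    zip_bytes = builder.read()  # bytes of a complete zip archive
    with open("/tmp/bundle.zip", "wb") as out:
        out.write(zip_bytes)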
17 | """ 18 | 19 | # Get a handle to the in-memory zip in append mode 20 | zf = zipfile.ZipFile(self.in_memory_zip, "a", zipfile.ZIP_DEFLATED, False) 21 | 22 | # Write the file to the in-memory zip 23 | zf.writestr(filename_in_zip, file_contents) 24 | 25 | # Mark the files as having been created on Windows so that 26 | # Unix permissions are not inferred as 0000 27 | for zfile in zf.filelist: 28 | zfile.create_system = 0 29 | 30 | return self 31 | 32 | def read(self) -> bytes: 33 | """ 34 | Returns a string with the contents of the in-memory zip. 35 | """ 36 | 37 | self.in_memory_zip.seek(0) 38 | return self.in_memory_zip.read() 39 | -------------------------------------------------------------------------------- /assemblyline/datasource/common.py: -------------------------------------------------------------------------------- 1 | import re 2 | HASH_RE = r'^[0-9a-fA-F]{32,64}$' 3 | HASH_PATTERN = re.compile(HASH_RE) 4 | 5 | 6 | class DatasourceException(Exception): 7 | pass 8 | 9 | 10 | def hash_type(value): 11 | if HASH_PATTERN.match(value): 12 | return { 13 | 32: "md5", 40: "sha1", 64: "sha256" 14 | }.get(len(value), "invalid") 15 | else: 16 | return "invalid" 17 | 18 | 19 | # noinspection PyUnusedLocal 20 | class Datasource(object): 21 | @staticmethod 22 | def hash_type(value): 23 | return hash_type(value) 24 | 25 | # Subclasses should implement the following methods. 26 | def __init__(self, log, **kw): # pylint: disable=W0613 27 | self.log = log 28 | 29 | def parse(self, result, **kw): # pylint: disable=W0613 30 | pass 31 | 32 | def query(self, value, **kw): # pylint: disable=W0613 33 | pass 34 | 35 | 36 | # noinspection PyMethodMayBeStatic,PyUnusedLocal 37 | class Null(object): 38 | def __init__(self, e=None): 39 | self.e = e 40 | 41 | def parse(self, result, **kw): # pylint: disable=W0613 42 | return [] 43 | 44 | def query(self, value, **kw): # pylint: disable=W0613 45 | if self.e: 46 | raise self.e # pylint: disable=E0702 47 | 48 | return [] 49 | -------------------------------------------------------------------------------- /assemblyline/odm/messages/elastic_heartbeat.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | 3 | MSG_TYPES = {"ElasticHeartbeat"} 4 | LOADER_CLASS = "assemblyline.odm.messages.elastic_heartbeat.ElasticMessage" 5 | 6 | 7 | @odm.model(description="Information about an elasticsearch shard") 8 | class IndexData(odm.Model): 9 | name = odm.keyword() 10 | shard_size = odm.integer() 11 | 12 | 13 | @odm.model(description="Heartbeat Model for Elasticsearch") 14 | class Heartbeat(odm.Model): 15 | instances = odm.Integer(description="Number of Elasticsearch instances with assigned shards") 16 | unassigned_shards = odm.Integer(description="Number of unassigned shards in the cluster") 17 | request_time = odm.Float(description="Time to load shard metrics") 18 | shard_sizes = odm.sequence(odm.compound(IndexData), description="Information about each index") 19 | 20 | 21 | @odm.model(description="Model of Elasticsearch Heartbeat Message") 22 | class ElasticMessage(odm.Model): 23 | msg = odm.Compound(Heartbeat, description="Heartbeat message for elasticsearch") 24 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message") 25 | msg_type = odm.Enum(values=MSG_TYPES, default="ElasticHeartbeat", description="Type of message") 26 | sender = odm.Keyword(description="Sender of message") 27 | 
-------------------------------------------------------------------------------- /assemblyline/odm/models/user_favorites.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common import forge 3 | Classification = forge.get_classification() 4 | 5 | 6 | @odm.model(index=False, store=False, description="Abstract Model of Favorite") 7 | class Favorite(odm.Model): 8 | created_by = odm.Keyword(description="Who created the favorite") 9 | classification = odm.Classification(is_user_classification=True, copyto="__text__", 10 | default=Classification.UNRESTRICTED, 11 | description="Classification of the favorite") 12 | name = odm.Keyword(description="Name of the favorite") 13 | query = odm.Keyword(description="Query for the favorite") 14 | 15 | 16 | @odm.model(index=False, store=False, description="Model of User Favorites") 17 | class UserFavorites(odm.Model): 18 | alert = odm.List(odm.Compound(Favorite), default=[], description="Alert page favorites") 19 | error = odm.List(odm.Compound(Favorite), default=[], description="Error page favorites") 20 | search = odm.List(odm.Compound(Favorite), default=[], description="Search page favorites") 21 | signature = odm.List(odm.Compound(Favorite), default=[], description="Signature page favorites") 22 | submission = odm.List(odm.Compound(Favorite), default=[], description="Submission page favorites") 23 | -------------------------------------------------------------------------------- /docker/al_dev/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm 2 | 3 | # Set up environment variables 4 | ENV PYTHONPATH /opt/alv4/assemblyline-base:/opt/alv4/assemblyline-core:/opt/alv4/assemblyline-service-server:/opt/alv4/assemblyline-service-client:/opt/alv4/assemblyline_client:/opt/alv4/assemblyline-ui 5 | 6 | # Upgrade packages 7 | RUN apt-get update && apt-get -yy upgrade && rm -rf /var/lib/apt/lists/* 8 | 9 | # SSDEEP pkg requirements 10 | RUN apt-get update && apt-get install -yy build-essential libssl-dev libffi-dev libfuzzy-dev libldap2-dev libsasl2-dev libmagic1 zip 7zip && rm -rf /var/lib/apt/lists/* 11 | 12 | # Python packages requirements 13 | RUN pip install --no-warn-script-location --no-cache-dir \ 14 | assemblyline[test] \ 15 | assemblyline-core \ 16 | assemblyline-ui \ 17 | assemblyline-service-server \ 18 | debugpy \ 19 | && pip uninstall -y \ 20 | assemblyline \ 21 | assemblyline-core \ 22 | assemblyline-ui \ 23 | assemblyline-service-server \ 24 | && rm -rf ~/.cache/pip 25 | 26 | 27 | # Create Assemblyline source directory 28 | RUN mkdir -p /etc/assemblyline 29 | RUN mkdir -p /var/cache/assemblyline 30 | RUN mkdir -p /var/lib/assemblyline 31 | RUN mkdir -p /var/lib/assemblyline/flowjs 32 | RUN mkdir -p /var/lib/assemblyline/bundling 33 | RUN mkdir -p /var/log/assemblyline 34 | RUN mkdir -p /opt/alv4 35 | WORKDIR /opt/alv4 36 | 37 | CMD pip list 38 | -------------------------------------------------------------------------------- /test/classification.yml: -------------------------------------------------------------------------------- 1 | enforce: true 2 | groups: 3 | - aliases: [DEPTS, ANY] 4 | description: Users of department 1. 5 | name: DEPARTMENT 1 6 | short_name: D1 7 | solitary_display_name: ANY 8 | - aliases: [DEPTS] 9 | description: Users of department 2.
10 | name: DEPARTMENT 2 11 | short_name: D2 12 | levels: 13 | - aliases: [] 14 | css: {banner: alert-default, label: label-default, text: text-muted} 15 | description: No restrictions applied to data. 16 | lvl: 100 17 | name: UNRESTRICTED 18 | short_name: U 19 | - aliases: [CLASSIFIED, DO NOT LOOK] 20 | css: {banner: alert-info, label: label-primary, text: text-primary} 21 | description: Data restricted to a certain few... 22 | lvl: 200 23 | name: RESTRICTED 24 | short_name: R 25 | required: 26 | - aliases: [] 27 | description: Gotta be a super user to see this! 28 | name: SUPER USER 29 | require_lvl: 200 30 | short_name: SU 31 | - aliases: [GOD] 32 | description: Gotta be an administrator to see this! 33 | name: ADMIN 34 | short_name: ADM 35 | restricted: R//GOD//ANY 36 | subgroups: 37 | - aliases: [] 38 | description: Users of group 1 (which are part of department 1). 39 | limited_to_group: D1 40 | name: GROUP 1 41 | require_group: D1 42 | short_name: G1 43 | - aliases: [] 44 | description: Users of group 2 (can be part of any department). 45 | name: GROUP 2 46 | short_name: G2 47 | unrestricted: U -------------------------------------------------------------------------------- /LICENCE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Crown Copyright, Government of Canada (Canadian Centre for Cyber Security / Communications Security Establishment) 4 | 5 | Copyright title to all 3rd party software distributed with Assemblyline (AL) is held by the respective copyright holders as noted in those files. Users are asked to read the 3rd Party Licenses referenced with those assets. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 8 | 9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
12 | -------------------------------------------------------------------------------- /assemblyline/odm/models/heuristic.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common import forge 3 | from assemblyline.odm.models.statistics import Statistics 4 | 5 | Classification = forge.get_classification() 6 | 7 | 8 | @odm.model(index=True, store=True, description="Model of Service Heuristics") 9 | class Heuristic(odm.Model): 10 | attack_id = odm.List(odm.Keyword(copyto="__text__"), default=[], description="List of all associated ATT&CK IDs") 11 | classification = odm.Classification(default=Classification.UNRESTRICTED, 12 | description="Classification of the heuristic") 13 | description = odm.Text(copyto="__text__", description="Description of the heuristic") 14 | filetype = odm.Keyword(copyto="__text__", description="What type of files does this heuristic target?") 15 | heur_id = odm.Keyword(copyto="__text__", description="ID of the Heuristic") 16 | name = odm.Keyword(copyto="__text__", description="Name of the heuristic") 17 | score = odm.Integer(description="Default score of the heuristic") 18 | signature_score_map = odm.Mapping(odm.Integer(), default={}, 19 | description="Score of signatures for this heuristic") 20 | stats = odm.Compound(Statistics, default={}, description="Statistics related to the Heuristic") 21 | max_score = odm.Optional(odm.Integer(), description="Maximum score for heuristic") 22 | -------------------------------------------------------------------------------- /assemblyline/odm/messages/service_timing_heartbeat.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.odm.messages import PerformanceTimer 3 | 4 | MSG_TYPES = {"ServiceTimingHeartbeat"} 5 | LOADER_CLASS = "assemblyline.odm.messages.service_timing_heartbeat.ServiceTimingMessage" 6 | 7 | 8 | @odm.model(description="Timing Metrics") 9 | class Metrics(odm.Model): 10 | execution = PerformanceTimer(description="Execution time") 11 | execution_count = odm.Integer(description="Number of executions") 12 | idle = PerformanceTimer(description="Idle time") 13 | idle_count = odm.Integer(description="Number of idle periods") 14 | 15 | 16 | @odm.model(description="Heartbeat Model") 17 | class Heartbeat(odm.Model): 18 | instances = odm.Integer(description="Number of instances") 19 | metrics = odm.Compound(Metrics, description="Metrics") 20 | queue = odm.Integer(description="Queue size") 21 | service_name = odm.Keyword(description="Name of service") 22 | 23 | 24 | @odm.model(description="Model of Service Timing Heartbeat Message") 25 | class ServiceTimingMessage(odm.Model): 26 | msg = odm.Compound(Heartbeat, description="Heartbeat message") 27 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message") 28 | msg_type = odm.Enum(values=MSG_TYPES, default="ServiceTimingHeartbeat", description="Type of message") 29 | sender = odm.Keyword(description="Sender of message") 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # IDE files 10 | .pydevproject 11 | .python-version 12 | .idea 13 | */.mypy_cache/* 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 |
develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | VERSION 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # PyBuilder 66 | .pybuilder/ 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # IPython 73 | profile_default/ 74 | ipython_config.py 75 | 76 | # Environments 77 | .env 78 | .venv 79 | env/ 80 | venv/ 81 | ENV/ 82 | env.bak/ 83 | venv.bak/ 84 | 85 | # Cython debug symbols 86 | cython_debug/ 87 | assemblyline/common/frequency.c 88 | 89 | # MacOS 90 | .DS_Store 91 | -------------------------------------------------------------------------------- /dev/hauntedhouse/config/core.json: -------------------------------------------------------------------------------- 1 | { 2 | "authentication": { 3 | "static_keys": [ 4 | { 5 | "key": "insecure-worker-key", 6 | "roles": [ 7 | "Worker" 8 | ] 9 | }, 10 | { 11 | "key": "insecure-search-key", 12 | "roles": [ 13 | "Search" 14 | ] 15 | }, 16 | { 17 | "key": "insecure-ingest-key", 18 | "roles": [ 19 | "Ingest" 20 | ] 21 | } 22 | ] 23 | }, 24 | "database": { 25 | "SQLite": { 26 | "path": "/data/sqlite/staging.db" 27 | } 28 | }, 29 | "core": { 30 | "batch_limit_seconds": 60, 31 | "batch_limit_size": 500 32 | }, 33 | "cache": { 34 | "Directory": { 35 | "path": "/tmp/", 36 | "size": "100Gi" 37 | } 38 | }, 39 | "files": { 40 | "S3": { 41 | "access_key_id": "al_storage_key", 42 | "secret_access_key": "Ch@ngeTh!sPa33w0rd", 43 | "endpoint_url": "http://minio:9000", 44 | "region_name": "local", 45 | "bucket": "al-storage", 46 | "no_tls_verify": true 47 | } 48 | }, 49 | "blobs": { 50 | "S3": { 51 | "access_key_id": "al_storage_key", 52 | "secret_access_key": "Ch@ngeTh!sPa33w0rd", 53 | "endpoint_url": "http://minio:9000", 54 | "region_name": "local", 55 | "bucket": "retrohunt-storage", 56 | "no_tls_verify": true 57 | } 58 | }, 59 | "bind_address": "0.0.0.0:4443", 60 | "tls": null 61 | } 62 | -------------------------------------------------------------------------------- /assemblyline/odm/messages/dispatching.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | 3 | 4 | @odm.model(description="These are messages sent by dispatcher on the watch queue") 5 | class WatchQueueMessage(odm.Model): 6 | cache_key = odm.Optional(odm.Keyword(), description="Cache key") 7 | status = odm.Enum(values=['FAIL', 'OK', 'START', 'STOP'], description="Watch statuses") 8 | 9 | 10 | CREATE_WATCH = 'CREATE_WATCH' 11 | LIST_OUTSTANDING = 'LIST_OUTSTANDING' 12 | UPDATE_BAD_SID = 'UPDATE_BAD_SID' 13 | 14 | 15 | @odm.model(description="Create Watch Message") 16 | class CreateWatch(odm.Model): 17 | queue_name: str = odm.Keyword(description="Name of queue") 18 | submission: str = odm.Keyword(description="Submission ID") 19 | 20 | 21 | @odm.model(description="List Outstanding Message") 22 | class
ListOutstanding(odm.Model): 23 | response_queue: str = odm.Keyword(description="Response queue") 24 | submission: str = odm.Keyword(description="Submission ID") 25 | 26 | 27 | MESSAGE_CLASSES = { 28 | CREATE_WATCH: CreateWatch, 29 | LIST_OUTSTANDING: ListOutstanding, 30 | UPDATE_BAD_SID: str 31 | } 32 | 33 | 34 | @odm.model(description="Model of Dispatcher Command Message") 35 | class DispatcherCommandMessage(odm.Model): 36 | kind: str = odm.Enum(values=list(MESSAGE_CLASSES.keys()), description="Kind of message") 37 | payload_data = odm.Any(description="Message payload") 38 | 39 | def payload(self): 40 | return MESSAGE_CLASSES[self.kind](self.payload_data) 41 | -------------------------------------------------------------------------------- /assemblyline/odm/models/apikey.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common import forge 3 | from assemblyline.common.str_utils import StringTable 4 | from assemblyline.odm.models.user import ACL_MAP, USER_ROLES 5 | 6 | 7 | APIKEY_ID_DELIMETER = "+" 8 | APIKEY_ID_FORMAT = "{}" + APIKEY_ID_DELIMETER + "{}" 9 | FORBIDDEN_APIKEY_CHARACTERS = '[+@!#$%^&*()<>?/\|}{~:]' 10 | 11 | 12 | 13 | @odm.model(index=True, store=True, description="Model of Apikey") 14 | class Apikey(odm.Model): 15 | acl = odm.List(odm.Enum(values=ACL_MAP.keys()), description="Access Control List for the API key") 16 | password = odm.Keyword(description="BCrypt hash of the password for the apikey") 17 | roles = odm.List(odm.Enum(values=USER_ROLES), default=[], description="List of roles tied to the API key") 18 | uname = odm.Keyword(copyto="__text__", description="Username") 19 | key_name = odm.Keyword(copyto="__text__", description="Name of the key") 20 | creation_date = odm.Date(default="NOW", description="The date this API key was created.") 21 | expiry_ts = odm.Optional(odm.Date(), description="Expiry timestamp.") 22 | last_used = odm.Optional(odm.Date(), description="The last time this API key was used.") 23 | 24 | def get_apikey_id(keyname: str, uname: str): 25 | return APIKEY_ID_FORMAT.format(keyname, uname) 26 | 27 | def split_apikey_id(key_id: str): 28 | data = key_id.split(APIKEY_ID_DELIMETER) 29 | username = data[1] 30 | keyname = data[0] 31 | 32 | return keyname, username 33 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Assemblyline contributing guide 2 | 3 | This guide covers the basics of how to contribute to the Assemblyline project. 4 | 5 | Python code should follow the PEP8 guidelines defined here: [PEP8 Guidelines](https://www.python.org/dev/peps/pep-0008/). 6 | 7 | ## Tell us what you want to build/fix 8 | Before you start coding anything you should connect with the Assemblyline community via the [Assemblyline Discord server](https://discord.gg/GUAy9wErNu) and/or the [central Assemblyline GitHub project](https://github.com/CybercentreCanada/assemblyline/issues) to make sure no one else is working on the same thing and that whatever you are going to build still fits with the vision of the system.
9 | 10 | ## Git workflow 11 | 12 | - Clone the repo to your own account 13 | - Check out and pull the latest commits from the master branch 14 | - Make a branch 15 | - Work in any way you like and make sure your changes actually work 16 | - When you're satisfied with your changes, create a pull request to the main assemblyline repo 17 | 18 | #### Transfer your service repo 19 | If you've worked on a new service that you want to be included in the default service selection you'll have to transfer the repo into our control. 20 | 21 | #### You are not allowed to merge: 22 | 23 | Even if you try to merge in your pull request, you will be denied. Only a few people in our team are allowed to merge code into our repositories. 24 | 25 | We check for new pull requests every day and will merge them in once they have been approved by someone in our team. 26 | -------------------------------------------------------------------------------- /assemblyline/odm/messages/scaler_status_heartbeat.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | 3 | MSG_TYPES = {"ScalerStatusHeartbeat"} 4 | LOADER_CLASS = "assemblyline.odm.messages.scaler_status_heartbeat.ScalerStatusMessage" 5 | 6 | 7 | @odm.model(description="Service Status Model") 8 | class Status(odm.Model): 9 | running = odm.Integer(description="Number of instances running") 10 | target = odm.Integer(description="Target scaling for service") 11 | minimum = odm.Integer(description="Minimum number of instances") 12 | maximum = odm.Integer(description="Maximum number of instances") 13 | dynamic_maximum = odm.Integer(description="Dynamic maximum number of instances") 14 | queue = odm.Integer(description="Service queue") 15 | pressure = odm.Float(description="Service pressure") 16 | duty_cycle = odm.Float(description="Duty Cycle") 17 | 18 | 19 | @odm.model(description="Heartbeat Model") 20 | class Heartbeat(odm.Model): 21 | service_name = odm.Keyword(description="Name of service") 22 | metrics = odm.Compound(Status, description="Status of service") 23 | 24 | 25 | @odm.model(description="Model of Scaler's Status Heartbeat Message") 26 | class ScalerStatusMessage(odm.Model): 27 | msg = odm.Compound(Heartbeat, description="Heartbeat message") 28 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message") 29 | msg_type = odm.Enum(values=MSG_TYPES, default="ScalerStatusHeartbeat", description="Type of message") 30 | sender = odm.Keyword(description="Sender of message") 31 | -------------------------------------------------------------------------------- /dev/depends/config/filebeat.docker.yml: -------------------------------------------------------------------------------- 1 | filebeat.inputs: 2 | - type: container 3 | format: docker 4 | paths: 5 | - '/var/lib/docker/containers/*/*.log' 6 | stream: "all" 7 | json: 8 | keys_under_root: true 9 | message_key: message 10 | ignore_decoding_error: true 11 | processors: 12 | - rename: 13 | fields: 14 | - from: "error" 15 | to: "error.message" 16 | ignore_missing: true 17 | - script: 18 | lang: javascript 19 | id: log_level 20 | source: > 21 | function process(event) { 22 | var value = event.Get("log.level"); 23 | if (value === null){ 24 | value = "INFO" 25 | } 26 | else if (value.toLowerCase() == "warn"){ 27 | value = "WARNING" 28 | } 29 | else if (value.toLowerCase() == "err"){ 30 | value = "ERROR" 31 | } 32 | event.Put("log.level", value.toUpperCase()); 33 | } 34 | 35 | logging: 36 | level: warning
37 | json: true 38 | 39 | processors: 40 | - add_cloud_metadata: ~ 41 | - add_docker_metadata: ~ 42 | 43 | 44 | output.elasticsearch: 45 | hosts: 'elasticsearch:9200' 46 | username: elastic 47 | password: devpass 48 | 49 | setup.template.settings: 50 | index.number_of_shards: 1 51 | index.number_of_replicas: 0 52 | setup.ilm: 53 | enabled: true 54 | policy_file: /usr/share/filebeat/filebeat_policy.json 55 | -------------------------------------------------------------------------------- /assemblyline/common/hexdump.py: -------------------------------------------------------------------------------- 1 | import binascii 2 | 3 | from assemblyline.common.chunk import chunk 4 | 5 | FILTER = b''.join([bytes([x]) if x in range(32, 127) else b'.' for x in range(256)]) 6 | 7 | 8 | def dump(binary: bytes, size: int = 2, sep: bytes = b" ") -> bytes: 9 | hexstr = binascii.hexlify(binary) 10 | return sep.join(chunk(hexstr, size)) 11 | 12 | 13 | def load(hexstr: bytes) -> bytes: 14 | return binascii.unhexlify(hexstr) 15 | 16 | 17 | def hexdump(binary: bytes, length: int = 16, indent: str = "", indent_size: int = 0, newline: str = '\n', 18 | prefix_offset: int = 0) -> str: 19 | """ 20 | Create a string buffer that shows the given data in hexdump format. 21 | 22 | binary -> source buffer 23 | length = 16 -> number of bytes per line 24 | indent = "" -> indentation before each line 25 | indent_size = 0 -> number of times to repeat that indentation 26 | newline = "\n" -> characters used as the newline 27 | 28 | Example of output: 29 | 00000000: 48 54 54 50 2F 31 2E 31 20 34 30 34 20 4E 6F 74 HTTP/1.1 404 Not 30 | 00000010: 20 46 6F 75 6E 64 0D 0A 43 6F 6E 74 Found..Cont 31 | ... 32 | """ 33 | generator = chunk(binary, length) 34 | line_frmt = "%%s%%08X: %%-%ss %%s" % ((length * 3) - 1) 35 | 36 | out = [line_frmt % (indent * indent_size, prefix_offset + (addr * length), dump(d).decode(), 37 | d.translate(FILTER).decode()) 38 | for addr, d in enumerate(generator)] 39 | return newline.join(out) 40 | -------------------------------------------------------------------------------- /assemblyline/odm/messages/retrohunt_heartbeat.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | 3 | MSG_TYPES = {"RetrohuntHeartbeat"} 4 | LOADER_CLASS = "assemblyline.odm.messages.retrohunt_heartbeat.RetrohuntMessage" 5 | 6 | 7 | @odm.model(description="Heartbeat Model for retrohunt") 8 | class Heartbeat(odm.Model): 9 | instances = odm.Integer(description="Number of retrohunt workers") 10 | request_time = odm.Optional(odm.Float(description="Time to load metrics")) 11 | pending_files = odm.integer(description="Files not yet available for searching") 12 | ingested_last_minute = odm.integer(description="Files ingested in last minute") 13 | worker_storage_available = odm.integer(description="Free storage for most depleted worker") 14 | total_storage_available = odm.integer(description="Free storage across workers") 15 | active_searches = odm.integer(description="Number of currently running searches") 16 | last_minute_cpu = odm.Float(description="Last minute cpu load across all workers") 17 | total_memory_used = odm.Float(description="Estimated current memory use across all workers") 18 | 19 | 20 | @odm.model(description="Model of retrohunt heartbeat message") 21 | class RetrohuntMessage(odm.Model): 22 | msg = odm.Compound(Heartbeat, description="Heartbeat message for retrohunt") 23 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class
for message") 24 | msg_type = odm.Enum(values=MSG_TYPES, default="RetrohuntHeartbeat", description="Type of message") 25 | sender = odm.Keyword(description="Sender of message") 26 | -------------------------------------------------------------------------------- /dev/depends/config/metricbeat.docker.yml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: warning 3 | json: true 4 | 5 | metricbeat.modules: 6 | - module: system 7 | metricsets: 8 | - cpu 9 | - load 10 | - memory 11 | - network 12 | - process 13 | - process_summary 14 | - uptime 15 | - socket_summary 16 | - diskio 17 | - fsstat 18 | - socket 19 | enabled: true 20 | period: 10s 21 | processes: ['.*'] 22 | 23 | # Configure the metric types that are included by these metricsets. 24 | cpu.metrics: ["percentages"] # The other available options are normalized_percentages and ticks. 25 | core.metrics: ["percentages"] 26 | - module: redis 27 | metricsets: 28 | - "info" 29 | - "keyspace" 30 | period: 10s 31 | hosts: ["redis:6379"] 32 | - module: docker 33 | metricsets: 34 | - "container" 35 | - "cpu" 36 | - "diskio" 37 | - "event" 38 | - "healthcheck" 39 | - "info" 40 | - "memory" 41 | - "network" 42 | hosts: ["unix:///var/run/docker.sock"] 43 | period: 10s 44 | enabled: true 45 | processors: 46 | - add_docker_metadata: ~ 47 | 48 | output.console: 49 | enabled: false 50 | 51 | output.elasticsearch: 52 | hosts: 'elasticsearch:9200' 53 | username: elastic 54 | password: devpass 55 | 56 | processors: 57 | - add_cloud_metadata: ~ 58 | 59 | setup.template.settings: 60 | index.number_of_shards: 1 61 | index.number_of_replicas: 0 62 | setup.ilm: 63 | enabled: true 64 | policy_file: /usr/share/metricbeat/metricbeat_policy.json -------------------------------------------------------------------------------- /assemblyline/remote/datatypes/lock.py: -------------------------------------------------------------------------------- 1 | 2 | from assemblyline.common.uid import get_random_id 3 | from assemblyline.remote.datatypes import get_client, retry_call 4 | 5 | lock_acquire_script = """ 6 | local lock_holder = ARGV[1] 7 | local uuid = ARGV[2] 8 | local timeout = ARGV[3] 9 | if redis.call('setnx', lock_holder, uuid) == 1 then 10 | redis.call('expire', lock_holder, timeout) 11 | return true 12 | end 13 | return false 14 | """ 15 | 16 | lock_release_script = """ 17 | local lock_holder = ARGV[1] 18 | local lock_release = ARGV[2] 19 | local uuid = ARGV[3] 20 | if redis.call('get', lock_holder) == uuid then 21 | redis.call('del', lock_holder) 22 | redis.call('rpush', lock_release, uuid) 23 | redis.call('expire', lock_release, 1) 24 | end 25 | """ 26 | 27 | 28 | class Lock(object): 29 | def __init__(self, name, timeout, host=None, port=None): 30 | self.uuid = get_random_id() 31 | self.c = get_client(host, port, False) 32 | self.lock_release = '-'.join(('lock', str(timeout), name, 'released')) 33 | self.lock_holder = '-'.join(('lock', str(timeout), name, 'holder')) 34 | self.timeout = timeout 35 | self._acquire = self.c.register_script(lock_acquire_script) 36 | self._release = self.c.register_script(lock_release_script) 37 | 38 | def __enter__(self): 39 | while not retry_call(self._acquire, args=[self.lock_holder, self.uuid, self.timeout]): 40 | retry_call(self.c.blpop, self.lock_release, 1) 41 | 42 | def __exit__(self, unused1, unused2, unused3): 43 | retry_call(self._release, args=[self.lock_holder, self.lock_release, self.uuid]) 44 | 
-------------------------------------------------------------------------------- /assemblyline/odm/messages/alerter_heartbeat.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | 3 | MSG_TYPES = {"AlerterHeartbeat"} 4 | LOADER_CLASS = "assemblyline.odm.messages.alerter_heartbeat.AlerterMessage" 5 | 6 | 7 | @odm.model(description="Alerter Queues") 8 | class Queues(odm.Model): 9 | alert = odm.Integer(description="Number of alerts in queue") 10 | alert_retry = odm.Integer(description="Number of alerts in retry queue") 11 | 12 | 13 | @odm.model(description="Alerter Metrics") 14 | class Metrics(odm.Model): 15 | created = odm.Integer(description="Number of alerts created") 16 | error = odm.Integer(description="Number of alerts with errors") 17 | received = odm.Integer(description="Number of alerts received") 18 | updated = odm.Integer(description="Number of alerts updated") 19 | wait = odm.Integer(description="Number of alerts waiting for submission to complete") 20 | 21 | 22 | @odm.model(description="Heartbeat Model for Alerter") 23 | class Heartbeat(odm.Model): 24 | instances = odm.Integer(description="Number of Alerter instances") 25 | metrics = odm.Compound(Metrics, description="Alert metrics") 26 | queues = odm.Compound(Queues, description="Alert queues") 27 | 28 | 29 | @odm.model(description="Model of Alerter Heartbeat Message") 30 | class AlerterMessage(odm.Model): 31 | msg = odm.Compound(Heartbeat, description="Heartbeat message from Alerter") 32 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message") 33 | msg_type = odm.Enum(values=MSG_TYPES, default="AlerterHeartbeat", description="Type of message") 34 | sender = odm.Keyword(description="Sender of message") 35 | -------------------------------------------------------------------------------- /assemblyline/odm/messages/archive_heartbeat.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | 3 | MSG_TYPES = {"ArchiveHeartbeat"} 4 | LOADER_CLASS = "assemblyline.odm.messages.archive_heartbeat.ArchiveMessage" 5 | 6 | 7 | @odm.model(description="Archive Metrics") 8 | class Metrics(odm.Model): 9 | # Indices metrics 10 | file = odm.Integer(description="Number of files archived") 11 | result = odm.Integer(description="Number of results archived") 12 | submission = odm.Integer(description="Number of submissions archived") 13 | # Messaging metrics 14 | received = odm.Integer(description="Number of received archive messages") 15 | exception = odm.Integer(description="Number of exceptions during archiving") 16 | invalid = odm.Integer(description="Number of invalid archive type errors during archiving") 17 | not_found = odm.Integer(description="Number of submission not found failures during archiving") 18 | 19 | 20 | @odm.model(description="Archive Heartbeat Model") 21 | class Heartbeat(odm.Model): 22 | instances = odm.Integer(description="Number of instances") 23 | metrics = odm.Compound(Metrics, description="Archive metrics") 24 | queued = odm.Integer(description="Number of documents to be archived") 25 | 26 | 27 | @odm.model(description="Model for Archive Heartbeat Messages") 28 | class ArchiveMessage(odm.Model): 29 | msg = odm.Compound(Heartbeat, description="Heartbeat message") 30 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message") 31 | msg_type = odm.Enum(values=MSG_TYPES, default="ArchiveHeartbeat", 
description="Message type") 32 | sender = odm.Keyword(description="Sender of message") 33 | -------------------------------------------------------------------------------- /assemblyline/datasource/alert.py: -------------------------------------------------------------------------------- 1 | from assemblyline.common import forge 2 | from assemblyline.datasource.common import Datasource 3 | 4 | Classification = forge.get_classification() 5 | 6 | 7 | class Alert(Datasource): 8 | def __init__(self, log, **kw): 9 | super(Alert, self).__init__(log, **kw) 10 | self.datastore = forge.get_datastore() 11 | 12 | def parse(self, results, **kw): 13 | return results 14 | 15 | def query(self, value, **kw): 16 | hash_type = self.hash_type(value) 17 | 18 | query = "file.%s:%s OR file.%s:%s" % ( 19 | hash_type, value.lower(), hash_type, value.upper() 20 | ) 21 | 22 | res = self.datastore.alert.search(query, rows=5, sort="al.score desc", 23 | access_control=kw['access_control'], as_obj=False) 24 | 25 | count = res['total'] 26 | if count <= 0: 27 | return [] 28 | 29 | data = [] 30 | item = { 31 | "confirmed": False, 32 | "data": data, 33 | "description": "Alerted on %s times" % str(count), 34 | "malicious": False, 35 | } 36 | 37 | for r in res['items']: 38 | score = r['al']['score'] 39 | if score >= 500: 40 | item['malicious'] = True 41 | if score >= 2000 or score <= -100: 42 | item['confirmed'] = True 43 | 44 | data.append({ 45 | "classification": r['classification'], 46 | "date": r['reporting_ts'], 47 | "id": r['id'], 48 | "score": r['al']['score'], 49 | }) 50 | 51 | return [item] 52 | -------------------------------------------------------------------------------- /assemblyline/odm/models/ontology/file.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.odm.models.ontology.filetypes import PE 3 | 4 | 5 | @odm.model(description="File Characteristics") 6 | class File(odm.Model): 7 | # Common information 8 | md5 = odm.MD5(description="MD5 of file") 9 | sha1 = odm.SHA1(description="SHA1 of file") 10 | sha256 = odm.SHA256(description="SHA256 of file") 11 | type = odm.Optional(odm.Keyword(description="Type of file as identified by Assemblyline")) 12 | size = odm.Integer(description="Size of the file in bytes") 13 | names = odm.Optional(odm.List(odm.Text()), description="Known filenames associated to file") 14 | parent = odm.Optional(odm.SHA256(), description="Absolute parent of file relative to submission") 15 | 16 | # Specialized information (List from Tagging.File) 17 | # apk = odm.Optional(odm.Compound(APK), description="APK File Properties") 18 | # jar = odm.Optional(odm.Compound(JAR), description="JAR File Properties") 19 | # img = odm.Optional(odm.Compound(IMG), description="Image File Properties") 20 | # ole = odm.Optional(odm.Compound(OLE), description="OLE File Properties") 21 | pe = odm.Optional(odm.Compound(PE), description="Properties related to PE") 22 | # pdf = odm.Optional(odm.Compound(PDF), description="PDF File Properties") 23 | # plist = odm.Optional(odm.Compound(PList), description="PList File Properties") 24 | # powershell = odm.Optional(odm.Compound(PowerShell), description="PowerShell File Properties") 25 | # shortcut = odm.Optional(odm.Compound(Shortcut), description="Shortcut File Properties") 26 | # swf = odm.Optional(odm.Compound(SWF), description="SWF File Properties") 27 | -------------------------------------------------------------------------------- /assemblyline/common/path.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import os 3 | import string 4 | import sys 5 | from typing import Optional 6 | 7 | 8 | def modulepath(modulename: str) -> str: 9 | m = sys.modules[modulename] 10 | f = getattr(m, '__file__', None) 11 | if not f: 12 | return os.path.abspath(os.getcwd()) 13 | return os.path.dirname(os.path.abspath(f)) 14 | 15 | 16 | def splitpath(path: str, sep: Optional[str] = None) -> list: 17 | """ Split the path into a list of items """ 18 | return list(filter(len, path.split(sep or os.path.sep))) 19 | 20 | 21 | def strip_path_inclusion(path: str, base: str) -> str: 22 | path = path.replace("\\", os.path.sep).replace("/", os.path.sep) 23 | return path if os.path.abspath(os.path.join(base, path)).startswith(base) else os.path.basename(path) 24 | 25 | 26 | ASCII_NUMBERS = list(range(48, 58)) 27 | ASCII_UPPER_CASE_LETTERS = list(range(65, 91)) 28 | ASCII_LOWER_CASE_LETTERS = list(range(97, 123)) 29 | ASCII_OTHER = [45, 46, 92] # "-", ".", and "\" 30 | 31 | # Create a set that contains all of the valid characters that 32 | # are allowed to appear in a Universal Naming Convention (UNC) path. 33 | VALID_UNC_CHARS = {chr(x) for x in ASCII_LOWER_CASE_LETTERS + 34 | ASCII_UPPER_CASE_LETTERS + ASCII_NUMBERS + ASCII_OTHER} 35 | 36 | 37 | def is_unc_legal(path: str) -> bool: 38 | """Determine whether a given string representing a Windows file path is legal 39 | as per the Universal Naming Convention (UNC) specifications.""" 40 | if len(path) <= 0: 41 | return False 42 | 43 | for char in path: 44 | if char not in VALID_UNC_CHARS: 45 | return False 46 | return True 47 | -------------------------------------------------------------------------------- /assemblyline/common/signaturing.py: -------------------------------------------------------------------------------- 1 | # TODO: Are we still using this?
2 | 3 | import re 4 | 5 | 6 | _operators = { 7 | 'in': lambda args: lambda x: x in args, 8 | 'not in': lambda args: lambda x: x not in args, 9 | 'regexp': lambda args: re.compile(*args).match, 10 | } 11 | 12 | 13 | def _transform(condition): 14 | if isinstance(condition, str): 15 | args = [condition] 16 | func = 'regexp' 17 | else: 18 | args = list(condition[1:]) 19 | func = condition[0] 20 | 21 | return _operators[func](args) 22 | 23 | 24 | # noinspection PyBroadException 25 | def _call(cache, data, func, key): 26 | try: 27 | value = cache.get(key, None) 28 | if not value: 29 | cache[key] = value = data.get(key) 30 | if not callable(func): 31 | func = _transform(func) 32 | return {key: value} if func(value) else {} 33 | except Exception: # pylint: disable=W0702 34 | return {} 35 | 36 | 37 | def _match(cache, data, sig): 38 | summary = {} 39 | results = [ 40 | _call(cache, data, f, k) for k, f in sig['conditions'].items() 41 | ] 42 | if all(results): 43 | for r in results: summary.update(r) 44 | return summary 45 | 46 | 47 | def _matches(data, sigs): 48 | cache = {} 49 | unknown = 0 50 | for sig in sigs: 51 | result = _match(cache, data, sig) 52 | if result: 53 | name = sig.get('name', None) 54 | if not name: 55 | unknown += 1 56 | name = "unknown%d" % unknown 57 | yield name, result 58 | return 59 | 60 | 61 | def drop(whitelist, data): 62 | return next(_matches(data, whitelist), ("", {})) 63 | -------------------------------------------------------------------------------- /dev/depends/docker-compose-minimal.yml: -------------------------------------------------------------------------------- 1 | version: "2.4" 2 | 3 | services: 4 | # Dependencies 5 | minio: 6 | image: minio/minio 7 | environment: 8 | MINIO_ROOT_USER: al_storage_key 9 | MINIO_ROOT_PASSWORD: Ch@ngeTh!sPa33w0rd 10 | ports: 11 | - "9000:9000" 12 | command: server /data 13 | 14 | elasticsearch: 15 | image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2 16 | environment: 17 | - xpack.security.enabled=true 18 | - discovery.type=single-node 19 | - logger.level=WARN 20 | - "ELASTIC_PASSWORD=devpass" 21 | - "ES_JAVA_OPTS=-Xms1024m -Xmx1024m" 22 | - "cluster.routing.allocation.disk.watermark.low=10gb" 23 | - "cluster.routing.allocation.disk.watermark.high=5gb" 24 | - "cluster.routing.allocation.disk.watermark.flood_stage=1gb" 25 | ports: 26 | - "9200:9200" 27 | healthcheck: 28 | test: 29 | [ 30 | "CMD-SHELL", 31 | "curl --silent --fail -u elastic:$$ELASTIC_PASSWORD localhost:9200/_cluster/health || exit 1", 32 | ] 33 | interval: 30s 34 | timeout: 30s 35 | retries: 3 36 | 37 | redis: 38 | image: redis 39 | ports: 40 | - "6379:6379" 41 | - "6380:6379" 42 | healthcheck: 43 | test: ["CMD", "redis-cli", "ping"] 44 | interval: 30s 45 | timeout: 10s 46 | retries: 3 47 | 48 | nginx: 49 | image: cccs/nginx-ssl-frontend:mui5 50 | ports: 51 | - "80:80" 52 | - "443:443" 53 | environment: 54 | - FRONTEND_HOST=172.17.0.1 55 | - UI_HOST=172.17.0.1 56 | - SOCKET_HOST=172.17.0.1 57 | - FQDN=localhost 58 | - MAX_BODY_SIZE=100M 59 | - TEMPLATE=minimal 60 | 61 | networks: 62 | default: 63 | name: external 64 | -------------------------------------------------------------------------------- /assemblyline/common/lucene.lark: -------------------------------------------------------------------------------- 1 | %import common.ESCAPED_STRING 2 | %import common.CNAME 3 | %import common.DIGIT 4 | %import common.LETTER 5 | %ignore WHITESPACE 6 | 7 | WHITESPACE: " " 8 | 9 | start: expression 10 | 11 | expression: or_expr 12 | 13 | or_expr: and_expr
("OR" and_expr)* 14 | and_expr: not_expr ("AND" not_expr)* 15 | not_expr: NOT_OPERATOR? atom 16 | 17 | NOT_OPERATOR: "NOT" 18 | 19 | atom: field 20 | | term 21 | | "(" expression ")" 22 | 23 | term: PREFIX_OPERATOR? (phrase_term | SIMPLE_TERM) 24 | field_term: PREFIX_OPERATOR? (phrase_term | SIMPLE_TERM) 25 | 26 | PREFIX_OPERATOR: "-" | "+" | ">=" | "<=" | ">" | "<" 27 | 28 | SIMPLE_TERM: ("\\+" | "\\-" | "\\&" | "\\&&" | "\\|" | "\\||" | "\\!" | "\\(" | "\\)" | "\\{" 29 | | "\\}" | "\\[" | "\\]" | "\\^" | "\\\"" | "\\~" | "\\*" | "\\ " 30 | | "\\?" | "\\:" | "\\\\" | "*" | "?" | DIGIT | "_" | "-" | LETTER)+ 31 | 32 | phrase_term: ESCAPED_STRING 33 | 34 | field: FIELD_LABEL ":" field_value 35 | 36 | FIELD_LABEL: CNAME ["." CNAME]* 37 | 38 | field_value: range 39 | | field_term 40 | | REGEX_TERM 41 | | "(" field_expression ")" 42 | 43 | REGEX_TERM: /\/([^\/]|(\\\/))*\// 44 | 45 | range: RANGE_START first_range_term "TO" second_range_term RANGE_END 46 | RANGE_START: "[" | "{" 47 | RANGE_END: "]" | "}" 48 | 49 | 50 | field_expression: field_or_expr 51 | field_or_expr: field_and_expr ("OR" field_and_expr)* 52 | field_and_expr: field_not_expr ("AND" field_not_expr)* 53 | field_not_expr: NOT_OPERATOR? field_atom 54 | field_atom: field_term 55 | | "(" field_expression ")" 56 | 57 | first_range_term: RANGE_WILD | QUOTED_RANGE | FIRST_RANGE 58 | second_range_term: RANGE_WILD | QUOTED_RANGE | SECOND_RANGE 59 | QUOTED_RANGE: ESCAPED_STRING 60 | FIRST_RANGE: /[^ ]+/ 61 | SECOND_RANGE: /[^\]\}]+/ 62 | RANGE_WILD: "*" -------------------------------------------------------------------------------- /test/key.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBANALE6wPhlNJ+aLt 3 | AX4HL43lI16CDs/9MOlX2a8/4Bp+cXv7sdPPJ6MTbqvrhpnA14cVgHl1LRu2OGxm 4 | etV7MwRQbJZ5NtVMLjKrU9wcYod7B9ZKUel4Jgbjk0CtO2txYbi9gNOkWaxwcmNF 5 | BfKIsaGOQkB5vzK7mnf7dO9ALYE/AgMBAAECgYA9rrsTbbru4OUCGHEz05+W25RE 6 | Bh2sLy6cUK67Fh403L56+yI7YZUn9a//iyJqXdHJPGfOGx7Xs4xBH5VVzGRQXo7i 7 | t6HOsB/oDwOTt5JKImJ+0JY6cn2MhWbsNY+oPJppe7CRoUKURHZY61+WDi8zT1mR 8 | Qrfo3jDgg6cX3zZwcQJBAP/wy8S2LN24okziCfssyF3WHb1Pkvc0/ITQle2+gQTZ 9 | YyF1H+2xGJOF3/wi19sE2bQuXigg0Ou+lyR1z3cFnRcCQQDQF2+AjB2mFrPqZ9Md 10 | qnP4GUrKT574CsHy5G0OniHSFrauKRCBjEwm4RXRm9lfs/RWA81/s7RTFWCJUq9m 11 | hmYZAkEAtK2PnAGjMK7b3Hyh4TAfDqdN/UvEi0FbloMNpHUc7YhtQ7xEWu7vU41p 12 | rrwGN/Z3nYwyKg/ojNPSLQoB+Jr85wJAZjPcc8pdlYF5BBvSOLPLGYNylELe1PyT 13 | nXRLi+5mtgSp3IgWr0n07POH/9cHwFVmIAjmGV5tppDNRSTzOOuxoQJBAKBKAMJm 14 | a64VkrqR1xkm9PYeUbNV8X28USnsPkw4I2shHHmwMwj+Vyo10IC0XtDto7ZrVAM9 15 | v5XYnKwRopUnj9c= 16 | -----END PRIVATE KEY----- 17 | -----BEGIN CERTIFICATE----- 18 | MIICWjCCAcOgAwIBAgIUQJONlWz9w+fbJgb/CmPv7Mj5wT0wDQYJKoZIhvcNAQEL 19 | BQAwPzELMAkGA1UEBhMCQ0ExEDAOBgNVBAgMB09udGFyaW8xDzANBgNVBAcMBk90 20 | dGF3YTENMAsGA1UECgwEQ0NDUzAeFw0yMTA3MjkxNzU1MzBaFw0zMTA3MjcxNzU1 21 | MzBaMD8xCzAJBgNVBAYTAkNBMRAwDgYDVQQIDAdPbnRhcmlvMQ8wDQYDVQQHDAZP 22 | dHRhd2ExDTALBgNVBAoMBENDQ1MwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGB 23 | ANALE6wPhlNJ+aLtAX4HL43lI16CDs/9MOlX2a8/4Bp+cXv7sdPPJ6MTbqvrhpnA 24 | 14cVgHl1LRu2OGxmetV7MwRQbJZ5NtVMLjKrU9wcYod7B9ZKUel4Jgbjk0CtO2tx 25 | Ybi9gNOkWaxwcmNFBfKIsaGOQkB5vzK7mnf7dO9ALYE/AgMBAAGjUzBRMB0GA1Ud 26 | DgQWBBTpHO34t3bWXUt0+eR9M/7KiGnEnzAfBgNVHSMEGDAWgBTpHO34t3bWXUt0 27 | +eR9M/7KiGnEnzAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4GBAChY 28 | fK7P81aqFQeWimgKD1AE/uzVToHCEcOryUl7VrQkHjToFyzeuXcUF/+n4pjyss8r 29 | 
mLmZolYrwuQ95UpEsNc0j/uVODFPxztjQYwi25UZS4YUSCxgufulanuaWIm4TdEs 30 | Mxt9/sQFrE0FZ6xivB27BiKEqmP+Q8g7yeZYOS4w 31 | -----END CERTIFICATE----- 32 | -------------------------------------------------------------------------------- /test/test_path.py: -------------------------------------------------------------------------------- 1 | from assemblyline.common import path 2 | 3 | 4 | def test_strip_path_injection_linux(): 5 | test_str = 'filename' 6 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename' 7 | 8 | test_str = 'foldername/filename' 9 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'foldername/filename' 10 | 11 | test_str = '.filename' 12 | assert path.strip_path_inclusion(test_str, "/home/al-user") == '.filename' 13 | 14 | test_str = '.foldername/filename' 15 | assert path.strip_path_inclusion(test_str, "/home/al-user") == '.foldername/filename' 16 | 17 | test_str = './foldername/filename' 18 | assert path.strip_path_inclusion(test_str, "/home/al-user") == './foldername/filename' 19 | 20 | test_str = '/foldername/filename' 21 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename' 22 | 23 | test_str = '../foldername/filename' 24 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename' 25 | 26 | test_str = '../../../../foldername/filename' 27 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename' 28 | 29 | test_str = '.././//./..//../../../foldername/filename' 30 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename' 31 | 32 | test_str = '////./..//../../../foldername/filename' 33 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename' 34 | 35 | test_str = 'realfolder/../../../foldername/filename' 36 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename' 37 | 38 | test_str = '..foldername/..filename' 39 | assert path.strip_path_inclusion(test_str, "/home/al-user") == '..foldername/..filename' 40 | 41 | test_str = '.././//./..//../../../foldername/../../././//../filename' 42 | assert path.strip_path_inclusion(test_str, "/home/al-user") == 'filename' 43 | -------------------------------------------------------------------------------- /assemblyline/odm/messages/expiry_heartbeat.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | 3 | MSG_TYPES = {"ExpiryHeartbeat"} 4 | LOADER_CLASS = "assemblyline.odm.messages.expiry_heartbeat.ExpiryMessage" 5 | 6 | 7 | @odm.model(description="Expiry Stats") 8 | class Metrics(odm.Model): 9 | alert = odm.Integer(description="Number of alerts") 10 | badlist = odm.Integer(description="Number of badlisted items") 11 | cached_file = odm.Integer(description="Number of cached files") 12 | emptyresult = odm.Integer(description="Number of empty results") 13 | error = odm.Integer(description="Number of errors") 14 | file = odm.Integer(description="Number of files") 15 | filescore = odm.Integer(description="Number of filescores") 16 | result = odm.Integer(description="Number of results") 17 | retrohunt_hit = odm.Integer(description="Number of retrohunt hits") 18 | safelist = odm.Integer(description="Number of safelisted items") 19 | submission = odm.Integer(description="Number of submissions") 20 | submission_tree = odm.Integer(description="Number of submission trees") 21 | submission_summary = odm.Integer(description="Number of submission summaries") 22 | 23 | 24 | @odm.model(description="Heartbeat Model") 25 | class Heartbeat(odm.Model):
26 | instances = odm.Integer(description="Number of instances") 27 | metrics = odm.Compound(Metrics, description="Expiry metrics") 28 | queues = odm.Compound(Metrics, description="Expiry queues") 29 | 30 | 31 | @odm.model(description="Model of Expiry Heartbeat Message") 32 | class ExpiryMessage(odm.Model): 33 | msg = odm.Compound(Heartbeat, description="Heartbeat message") 34 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message") 35 | msg_type = odm.Enum(values=MSG_TYPES, default="ExpiryHeartbeat", description="Type of message") 36 | sender = odm.Keyword(description="Sender of message") 37 | -------------------------------------------------------------------------------- /docker/local_dev.Dockerfile: -------------------------------------------------------------------------------- 1 | # NOTE: to build this container you must be in a directory where assemblyline-base, assemblyline-ui, 2 | # assemblyline-core, assemblyline-service-server and assemblyline-service-client code is checked out 3 | FROM python:3.11-slim-bookworm 4 | 5 | # Upgrade packages 6 | RUN apt-get update && apt-get -yy upgrade && rm -rf /var/lib/apt/lists/* 7 | 8 | # SSDEEP pkg requirements 9 | RUN apt-get update -yy \ 10 | && apt-get install -yy build-essential libffi-dev libfuzzy-dev libldap2-dev libsasl2-dev libmagic1 libssl-dev \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Create Assemblyline source directory 14 | RUN mkdir -p /etc/assemblyline 15 | RUN mkdir -p /var/cache/assemblyline 16 | RUN mkdir -p /var/lib/assemblyline 17 | RUN mkdir -p /var/lib/assemblyline/flowjs 18 | RUN mkdir -p /var/lib/assemblyline/bundling 19 | RUN mkdir -p /var/log/assemblyline 20 | RUN mkdir -p /opt/alv4 21 | WORKDIR /opt/alv4 22 | 23 | # Set up environment variables 24 | ENV PYTHONPATH /opt/alv4/assemblyline-base:/opt/alv4/assemblyline-core:/opt/alv4/assemblyline-service-server:/opt/alv4/assemblyline-service-client:/opt/alv4/assemblyline_client:/opt/alv4/assemblyline-ui 25 | 26 | RUN pip install --upgrade pip 27 | RUN pip install debugpy 28 | 29 | COPY assemblyline-base assemblyline-base 30 | RUN pip install --no-warn-script-location -e ./assemblyline-base[test] 31 | 32 | COPY assemblyline-core assemblyline-core 33 | RUN pip install --no-warn-script-location -e ./assemblyline-core[test] 34 | 35 | COPY assemblyline-ui assemblyline-ui 36 | RUN pip install --no-warn-script-location -e ./assemblyline-ui[test,socketio] 37 | 38 | COPY assemblyline_client assemblyline_client 39 | RUN pip install --no-warn-script-location -e ./assemblyline_client[test] 40 | 41 | RUN pip uninstall -y assemblyline 42 | RUN pip uninstall -y assemblyline_core 43 | RUN pip uninstall -y assemblyline_ui 44 | RUN pip uninstall -y assemblyline_client 45 | -------------------------------------------------------------------------------- /assemblyline/odm/models/signature.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common import forge 3 | from assemblyline.odm.models.statistics import Statistics 4 | 5 | Classification = forge.get_classification() 6 | 7 | DEPLOYED_STATUSES = ['DEPLOYED', 'NOISY', 'DISABLED'] 8 | DRAFT_STATUSES = ['STAGING', 'TESTING'] 9 | STALE_STATUSES = ['INVALID'] 10 | 11 | RULE_STATUSES = DEPLOYED_STATUSES + DRAFT_STATUSES + STALE_STATUSES 12 | 13 | 14 | @odm.model(index=True, store=True) 15 | class Signature(odm.Model): 16 | classification = odm.Classification(store=True,
default=Classification.UNRESTRICTED, description="Security classification assigned to the signature based on its contents and context.") 17 | data = odm.Text(copyto="__text__", store=False) 18 | last_modified = odm.Date(default="NOW", description="Notes the last modification timestamp of the signature.") 19 | name = odm.Keyword(copyto="__text__", description="Name of the signature.") 20 | order = odm.Integer(default=1, store=False, deprecation="no longer used in v4") 21 | revision = odm.Keyword(default="1", description="") 22 | signature_id = odm.Optional(odm.Keyword(), description="ID associated with the signature.") 23 | source = odm.Keyword(description="Source or author of the signature.") 24 | state_change_date = odm.Optional(odm.Date(store=False), description="Date the signature's state was last changed.") 25 | state_change_user = odm.Optional(odm.Keyword(store=False), description="User who last changed the signature's state.") 26 | stats = odm.Compound(Statistics, default={}, description="Stats associated with count, average, min, max, and sum of various signature metrics.") 27 | status = odm.Enum(values=RULE_STATUSES, copyto="__text__", description="The current state of the signature (i.e. NOISY, DISABLED, DEPLOYED, etc.).") 28 | type = odm.Keyword(copyto="__text__", description="The service type that the signature is associated with.") 29 | 30 | -------------------------------------------------------------------------------- /assemblyline/odm/messages/service_heartbeat.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | 3 | MSG_TYPES = {"ServiceHeartbeat"} 4 | LOADER_CLASS = "assemblyline.odm.messages.service_heartbeat.ServiceMessage" 5 | 6 | 7 | @odm.model(description="Service Metrics") 8 | class Metrics(odm.Model): 9 | cache_hit = odm.Integer(description="Number of cache hits") 10 | cache_miss = odm.Integer(description="Number of cache misses") 11 | cache_skipped = odm.Integer(description="Number of cache skips") 12 | execute = odm.Integer(description="Number of service executes") 13 | fail_recoverable = odm.Integer(description="Number of recoverable fails") 14 | fail_nonrecoverable = odm.Integer(description="Number of non-recoverable fails") 15 | scored = odm.Integer(description="Number of tasks scored") 16 | not_scored = odm.Integer(description="Number of tasks not scored") 17 | 18 | 19 | @odm.model(description="Service Activity") 20 | class Activity(odm.Model): 21 | busy = odm.Integer(description="Number of busy instances") 22 | idle = odm.Integer(description="Number of idle instances") 23 | 24 | 25 | @odm.model(description="Heartbeat Model") 26 | class Heartbeat(odm.Model): 27 | activity = odm.Compound(Activity, description="Service activity") 28 | instances = odm.Integer(description="Service instances") 29 | metrics = odm.Compound(Metrics, description="Service metrics") 30 | queue = odm.Integer(description="Service queue") 31 | service_name = odm.Keyword(description="Service name") 32 | 33 | 34 | @odm.model(description="Model of Service Heartbeat Message") 35 | class ServiceMessage(odm.Model): 36 | msg = odm.Compound(Heartbeat, description="Heartbeat message") 37 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message") 38 | msg_type = odm.Enum(values=MSG_TYPES, default="ServiceHeartbeat", description="Type of message") 39 | sender = odm.Keyword(description="Sender of message") 40 | 
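In practice a heartbeat like this would be published for the metrics system to pick up; a rough sketch follows (illustrative values; the CommsQueue pub/sub wrapper listed under remote/datatypes/queues and the 'status' channel name are assumptions here, not confirmed by this file):

from assemblyline.odm.messages.service_heartbeat import ServiceMessage
from assemblyline.remote.datatypes.queues.comms import CommsQueue

msg = ServiceMessage({
    "msg": {
        "activity": {"busy": 2, "idle": 1},
        "instances": 3,
        "metrics": {"cache_hit": 10, "cache_miss": 4, "cache_skipped": 0,
                    "execute": 14, "fail_recoverable": 1, "fail_nonrecoverable": 0,
                    "scored": 12, "not_scored": 2},
        "queue": 5,
        "service_name": "Extract",  # hypothetical service name
    },
    "sender": "heartbeat_manager",
})
# Publish the primitive dict form; 'status' is an assumed channel name.
CommsQueue('status').publish(msg.as_primitives())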
-------------------------------------------------------------------------------- /dev/core/docker-compose-sca-upd.yml: -------------------------------------------------------------------------------- 1 | version: "2.4" 2 | 3 | services: 4 | al_scaler: 5 | image: cccs/assemblyline_dev:4.6.1 6 | env_file: 7 | - .env 8 | environment: 9 | DOCKER_CONFIGURATION_PATH: /mount/service_config/ 10 | DOCKER_CONFIGURATION_VOLUME: service_config 11 | AL_CORE_NETWORK: external 12 | volumes: 13 | - type: volume 14 | source: service_config 15 | target: /mount/service_config/ 16 | read_only: false 17 | - ${PATH_REWRITE:-.}/config/:/etc/assemblyline/ 18 | - ${ROOT_REWRITE:-../../..}/:/opt/alv4/ 19 | - /var/run/docker.sock:/var/run/docker.sock # NOTE, this container has access to docker socket (this is like root) 20 | command: python3 /opt/alv4/assemblyline-core/assemblyline_core/scaler/run_scaler.py 21 | healthcheck: 22 | test: 23 | [ 24 | "CMD", 25 | "bash", 26 | "-c", 27 | "if [[ ! `find /tmp/heartbeat -newermt '-30 seconds'` ]]; then false; fi", 28 | ] 29 | 30 | al_updater: 31 | image: cccs/assemblyline_dev:4.6.1 32 | env_file: 33 | - .env 34 | environment: 35 | AL_CORE_NETWORK: external 36 | CONTAINER_CHECK_INTERVAL: 5 37 | UPDATE_CHECK_INTERVAL: 5 38 | volumes: 39 | - ${PATH_REWRITE:-.}/config/:/etc/assemblyline/ 40 | - ${ROOT_REWRITE:-../../..}/:/opt/alv4/ 41 | - /var/run/docker.sock:/var/run/docker.sock # NOTE, this container has access to docker socket (this is like root) 42 | command: python3 /opt/alv4/assemblyline-core/assemblyline_core/updater/run_updater.py 43 | healthcheck: 44 | test: 45 | [ 46 | "CMD", 47 | "bash", 48 | "-c", 49 | "if [[ ! `find /tmp/heartbeat -newermt '-30 seconds'` ]]; then false; fi", 50 | ] 51 | 52 | networks: 53 | default: 54 | external: true 55 | name: external 56 | 57 | volumes: 58 | service_config: 59 | name: service_config 60 | -------------------------------------------------------------------------------- /assemblyline/odm/models/ontology/results/antivirus.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.odm.models.ontology.results.process import ObjectID 3 | from assemblyline.common.dict_utils import get_dict_fingerprint_hash 4 | 5 | OID_PARTS = ['engine_name', 'virus_name'] 6 | TAG_PARTS = ['engine_name', 'virus_name'] 7 | 8 | 9 | @odm.model(description="Antivirus Ontology Model") 10 | class Antivirus(odm.Model): 11 | objectid = odm.Compound(ObjectID, description="The object ID of the antivirus object") 12 | engine_name = odm.Keyword(description="Name of antivirus engine") 13 | engine_version = odm.Optional(odm.Keyword(), description="Version of antivirus engine") 14 | engine_definition_version = odm.Optional(odm.Keyword(), description="Version of definition set") 15 | virus_name = odm.Optional(odm.Keyword(), description="The name of the virus") 16 | # What category does the verdict fall under? 17 | category = odm.Optional(odm.Enum(['type-unsupported', 18 | 'undetected', 19 | 'failure', 20 | 'suspicious', 21 | 'malicious']), 22 | description="What category does the verdict fall under?
") 28 | 29 | def get_oid(data: dict): 30 | return f"antivirus_{get_dict_fingerprint_hash({key: data.get(key) for key in OID_PARTS})}" 31 | 32 | def get_tag(data: dict): 33 | return ".".join([data.get(key) for key in TAG_PARTS if data.get(key)]) 34 | -------------------------------------------------------------------------------- /assemblyline/odm/models/workflow.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common import forge 3 | 4 | Classification = forge.get_classification() 5 | 6 | 7 | PRIORITIES = {None, "LOW", "MEDIUM", "HIGH", "CRITICAL"} 8 | STATUSES = {None, "MALICIOUS", "NON-MALICIOUS", "ASSESS", "TRIAGE"} 9 | 10 | 11 | @odm.model(index=True, store=True, description="Model of Workflow") 12 | class Workflow(odm.Model): 13 | classification = odm.Classification(copyto="__text__", default=Classification.UNRESTRICTED, 14 | description="Classification of the workflow") 15 | creation_date = odm.Date(default="NOW", description="Creation date of the workflow") 16 | creator = odm.Keyword(description="UID of the creator of the workflow") 17 | edited_by = odm.Keyword(description="UID of the last user to edit the workflow") 18 | enabled = odm.Boolean(default=True, description="Is this workflow enabled?") 19 | first_seen = odm.Optional(odm.Date(), description="Date of first hit on workflow") 20 | hit_count = odm.Integer(default=0, description="Number of times there was a workflow hit") 21 | labels = odm.List(odm.Keyword(), copyto="__text__", default=[], description="Labels applied by the workflow") 22 | last_edit = odm.Date(default="NOW", description="Date of last edit on workflow") 23 | last_seen = odm.Optional(odm.Date(), description="Date of last hit on workflow") 24 | name = odm.Keyword(copyto="__text__", description="Name of the workflow") 25 | origin = odm.Optional(odm.Keyword(), description="Which did this originate from?") 26 | priority = odm.Optional(odm.Enum(copyto="__text__", values=PRIORITIES), 27 | description="Priority applied by the workflow") 28 | query = odm.Keyword(description="Query that the workflow runs") 29 | status = odm.Optional(odm.Enum(copyto="__text__", values=STATUSES), description="Status applied by the workflow") 30 | workflow_id = odm.Optional(odm.UUID(), description="ID of the workflow") 31 | -------------------------------------------------------------------------------- /dev/core/config/certs/tls.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIFazCCA1OgAwIBAgIUTxaWQFirgQeVWS/UZzROzKunuz4wDQYJKoZIhvcNAQEL 3 | BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM 4 | GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yMzA0MjAyMDA1MDNaFw0yNDA0 5 | MTkyMDA1MDNaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw 6 | HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggIiMA0GCSqGSIb3DQEB 7 | AQUAA4ICDwAwggIKAoICAQC4tiGvsQH33LePuPWr7lq1Km6/GFF1bdPQsgKAwdTe 8 | YHxTRu0yqMhaNTQbJp53nw/KPqfquraoTfS7wUC4miQwweIYtv+IhWIXSMTPtFui 9 | HIbl43ibtl1+y93ARqSvkuRIUdToVYsshH9HbcFzFECPWtOQS3hifocDzfc53lkI 10 | jRFtdQ9++O812RXuX/i3fLKfJ1WbEQUJMbRyPny0wmWG6x4s4d3tjY4PQ/hGJ8DZ 11 | cKvJROWnztHRzcSpEoGWskPxjxZ2MATDFLC8pvpgFDS4ZgKiOUvSqEq6npFJK3g6 12 | bRlG6S3tZp/hzhDXVFVBsqKv//FuBjBVB4MyFQgWIsCcfTeb0NF6FVZpAVgGdysF 13 | enLW83pHxbgcFYe24i3aRybU0gjwj/qm6I0RGeMM9jc/lAttskIi0Qf84ZGqFwiP 14 | 7Gp+dtftiAz5TzdGuYtVGTt6IRzjyuOo+8UqVGbcOAJ5t4foym2mwVshm2mOhx2d 15 | Gc/f+Bj/Qfmtrzo1uM8wrgcHzI1CjwrMi7m5eC5VehpxZeJPgIgrNW9Y7P0E2Yb+ 16 
| qAGDy/Nz7V97rN6u7waszI4DnUS1TTvTMniOlQNgXeGAPhuYcDnvUDngxbnG9eUh 17 | sult2peYX1Be5V1jnykGBi0XG0NU0JekOr/CtGjmCh9o2dt04b7Uy4g4oZGREsxz 18 | bwIDAQABo1MwUTAdBgNVHQ4EFgQUBHuYSPOJSxY2s/bl18Hald2+zE4wHwYDVR0j 19 | BBgwFoAUBHuYSPOJSxY2s/bl18Hald2+zE4wDwYDVR0TAQH/BAUwAwEB/zANBgkq 20 | hkiG9w0BAQsFAAOCAgEAUrzPFpJhjMpzeCCiqiMOTqQkFC07N3Oj6EbqWeoK6NgM 21 | cqtzNvkedkQ+DV18/CTGjgS8bNHUJIA0mWWHa8aezuZANM1wiYmKHwDzYckzPZB0 22 | WIj63BVIzXvGVMANk/Wbnfuyvnkd84FzaIPa1T0tmIJkBikX93IMPPhwUFJ509wU 23 | HceZq8QJbX6KSrDxNQ+bmLMhDM7OyV8CE5VYO63DDCccN1++g7FESN1ZzgfydbUe 24 | 82Up6iGpvaf2xbaHSn3pNbwZmalQqN0sE+9FwgGlWqOZN80qU0VwdmJA1kevxwcB 25 | Y8Uh6j9KNTPRg9Gl5dCg7P0fWsUzoa7DUtpFQO8qm0h5KMacxfZ1J6ySf5viFLWz 26 | ze6J1aFgM8LDNrSQGWIuULLFIACiv3Ct5DgkCISWiBjFG/em2dPQpG+MNSJg7NI+ 27 | 5R9uQPc2GU7vd9geg+SnNj74Hj7KRQdP+2iJsFbGvYL56ZoUWeaqAth/qtkZA5WX 28 | Q9YdabSvztarj0ZIua9MFwYmnJBb1gwNglTuGJKXxGh1j5FH4zMqIG7rppM8Vj67 29 | VYx8J4sVcjgBd2Q+Qz0n4sI1iaSmqdTSd9g0/craTpFDyp/zNqyy5yMgYT0WRBE9 30 | n2Z4EjFPkGOrcOTac0PV0akQXhgqDMti1t27wUJOtcXLSwPKpU3UJ2FfCPoqZgI= 31 | -----END CERTIFICATE----- 32 | -------------------------------------------------------------------------------- /assemblyline/remote/datatypes/queues/comms.py: -------------------------------------------------------------------------------- 1 | import json 2 | import redis 3 | 4 | from assemblyline.remote.datatypes import get_client, retry_call, log, decode 5 | 6 | 7 | class CommsQueue(object): 8 | def __init__(self, names, host=None, port=None, private=False): 9 | self.c = get_client(host, port, private) 10 | self.p = retry_call(self.c.pubsub) 11 | if not isinstance(names, list): 12 | names = [names] 13 | self.names = names 14 | self._connected = False 15 | 16 | def __enter__(self): 17 | return self 18 | 19 | def __exit__(self, exc_type, exc_val, exc_tb): 20 | retry_call(self.p.unsubscribe) 21 | 22 | def _connect(self): 23 | if not self._connected: 24 | retry_call(self.p.subscribe, self.names) 25 | self._connected = True 26 | 27 | def close(self): 28 | retry_call(self.p.close) 29 | 30 | def listen(self, blocking=True): 31 | retried = False 32 | while True: 33 | self._connect() 34 | try: 35 | if blocking: 36 | i = self.p.listen() 37 | v = next(i) 38 | else: 39 | v = self.p.get_message() 40 | if v is None: 41 | yield None 42 | continue 43 | 44 | if isinstance(v, dict) and v.get('type', None) == 'message': 45 | data = decode(v.get('data', 'null')) 46 | yield data 47 | except redis.ConnectionError: 48 | log.warning('No connection to Redis, reconnecting...') 49 | self._connected = False 50 | retried = True 51 | finally: 52 | if self._connected and retried: 53 | log.info('Reconnected to Redis!') 54 | retried = False 55 | 56 | def publish(self, message): 57 | for name in self.names: 58 | retry_call(self.c.publish, name, json.dumps(message)) 59 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pytest configuration file. Set up global pytest fixtures and functions here.
3 | """ 4 | import os 5 | 6 | from assemblyline.common import forge 7 | from assemblyline.datastore.helper import AssemblylineDatastore 8 | from assemblyline.datastore.store import ESStore, ESCollection 9 | from redis.exceptions import ConnectionError 10 | 11 | import pytest 12 | original_skip = pytest.skip 13 | 14 | # Check if we are in an unattended build environment where skips won't be noticed 15 | IN_CI_ENVIRONMENT = any(indicator in os.environ for indicator in 16 | ['CI', 'BITBUCKET_BUILD_NUMBER', 'AGENT_JOBSTATUS']) 17 | 18 | 19 | def skip_or_fail(message): 20 | """Skip or fail the current test, based on the environment""" 21 | if IN_CI_ENVIRONMENT: 22 | pytest.fail(message) 23 | else: 24 | original_skip(message) 25 | 26 | 27 | # Replace the built in skip function with our own 28 | pytest.skip = skip_or_fail 29 | 30 | 31 | @pytest.fixture(scope='session') 32 | def config(): 33 | return forge.get_config() 34 | 35 | 36 | @pytest.fixture(scope='module') 37 | def filestore(config): 38 | try: 39 | return forge.get_filestore(config, connection_attempts=1) 40 | except ConnectionError as err: 41 | pytest.skip(str(err)) 42 | 43 | 44 | @pytest.fixture(scope='module') 45 | def datastore_connection(config): 46 | ESCollection.MAX_RETRY_BACKOFF = 0.5 47 | store = ESStore(config.datastore.hosts) 48 | ret_val = store.ping() 49 | if not ret_val: 50 | pytest.skip("Could not connect to datastore") 51 | 52 | return AssemblylineDatastore(store) 53 | 54 | 55 | @pytest.fixture(scope='session') 56 | def redis_connection(): 57 | from assemblyline.remote.datatypes import get_client 58 | c = get_client(None, None, False) 59 | try: 60 | ret_val = c.ping() 61 | if ret_val: 62 | return c 63 | except ConnectionError: 64 | pass 65 | 66 | return pytest.skip("Connection to the Redis server failed. This test cannot be performed...") 67 | -------------------------------------------------------------------------------- /external/generate_tlds.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | 4 | 5 | def get_tlds(url): 6 | comments = [] 7 | tlds = [] 8 | 9 | response = requests.get(url) 10 | for line in response.text.splitlines(): 11 | if not line: 12 | continue 13 | if line.startswith('#'): 14 | comments.append(line) 15 | else: 16 | tlds.append(line) 17 | 18 | return comments, tlds 19 | 20 | 21 | def get_special_tlds(url): 22 | response = requests.get(url) 23 | # Ignore first line from CSV and return list of domains without the period suffix 24 | return [line.split(',', 1)[0][:-1].upper() for line in response.text.splitlines()[1:]] 25 | 26 | 27 | if __name__ == "__main__": 28 | tlds_url = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt' 29 | tlds_location = "../assemblyline/common/net_static.py" 30 | if not os.path.exists(tlds_location): 31 | print("Could not find net_static.py file. Make sure you run this script " 32 | "in its home directory otherwise this won't work.") 33 | exit(1) 34 | 35 | comments, tlds = get_tlds(tlds_url) 36 | comments_lines = '\n'.join(comments) 37 | tlds_lines = '",\n "'.join(tlds) 38 | 39 | special_tlds_url = "https://www.iana.org/assignments/special-use-domain-names/special-use-domain.csv" 40 | special_tlds = get_special_tlds(special_tlds_url) 41 | special_tlds_lines = '",\n "'.join(special_tlds) 42 | 43 | with open(tlds_location, "w") as tlds_fh: 44 | tlds_fh.write("# This file is generated using generate_tlds.py script\n" 45 | "# DO NOT EDIT! 
Re-run the script instead...\n\n" 46 | f"# Top level domains from: {tlds_url}\n" 47 | f"{comments_lines}\n" 48 | f"TLDS_ALPHA_BY_DOMAIN = {{\n \"{tlds_lines}\"\n}}\n\n" 49 | f"# Special-use TLDs from: {special_tlds_url}\n" 50 | f"TLDS_SPECIAL_BY_DOMAIN = {{\n \"{special_tlds_lines}\"\n}}") 51 | 52 | print(f"TLDS list file written into: {tlds_location}") 53 | print("You can now commit the new net_static.py file to your git.") 54 | -------------------------------------------------------------------------------- /assemblyline/common/exceptions.py: -------------------------------------------------------------------------------- 1 | from inspect import getmembers, isfunction 2 | from sys import exc_info 3 | from traceback import format_tb 4 | 5 | 6 | class ChainException(Exception): 7 | def __init__(self, message, cause=None): 8 | Exception.__init__(self, message) 9 | self.cause = cause 10 | 11 | 12 | class NonRecoverableError(ChainException): 13 | pass 14 | 15 | 16 | class RecoverableError(ChainException): 17 | pass 18 | 19 | 20 | class ConfigException(Exception): 21 | pass 22 | 23 | 24 | class Chain(object): 25 | """ 26 | This class can be used as a decorator to override the type of exceptions returned by a function 27 | """ 28 | 29 | def __init__(self, exception): 30 | self.exception = exception 31 | 32 | def __call__(self, original): 33 | def wrapper(*args, **kwargs): 34 | try: 35 | return original(*args, **kwargs) 36 | except Exception as e: 37 | wrapped = self.exception(str(e), e) 38 | raise wrapped.with_traceback(exc_info()[2]) 39 | 40 | wrapper.__name__ = original.__name__ 41 | wrapper.__doc__ = original.__doc__ 42 | wrapper.__dict__.update(original.__dict__) 43 | 44 | return wrapper 45 | 46 | def execute(self, func, *args, **kwargs): 47 | try: 48 | return func(*args, **kwargs) 49 | except Exception as e: 50 | wrapped = self.exception(str(e), e) 51 | raise wrapped.with_traceback(exc_info()[2]) 52 | 53 | 54 | class ChainAll: 55 | """ 56 | This class can be used as a decorator to override the type of exceptions returned by every method of a class 57 | """ 58 | 59 | def __init__(self, exception): 60 | self.exception = Chain(exception) 61 | 62 | def __call__(self, cls): 63 | """We can use an instance of this class as a decorator.""" 64 | for method in getmembers(cls, predicate=isfunction): 65 | setattr(cls, method[0], self.exception(method[1])) 66 | 67 | return cls 68 | 69 | 70 | def get_stacktrace_info(ex: Exception) -> str: 71 | return ''.join(format_tb(exc_info()[2]) + [': '.join((ex.__class__.__name__, str(ex)))]) 72 | -------------------------------------------------------------------------------- /assemblyline/odm/models/error.py: -------------------------------------------------------------------------------- 1 | 2 | from assemblyline import odm 3 | from assemblyline.common.caching import generate_conf_key 4 | 5 | STATUSES = {"FAIL_NONRECOVERABLE", "FAIL_RECOVERABLE"} 6 | ERROR_TYPES = { 7 | "UNKNOWN": 0, 8 | "EXCEPTION": 1, 9 | "MAX DEPTH REACHED": 10, 10 | "MAX FILES REACHED": 11, 11 | "MAX RETRY REACHED": 12, 12 | "SERVICE BUSY": 20, 13 | "SERVICE DOWN": 21, 14 | "TASK PRE-EMPTED": 30 15 | } 16 | 17 | 18 | @odm.model(index=True, store=True, description="Error Response from a Service") 19 | class Response(odm.Model): 20 | message = odm.Text(copyto="__text__", description="Error message") 21 | service_debug_info = odm.Optional(odm.Keyword(), description="Information about where the service was processed") 22 | service_name = odm.Keyword(copyto="__text__", description="Service Name") 
23 | service_tool_version = odm.Optional(odm.Keyword(copyto="__text__"), description="Service Tool Version") 24 | service_version = odm.Keyword(description="Service Version") 25 | status = odm.Enum(values=STATUSES, description="Status of error produced by service") 26 | 27 | 28 | @odm.model(index=True, store=True, description="Error Model used by Error Viewer") 29 | class Error(odm.Model): 30 | archive_ts = odm.Optional(odm.Date(description="Time at which the error was archived")) 31 | created = odm.Date(default="NOW", description="Error creation timestamp") 32 | expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry timestamp") 33 | response: Response = odm.Compound(Response, description="Response from the service") 34 | sha256 = odm.SHA256(copyto="__text__", description="SHA256 of file related to service error") 35 | type = odm.Enum(values=list(ERROR_TYPES.keys()), default="EXCEPTION", description="Type of error") 36 | 37 | def build_key(self, service_tool_version=None, task=None): 38 | key_list = [ 39 | self.sha256, 40 | self.response.service_name.replace('.', '_'), 41 | f"v{self.response.service_version.replace('.', '_')}", 42 | f"c{generate_conf_key(service_tool_version=service_tool_version, task=task)}", 43 | f"e{ERROR_TYPES.get(self.type, 0)}"] 44 | 45 | return '.'.join(key_list) 46 | -------------------------------------------------------------------------------- /test/test_isotime.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from assemblyline.common.isotime import ( 4 | LOCAL_FMT_WITH_MS, 5 | ensure_time_format, 6 | epoch_to_iso, 7 | epoch_to_local, 8 | epoch_to_local_with_ms, 9 | iso_to_epoch, 10 | local_to_epoch, 11 | local_to_local_with_ms, 12 | local_with_ms_to_epoch, 13 | now, 14 | now_as_iso, 15 | now_as_local, 16 | ) 17 | 18 | 19 | def test_isotime_iso(): 20 | iso_date = now_as_iso() 21 | iso_format = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}Z') 22 | 23 | assert isinstance(iso_date, str) 24 | assert iso_format.match(iso_date) 25 | assert epoch_to_iso(iso_to_epoch(iso_date)) == iso_date 26 | assert iso_date == epoch_to_iso(local_with_ms_to_epoch(epoch_to_local_with_ms(local_to_epoch(epoch_to_local(iso_to_epoch(iso_date)))))) 27 | 28 | 29 | def test_isotime_local(): 30 | local_date = now_as_local() 31 | local_format = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}.*') 32 | 33 | assert isinstance(local_date, str) 34 | assert local_format.match(local_date) 35 | assert epoch_to_local(local_to_epoch(local_date)) == local_date 36 | assert epoch_to_local_with_ms(local_with_ms_to_epoch(local_date)) == local_date 37 | assert local_date == epoch_to_local(iso_to_epoch(epoch_to_iso(local_to_epoch(local_date)))) 38 | 39 | 40 | def test_isotime_epoch(): 41 | epoch_date = now(200) 42 | assert epoch_date == local_to_epoch(epoch_to_local(epoch_date)) 43 | assert epoch_date == local_with_ms_to_epoch(epoch_to_local_with_ms(epoch_date)) 44 | assert epoch_date == iso_to_epoch(epoch_to_iso(epoch_date)) 45 | 46 | assert isinstance(epoch_date, float) 47 | 48 | 49 | def test_isotime_rounding_error(): 50 | for t in ["2020-01-29 18:41:25.758416", "2020-01-29 18:41:25.127600"]: 51 | epoch = local_to_epoch(t) 52 | local = epoch_to_local(epoch) 53 | assert local == t 54 | 55 | def test_local_to_local_with_ms(): 56 | local_date = now_as_local() 57 | assert local_to_local_with_ms(local_date) == local_date[:-3] 58 | 59 | def test_ensure_time_format(): 60 | local_date = 
now_as_local() 61 | assert ensure_time_format(local_date, LOCAL_FMT_WITH_MS) 62 | -------------------------------------------------------------------------------- /assemblyline/odm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import typing 3 | 4 | from assemblyline.odm.base import * 5 | 6 | # Imports that have the same effect as some part of the one above so that 7 | # type checking can use this file properly. 8 | from assemblyline.odm.base import Keyword, Optional, Boolean, Integer, List, Compound, Mapping, \ 9 | Date, Long, Enum, Wildcard 10 | from datetime import datetime 11 | 12 | _InnerType = typing.TypeVar("_InnerType") 13 | 14 | """ 15 | Helper functions to invoke ODM types without requiring type annotations. 16 | 17 | These can be used like the type objects they wrap, but will provide better hints to type checking tools. 18 | """ 19 | 20 | 21 | def description(text): 22 | def _fn(obj): 23 | obj.description = text 24 | return _fn 25 | 26 | 27 | def keyword(*args, **kwargs) -> str: 28 | return typing.cast(str, Keyword(*args, **kwargs)) 29 | 30 | 31 | def wildcard(*args, **kwargs) -> str: 32 | return typing.cast(str, Wildcard(*args, **kwargs)) 33 | 34 | 35 | def date(*args, **kwargs) -> datetime: 36 | return typing.cast(datetime, Date(*args, **kwargs)) 37 | 38 | 39 | def optional(child_type: _InnerType, **kwargs) -> typing.Optional[_InnerType]: 40 | return typing.cast(typing.Optional[_InnerType], Optional(child_type, **kwargs)) 41 | 42 | 43 | def boolean(*args, **kwargs) -> bool: 44 | return typing.cast(bool, Boolean(*args, **kwargs)) 45 | 46 | 47 | def integer(*args, **kwargs) -> int: 48 | return typing.cast(int, Integer(*args, **kwargs)) 49 | 50 | 51 | def long(*args, **kwargs) -> int: 52 | return typing.cast(int, Long(*args, **kwargs)) 53 | 54 | 55 | def sequence(child_type: _InnerType, **kwargs) -> list[_InnerType]: 56 | return typing.cast(list[_InnerType], List(child_type, **kwargs)) 57 | 58 | 59 | def mapping(child_type: _InnerType, **kwargs) -> dict[str, _InnerType]: 60 | return typing.cast(dict[str, _InnerType], Mapping(child_type, **kwargs)) 61 | 62 | 63 | def compound(child_type: typing.Callable[..., _InnerType], **kwargs) -> _InnerType: 64 | return typing.cast(_InnerType, Compound(child_type, **kwargs)) 65 | 66 | 67 | def enum(values: typing.Iterable[str], **kwargs) -> str: 68 | return typing.cast(str, Enum(values, **kwargs)) 69 | -------------------------------------------------------------------------------- /assemblyline/remote/datatypes/counters.py: -------------------------------------------------------------------------------- 1 | 2 | from redis.exceptions import ConnectionError 3 | 4 | from assemblyline.remote.datatypes import get_client, retry_call, now_as_iso 5 | from assemblyline.remote.datatypes.hash import Hash 6 | 7 | 8 | class Counters(object): 9 | def __init__(self, prefix="counter", host=None, port=None, track_counters=False): 10 | self.c = get_client(host, port, False) 11 | self.prefix = prefix 12 | if track_counters: 13 | self.tracker = Hash("c-tracker-%s" % prefix, host=host, port=port) 14 | else: 15 | self.tracker = None 16 | 17 | def __enter__(self): 18 | return self 19 | 20 | def __exit__(self, exc_type, exc_val, exc_tb): 21 | self.delete() 22 | 23 | def inc(self, name, value=1, track_id=None): 24 | if self.tracker: 25 | self.tracker.add(track_id or name, now_as_iso()) 26 | return retry_call(self.c.incr, "%s-%s" % (self.prefix, name), value) 27 | 28 | def 
dec(self, name, value=1, track_id=None): 29 | if self.tracker: 30 | self.tracker.pop(str(track_id or name)) 31 | return retry_call(self.c.decr, "%s-%s" % (self.prefix, name), value) 32 | 33 | def get_queues_sizes(self): 34 | out = {} 35 | for queue in retry_call(self.c.keys, "%s-*" % self.prefix): 36 | queue_size = int(retry_call(self.c.get, queue)) 37 | out[queue] = queue_size 38 | 39 | return {k.decode('utf-8'): v for k, v in out.items()} 40 | 41 | def get_queues(self): 42 | return [k.decode('utf-8') for k in retry_call(self.c.keys, "%s-*" % self.prefix)] 43 | 44 | def ready(self): 45 | try: 46 | self.c.ping() 47 | except ConnectionError: 48 | return False 49 | 50 | return True 51 | 52 | def reset_queues(self): 53 | if self.tracker: 54 | self.tracker.delete() 55 | for queue in retry_call(self.c.keys, "%s-*" % self.prefix): 56 | retry_call(self.c.set, queue, "0") 57 | 58 | def delete(self): 59 | if self.tracker: 60 | self.tracker.delete() 61 | for queue in retry_call(self.c.keys, "%s-*" % self.prefix): 62 | retry_call(self.c.delete, queue) 63 | -------------------------------------------------------------------------------- /assemblyline/remote/datatypes/cache.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from redis.exceptions import ConnectionError 4 | 5 | from assemblyline.common.uid import get_id_from_data 6 | from assemblyline.remote.datatypes import get_client, retry_call 7 | 8 | DEFAULT_TTL = 60 * 60  # 1 Hour 9 | 10 | 11 | class Cache(object): 12 | def __init__(self, prefix="al_cache", separator="-", host=None, port=None, ttl=DEFAULT_TTL): 13 | self.c = get_client(host, port, False) 14 | self.prefix = prefix + separator 15 | self.ttl = ttl  # honour the caller-supplied TTL instead of always using DEFAULT_TTL 16 | 17 | def __enter__(self): 18 | return self 19 | 20 | def _get_key(self, name): 21 | return f"{self.prefix}{name}"  # prefix already ends with the separator 22 | 23 | def clear(self, key=None): 24 | # Clear all items belonging to this cache 25 | if key: 26 | retry_call(self.c.delete, f"{self.prefix}{key}") 27 | else: 28 | for queue in retry_call(self.c.keys, f"{self.prefix}*"): 29 | retry_call(self.c.delete, queue) 30 | 31 | def create_key(self, *args): 32 | key_str = "-".join([str(x) for x in args]) 33 | return get_id_from_data(key_str) 34 | 35 | def get(self, key, ttl=None, reset=True): 36 | # Get the key name 37 | cache_name = self._get_key(key) 38 | 39 | # Get the value from the cache 40 | item = retry_call(self.c.get, cache_name) 41 | if not item: 42 | return item 43 | 44 | if reset: 45 | # Reset the cache while we're still using it 46 | retry_call(self.c.expire, cache_name, ttl or self.ttl) 47 | 48 | return json.loads(item) 49 | 50 | def list(self): 51 | for key in retry_call(self.c.keys, f"{self.prefix}*"): 52 | yield json.loads(retry_call(self.c.get, key)) 53 | 54 | def ready(self): 55 | try: 56 | self.c.ping() 57 | except ConnectionError: 58 | return False 59 | 60 | return True 61 | 62 | def set(self, key, value, ttl=None): 63 | # Get the key name 64 | cache_name = self._get_key(key) 65 | 66 | # Set the value and the expiry for the name 67 | retry_call(self.c.set, cache_name, json.dumps(value)) 68 | retry_call(self.c.expire, cache_name, ttl or self.ttl) 69 | -------------------------------------------------------------------------------- /assemblyline/remote/datatypes/daily_quota_tracker.py: -------------------------------------------------------------------------------- 1 | from assemblyline.common.isotime import now_as_iso 2 | from assemblyline.remote.datatypes import get_client, retry_call 3 | 4 | 5 |
class DailyQuotaTracker(object): 6 | def __init__(self, redis=None, host=None, port=None, private=False): 7 | self.c = redis or get_client(host, port, private) 8 | self.ttl = 60*60*24 9 | 10 | def _counter_name(self, user, type): 11 | return f"DAILY-QUOTA-{now_as_iso()[:10]}-{user}-{type}" 12 | 13 | def _decrement(self, user, type): 14 | counter = self._counter_name(user, type) 15 | with self.c.pipeline() as pipe: 16 | pipe.decr(counter) 17 | pipe.expire(counter, self.ttl, nx=True) 18 | 19 | val, _ = retry_call(pipe.execute) 20 | 21 | return val 22 | 23 | def decrement_api(self, user): 24 | return self._decrement(user, 'api') 25 | 26 | def decrement_submission(self, user): 27 | return self._decrement(user, 'submission') 28 | 29 | def _increment(self, user, type): 30 | counter = self._counter_name(user, type) 31 | with self.c.pipeline() as pipe: 32 | pipe.incr(counter) 33 | pipe.expire(counter, self.ttl, nx=True) 34 | 35 | val, _ = retry_call(pipe.execute) 36 | 37 | return val 38 | 39 | def increment_api(self, user): 40 | return self._increment(user, 'api') 41 | 42 | def increment_submission(self, user): 43 | return self._increment(user, 'submission') 44 | 45 | def _get(self, user, type): 46 | counter = self._counter_name(user, type) 47 | return retry_call(self.c.get, counter) or 0 48 | 49 | def get_api(self, user): 50 | return int(self._get(user, 'api')) 51 | 52 | def get_submission(self, user): 53 | return int(self._get(user, 'submission')) 54 | 55 | def _reset(self, user, type): 56 | counter = self._counter_name(user, type) 57 | with self.c.pipeline() as pipe: 58 | pipe.set(counter, 0) 59 | pipe.expire(counter, self.ttl, nx=True) 60 | 61 | val, _ = retry_call(pipe.execute) 62 | 63 | def reset_api(self, user): 64 | self._reset(user, "api") 65 | 66 | def reset_submission(self, user): 67 | self._reset(user, "submission") 68 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm AS base 2 | 3 | # Upgrade packages 4 | RUN apt-get update && apt-get -yy upgrade && rm -rf /var/lib/apt/lists/* 5 | 6 | # Get required apt packages 7 | RUN apt-get update && apt-get install -yy libffi8 libfuzzy2 libmagic1 && rm -rf /var/lib/apt/lists/* 8 | 9 | # Make sure root account is locked so 'su' commands fail all the time 10 | RUN passwd -l root 11 | 12 | FROM base AS builder 13 | ARG version 14 | ARG version_tag=${version} 15 | 16 | # Get required apt packages 17 | RUN apt-get update \ 18 | && apt-get install -yy build-essential libffi-dev libfuzzy-dev \ 19 | && rm -rf /var/lib/apt/lists/* 20 | 21 | # Install assemblyline base (setup.py is just a file we know exists so the command 22 | # won't fail if dist isn't there. The dist* copies in any dist directory only if it exists.) 
23 | COPY setup.py dist* dist/ 24 | RUN pip install --no-cache-dir --no-warn-script-location -f dist/ --user assemblyline==$version && rm -rf ~/.cache/pip 25 | RUN chmod 750 /root/.local/lib/python3.11/site-packages 26 | 27 | FROM base 28 | 29 | # Add assemblyline user 30 | RUN useradd -b /var/lib -U -m assemblyline 31 | 32 | # Create assemblyline config directory 33 | RUN mkdir -p /etc/assemblyline 34 | RUN chmod 750 /etc/assemblyline 35 | RUN chown root:assemblyline /etc/assemblyline 36 | 37 | # Create assemblyline cache directory 38 | RUN mkdir -p /var/cache/assemblyline 39 | RUN chmod 770 /var/cache/assemblyline 40 | RUN chown assemblyline:assemblyline /var/cache/assemblyline 41 | 42 | # Create assemblyline home directory 43 | RUN mkdir -p /var/lib/assemblyline 44 | RUN chmod 750 /var/lib/assemblyline 45 | RUN chown assemblyline:assemblyline /var/lib/assemblyline 46 | 47 | # Create assemblyline log directory 48 | RUN mkdir -p /var/log/assemblyline 49 | RUN chmod 770 /var/log/assemblyline 50 | RUN chown assemblyline:assemblyline /var/log/assemblyline 51 | 52 | # Install assemblyline base 53 | COPY --chown=assemblyline:assemblyline --from=builder /root/.local /var/lib/assemblyline/.local 54 | ENV PATH=/var/lib/assemblyline/.local/bin:$PATH 55 | ENV PYTHONPATH=/var/lib/assemblyline/.local/lib/python3.11/site-packages 56 | ENV ASSEMBLYLINE_VERSION=${version} 57 | ENV ASSEMBLYLINE_IMAGE_TAG=${version_tag} 58 | 59 | # Switch to assemblyline user 60 | USER assemblyline 61 | WORKDIR /var/lib/assemblyline 62 | CMD /bin/bash 63 | -------------------------------------------------------------------------------- /test/test_identify.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import pytest 4 | 5 | from cart import unpack_file 6 | from json import loads 7 | from pathlib import Path 8 | 9 | from assemblyline.common import forge 10 | 11 | SAMPLES_LOCATION = os.environ.get("SAMPLES_LOCATION", None) 12 | 13 | 14 | def test_id_file_base(): 15 | with forge.get_identify(use_cache=False) as identify: 16 | tests_dir = os.path.dirname(__file__) 17 | id_file_base = "id_file_base" 18 | file_base_dir = os.path.join(tests_dir, id_file_base) 19 | map_file = "id_file_base.json" 20 | map_path = os.path.join(file_base_dir, map_file) 21 | with open(map_path, "r") as f: 22 | contents = f.read() 23 | json_contents = loads(contents) 24 | for _, _, files in os.walk(file_base_dir): 25 | for file_name in files: 26 | if file_name == map_file: 27 | continue 28 | 29 | file_path = os.path.join(file_base_dir, file_name) 30 | data = identify.fileinfo(file_path, generate_hashes=False) 31 | actual_value = data.get("type", "") 32 | expected_value = json_contents[file_name] 33 | assert actual_value == expected_value 34 | 35 | 36 | def get_ids(filepath): 37 | if not isinstance(filepath, (str, bytes, os.PathLike)): 38 | return "skipped" 39 | return "-".join(split_sample(filepath)) 40 | 41 | 42 | def split_sample(filepath): 43 | target_file = os.path.join("/tmp", os.path.basename(filepath).removesuffix(".cart"))  # strip only a trailing ".cart"; rstrip(".cart") would eat any run of those characters 44 | identify_result = str(filepath.relative_to(Path(SAMPLES_LOCATION)).parent) 45 | return (target_file, identify_result) 46 | 47 | 48 | @pytest.fixture() 49 | def sample(request): 50 | target_file, identify_result = split_sample(request.param) 51 | try: 52 | unpack_file(request.param, target_file) 53 | yield (target_file, identify_result) 54 | finally: 55 | if target_file: 56 | os.unlink(target_file) 57 | 58 | 59 | if SAMPLES_LOCATION: 60 |
@pytest.mark.parametrize("sample", Path(SAMPLES_LOCATION).rglob("*.cart"), ids=get_ids, indirect=True) 61 | def test_identify_samples(sample): 62 | with forge.get_identify(use_cache=False) as identify: 63 | assert identify.fileinfo(sample[0], generate_hashes=False)["type"] == sample[1] 64 | -------------------------------------------------------------------------------- /assemblyline/remote/datatypes/user_quota_tracker.py: -------------------------------------------------------------------------------- 1 | import redis 2 | from assemblyline.remote.datatypes import get_client, retry_call 3 | 4 | begin_script = """ 5 | local t = redis.call('time') 6 | local key = tonumber(t[1] .. string.format("%06d", t[2])) 7 | 8 | local name = ARGV[1] 9 | local max = tonumber(ARGV[2]) 10 | local timeout = tonumber(ARGV[3] .. "000000") 11 | 12 | redis.call('zremrangebyscore', name, 0, key - timeout) 13 | if redis.call('zcard', name) < max then 14 | redis.call('zadd', name, key, key) 15 | return true 16 | else 17 | return false 18 | end 19 | """ 20 | 21 | 22 | class UserQuotaTracker(object): 23 | def __init__(self, prefix, timeout=120, redis=None, host=None, port=None, private=False): 24 | self.c = redis or get_client(host, port, private) 25 | self.bs = self.c.register_script(begin_script) 26 | self.prefix = prefix 27 | self.timeout = timeout 28 | 29 | def _queue_name(self, user): 30 | return f"{self.prefix}-{user}" 31 | 32 | def begin(self, user, max_quota): 33 | try: 34 | return retry_call(self.bs, args=[self._queue_name(user), max_quota, self.timeout]) == 1 35 | except redis.exceptions.ResponseError as er: 36 | # TODO: This is a failsafe for upgrade purposes; it could be removed in a future version 37 | if "WRONGTYPE" in str(er): 38 | retry_call(self.c.delete, self._queue_name(user)) 39 | return retry_call(self.bs, args=[self._queue_name(user), max_quota, self.timeout]) == 1 40 | else: 41 | raise 42 | 43 | def end(self, user): 44 | """When only one item is requested, blocking is possible.""" 45 | try: 46 | retry_call(self.c.zpopmin, self._queue_name(user)) 47 | except redis.exceptions.ResponseError as er: 48 | # TODO: This is a failsafe for upgrade purposes; it could be removed in a future version 49 | if "WRONGTYPE" in str(er): 50 | retry_call(self.c.delete, self._queue_name(user)) 51 | retry_call(self.c.zpopmin, self._queue_name(user)) 52 | else: 53 | raise 54 | 55 | def reset(self, user): 56 | retry_call(self.c.delete, self._queue_name(user)) 57 | 58 | def get_count(self, user): 59 | return retry_call(self.c.zcard, self._queue_name(user)) 60 |
hdr, _ = unpack_stream(original_file, extracted_file) 28 | cart_extracted = True 29 | 30 | except Exception: 31 | extracted_path = None 32 | hdr = {} 33 | fileinfo['type'] = 'corrupted/cart' 34 | 35 | finally: 36 | extracted_file.close() 37 | 38 | if cart_extracted and extracted_path: 39 | fileinfo = identify.fileinfo(extracted_path) 40 | elif fileinfo['type'].startswith("uri/"): 41 | dir_path = tempfile.mkdtemp() # This does not get cleaned after execution, like the mkstemp() 42 | extracted_path = normalize_uri_file(dir_path, original_path) 43 | fileinfo = identify.fileinfo(extracted_path) 44 | 45 | return extracted_path, fileinfo, hdr 46 | 47 | 48 | def encode_file(input_path, name, metadata=None): 49 | if metadata is None: 50 | metadata = {} 51 | 52 | _, output_path = tempfile.mkstemp() 53 | 54 | with open(output_path, 'wb') as oh: 55 | with open(input_path, 'rb') as ih: 56 | data = ih.read(64) 57 | if not is_cart(data): 58 | ih.seek(0) 59 | metadata.update({'name': name}) 60 | pack_stream(ih, oh, metadata) 61 | return output_path, f"{name}.cart" 62 | else: 63 | return input_path, name 64 | -------------------------------------------------------------------------------- /assemblyline/odm/models/user_settings.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common import forge 3 | from assemblyline.odm.models.config import SubmissionProfileParams 4 | 5 | Classification = forge.get_classification() 6 | 7 | ENCODINGS = {"cart", "raw", "zip"} 8 | VIEWS = {"report", "details"} 9 | 10 | 11 | @odm.model(index=False, store=False, description="Model of User Settings") 12 | class UserSettings(odm.Model): 13 | download_encoding = odm.Enum(values=ENCODINGS, default="cart", 14 | description="Default download encoding when downloading files") 15 | default_external_sources = odm.List(odm.Keyword(), default=[], 16 | description="List of sha256 sources to check by default") 17 | default_zip_password = odm.Text( 18 | default="infected", 19 | description="Default user-defined password for creating password protected ZIPs when downloading files" 20 | ) 21 | default_metadata = odm.Mapping(odm.Text(), default={}, description="Default metadata to add to submissions") 22 | executive_summary = odm.Boolean(default=True, description="Should executive summary sections be shown?") 23 | expand_min_score = odm.Integer(default=500, description="Auto-expand section when score bigger then this") 24 | preferred_submission_profile = odm.Optional(odm.Text(), description="Preferred submission profile") 25 | submission_profiles = odm.Mapping(odm.Compound(SubmissionProfileParams), default={}, 26 | description="Default submission profile settings") 27 | submission_view = odm.Enum(values=VIEWS, default="report", description="Default view for completed submissions") 28 | 29 | 30 | DEFAULT_USER_PROFILE_SETTINGS = { 31 | "download_encoding": "cart", 32 | "default_external_sources": [], 33 | "default_zip_password": "infected", 34 | "executive_summary": True, 35 | "expand_min_score": 500, 36 | "submission_view": "report", 37 | "default_metadata": {} 38 | } 39 | 40 | DEFAULT_SUBMISSION_PROFILE_SETTINGS = { 41 | "classification": Classification.UNRESTRICTED, 42 | "deep_scan": False, 43 | "generate_alert": False, 44 | "ignore_cache": False, 45 | "ignore_recursion_prevention": False, 46 | "ignore_filtering": False, 47 | "priority": 1000, 48 | "service_spec": {}, 49 | "services": {}, 50 | "ttl": 30 51 | } 52 | 
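A quick sketch (illustrative, not from the repository) of how the defaults above interact with the UserSettings model: the dict only covers a subset of the fields, and the model supplies the rest from its own field defaults.

# Illustrative sketch only.
from assemblyline.odm.models.user_settings import (
    DEFAULT_USER_PROFILE_SETTINGS,
    UserSettings,
)

settings = UserSettings(DEFAULT_USER_PROFILE_SETTINGS)
assert settings.download_encoding == "cart"          # taken from the defaults dict
assert settings.expand_min_score == 500              # also from the dict
assert settings.default_zip_password == "infected"
# Fields absent from the dict (e.g. submission_profiles) fall back to field defaults.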
-------------------------------------------------------------------------------- /test/test_datasource.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import pytest 4 | 5 | from assemblyline.datasource.al import AL 6 | from assemblyline.datasource.alert import Alert 7 | from assemblyline.common import forge 8 | from assemblyline.odm.random_data import wipe_alerts, create_alerts, wipe_submissions, create_submission, NullLogger 9 | 10 | 11 | @pytest.fixture(scope="module") 12 | def fs(): 13 | return forge.get_filestore() 14 | 15 | 16 | def purge_alert(ds, fs): 17 | wipe_alerts(ds) 18 | wipe_submissions(ds, fs) 19 | 20 | 21 | @pytest.fixture(scope="module") 22 | def datastore(request, datastore_connection, fs): 23 | create_alerts(datastore_connection, alert_count=1) 24 | create_submission(datastore_connection, fs) 25 | 26 | request.addfinalizer(lambda: purge_alert(datastore_connection, fs)) 27 | return datastore_connection 28 | 29 | 30 | # noinspection PyUnusedLocal 31 | def test_al_source(datastore): 32 | submission_id = random.choice(datastore.submission.search("id:*", fl="id", rows=1, as_obj=False)['items'])['id'] 33 | submission = datastore.submission.get(submission_id) 34 | al_datasource = AL(NullLogger()) 35 | resp = al_datasource.query(submission.files[0].sha256, access_control=None) 36 | for res in resp: 37 | score = res['data']['score'] 38 | if score >= 2000: 39 | assert res['malicious'] 40 | assert res['confirmed'] 41 | elif 1000 <= score < 2000: 42 | assert res['malicious'] 43 | assert not res['confirmed'] 44 | else: 45 | assert not res['malicious'] 46 | 47 | 48 | # noinspection PyUnusedLocal 49 | def test_alert_source(datastore): 50 | alert_id = random.choice(datastore.alert.search("id:*", fl="id", rows=1, as_obj=False)['items'])['id'] 51 | alert = datastore.alert.get(alert_id) 52 | 53 | alert_datasource = Alert(NullLogger()) 54 | resp = alert_datasource.query(alert.file.sha256, access_control=None) 55 | for res in resp: 56 | score = None 57 | for item in res['data']: 58 | if score is None or item['score'] > score: 59 | score = item['score'] 60 | 61 | if score >= 2000: 62 | assert res['malicious'] 63 | assert res['confirmed'] 64 | elif 500 <= score < 2000: 65 | assert res['malicious'] 66 | assert not res['confirmed'] 67 | else: 68 | assert not res['malicious'] 69 | -------------------------------------------------------------------------------- /assemblyline/datasource/al.py: -------------------------------------------------------------------------------- 1 | from assemblyline.common import forge 2 | from assemblyline.datasource.common import Datasource 3 | 4 | Classification = forge.get_classification() 5 | 6 | 7 | class AL(Datasource): 8 | def __init__(self, log, **kw): 9 | super(AL, self).__init__(log, **kw) 10 | self.datastore = forge.get_datastore() 11 | 12 | def parse(self, results, **kw): 13 | return results 14 | 15 | def query(self, value, **kw): 16 | results = [] 17 | 18 | hash_type = self.hash_type(value) 19 | 20 | query = "%s:%s OR %s:%s" % ( 21 | hash_type, value.lower(), hash_type, value.upper() 22 | ) 23 | 24 | res = self.datastore.file.search(query, rows=5, access_control=kw['access_control'], as_obj=False) 25 | 26 | for r in res['items']: 27 | score = 0 28 | score_map = {} 29 | 30 | res = self.datastore.result.grouped_search("response.service_name", f"id:{r['sha256']}*", 31 | fl="result.score,id", rows=100, sort="created desc", 32 | access_control=kw["access_control"], as_obj=False) 33 | 34 | for group 
in res['items']: 35 | service_name = group['value'] 36 | for doc in group['items']: 37 | score_map[service_name] = doc['result']['score'] 38 | score += doc['result']['score'] 39 | 40 | result = { 41 | "classification": r['classification'], 42 | "confirmed": score >= 2000 or score < -499, 43 | "data": { 44 | "classification": r['classification'], 45 | "md5": r['md5'], 46 | "sha1": r['sha1'], 47 | "sha256": r['sha256'], 48 | "size": r['size'], 49 | "type": r['type'], 50 | "seen": { 51 | "count": r['seen']['count'], 52 | "last": r['seen']['last'] 53 | }, 54 | "score": score, 55 | "score_map": score_map 56 | }, 57 | "description": "File found in AL with score of %s." % score, 58 | "malicious": score >= 1000, 59 | } 60 | 61 | results.append(result) 62 | 63 | return results 64 | -------------------------------------------------------------------------------- /assemblyline/common/banner.py: -------------------------------------------------------------------------------- 1 | BANNER = r""" 2 | ######### 3 | ###### ############################## 4 | ##### ###################### ### 5 | ##### ###################### ### 6 | ###### ############## ######## 7 | ############ ######## 8 | ########### -------- 9 | ############ /..........\ 10 | ############# /..............\ 11 | ############# \..../ \..../ 12 | ###### ##### |..| |..| 13 | ####### #### |..| |..| 14 | ########### ### |..| |..| 15 | ################# #### \.| |./ 16 | ####################### ##### 17 | ############################## 18 | ############################# 19 | ########################## 20 | ######################## 21 | ###################### 22 | ################### 23 | ....................... 24 | ........................ 25 | .......................... 26 | .......................... 27 | .......................... 28 | .......................... 29 | ................................ 
30 | """ 31 | -------------------------------------------------------------------------------- /test/test_postprocess.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import threading 3 | import http.server 4 | import json 5 | 6 | import pytest 7 | from assemblyline.common.postprocess import ActionWorker, SubmissionFilter 8 | 9 | from assemblyline.odm.models.actions import PostprocessAction, Webhook 10 | from assemblyline.odm.models.submission import Submission 11 | from assemblyline.odm.randomizer import random_minimal_obj 12 | 13 | 14 | @pytest.fixture 15 | def server(): 16 | hits = queue.Queue() 17 | 18 | class TestServer(http.server.BaseHTTPRequestHandler): 19 | def do_POST(self): 20 | try: 21 | hits.put(dict( 22 | headers=self.headers, 23 | body=self.rfile.read(int(self.headers.get('Content-Length', '1'))) 24 | )) 25 | self.send_response(200, 'data received') 26 | self.end_headers() 27 | except Exception as error: 28 | hits.put(error) 29 | 30 | test_server = http.server.ThreadingHTTPServer(('localhost', 0), TestServer) 31 | thread = threading.Thread(target=test_server.serve_forever, daemon=True) 32 | thread.start() 33 | try: 34 | yield f'http://localhost:{test_server.server_address[1]}', hits 35 | finally: 36 | test_server.shutdown() 37 | thread.join() 38 | 39 | 40 | def test_hook(server, config, datastore_connection, redis_connection): 41 | server_uri, server_hits = server 42 | 43 | action = PostprocessAction(dict( 44 | enabled=True, 45 | run_on_completed=True, 46 | filter="metadata.do_hello: *", 47 | webhook=Webhook(dict( 48 | uri=server_uri, 49 | headers=[dict(name='care-of', value='assemblyline')] 50 | )) 51 | )) 52 | 53 | worker = ActionWorker(cache=False, config=config, datastore=datastore_connection, redis_persist=redis_connection) 54 | 55 | worker.actions = { 56 | 'action': (SubmissionFilter(action.filter), action) 57 | } 58 | 59 | sub: Submission = random_minimal_obj(Submission) 60 | sub.metadata = dict(ok='bad') 61 | worker.process_submission(sub, tags=[]) 62 | 63 | sub: Submission = random_minimal_obj(Submission) 64 | sub.metadata = dict(ok='good', do_hello='yes') 65 | worker.process_submission(sub, tags=[]) 66 | 67 | obj = server_hits.get(timeout=3) 68 | assert obj['headers']['CARE-OF'] == 'assemblyline' 69 | assert json.loads(obj['body'])['submission']['metadata']['ok'] == 'good' 70 | 71 | assert server_hits.qsize() == 0 72 | -------------------------------------------------------------------------------- /assemblyline/common/metrics.py: -------------------------------------------------------------------------------- 1 | from assemblyline.common import forge 2 | from assemblyline.odm.messages import PerformanceTimer 3 | from assemblyline.remote.datatypes import get_client 4 | from assemblyline.remote.datatypes.exporting_counter import AutoExportingCounters 5 | 6 | # Which datastore tables have an expiry and we want to monitor how many files are due 7 | # for expiry but still exist. 8 | EXPIRY_METRICS = [ 9 | 'alert', 10 | 'badlist', 11 | 'cached_file', 12 | 'emptyresult', 13 | 'error', 14 | 'file', 15 | 'filescore', 16 | 'result', 17 | 'retrohunt_hit', 18 | 'safelist', 19 | 'submission', 20 | 'submission_tree', 21 | 'submission_summary' 22 | ] 23 | 24 | 25 | class MetricsFactory(object): 26 | """A wrapper around what was once, multiple metrics methods. 27 | 28 | Left in place until we decide we are absolutely not switching methods again. 
29 | """ 30 | 31 | def __init__(self, metrics_type, schema, name=None, redis=None, config=None, export_zero=True): 32 | self.config = config or forge.get_config() 33 | self.redis = redis or get_client( 34 | self.config.core.metrics.redis.host, 35 | self.config.core.metrics.redis.port, 36 | False 37 | ) 38 | 39 | # Separate out the timers and normal counters 40 | timer_schema = set() 41 | counter_schema = set() 42 | 43 | for _k, field_type in schema.fields().items(): 44 | if isinstance(field_type, PerformanceTimer): 45 | timer_schema.add(_k) 46 | else: 47 | counter_schema.add(_k) 48 | 49 | for _k in timer_schema: 50 | counter_schema.discard(_k + '_count') 51 | 52 | self.type = metrics_type 53 | self.name = name or metrics_type 54 | 55 | # Initialize legacy metrics 56 | self.metrics_handler = AutoExportingCounters( 57 | self.name, 58 | redis=self.redis, 59 | config=self.config, 60 | counter_type=metrics_type, 61 | timer_names=timer_schema, 62 | counter_names=counter_schema, 63 | export_zero=export_zero 64 | ) 65 | self.metrics_handler.start() 66 | 67 | def stop(self): 68 | self.metrics_handler.stop() 69 | 70 | def set(self, name, value): 71 | self.metrics_handler.set(name, value) 72 | 73 | def increment(self, name, increment_by=1): 74 | self.metrics_handler.increment(name, increment_by=increment_by) 75 | 76 | def increment_execution_time(self, name, execution_time): 77 | self.metrics_handler.increment_execution_time(name, execution_time) 78 | -------------------------------------------------------------------------------- /assemblyline/odm/messages/dispatcher_heartbeat.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.odm.messages import PerformanceTimer 3 | 4 | MSG_TYPES = {"DispatcherHeartbeat"} 5 | LOADER_CLASS = "assemblyline.odm.messages.dispatcher_heartbeat.DispatcherMessage" 6 | 7 | 8 | @odm.model(description="Queue Model") 9 | class Queues(odm.Model): 10 | ingest = odm.Integer(description="Number of submissions in ingest queue") 11 | start = odm.List(odm.Integer(), description="Number of submissions that started") 12 | result = odm.List(odm.Integer(), description="Number of results in queue") 13 | command = odm.List(odm.Integer(), description="Number of commands in queue") 14 | 15 | 16 | @odm.model(description="Inflight Model") 17 | class Inflight(odm.Model): 18 | max = odm.Integer(description="Maximum number of submissions") 19 | outstanding = odm.Integer(description="Number of outstanding submissions") 20 | per_instance = odm.List(odm.Integer(), description="Number of submissions per Dispatcher instance") 21 | 22 | 23 | @odm.model(description="Metrics Model") 24 | class Metrics(odm.Model): 25 | files_completed = odm.Integer(description="Number of files completed") 26 | submissions_completed = odm.Integer(description="Number of submissions completed") 27 | service_timeouts = odm.Integer(description="Number of service timeouts") 28 | cpu_seconds = PerformanceTimer(description="CPU time") 29 | cpu_seconds_count = odm.Integer(description="CPU count") 30 | busy_seconds = PerformanceTimer(description="Busy CPU time") 31 | busy_seconds_count = odm.Integer(description="Busy CPU count") 32 | save_queue = odm.Integer(description="Processed submissions waiting to be saved") 33 | error_queue = odm.Integer(description="Errors waiting to be saved") 34 | 35 | 36 | @odm.model(description="Heartbeat Model") 37 | class Heartbeat(odm.Model): 38 | inflight = odm.Compound(Inflight, description="Inflight 
submissions") 39 | instances = odm.Integer(description="Number of instances") 40 | metrics = odm.Compound(Metrics, description="Dispatcher metrics") 41 | queues = odm.Compound(Queues, description="Dispatcher queues") 42 | component = odm.Keyword(description="Component name") 43 | 44 | 45 | @odm.model(description="Model of Dispatcher Heartbeat Messages") 46 | class DispatcherMessage(odm.Model): 47 | msg = odm.Compound(Heartbeat, description="Heartbeat message") 48 | msg_loader = odm.Enum(values={LOADER_CLASS}, default=LOADER_CLASS, description="Loader class for message") 49 | msg_type = odm.Enum(values=MSG_TYPES, default="DispatcherHeartbeat", description="Type of message") 50 | sender = odm.Keyword(description="Sender of message") 51 | -------------------------------------------------------------------------------- /assemblyline/odm/models/ontology/results/sandbox.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common.dict_utils import get_dict_fingerprint_hash, flatten 3 | from assemblyline.odm.models.ontology.results.process import ObjectID 4 | 5 | OID_PARTS = ['sandbox_name', 'sandbox_version', 'analysis_metadata.start_time', 6 | 'analysis_metadata.end_time', 'analysis_metadata.task_id'] 7 | 8 | 9 | @odm.model(description="Sandbox Ontology Model") 10 | class Sandbox(odm.Model): 11 | @odm.model(description="The metadata of the analysis, per analysis") 12 | class AnalysisMetadata(odm.Model): 13 | @odm.model(description="The metadata regarding the machine where the analysis took place") 14 | class MachineMetadata(odm.Model): 15 | ip = odm.Optional(odm.IP(), description="The IP of the machine used for analysis") 16 | hypervisor = odm.Optional(odm.Keyword(), description="The hypervisor of the machine used for analysis") 17 | hostname = odm.Optional(odm.Keyword(), description="The name of the machine used for analysis") 18 | platform = odm.Optional(odm.Platform(), description="The platform of the machine used for analysis") 19 | version = odm.Optional(odm.Keyword(), 20 | description="The version of the operating system of the machine used for analysis") 21 | architecture = odm.Optional(odm.Processor(), 22 | description="The architecture of the machine used for analysis") 23 | 24 | task_id = odm.Optional(odm.Keyword(), description="The ID used for identifying the analysis task") 25 | start_time = odm.Date(description="The start time of the analysis") 26 | end_time = odm.Optional(odm.Date(), description="The end time of the analysis") 27 | routing = odm.Optional(odm.Keyword(), 28 | description="The routing used in the sandbox setup (Spoofed, Internet, Tor, VPN)") 29 | machine_metadata = odm.Optional(odm.Compound(MachineMetadata), description="The metadata of the analysis") 30 | window_size = odm.Optional(odm.Keyword(), description="The resolution used for the analysis") 31 | 32 | objectid = odm.Compound(ObjectID, description="The object ID of the sandbox object") 33 | 34 | analysis_metadata = odm.Compound(AnalysisMetadata, description="Metadata for the analysis") 35 | sandbox_name = odm.Keyword(description="The name of the sandbox") 36 | sandbox_version = odm.Optional(odm.Keyword(), description="The version of the sandbox") 37 | 38 | def get_oid(data: dict): 39 | return f"sandbox_{get_dict_fingerprint_hash({key: flatten(data).get(key) for key in OID_PARTS})}" 40 | 41 | def get_tag(data: dict): 42 | return data['sandbox_name'] 43 | 
-------------------------------------------------------------------------------- /assemblyline/odm/messages/submission.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional as Opt 2 | from assemblyline import odm 3 | from assemblyline.odm.models.submission import SubmissionParams, File, Submission as DatabaseSubmission 4 | 5 | MSG_TYPES = {"SubmissionIngested", "SubmissionReceived", "SubmissionStarted", "SubmissionCompleted"} 6 | LOADER_CLASS = "assemblyline.odm.messages.submission.SubmissionMessage" 7 | 8 | 9 | @odm.model(index=True, store=True, description="Notification Model") 10 | class Notification(odm.Model): 11 | queue = odm.Optional(odm.Keyword(), description="Queue to publish the completion message") 12 | threshold = odm.Optional(odm.Integer(), description="Notify only if this score threshold is met") 13 | 14 | 15 | @odm.model(description="Submission Model") 16 | class Submission(odm.Model): 17 | sid = odm.UUID(description="Submission ID to use") 18 | time = odm.Date(default="NOW", description="Message time") 19 | files: List[File] = odm.List(odm.Compound(File), default=[], description="File block") 20 | metadata: Dict[str, str] = odm.FlatMapping(odm.MetadataValue(), default={}, description="Metadata submitted with the file") 21 | notification: Notification = odm.Compound(Notification, default={}, description="Notification queue parameters") 22 | params: SubmissionParams = odm.Compound(SubmissionParams, description="Parameters of the submission") 23 | scan_key: Opt[str] = odm.Optional(odm.Keyword()) 24 | file_tree = odm.Any(default={}, description="File tree of the files in this submission") 25 | file_infos = odm.Mapping(odm.Any(), default={}, description="SHA256 and file information in the file.") 26 | errors = odm.List(odm.Keyword(), default=[], description="List of error keys") 27 | results = odm.Mapping(odm.Any(), default={}, description="Result key value mapping") 28 | 29 | 30 | def from_datastore_submission(submission: DatabaseSubmission): 31 | """ 32 | A helper to convert between database model version of Submission 33 | and the message version of Submission. 
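Only the fields the two models share (sid, files, metadata, params and
scan_key) are copied; message-only fields such as file_tree, file_infos,
errors and results are left at their defaults.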
34 | """ 35 | return Submission( 36 | { 37 | "sid": submission.sid, 38 | "files": submission.files, 39 | "metadata": submission.metadata, 40 | "params": submission.params, 41 | "scan_key": submission.scan_key, 42 | } 43 | ) 44 | 45 | 46 | @odm.model(description="Model of Submission Message") 47 | class SubmissionMessage(odm.Model): 48 | msg = odm.Compound(Submission, description="Body of the message") 49 | msg_loader = odm.Enum( 50 | values={LOADER_CLASS}, default=LOADER_CLASS, description="Class to use to load the message as an object" 51 | ) # 52 | msg_type = odm.Enum(values=MSG_TYPES, description="Type of message") 53 | sender = odm.Keyword(description="Sender of the message") 54 | -------------------------------------------------------------------------------- /docker/al_management/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm 2 | 3 | # Make sure root account is locked so 'su' commands fail all the time 4 | RUN passwd -l root 5 | 6 | # Upgrade packages 7 | RUN apt-get update && apt-get -yy upgrade && rm -rf /var/lib/apt/lists/* 8 | 9 | # Get required apt packages 10 | RUN apt-get update && apt-get install -yy build-essential libssl-dev libffi-dev libfuzzy-dev libldap2-dev libsasl2-dev libmagic1 && rm -rf /var/lib/apt/lists/* 11 | 12 | # Add assemblyline user 13 | RUN useradd -s /bin/bash -b /var/lib -U -m assemblyline 14 | 15 | # Create assemblyline config directory 16 | RUN mkdir -p /etc/assemblyline 17 | RUN chmod 750 /etc/assemblyline 18 | RUN chown root:assemblyline /etc/assemblyline 19 | 20 | # Create assemblyline cache directory 21 | RUN mkdir -p /var/cache/assemblyline 22 | RUN chmod 770 /var/cache/assemblyline 23 | RUN chown assemblyline:assemblyline /var/cache/assemblyline 24 | 25 | # Create assemblyline home directory 26 | RUN mkdir -p /var/lib/assemblyline 27 | RUN chmod 770 /var/lib/assemblyline 28 | RUN chown assemblyline:assemblyline /var/lib/assemblyline 29 | 30 | # Create assemblyline log directory 31 | RUN mkdir -p /var/log/assemblyline 32 | RUN chmod 770 /var/log/assemblyline 33 | RUN chown assemblyline:assemblyline /var/log/assemblyline 34 | 35 | # Switch to assemblyline user 36 | USER assemblyline 37 | 38 | # Create the assemblyline venv 39 | RUN python -m venv /var/lib/assemblyline/venv 40 | 41 | # Install packages in the venv 42 | RUN /bin/bash -c "source /var/lib/assemblyline/venv/bin/activate && pip install --no-warn-script-location --no-cache-dir --upgrade pip wheel && pip install --no-warn-script-location --no-cache-dir assemblyline assemblyline_core assemblyline_ui assemblyline-client ipython jupyter" 43 | 44 | # Setup venv when bash is launched 45 | RUN echo "source /var/lib/assemblyline/venv/bin/activate" >> /var/lib/assemblyline/.bashrc 46 | 47 | RUN mkdir -p /var/lib/assemblyline/jupyter 48 | RUN mkdir -p /var/lib/assemblyline/.jupyter 49 | RUN touch /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 50 | RUN echo 'import os' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 51 | RUN echo 'from jupyter_server.auth import passwd' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 52 | RUN echo 'c.NotebookApp.password = passwd(os.getenv("NB_PASSWORD", "devpass"))' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 53 | RUN echo 'c.NotebookApp.allow_remote_access = True' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 54 | RUN echo 'c.NotebookApp.base_url = "/notebook/"' >> 
/var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 55 | 56 | WORKDIR /var/lib/assemblyline 57 | 58 | CMD /bin/bash -c "source /var/lib/assemblyline/venv/bin/activate && (cd /var/lib/assemblyline/jupyter && jupyter notebook -y --no-browser --ip=*)" 59 | -------------------------------------------------------------------------------- /assemblyline/odm/models/retrohunt.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | 3 | 4 | INDEX_CATEGORIES = [ 5 | 'hot', 6 | 'archive', 7 | 'hot_and_archive', 8 | ] 9 | 10 | 11 | @odm.model(index=True, store=True, description="A search run on stored files.") 12 | class Retrohunt(odm.Model): 13 | # Metadata 14 | indices = odm.Enum(INDEX_CATEGORIES, default='hot_and_archive', 15 | description="Defines the indices used for this retrohunt job") 16 | classification = odm.Classification(description="Classification for this retrohunt job") 17 | search_classification = odm.ClassificationString(description="Maximum classification of results in the search") 18 | creator = odm.keyword(copyto="__text__", description="User who created this retrohunt job") 19 | description = odm.Text(copyto="__text__", description="Human readable description of this retrohunt job") 20 | expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry timestamp of this retrohunt job") 21 | 22 | start_group = odm.long(description="Earliest expiry group this search will include") 23 | end_group = odm.long(description="Latest expiry group this search will include") 24 | 25 | created_time = odm.date(description="Time at which the retrohunt job was created.") 26 | started_time = odm.date(description="Start time for the search.") 27 | completed_time = odm.Optional(odm.Date(store=False), description="Time that the search ended") 28 | 29 | # Search data 30 | key = odm.keyword(description="Unique code identifying this retrohunt job") 31 | raw_query = odm.keyword(store=False, description="Text of filter query derived from yara signature") 32 | yara_signature = odm.keyword(copyto="__text__", store=False, description="Text of original yara signature run") 33 | 34 | # Completion data 35 | errors = odm.sequence(odm.keyword(store=False), store=False, 36 | description="List of error messages that occurred during the search") 37 | warnings = odm.sequence(odm.keyword(store=False), store=False, 38 | description="List of warning messages that occurred during the search") 39 | finished = odm.boolean(default=False, description="Boolean that indicates if this retrohunt job is finished") 40 | truncated = odm.boolean(default=False, description="Indicates if the list of hits has been truncated at some limit") 41 | 42 | 43 | @odm.model(index=True, store=True, description="A hit encountered during a retrohunt search.") 44 | class RetrohuntHit(odm.Model): 45 | key = odm.keyword(description="Unique code identifying this hit") 46 | classification = odm.Classification(description="Classification string for the retrohunt job and results list") 47 | sha256 = odm.SHA256() 48 | expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry for this entry.") 49 | search = odm.keyword() 50 | -------------------------------------------------------------------------------- /docker/al_management/pipeline.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG build_image 2 | FROM $build_image 3 | ARG version 4 | ARG version_tag=${version} 5 | 6 | ENV ASSEMBLYLINE_VERSION=${version} 7 | ENV 
ASSEMBLYLINE_IMAGE_TAG=${version_tag} 8 | 9 | # Make sure root account is locked so 'su' commands fail all the time 10 | RUN passwd -l root 11 | 12 | # Get required apt packages 13 | RUN apt-get update && apt-get install -yy build-essential libssl-dev libffi-dev libfuzzy-dev libldap2-dev libsasl2-dev libmagic1 && rm -rf /var/lib/apt/lists/* 14 | 15 | # Add assemblyline user 16 | RUN useradd -s /bin/bash -b /var/lib -U -m assemblyline 17 | 18 | # Create assemblyline config directory 19 | RUN mkdir -p /etc/assemblyline 20 | RUN chmod 750 /etc/assemblyline 21 | RUN chown root:assemblyline /etc/assemblyline 22 | 23 | # Create assemblyline cache directory 24 | RUN mkdir -p /var/cache/assemblyline 25 | RUN chmod 770 /var/cache/assemblyline 26 | RUN chown assemblyline:assemblyline /var/cache/assemblyline 27 | 28 | # Create assemblyline home directory 29 | RUN mkdir -p /var/lib/assemblyline 30 | RUN chmod 770 /var/lib/assemblyline 31 | RUN chown assemblyline:assemblyline /var/lib/assemblyline 32 | 33 | # Create assemblyline log directory 34 | RUN mkdir -p /var/log/assemblyline 35 | RUN chmod 770 /var/log/assemblyline 36 | RUN chown assemblyline:assemblyline /var/log/assemblyline 37 | 38 | # Switch to assemblyline user 39 | USER assemblyline 40 | 41 | # Create the assemblyline venv 42 | RUN python -m venv /var/lib/assemblyline/venv 43 | 44 | # Install packages in the venv 45 | COPY setup.py dist* dist/ 46 | RUN /bin/bash -c "source /var/lib/assemblyline/venv/bin/activate && pip install --no-cache-dir --upgrade pip wheel && pip install --no-cache-dir -f dist/ assemblyline==$version assemblyline_core==$version assemblyline_ui==$version assemblyline-client ipython jupyter" 47 | 48 | # Setup venv when bash is launched 49 | RUN echo "source /var/lib/assemblyline/venv/bin/activate" >> /var/lib/assemblyline/.bashrc 50 | 51 | RUN mkdir -p /var/lib/assemblyline/jupyter 52 | RUN mkdir -p /var/lib/assemblyline/.jupyter 53 | RUN touch /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 54 | RUN echo 'import os' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 55 | RUN echo 'from notebook.auth import passwd' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 56 | RUN echo 'c.NotebookApp.password = passwd(os.getenv("NB_PASSWORD", "devpass"))' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 57 | RUN echo 'c.NotebookApp.allow_remote_access = True' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 58 | RUN echo 'c.NotebookApp.base_url = "/notebook/"' >> /var/lib/assemblyline/.jupyter/jupyter_notebook_config.py 59 | 60 | WORKDIR /var/lib/assemblyline 61 | 62 | CMD /bin/bash -c "source /var/lib/assemblyline/venv/bin/activate && (cd /var/lib/assemblyline/jupyter && jupyter notebook -y --no-browser --ip=*)" 63 | -------------------------------------------------------------------------------- /assemblyline/odm/models/ontology/results/process.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common.dict_utils import get_dict_fingerprint_hash 3 | from os import environ 4 | 5 | OID_PARTS = ['pid', 'ppid', 'image', 'command_line'] 6 | 7 | 8 | @odm.model(description="Details about the characteristics used to identify an object") 9 | class ObjectID(odm.Model): 10 | tag = odm.Text(description="The normalized tag of the object") 11 | ontology_id = odm.Keyword(description="Deterministic identifier of ontology. 
This value should be able to be " 12 | "replicable between services that have access to similar object details, " 13 | "such that it can be used for relating objects in post-processing.") 14 | service_name = odm.Keyword(default=environ.get('AL_SERVICE_NAME', 'unknown'), 15 | description="Component that generated this section") 16 | guid = odm.Optional(odm.Text(), description="The GUID associated with the object") 17 | treeid = odm.Optional(odm.Text(), description="The hash of the tree ID") 18 | processtree = odm.Optional(odm.Keyword(), description="Human-readable tree ID (concatenation of tags)") 19 | time_observed = odm.Optional(odm.Date(), description="The time at which the object was observed") 20 | session = odm.Optional(odm.Keyword(), description="Unifying session name/ID") 21 | 22 | @odm.model(description="Details about a process") 23 | class Process(odm.Model): 24 | objectid = odm.Compound(ObjectID, description="The object ID of the process object") 25 | image = odm.Text(default="", description="The image of the process") 26 | start_time = odm.Date(description="The time of creation for the process") 27 | 28 | # Parent process details 29 | pobjectid = odm.Optional(odm.Compound(ObjectID), description="The object ID of the parent process object") 30 | pimage = odm.Optional(odm.Text(), description="The image of the parent process that spawned this process") 31 | pcommand_line = odm.Optional(odm.Text(), description="The command line that the parent process ran") 32 | ppid = odm.Optional(odm.Integer(), description="The process ID of the parent process") 33 | 34 | pid = odm.Optional(odm.Integer(), description="The process ID") 35 | command_line = odm.Optional(odm.Text(), description="The command line that the process ran") 36 | end_time = odm.Optional(odm.Date(), description="The time of termination for the process") 37 | integrity_level = odm.Optional(odm.Text(), description="The integrity level of the process") 38 | image_hash = odm.Optional(odm.Text(), description="The hash of the file run") 39 | original_file_name = odm.Optional(odm.Text(), description="The original name of the file") 40 | 41 | def get_oid(data: dict): 42 | return f"process_{get_dict_fingerprint_hash({key: data.get(key) for key in OID_PARTS})}" 43 | -------------------------------------------------------------------------------- /test/test_metrics.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import pytest 4 | 5 | from assemblyline.common.metrics import MetricsFactory, PerformanceTimer 6 | from assemblyline import odm 7 | from assemblyline.common import forge 8 | from assemblyline.remote.datatypes.exporting_counter import export_metrics_once 9 | 10 | 11 | @odm.model() 12 | class Metrics(odm.Model): 13 | counter = odm.Integer() 14 | performance_counter = PerformanceTimer() 15 | 16 | 17 | def test_metrics_counter(redis_connection): 18 | source = MetricsFactory('test', Metrics, redis=redis_connection) 19 | 20 | channel = forge.get_metrics_sink(redis_connection) 21 | channel.listen(blocking=False) 22 | 23 | source.increment('counter', 55) 24 | source.increment_execution_time('performance_counter', 6) 25 | source.increment_execution_time('performance_counter', 6) 26 | 27 | start = time.time() 28 | read = {} 29 | for metric_message in channel.listen(blocking=False): 30 | if 'counter' in read and 'performance_counter.t' in read: 31 | break 32 | 33 | if time.time() - start > 30: 34 | pytest.fail() 35 | 36 | if metric_message is None: 37 | time.sleep(0.1) 38 | 
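# nothing received on the metrics channel yet; back off briefly before polling again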
continue 39 | 40 | if metric_message['type'] == 'test': 41 | for key, value in metric_message.items(): 42 | if isinstance(value, (int, float)): 43 | read[key] = read.get(key, 0) + value 44 | 45 | assert read['counter'] == 55 46 | assert read['performance_counter.t'] == 12 47 | assert read['performance_counter.c'] == 2 48 | 49 | source.stop() 50 | 51 | 52 | # FIXME: This particular test is hit-or-miss when actually ran in pipelines 53 | # def test_metrics_export(redis_connection): 54 | # channel = forge.get_metrics_sink(redis_connection) 55 | 56 | # start = time.time() 57 | # read = {} 58 | # sent = False 59 | 60 | # for metric_message in channel.listen(blocking=False): 61 | # if 'counter' in read and 'performance_counter.t' in read: 62 | # break 63 | 64 | # if sent and time.time() - start > 20: 65 | # assert False, read 66 | 67 | # if not sent: 68 | # sent = True 69 | # export_metrics_once('test', Metrics, {'counter': 99, 'performance_counter': 6}, redis=redis_connection) 70 | # # Set the start time to when the metrics should've been exported 71 | # start = time.time() 72 | 73 | # if metric_message is None: 74 | # time.sleep(0.1) 75 | # continue 76 | 77 | # if metric_message['type'] == 'test': 78 | # for key, value in metric_message.items(): 79 | # if isinstance(value, (int, float)): 80 | # read[key] = read.get(key, 0) + value 81 | 82 | # assert read['counter'] == 99 83 | # assert read['performance_counter.t'] == 6 84 | # assert read['performance_counter.c'] == 1 85 | -------------------------------------------------------------------------------- /assemblyline/common/digests.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import ssdeep 3 | import tlsh 4 | from typing import Dict 5 | 6 | from assemblyline.common import entropy 7 | 8 | DEFAULT_BLOCKSIZE = 65536 9 | 10 | 11 | # noinspection PyBroadException 12 | def get_digests_for_file(path: str, blocksize: int = DEFAULT_BLOCKSIZE, calculate_entropy: bool = True, 13 | on_first_block=lambda _b, _l, _p: {}, skip_fuzzy_hashes: bool = False) -> Dict: 14 | """ Generate digests for file reading only 'blocksize bytes at a time.""" 15 | bc = None 16 | if calculate_entropy: 17 | try: 18 | bc = entropy.BufferedCalculator() 19 | except Exception: 20 | pass 21 | 22 | result = {} 23 | 24 | md5 = hashlib.md5() 25 | sha1 = hashlib.sha1() 26 | sha256 = hashlib.sha256() 27 | if not skip_fuzzy_hashes: 28 | th = tlsh.Tlsh() 29 | size = 0 30 | 31 | with open(path, 'rb') as f: 32 | data = f.read(blocksize) 33 | length = len(data) 34 | 35 | if not size: 36 | result.update(on_first_block(data, length, path)) 37 | 38 | while length > 0: 39 | if bc is not None: 40 | bc.update(data, length) 41 | md5.update(data) 42 | sha1.update(data) 43 | sha256.update(data) 44 | if not skip_fuzzy_hashes: 45 | th.update(data) 46 | size += length 47 | 48 | data = f.read(blocksize) 49 | length = len(data) 50 | 51 | if bc is not None: 52 | result['entropy'] = bc.entropy() 53 | else: 54 | result['entropy'] = 0 55 | result['md5'] = md5.hexdigest() 56 | result['sha1'] = sha1.hexdigest() 57 | result['sha256'] = sha256.hexdigest() 58 | result['size'] = size 59 | 60 | if not skip_fuzzy_hashes: 61 | result["ssdeep"] = ssdeep.hash_from_file(path) 62 | # Try to finalise the TLSH Hash and add it to the results 63 | try: 64 | th.final() 65 | result['tlsh'] = th.hexdigest() 66 | except Exception: 67 | pass 68 | 69 | return result 70 | 71 | 72 | def get_md5_for_file(path: str, blocksize: int = DEFAULT_BLOCKSIZE) -> str: 73 | md5 = 
hashlib.md5() 74 | with open(path, 'rb') as f: 75 | data = f.read(blocksize) 76 | length = len(data) 77 | 78 | while length > 0: 79 | md5.update(data) 80 | data = f.read(blocksize) 81 | length = len(data) 82 | 83 | return md5.hexdigest() 84 | 85 | 86 | def get_sha256_for_file(path: str, blocksize: int = DEFAULT_BLOCKSIZE) -> str: 87 | sha256 = hashlib.sha256() 88 | with open(path, 'rb') as f: 89 | data = f.read(blocksize) 90 | length = len(data) 91 | 92 | while length > 0: 93 | sha256.update(data) 94 | data = f.read(blocksize) 95 | length = len(data) 96 | 97 | return sha256.hexdigest() 98 | -------------------------------------------------------------------------------- /assemblyline/common/entropy.py: -------------------------------------------------------------------------------- 1 | import io 2 | 3 | from math import log 4 | from typing import Tuple, List, BinaryIO, AnyStr 5 | 6 | frequency = None 7 | 8 | # The minimum partition size should be 256 bytes as the keyspace 9 | # for a single byte is 256 values 10 | MIN_PARTITION_SIZE = 256 11 | 12 | 13 | def calculate_entropy(contents: bytes) -> float: 14 | """This function calculates the entropy of the file. 15 | It is given by the formula: 16 | E = -SUM[v in 0..255](p(v) * log2(p(v))) 17 | """ 18 | calculator = BufferedCalculator() 19 | calculator.update(contents) 20 | return calculator.entropy() 21 | 22 | 23 | def calculate_partition_entropy(fin: BinaryIO, num_partitions: int = 50) -> Tuple[float, List[float]]: 24 | """Calculate the entropy of a file and its partitions.""" 25 | 26 | # Split input into num_partitions and calculate 27 | # per-partition entropy. 28 | fin.seek(0, io.SEEK_END) 29 | size = fin.tell() 30 | fin.seek(0) 31 | 32 | if size == 0: 33 | return 0, [0] 34 | 35 | # Calculate the partition size to get the desired number of partitions, but make sure each 36 | # partition is at least the minimum partition size 37 | partition_size = max((size - 1)//num_partitions + 1, MIN_PARTITION_SIZE) 38 | 39 | # If our calculated partition size is the minimum partition size, our file is likely too small, so we will 40 | # calculate an alternate partition size that will make sure all blocks are of equal size 41 | if partition_size == MIN_PARTITION_SIZE: 42 | partition_size = (size-1) // ((size-1)//partition_size + 1) + 1 43 | 44 | # Also calculate full file entropy using buffered calculator. 45 | p_entropies = [] 46 | full_entropy_calculator = BufferedCalculator() 47 | for _ in range(num_partitions): 48 | partition = fin.read(partition_size) 49 | if not partition: 50 | break 51 | p_entropies.append(calculate_entropy(partition)) 52 | full_entropy_calculator.update(partition) 53 | return full_entropy_calculator.entropy(), p_entropies 54 | 55 | 56 | class BufferedCalculator(object): 57 | def __init__(self): 58 | global frequency 59 | import pyximport 60 | pyximport.install() 61 | # noinspection PyUnresolvedReferences 62 | from assemblyline.common import frequency 63 | 64 | self.c = {} 65 | self.length = 0 66 | 67 | def entropy(self) -> float: 68 | if self.length == 0: 69 | return 0.0 70 | 71 | length = float(self.length) 72 | 73 | entropy = 0.0 74 | for v in self.c.values(): 75 | prob = float(v) / length 76 | entropy += prob * log(prob, 2) 77 | 78 | entropy *= -1 79 | 80 | # Make sure we don't return -0.0. 
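# (with a single distinct byte value, prob == 1.0 and log2(1.0) == 0.0, so the negation above would otherwise yield -0.0)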
81 | if not entropy: 82 | entropy = 0.0 83 | 84 | return entropy 85 | 86 | def update(self, data: AnyStr, length: int = 0): 87 | if not length: 88 | length = len(data) 89 | 90 | self.length += length 91 | self.c = frequency.counts(data, length, self.c) 92 | -------------------------------------------------------------------------------- /assemblyline/filestore/transport/base.py: -------------------------------------------------------------------------------- 1 | from typing import AnyStr, Iterable, Optional 2 | 3 | from assemblyline.common.exceptions import ChainException 4 | 5 | 6 | def normalize_srl_path(srl): 7 | if '/' in srl: 8 | return srl 9 | 10 | return '{0}/{1}/{2}/{3}/{4}'.format(srl[0], srl[1], srl[2], srl[3], srl) 11 | 12 | 13 | class TransportException(ChainException): 14 | """ 15 | FileTransport exception base class. 16 | 17 | TransportException is a subclass of ChainException so that it can be 18 | used with the Chain and ChainAll decorators. 19 | """ 20 | pass 21 | 22 | 23 | class Transport(object): 24 | """ 25 | FileTransport base class. 26 | 27 | - Subclasses should override all methods. 28 | - Except as noted, FileTransport methods do not return a value and raise 29 | TransportException on failure. 30 | - Methods should only raise TransportExceptions. (The decorators 31 | Chain and ChainAll can be applied to a function/method and class, 32 | respectively, to ensure that any exceptions raised are converted to 33 | TransportExceptions.) 34 | """ 35 | 36 | def __init__(self, normalize=normalize_srl_path): 37 | self.normalize = normalize 38 | 39 | def close(self): 40 | pass 41 | 42 | def delete(self, path: str): 43 | """ 44 | Deletes the file. 45 | """ 46 | raise TransportException("Not Implemented") 47 | 48 | def exists(self, path: str) -> bool: 49 | """ 50 | Returns True if the path exists, False otherwise. 51 | Should work with both files and directories. 52 | """ 53 | raise TransportException("Not Implemented") 54 | 55 | def makedirs(self, path: str): 56 | """ 57 | Like os.makedirs (the super-mkdir): create the leaf directory path and 58 | any intermediate path segments. 59 | """ 60 | raise TransportException("Not Implemented") 61 | 62 | # File based functions 63 | def download(self, src_path: str, dst_path: str): 64 | """ 65 | Copies the content of the filestore src_path to the local dst_path. 66 | """ 67 | raise TransportException("Not Implemented") 68 | 69 | def upload(self, src_path: str, dst_path: str): 70 | """ 71 | Upload the local source file src_path to the filestore dst_path, overwriting dst_path if it already exists. 72 | """ 73 | raise TransportException("Not Implemented") 74 | 75 | # Buffer based functions 76 | def get(self, path: str) -> bytes: 77 | """ 78 | Returns the content of the file. 
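Subclasses should raise TransportException on failure, per the class contract above.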
79 | """ 80 | raise TransportException("Not Implemented") 81 | 82 | def put(self, dst_path: str, content: AnyStr): 83 | """ 84 | Put the content of the file in memory directly to the filestore dst_path 85 | """ 86 | raise TransportException("Not Implemented") 87 | 88 | def list(self, prefix: Optional[str] = None) -> Iterable[str]: 89 | """List all files in the store filtered by name prefix.""" 90 | raise NotImplementedError() 91 | -------------------------------------------------------------------------------- /assemblyline/odm/models/ontology/results/signature.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common.dict_utils import get_dict_fingerprint_hash 3 | from assemblyline.odm.models.result import Attack 4 | from assemblyline.odm.models.ontology.results.process import ObjectID 5 | 6 | OID_PARTS = ['name', 'type'] 7 | TAG_PARTS = ['type', 'name'] 8 | 9 | 10 | @odm.model(description="Attribute relating to the signature that was raised during the analysis of the task") 11 | class Attribute(odm.Model): 12 | source = odm.Compound(ObjectID, description="Object that the rule triggered on") 13 | target = odm.Optional(odm.Compound(ObjectID), description="Object targetted by source object") 14 | action = odm.Optional(odm.Enum(values= # Process actions 15 | ['clipboard_capture', 'create_remote_thread', 'create_stream_hash', 'dns_query', 'driver_loaded', 16 | 'file_change', 'file_creation', 'file_delete', 'image_loaded', 'network_connection', 'network_connection_linux', 17 | 'pipe_created', 'process_access', 'process_creation', 'process_creation_linux', 'process_tampering', 18 | 'process_terminated', 'raw_access_thread', 'registry_add', 'registry_delete', 'registry_event', 'registry_rename', 19 | 'registry_set', 'sysmon_error', 'sysmon_status', 'wmi_event'], 20 | description="The relation between the source and target")) 21 | meta = odm.Optional(odm.Text(), description="Metadata about the detection") 22 | event_record_id = odm.Optional(odm.Text(), description="Event Record ID (Event Logs)") 23 | domain = odm.Optional(odm.Domain(), description="Domain") 24 | uri = odm.Optional(odm.URI(), description="URI") 25 | file_hash = odm.Optional(odm.SHA256(), description="SHA256 of file") 26 | 27 | 28 | @ odm.model(index=False, store=False, description="A signature that was raised during the analysis of the task") 29 | class Signature(odm.Model): 30 | objectid = odm.Compound(ObjectID, description="The object ID of the signature object") 31 | 32 | name = odm.Keyword(description="The name of the signature") 33 | type = odm.Enum(values=['CUCKOO', 'YARA', 'SIGMA', 'SURICATA'], description="Type of signature") 34 | classification = odm.ClassificationString(description="Classification of signature") 35 | attributes = odm.Optional(odm.List(odm.Compound(Attribute)), description="Attributes about the signature") 36 | attacks = odm.Optional(odm.List(odm.Compound(Attack)), 37 | description="A list of ATT&CK patterns and categories of the signature") 38 | actors = odm.Optional(odm.List(odm.Text()), description="List of actors of the signature") 39 | malware_families = odm.Optional(odm.List(odm.Text()), description="List of malware families of the signature") 40 | signature_id = odm.Optional(odm.Text(), description="ID of signature") 41 | 42 | def get_oid(data: dict): 43 | return f"signature_{get_dict_fingerprint_hash({key: data.get(key) for key in OID_PARTS})}" 44 | 45 | def get_tag(data: dict): 46 | return 
'.'.join([data.get(key) for key in TAG_PARTS]) 47 | -------------------------------------------------------------------------------- /assemblyline/datastore/bulk.py: -------------------------------------------------------------------------------- 1 | import json 2 | import typing 3 | from copy import deepcopy 4 | 5 | 6 | class ElasticBulkPlan(object): 7 | def __init__(self, indexes: typing.List[str], model: typing.Optional[type] = None): 8 | self.indexes = indexes 9 | self.model = model 10 | self.operations: typing.List[str] = [] 11 | 12 | @property 13 | def empty(self): 14 | return len(self.operations) == 0 15 | 16 | def add_delete_operation(self, doc_id, index=None): 17 | if index: 18 | self.operations.append(json.dumps({"delete": {"_index": index, "_id": doc_id}})) 19 | else: 20 | for cur_index in self.indexes: 21 | self.operations.append(json.dumps({"delete": {"_index": cur_index, "_id": doc_id}})) 22 | 23 | def add_insert_operation(self, doc_id, doc, index=None): 24 | if self.model and isinstance(doc, self.model): 25 | saved_doc = doc.as_primitives(hidden_fields=True) 26 | elif self.model: 27 | saved_doc = self.model(doc).as_primitives(hidden_fields=True) 28 | else: 29 | if not isinstance(doc, dict): 30 | saved_doc = {'__non_doc_raw__': doc} 31 | else: 32 | saved_doc = deepcopy(doc) 33 | saved_doc['id'] = doc_id 34 | 35 | self.operations.append(json.dumps({"create": {"_index": index or self.indexes[0], "_id": doc_id}})) 36 | self.operations.append(json.dumps(saved_doc)) 37 | 38 | def add_upsert_operation(self, doc_id, doc, index=None): 39 | if self.model and isinstance(doc, self.model): 40 | saved_doc = doc.as_primitives(hidden_fields=True) 41 | elif self.model: 42 | saved_doc = self.model(doc).as_primitives(hidden_fields=True) 43 | else: 44 | if not isinstance(doc, dict): 45 | saved_doc = {'__non_doc_raw__': doc} 46 | else: 47 | saved_doc = deepcopy(doc) 48 | saved_doc['id'] = doc_id 49 | 50 | self.operations.append(json.dumps({"update": {"_index": index or self.indexes[0], "_id": doc_id}})) 51 | self.operations.append(json.dumps({"doc": saved_doc, "doc_as_upsert": True})) 52 | 53 | def add_update_operation(self, doc_id, doc, index=None): 54 | 55 | if self.model and isinstance(doc, self.model): 56 | saved_doc = doc.as_primitives(hidden_fields=True) 57 | elif self.model: 58 | saved_doc = self.model(doc, mask=list(doc.keys())).as_primitives(hidden_fields=True) 59 | else: 60 | if not isinstance(doc, dict): 61 | saved_doc = {'__non_doc_raw__': doc} 62 | else: 63 | saved_doc = deepcopy(doc) 64 | 65 | if index: 66 | self.operations.append(json.dumps({"update": {"_index": index, "_id": doc_id}})) 67 | self.operations.append(json.dumps({"doc": saved_doc})) 68 | else: 69 | for cur_index in self.indexes: 70 | self.operations.append(json.dumps({"update": {"_index": cur_index, "_id": doc_id}})) 71 | self.operations.append(json.dumps({"doc": saved_doc})) 72 | 73 | def get_plan_data(self): 74 | return "\n".join(self.operations) 75 | -------------------------------------------------------------------------------- /assemblyline/common/constants.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import os 3 | from assemblyline.common.path import modulepath 4 | 5 | 6 | SUBMISSION_QUEUE = 'dispatch-submission-queue' 7 | DISPATCH_TASK_HASH = 'dispatch-active-submissions' 8 | DISPATCH_RUNNING_TASK_HASH = 'dispatch-active-tasks' 9 | SCALER_TIMEOUT_QUEUE = 'scaler-timeout-queue' 10 | CONFIG_HASH = 'al-config' 11 | POST_PROCESS_CONFIG_KEY = 
'post-process-actions' 12 | 13 | 14 | # Some pure functions for building queue names (effectively for appending/prepending constants to strings) 15 | def service_queue_name(service: str) -> str: 16 | """Take the name of a service, and provide the queue name to send tasks to that service.""" 17 | return 'service-queue-' + service 18 | 19 | 20 | def make_watcher_list_name(sid: str) -> str: 21 | """Get the name of the list dispatcher will pull for sending out submission events.""" 22 | return 'dispatch-watcher-list-' + sid 23 | 24 | 25 | def get_temporary_submission_data_name(sid: str, file_hash: str) -> str: 26 | """The HashMap used for tracking auxiliary processing data.""" 27 | return '/'.join((sid, file_hash, 'temp_data')) 28 | 29 | 30 | def get_tag_set_name(sid: str, file_hash: str) -> str: 31 | """The HashSet used to track the tags for an in-process file.""" 32 | return '/'.join((sid, file_hash, 'tags')) 33 | 34 | 35 | # A table storing information about the state of a service, expected type is ExpiringHash 36 | # with a default ttl of None, and the ttl set per field based on the timeouts of queries 37 | # and service operation 38 | class ServiceStatus(enum.IntEnum): 39 | Idle = 0 40 | Running = 1 41 | 42 | 43 | SERVICE_STATE_HASH = 'service-stasis-table' 44 | 45 | # A null or empty accepts pattern accepts everything. A null rejects pattern rejects nothing 46 | DEFAULT_SERVICE_ACCEPTS = ".*" 47 | DEFAULT_SERVICE_REJECTS = "empty|metadata/.*" 48 | 49 | # Priority used to drop tasks in the ingester 50 | DROP_PRIORITY = 0 51 | 52 | # Maximum priority that can be assigned to a submission 53 | MAX_PRIORITY = 1500 54 | 55 | # Queue priority values for each bucket in the ingester 56 | PRIORITIES = { 57 | 'low': 100, # 1 -> 100 58 | 'medium': 200, # 101 -> 200 59 | 'high': 300, # 201 -> 300 60 | 'critical': 400, # 301 -> 400 61 | 'user-low': 500, # 401 -> 500 62 | 'user-medium': 1000, # 501 -> 1000 63 | 'user-high': MAX_PRIORITY # 1001 -> 1500 64 | } 65 | 66 | 67 | # The above priority values presented as a range for consistency 68 | PRIORITY_RANGES = {} 69 | _start = DROP_PRIORITY 70 | for _end, _level in sorted((val, key) for key, val in PRIORITIES.items()): 71 | PRIORITY_RANGES[_level] = (_start + 1, _end) 72 | _start = _end 73 | 74 | 75 | # Score thresholds for determining which queue priority a reingested item 76 | # gets based on its previous score. 
77 | # eg.: item with a previous score of 99 will get 'low' priority 78 | # item with a previous score of 300 will get a 'high' priority 79 | PRIORITY_THRESHOLDS = { 80 | 'critical': 500, 81 | 'high': 100, 82 | } 83 | 84 | MAGIC_RULE_PATH = os.path.join(modulepath(__name__), 'custom.magic') 85 | YARA_RULE_PATH = os.path.join(modulepath(__name__), 'custom.yara') 86 | 87 | MAX_INT = 2_147_483_647 88 | -------------------------------------------------------------------------------- /test/test_regexes.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pytest 4 | from assemblyline.odm.base import FULL_URI, TLSH_REGEX, UNC_PATH_REGEX 5 | 6 | FULL_URI_COMP = re.compile(FULL_URI) 7 | TLSH_REGEX_COMP = re.compile(TLSH_REGEX) 8 | UNC_PATH_COMP = re.compile(UNC_PATH_REGEX) 9 | 10 | 11 | @pytest.mark.parametrize("value, ismatch", [ 12 | ("blah", False), 13 | ("http://blah", False), 14 | ("http://blah.com", True), 15 | ("http://blah.com:abc", False), 16 | ("http://blah.com:123", True), 17 | ("http://blah.com:123?blah", True), 18 | ("http://blah.com:123/blah", True), 19 | ("http://blah.com:123/blah?blah", True), 20 | ("1.1.1.1", False), 21 | ("http://1.1.1.1", True), 22 | ("http://1.1.1.1:123", True), 23 | ("http://1.1.1.1:123/blah", True), 24 | ("http://1.1.1.1:123/blah?blah", True), 25 | ("net.tcp://1.1.1.1:123", True), 26 | ("net.tcp://1.1.1.1:1", True), 27 | # URI requires a scheme: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#scheme 28 | ("//1.1.1.1:1", False), 29 | # Scheme must start with A-Z: https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 30 | ("7://site.com:8080/stuff", False), 31 | ("9http://1.1.1.1/stuff", False), 32 | (".jpg-ohttp://1.1.1.1/", False), 33 | (".://site.com/?e=stuff", False), 34 | ("-urihttps://site.com/", False), 35 | ("+://site.com/", False), 36 | ]) 37 | def test_full_uri_regex(value, ismatch): 38 | if ismatch: 39 | assert FULL_URI_COMP.match(value) is not None 40 | else: 41 | assert FULL_URI_COMP.match(value) is None 42 | 43 | 44 | @pytest.mark.parametrize(("value", "expected"), [ 45 | ("https://example.com/@this/is/a/path", "example.com"), 46 | ("https://example.com?@query", "example.com"), 47 | ("https://example.com#@fragment", "example.com"), 48 | ]) 49 | def test_full_uri_capture(value, expected): 50 | assert FULL_URI_COMP.match(value).group(2) == expected 51 | 52 | 53 | @pytest.mark.parametrize("value, ismatch", [ 54 | ("T1A0F4F19BB9A15CDED5F2937AC6B293A35221FF23A357462F1498270D69202C8EA4D36F", True), 55 | ("abcdef01234567899876543210fedcba", False), 56 | ("A034F19BB7A15CDED5F2037AC6B293A35221FF23A357462F1498270D69202C8EA4D36F", True), 57 | ("034F1/9BB7A15CDED5F2037AC6B293A35221FF23A357462F1498270D69202C8EA4D36F", False), 58 | ("T1A034F19BB7A15CDEZ5F2037AC6B293A35221FF23A357462F1498270D69202C8EA4D36F", False), 59 | ("T1A034F19BB7A15CDED5F2037AC6B293A35221FF23A357462F1498270D69202C8EA4D36F2", False), 60 | ("T1a0f4f19bb9a15cded5f2937ac6b293a35221ff23a357462f1498270d69202c8ea4d36f", True), 61 | ("T1a0f4f19bb9a15cdED5F2937AC6B293A35221FF23A357462f1498270d69202c8ea4d36f", True), 62 | ("", False), 63 | ]) 64 | def test_tlsh_regex(value, ismatch): 65 | if ismatch: 66 | assert TLSH_REGEX_COMP.match(value) is not None 67 | else: 68 | assert TLSH_REGEX_COMP.match(value) is None 69 | 70 | 71 | @pytest.mark.parametrize(("value", "is_match"), [ 72 | (R"\\domain-segment-that-is-long.trycloudflare.com@SSL\DavWWWRoot\4ABCDEFGI", True), 73 | (R"\\127.0.0.1\c$\temp\test-file.txt", True), 74 | 
(R"\temp\test-file.txt", False), 75 | ]) 76 | def test_unc_path_regex(value, is_match): 77 | assert is_match == bool(UNC_PATH_COMP.match(value)) 78 | 79 | -------------------------------------------------------------------------------- /docker/nginx-ssl-frontend/minimal.template: -------------------------------------------------------------------------------- 1 | error_log ${ERROR_LOG} ${ERROR_LEVEL}; 2 | 3 | server { 4 | server_name ${FQDN}; 5 | listen 443 ssl; 6 | charset utf-8; 7 | client_max_body_size ${MAX_BODY_SIZE}; 8 | 9 | ssl_session_cache shared:SSL:20m; 10 | ssl_session_timeout 60m; 11 | ssl_prefer_server_ciphers on; 12 | ssl_ciphers ECDH+AESGCM:ECDH+AES256:ECDH+AES128:DHE+AES128:!ADH:!AECDH:!MD5; 13 | ssl_protocols TLSv1 TLSv1.1 TLSv1.2; 14 | 15 | ssl_certificate /etc/ssl/nginx.crt; 16 | ssl_certificate_key /etc/ssl/nginx.key; 17 | 18 | server_tokens off; 19 | 20 | access_log ${ACCESS_LOG}; 21 | 22 | proxy_read_timeout ${READ_TIMEOUT}; 23 | proxy_connect_timeout ${CONNECT_TIMEOUT}; 24 | proxy_send_timeout ${SEND_TIMEOUT}; 25 | 26 | location / { 27 | try_files ${DOLLAR}uri @frontend; 28 | } 29 | 30 | location @frontend { 31 | add_header X-Frame-Options SAMEORIGIN; 32 | add_header Strict-Transport-Security "max-age=31536000; includeSubDomains"; 33 | 34 | proxy_set_header X-Forwarded-For ${DOLLAR}proxy_add_x_forwarded_for; 35 | proxy_set_header X-Remote-Cert-Verified ${DOLLAR}ssl_client_verify; 36 | proxy_set_header X-Remote-DN ${DOLLAR}ssl_client_s_dn; 37 | proxy_set_header Host ${DOLLAR}http_host; 38 | proxy_set_header Scheme ${DOLLAR}scheme; 39 | proxy_set_header Server-Port ${DOLLAR}server_port; 40 | 41 | proxy_pass http://${FRONTEND_HOST}:3000; 42 | } 43 | 44 | location /socket.io/ { 45 | add_header Strict-Transport-Security "max-age=31536000; includeSubDomains"; 46 | 47 | proxy_set_header X-Remote-User ${DOLLAR}remote_user; 48 | proxy_set_header X-Forwarded-For ${DOLLAR}proxy_add_x_forwarded_for; 49 | proxy_set_header Host ${DOLLAR}http_host; 50 | proxy_redirect off; 51 | proxy_buffering off; 52 | proxy_http_version 1.1; 53 | proxy_set_header Upgrade ${DOLLAR}http_upgrade; 54 | proxy_set_header Connection "upgrade"; 55 | 56 | proxy_pass http://${SOCKET_HOST}:5002; 57 | } 58 | 59 | location /api/ { 60 | add_header X-Frame-Options SAMEORIGIN; 61 | add_header Strict-Transport-Security "max-age=31536000; includeSubDomains"; 62 | 63 | proxy_set_header X-Forwarded-For ${DOLLAR}proxy_add_x_forwarded_for; 64 | proxy_set_header X-Remote-Cert-Verified ${DOLLAR}ssl_client_verify; 65 | proxy_set_header X-Remote-DN ${DOLLAR}ssl_client_s_dn; 66 | proxy_set_header Host ${DOLLAR}http_host; 67 | proxy_set_header Scheme ${DOLLAR}scheme; 68 | proxy_set_header Server-Port ${DOLLAR}server_port; 69 | 70 | proxy_pass http://${UI_HOST}:5000; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /assemblyline/remote/datatypes/set.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | 4 | from assemblyline.remote.datatypes import get_client, retry_call 5 | 6 | _drop_card_script = """ 7 | local set_name = ARGV[1] 8 | local key = ARGV[2] 9 | 10 | redis.call('srem', set_name, key) 11 | return redis.call('scard', set_name) 12 | """ 13 | 14 | _limited_add = """ 15 | local set_name = KEYS[1] 16 | local key = ARGV[1] 17 | local limit = tonumber(ARGV[2]) 18 | 19 | if redis.call('scard', set_name) < limit then 20 | redis.call('sadd', set_name, key) 21 | return true 22 | end 23 | return false 24 | 
""" 25 | 26 | 27 | class Set(object): 28 | def __init__(self, name, host=None, port=None): 29 | self.c = get_client(host, port, False) 30 | self.name = name 31 | self._drop_card = self.c.register_script(_drop_card_script) 32 | self._limited_add = self.c.register_script(_limited_add) 33 | 34 | def __enter__(self): 35 | return self 36 | 37 | def __exit__(self, exc_type, exc_val, exc_tb): 38 | self.delete() 39 | 40 | def add(self, *values): 41 | return retry_call(self.c.sadd, self.name, 42 | *[json.dumps(v) for v in values]) 43 | 44 | def limited_add(self, value, size_limit): 45 | """Add a single value to the set, but only if that wouldn't make the set grow past a given size.""" 46 | return retry_call(self._limited_add, keys=[self.name], args=[json.dumps(value), size_limit]) 47 | 48 | def exist(self, value): 49 | return retry_call(self.c.sismember, self.name, json.dumps(value)) 50 | 51 | def length(self): 52 | return retry_call(self.c.scard, self.name) 53 | 54 | def members(self): 55 | return [json.loads(s) for s in retry_call(self.c.smembers, self.name)] 56 | 57 | def remove(self, *values): 58 | return retry_call(self.c.srem, self.name, 59 | *[json.dumps(v) for v in values]) 60 | 61 | def drop(self, value): 62 | return retry_call(self._drop_card, args=[value]) 63 | 64 | def random(self, num=None): 65 | ret_val = retry_call(self.c.srandmember, self.name, num) 66 | if isinstance(ret_val, list): 67 | return [json.loads(s) for s in ret_val] 68 | else: 69 | return json.loads(ret_val) 70 | 71 | def pop(self): 72 | data = retry_call(self.c.spop, self.name) 73 | return json.loads(data) if data else None 74 | 75 | def pop_all(self): 76 | return [json.loads(s) for s in retry_call(self.c.spop, self.name, self.length())] 77 | 78 | def delete(self): 79 | retry_call(self.c.delete, self.name) 80 | 81 | 82 | class ExpiringSet(Set): 83 | def __init__(self, name, ttl=86400, host=None, port=None): 84 | super(ExpiringSet, self).__init__(name, host, port) 85 | self.ttl = ttl 86 | self.last_expire_time = 0 87 | 88 | def _conditional_expire(self): 89 | if self.ttl: 90 | ctime = time.time() 91 | if ctime > self.last_expire_time + (self.ttl / 2): 92 | retry_call(self.c.expire, self.name, self.ttl) 93 | self.last_expire_time = ctime 94 | 95 | def add(self, *values): 96 | rval = super(ExpiringSet, self).add(*values) 97 | self._conditional_expire() 98 | return rval 99 | -------------------------------------------------------------------------------- /assemblyline/odm/models/safelist.py: -------------------------------------------------------------------------------- 1 | from assemblyline import odm 2 | from assemblyline.common import forge 3 | 4 | Classification = forge.get_classification() 5 | SAFEHASH_TYPES = ["file", "tag", "signature"] 6 | SOURCE_TYPES = ["user", "external"] 7 | 8 | 9 | @odm.model(index=True, store=True, description="Hashes of a safelisted file") 10 | class Hashes(odm.Model): 11 | md5 = odm.Optional(odm.MD5(copyto="__text__"), description="MD5") 12 | sha1 = odm.Optional(odm.SHA1(copyto="__text__"), description="SHA1") 13 | sha256 = odm.Optional(odm.SHA256(copyto="__text__"), description="SHA256") 14 | 15 | 16 | @odm.model(index=True, store=False, description="File Details") 17 | class File(odm.Model): 18 | name = odm.List(odm.Keyword(store=True, copyto="__text__"), default=[], 19 | description="List of names seen for that file") 20 | size = odm.Optional(odm.long(), description="Size of the file in bytes") 21 | type = odm.Optional(odm.Keyword(), description="Type of file as identified by 
Assemblyline") 22 | 23 | 24 | @odm.model(index=True, store=False, description="Safelist source") 25 | class Source(odm.Model): 26 | classification = odm.Classification(default=Classification.UNRESTRICTED, 27 | description="Classification of the source") 28 | name = odm.Keyword(store=True, description="Name of the source") 29 | reason = odm.List(odm.Keyword(), description="Reason for why file was safelisted") 30 | type = odm.Enum(values=SOURCE_TYPES, description="Type of safelisting source") 31 | 32 | 33 | @odm.model(index=True, store=True, description="Tag associated to file") 34 | class Tag(odm.Model): 35 | type = odm.Keyword(description="Tag type") 36 | value = odm.Keyword(copyto="__text__", description="Tag value") 37 | 38 | 39 | @odm.model(index=True, store=True, description="Signature") 40 | class Signature(odm.Model): 41 | name = odm.Keyword(copyto="__text__", description="Name of the signature") 42 | 43 | 44 | @odm.model(index=True, store=True, description="Safelist Model") 45 | class Safelist(odm.Model): 46 | 47 | added = odm.Date(default="NOW", description="Date when the safelisted hash was added") 48 | classification = odm.Classification(description="Computed max classification for the safe hash") 49 | enabled = odm.Boolean(default=True, description="Is safe hash enabled or not?") 50 | expiry_ts = odm.Optional(odm.Date(), description="When does this item expire from the list?") 51 | hashes = odm.Compound(Hashes, default={}, description="List of hashes related to the safe hash") 52 | file = odm.Optional(odm.Compound(File), description="Information about the file") 53 | sources = odm.List(odm.Compound(Source), description="List of reasons why hash is safelisted") 54 | tag = odm.Optional(odm.Compound(Tag), description="Information about the tag") 55 | signature = odm.Optional(odm.Compound(Signature), description="Information about the signature") 56 | type = odm.Enum(values=SAFEHASH_TYPES, description="Type of safe hash") 57 | updated = odm.Date(default="NOW", description="Last date when sources were added to the safe hash") 58 | 59 | 60 | if __name__ == "__main__": 61 | from pprint import pprint 62 | from assemblyline.odm.randomizer import random_model_obj 63 | pprint(random_model_obj(Safelist, as_json=True)) 64 | -------------------------------------------------------------------------------- /assemblyline/odm/random_data/create_test_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from assemblyline.common import forge 4 | from assemblyline.odm.random_data import create_heuristics, create_users, create_services, create_signatures, \ 5 | create_submission, create_alerts, create_safelists, create_workflows, wipe_all_except_users, create_badlists 6 | 7 | 8 | class PrintLogger(object): 9 | def __init__(self, indent=""): 10 | self.indent = indent 11 | 12 | def info(self, msg): 13 | print(f"{self.indent}{msg}") 14 | 15 | def warn(self, msg): 16 | print(f"{self.indent}[W] {msg}") 17 | 18 | def error(self, msg): 19 | print(f"{self.indent}[E] {msg}") 20 | 21 | 22 | def create_basic_data(log=None, ds=None, svc=True, sigs=True, safelist=True, reset=False, badlist=True): 23 | ds = ds or forge.get_datastore() 24 | 25 | if reset: 26 | log.info("Wiping all collections...") 27 | for name in ds.ds._models: 28 | collection = ds.ds.__getattr__(name) 29 | collection.wipe() 30 | log.info(f"\t{name}") 31 | 32 | log.info("\nCreating user objects...") 33 | create_users(ds, log=log) 34 | 35 | if svc: 36 | log.info("\nCreating services...") 37 
| create_services(ds, log=log) 38 | 39 | if badlist: 40 | log.info("\nCreating random badlist...") 41 | create_badlists(ds, log=log) 42 | 43 | if safelist: 44 | log.info("\nCreating random safelist...") 45 | create_safelists(ds, log=log) 46 | 47 | if sigs: 48 | log.info("\nImporting test signatures...") 49 | signatures = create_signatures(ds) 50 | for s in signatures: 51 | log.info(f"\t{s}") 52 | 53 | if svc: 54 | log.info("\nCreating random heuristics...") 55 | create_heuristics(ds, log=log) 56 | 57 | 58 | def create_extra_data(log=None, ds=None, fs=None): 59 | ds = ds or forge.get_datastore() 60 | fs = fs or forge.get_filestore() 61 | 62 | log.info("\nCreating 10 Submissions...") 63 | submissions = [] 64 | for _ in range(10): 65 | s = create_submission(ds, fs, log=log) 66 | submissions.append(s) 67 | 68 | log.info("\nCreating 20 Workflows...") 69 | workflows = create_workflows(ds, log=log) 70 | 71 | log.info("\nCreating 50 Alerts...") 72 | create_alerts(ds, submission_list=submissions, log=log, workflows=workflows) 73 | 74 | log.info("\nGenerating statistics for signatures and heuristics...") 75 | ds.calculate_signature_stats() 76 | ds.calculate_heuristic_stats() 77 | 78 | 79 | if __name__ == "__main__": 80 | datastore = forge.get_datastore() 81 | filestore = forge.get_filestore() 82 | logger = PrintLogger() 83 | if "clean" in sys.argv: 84 | # Clean up data in indices except user 85 | wipe_all_except_users(datastore, filestore) 86 | 87 | create_basic_data(log=logger, ds=datastore, svc="nosvc" not in sys.argv, sigs="nosigs" not in sys.argv, 88 | safelist="nosl" not in sys.argv, reset="reset" in sys.argv, badlist="nobl" not in sys.argv) 89 | if "full" in sys.argv: 90 | create_extra_data(log=logger, ds=datastore) 91 | 92 | if "alerts" in sys.argv: 93 | logger.info("\nCreating extra 1000 Alerts...") 94 | create_alerts(datastore, alert_count=1000, log=logger) 95 | 96 | logger.info("\nDone.") 97 | -------------------------------------------------------------------------------- /assemblyline/remote/datatypes/queues/named.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | 4 | from typing import Generic, TypeVar, Optional 5 | 6 | from assemblyline.remote.datatypes import get_client, retry_call 7 | 8 | T = TypeVar('T') 9 | 10 | 11 | class NamedQueue(Generic[T]): 12 | def __init__(self, name: str, host=None, port=None, private: bool = False, ttl: int = 0): 13 | self.c = get_client(host, port, private) 14 | self.name: str = name 15 | self.ttl: int = ttl 16 | self.last_expire_time = 0 17 | 18 | def __enter__(self): 19 | return self 20 | 21 | def __exit__(self, exc_type, exc_val, exc_tb): 22 | self.delete() 23 | 24 | def _conditional_expire(self): 25 | if self.ttl: 26 | ctime = time.time() 27 | if ctime > self.last_expire_time + (self.ttl / 2): 28 | retry_call(self.c.expire, self.name, self.ttl) 29 | self.last_expire_time = ctime 30 | 31 | def delete(self): 32 | retry_call(self.c.delete, self.name) 33 | 34 | def __len__(self): 35 | return self.length() 36 | 37 | def length(self): 38 | return retry_call(self.c.llen, self.name) 39 | 40 | def peek_next(self) -> Optional[T]: 41 | response = retry_call(self.c.lrange, self.name, 0, 0) 42 | 43 | if response: 44 | return json.loads(response[0]) 45 | return None 46 | 47 | def content(self) -> list[T]: 48 | response = retry_call(self.c.lrange, self.name, 0, -1) 49 | if response: 50 | return [json.loads(resp) for resp in response] 51 | return [] 52 | 53 | def pop_batch(self, size) -> list[T]: 
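"""Pop and return up to size messages from the head of the queue; may return fewer if the queue is short."""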
54 | response = retry_call(self.c.lpop, self.name, size) 55 | 56 | if not response: 57 | return [] 58 | return [json.loads(r) for r in response] 59 | 60 | def pop(self, blocking: bool = True, timeout: int = 0) -> Optional[T]: 61 | if blocking: 62 | response = retry_call(self.c.blpop, self.name, timeout) 63 | else: 64 | response = retry_call(self.c.lpop, self.name) 65 | 66 | if not response: 67 | return response 68 | 69 | if blocking: 70 | return json.loads(response[1]) 71 | else: 72 | return json.loads(response) 73 | 74 | def push(self, *messages: T): 75 | for message in messages: 76 | retry_call(self.c.rpush, self.name, json.dumps(message)) 77 | self._conditional_expire() 78 | 79 | def unpop(self, *messages: T): 80 | """Put all messages passed back at the head of the FIFO queue.""" 81 | for message in messages: 82 | retry_call(self.c.lpush, self.name, json.dumps(message)) 83 | self._conditional_expire() 84 | 85 | 86 | def select(*queues, **kw): 87 | timeout = kw.get('timeout', 0) 88 | if len(queues) < 1: 89 | raise TypeError('At least one queue must be specified') 90 | if any([type(q) != NamedQueue for q in queues]): 91 | raise TypeError('Only NamedQueues supported') 92 | 93 | c = queues[0].c 94 | response = retry_call(c.blpop, [q.name for q in queues], timeout) 95 | 96 | if not response: 97 | return response 98 | 99 | return response[0].decode('utf-8'), json.loads(response[1]) 100 | --------------------------------------------------------------------------------
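
A minimal usage sketch for the NamedQueue class above (hypothetical queue name and payloads; assumes a reachable Redis instance with the project's default configuration):

    from assemblyline.remote.datatypes.queues.named import NamedQueue

    # Hypothetical work queue; host/port fall back to the library defaults,
    # and the context manager deletes the queue on exit.
    with NamedQueue('example-work-queue', ttl=3600) as queue:
        queue.push({'task': 1}, {'task': 2})   # rpush, one message at a time
        assert len(queue) == 2
        first = queue.pop(blocking=False)      # -> {'task': 1}
        queue.unpop(first)                     # put it back at the head
        batch = queue.pop_batch(10)            # drain up to 10 messages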