├── src ├── __init__.py ├── Api │ ├── __init__.py │ ├── pathmagic.py │ ├── av_count.py │ ├── cron.py │ ├── queue_count.py │ ├── last_uploaded.py │ ├── export.py │ └── queue_tasks.py ├── Modules │ ├── __init__.py │ ├── Module.py │ ├── MetaDataModule.py │ └── PEFileModule.py ├── PlugIns │ ├── __init__.py │ ├── Generic │ │ ├── __init__.py │ │ ├── SizePlug.py │ │ ├── FuzzyPlug.py │ │ ├── DescPlug.py │ │ ├── HashPlug.py │ │ ├── MimePlug.py │ │ └── DatePlug.py │ ├── PE │ │ ├── pathmagic.py │ │ ├── CypherPlug.py │ │ ├── ChildOfPlug.py │ │ ├── TestPlug.py │ │ ├── AVAnalysisPlug.py │ │ ├── __init__.py │ │ ├── PackerVersionPlug.py │ │ ├── EntropyPlug.py │ │ ├── CRCCheckPlug.py │ │ ├── TimeStampPlug.py │ │ ├── AddImportsToTreePlug.py │ │ ├── CheckEPSectionPlug.py │ │ ├── ImportsPlug.py │ │ ├── SectionsPlug.py │ │ ├── CheckPackerPlug.py │ │ ├── ResourceEntriesPlug.py │ │ ├── ExportsPlug.py │ │ ├── VersionInfoPlug.py │ │ ├── StringPlug.py │ │ └── HeadersPlug.py │ └── PlugIn.py ├── Query │ ├── __init__.py │ ├── pathmagic.py │ ├── query.py │ ├── getFile.py │ ├── searchForFuzzy.py │ ├── fuzzyCompare.py │ └── apifuzz.py ├── Utils │ ├── __init__.py │ ├── pathmagic.py │ ├── mailSender.py │ ├── test.py │ ├── InfoExtractor.py │ ├── TimeLogger.py │ ├── task.py │ ├── ProcessDate.py │ └── PEHeaderReader.py ├── config │ └── __init__.py ├── KeyManager │ ├── __init__.py │ └── pathmagic.py ├── MetaControl │ ├── __init__.py │ ├── pathmagic.py │ ├── Page.py │ ├── test.xml │ ├── XMLCreator.py │ └── MetaController.py ├── MetaDataPKG │ ├── __init__.py │ ├── PE │ │ ├── ImportsMeta.py │ │ ├── __init__.py │ │ └── SectionsMeta.py │ ├── Generic │ │ ├── HashMeta.py │ │ ├── __init__.py │ │ └── SizeMeta.py │ └── Metadata.py ├── Processors │ ├── __init__.py │ ├── ProcessorFactory.py │ ├── DummyProcessor.py │ ├── HashProcessor.py │ ├── PEProcessor.py │ └── Processor.py ├── PackageControl │ ├── __init__.py │ ├── pathmagic.py │ └── PackageController.py ├── VersionControl │ ├── __init__.py │ └── VersionController.py ├── pip_test_requirements.txt ├── TreeMenu │ ├── __init__.py │ └── GenericTree.py ├── pip_yargen_requirements.txt ├── pip_vt_api_requirements.txt ├── Scripts │ ├── pathmagic.py │ ├── release_semaphore.py │ └── create_indexes.py ├── health │ ├── pathmagic.py │ ├── delete_list_of_object_ids.py │ ├── 2016-12-22-2045-process_files_without_version.py │ ├── check_file_ids.py │ ├── 2016-12-22-2013-check_file_ids2.py │ ├── check_dates.py │ ├── check_dates_av.py │ └── 2017-01-17-1623-fix_dates_in_av.py ├── cron │ └── cronDockerfile ├── pip_requirements.txt ├── clear_failed_queue.py ├── tree_menu.py ├── env.py ├── workerDockerfile ├── Ram.py ├── db_pool.py ├── process_hash.py ├── Cataloger.py ├── default_config.py ├── tests │ └── test_health.py ├── ProcessControl.py ├── loadToMongo.py ├── Sample.py └── Launcher.py ├── yara ├── yara.zipdummy ├── pestudio.zipdummy ├── yarGen-master.zipdummy ├── YaraGenerator │ ├── modules │ │ ├── jshtml_blacklist.txt │ │ ├── office_blacklist.txt │ │ ├── pdf_blacklist.txt │ │ ├── unknown_blacklist.txt │ │ ├── email_regexblacklist.txt │ │ ├── jshtml_regexblacklist.txt │ │ ├── office_regexblacklist.txt │ │ ├── pdf_regexblacklist.txt │ │ ├── unknown_regexblacklist.txt │ │ ├── email_blacklist.txt │ │ └── exe_regexblacklist.txt │ └── README.md ├── readme.txt └── setupYarGen.sh ├── .dockerignore ├── TEMP └── readme.txt ├── CHANGELOG ├── doc ├── 26-charts.png ├── 39-equals.png ├── 01-download.png ├── 03-searchbox.png ├── 08-buttons.png ├── 09-buttons2.png ├── 10-buttons.png ├── 15-file-json.png ├── 
47-preview.png ├── 02-search_tree.png ├── 05-search-dll.png ├── 14-button-yara.png ├── 18-file-export.png ├── 20-file-scans.png ├── 40-menu-upload.png ├── 06-results-filter.png ├── 11-export-example.png ├── 12-button-process.png ├── 16-file-download.png ├── 17-file-process.png ├── 19-file-vt-scan.png ├── 21-search-tree2.png ├── 27-results-sort.png ├── 28-results-hash.png ├── 34-simple-compare.png ├── 41-upload-example.png ├── 42-load-example.png ├── 43-load-example2.png ├── 44-load-example3.png ├── 04-library_textbox.png ├── 07-results-filter2.png ├── 22-search-dino.exe.png ├── 23-results-dino.exe.png ├── 35-simple-compare2.png ├── 36-simple-compare3.png ├── 37-simple-compare4.png ├── 38-simple-compare5.png ├── 45-process-example.png ├── 46-process-example2.png ├── 13-button-copy-hashes.png ├── 24-search-tree-section-hash.png ├── 29-tree-menu-section-name.png ├── 30-searchbox-section-hash.png ├── 31-searchbox-section-hash2.png ├── 32-searchbox-section-hash3.png ├── 33-searchbox-section-hash4.png └── 25-results-search-by-section-hash.png ├── files_to_load └── .gitignore ├── circle.yml ├── .gitignore ├── LICENSE ├── Dockerfile ├── pull-install.yml ├── debug.yml ├── docker-compose.yml └── README.md /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/yara.zipdummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/Api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/Modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/PlugIns/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/Query/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/Utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/pestudio.zipdummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/KeyManager/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/MetaControl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/MetaDataPKG/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/Processors/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/yarGen-master.zipdummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/MetaDataPKG/PE/ImportsMeta.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/MetaDataPKG/PE/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/PackageControl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/VersionControl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/MetaDataPKG/Generic/HashMeta.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/MetaDataPKG/Generic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/MetaDataPKG/PE/SectionsMeta.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | .dockerignore 3 | -------------------------------------------------------------------------------- /TEMP/readme.txt: -------------------------------------------------------------------------------- 1 | Temporary folder for zip creation 2 | -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/jshtml_blacklist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/office_blacklist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/pdf_blacklist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/unknown_blacklist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/email_regexblacklist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/jshtml_regexblacklist.txt: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/yara/YaraGenerator/modules/office_regexblacklist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/pdf_regexblacklist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/unknown_regexblacklist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/pip_test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pycodestyle 3 | -------------------------------------------------------------------------------- /yara/readme.txt: -------------------------------------------------------------------------------- 1 | The yarGen script will be extracted into this folder 2 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | * Search for multiple export names at the same time 2 | 3 | -------------------------------------------------------------------------------- /src/TreeMenu/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["GenericTree", "ExecutableTree"] 2 | -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/email_blacklist.txt: -------------------------------------------------------------------------------- 1 | undisclosed-recipients:; -------------------------------------------------------------------------------- /yara/YaraGenerator/modules/exe_regexblacklist.txt: -------------------------------------------------------------------------------- 1 | ^thisisaplaceholder$ -------------------------------------------------------------------------------- /src/pip_yargen_requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | naiveBayesClassifier 3 | pefile 4 | scandir 5 | -------------------------------------------------------------------------------- /doc/26-charts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/26-charts.png -------------------------------------------------------------------------------- /doc/39-equals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/39-equals.png -------------------------------------------------------------------------------- /doc/01-download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/01-download.png -------------------------------------------------------------------------------- /doc/03-searchbox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/03-searchbox.png -------------------------------------------------------------------------------- /doc/08-buttons.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/08-buttons.png -------------------------------------------------------------------------------- /doc/09-buttons2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/09-buttons2.png -------------------------------------------------------------------------------- /doc/10-buttons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/10-buttons.png -------------------------------------------------------------------------------- /doc/15-file-json.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/15-file-json.png -------------------------------------------------------------------------------- /doc/47-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/47-preview.png -------------------------------------------------------------------------------- /doc/02-search_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/02-search_tree.png -------------------------------------------------------------------------------- /doc/05-search-dll.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/05-search-dll.png -------------------------------------------------------------------------------- /doc/14-button-yara.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/14-button-yara.png -------------------------------------------------------------------------------- /doc/18-file-export.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/18-file-export.png -------------------------------------------------------------------------------- /doc/20-file-scans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/20-file-scans.png -------------------------------------------------------------------------------- /doc/40-menu-upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/40-menu-upload.png -------------------------------------------------------------------------------- /doc/06-results-filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/06-results-filter.png -------------------------------------------------------------------------------- /doc/11-export-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/11-export-example.png -------------------------------------------------------------------------------- /doc/12-button-process.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/12-button-process.png -------------------------------------------------------------------------------- /doc/16-file-download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/16-file-download.png -------------------------------------------------------------------------------- /doc/17-file-process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/17-file-process.png -------------------------------------------------------------------------------- /doc/19-file-vt-scan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/19-file-vt-scan.png -------------------------------------------------------------------------------- /doc/21-search-tree2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/21-search-tree2.png -------------------------------------------------------------------------------- /doc/27-results-sort.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/27-results-sort.png -------------------------------------------------------------------------------- /doc/28-results-hash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/28-results-hash.png -------------------------------------------------------------------------------- /doc/34-simple-compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/34-simple-compare.png -------------------------------------------------------------------------------- /doc/41-upload-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/41-upload-example.png -------------------------------------------------------------------------------- /doc/42-load-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/42-load-example.png -------------------------------------------------------------------------------- /doc/43-load-example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/43-load-example2.png -------------------------------------------------------------------------------- /doc/44-load-example3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/44-load-example3.png -------------------------------------------------------------------------------- /src/pip_vt_api_requirements.txt: -------------------------------------------------------------------------------- 1 | texttable 2 | olefile 3 | requests 4 | python-dateutil 5 | redis-semaphore 6 | 
-------------------------------------------------------------------------------- /doc/04-library_textbox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/04-library_textbox.png -------------------------------------------------------------------------------- /doc/07-results-filter2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/07-results-filter2.png -------------------------------------------------------------------------------- /doc/22-search-dino.exe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/22-search-dino.exe.png -------------------------------------------------------------------------------- /doc/23-results-dino.exe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/23-results-dino.exe.png -------------------------------------------------------------------------------- /doc/35-simple-compare2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/35-simple-compare2.png -------------------------------------------------------------------------------- /doc/36-simple-compare3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/36-simple-compare3.png -------------------------------------------------------------------------------- /doc/37-simple-compare4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/37-simple-compare4.png -------------------------------------------------------------------------------- /doc/38-simple-compare5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/38-simple-compare5.png -------------------------------------------------------------------------------- /doc/45-process-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/45-process-example.png -------------------------------------------------------------------------------- /doc/46-process-example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/46-process-example2.png -------------------------------------------------------------------------------- /files_to_load/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | 6 | -------------------------------------------------------------------------------- /doc/13-button-copy-hashes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/13-button-copy-hashes.png -------------------------------------------------------------------------------- /doc/24-search-tree-section-hash.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/24-search-tree-section-hash.png -------------------------------------------------------------------------------- /doc/29-tree-menu-section-name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/29-tree-menu-section-name.png -------------------------------------------------------------------------------- /doc/30-searchbox-section-hash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/30-searchbox-section-hash.png -------------------------------------------------------------------------------- /doc/31-searchbox-section-hash2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/31-searchbox-section-hash2.png -------------------------------------------------------------------------------- /doc/32-searchbox-section-hash3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/32-searchbox-section-hash3.png -------------------------------------------------------------------------------- /doc/33-searchbox-section-hash4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/33-searchbox-section-hash4.png -------------------------------------------------------------------------------- /src/PlugIns/Generic/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["HashPlug", "SizePlug", "DescPlug", 2 | "FuzzyPlug", "MimePlug", "DatePlug"] 3 | -------------------------------------------------------------------------------- /doc/25-results-search-by-section-hash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codexgigassys/codex-backend/HEAD/doc/25-results-search-by-section-hash.png -------------------------------------------------------------------------------- /src/Api/pathmagic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | path = os.path.abspath(os.path.join( 4 | os.path.dirname(os.path.abspath(__file__)), '..')) 5 | sys.path.insert(0, path) 6 | -------------------------------------------------------------------------------- /src/Query/pathmagic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | path = os.path.abspath(os.path.join( 4 | os.path.dirname(os.path.abspath(__file__)), '..')) 5 | sys.path.insert(0, path) 6 | -------------------------------------------------------------------------------- /src/Scripts/pathmagic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | path = os.path.abspath(os.path.join( 4 | os.path.dirname(os.path.abspath(__file__)), '..')) 5 | sys.path.insert(0, path) 6 | -------------------------------------------------------------------------------- /src/Utils/pathmagic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | path = 
os.path.abspath(os.path.join( 4 | os.path.dirname(os.path.abspath(__file__)), '..')) 5 | sys.path.insert(0, path) 6 | -------------------------------------------------------------------------------- /src/KeyManager/pathmagic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | path = os.path.abspath(os.path.join( 4 | os.path.dirname(os.path.abspath(__file__)), '..')) 5 | sys.path.insert(0, path) 6 | -------------------------------------------------------------------------------- /src/MetaControl/pathmagic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | path = os.path.abspath(os.path.join( 4 | os.path.dirname(os.path.abspath(__file__)), '..')) 5 | sys.path.insert(0, path) 6 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | general: 2 | build_dir: src 3 | 4 | dependencies: 5 | post: 6 | - pip install -r pip_test_requirements.txt 7 | 8 | test: 9 | override: 10 | - pytest 11 | -------------------------------------------------------------------------------- /src/PackageControl/pathmagic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | path = os.path.abspath(os.path.join( 4 | os.path.dirname(os.path.abspath(__file__)), '..')) 5 | sys.path.insert(0, path) 6 | -------------------------------------------------------------------------------- /src/health/pathmagic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | path = os.path.abspath(os.path.join( 6 | os.path.dirname(os.path.abspath(__file__)), '..')) 7 | sys.path.insert(0, path) 8 | -------------------------------------------------------------------------------- /src/PlugIns/PE/pathmagic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | source_path = os.path.abspath(os.path.join( 4 | os.path.dirname(os.path.abspath(__file__)), '..', '..')) 5 | sys.path.insert(0, source_path) 6 | -------------------------------------------------------------------------------- /src/cron/cronDockerfile: -------------------------------------------------------------------------------- 1 | FROM busybox 2 | # copied from https://hub.docker.com/r/lodatol/cron/~/dockerfile/ 3 | RUN mkdir -p /var/spool/cron/crontabs 4 | CMD ntpd -p pool.ntp.org && echo "$CRON_ENTRY" | crontab - && crond -f 5 | 6 | -------------------------------------------------------------------------------- /src/pip_requirements.txt: -------------------------------------------------------------------------------- 1 | python-magic 2 | pymongo 3 | bottle 4 | czipfile 5 | ssdeep 6 | pefile 7 | entropy 8 | validators 9 | ipython 10 | statistics 11 | matplotlib 12 | cycler 13 | tqdm 14 | tailer 15 | gevent 16 | requests 17 | rq 18 | validate_email 19 | pyasn1 20 | pyasn1_modules 21 | scipy 22 | numpy 23 | -------------------------------------------------------------------------------- /src/MetaControl/Page.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 
4 | 5 | 6 | class Page(): 7 | 8 | def __init__(self): 9 | pass 10 | 11 | def __delete__(self): 12 | pass 13 | -------------------------------------------------------------------------------- /src/Api/av_count.py: -------------------------------------------------------------------------------- 1 | import pathmagic 2 | from bottle import route 3 | from bottle import request 4 | from bottle import response 5 | from bson.json_util import dumps 6 | from db_pool import * 7 | 8 | 9 | @route('/api/v1/av_count', method='GET') 10 | def av_count(): 11 | count = db.av_analysis.count() 12 | return dumps({"count": count}) 13 | -------------------------------------------------------------------------------- /src/MetaDataPKG/Generic/SizeMeta.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from MetaDataPKG.Metadata import * 5 | 6 | 7 | class SizeMeta(Metadata): 8 | 9 | def __init__(self): 10 | Metadata.__init__(self) 11 | -------------------------------------------------------------------------------- /src/clear_failed_queue.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from redis import Redis 5 | from rq import Queue 6 | import sys 7 | from env import envget 8 | qfail = Queue(sys.argv[1], connection=Redis(host=envget('redis.host'))) 9 | print qfail.count  # show how many failed jobs are about to be cleared 10 | qfail.empty() 11 | -------------------------------------------------------------------------------- /src/tree_menu.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission.
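# Each module listed in TreeMenu.__all__ is expected to expose two
# module-level attributes: tree_element (one node of the tree menu) and
# id_element (a dict of node ids). Illustrative sketch only -- the real
# shapes are defined by the TreeMenu modules themselves (e.g. GenericTree):
#     tree_element = {"label": "Generic", "items": []}  # hypothetical shape
#     id_element = {"0": "generic"}                      # hypothetical shape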
4 | 5 | from TreeMenu import * 6 | 7 | package = __import__("TreeMenu") 8 | tree = [] 9 | ids = {} 10 | for module_name in package.__all__: 11 | module = getattr(package, module_name) 12 | tree.append(module.tree_element) 13 | ids.update(module.id_element) 14 | -------------------------------------------------------------------------------- /src/Api/cron.py: -------------------------------------------------------------------------------- 1 | import pathmagic 2 | from bottle import route, request, response 3 | from Utils.Functions import jsonize, change_date_to_str 4 | from PackageControl.PackageController import * 5 | from KeyManager.KeyManager import * 6 | 7 | # Resets the daily counters 8 | # of the private keys 9 | 10 | 11 | @route('/api/v1/cron', method='GET') 12 | def cron(): 13 | key_manager = KeyManager() 14 | key_manager.reset_daily_counter() 15 | return jsonize({"status": "ok"}) 16 | -------------------------------------------------------------------------------- /src/Api/queue_count.py: -------------------------------------------------------------------------------- 1 | import pathmagic 2 | from bottle import route 3 | from bottle import request 4 | from bottle import response 5 | from bson.json_util import dumps 6 | from Utils.Functions import number_of_jobs_on_queue 7 | 8 | 9 | @route('/api/v1/queue_count', method='GET') 10 | def task_finished(): 11 | count = (number_of_jobs_on_queue('task_private_vt') + 12 | number_of_jobs_on_queue('task_public_vt') + 13 | number_of_jobs_on_queue('task_no_vt')) 14 | return dumps({"count": count}) 15 | -------------------------------------------------------------------------------- /src/PlugIns/Generic/SizePlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from PlugIns.PlugIn import PlugIn 5 | 6 | 7 | class SizePlug(PlugIn): 8 | 9 | def __init__(self, sample=None): 10 | PlugIn.__init__(self, sample) 11 | 12 | def getName(self): 13 | return "size" 14 | 15 | def getVersion(self): 16 | return 1 17 | 18 | def process(self): 19 | return len(self.sample.getBinary()) 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #Ignore backup files and compiled python 2 | *~ 3 | *.pyc 4 | *.wpr 5 | *.wpu 6 | 7 | #Ignore TEMP files except for readme.txt 8 | TEMP/* 9 | !TEMP/readme.txt 10 | 11 | #Ignore Visual Studio Code files 12 | .vscode/* 13 | 14 | #Ignore logs 15 | src/logs.csv 16 | src/log.log 17 | src/launcher.log 18 | logs.csv 19 | log.log 20 | src/loadToMongo_log.txt 21 | src/error_log.txt 22 | 23 | #Ignore credentials 24 | src/secrets.py 25 | src/config/secrets.py 26 | 27 | #Ignore 28 | yara/yarGen-master/ 29 | yara/yara.zip 30 | yara/binarly-sdk/ 31 | yara/pestudio.zip 32 | yara/yarGen-master.zip 33 | -------------------------------------------------------------------------------- /src/PlugIns/Generic/FuzzyPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 
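# Computes the ssdeep (context-triggered piecewise) hash of the sample
# binary via Utils.InfoExtractor.getSsdeep; the fuzzy-search helpers under
# Query/ (searchForFuzzy.py, fuzzyCompare.py, apifuzz.py) operate on this value.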
4 | from PlugIns.PlugIn import PlugIn 5 | from Utils.InfoExtractor import * 6 | 7 | 8 | class FuzzyPlug(PlugIn): 9 | 10 | def __init__(self, sample=None): 11 | PlugIn.__init__(self, sample) 12 | 13 | def getName(self): 14 | return "fuzzy_hash" 15 | 16 | def getVersion(self): 17 | return 1 18 | 19 | def process(self): 20 | return getSsdeep(self.sample.getBinary()) 21 | -------------------------------------------------------------------------------- /src/Query/query.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | import pathmagic 5 | from pymongo import MongoClient 6 | from env import envget 7 | 8 | client = MongoClient(envget('metadata.host'), envget('metadata.port')) 9 | db = client[envget('db_metadata_name')] 10 | coll_meta = db[envget('db_metadata_collection')] 11 | 12 | query = {} 13 | query["particular_header.file_entropy"] = {"$gt": 7.999} 14 | res = coll_meta.find(query) 15 | for e in res: 16 | print(("Found: %s") % (e,)) 17 | -------------------------------------------------------------------------------- /src/Modules/Module.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | 5 | 6 | class Module(): 7 | 8 | def __init__(self, lib=None): 9 | self.library = lib 10 | if(lib is None): 11 | self.already_initialized = False 12 | else: 13 | self.already_initialized = True 14 | 15 | def getLibrary(self): 16 | return self.library 17 | 18 | def getName(self): 19 | pass 20 | 21 | def initialize(self, sample): 22 | pass 23 | -------------------------------------------------------------------------------- /src/PlugIns/Generic/DescPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from PlugIns.PlugIn import PlugIn 5 | from Utils.InfoExtractor import * 6 | 7 | 8 | class DescPlug(PlugIn): 9 | 10 | def __init__(self, sample=None): 11 | PlugIn.__init__(self, sample) 12 | 13 | def getName(self): 14 | return "description" 15 | 16 | def getVersion(self): 17 | return 1 18 | 19 | def process(self): 20 | return MIME_TYPE(self.sample.getBinary(), False) 21 | -------------------------------------------------------------------------------- /src/PlugIns/PE/CypherPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 
4 | from PlugIns.PlugIn import PlugIn 5 | 6 | 7 | class CypherPlug(PlugIn): 8 | 9 | def __init__(self, sample=None): 10 | PlugIn.__init__(self, sample) 11 | 12 | def getPath(self): 13 | return "particular_header.cypher" 14 | 15 | def getName(self): 16 | return "cypher" 17 | 18 | def getVersion(self): 19 | return 1 20 | 21 | def process(self): 22 | return "Not_implemented" 23 | -------------------------------------------------------------------------------- /src/health/delete_list_of_object_ids.py: -------------------------------------------------------------------------------- 1 | # This file deletes a list of documents 2 | # from the meta_container collection. 3 | # ObjectId's to be deleted should be in ids.txt 4 | # one id per line. 5 | import pathmagic 6 | from db_pool import * 7 | from bson.objectid import ObjectId 8 | 9 | 10 | def main(): 11 | collection = db[envget('db_metadata_collection')] 12 | f = open("ids.txt", "r") 13 | for idd in f.readlines(): 14 | clean_id = idd.replace('\n', '') 15 | print str(clean_id) 16 | collection.remove({"_id": ObjectId(clean_id)}) 17 | 18 | 19 | if __name__ == "__main__": 20 | main() 21 | -------------------------------------------------------------------------------- /src/PlugIns/PE/ChildOfPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from PlugIns.PlugIn import PlugIn 5 | 6 | 7 | class ChildOfPlug(PlugIn): 8 | 9 | def __init__(self, sample=None): 10 | PlugIn.__init__(self, sample) 11 | 12 | def getPath(self): 13 | return "particular_header.child_of" 14 | 15 | def getName(self): 16 | return "child_of" 17 | 18 | def getVersion(self): 19 | return 1 20 | 21 | def process(self): 22 | return "Not_implemented" 23 | -------------------------------------------------------------------------------- /src/PlugIns/PE/TestPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from PlugIns.PlugIn import PlugIn 5 | # import entropy 6 | 7 | 8 | class TestPlug(PlugIn): 9 | 10 | def __init__(self, sample=None): 11 | PlugIn.__init__(self, sample) 12 | 13 | def getPath(self): 14 | return "particular_header.test" 15 | 16 | def getName(self): 17 | return "test" 18 | 19 | def getVersion(self): 20 | return 7 21 | 22 | def process(self): 23 | return "dist" 24 | -------------------------------------------------------------------------------- /src/PlugIns/PE/AVAnalysisPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 
4 | from PlugIns.PlugIn import PlugIn 5 | 6 | 7 | class AVAnalysisPlug(PlugIn): 8 | 9 | def __init__(self, sample=None): 10 | PlugIn.__init__(self, sample) 11 | 12 | def getPath(self): 13 | return "particular_header.av_analysis" 14 | 15 | def getName(self): 16 | return "av_analysis" 17 | 18 | def getVersion(self): 19 | return 1 20 | 21 | def process(self): 22 | return "Not_implemented" 23 | -------------------------------------------------------------------------------- /src/PlugIns/PE/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["ImportsPlug", 2 | "EntropyPlug", 3 | "SectionsPlug", 4 | "ResourceEntriesPlug", 5 | "CheckEPSectionPlug", 6 | "CRCCheckPlug", 7 | "TimeStampPlug", 8 | "HeadersPlug", 9 | "VersionInfoPlug", 10 | "ChildOfPlug", 11 | "AVAnalysisPlug", 12 | "CypherPlug", 13 | "PackerVersionPlug", 14 | "CheckPackerPlug", 15 | "TestPlug", 16 | "AddImportsToTreePlug", 17 | "StringPlug", 18 | "ExportsPlug", 19 | "CertficatePlug", 20 | ] 21 | -------------------------------------------------------------------------------- /src/PlugIns/PE/PackerVersionPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from PlugIns.PlugIn import PlugIn 5 | 6 | 7 | class PackerVersionPlug(PlugIn): 8 | 9 | def __init__(self, sample=None): 10 | PlugIn.__init__(self, sample) 11 | 12 | def getPath(self): 13 | return "particular_header.packer_version" 14 | 15 | def getName(self): 16 | return "packer_version" 17 | 18 | def getVersion(self): 19 | return 1 20 | 21 | def process(self): 22 | return "Not_implemented" 23 | -------------------------------------------------------------------------------- /src/Modules/MetaDataModule.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from MetaControl.MetaController import MetaController 5 | from Modules.Module import * 6 | 7 | 8 | class MetaDataModule(Module): 9 | 10 | def __init__(self): 11 | Module.__init__(self) 12 | 13 | def getName(self): 14 | return "metaDataModule" 15 | 16 | def initialize(self, sample): 17 | if(not self.already_initialized): 18 | self.already_initialized = True 19 | self.library = MetaController() 20 | return self.library 21 | -------------------------------------------------------------------------------- /src/PlugIns/PE/EntropyPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 
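# Computes the Shannon entropy of the whole file. shannon_entropy() is
# assumed to return a value normalized to [0, 1], so the * 8 below rescales
# it to bits per byte (0-8); packed or encrypted samples sit close to 8,
# which is why Query/query.py filters on file_entropy > 7.999.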
4 | from PlugIns.PlugIn import PlugIn 5 | import entropy 6 | 7 | 8 | class EntropyPlug(PlugIn): 9 | 10 | def __init__(self, sample=None): 11 | PlugIn.__init__(self, sample) 12 | 13 | def getPath(self): 14 | return "particular_header.file_entropy" 15 | 16 | def getName(self): 17 | return "file_entropy" 18 | 19 | def getVersion(self): 20 | return 1 21 | 22 | def process(self): 23 | res = entropy.shannon_entropy(self.sample.getBinary()) * 8 24 | return res 25 | -------------------------------------------------------------------------------- /src/MetaControl/test.xml: -------------------------------------------------------------------------------- 1 | 5a2c369bc0d5736033c761623ebf2afb31cd567ade3bf09ae15b362558e59a610a0a152867e26cb7222942bdbcef8ed6998c75f9408ad7b4f17af8eed1c68ef566d2aab4PE32 executable (DLL) (GUI) Intel 80386, for MS Windows, PECompact2 compressed1536:Zwoq+LsVXP/z8MWhsLLVaaoW7PcGyF46Yt8O0JI1m1fsBr0daxl:ON+oVX8MLoaoW7PcGy3YtOiefegdGl0x4CB8528D [Fri Oct 15 13:09:33 2010 UTC]Claimed: 0xb592, Actual: 0x13a7cHOLAAAA5.704388440150x1000b9d0 .rsrc 1/3application/x-dosexec68200 -------------------------------------------------------------------------------- /src/env.py: -------------------------------------------------------------------------------- 1 | try: 2 | from config.secrets import env as env_secrets 3 | except ImportError: 4 | env_secrets = {} 5 | from default_config import env as env_default 6 | 7 | 8 | def envget(option): 9 | if option is None or len(option) == 0: 10 | raise ValueError("empty or null option string") 11 | array = option.split('.') 12 | tmp = env_secrets 13 | for x in array: 14 | if type(tmp) is not dict: 15 | break 16 | tmp = tmp.get(x) 17 | if tmp is not None: 18 | return tmp 19 | tmp = env_default 20 | for x in array: 21 | tmp = tmp.get(x) 22 | if tmp is None: 23 | raise ValueError("env variable not found: " + str(option)) 24 | return tmp 25 | -------------------------------------------------------------------------------- /yara/setupYarGen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -d "./yarGen-master/" ]; then 3 | echo "yarGen-master folder already exists"; 4 | exit 0 5 | elif [ -f "./yarGen-master.zip" ]; then 6 | unzip yarGen-master.zip 7 | 7z x yarGen-master/good-opcodes.db.zip.001 -oyarGen-master 8 | 7z x yarGen-master/good-strings.db.zip.001 -oyarGen-master 9 | else 10 | wget -nc https://github.com/Neo23x0/yarGen/archive/master.zip -O yarGen-master.zip 11 | unzip yarGen-master.zip 12 | 7z x yarGen-master/good-opcodes.db.zip.001 -oyarGen-master 13 | 7z x yarGen-master/good-strings.db.zip.001 -oyarGen-master 14 | fi 15 | git clone --depth 1 https://github.com/binarlyhq/binarly-sdk/ 16 | #git clone https://github.com/Xen0ph0n/YaraGenerator/ 17 | -------------------------------------------------------------------------------- /src/PlugIns/Generic/HashPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 
4 | from PlugIns.PlugIn import PlugIn 5 | from Utils.InfoExtractor import * 6 | 7 | 8 | class HashPlug(PlugIn): 9 | 10 | def __init__(self, sample=None): 11 | PlugIn.__init__(self, sample) 12 | 13 | def getName(self): 14 | return "hash" 15 | 16 | def getVersion(self): 17 | return 1 18 | 19 | def process(self): 20 | data = self.sample.getBinary() 21 | dic = {} 22 | dic["sha1"] = SHA1(data) 23 | dic["sha2"] = SHA256(data)  # "sha2" holds the SHA-256 digest 24 | dic["md5"] = MD5(data) 25 | return dic 26 | -------------------------------------------------------------------------------- /src/workerDockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2.7 2 | RUN mkdir /myapp 3 | WORKDIR /myapp 4 | ADD . /myapp/ 5 | ENV PYTHONUNBUFFERED=0 6 | RUN apt-get clean && apt-get update && apt-get install -y build-essential \ 7 | libpq-dev \ 8 | libffi-dev \ 9 | libssl-dev \ 10 | python-dev \ 11 | libfuzzy-dev \ 12 | python-gevent \ 13 | python-pip \ 14 | python-magic \ 15 | python-crypto \ 16 | python-dateutil \ 17 | autoconf \ 18 | openssl \ 19 | file \ 20 | python \ 21 | automake \ 22 | libc-dev \ 23 | libtool && \ 24 | echo "Installing pip requirements" && \ 25 | pip install -r /myapp/pip_requirements.txt && \ 26 | pip install -r /myapp/pip_vt_api_requirements.txt 27 | CMD ["/usr/local/bin/rq","worker", "-u", "redis://codexbackend_redis_1:6379/0", "task"] 28 | -------------------------------------------------------------------------------- /src/PlugIns/Generic/MimePlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from PlugIns.PlugIn import PlugIn 5 | from Utils.InfoExtractor import * 6 | 7 | 8 | class MimePlug(PlugIn): 9 | 10 | def __init__(self, sample=None): 11 | PlugIn.__init__(self, sample) 12 | 13 | def getName(self): 14 | return "mime_type" 15 | 16 | def getVersion(self): 17 | return 2 18 | 19 | def process(self): 20 | cat = MIME_TYPE(self.sample.getBinary(), True) 21 | self.sample.setCategory(cat) 22 | ver = self.sample.getCalculatedVersion() 23 | ver["category"] = cat 24 | return cat 25 | -------------------------------------------------------------------------------- /src/PlugIns/Generic/DatePlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission.
4 | import datetime 5 | 6 | from PlugIns.PlugIn import PlugIn 7 | 8 | 9 | class DatePlug(PlugIn): 10 | 11 | def __init__(self, sample=None): 12 | PlugIn.__init__(self, sample) 13 | 14 | def getName(self): 15 | return "date" 16 | 17 | def getVersion(self): 18 | return 1 19 | 20 | def process(self):  # keeps the earliest date ever observed for this sample 21 | new_date = datetime.datetime.now() 22 | old_date = self.sample.getLastValue("date") 23 | if old_date is None: 24 | return new_date 25 | 26 | if new_date < old_date: 27 | return new_date 28 | 29 | return old_date 30 | -------------------------------------------------------------------------------- /src/Utils/mailSender.py: -------------------------------------------------------------------------------- 1 | import smtplib 2 | from email.mime.text import MIMEText 3 | from db_pool import * 4 | 5 | 6 | def send_mail(toaddrs, subject, text): 7 | if toaddrs is None or len(toaddrs) == 0: 8 | return 9 | fromaddr = envget('mailsender.fromaddr') 10 | 11 | msg = MIMEText(text) 12 | msg['Subject'] = subject 13 | msg['From'] = fromaddr 14 | msg['To'] = toaddrs 15 | 16 | # Credentials (if needed) 17 | username = envget('mailsender.username') 18 | password = envget('mailsender.password') 19 | 20 | # The actual mail send 21 | server = smtplib.SMTP_SSL( 22 | envget('mailsender.smtp_host'), envget('mailsender.smtp_ssl_port')) 23 | server.login(username, password) 24 | server.sendmail(fromaddr, toaddrs, msg.as_string()) 25 | server.quit() 26 | -------------------------------------------------------------------------------- /src/Query/getFile.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | import pathmagic 5 | from pymongo import MongoClient 6 | import gridfs 7 | from env import envget 8 | 9 | # file_id="906f21f436b0dbb2c9cf37b80a90cdeb061ced3d" 10 | # file_id="109bf9de7b82ffd7b8194aa3741b5d42ee878ebb" 11 | file_id = "6abec077e93226f4d9d9a5351092f3e0baef6d78" 12 | 13 | client = MongoClient(envget('files.host'), envget('files.port')) 14 | db = client[envget('db_files_name')] 15 | fs = gridfs.GridFS(db) 16 | f = fs.find_one({"filename": file_id}) 17 | if(f is None): 18 | print("File does not exist.") 19 | exit(0) 20 | data = f.read() 21 | fd = open(file_id, "wb")  # binary mode: GridFS returns raw bytes 22 | fd.write(data) 23 | fd.close() 24 | print("File found") 25 | -------------------------------------------------------------------------------- /src/Scripts/release_semaphore.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (C) 2016 Deloitte Argentina. 3 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 4 | # See the file 'LICENSE' for copying permission. 5 | # 6 | # In the unfortunate case that a worker is forcefully 7 | # stopped while having the semaphore, all workers 8 | # will lock. The chance of this happening is low 9 | # but can happen (and has happened). 10 | # Here is the code to release the semaphore.
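# Note: the namespace below is hard-coded to 'example'; to release the
# semaphore the workers are actually blocked on, it must match the
# namespace they acquire it under.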
11 | # 12 | import pathmagic 13 | from redis import Redis 14 | from redis_semaphore import Semaphore 15 | from threading import Thread 16 | from db_pool import * 17 | 18 | 19 | semaphore = Semaphore(Redis(host=envget('redis.host')), 20 | count=1, namespace='example') 21 | token = semaphore.get_namespaced_key('example') 22 | semaphore.signal(token) 23 | -------------------------------------------------------------------------------- /src/Ram.py: -------------------------------------------------------------------------------- 1 | # http://stackoverflow.com/a/17718729 2 | class Ram: 3 | 4 | def __init__(self): 5 | print "ram loaded" 6 | 7 | # returns a float from 0 to 1 with the % of free memory 8 | def free_percent(self): 9 | mem = self.memory() 10 | return float(mem['free']) / float(mem['total']) 11 | 12 | def memory(self): 13 | with open('/proc/meminfo', 'r') as mem: 14 | ret = {} 15 | tmp = 0 16 | for i in mem: 17 | sline = i.split() 18 | if str(sline[0]) == 'MemTotal:': 19 | ret['total'] = int(sline[1]) 20 | elif str(sline[0]) in ('MemFree:', 'Buffers:', 'Cached:'): 21 | tmp += int(sline[1]) 22 | ret['free'] = tmp 23 | ret['used'] = int(ret['total']) - int(ret['free']) 24 | return ret 25 | -------------------------------------------------------------------------------- /src/db_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from pymongo import MongoClient 5 | from env import envget 6 | 7 | if 'client' not in globals(): 8 | print "Creating connection pool..." 9 | client = MongoClient(envget('metadata.host'), envget('metadata.port')) 10 | db = client[envget('db_metadata_name')] 11 | client_fs = MongoClient(envget('files.host'), envget('files.port')) 12 | db_fs = client_fs[envget('db_files_name')] 13 | client_ver = MongoClient(envget('versions.host'), envget('versions.port')) 14 | db_ver = client_ver[envget('db_versions_name')] 15 | if(envget('temporal_files_db')): 16 | client_temp = MongoClient( 17 | envget('temp_files.host'), envget('temp_files.port')) 18 | db_temp = client_temp[envget('db_temp_files_name')] 19 | -------------------------------------------------------------------------------- /src/process_hash.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | import logging 5 | from Utils.Functions import process_file, valid_hash, clean_hash, get_file_id 6 | from PackageControl.PackageController import * 7 | 8 | 9 | def generic_process_hash(hash_str): 10 | if hash_str is None: 11 | return None 12 | hash_str = clean_hash(hash_str) 13 | if(not valid_hash(hash_str)): 14 | return None 15 | if(len(hash_str) == 64): 16 | hash_str = get_file_id(hash_str) 17 | elif(len(hash_str) == 32): 18 | pc = PackageController() 19 | hash_str = pc.md5_to_sha1(hash_str) 20 | logging.debug("generic_process_hash-->sha1: " + str(hash_str)) 21 | if(hash_str is not None): 22 | return process_file(hash_str) 23 | else: 24 | return None 25 | -------------------------------------------------------------------------------- /src/PlugIns/PE/CRCCheckPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from PlugIns.PlugIn import PlugIn
from Modules.PEFileModule import PEFileModule
import pefile


class CRCCheckPlug(PlugIn):

    def __init__(self, sample=None):
        PlugIn.__init__(self, sample)

    def getPath(self):
        return "particular_header.crc"

    def getName(self):
        return "crc"

    def getVersion(self):
        return 1

    def process(self):
        pelib = self._getLibrary(PEFileModule().getName())
        if(pelib is None):
            return ""

        crc_claimed = pelib.OPTIONAL_HEADER.CheckSum
        crc_actual = pelib.generate_checksum()
        s = "Claimed: 0x%x, Actual: 0x%x" % (crc_claimed, crc_actual)
        return self._normalize(s)
-------------------------------------------------------------------------------- /src/Cataloger.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from Utils.InfoExtractor import *
from Utils.test import test
import os


class Cataloger():

    def __init__(self, data=None):
        self.data = str(data)

    def __delete__(self):
        pass

    def catalogData(self):
        mime = MIME_TYPE(self.data, True)
        return mime

    def catalog(self, data):
        mime = MIME_TYPE(data, True)
        return mime


# ****************TEST_CODE******************


def testCode():
    dir = os.getcwd()
    # file=dir+"/Test_files/test.exe"
    file = dir + "/Test_files/t.lz"
    cat = Cataloger(open(file, 'rb').read())
    res = cat.catalogData()  # was cat.catalog(), which requires a data argument
    print(res)


# ****************TEST_EXECUTE******************


test("-test_Cataloger", testCode)
-------------------------------------------------------------------------------- /src/Api/last_uploaded.py: --------------------------------------------------------------------------------
import pathmagic
from bottle import route, request, response
from Utils.Functions import jsonize, change_date_to_str
from PackageControl.PackageController import *


@route('/api/v1/last_uploaded', method='GET')
# Returns a json with the last n files
# added to the database.
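# Illustrative call (host and port are assumed, not defined here):
#   curl "http://localhost:8080/api/v1/last_uploaded?n=5"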
def last_uploaded():
    number = request.query.get("n")
    if number is None:
        response.status = 400
        return jsonize({"error": 1, "error_message": "Parameter n is missing"})
    if number.isdigit() is False:
        response.status = 400
        return jsonize({"error": 2, "error_message": "Parameter n must be a number"})
    if int(number) == 0:
        response.status = 400  # added for consistency with the other error cases
        return jsonize({"error": 3, "error_message": "Parameter n must be greater than zero."})

    pc = PackageController()
    lasts = pc.last_updated(int(number))
    for i in range(0, len(lasts)):  # Convert datetime objects
        lasts[i] = change_date_to_str(lasts[i])
    return jsonize(lasts)
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
Copyright (c) 2016 Deloitte Argentina

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------- /src/PlugIns/PE/TimeStampPlug.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from PlugIns.PlugIn import PlugIn
from Modules.PEFileModule import PEFileModule
import pefile
import time  # used in process(); the original forgot this import


class TimeStampPlug(PlugIn):

    def __init__(self, sample=None):
        PlugIn.__init__(self, sample)

    def getPath(self):
        return "particular_header.date"

    def getName(self):
        return "date"

    def getVersion(self):
        return 1

    def process(self):
        pelib = self._getLibrary(PEFileModule().getName())
        if(pelib is None):
            return ""

        val = pelib.FILE_HEADER.TimeDateStamp
        ts = '0x%-8X' % (val)
        try:
            ts += ' [%s UTC]' % time.asctime(time.gmtime(val))
        except Exception, e:
            ts += ' [SUSPICIOUS]'

        return self._normalize(ts)
-------------------------------------------------------------------------------- /src/Processors/ProcessorFactory.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
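# Maps a sample's MIME type to the Processor that should analyze it; a
# minimal usage sketch built only from names defined in this file:
#   factory = ProcessorFactory()
#   processor = factory.createProcessor("application/x-dosexec", sample)
#   metadata = processor.process()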
from Processors.PEProcessor import *
from Processors.DummyProcessor import *
from Processors.HashProcessor import *  # used below; the original forgot this import
from Utils.test import test


class ProcessorFactory():

    def __init__(self):
        pass

    def __delete__(self):
        pass

    def getHashProcessor(self, processor, sample):
        return HashProcessor(sample)

    def createProcessor(self, processor, sample):
        obj = None
        if(processor == "application/x-dosexec"):
            obj = PEProcessor(sample)
        else:
            obj = DummyProcessor(sample)

        return obj

# ****************TEST_CODE******************


def testCode():
    pass

# ****************TEST_EXECUTE******************


test("-test_ProcessorFactory", testCode)
-------------------------------------------------------------------------------- /src/PlugIns/PE/AddImportsToTreePlug.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from PlugIns.PlugIn import PlugIn
from Modules.MetaDataModule import *


class AddImportsToTreePlug(PlugIn):

    def __init__(self, sample=None):
        PlugIn.__init__(self, sample)

    def getPath(self):
        return "particular_header.imports_tree"

    def getName(self):
        return "imports_tree"

    def getVersion(self):
        return 1

    def process(self):
        imports = self.sample.getLastValue("particular_header.imports")
        if(imports is None):
            return "no_imports"
        if(len(imports) == 0):
            return "no_imports"
        mdc = self._getLibrary(MetaDataModule().getName())
        if(mdc is None):
            return "not_added"
        if(mdc.writeImportsTree(imports) == 0):
            return "added"
        else:
            return "not_added"
-------------------------------------------------------------------------------- /src/Query/searchForFuzzy.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
import pathmagic
from pymongo import MongoClient
import ssdeep
from env import envget  # used below; the original forgot this import

fuzzy_to_compare = "12288:lTurEUKhROhnCzrwsrsNuRIHZB62atXtjBIuMAI0VpnJJyeVxy5la8AJv:lqrEJhROh8rwKsNrDK9xM3cJyeg0Jv"

client = MongoClient(envget('metadata.host'), envget('metadata.port'))
db = client[envget('db_metadata_name')]
coll_meta = db[envget('db_metadata_collection')]
print("loading")
f1 = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1})
results = []
for f in f1:
    results.append(f)
print("comparing")
count = 0
reset = 0
for a in results:
    try:
        res = ssdeep.compare(a["fuzzy_hash"], fuzzy_to_compare)
    except Exception, e:
        print str(e)
        continue
    if(res >= 50):
        print("%s - %s" % (res, a["file_id"]))

# print count
# reset+=1; count+=1
# if(reset>=1000):
#     print(str(count)+" processed")
#     reset=0
-------------------------------------------------------------------------------- /src/Utils/test.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
# Utility for testing code


def test(TEST, function):
    import sys
    import traceback
    if(len(sys.argv) >= 2):
        if(sys.argv[1] == TEST):
            try:
                print("[--------------- Test of " +
                      str(sys.argv[0]) + " -------------]")
                function()
            except Exception, e:
                print (str(e))
                print("####### Error detected #######")
                print("")
                print(traceback.format_exc())
                print("")
                print("###### End of log ######")
                # raw_input("Press a key...")

# ****************TEST_CODE******************


def probando():
    print("testing")


def testCode():
    test("-test_test", probando)

# ****************TEST_EXECUTE******************


test("-test_test", testCode)
-------------------------------------------------------------------------------- /src/Processors/DummyProcessor.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from Processors.Processor import *
from Sample import *  # needed by the test code below (mirrors HashProcessor.py)
import traceback
import time


class DummyProcessor(Processor):

    def __init__(self, sample):
        Processor.__init__(self, sample)

    def __delete__(self):
        Processor.__delete__(self)

    # Processing method
    def process(self):
        Processor.process(self)
        # Plugins go here
        self._executeAllPlugIns()
        return self.metadata_to_store


# ****************TEST_CODE******************


def testCode():
    file = "Test_files/test.exe"
    data = open(file, "rb").read()

    start_time = time.time()
    # The original built DummyProcessor(data, dic), but __init__ only takes a
    # sample object, so a Sample is constructed first.
    sample = Sample()
    sample.setBinary(data)
    dp = DummyProcessor(sample)
    print(dp.process())
    elapsed = time.time() - start_time

    print("Time Elapsed: " + str(elapsed * 1000) + " ms")
    print("")


# ****************TEST_EXECUTE******************
# from Utils.test import test


test("-test_DummyProcessor", testCode)
-------------------------------------------------------------------------------- /src/Query/fuzzyCompare.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
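# Pairwise ssdeep comparison: every stored fuzzy_hash is scored against every
# other one (O(n^2) over the collection) and any pair scoring above 0 is
# printed.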
import pathmagic
from pymongo import MongoClient
import ssdeep
from env import envget  # used below; the original forgot this import


client = MongoClient(envget('metadata.host'), envget('metadata.port'))
db = client[envget('db_metadata_name')]
coll_meta = db[envget('db_metadata_collection')]

"""
f=coll_meta.count({"particular_header.packer_detection":"True"})
print("%s documents found"%(f,))

#for a in f:
#    print(a["file_id"])


f=coll_meta.count({"particular_header.packer_detection":"False"})
print("%s false documents"%(f,))


f=coll_meta.count({"particular_header.packer_detection":"Unknown"})
print("%s unknown documents"%(f,))
"""

f1 = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1})
l = []
for f in f1:
    l.append(f)

count = 0
for a in l:
    count += 1
    for b in l[count:]:
        res = ssdeep.compare(a["fuzzy_hash"], b["fuzzy_hash"])
        if(res > 0):
            print("%s - %s - %s" % (res, a["file_id"], b["file_id"]))

    print("***** %s ******" % (count,))

# raw_input()
-------------------------------------------------------------------------------- /src/PlugIns/PE/CheckEPSectionPlug.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from PlugIns.PlugIn import PlugIn
from Modules.PEFileModule import PEFileModule
import pefile


class CheckEPSectionPlug(PlugIn):

    def __init__(self, sample=None):
        PlugIn.__init__(self, sample)

    def getPath(self):
        return "particular_header.ep"

    def getName(self):
        return "ep"

    def getVersion(self):
        return 1

    def process(self):
        pelib = self._getLibrary(PEFileModule().getName())
        if(pelib is None):
            return ""

        name = ''
        ep = pelib.OPTIONAL_HEADER.AddressOfEntryPoint
        pos = 0
        for sec in pelib.sections:
            if (ep >= sec.VirtualAddress) and (ep < (sec.VirtualAddress + sec.Misc_VirtualSize)):
                name = sec.Name.replace('\x00', '')
                break
            else:
                pos += 1
        s = "%s %s %d/%d" % (hex(ep + pelib.OPTIONAL_HEADER.ImageBase),
                             name, pos, len(pelib.sections))
        return self._normalize(s)
-------------------------------------------------------------------------------- /src/health/2016-12-22-2045-process_files_without_version.py: --------------------------------------------------------------------------------
# This script iterates through meta_container
# documents and searches for hashes that do not
# have a version_container document. Once found, it
# processes them.
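# Progress is reported as "count-miss,<documents seen>,<documents missing a
# version>" once per million documents (see print_flag below).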
import pathmagic
from db_pool import *
from Utils.Functions import process_file


def check_if_has_version(file_id, collection_version):
    res = collection_version.find({"file_id": file_id}).limit(1)
    return res.count() != 0


def main():
    collection_version = db["version_container"]
    collection_meta = db[envget('db_metadata_collection')]
    start = 0
    count = 0
    test = 0
    mis = 0
    print_flag = 1000000
    res = collection_meta.find(
        {}, {"file_id": 1}, no_cursor_timeout=True).skip(start)
    for r in res:
        count += 1
        test += 1
        file_id = r.get('file_id')
        if not check_if_has_version(file_id, collection_version):
            mis += 1
            process_file(file_id)
        if(test >= print_flag):
            test = 0
            print "count-miss," + str(count) + "," + str(mis)
    print "count-miss," + str(count) + "," + str(mis)


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /src/PlugIns/PlugIn.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from Sample import *


class PlugIn():

    def __init__(self, sample=None):
        self.modules = {}
        self.requires = []
        self.sample = sample

    def addModule(self, module):
        self.modules[module.getName()] = module

    def setModules(self, modules):
        self.modules = modules

    def _addRequiere(self, requiere):
        self.requires.append(requiere)

    def setSample(self, s):
        self.sample = s

    def _getLibrary(self, m):
        mod = self.modules.get(m)
        if(mod is None):
            return None
        mod.initialize(self.sample)
        lib = mod.getLibrary()
        return lib

    def getVersion(self):
        pass

    def getName(self):
        pass

    def process(self):
        pass

    def getPath(self):
        return self.getName()

    def _normalize(self, data):  # TODO remove this from here
        try:
            res = repr(hex(data))
        except TypeError:
            res = repr(data)
        return res
-------------------------------------------------------------------------------- /src/VersionControl/VersionController.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
import os
from db_pool import *
from Utils.test import test
# Versioning controller for executed plugins.
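# Minimal usage sketch (the 40-character key is an assumed placeholder):
#   vc = VersionController()
#   vc.updateVersion("0" * 40, {"sections": 15})
#   print(vc.searchVersion("0" * 40))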


class VersionController:

    def __init__(self):
        self.collection = db_ver.version_container
        # print(self.collection)

    def __delete__(self):
        pass

    def updateVersion(self, file_id, ver_dic):
        if len(file_id) != 40:
            raise ValueError("VersionController: file_id not sha1")
        command = {"$set": ver_dic}
        self.collection.update_one({"file_id": file_id}, command, upsert=True)

    def searchVersion(self, file_id):
        f = self.collection.find_one({"file_id": file_id})
        return f


# ****************TEST_CODE******************
def testCode():
    vc = VersionController()  # the original instantiated a nonexistent DBVersion
    ver = {}
    for i in range(0, 10):
        ver[str(i)] = i + 10
    file_id = "0" * 40  # updateVersion() requires a 40-char sha1
    # vc.updateVersion(file_id, ver)
    lver = vc.searchVersion(file_id)  # was loadVersion(), which does not exist
    n = lver["3"]
    print(type(n))


# ****************TEST_EXECUTE******************
test("-test_VersionController", testCode)
-------------------------------------------------------------------------------- /src/default_config.py: --------------------------------------------------------------------------------
env = {
    "metadata":
    {
        "host": "codexbackend_db_1",
        "port": 27017
    },
    "files":
    {
        "host": "codexbackend_db_1",
        "port": 27017
    },
    "temp_files":
    {
        "host": "codexbackend_db_2",
        "port": 27017
    },
    "versions":
    {
        "host": "codexbackend_db_1",
        "port": 27017
    },
    "redis":
    {
        "host": "codexbackend_redis_1",
        "port": 6379
    },
    "temporal_files_db": False,
    "db_temp_files_name": "DB_temp_files",
    "db_files_name": "DB_files",
    "db_versions_name": "DB_versions",
    "db_metadata_name": "DB_metadata",
    "db_metadata_collection": "meta_container",
    "vt_private_apikey": "",
    "vt_public_apikey": "",
    'yara-script1': '/myapp/yara/YaraGenerator/yaraGenerator.py',
    'yara-script2': '/myapp/yara/yarGen-master/yarGen.py',
    'csv-upx-file': '',
    'auto_get_av_result': False,
    'debug': False,
    'spend_credit_to_fix_inconsistency': False,
    "mailsender":
    {
        "fromaddr": "",
        "smtp_host": "smtp.gmail.com",
        "smtp_ssl_port": 465,
        "username": '',
        "password": ''
    }
}
-------------------------------------------------------------------------------- /src/Api/export.py: --------------------------------------------------------------------------------
import pathmagic
from bottle import route
from bottle import request
from bottle import static_file
from MetaControl.MetaController import *
from Utils.Functions import clean_hash
from Utils.Functions import id_generator
from bson.json_util import dumps
from Utils.Functions import call_with_output
import os  # used below (os.path.join / os.remove); the original forgot this import
import shutil


@route('/api/v1/export', method='POST')
def export_metadata():
    mdc = MetaController()
    hashes = request.forms.dict.get("file_hash[]")
    dump_to_save = ""
    random_id = id_generator()
    tmp_path = "/tmp/meta_export"
    tmp_folder = os.path.join(tmp_path, random_id)
    call_with_output(["mkdir", "-p", tmp_folder])
    for hash in hashes:
        hash = clean_hash(hash.replace('\r', ''))
        res = mdc.read(hash)
        dump = dumps(res, indent=4)
        file_name = os.path.join(tmp_folder, str(hash) + '.txt')
        fd = open(file_name, "w")
        fd.write(dump)
        fd.close()
    zip_path = os.path.join(tmp_path, random_id + '.zip')
    call_with_output(["zip", "-jr", zip_path, tmp_folder])
    resp = static_file(str(random_id) + '.zip', root=tmp_path, download=True)
    resp.set_cookie('fileDownload', 'true')
    shutil.rmtree(tmp_folder)
    os.remove(zip_path)
    return resp
-------------------------------------------------------------------------------- /src/health/check_file_ids.py: --------------------------------------------------------------------------------
# This checks that all the
# file_id attributes in metadata documents
# are valid sha1 hashes. If they are not,
# the _id, sha1 and file_id of the offender are printed.
import pathmagic
import sys  # used below (sys.stdout.flush); the original forgot this import
from db_pool import *


def is_sha1(maybe_sha):
    if len(maybe_sha) != 40:
        return False
    try:
        sha_int = int(maybe_sha, 16)
    except ValueError:
        return False
    return True


def compare(_id, sha1, file_id):
    if sha1 != file_id or not is_sha1(sha1):
        print "idsha1fileid," + str(_id) + "," + str(sha1) + "," + str(file_id)
        sys.stdout.flush()
        return True
    else:
        return False


def main():
    collection = db[envget('db_metadata_collection')]
    start = 0
    count = 0
    test = 0
    mis = 0
    print_flag = 1000000
    res = collection.find({}, {"hash": 1, "file_id": 1},
                          no_cursor_timeout=True).skip(start)
    for r in res:
        count += 1
        test += 1
        if compare(r.get('_id'), r.get('hash', {}).get('sha1', ''), r.get('file_id')):
            mis += 1
        if(test >= print_flag):
            test = 0
            print "count-miss," + str(count) + "," + str(mis)
            sys.stdout.flush()


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /src/Modules/PEFileModule.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from Modules.Module import Module
import traceback
import pefile
import logging


class PEFileModule(Module):

    def __init__(self):
        Module.__init__(self)

    def getName(self):
        return "pefileModule"

    def initialize(self, sample):
        if(self.already_initialized):
            return self.library
        self.already_initialized = True
        try:
            self.library = pefile.PE(data=sample.getBinary(), fast_load=True)
            # see if these initializations can be done in the plugins.
            self.library.parse_data_directories(directories=[
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_TLS'],
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_SECURITY'],
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']])

        except pefile.PEFormatError:
            # print("parse fail")
            self.library = None
            # print(traceback.format_exc())
            logging.error("Error parsing pefileModule with sample:%s",
                          sample.getID(), exc_info=True)
-------------------------------------------------------------------------------- /src/Utils/InfoExtractor.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
import hashlib
import magic
import ssdeep
import logging
from test import test


def MIME_TYPE(data, mime=True):
    try:
        return magic.from_buffer(data, mime=mime)
    except magic.MagicException:
        return "none/none"


def SHA1(data):
    return hashlib.sha1(data).hexdigest()


def SHA256(data):
    return hashlib.sha256(data).hexdigest()


def MD5(data):
    return hashlib.md5(data).hexdigest()


def getSsdeep(data):
    try:
        res = ssdeep.hash(data)
        return res
    except Exception, e:
        logging.exception(str(e))
        return ''

# ****************TEST_CODE******************


def testCode():
    file = "../Test_files/test.exe"
    data = (open(file, 'rb').read())
    # The original instantiated a nonexistent InfoExtractor class; the
    # module-level functions are exercised directly instead.
    print("MIME_TYPE: " + str(MIME_TYPE(data)))
    print("SHA1: " + str(SHA1(data)))
    print("SHA256: " + str(SHA256(data)))
    print("MD5: " + str(MD5(data)))
    print("Ssdeep: " + str(getSsdeep(data)))
    print("Info: " + str(MIME_TYPE(data, False)))

# ****************TEST_EXECUTE******************


test("-test_InfoExtractor", testCode)
-------------------------------------------------------------------------------- /src/tests/test_health.py: --------------------------------------------------------------------------------
import pytest
import subprocess
import re


class TestHealth():

    def call_with_output(self, array):
        p = subprocess.Popen(array, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        output, err = p.communicate()
        return output

    def test_embed(self):
        '''
        Check if someone forgot an embed in the code
        '''
        # pattern built by concatenation so this file does not match itself
        output = self.call_with_output(
            ["find", ".", "-type", "f", "-name", "*.py",
             "-exec", "grep", "-H", "embed(" + ")", "{}", ";"])
        assert len(
            output.split()
        ) == 0, (
            'Should be zero. (Did you forget an embed?)\n'
            + "find output=\n" + str(output))

    def test_pep8(self):
        output = self.call_with_output(
            ["find", ".", "-type", "f", "-name", "*.py",
             "-exec", "pycodestyle", "--max-line-length=400",
             "--ignore=E121,E123,E126,E226,E24,E704,W503,E741",
             "--exclude=XMLCreator.py,pescanner.py,/yara/",
             "-q", "{}", ";"])
        output = output.split()
        result = []
        for line in output:
            m = re.search('(/yara/|/pescanner.py)', line)
            if not m:  # we don't want to check PEP8 on yara/ or pescanner.py.
                result.append(line)

        assert len(
            result
        ) == 0, 'Should be zero. (PEP8 test failed)\n' + str(result)
-------------------------------------------------------------------------------- /src/Utils/TimeLogger.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
import time
from test import test


class TimeLogger():

    def __init__(self):
        self.start_time = 0
        self.log_dic = {}

    def __delete__(self):
        pass

    def startCounter(self):
        self.start_time = time.time()

    def logTime(self, name):
        end = time.time()
        elapsed = (end - self.start_time) * 1000
        self.log_dic[name] = elapsed
        self.start_time = time.time()

    def __str__(self):
        ret = ""
        for l in self.log_dic:
            ret = ret + (str(l) + " -> " + str(self.log_dic[l])) + "\n"
        return ret

# ****************TEST_CODE******************


def testCode():
    tl2 = TimeLogger()
    tl = TimeLogger()
    tl.startCounter()
    a = 0
    for i in range(0, 1000):
        a += 1
    tl2.startCounter()
    tl.logTime("1000 loop")
    tl2.logTime("logTime")
    a = 0
    for i in range(0, 10000):
        a += 1
    tl.logTime("10000 loop")

    a = 0
    for i in range(0, 100000):
        a += 1
    tl.logTime("100000 loop")

    print(str(tl))
    print("")
    print(str(tl2))

# ****************TEST_EXECUTE******************


test("-test_TimeLogger", testCode)
-------------------------------------------------------------------------------- /src/ProcessControl.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from multiprocessing import Process
from multiprocessing import Semaphore


def processCall(semaphore, obj, function_to_execute, data):
    getattr(obj, function_to_execute)(data)
    semaphore.release()


class ProcessControl():

    def __init__(self, forks_number):
        self.forks_number = forks_number
        self.semaphore = Semaphore(self.forks_number)

    def execute(self, obj, function_to_execute, data):
        self.semaphore.acquire()
        # print("Launching new process")
        p = Process(target=processCall, args=(
            self.semaphore, obj, function_to_execute, data))
        p.start()

    def wait(self):
        for i in range(self.forks_number):
            self.semaphore.acquire()

# test##############################################


def test():
    import random
    import time

    class MyClass():

        def function_to_run(self, data):
            time.sleep(random.randint(0, 3))
            print(data)

    simultaneus_workers = 5
    pc = ProcessControl(simultaneus_workers)

    obj = MyClass()
    function = "function_to_run"

    for data in range(0, 10):
        pc.execute(obj, function, data)

    pc.wait()


if __name__ == "__main__":
    test()
-------------------------------------------------------------------------------- /src/Query/apifuzz.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
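# searchFuzzy(fuzz, limit, thresh) returns {file_id: score} for every stored
# fuzzy_hash whose ssdeep score against fuzz reaches thresh;
# searchFull(search, limit) returns the file_ids matching a raw Mongo query.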
import pathmagic
from pymongo import MongoClient
import ssdeep
from env import envget


def searchFuzzy(fuzz, limit, thresh):
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    # note: literal collection name; the other query scripts use
    # envget('db_metadata_collection') here.
    coll_meta = db["db_metadata_collection"]

    f1 = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}).limit(limit)
    l = []
    for f in f1:
        l.append(f)

    ret = {}
    for a in l:
        res = -1
        try:
            res = ssdeep.compare(a["fuzzy_hash"], fuzz)
        except ssdeep.InternalError:  # was a bare, undefined InternalError
            print(str(res) + "------" +
                  str(a["fuzzy_hash"]) + "-----" + str(a["file_id"]))
            continue
        if(res >= thresh):
            ret[a["file_id"]] = res

    return ret


def searchFull(search, limit):
    # print("1")
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    # print("2")
    db = client[envget('db_metadata_name')]
    # print("3")
    coll_meta = db["db_metadata_collection"]
    # print("4")
    f1 = coll_meta.find(search).limit(limit)
    # print("5")
    l = []
    for f in f1:
        l.append(f)

    # print("6")
    ret = []
    for a in l:
        ret.append(str(a["file_id"]))
    # print("7")

    return ret
-------------------------------------------------------------------------------- /src/health/2016-12-22-2013-check_file_ids2.py: --------------------------------------------------------------------------------
# This checks that all the
# file_id attributes in version_container documents
# are valid sha1 hashes. If they are not,
# the _id and file_id of the offender are printed
# and the document gets deleted.
import pathmagic
from db_pool import *
from bson.objectid import ObjectId


def is_sha1(maybe_sha):
    if len(maybe_sha) != 40:
        return False
    try:
        sha_int = int(maybe_sha, 16)
    except ValueError:
        return False
    return True


def compare(_id, sha1, file_id):
    if sha1 != file_id or not is_sha1(sha1):
        print "idsha1fileid," + str(_id) + "," + str(sha1) + "," + str(file_id)
        return True
    else:
        return False


def main():
    collection = db_ver["version_container"]
    start = 0
    count = 0
    test = 0
    mis = 0
    print_flag = 1000000
    res = collection.find({}, {"_id": 1, "file_id": 1},
                          no_cursor_timeout=True).skip(start)
    for r in res:
        count += 1
        test += 1
        doc_id = r.get("_id")
        file_id = r.get('file_id')
        if not is_sha1(file_id):
            mis += 1
            print str(doc_id) + "," + str(file_id)
            collection.remove({"_id": ObjectId(str(doc_id))})
        if(test >= print_flag):
            test = 0
            print "count-miss," + str(count) + "," + str(mis)
    print "count-miss," + str(count) + "," + str(mis)


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /src/Processors/HashProcessor.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from Processors.Processor import *
import traceback
import time
import string
# PlugIns
from PlugIns.PE import *
# Modules
from Modules.PEFileModule import *
from Modules.MetaDataModule import *
from Sample import *


class HashProcessor(Processor):

    def __init__(self, sample):
        Processor.__init__(self, sample)

    def __delete__(self):
        Processor.__delete__(self)

    # Processing method
    def process(self):
        Processor.process(self)
        self._addModule(PEFileModule())
        self._addModule(MetaDataModule())
        self._addPlugIn(SectionsPlug.SectionsPlug())
        self._executeAllPlugIns()
        return self.metadata_to_store

# ****************TEST_CODE******************


def testCode():
    file = "Test_files/error_de_codificacion_en_nombre_de_libreria"
    data = open(file, "rb").read()

    start_time = time.time()
    sample = Sample()
    sample.setBinary(data)
    hp = HashProcessor(sample)  # was PEProcessor(sample, dic); this file tests HashProcessor
    res = hp.process()
    print(res)
    # print(res["particular_header"]["sections"])
    elapsed = time.time() - start_time
    print("Time Elapsed: " + str(elapsed * 1000) + " ms")
    print("")


# ****************TEST_EXECUTE******************
# from Utils.test import test


test("-test_HashProcessor", testCode)
-------------------------------------------------------------------------------- /src/MetaDataPKG/Metadata.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.


class Metadata():

    def __init__(self):
        self.data = {}
        self.empty = True

    def isEmpty(self):
        return self.empty

    def setData(self, data):
        self.empty = False
        self.data = data

    def getData(self):
        return self.data

    def getValue(self, source):
        path = source.split('.')
        root = self.data
        for p in path:
            try:
                root = root.get(p)
                if(root is None):
                    return None
            except KeyboardInterrupt:
                raise KeyboardInterrupt
            except Exception, e:
                print str(e)
                return None
        return root

    def setValue(self, source, value):
        self.empty = False
        path = source.split('.')
        root = self.data
        for p in path[:-1]:
            n = root.get(p)
            if (n is not None):
                root = n
            else:
                aux = {}
                root[p] = aux
                root = aux
        root[path[-1]] = value


# m=Metadata()
# m.setData({"pepe":"nosenose","manaos":"horrriibleeee","aver":{"otra":"ahoraSi"}})
# m.setValue("pepe","nosenose")
# m.setValue("aver.otra","ahoraSi")
# m.setValue("aver.otra2","tambien")

# print(m.getValue("pepe"))
# print(m.getValue("pepe2"))
# print(m.getValue("manaos"))
# print(m.getValue("aver"))
# print(m.getValue("aver.otra"))
# print(m.getValue("aver.otra2"))
-------------------------------------------------------------------------------- /src/TreeMenu/GenericTree.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
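# tree_element describes the fields shown in the UI search-tree menu
# (searchable/projectable flags, input types, examples); id_element maps each
# field id to the metadata path it queries, e.g. id 4 ("Size") -> "size".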

tree_element = {"name": "Generic", "children": [
    {"name": "Hash", "id": 1, "type": "string", "example": "MD5, SHA1, SHA2", "children": [], "searchable": True, "projectable": True, "call_func": "validate_hash", "lower": True},
    {"name": "Description", "id": 146, "type": "string", "children": [], "searchable": False, "projectable": True, "call_func": "", "lower": False},
    {"name": "Size", "id": 4, "type": "number", "min": 0, "example": 103140, "children": [], "searchable": True, "projectable": True, "call_func": ""},
    {"name": "Mime type", "id": 5, "type": "string", "example": "application/x-dosexec", "children": [], "searchable": True, "projectable": True, "call_func": "", "lower": True},
    {"name": "AntiVirus", "children": [
        {"name": "AV signature", "id": 10000, "type": "string", "example": "trojan", "children": [], "searchable": True, "projectable": False},
    ]},
    {"name": "Date", "id": 147, "type": "string", "children": [], "example": "e.g.: \"[2015-05-10;2015-05-15]\", \">=2015-05-15\", \"2015-05-10\" ", "searchable": True, "projectable": True, "call_func": "", "lower": False}
]}

id_element = {
    1: {"path": "hash.md5", "type": "string", "do": "clean_hash"},
    2: {"path": "hash.sha1", "type": "string", "do": "clean_hash"},
    3: {"path": "hash.sha2", "type": "string", "do": "clean_hash"},
    146: {"path": "description", "type": "string"},
    4: {"path": "size", "type": "int"},
    5: {"path": "mime_type", "type": "string"},
    10: {"path": "fuzzy_hash", "type": "string"},
    11: {},
    10000: {"path": "scans.result", "type": "string"},  # ids of 10000 and above are for the searcher
    147: {"path": "date", "type": "date_range"}
}
-------------------------------------------------------------------------------- /src/health/check_dates.py: --------------------------------------------------------------------------------
# This checks that all the
# date attributes in metadata documents
# are valid datetime.datetime objects. If they are not,
# the file_id and date of the offender are printed.
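# Offenders show up as "datenotunicode,<_id>,<file_id>,<date>,<type>"; string
# dates that can be parsed are rewritten in place via process_date().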
import pathmagic
import sys  # used below (sys.stdout.flush); the original forgot this import
from db_pool import *
import datetime
from bson.objectid import ObjectId
from Utils.ProcessDate import process_date


def fix_date(r):
    str_date = r.get('date')
    if type(str_date) is not unicode:
        print("datenotunicode," + str(r.get('_id')) + "," +
              str(r.get('file_id')) + "," +
              str(r.get('date')) + "," +
              str(type(r.get('date'))))
        sys.stdout.flush()
        return False
    else:
        try:
            date = process_date(str_date)
        except Exception, e:
            print "failed to convert date for " + str(str_date) + " in " + str(r.get('_id'))
            sys.stdout.flush()
            return False

        collection = db[envget('db_metadata_collection')]
        collection.update_one({"_id": ObjectId(r.get('_id'))}, {
                              "$set": {"date": date}})
        return True


def main():
    collection = db[envget('db_metadata_collection')]
    start = 0
    count = 0
    test = 0
    mis = 0
    fixed = 0
    print_flag = 1000000
    res = collection.find({}, {"date": 1, "file_id": 1},
                          no_cursor_timeout=True).skip(start)
    for r in res:
        count += 1
        test += 1
        if type(r.get('date')) is not datetime.datetime and r.get('date') is not None:
            if(fix_date(r)):
                fixed += 1
            else:
                mis += 1
        if(test >= print_flag):
            test = 0
            print "count-fix-miss," + str(count) + "," + str(fixed) + "," + str(mis)
            sys.stdout.flush()


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /src/Api/queue_tasks.py: --------------------------------------------------------------------------------
import pathmagic
from bottle import route
from bottle import request
from bottle import response
from bson.json_util import dumps
from env import envget
from rq import Queue
from redis import Redis
import re
import datetime
from Utils.ProcessDate import process_date
from Utils.task import count_valid_hashes_in_task


# Returns a json like
# {'queue_tasks': [ {'queue_name': 'task_no_vt',
#                    'tasks': [
#                        {'task_id': 'asdf',
#                         'date_enqueued': '2017-03.'},
#                    etc.
@route('/api/v1/queue_tasks', method='GET')
def tasks_on_queue():
    tasks_on_queue = []
    for queue_name in ['task_private_vt', 'task_public_vt', 'task_no_vt']:
        tasks_on_queue.append({'queue_name': queue_name,
                               'tasks': get_tasks_on_queue(queue_name)})

    return dumps({"queue_tasks": tasks_on_queue, "current_date": str(datetime.datetime.now())})


def get_tasks_on_queue(queue_name):
    q = Queue(queue_name, connection=Redis(host=envget('redis.host')))
    jobs = q.jobs
    tasks = []
    for job in jobs:
        task = {"date_enqueued": str(
            process_date(job.to_dict().get('enqueued_at')))}
        '''
        to_dict() returns something like this:
        {u'origin': u'task_no_vt', u'status': u'queued', u'description': u"Api.task.generic_task('N7UFZ56FQDITJ34F40TZB50XAWVNW575QGIL4YEC')", u'created_at': '2017-03-03T20:14:47Z', u'enqueued_at': '2017-03-03T20:14:47Z', u'timeout': 31536000, u'data': '\x80\x02(X\x15\x00\x00\x00Api.task.generic_taskq\x01NU(N7UFZ56FQDITJ34F40TZB50XAWVNW575QGIL4YECq\x02\x85q\x03}q\x04tq\x05.'}
        '''
        task_id = re.search('[A-Z0-9]{40}', job.to_dict().get('description'))
        if task_id is None:
            continue
        task['task_id'] = task_id.group(0)
        task['hashes'] = count_valid_hashes_in_task(task['task_id'])
        tasks.append(task)
    return tasks
-------------------------------------------------------------------------------- /src/health/check_dates_av.py: --------------------------------------------------------------------------------
# This checks that all the
# date attributes in av_analysis collection
# are valid datetime.datetime objects. If they are not,
# the sha1 and date of the offender are printed.
# It tries to fix the issue by using the first_seen date.
import pathmagic
import sys  # used below (sys.stdout.flush); the original forgot this import
from db_pool import *
import datetime
from bson.objectid import ObjectId
from Utils.ProcessDate import process_date


def fix_date(r):
    str_date = r.get('date', r.get('first_seen'))
    if type(str_date) is not unicode:
        print("datenotunicode," + str(r.get('_id')) + "," +
              str(r.get('sha1')) + "," + str(r.get('date')) +
              "," + str(type(r.get('date'))))
        sys.stdout.flush()
        return False
    else:
        try:
            date = process_date(str_date)
        except Exception, e:
            print("failed to convert date for " +
                  str(str_date) + " in " + str(r.get('_id')))
            sys.stdout.flush()
            return False

        collection = db["av_analysis"]
        collection.update_one({"_id": ObjectId(r.get('_id'))}, {
                              "$set": {"date": date}})
        return True


def main():
    collection = db["av_analysis"]
    start = 0
    count = 0
    test = 0
    mis = 0
    fixed = 0
    print_flag = 1000000
    res = collection.find(
        {}, {"date": 1, "sha1": 1, "first_seen": 1}, no_cursor_timeout=True).skip(start)
    for r in res:
        count += 1
        test += 1
        if (type(r.get('date')) is not datetime.datetime and r.get('date') is not None) or r.get('date') is None and r.get('first_seen') is not None:
            if(fix_date(r)):
                fixed += 1
            else:
                mis += 1
        if(test >= print_flag):
            test = 0
            print("count-fix-miss," + str(count) +
                  "," + str(fixed) + "," + str(mis))
            sys.stdout.flush()


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /src/PlugIns/PE/ImportsPlug.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
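# Produces, per imported DLL, an entry shaped like (illustrative values):
#   {"lib": "'kernel32.dll'", "functions": ["'createfilea'", "'writefile'"]}
# (names are repr()'d and lowercased below, hence the embedded quotes).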
import pathmagic
from Sample import Sample
from PlugIns.PlugIn import PlugIn
from Modules.PEFileModule import PEFileModule
import pefile


class ImportsPlug(PlugIn):

    def __init__(self, sample=None):
        PlugIn.__init__(self, sample)

    def getPath(self):
        return "particular_header.imports"

    def getName(self):
        return "imports"

    def getVersion(self):
        return 4

    def process(self):
        pelib = self._getLibrary(PEFileModule().getName())
        if(pelib is None):
            return ""

        try:
            if (pelib.OPTIONAL_HEADER.DATA_DIRECTORY[pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']].VirtualAddress == 0):
                return ""
        except Exception, e:
            print str(e)
            return ""

        d = []
        dir_ent_imp = None
        try:
            dir_ent_imp = pelib.DIRECTORY_ENTRY_IMPORT
        except Exception, e:
            print str(e)
            return ""
        for entry in dir_ent_imp:

            dll_name = repr(entry.dll).lower()
            l = []
            for imp in entry.imports:
                l.append(repr(imp.name).lower())
                # aux={}
                # aux["name"]=imp.name
                # aux["ordinal"]=imp.ordinal
                # l.append(aux)

            dic_ent = {"lib": dll_name, "functions": l}
            d.append(dic_ent)

        return d


if __name__ == "__main__":
    source_path = "."  # assumed test root; the original used an undefined source_path
    data = open(source_path + "/Test_files/test.exe", "rb").read()
    sample = Sample()
    sample.setBinary(data)
    modules = {}
    pfm = PEFileModule()
    modules[pfm.getName()] = pfm
    plug = ImportsPlug()
    plug.setModules(modules)
    plug.setSample(sample)
    res = plug.process()
    print(res)
-------------------------------------------------------------------------------- /src/PlugIns/PE/SectionsPlug.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
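# For every PE section this emits a dict with name, raw/virtual sizes,
# characteristics, a write+execute flag, SHA1/SHA256/MD5, an ssdeep hash and
# a Shannon entropy scaled to bits (0..8); packed sections tend toward 8.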
from PlugIns.PlugIn import PlugIn
from Modules.PEFileModule import PEFileModule
import pefile
from Utils.InfoExtractor import *
import logging
import entropy


class SectionsPlug(PlugIn):

    def __init__(self, sample=None):
        PlugIn.__init__(self, sample)

    def getPath(self):
        return "particular_header.sections"

    def getName(self):
        return "sections"

    def getVersion(self):
        return 15

    def process(self):
        # print("SECTIONS")
        # logging.debug("loading pefile")
        pelib = self._getLibrary(PEFileModule().getName())
        if(pelib is None):
            return ""

        # logging.debug("iterating sections")
        ret = []
        number = 0

        for section in pelib.sections:
            # print(section)
            dic_sec = {}
            dic_sec["name"] = repr(section.Name)

            dic_sec["size_raw_data"] = int(hex(section.SizeOfRawData), 16)
            dic_sec["virtual_size"] = int(hex(section.Misc_VirtualSize), 16)
            dic_sec["characteristics"] = hex(section.Characteristics)

            if (section.__dict__.get('IMAGE_SCN_MEM_WRITE', False) and
                    section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False)):
                dic_sec["write_executable"] = "True"
            else:
                dic_sec["write_executable"] = "False"

            data = section.get_data()
            # logging.debug("calculating hashes")
            dic_sec["sha1"] = SHA1(data)
            dic_sec["sha2"] = SHA256(data)
            dic_sec["md5"] = MD5(data)
            # logging.debug("calculating fuzzy")
            dic_sec["fuzzy_hash"] = getSsdeep(data)
            dic_sec["entropy"] = entropy.shannon_entropy(data) * 8
            # logging.debug("finished calculating")

            ret.append(dic_sec)

        return ret
-------------------------------------------------------------------------------- /src/Utils/task.py: --------------------------------------------------------------------------------
import pathmagic
from MetaControl.MetaController import *
from Utils.Functions import change_date_to_str
from Utils.Functions import add_error
from Utils.Functions import id_generator
from Utils.Functions import check_hashes
from rq import Queue
from redis import Redis
from env import envget  # used below; an explicit import was missing
import datetime


def get_task(task_id):
    task_report = load_task(task_id)
    if task_report is not None:
        return change_date_to_str(task_report)
    else:
        return add_error({}, 8, "Task not found")


def add_task(requested):
    task_id = id_generator(40)
    if requested.get('document_name') is None:
        requested["document_name"] = ""

    response = {"requested": requested,
                "date_enqueued": datetime.datetime.now(),
                "task_id": task_id}
    save(response)
    if requested.get('vt_samples'):
        queue_name = "task_private_vt"  # task needs a private VT api
    elif requested.get('vt_av') and not requested.get('vt_samples'):
        queue_name = "task_public_vt"  # task needs a public VT api
    else:
        queue_name = "task_no_vt"  # task doesn't need VT
    q = Queue(queue_name, connection=Redis(host=envget('redis.host')))
    job = q.enqueue('Api.task.generic_task', args=(task_id,), timeout=31536000)
    return task_id


def add_task_to_download_av_result(file_hash):
    requested = {'process': True,
                 'file_hash': file_hash,
                 'vt_av': True,  # assumed True: the original referenced an undefined vt_av
                 'vt_samples': False,
                 'email': '',
                 'document_name': '[automatic-request-from-api]',
                 'ip': '127.0.0.1'}
    return add_task(requested)


def save(document):
    mc = MetaController()
    task_id = document["task_id"]
    return mc.write_task(task_id, document)


def load_task(task_id):
    mc = MetaController()
    return mc.read_task(task_id)


def count_valid_hashes_in_task(task_id):
    task = get_task(task_id)
    file_hash = task.get('requested', {}).get('file_hash')
    if file_hash is None:
        return 0
    output = check_hashes(file_hash)
    if output.get('hashes') is None:
        return 0
    return len(output.get('hashes'))
-------------------------------------------------------------------------------- /src/loadToMongo.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
# Tool to load malware into the mongo database.
import os
import sys
import getopt
from PackageControl.PackageController import *
from Utils.Functions import recursive_read
import hashlib
import traceback
from Sample import *
import datetime
from Ram import *
from Launcher import *


def thetime(start, finish, count):
    return str(count) + " loaded to mongo ( in " + str(int((finish - start).total_seconds())) + " seconds. (" + str(round(((finish - start).total_seconds() / 3600), 2)) + " hours))"


def load_to_mongo2(folder_path):
    pc = PackageController()
    ram = Ram()
    files = recursive_read(folder_path)
    count = 0
    reset = 0
    already_loaded = 0
    time_start = datetime.datetime.now()
    uploaded = 0
    in_mem = 0
    loaded_ram_counter = 0
    lc = Launcher()
    if(files is None):
        return "No files were found."
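    # Batch loop: files are read into RAM until free memory drops below 30%
    # (checked every 100 reads); each buffered sample is then hashed, stored
    # through PackageController and queued for analysis via the Launcher.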
    while (uploaded < len(files)):
        loaded_ram_counter = 0
        data_vector = []
        print "loading files to memory"
        while (in_mem < len(files)):
            f = files[in_mem]
            file_cursor = open(f, "rb")  # binary mode; the original opened with "r"
            data_vector.append(file_cursor.read())
            in_mem = in_mem + 1
            loaded_ram_counter = loaded_ram_counter + 1
            if(loaded_ram_counter > 100):
                if(ram.free_percent() < 0.3):
                    print "Ram full"
                    break
        for data in data_vector:
            file_id = hashlib.sha1(data).hexdigest()
            print "loading to db: " + str(file_id)
            res = pc.searchFile(file_id)
            if(res is None):
                pc.append(file_id, data)
                sample = Sample()
                sample.setID(file_id)
                sample.setBinary(data)
                sample.setStorageVersion({})
                count += 1
                lc.launchAnalysisByID(sample)
            else:
                already_loaded += 1
            uploaded = uploaded + 1

    result = str(already_loaded) + " were already loaded to mongo.\n"
    result += thetime(time_start, datetime.datetime.now(), count)
    print result
    return result
-------------------------------------------------------------------------------- /src/health/2017-01-17-1623-fix_dates_in_av.py: --------------------------------------------------------------------------------
# AV analysis downloaded with the vt public key has no first_seen.
# We use the last scan date to get an approximation.
import pathmagic
import traceback  # used below (traceback.format_exc); the original forgot this import
from db_pool import *
from MetaControl.MetaController import *
from Utils.ProcessDate import process_date  # used below; an explicit import was missing
from pymongo.errors import WriteError


# Walk through a dictionary structure
def read_from_dictionary(source, dic):
    path = source.split('.')
    root = dic
    for p in path:
        try:
            root = root.get(p)
            if(root is None):
                return None
        except Exception, e:
            print str(e)
            return None
    return root


def main():
    mdc = MetaController()

    collection = db["av_analysis"]
    all_analysis = collection.find({"date": None})
    count = 0
    reset = 0
    for analysis in all_analysis:
        count += 1
        reset += 1
        if reset == 1000:
            reset = 0
            print("Count: %s" % count)
        file_id = analysis.get('sha1')
        date_stored = analysis.get('date')
        if(date_stored is not None):
            # mdc.save_first_seen(file_id,date_stored) # Uncomment to copy all av
            # dates to meta dates
            continue

        # Trying to get the best date
        date_registers = ['first_seen',
                          'additional_info.first_seen_itw', 'scan_date']
        for register in date_registers:
            vt_date = read_from_dictionary(register, analysis)
            if vt_date is not None:
                break

        try:
            # The "date" value is used to speed up time queries for av
            # signatures
            new_date = process_date(vt_date)
        except ValueError:
            new_date = None
            print "fix_dates_in_av: invalid date in AV metadata: " + str(vt_date)

        command = {"$set": {"date": new_date}}
        try:
            collection.update_one({"sha1": file_id}, command, upsert=False)
        except WriteError:
            print("**** Error File: %s ****" % (file_id,))
            print(command)
            err = str(traceback.format_exc())
            print(err)
            continue
        mdc.save_first_seen(file_id, new_date)
        print("%s fixed -> new date: %s" % (file_id, new_date))


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /src/PlugIns/PE/CheckPackerPlug.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from PlugIns.PlugIn import PlugIn


class CheckPackerPlug(PlugIn):

    def __init__(self, sample=None):
        PlugIn.__init__(self, sample)

    def getPath(self):
        return "particular_header.packer_detection"

    def getName(self):
        return "packer_detection"

    def getVersion(self):
        return 2

    def process(self):
        entropy = self.sample.getLastValue("file_entropy")
        sections = self.sample.getLastValue("sections")
        imports = self.sample.getLastValue("imports")
        if(entropy is None or sections is None or imports is None):
            return "Unknown"
        flags = 0

        if (entropy >= 6.7):
            flags += 1

        real_sum = 0
        virtual_sum = 0
        we_sum = 0
        we_real_sum = 0
        we_virtual_sum = 0
        for s in sections:
            raw = s["size_raw_data"]
            vir = s["virtual_size"]
            we = s["write_executable"]
            real_sum += raw
            virtual_sum += vir
            if(we == "True"):
                we_sum += 1
                we_real_sum += raw  # was "=", which kept only the last section's size
                we_virtual_sum += vir
        if(we_sum >= 1):
            flags += 1
        try:
            if((1.0 * we_virtual_sum / we_real_sum) >= 1):
                flags += 1
        except Exception, e:
            print str(e)
            flags += 1
        try:
            if((1.0 * virtual_sum / real_sum) >= 1):  # was a misspelled virual_sum
                flags += 1
        except Exception, e:
            flags += 1

        cant_libs = len(imports)
        total_imports = 0
        for i in imports:
            try:
                total_imports += len(i["functions"])
            except Exception, e:
                print str(e)
                break

        if(cant_libs != 0):
            promedio = total_imports / cant_libs  # promedio = average imports per lib
            if(promedio <= 6 and promedio >= 1):
                flags += 1
            if(total_imports <= 35 and total_imports >= 1):
                flags += 1

        if(flags >= 4):
            return "True"
        return "False"
-------------------------------------------------------------------------------- /src/Processors/PEProcessor.py: --------------------------------------------------------------------------------
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
/src/Processors/PEProcessor.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from Processors.Processor import * 5 | import traceback 6 | import time 7 | import string 8 | # PlugIns 9 | from PlugIns.PE import * 10 | # Modules 11 | from Modules.PEFileModule import * 12 | from Modules.MetaDataModule import * 13 | from Sample import * 14 | 15 | 16 | class PEProcessor(Processor): 17 | 18 | def __init__(self, sample): 19 | Processor.__init__(self, sample) 20 | 21 | def __delete__(self): 22 | Processor.__delete__(self) 23 | 24 | # processing method 25 | def process(self): 26 | Processor.process(self) 27 | 28 | self._addModule(PEFileModule()) 29 | self._addModule(MetaDataModule()) 30 | 31 | self._addPlugIn(ImportsPlug.ImportsPlug()) 32 | self._addPlugIn(EntropyPlug.EntropyPlug()) 33 | self._addPlugIn(SectionsPlug.SectionsPlug()) 34 | self._addPlugIn(ResourceEntriesPlug.ResourceEntriesPlug()) 35 | self._addPlugIn(CheckEPSectionPlug.CheckEPSectionPlug()) 36 | # self._addPlugIn(CRCCheckPlug.CRCCheckPlug()) 37 | self._addPlugIn(TimeStampPlug.TimeStampPlug()) 38 | self._addPlugIn(HeadersPlug.HeadersPlug()) 39 | self._addPlugIn(VersionInfoPlug.VersionInfoPlug()) 40 | self._addPlugIn(ChildOfPlug.ChildOfPlug()) 41 | self._addPlugIn(CypherPlug.CypherPlug()) 42 | self._addPlugIn(PackerVersionPlug.PackerVersionPlug()) 43 | self._addPlugIn(AVAnalysisPlug.AVAnalysisPlug()) 44 | self._addPlugIn(CheckPackerPlug.CheckPackerPlug()) 45 | # self._addPlugIn(TestPlug.TestPlug()) 46 | self._addPlugIn(AddImportsToTreePlug.AddImportsToTreePlug()) 47 | self._addPlugIn(ExportsPlug.ExportsPlug()) 48 | 49 | self._addPlugIn(CertficatePlug.CertficatePlug()) 50 | 51 | self._addPlugIn(StringPlug.StringPlug()) 52 | 53 | self._executeAllPlugIns() 54 | 55 | return self.metadata_to_store 56 | 57 | # ****************TEST_CODE****************** 58 | 59 | 60 | def testCode(): 61 | file = "Test_files/error_de_codificacion_en_nombre_de_libreria" 62 | data = open(file, "rb").read() 63 | 64 | start_time = time.time() 65 | dic = {} 66 | sample = Sample() 67 | sample.setBinary(data) 68 | pe = PEProcessor(sample)  # the constructor takes only the sample 69 | res = pe.process() 70 | # print(res) 71 | # print(res["particular_header"]["sections"]) 72 | elapsed = time.time() - start_time 73 | # print("Time Elapsed: "+str(elapsed*1000)+" ms") 74 | # print("") 75 | 76 | 77 | # ****************TEST_EXECUTE****************** 78 | from Utils.test import test  # explicit import; test() is called below 79 | 80 | 81 | test("-test_PEProcessor", testCode) 82 | -------------------------------------------------------------------------------- /src/Utils/ProcessDate.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | import datetime 5 | 6 | 7 | # Given a date as a str (epoch or ISO), 8 | # returns a datetime object.
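# Illustrative examples (a sketch, not exhaustive; the epoch branch uses
# fromtimestamp(), so its result depends on the local timezone):
#   process_date("2016-12-22T20:45:00Z") -> datetime.datetime(2016, 12, 22, 20, 45)
#   process_date("2016-12-22 20:45")     -> datetime.datetime(2016, 12, 22, 20, 45)
#   process_date("2016-12-22")           -> datetime.datetime(2016, 12, 22, 0, 0)
# parse_date_range() below maps the same formats to Mongo range operators:
#   parse_date_range(">=2016-12-22") -> {"$gte": datetime.datetime(2016, 12, 22, 0, 0)}
#   parse_date_range("[2016-01-01;2016-12-31]")
#     -> {"$gte": datetime.datetime(2016, 1, 1, 0, 0), "$lte": datetime.datetime(2016, 12, 31, 0, 0)}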
9 | def process_date(str_date): 10 | if str_date is None: 11 | return None 12 | str_date = str_date.strip() 13 | if str_date == "": 14 | return None 15 | if str_date.isdigit(): 16 | return datetime.datetime.fromtimestamp(int(str_date)) 17 | elif len(str_date) == 20 and str_date[10] == "T": 18 | return datetime.datetime.strptime(str_date, "%Y-%m-%dT%H:%M:%SZ") 19 | elif len(str_date) == 19 and str_date[10] == " ": 20 | return datetime.datetime.strptime(str_date, "%Y-%m-%d %H:%M:%S") 21 | elif len(str_date) == 19 and str_date[10] == "T": 22 | return datetime.datetime.strptime(str_date, "%Y-%m-%dT%H:%M:%S") 23 | elif len(str_date) == 16 and str_date[10] == "T": 24 | return datetime.datetime.strptime(str_date, "%Y-%m-%dT%H:%M") 25 | elif len(str_date) == 16 and str_date[10] == " ": 26 | return datetime.datetime.strptime(str_date, "%Y-%m-%d %H:%M") 27 | else: 28 | return datetime.datetime.strptime(str_date, "%Y-%m-%d") 29 | 30 | # Given a string like >=YYYY-MM-DD 31 | # returns an object like 32 | # {"$gte": datetime.datetime(YYYY,MM,DD,0,0,0,0)} 33 | 34 | 35 | def parse_date_range(str_date): 36 | if str_date is None: 37 | return None 38 | str_date = str_date.strip() 39 | if str_date[1] == "=": # for dates like >=YYYY-MM-DD 40 | if str_date[0] == ">": 41 | operator = "$gte" 42 | elif str_date[0] == "<": 43 | operator = "$lte" 44 | datetime_object = process_date(str_date[2:]) 45 | return {operator: datetime_object} 46 | elif str_date[0] == ">": # for >YYYY-MM-DD 47 | operator = "$gt" 48 | datetime_object = process_date(str_date[1:]) 49 | return {operator: datetime_object} 50 | elif str_date[0] == "<": 51 | operator = "$lt" 52 | datetime_object = process_date(str_date[1:]) 53 | return {operator: datetime_object} 54 | elif str_date[0] == "[" and str_date[len(str_date) - 1] == "]": 55 | # for [YYYY-MM-DD;YYYY-MM-DD] 56 | operator1 = "$gte" 57 | operator2 = "$lte" 58 | dates = str_date[1:(len(str_date) - 1)].split(";") 59 | date1 = process_date(dates[0]) 60 | date2 = process_date(dates[1]) 61 | return {operator1: date1, operator2: date2} 62 | elif len(str_date) == 10: 63 | return {"$gte": process_date(str_date), "$lt": process_date(str_date) + datetime.timedelta(hours=24)} 64 | else: 65 | print "Invalid date for parse_date_range: " + str(str_date) 66 | raise ValueError 67 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2.7 2 | RUN mkdir /myapp 3 | WORKDIR /myapp 4 | ADD . 
/myapp 5 | ENV PYTHONUNBUFFERED=0 6 | RUN apt-get clean && apt-get -o Debug::pkgProblemResolver=true -o Debug::Acquire::http=true update -qq && apt-get -o Debug::pkgProblemResolver=true -o Debug::Acquire::http=true install -y build-essential \ 7 | libpq-dev \ 8 | python-hachoir-metadata \ 9 | p7zip-full \ 10 | libffi-dev \ 11 | libssl-dev \ 12 | python-dev \ 13 | libfuzzy-dev \ 14 | python-gevent \ 15 | python-pip \ 16 | python-magic \ 17 | python-crypto \ 18 | zip \ 19 | python-dateutil \ 20 | python-mysqldb \ 21 | autoconf \ 22 | openssl \ 23 | file \ 24 | python \ 25 | git \ 26 | autoconf \ 27 | automake \ 28 | libc-dev \ 29 | libtool \ 30 | python-dev \ 31 | unzip \ 32 | libfreetype6-dev \ 33 | libtaoframework-freetype-cil-dev \ 34 | libxft-dev && \ 35 | echo "Installing yarGen-master" && \ 36 | wget -nv -nc https://github.com/Neo23x0/yarGen/archive/master.zip -O /myapp/yara/yarGen-master.zip && \ 37 | cd /myapp/yara/ && \ 38 | unzip /myapp/yara/yarGen-master.zip -d /myapp/yara/ && \ 39 | #7z x /myapp/yara/yarGen-master/good-opcodes.db.zip.001 -oyarGen-master && \ 40 | #7z x /myapp/yara/yarGen-master/good-strings.db.zip.001 -oyarGen-master && \ 41 | git clone --depth 1 https://github.com/binarlyhq/binarly-sdk/ /myapp/yara/binarly-sdk && \ 42 | wget -nv -nc https://github.com/plusvic/yara/archive/v3.4.0.zip -O /tmp/yara.zip && \ 43 | unzip /tmp/yara.zip -d /tmp && \ 44 | echo "Installing pip requirements" && \ 45 | pip install -r /myapp/src/pip_requirements.txt && \ 46 | pip install -r /myapp/src/pip_yargen_requirements.txt && \ 47 | pip install -r /myapp/src/pip_vt_api_requirements.txt && \ 48 | pip install -r /myapp/src/pip_test_requirements.txt && \ 49 | cd /tmp/yara-3.4.0/ && ./bootstrap.sh && ./configure && \ 50 | cd /tmp/yara-3.4.0/ && make && make install && \ 51 | cd /myapp/yara && \ 52 | python /myapp/yara/binarly-sdk/setup.py install && \ 53 | cd /myapp/yara/yarGen-master && \ 54 | python yarGen.py --update && \ 55 | #7z x -y /myapp/yara/yarGen-master/good-strings.db.zip.001 -o/myapp/yara/yarGen-master && \ 56 | #7z x -y /myapp/yara/yarGen-master/good-opcodes.db.zip.001 -o/myapp/yara/yarGen-master && \ 57 | wget -nv -nc https://winitor.com/tools/pestudio/current/pestudio.zip -O /tmp/pestudio.zip && \ 58 | unzip /tmp/pestudio.zip -d /tmp && \ 59 | cp /tmp/xml/strings.xml /myapp/yara/yarGen-master/ && \ 60 | rm -rf /tmp/yara-3.4.0/ && \ 61 | rm -f /tmp/pestudio.zip && \ 62 | rm -f /tmp/yara.zip && \ 63 | rm -rf /tmp/pestudio/ && \ 64 | rm -f /myapp/yara/yarGen-master.zip 65 | #rm -f /myapp/yara/yarGen-master/good-opcodes.db.zip.001 && \ 66 | #rm -f /myapp/yara/yarGen-master/good-opcodes.db.zip.002 && \ 67 | #rm -f /myapp/yara/yarGen-master/good-opcodes.db.zip.003 && \ 68 | #rm -f /myapp/yara/yarGen-master/good-opcodes.db.zip.004 && \ 69 | #rm -f /myapp/yara/yarGen-master/good-opcodes.db.zip.005 && \ 70 | #rm -f /myapp/yara/yarGen-master/good-opcodes.db.zip.006 && \ 71 | #rm -f /myapp/yara/yarGen-master/good-strings.db.zip.001 && \ 72 | #rm -f /myapp/yara/yarGen-master/good-strings.db.zip.002 && \ 73 | #rm -f /myapp/yara/yarGen-master/good-strings.db.zip.003 && \ 74 | #rm -f /myapp/yara/yarGen-master/good-strings.db.zip.004 75 | 76 | #CMD ["python","/myapp/src/api2.py"] 77 | -------------------------------------------------------------------------------- /src/PlugIns/PE/ResourceEntriesPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 
2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from PlugIns.PlugIn import PlugIn 5 | from Modules.PEFileModule import PEFileModule 6 | import pefile 7 | from Utils.InfoExtractor import * 8 | 9 | 10 | class ResourceEntriesPlug(PlugIn): 11 | 12 | def __init__(self, sample=None): 13 | PlugIn.__init__(self, sample) 14 | 15 | def getPath(self): 16 | return "particular_header.res_entries" 17 | 18 | def getName(self): 19 | return "res_entries" 20 | 21 | def getVersion(self): 22 | return 6 23 | 24 | def process(self): 25 | pelib = self._getLibrary(PEFileModule().getName()) 26 | if(pelib is None): 27 | return "" 28 | 29 | ret = [] 30 | if hasattr(pelib, 'DIRECTORY_ENTRY_RESOURCE'): 31 | i = 0 32 | for resource_type in pelib.DIRECTORY_ENTRY_RESOURCE.entries: 33 | if resource_type.name is not None: 34 | name = "%s" % resource_type.name 35 | else: 36 | name = pefile.RESOURCE_TYPE.get(  # may be None for unknown type ids 37 | resource_type.struct.Id) 38 | if name is None: 39 | name = "%d" % resource_type.struct.Id 40 | if hasattr(resource_type, 'directory'): 41 | for resource_id in resource_type.directory.entries: 42 | if hasattr(resource_id, 'directory'): 43 | for resource_lang in resource_id.directory.entries: 44 | try: 45 | data = pelib.get_data( 46 | resource_lang.data.struct.OffsetToData, resource_lang.data.struct.Size) 47 | # fd=open(name,'wb') 48 | # fd.write(data) 49 | # (data) 50 | except pefile.PEFormatError: 51 | return "corrupt" 52 | filetype = MIME_TYPE(data, False) 53 | lang = pefile.LANG.get( 54 | resource_lang.data.lang, 'unknown') 55 | sublang = pefile.get_sublang_name_for_lang( 56 | resource_lang.data.lang, resource_lang.data.sublang) 57 | entry = {} 58 | entry["name"] = self._normalize(name) 59 | entry["rva"] = self._normalize( 60 | hex(resource_lang.data.struct.OffsetToData)) 61 | entry["size"] = self._normalize( 62 | hex(resource_lang.data.struct.Size)) 63 | entry["type"] = self._normalize(filetype) 64 | entry["lang"] = self._normalize(lang) 65 | entry["sublang"] = self._normalize(sublang) 66 | entry["sha1"] = SHA1(data) 67 | ret.append(entry) 68 | 69 | return ret 70 | -------------------------------------------------------------------------------- /src/PlugIns/PE/ExportsPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission.
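# Output sketch (values invented; the shape matches process() below): a DLL
# with one forwarded export would yield
#   {"characteristics": 0, "timeDateStamp": 1467936000, ..., "symbols": [
#       {"ordinal": 1, "name": "heapalloc", "RVA": 73456,
#        "forwarder_dll": "'ntdll.dll'", "forwarder_function": "'rtlallocateheap'"}]}
# Note that the forwarder_* values keep their repr() quoting and that symbol
# names are lower-cased; the forwarder_* keys only appear for forwarded exports.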
4 | import pathmagic 5 | from PlugIns.PlugIn import PlugIn 6 | from Modules.PEFileModule import PEFileModule 7 | import pefile 8 | from Sample import Sample 9 | 10 | 11 | class ExportsPlug(PlugIn): 12 | 13 | def __init__(self, sample=None): 14 | PlugIn.__init__(self, sample) 15 | 16 | def getPath(self): 17 | return "particular_header.exports" 18 | 19 | def getName(self): 20 | return "exports" 21 | 22 | def getVersion(self): 23 | return 1 24 | 25 | def process(self): 26 | pelib = self._getLibrary(PEFileModule().getName()) 27 | if(pelib is None): 28 | return "" 29 | 30 | ret = {} 31 | 32 | # print(dir(pelib.DIRECTORY_ENTRY_EXPORT)) 33 | # print(dir(pelib.DIRECTORY_ENTRY_EXPORT.symbols)) 34 | 35 | if not hasattr(pelib, 'DIRECTORY_ENTRY_EXPORT'): 36 | return ret 37 | 38 | ret["characteristics"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.Characteristics 39 | ret["timeDateStamp"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp 40 | ret["majorVersion"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.MajorVersion 41 | ret["minorVersion"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.MinorVersion 42 | ret["name"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.Name 43 | ret["base"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.Base 44 | ret["numberOfFunctions"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.NumberOfFunctions 45 | ret["numberOfNames"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.NumberOfNames 46 | ret["addressOfFunctions"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.AddressOfFunctions 47 | ret["AddressOfNames"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.AddressOfNames 48 | ret["AddressOfOrdinals"] = pelib.DIRECTORY_ENTRY_EXPORT.struct.AddressOfNameOrdinals 49 | 50 | symbols = [] 51 | # print(dir(pelib.DIRECTORY_ENTRY_EXPORT.symbols)) 52 | for export in pelib.DIRECTORY_ENTRY_EXPORT.symbols: 53 | 54 | # print(dir(export)) 55 | # print(export.address) 56 | # print(hex(export.address)) 57 | 58 | symbol = {} 59 | 60 | symbol["ordinal"] = export.ordinal 61 | symbol["name"] = str(export.name).lower() 62 | symbol["RVA"] = export.address 63 | if(export.forwarder is not None): 64 | if(export.forwarder.find('.') != -1): 65 | symbol["forwarder_dll"] = repr( 66 | str(export.forwarder).lower().split('.')[0] + ".dll") 67 | symbol["forwarder_function"] = repr( 68 | str(export.forwarder).lower().split('.')[1]) 69 | 70 | # symbol["address"]=hex(export.address) 71 | # symbol["address_offset"]=hex(export.address_offset) 72 | # symbol["forwarder_offset"]=hex(export.forwarder_offset) 73 | # symbol["name_offset"]=hex(export.name_offset) 74 | # symbol["ordinal_offset"]=hex(export.ordinal_offset) 75 | # symbol["pe"]=export.pe 76 | symbols.append(symbol) 77 | 78 | # print(symbol) 79 | # raw_input() 80 | 81 | ret["symbols"] = symbols 82 | 83 | return ret 84 | 85 | 86 | if __name__ == "__main__": 87 | data = open(source_path + "/Test_files/kernel32.dll", "rb").read() 88 | sample = Sample() 89 | sample.setBinary(data) 90 | modules = {} 91 | pfm = PEFileModule() 92 | modules[pfm.getName()] = pfm 93 | plug = ExportsPlug() 94 | plug.setModules(modules) 95 | plug.setSample(sample) 96 | res = plug.process() 97 | # print(res) 98 | -------------------------------------------------------------------------------- /pull-install.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | db: 4 | image: mongo:3.2 5 | volumes: 6 | - ../mongo-data/:/data/db 7 | links: [syslog] 8 | logging: 9 | driver: syslog 10 | options: 11 | syslog-address: "udp://127.0.0.1:5514" 12 | tag: "db" 13 | 14 | httpd: 15 | image: 
codexgigassys/codex-frontend:latest 16 | ports: 17 | - "6100:80" 18 | links: 19 | - syslog 20 | logging: 21 | driver: syslog 22 | options: 23 | syslog-address: "udp://127.0.0.1:5514" 24 | tag: "httpd" 25 | 26 | api: 27 | image: codexgigassys/codex-backend:latest 28 | command: bash -c "echo updating pip packages && pip install -r /myapp/src/pip_requirements.txt && echo starting api...Check 127.0.0.1:8080 && python -u /myapp/src/api2.py -H $$HOSTNAME" 29 | volumes: 30 | - ./src/config:/myapp/src/config 31 | - ./files_to_load/:/myapp/files_to_load/ 32 | ports: 33 | - "4500:8080" 34 | links: 35 | - db 36 | - httpd 37 | - syslog 38 | logging: 39 | driver: syslog 40 | options: 41 | syslog-address: "udp://127.0.0.1:5514" 42 | tag: "api" 43 | 44 | redis: 45 | image: redis:latest 46 | links: 47 | - api 48 | - syslog 49 | logging: 50 | driver: syslog 51 | options: 52 | syslog-address: "udp://127.0.0.1:5514" 53 | tag: "redis" 54 | 55 | worker: 56 | image: codexgigassys/codex-backend:worker 57 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_private_vt task_public_vt task_no_vt process checkup" 58 | volumes: 59 | - ./src/config/:/myapp/config/ 60 | links: 61 | - db 62 | - redis 63 | - syslog 64 | logging: 65 | driver: syslog 66 | options: 67 | syslog-address: "udp://127.0.0.1:5514" 68 | tag: "worker" 69 | 70 | worker_private_vt: 71 | image: codexgigassys/codex-backend:worker 72 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_private_vt" 73 | volumes: 74 | - ./src/config/:/myapp/config/ 75 | links: 76 | - db 77 | - redis 78 | - syslog 79 | logging: 80 | driver: syslog 81 | options: 82 | syslog-address: "udp://127.0.0.1:5514" 83 | tag: "worker_private_vt" 84 | 85 | worker_public_vt: 86 | image: codexgigassys/codex-backend:worker 87 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_public_vt" 88 | volumes: 89 | - ./src/config/:/myapp/config/ 90 | links: 91 | - db 92 | - redis 93 | - syslog 94 | logging: 95 | driver: syslog 96 | options: 97 | syslog-address: "udp://127.0.0.1:5514" 98 | 99 | worker_no_vt: 100 | image: codexgigassys/codex-backend:worker 101 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_no_vt process checkup" 102 | volumes: 103 | - ./src/config/:/myapp/config/ 104 | links: 105 | - db 106 | - redis 107 | - syslog 108 | logging: 109 | driver: syslog 110 | options: 111 | syslog-address: "udp://127.0.0.1:5514" 112 | tag: "worker_no_vt" 113 | 114 | cron: 115 | image: codexgigassys/codex-backend:cron 116 | #build: 117 | # context: ./src/cron 118 | # dockerfile: cronDockerfile 119 | environment: 120 | - CRON_ENTRY=5 0 * * * wget -O - http://codexbackend_api_1:8080/api/v1/cron 121 | links: 122 | - api 123 | - syslog 124 | logging: 125 | driver: syslog 126 | options: 127 | syslog-address: "udp://127.0.0.1:5514" 128 | tag: "cron" 129 | 130 | syslog: 131 | image: voxxit/rsyslog 132 | ports: 133 | - "5514:514/udp" 134 | -------------------------------------------------------------------------------- /src/MetaControl/XMLCreator.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 
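# Usage sketch (invented data; element order follows Python 2 dict iteration):
#   xml = XMLCreator()
#   node = xml.createXMLNode("principal_node",
#                            {"pepe": "pepe", "lista": ["one thing", "other thing"]})
#   xml.serialize(node)
#   # -> '<principal_node><pepe>pepe</pepe><lista><item>one thing</item>
#   #     <item>other thing</item></lista></principal_node>'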
4 | import xml.etree.cElementTree as XML 5 | import os 6 | 7 | 8 | class XMLCreator(): 9 | 10 | def __init__(self): 11 | pass 12 | 13 | def __delete__(self): 14 | pass 15 | 16 | def serialize(self, node): 17 | ser = XML.tostring(node) 18 | return ser 19 | 20 | def parse(self, meta_plain): 21 | try: 22 | root = XML.fromstring(meta_plain) 23 | dic = self._iterateNode(root) 24 | return dic 25 | except Exception, e: 26 | print str(e) 27 | return {} 28 | 29 | def appendValueFromDictionary(self, node, dic, value): 30 | n = XML.SubElement(node, value) 31 | n.text = str(dic[value]) 32 | 33 | def saveToFile(self, xml_node, file_name): 34 | tree = XML.ElementTree(xml_node) 35 | tree.write("../DB/metadata/" + str(file_name) + ".xml") 36 | return 0 37 | 38 | def createXMLNode(self, node_name, dic): 39 | root = XML.Element(node_name) 40 | self.appendAll(root, dic) 41 | return root 42 | 43 | def appendAll(self, node, contenedor): 44 | tipo = type(contenedor) 45 | if(tipo == type({})): # appends to a dictionary 46 | for d in contenedor: 47 | n = XML.SubElement(node, d) 48 | self.appendAll(n, contenedor[d]) 49 | elif(tipo == type([])): # appends to a list 50 | for v in contenedor: 51 | n = XML.SubElement(node, "item") 52 | n.text = v 53 | else: 54 | node.text = str(contenedor) # saves a value 55 | 56 | # return node 57 | 58 | def readAll(self, file_name): 59 | try: 60 | tree = XML.parse("../DB/metadata/" + str(file_name) + ".xml") 61 | root = tree.getroot() 62 | dic = self._iterateNode(root) 63 | return dic 64 | except Exception, e: 65 | print str(e) 66 | return {} 67 | 68 | def _iterateNode(self, node): 69 | dic = {} 70 | lista = [] 71 | for sub in node: 72 | text = sub.text 73 | if(text == None): 74 | dic[sub.tag] = self._iterateNode(sub) 75 | else: 76 | if(sub.tag == "item"): 77 | lista.append(text) 78 | else: 79 | dic[sub.tag] = text 80 | if(len(lista) > 0): 81 | return lista 82 | return dic 83 | 84 | 85 | #****************TEST_CODE****************** 86 | 87 | TEST = "-test_XMLCreator" 88 | 89 | 90 | def testCode(): 91 | #------------------------------------------------------------------ 92 | dic = {} 93 | dic["pepe"] = "pepe" 94 | lista = [] 95 | lista.append("one thing") 96 | lista.append("other thing") 97 | dic["lista"] = lista 98 | xml = XMLCreator() 99 | node = xml.createXMLNode("principal_node", dic) 100 | ser = xml.serialize(node) 101 | print(ser) 102 | 103 | #------------------------------------------------------------------ 104 | #~ xml=XMLCreator() 105 | #~ dic=xml.readAll("filename") 106 | #~ print(dic) 107 | #------------------------------------------------------------------ 108 | #~ xml=XMLCreator() 109 | #~ data=open("test.xml","r").read() 110 | #~ dic=xml.parse(data) 111 | #~ print(dic) 112 | 113 | 114 | #***********************TEST*************************** 115 | import sys 116 | import traceback 117 | if(len(sys.argv) >= 2): 118 | if(sys.argv[1] == TEST): 119 | try: 120 | print("######## Test of " + str(sys.argv[0]) + " ########") 121 | testCode() 122 | 123 | except: 124 | print(traceback.format_exc()) 125 | raw_input("Press a key...") 126 | -------------------------------------------------------------------------------- /debug.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | db: 4 | image: mongo:3.2 5 | volumes: 6 | - ../mongo-data/:/data/db 7 | links: 8 | - syslog 9 | logging: 10 | driver: syslog 11 | options: 12 | syslog-address: "udp://127.0.0.1:5514" 13 | tag: "db" 14 | 15 | httpd: 16 | image: httpd:2.4 17 | 
volumes: 18 | - ../codex-frontend/:/usr/local/apache2/htdocs/ 19 | ports: 20 | - "6100:80" 21 | links: 22 | - syslog 23 | logging: 24 | driver: syslog 25 | options: 26 | syslog-address: "udp://127.0.0.1:5514" 27 | tag: "httpd" 28 | 29 | api: 30 | image: codexgigassys/codex-backend:latest 31 | #build: . 32 | command: bash -c "echo updating pip packages && pip install -r /myapp/src/pip_requirements.txt && pip install -r /myapp/src/pip_vt_api_requirements.txt && echo checking indexes && python -u /myapp/src/Scripts/create_indexes.py && echo starting api...Check 127.0.0.1:8080 && python -u /myapp/src/api2.py -H $$HOSTNAME" 33 | volumes: 34 | - .:/myapp 35 | ports: 36 | - "4500:8080" 37 | links: 38 | - db 39 | - httpd 40 | - syslog 41 | logging: 42 | driver: syslog 43 | options: 44 | syslog-address: "udp://127.0.0.1:5514" 45 | tag: "api" 46 | 47 | redis: 48 | image: redis:latest 49 | links: 50 | - api 51 | - syslog 52 | logging: 53 | driver: syslog 54 | options: 55 | syslog-address: "udp://127.0.0.1:5514" 56 | tag: "redis" 57 | 58 | worker: 59 | image: codexgigassys/codex-backend:worker 60 | #build: 61 | # context: ./src 62 | # dockerfile: workerDockerfile 63 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_private_vt task_public_vt task_no_vt process checkup" 64 | volumes: 65 | - ./src/:/myapp/ 66 | links: 67 | - db 68 | - redis 69 | - syslog 70 | logging: 71 | driver: syslog 72 | options: 73 | syslog-address: "udp://127.0.0.1:5514" 74 | tag: "worker" 75 | 76 | worker_private_vt: 77 | image: codexgigassys/codex-backend:worker 78 | #build: 79 | # context: ./src 80 | # dockerfile: workerDockerfile 81 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_private_vt" 82 | volumes: 83 | - ./src/:/myapp/ 84 | links: 85 | - db 86 | - redis 87 | - syslog 88 | logging: 89 | driver: syslog 90 | options: 91 | syslog-address: "udp://127.0.0.1:5514" 92 | tag: "worker_private_vt" 93 | 94 | worker_public_vt: 95 | image: codexgigassys/codex-backend:worker 96 | #build: 97 | # context: ./src 98 | # dockerfile: workerDockerfile 99 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_public_vt" 100 | volumes: 101 | - ./src/:/myapp/ 102 | links: 103 | - db 104 | - redis 105 | - syslog 106 | logging: 107 | driver: syslog 108 | options: 109 | syslog-address: "udp://127.0.0.1:5514" 110 | 111 | worker_no_vt: 112 | image: codexgigassys/codex-backend:worker 113 | #build: 114 | # context: ./src 115 | # dockerfile: workerDockerfile 116 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_no_vt process checkup" 117 | volumes: 118 | - ./src/:/myapp/ 119 | links: 120 | - db 121 | - redis 122 | - syslog 123 | logging: 124 | driver: syslog 125 | options: 126 | syslog-address: "udp://127.0.0.1:5514" 127 | tag: "worker_no_vt" 128 | 129 | cron: 130 | image: codexgigassys/codex-backend:cron 131 | #build: 132 | # context: ./src/cron 133 | # dockerfile: cronDockerfile 134 | environment: 135 | - CRON_ENTRY=5 0 * * * wget -O - http://codexbackend_api_1:8080/api/v1/cron 136 | links: 137 | - api 138 | - syslog 139 | logging: 140 | driver: syslog 141 | options: 142 | syslog-address: "udp://127.0.0.1:5514" 143 | tag: "cron" 144 | 145 | syslog: 146 | image: voxxit/rsyslog 147 | ports: 148 | - "5514:514/udp" 149 | -------------------------------------------------------------------------------- /docker-compose.yml: 
-------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | db: 4 | image: mongo:3.2 5 | volumes: 6 | - ../mongo-data/:/data/db 7 | links: 8 | - syslog 9 | logging: 10 | driver: syslog 11 | options: 12 | syslog-address: "udp://127.0.0.1:5514" 13 | tag: "db" 14 | 15 | httpd: 16 | image: httpd:2.4 17 | volumes: 18 | - ../codex-frontend/:/usr/local/apache2/htdocs/ 19 | ports: 20 | - "6100:80" 21 | links: 22 | - syslog 23 | logging: 24 | driver: syslog 25 | options: 26 | syslog-address: "udp://127.0.0.1:5514" 27 | tag: "httpd" 28 | 29 | api: 30 | #image: codexgigassys/codex-backend:latest 31 | build: . 32 | command: bash -c "echo updating pip packages && pip install -r /myapp/src/pip_requirements.txt && pip install -r /myapp/src/pip_vt_api_requirements.txt && echo checking indexes && python -u /myapp/src/Scripts/create_indexes.py && echo starting api...Check 127.0.0.1:8080 && python -u /myapp/src/api2.py -H $$HOSTNAME" 33 | volumes: 34 | - ./src/config/:/myapp/src/config/ 35 | - ./files_to_load/:/myapp/files_to_load/ 36 | ports: 37 | - "4501:8080" 38 | links: 39 | - db 40 | - httpd 41 | - syslog 42 | logging: 43 | driver: syslog 44 | options: 45 | syslog-address: "udp://127.0.0.1:5514" 46 | tag: "api" 47 | 48 | redis: 49 | image: redis:latest 50 | links: 51 | - api 52 | - syslog 53 | logging: 54 | driver: syslog 55 | options: 56 | syslog-address: "udp://127.0.0.1:5514" 57 | tag: "redis" 58 | 59 | worker: 60 | #image: codexgigassys/codex-backend:worker 61 | build: 62 | context: ./src 63 | dockerfile: workerDockerfile 64 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_private_vt task_public_vt task_no_vt process checkup" 65 | volumes: 66 | - ./src/config/:/myapp/config/ 67 | links: 68 | - db 69 | - redis 70 | - syslog 71 | logging: 72 | driver: syslog 73 | options: 74 | syslog-address: "udp://127.0.0.1:5514" 75 | tag: "worker" 76 | 77 | worker_private_vt: 78 | #image: codexgigassys/codex-backend:worker 79 | build: 80 | context: ./src 81 | dockerfile: workerDockerfile 82 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_private_vt" 83 | volumes: 84 | - ./src/config/:/myapp/config/ 85 | links: 86 | - db 87 | - redis 88 | - syslog 89 | logging: 90 | driver: syslog 91 | options: 92 | syslog-address: "udp://127.0.0.1:5514" 93 | tag: "worker_private_vt" 94 | 95 | worker_public_vt: 96 | #image: codexgigassys/codex-backend:worker 97 | build: 98 | context: ./src 99 | dockerfile: workerDockerfile 100 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_public_vt" 101 | volumes: 102 | - ./src/config/:/myapp/config/ 103 | links: 104 | - db 105 | - redis 106 | - syslog 107 | logging: 108 | driver: syslog 109 | options: 110 | syslog-address: "udp://127.0.0.1:5514" 111 | tag: "worker_public_vt" 112 | 113 | worker_no_vt: 114 | #image: codexgigassys/codex-backend:worker 115 | build: 116 | context: ./src 117 | dockerfile: workerDockerfile 118 | command: bash -c "rq worker --path /myapp/ --url redis://codexbackend_redis_1:6379/0 task_no_vt process checkup" 119 | volumes: 120 | - ./src/config/:/myapp/config/ 121 | links: 122 | - db 123 | - redis 124 | - syslog 125 | logging: 126 | driver: syslog 127 | options: 128 | syslog-address: "udp://127.0.0.1:5514" 129 | tag: "worker_no_vt" 130 | 131 | cron: 132 | #image: codexgigassys/codex-backend:cron 133 | build: 134 | context: ./src/cron 135 | dockerfile: cronDockerfile 136 | environment: 137 
| - CRON_ENTRY=5 0 * * * wget -O - http://codexbackend_api_1:8080/api/v1/cron 138 | links: 139 | - api 140 | - syslog 141 | logging: 142 | driver: syslog 143 | options: 144 | syslog-address: "udp://127.0.0.1:5514" 145 | tag: "cron" 146 | 147 | syslog: 148 | image: voxxit/rsyslog 149 | ports: 150 | - "5514:514/udp" 151 | -------------------------------------------------------------------------------- /src/Processors/Processor.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from Utils.InfoExtractor import * 5 | import datetime 6 | import math 7 | from Utils.TimeLogger import TimeLogger 8 | import traceback 9 | from PlugIns.Generic import * 10 | import logging 11 | from Utils.test import test 12 | 13 | 14 | class Processor(): 15 | 16 | def __init__(self, sample): 17 | # self.result=Metadata() # result of processing 18 | self.sample = sample # data for analyzing 19 | # self.sample.setCalculatedMetadata(self.result) 20 | self.version = sample.getStorageVersion() # dictionary of current versions 21 | self.result_version = sample.getCalculatedVersion() # up to date versions 22 | self.plugins = [] # plugins to execute. 23 | self.modules = {} # Modules of libraries used by plugins. 24 | self.metadata_to_store = {} 25 | 26 | def __delete__(self): 27 | pass 28 | 29 | # General processing that gets executed for every sample. 30 | def process(self): 31 | self._addPlugIn(FuzzyPlug.FuzzyPlug()) 32 | self._addPlugIn(HashPlug.HashPlug()) 33 | self._addPlugIn(SizePlug.SizePlug()) 34 | self._addPlugIn(DescPlug.DescPlug()) 35 | self._addPlugIn(MimePlug.MimePlug()) 36 | self._addPlugIn(DatePlug.DatePlug()) 37 | 38 | return self.metadata_to_store 39 | 40 | def _executeAllPlugIns(self): 41 | for plug in self.plugins: 42 | plug.setSample(self.sample) 43 | plug.setModules(self.modules) 44 | self._executePlugIn(plug) 45 | 46 | def _addPlugIn(self, plug): 47 | self.plugins.append(plug) 48 | 49 | def _addModule(self, mod): 50 | self.modules[mod.getName()] = mod 51 | 52 | # Execute plugins in a safe way. 53 | def _executePlugIn(self, plugin): 54 | info_string = plugin.getName() 55 | code_version = plugin.getVersion() 56 | path = plugin.getPath() 57 | if(self._version_is_update(info_string, code_version)): 58 | return 0 59 | # compute 60 | try: 61 | logging.debug("Running %s v.%s PlugIn", 62 | info_string, str(code_version)) 63 | # tl=TimeLoger() 64 | # tl.startCounter() 65 | res = plugin.process() 66 | # tl.logTime(info_string) 67 | except KeyboardInterrupt: 68 | raise KeyboardInterrupt 69 | except Exception, e: 70 | logging.error("Error in %s PlugIn with sample:%s", 71 | info_string, self.sample.getID(), exc_info=True) 72 | res = "ERROR_EXECUTE_PLUGIN" 73 | logging.exception("**** Error File: %s ****" % 74 | (self.sample.getID(),)) 75 | logging.info("**** PlugIn : %s ****" % (info_string,)) 76 | err = str(traceback.format_exc()) 77 | logging.info(err) 78 | self._update(plugin, res) 79 | return 0 80 | 81 | # check if the version of "info string" is up to date. 82 | def _version_is_update(self, info_string, code_version): 83 | if(self.version is None): 84 | return False 85 | ver = self.version.get(info_string) 86 | if(ver is None): 87 | return False 88 | if(ver < code_version): 89 | return False 90 | return True 91 | 92 | # saves the result and the version. 
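# Example of the gate above (hypothetical numbers): with stored versions
# {"entropy": 1} and a plugin whose getVersion() returns 2,
# _version_is_update("entropy", 2) is False, so the plugin is re-run and
# _update() below records version 2; a stored version >= 2 skips the plugin.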
93 | def _update(self, plugin, res): 94 | code_version = plugin.getVersion() 95 | name = plugin.getName() 96 | info_string = plugin.getPath() 97 | self.sample.setCalculatedValue(info_string, res) 98 | # self.version[name]=code_version 99 | self.result_version[name] = code_version 100 | self.metadata_to_store[info_string] = res 101 | return 0 102 | 103 | # returns up to date versions. 104 | def getVersion(self): 105 | return self.result_version 106 | 107 | # redefine str() 108 | # def __str__(self): 109 | # string="" 110 | # for word in self.result: 111 | # #tabs='\t' 112 | # tabs=" " 113 | # #for i in range(6-int((len(word)+1)/8)): 114 | # # tabs+=" " 115 | # string+=(str(word)+":"+tabs+str(self.result[word])+'\n') 116 | # 117 | # return string 118 | 119 | 120 | # ****************TEST_CODE****************** 121 | def testCode(): 122 | pass 123 | 124 | # ****************TEST_EXECUTE****************** 125 | 126 | 127 | test("-test_Processor", testCode) 128 | -------------------------------------------------------------------------------- /src/PlugIns/PE/VersionInfoPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | import pathmagic 5 | from PlugIns.PlugIn import PlugIn 6 | from Modules.PEFileModule import PEFileModule 7 | import pefile 8 | from Sample import Sample 9 | 10 | 11 | class VersionInfoPlug(PlugIn): 12 | 13 | def __init__(self, sample=None): 14 | PlugIn.__init__(self, sample) 15 | 16 | def getPath(self): 17 | return "particular_header.version" 18 | 19 | def getName(self): 20 | return "version" 21 | 22 | def getVersion(self): 23 | return 2 24 | 25 | def process(self): 26 | pelib = self._getLibrary(PEFileModule().getName()) 27 | if(pelib is None): 28 | return "" 29 | 30 | res = {} 31 | if(hasattr(pelib, "VS_VERSIONINFO")): 32 | vi = {} 33 | vi["Length"] = self._normalize(pelib.VS_VERSIONINFO.Length) 34 | vi["ValueLength"] = self._normalize( 35 | pelib.VS_VERSIONINFO.ValueLength) 36 | vi["Type"] = self._normalize(pelib.VS_VERSIONINFO.Type) 37 | res["version_info"] = vi 38 | 39 | if(hasattr(pelib, "VS_FIXEDFILEINFO")): 40 | ffi = {} 41 | ffi["Signature"] = self._normalize( 42 | pelib.VS_FIXEDFILEINFO.Signature) 43 | ffi["StrucVersion"] = self._normalize( 44 | pelib.VS_FIXEDFILEINFO.StrucVersion) 45 | ffi["FileVersionMS"] = self._normalize( 46 | pelib.VS_FIXEDFILEINFO.FileVersionMS) 47 | ffi["FileVersionLS"] = self._normalize( 48 | pelib.VS_FIXEDFILEINFO.FileVersionLS) 49 | ffi["ProductVersionMS"] = self._normalize( 50 | pelib.VS_FIXEDFILEINFO.ProductVersionMS) 51 | ffi["ProductVersionLS"] = self._normalize( 52 | pelib.VS_FIXEDFILEINFO.ProductVersionLS) 53 | ffi["FileFlagsMask"] = self._normalize( 54 | pelib.VS_FIXEDFILEINFO.FileFlagsMask) 55 | ffi["FileFlags"] = self._normalize( 56 | pelib.VS_FIXEDFILEINFO.FileFlags) 57 | ffi["FileOS"] = self._normalize(pelib.VS_FIXEDFILEINFO.FileOS) 58 | ffi["FileType"] = self._normalize( 59 | pelib.VS_FIXEDFILEINFO.FileType) 60 | ffi["FileSubtype"] = self._normalize( 61 | pelib.VS_FIXEDFILEINFO.FileSubtype) 62 | ffi["FileDateMS"] = self._normalize( 63 | pelib.VS_FIXEDFILEINFO.FileDateMS) 64 | ffi["FileDateLS"] = self._normalize( 65 | pelib.VS_FIXEDFILEINFO.FileDateLS) 66 | res["fixed_file_info"] = ffi 67 | 68 | if(hasattr(pelib, "FileInfo")): 69 | fst = {} 70 | for entry in pelib.FileInfo: 71 | if(hasattr(entry, "StringTable")): 72 | for 
str_entry in entry.StringTable: # StringTable is an array; check every entry 73 | # print(str_entry.entries) 74 | # print(dir(str_entry)) 75 | fst["LangID"] = str(str_entry.LangID) 76 | fst["LegalCopyright"] = str( 77 | str_entry.entries.get("LegalCopyright")) 78 | fst["InternalName"] = str( 79 | str_entry.entries.get("InternalName")) 80 | fst["FileVersion"] = str( 81 | str_entry.entries.get("FileVersion")) 82 | fst["CompanyName"] = str( 83 | str_entry.entries.get("CompanyName")) 84 | fst["ProductName"] = str( 85 | str_entry.entries.get("ProductName")) 86 | fst["ProductVersion"] = str( 87 | str_entry.entries.get("ProductVersion")) 88 | fst["FileDescription"] = str( 89 | str_entry.entries.get("FileDescription")) 90 | fst["OriginalFilename"] = str( 91 | str_entry.entries.get("OriginalFilename")) 92 | fst["Comments"] = str( 93 | str_entry.entries.get("Comments")) 94 | fst["LegalTrademarks"] = str( 95 | str_entry.entries.get("LegalTrademarks")) 96 | fst["PrivateBuild"] = str( 97 | str_entry.entries.get("PrivateBuild")) 98 | fst["SpecialBuild"] = str( 99 | str_entry.entries.get("SpecialBuild")) 100 | 101 | res["string_file_info"] = fst 102 | 103 | return res 104 | 105 | 106 | if __name__ == "__main__": 107 | data = open(source_path + "/Test_files/kernel32.dll", "rb").read() 108 | sample = Sample() 109 | sample.setBinary(data) 110 | modules = {} 111 | pfm = PEFileModule() 112 | modules[pfm.getName()] = pfm 113 | plug = VersionInfoPlug() 114 | plug.setModules(modules) 115 | plug.setSample(sample) 116 | res = plug.process() 117 | print(res) 118 | -------------------------------------------------------------------------------- /src/Utils/PEHeaderReader.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | import pefile 5 | import math 6 | import os 7 | import sys 8 | import shutil 9 | import time 10 | from test import test 11 | 12 | 13 | class PEHeaderReader(): 14 | # def __init__(self,file): 15 | # self.pe=pefile.PE(file,fast_load=False) 16 | # #self.pe=pefile.PE(file,fast_load=True) 17 | 18 | def __init__(self, data): 19 | self.pe = None 20 | try: 21 | self.pe = pefile.PE(data=data, fast_load=True) 22 | except Exception, e: 23 | print str(e) 24 | return None 25 | 26 | # try: 27 | # self.pe=pefile.PE(data=data,fast_load=False) 28 | # except: 29 | # self.pe=pefile.PE(data=data,fast_load=True) 30 | 31 | def get_import_size(self): 32 | # self.pe.parse_data_directories() # needed only if fast_load was used. 33 | sizes = [] 34 | for entry in self.pe.DIRECTORY_ENTRY_IMPORT: 35 | sizes.append(len(entry.imports)) 36 | return sizes 37 | 38 | def get_import_size_stats(self): 39 | # self.pe.parse_data_directories() # needed only if fast_load was used. 
40 | total = 0 41 | if (self.pe.OPTIONAL_HEADER.DATA_DIRECTORY[pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']].VirtualAddress == 0): 42 | return 0, 0, 0 43 | for entry in self.pe.DIRECTORY_ENTRY_IMPORT: 44 | total = total + len(entry.imports) 45 | # print entry.dll 46 | # for imp in entry.imports: 47 | # print '\t', hex(imp.address), imp.name 48 | 49 | cant_librerias = (len(self.pe.DIRECTORY_ENTRY_IMPORT)) 50 | total_imports = total 51 | promedio = total / cant_librerias 52 | 53 | return total_imports, cant_librerias, promedio 54 | 55 | def get_section_stats(self): 56 | real_sum = 0 57 | virtual_sum = 0 58 | w_e = 0 59 | w_real_sum = 0 60 | w_virtual_sum = 0 61 | for section in self.pe.sections: 62 | real = int(hex(section.SizeOfRawData), 16) 63 | virtual = int(hex(section.Misc_VirtualSize), 16) 64 | real_sum += real 65 | virtual_sum += virtual 66 | # print(hex(section.Characteristics)) 67 | if (section.__dict__.get('IMAGE_SCN_MEM_WRITE', False) and 68 | section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False)): 69 | # print("Write Exe") 70 | w_e += 1 71 | w_real_sum += real 72 | w_virtual_sum += virtual 73 | 74 | # print (section.Name, real,virtual,rate) 75 | # print("") 76 | 77 | return real_sum, virtual_sum, w_e, w_real_sum, w_virtual_sum  # sums across all sections 78 | 79 | def getArquitecture(self): 80 | try: 81 | 82 | if(self.pe.OPTIONAL_HEADER.Magic == int("0x020B", 16)): 83 | return ("PE+") 84 | elif(self.pe.OPTIONAL_HEADER.Magic == int("0x010B", 16)): 85 | return ("PE") 86 | elif(self.pe.OPTIONAL_HEADER.Magic == int("0x0107", 16)): 87 | return ("IMG_ROM") 88 | else: 89 | return "UNKNOWN" 90 | except pefile.PEFormatError: 91 | return "FORMAT" 92 | 93 | return None 94 | 95 | def getImports(self): 96 | if (self.pe.OPTIONAL_HEADER.DATA_DIRECTORY[pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']].VirtualAddress == 0): 97 | return None 98 | 99 | d = {} 100 | # print(self.pe.DIRECTORY_ENTRY_IMPORT) 101 | for entry in self.pe.DIRECTORY_ENTRY_IMPORT: 102 | aux = [] 103 | for i in range(len(entry.dll)): 104 | if(ord(entry.dll[i]) >= 128): 105 | aux.append('.') 106 | else: 107 | aux.append(entry.dll[i]) 108 | 109 | dll_name = "".join(aux) 110 | 111 | # print entry.dll 112 | # print entry.imports 113 | l = [] 114 | for imp in entry.imports: 115 | l.append(str(imp.name)) 116 | # print '\t', hex(imp.address), imp.name 117 | d[unicode(str(dll_name), "utf-8")] = l 118 | 119 | return d 120 | 121 | def load(self): 122 | self.pe.parse_data_directories() 123 | 124 | # ****************TEST_CODE****************** 125 | 126 | 127 | def testCode(): 128 | 129 | file = "../Test_files/test.exe" 130 | data = open(file, "rb").read() 131 | 132 | start_time = time.time() 133 | cr = PEHeaderReader(data=data) 134 | cr.load() 135 | total_imports, cant_librerias, promedio = cr.get_import_size_stats() 136 | real, virtual, w_e, w_real_sum, w_virtual_sum = cr.get_section_stats() 137 | elapsed = time.time() - start_time 138 | 139 | line1 = str(total_imports) + "|" + \ 140 | str(cant_librerias) + "|" + str(promedio) 141 | line2 = str(real) + "|" + str(virtual) + "|" + str(w_e) + \ 142 | "|" + str(w_real_sum) + "|" + str(w_virtual_sum) 143 | 144 | print(line1) 145 | print(line2) 146 | 147 | imp = cr.getImports() 148 | print(str(imp)) 149 | print("Elapsed time: " + str(elapsed)) 150 | 151 | 152 | # ****************TEST_EXECUTE****************** 153 | 154 | 155 | test("-test_PEHeaderReader", testCode) 156 | -------------------------------------------------------------------------------- /src/PackageControl/PackageController.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | import pathmagic 5 | import hashlib 6 | import gridfs 7 | import logging 8 | from db_pool import * 9 | 10 | # Writes binaries on the DB 11 | 12 | 13 | class PackageController(): 14 | 15 | def __init__(self): 16 | self.fs = gridfs.GridFS(db_fs) 17 | self.collection = db_fs["fs.files"] 18 | if(envget('temporal_files_db')): 19 | self.fs_temp = gridfs.GridFS(db_temp) 20 | self.collection_tmp = db_temp["fs.files"] 21 | 22 | def __delete__(self): 23 | pass 24 | 25 | # adds a file to the file database. 26 | def append(self, file_id, data, vt_blocked=False): 27 | if(envget('temporal_files_db')): 28 | self.fs_temp.put(data, filename=file_id, metadata={ 29 | "vt_blocked": vt_blocked}) 30 | else: 31 | self.fs.put(data, filename=file_id, metadata={ 32 | "vt_blocked": vt_blocked}) 33 | 34 | # returns searched file 35 | # returns None if it does not exist. 36 | def getFile(self, file_id): 37 | if(len(file_id) == 40): 38 | f = self.fs.find_one({"filename": file_id}) 39 | elif(len(file_id) == 32): 40 | f = self.fs.find_one({"md5": file_id}) 41 | else: 42 | logging.warning("PackageController: invalid file_id:" + 43 | str(file_id) + "(len=" + str(len(file_id)) + ")") 44 | f = None 45 | if f is None: 46 | if envget('temporal_files_db') is False: 47 | return None 48 | else: 49 | if(len(file_id) == 40): 50 | f = self.fs_temp.find_one({"filename": file_id}) 51 | elif(len(file_id) == 32): 52 | f = self.fs_temp.find_one({"md5": file_id}) 53 | else: 54 | f = None 55 | logging.warning( 56 | "PackageController tmp: invalid file_id" + str(file_id)) 57 | if f is None: 58 | return None 59 | return f.read() 60 | 61 | def md5_to_sha1(self, md5): 62 | if len(md5) != 32: 63 | raise ValueError("not a valid md5") 64 | f = self.collection.find_one({"md5": md5}) 65 | if f is None: 66 | if envget('temporal_files_db') is False: 67 | logging.debug("md5_to_sha1= none") 68 | return None 69 | else: 70 | f = self.collection_tmp.find_one({"md5": md5}) 71 | if f is None: 72 | logging.debug("md5_to_sha1= none") 73 | return None 74 | return f["filename"] 75 | 76 | def last_updated(self, number): 77 | if(envget('temporal_files_db')): 78 | db_files = db_temp 79 | else: 80 | db_files = db_fs 81 | collection_files = db_files["fs.files"].find().sort( 82 | [("_id", -1)]).limit(number) 83 | result = [] 84 | for document in collection_files: 85 | sha1 = document.get('filename') 86 | md5 = document.get('md5') 87 | tmp_doc = {} 88 | tmp_doc["hash"] = {"sha1": sha1, "md5": md5} 89 | tmp_doc["upload_date"] = document.get('uploadDate') 90 | result.append(tmp_doc) 91 | return result 92 | 93 | # returns None if the file can't be found on the DB. 94 | # 0 if the file exists. 95 | # 1 if the file exists but can't be downloaded. 
96 | # (Check if it is being used) 97 | def searchFile(self, file_id): 98 | ret = self.fs.find_one({"filename": file_id}) 99 | if(ret is None): 100 | if(envget('temporal_files_db') is False): 101 | return None 102 | else: 103 | ret = self.fs_temp.find_one({"filename": file_id}) 104 | if(ret is None): 105 | return None 106 | if(ret.metadata is not None and ret.metadata.get("vt_blocked") is True): 107 | return 1 108 | else: 109 | return 0 110 | 111 | # ****************TEST_CODE****************** 112 | 113 | 114 | def testCode(): 115 | pc = PackageController()  # takes no arguments; db_pool supplies the connection 116 | 117 | for dato in ["test_vt1", "test_vt2"]: 118 | hs = hashlib.sha1(dato).hexdigest() 119 | res = pc.searchFile(hs) 120 | if(res is None): 121 | print("appending: " + dato) 122 | if(dato == "test_vt1"): 123 | pc.append(hs, dato, True) 124 | else: 125 | pc.append(hs, dato) 126 | if(res == 0): 127 | print(dato + " already exists with:" + str(res)) 128 | if(res == 1): 129 | print(dato + " blocked:" + str(res)) 130 | 131 | for dato in ["test_vt1", "test_vt2", "test_vt3"]: 132 | hs = hashlib.sha1(dato).hexdigest() 133 | res = pc.searchFile(hs) 134 | if(res is None): 135 | print("File does not exist: " + dato) 136 | if(res == 0): 137 | print(dato + " File already exists:" + str(res)) 138 | if(res == 1): 139 | print(dato + " blocked:" + str(res)) 140 | 141 | # ****************TEST_EXECUTE****************** 142 | # from Utils.test import test 143 | # test("-test_PackageController",testCode) 144 | 145 | 146 | if __name__ == "__main__": 147 | testCode() 148 | -------------------------------------------------------------------------------- /src/PlugIns/PE/StringPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 
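# Classification sketch: every candidate string is keyed by repr(s).lower(), so
# the quoting survives the round trip, e.g. (illustrative values)
#   repr("Kernel32.DLL").lower() -> "'kernel32.dll'"
#   eval("'kernel32.dll'")       -> "kernel32.dll"
# Strings already declared as imports/exports/version info are dropped; the
# rest are bucketed as hidden imports, dlls, emails, urls, ips, registry keys,
# domains or interesting filenames (see process() below).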
4 | from PlugIns.PlugIn import PlugIn 5 | from Modules.MetaDataModule import * 6 | from Modules.PEFileModule import PEFileModule 7 | import validators 8 | import re 9 | 10 | 11 | class StringPlug(PlugIn): 12 | 13 | def __init__(self, sample=None): 14 | PlugIn.__init__(self, sample) 15 | 16 | def getPath(self): 17 | return "particular_header.strings" 18 | 19 | def getName(self): 20 | return "strings" 21 | 22 | def getVersion(self): 23 | return 3 24 | 25 | def process(self): 26 | ret = {} 27 | data = "" 28 | pelib = self._getLibrary(PEFileModule().getName()) 29 | if(pelib is None): 30 | data = self.sample.getBinary() 31 | else: 32 | for section in pelib.sections: 33 | data = data + section.get_data() 34 | 35 | regexp = '[A-Za-z0-9/\-:.,_$&@=?%()[\]<> ]{4,}' 36 | strings = re.findall(regexp, data) 37 | aux = {} 38 | for s in strings: 39 | aux[repr(s).lower()] = True 40 | 41 | unique_strings = [] 42 | for k in aux: 43 | unique_strings.append(k) 44 | 45 | mdc = self._getLibrary(MetaDataModule().getName()) 46 | if(mdc is None): 47 | return ret 48 | 49 | searchUsed = {} 50 | imports = self.sample.getLastValue("particular_header.imports") 51 | if(imports is not None): 52 | for i in imports: 53 | searchUsed[i["lib"]] = True 54 | for f in i["functions"]: 55 | searchUsed[f] = True 56 | 57 | exports = self.sample.getLastValue("particular_header.exports.symbols") 58 | if(exports is not None): 59 | # print("No exports") 60 | for i in exports: 61 | searchUsed[i["name"]] = True 62 | if("forwarder_dll" in i and "forwarder_function" in i):  # i is a dict; test its keys 63 | searchUsed[i["forwarder_dll"]] = True 64 | searchUsed[i["forwarder_function"]] = True 65 | 66 | version_p = self.sample.getLastValue( 67 | "particular_header.version.string_file_info") 68 | if(version_p is not None): 69 | for k in version_p.keys(): 70 | searchUsed["'" + str(version_p[k]) + "'"] = True 71 | 72 | raw = [] 73 | hidden = [] 74 | email = [] 75 | url = [] 76 | ip_l = [] 77 | 78 | dll = [] 79 | domain = [] 80 | interesting = [] 81 | 82 | registry = [] 83 | for s in unique_strings: 84 | # checking if the import is declared or not 85 | # print(s) 86 | # print(searchUsed.get(repr(s).lower())) 87 | # raw_input() 88 | if(searchUsed.get(s) is True): 89 | continue 90 | raw.append(s) 91 | 92 | # searching if it's an import or not 93 | r = mdc.searchImportByName(s) 94 | if(r is not None): 95 | hidden.append(s) 96 | continue 97 | evaluado = eval(s)  # undo repr(): s is a quoted string literal 98 | 99 | # searching dll 100 | r = mdc.searchDllByName(s) 101 | if(r is not None): 102 | dll.append(s) 103 | continue 104 | 105 | # searching for filenames 106 | types = ["exe", "dll", "bat", "sys", "htm", "html", "js", "jar", "jpg", 107 | "png", "vb", "scr", "pif", "chm", "zip", "rar", "cab", "pdf", 108 | "doc", "docx", "ppt", "pptx", "xls", "xlsx", "swf", "gif", "pdb", "cpp"] 109 | salir = False 110 | for pat in types: 111 | if(s.find("." 
+ pat) != -1): 112 | interesting.append(s) 113 | salir = True 114 | break 115 | if salir: 116 | continue 117 | 118 | # searching email 119 | if(validators.email(evaluado)): 120 | email.append(s) 121 | continue 122 | 123 | # searching url 124 | if(validators.url(evaluado)): 125 | url.append(s) 126 | continue 127 | 128 | # searching ips 129 | if(validators.ipv4(evaluado)): # or validators.ipv6(evaluado)): 130 | ip_l.append(s) 131 | continue 132 | 133 | # searching registry 134 | if(s.find("HKLM\\") != -1 or s.find("HKCU\\") != -1): 135 | registry.append(s) 136 | continue 137 | 138 | # searching domains 139 | if(validators.domain(evaluado)): 140 | domain.append(s) 141 | continue 142 | 143 | ret["raw_strings"] = sorted(raw) 144 | if(len(hidden) > 0): 145 | ret["hidden_imports"] = sorted(hidden) 146 | if(len(email) > 0): 147 | ret["emails"] = sorted(email) 148 | if(len(url) > 0): 149 | ret["urls"] = sorted(url) 150 | if(len(ip_l) > 0): 151 | ret["ips"] = sorted(ip_l) 152 | if(len(dll) > 0): 153 | ret["hidden_dll"] = sorted(dll) 154 | if(len(domain) > 0): 155 | ret["domains"] = sorted(domain) 156 | if(len(interesting) > 0): 157 | ret["interesting"] = sorted(interesting) 158 | if(len(registry) > 0): 159 | ret["registry"] = sorted(registry) 160 | 161 | return ret 162 | -------------------------------------------------------------------------------- /src/Sample.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | from Utils.TimeLogger import TimeLogger 5 | from MetaDataPKG.Metadata import Metadata 6 | from PackageControl.PackageController import * 7 | 8 | 9 | class Sample: 10 | 11 | def __init__(self, packageController=None, metaController=None, versionController=None): 12 | self.sample_id = None 13 | self.additional_objs = []  # objects attached via addAdditionalObject() 14 | self.pc = packageController 15 | self.mc = metaController 16 | self.vc = versionController 17 | # self.cataloger=cataloger 18 | 19 | self.binary = None 20 | self.binary_try_to_load = True 21 | 22 | self.metadata_storeada = Metadata() 23 | self.metadata_calculada = Metadata() 24 | self.metadata_try_to_load = True 25 | 26 | self.version_storeada = None 27 | self.version_calculada = {} 28 | self.version_try_to_load = True 29 | 30 | self.category = None 31 | 32 | # self.metaDB=None 33 | # self.metaCalc=None 34 | # self.metaDB_try=False 35 | 36 | def addAdditionalObject(self, obj): 37 | self.additional_objs.append(obj) 38 | 39 | def getAdditionalObjects(self): 40 | return self.additional_objs 41 | 42 | def setVersionController(self, versionController): 43 | self.vc = versionController 44 | 45 | def setPackageController(self, packageController): 46 | self.pc = packageController 47 | 48 | def setMetaController(self, metaController): 49 | self.mc = metaController 50 | 51 | # def setCataloger(self,cataloger): 52 | # self.cataloger=cataloger 53 | 54 | def setStorageVersion(self, ver): 55 | self.version_storeada = ver 56 | 57 | def getCalculatedVersion(self): 58 | return self.version_calculada 59 | 60 | def getStorageVersion(self): 61 | if(self.version_storeada is not None): 62 | return self.version_storeada 63 | if(self.version_try_to_load): 64 | self.version_try_to_load = False 65 | if(self.vc is None): 66 | return None 67 | self.version_storeada = self.vc.searchVersion(self.sample_id) 68 | return self.version_storeada 69 | 70 | def getCategory(self): 71 | if(self.category is not None): 72 | return 
self.category 73 | st = self.getStorageVersion() 74 | if(st is None): 75 | return None 76 | self.category = st.get("category") 77 | return self.category 78 | 79 | # val=self.getStorageMetadata().get("mime_type") # change this 80 | # if(val!=None): return val 81 | # print("NOOOOOOOOOO") 82 | # return self.cataloger.catalog(self.getBinary()) 83 | # remove from versions. 84 | 85 | def setCategory(self, cat): 86 | self.category = cat 87 | 88 | def getLastValue(self, key): 89 | val = self.metadata_calculada.getValue(key) 90 | if(val is not None): 91 | return val 92 | if(self.metadata_try_to_load): 93 | self.metadata_try_to_load = False 94 | if(self.mc is None): 95 | return None 96 | self.metadata_storeada.setData(self.mc.read(self.sample_id)) 97 | val = self.metadata_storeada.getValue(key) 98 | return val 99 | 100 | # if(self.metaCalc!=None): 101 | # res=self.metaCalc.get(key) 102 | # if(res!=None): return res 103 | # 104 | # if(self.metaDB!=None): 105 | # return self.metaDB["particular_header"].get(key) 106 | # 107 | # if(self.metaDB_try): 108 | # return None 109 | # 110 | # self.metaDB_try=True 111 | # if(self.mc==None):return None 112 | # self.metaDB=self.mc.read(self.sample_id) 113 | # if(self.metaDB==None):return None 114 | # return self.metaDB["particular_header"].get(key) 115 | 116 | def setStorageMetadata(self, meta): 117 | self.metadata_storeada = meta 118 | 119 | def getStorageMetadata(self): 120 | if(self.metadata_try_to_load): 121 | self.metadata_try_to_load = False 122 | if(self.mc is None): 123 | return None 124 | self.metadata_storeada.setData(self.mc.read(self.sample_id)) 125 | return self.metadata_storeada 126 | 127 | def setCalculatedMetadata(self, cal): 128 | self.metadata_calculada = cal 129 | 130 | def getCalculatedMetadata(self): 131 | return self.metadata_calculada 132 | # if(self.metaDB!=None): 133 | # return self.metaDB 134 | # if(self.metaDB_try): 135 | # return None 136 | # self.metaDB_try=True 137 | # if(self.mc==None):return None 138 | # self.metaDB=self.mc.read(self.sample_id) 139 | # return self.metaDB 140 | 141 | def setCalculatedValue(self, path, value): 142 | self.metadata_calculada.setValue(path, value) 143 | 144 | def setID(self, sample_id): 145 | self.sample_id = sample_id 146 | 147 | def getID(self): 148 | return self.sample_id 149 | 150 | def setBinary(self, binary): 151 | self.binary = binary 152 | self.binary_try_to_load = True  # same flag checked in getBinary() 153 | 154 | def getBinary(self): 155 | if(self.binary is not None): 156 | return self.binary 157 | if(not self.binary_try_to_load): 158 | return None 159 | self.binary_try_to_load = False 160 | if(self.pc is None): 161 | # we use a temporary PackageController so we don't leave a mongo 162 | # cursor open. 163 | tmp_pc = PackageController() 164 | self.binary = tmp_pc.getFile(self.sample_id) 165 | return self.binary 166 | 167 | self.binary = self.pc.getFile(self.sample_id) 168 | return self.binary 169 | -------------------------------------------------------------------------------- /src/PlugIns/PE/HeadersPlug.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 
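# Output sketch (field values illustrative): process() returns
#   {"dos_header": {"magic": 23117, ..., "lfanew": 232},
#    "nt_header": {"Signature": 17744},
#    "file_header": {"Machine": 332, "NumberOfSections": 5, ...},
#    "optional_header": {"Magic": 267, "AddressOfEntryPoint": 4096, ...}}
# with every value passed through PlugIn._normalize().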
4 | from PlugIns.PlugIn import PlugIn 5 | from Modules.PEFileModule import PEFileModule 6 | import pefile 7 | 8 | 9 | class HeadersPlug(PlugIn): 10 | 11 | def __init__(self, sample=None): 12 | PlugIn.__init__(self, sample) 13 | 14 | def getPath(self): 15 | return "particular_header.headers" 16 | 17 | def getName(self): 18 | return "headers" 19 | 20 | def getVersion(self): 21 | return 2 22 | 23 | def process(self): 24 | pelib = self._getLibrary(PEFileModule().getName()) 25 | if(pelib is None): 26 | return "" 27 | 28 | dos = {} 29 | dos["magic"] = self._normalize(pelib.DOS_HEADER.e_magic) 30 | dos["cblp"] = self._normalize(pelib.DOS_HEADER.e_cblp) 31 | dos["cp"] = self._normalize(pelib.DOS_HEADER.e_cp) 32 | dos["crlc"] = self._normalize(pelib.DOS_HEADER.e_crlc) 33 | dos["cparhdr"] = self._normalize(pelib.DOS_HEADER.e_cparhdr) 34 | dos["minalloc"] = self._normalize(pelib.DOS_HEADER.e_minalloc) 35 | dos["maxalloc"] = self._normalize(pelib.DOS_HEADER.e_maxalloc) 36 | dos["ss"] = self._normalize(pelib.DOS_HEADER.e_ss) 37 | dos["sp"] = self._normalize(pelib.DOS_HEADER.e_sp) 38 | dos["csum"] = self._normalize(pelib.DOS_HEADER.e_csum) 39 | dos["ip"] = self._normalize(pelib.DOS_HEADER.e_ip) 40 | dos["cs"] = self._normalize(pelib.DOS_HEADER.e_cs) 41 | dos["lfarlc"] = self._normalize(pelib.DOS_HEADER.e_lfarlc) 42 | dos["ovno"] = self._normalize(pelib.DOS_HEADER.e_ovno) 43 | dos["res"] = self._normalize(pelib.DOS_HEADER.e_res) 44 | dos["oemid"] = self._normalize(pelib.DOS_HEADER.e_oemid) 45 | dos["oeminfo"] = self._normalize(pelib.DOS_HEADER.e_oeminfo) 46 | dos["res2"] = self._normalize(pelib.DOS_HEADER.e_res2) 47 | dos["lfanew"] = self._normalize(pelib.DOS_HEADER.e_lfanew) 48 | 49 | nt = {} 50 | nt["Signature"] = self._normalize(pelib.NT_HEADERS.Signature) 51 | 52 | fh = {} 53 | fh["Machine"] = self._normalize(pelib.FILE_HEADER.Machine) 54 | fh["NumberOfSections"] = self._normalize( 55 | pelib.FILE_HEADER.NumberOfSections) 56 | fh["TimeDateStamp"] = self._normalize(pelib.FILE_HEADER.TimeDateStamp) 57 | fh["PointerToSymbolTable"] = self._normalize( 58 | pelib.FILE_HEADER.PointerToSymbolTable) 59 | fh["NumberOfSymbols"] = self._normalize( 60 | pelib.FILE_HEADER.NumberOfSymbols) 61 | fh["SizeOfOptionalHeader"] = self._normalize( 62 | pelib.FILE_HEADER.SizeOfOptionalHeader) 63 | fh["Characteristics"] = self._normalize( 64 | pelib.FILE_HEADER.Characteristics) 65 | 66 | oh = {} 67 | oh["Magic"] = self._normalize(pelib.OPTIONAL_HEADER.Magic) 68 | oh["MajorLinkerVersion"] = self._normalize( 69 | pelib.OPTIONAL_HEADER.MajorLinkerVersion) 70 | oh["MinorLinkerVersion"] = self._normalize( 71 | pelib.OPTIONAL_HEADER.MinorLinkerVersion) 72 | oh["SizeOfCode"] = self._normalize(pelib.OPTIONAL_HEADER.SizeOfCode) 73 | oh["SizeOfInitializedData"] = self._normalize( 74 | pelib.OPTIONAL_HEADER.SizeOfInitializedData) 75 | oh["SizeOfUninitializedData"] = self._normalize( 76 | pelib.OPTIONAL_HEADER.SizeOfUninitializedData) 77 | oh["AddressOfEntryPoint"] = self._normalize( 78 | pelib.OPTIONAL_HEADER.AddressOfEntryPoint) 79 | oh["BaseOfCode"] = self._normalize(pelib.OPTIONAL_HEADER.BaseOfCode) 80 | oh["ImageBase"] = self._normalize(pelib.OPTIONAL_HEADER.ImageBase) 81 | oh["SectionAlignment"] = self._normalize( 82 | pelib.OPTIONAL_HEADER.SectionAlignment) 83 | oh["FileAlignment"] = self._normalize( 84 | pelib.OPTIONAL_HEADER.FileAlignment) 85 | oh["MajorOperatingSystemVersion"] = self._normalize( 86 | pelib.OPTIONAL_HEADER.MajorOperatingSystemVersion) 87 | oh["MinorOperatingSystemVersion"] = self._normalize( 88 | 
pelib.OPTIONAL_HEADER.MinorOperatingSystemVersion) 89 | oh["MajorImageVersion"] = self._normalize( 90 | pelib.OPTIONAL_HEADER.MajorImageVersion) 91 | oh["MinorImageVersion"] = self._normalize( 92 | pelib.OPTIONAL_HEADER.MinorImageVersion) 93 | oh["MajorSubsystemVersion"] = self._normalize( 94 | pelib.OPTIONAL_HEADER.MajorSubsystemVersion) 95 | oh["MinorSubsystemVersion"] = self._normalize( 96 | pelib.OPTIONAL_HEADER.MinorSubsystemVersion) 97 | oh["Reserved1"] = self._normalize(pelib.OPTIONAL_HEADER.Reserved1) 98 | oh["SizeOfImage"] = self._normalize(pelib.OPTIONAL_HEADER.SizeOfImage) 99 | oh["SizeOfHeaders"] = self._normalize( 100 | pelib.OPTIONAL_HEADER.SizeOfHeaders) 101 | oh["CheckSum"] = self._normalize(pelib.OPTIONAL_HEADER.CheckSum) 102 | oh["Subsystem"] = self._normalize(pelib.OPTIONAL_HEADER.Subsystem) 103 | oh["DllCharacteristics"] = self._normalize( 104 | pelib.OPTIONAL_HEADER.DllCharacteristics) 105 | oh["SizeOfStackReserve"] = self._normalize( 106 | pelib.OPTIONAL_HEADER.SizeOfStackReserve) 107 | oh["SizeOfStackCommit"] = self._normalize( 108 | pelib.OPTIONAL_HEADER.SizeOfStackCommit) 109 | oh["SizeOfHeapReserve"] = self._normalize( 110 | pelib.OPTIONAL_HEADER.SizeOfHeapReserve) 111 | oh["SizeOfHeapCommit"] = self._normalize( 112 | pelib.OPTIONAL_HEADER.SizeOfHeapCommit) 113 | oh["LoaderFlags"] = self._normalize(pelib.OPTIONAL_HEADER.LoaderFlags) 114 | oh["NumberOfRvaAndSizes"] = self._normalize( 115 | pelib.OPTIONAL_HEADER.NumberOfRvaAndSizes) 116 | 117 | res = {} 118 | res["dos_header"] = dos 119 | res["nt_header"] = nt 120 | res["file_header"] = fh 121 | res["optional_header"] = oh 122 | 123 | return res 124 | -------------------------------------------------------------------------------- /src/MetaControl/MetaController.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | import pathmagic 5 | import math 6 | import traceback 7 | import logging 8 | from db_pool import * 9 | from Utils.ProcessDate import process_date 10 | import datetime 11 | from Utils.test import test 12 | from pymongo.errors import WriteError 13 | from pymongo.errors import BulkWriteError 14 | # Saves and reads metadata to/from the db. 15 | 16 | 17 | class MetaController(): 18 | 19 | def __init__(self, db_collection=None): 20 | if db_collection is None: 21 | db_collection = envget('db_metadata_collection') 22 | self.collection = db[db_collection] 23 | self.import_coll = db.imports_tree 24 | self.av_coll = db.av_analysis 25 | self.tasks = db.tasks 26 | 27 | def __delete__(self): 28 | pass 29 | 30 | def read(self, file_id): 31 | if file_id is None: 32 | return None 33 | f = self.collection.find_one({"file_id": file_id}) 34 | if(f is None): 35 | return None 36 | 37 | # Antivirus stuff is in another collection 38 | av_analysis = self.search_av_analysis(file_id) 39 | if(av_analysis is not None): 40 | # we don't want all VT metadata to get displayed. 41 | f["av_analysis"] = {your_key: av_analysis.get(your_key) for your_key in [ 42 | "scans", "positives", "total", "scan_date"]} 43 | # f["av_analysis"]=av_analysis 44 | 45 | return f 46 | 47 | def write(self, file_id, data_dic): 48 | command = {"$set": data_dic} 49 | try: 50 | self.collection.update_one( 51 | {"file_id": file_id}, command, upsert=True) 52 | except WriteError: 53 | logging.exception("MetaController() write(). 
file_id=" + 54 | str(file_id) + "\ncommand=" + str(command)) 55 | # print(command) 56 | # err=str(traceback.format_exc()) 57 | # print(err) 58 | return -1 59 | return 0 60 | 61 | def writeImportsTree(self, imports): 62 | command = {"$inc": {"count": 1}} 63 | bulk = self.import_coll.initialize_unordered_bulk_op() 64 | execute_bool = False 65 | for i in imports: 66 | dll_name = i["lib"] 67 | funcs = i["functions"] 68 | for imp_name in funcs: 69 | execute_bool = True 70 | bulk.find({"function_name": imp_name.lower(), 71 | "dll_name": dll_name.lower()}).upsert().update(command) 72 | # print("**** Error Imports Tree ****") 73 | # err=str(traceback.format_exc()) 74 | # print(err) 75 | # return -1 76 | try: 77 | if(execute_bool): 78 | bulk.execute({'w': 0}) 79 | except BulkWriteError: 80 | logging.exception("MetaController(): " + 81 | str("**** Error Imports Tree ****")) 82 | # err=str(traceback.format_exc()) 83 | # print(err) 84 | return -1 85 | return 0 86 | 87 | def searchImportByName(self, import_name): 88 | r = self.import_coll.find_one({"function_name": import_name}) 89 | return r 90 | 91 | def searchDllByName(self, dll_name): 92 | r = self.import_coll.find_one({"dll_name": dll_name}) 93 | return r 94 | 95 | def searchExactImport(self, import_name, dll_name): 96 | r = self.import_coll.find_one( 97 | {"function_name": import_name, "dll_name": dll_name}) 98 | return r 99 | 100 | def count_section_used(self, section_sha1): 101 | f = self.collection.find( 102 | {"particular_header.sections.sha1": section_sha1}).count() 103 | return f 104 | 105 | def count_resources_used(self, resources_sha1): 106 | f = self.collection.find( 107 | {"particular_header.res_entries.sha1": resources_sha1}).count() 108 | return f 109 | 110 | def search_av_analysis(self, file_id): 111 | f = self.av_coll.find_one({"sha1": file_id}) 112 | return f 113 | 114 | def save_first_seen(self, file_id, vt_date): 115 | if vt_date is None: 116 | return None 117 | old_date = self.get_first_date(file_id) 118 | if old_date is None or vt_date < old_date: 119 | self.write(file_id, {"date": vt_date}) 120 | 121 | def get_first_date(self, file_id): 122 | meta = self.read(file_id) 123 | if meta is None: 124 | return None 125 | else: 126 | date = meta.get('date') 127 | if(isinstance(date, datetime.datetime)): 128 | return date 129 | else: 130 | try: 131 | date = datetime.datetime.strptime( 132 | date, "%Y-%m-%d %H:%M:%S") 133 | except ValueError: 134 | date = None 135 | return date 136 | 137 | def read_task(self, task_id): 138 | f = self.tasks.find_one({"task_id": task_id}) 139 | return f 140 | 141 | def write_task(self, task_id, data_dic): 142 | command = {"$set": data_dic} 143 | logging.debug("write_task(): data_dic=") 144 | logging.debug(str(data_dic)) 145 | return self.tasks.update_one({"task_id": task_id}, command, upsert=True) 146 | 147 | def save_av_analysis(self, file_id, analysis_result): 148 | command = {"$set": analysis_result} 149 | try: 150 | self.av_coll.update_one({"sha1": file_id}, command, upsert=True) 151 | except WriteError: 152 | logging.exception("**** Error File: %s ****" % (file_id,)) 153 | # print(command) 154 | # err=str(traceback.format_exc()) 155 | # print(err) 156 | return -1 157 | self.save_first_seen(file_id, analysis_result.get('date')) 158 | return 0 159 | 160 | # ****************TEST_CODE****************** 161 | 162 | 163 | def testCode(): 164 | pass 165 | 166 | 167 | # ****************TEST_EXECUTE****************** 168 | test("-test_MetaController", testCode) 169 | 
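# A minimal usage sketch for reference (hypothetical call, not part of the
# original file): assuming a reachable MongoDB configured through db_pool,
#
#     mc = MetaController()
#     mc.write("0358ab4e8595db846b709cf85d7b397d92230bef", {"mime_type": "EXE"})
#     meta = mc.read("0358ab4e8595db846b709cf85d7b397d92230bef")
#
# write() upserts into the metadata collection and returns 0 on success;
# read() merges in the av_analysis fields (scans, positives, total,
# scan_date) when an antivirus analysis exists for the file_id.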
-------------------------------------------------------------------------------- /src/Scripts/create_indexes.py: -------------------------------------------------------------------------------- 1 | import pathmagic 2 | import pymongo 3 | import traceback 4 | 5 | from db_pool import * 6 | 7 | 8 | index_list = [ 9 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.imports.functions", pymongo.ASCENDING)]}, 10 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("size", pymongo.ASCENDING)]}, 11 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.sections.name", pymongo.ASCENDING)]}, 12 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.sections.size_of_raw_data", pymongo.ASCENDING)]}, 13 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.headers.file_header.TimeDateStamp", pymongo.ASCENDING)]}, 14 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.imports.lib", pymongo.ASCENDING)]}, 15 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.sections.virtual_size", pymongo.ASCENDING)]}, 16 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.sections.size_raw_data", pymongo.ASCENDING)]}, 17 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.strings.hidden_imports", pymongo.ASCENDING)]}, 18 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.strings.hidden_dll", pymongo.ASCENDING)]}, 19 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.res_entries.size", pymongo.ASCENDING)]}, 20 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.strings.emails", pymongo.ASCENDING)]}, 21 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.strings.urls", pymongo.ASCENDING)]}, 22 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.strings.domains", pymongo.ASCENDING)]}, 23 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.strings.ips", pymongo.ASCENDING)]}, 24 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.strings.interesting", pymongo.ASCENDING)]}, 25 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.exports.symbols.name", pymongo.ASCENDING)]}, 26 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.version.string_file_info.LegalCopyright", pymongo.ASCENDING)]}, 27 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.version.string_file_info.LangID", pymongo.ASCENDING)]}, 28 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.version.string_file_info.InternalName", pymongo.ASCENDING)]}, 29 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.version.string_file_info.CompanyName", pymongo.ASCENDING)]}, 30 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.version.string_file_info.ProductName", pymongo.ASCENDING)]}, 31 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.version.string_file_info.FileDescription", pymongo.ASCENDING)]}, 32 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.version.string_file_info.OriginalFilename", pymongo.ASCENDING)]}, 33 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.version.string_file_info.LegalTrademarks", 
pymongo.ASCENDING)]}, 34 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.certificate.certificates.serial", pymongo.ASCENDING)]}, 35 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("date", pymongo.ASCENDING)]}, 36 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.res_entries.sha1", pymongo.ASCENDING)]}, 37 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.sections.md5", pymongo.ASCENDING)]}, 38 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.sections.sha1", pymongo.ASCENDING)]}, 39 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.sections.sha2", pymongo.ASCENDING)]}, 40 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("file_id", pymongo.HASHED)]}, 41 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("hash.md5", pymongo.HASHED)]}, 42 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("hash.sha2", pymongo.HASHED)]}, 43 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("mime_type", pymongo.HASHED)]}, 44 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("hash.sha1", pymongo.HASHED)]}, 45 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.headers.optional_header.AddressOfEntryPoint", pymongo.HASHED)]}, 46 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("particular_header.version.fixed_file_info.Signature", pymongo.HASHED)]}, 47 | {"db": "DB_metadata", "coll": "meta_container", "keys": [("dynamic.sha1", pymongo.HASHED)]}, 48 | {"db": "DB_metadata", "coll": "imports_tree", "keys": [("function_name", pymongo.ASCENDING)]}, 49 | {"db": "DB_metadata", "coll": "imports_tree", "keys": [("dll_name", pymongo.ASCENDING), ("function_name", pymongo.ASCENDING)]}, 50 | {"db": "DB_metadata", "coll": "av_analysis", "keys": [("sha1", pymongo.HASHED)]}, 51 | {"db": "DB_metadata", "coll": "av_analysis", "keys": [("scans.result", pymongo.ASCENDING)]}, 52 | {"db": "DB_versions", "coll": "version_container", "keys": [("file_id", pymongo.HASHED)]}, 53 | {"db": "DB_metadata", "coll": "tasks", "keys": [("task_id", pymongo.HASHED)]}, 54 | ] 55 | 56 | 57 | def check_if_index_exist(index, list_indexes): 58 | for key, value in list_indexes.items(): 59 | if(set(index) == set(value.get('key'))): 60 | return True 61 | return False 62 | 63 | 64 | if __name__ == '__main__': 65 | db_ip = envget('metadata.host') 66 | db_port = envget('metadata.port') 67 | 68 | for index in index_list: 69 | client = pymongo.MongoClient(db_ip, db_port) 70 | db = client[index["db"]] 71 | collection = db[index["coll"]] 72 | list_of_indexes = collection.index_information() 73 | try: 74 | if(not check_if_index_exist(index["keys"], list_of_indexes)): 75 | print("Creating %s index" % index["keys"]) 76 | collection.create_index(index["keys"], sparse=True, background=False) 77 | except Exception as e: 78 | print("Exception") 79 | print(str(e)) 80 | print(traceback.format_exc()) 81 | -------------------------------------------------------------------------------- /yara/YaraGenerator/README.md: -------------------------------------------------------------------------------- 1 | ### Information 2 | This is a project to build a tool to attempt to allow for quick, simple, and effective yara rule creation to isolate malware families and other malicious objects of interest. This is an experiment and thus far I've had pretty good success with it. It is a work in progress and I welcome forks and feedback! 
3 | 4 | To utilize this you must find a few files from the malware family you wish to profile (or, if not executables, files containing the attribute of interest). The more the better: three to four samples seem to be effective for malware, however isolating exploits in carrier documents often takes many more. Please note, however, that this tool will only be as precise as you are in choosing what you are looking for. Visit http://yaragenerator.com for a web application version of this tool. 5 | 6 | ### Version and Updates 7 | 0.6.1 - Added logic for parsing and prioritizing strings/emails/headers from emails (emails must be submitted as .eml files so the python library can parse them properly). Added per-filetype string prioritization logic (i.e. include all email addresses and IPs common across emails before random words from email bodies). Due to targeted parsing, effective signatures can be built from a single email. The boolean logic for email rules is also "all of them", to allow for future variance in delivery methods. 8 | 9 | 0.6 - Refactored all of the code to allow for a selectable sample filetype (-f). This allows for entirely different signature generation for PDFs vs EXEs vs emails. In addition to disparate execution paths, each filetype has its own string blacklist and regex list to exclude unwanted things such as your gateway, usernames, @yourco.com, etc. (Note: no custom per-filetype code exists for anything beyond executables at this point, but the framework is now there.) 10 | 11 | 0.5 - Added regexes in modules/regexblacklist.txt which remove matches from the potential strings included in yara rules; also added 30K strings to the blacklist. Lowered the hit requirement from 100% to 95% to allow more true positives from slight variants (e.g. a change of embedded C2 or UA). 12 | 13 | 0.4 - Added PEfile (http://code.google.com/p/pefile/) to extract and remove imports and functions from yara rules; added blacklist.txt to remove unwanted strings. 14 | 15 | 0.3 - Added support for tags and Unicode wide strings (automatically adds the "wide" tag). 16 | 17 | 0.2 - Updated CLI and error handling, removed hidden files, and ignored subdirectories. 18 | 19 | 0.1 - Released; supports regular string extraction. 20 | 21 | ### ToDo 22 | [+] Allow for scanning of benign/baseline files to automatically populate blacklists for various filetypes 23 | 24 | [+] Create custom execution paths leveraging opensource tools for various filetypes (i.e. email/pdf/office docs, etc.) 25 | 26 | [+] Continue to improve the fidelity and flexibility of the algorithms and underlying methodologies used to generate signatures 27 | 28 | 29 | ### Example 30 | 31 | Usage is as follows, with an example of a basic search hitting all of 32 | the switches below: 33 | ``` 34 | 35 | usage: yaraGenerator.py [-h] -r RULENAME -f FILETYPE [-a AUTHOR] [-d DESCRIPTION] [-t TAGS] InputDirectory 36 | 37 | YaraGenerator 38 | 39 | positional arguments: 40 | InputDirectory Path To Files To Create Yara Rule From 41 | 42 | optional arguments: 43 | -h , --help show this help message and exit 44 | -r , --RuleName Enter A Rule/Alert Name (No Spaces + Must Start with Letter) 45 | -a , --Author Enter Author Name 46 | -d , --Description Provide a useful description of the Yara Rule 47 | -t , --Tags Apply Tags to Yara Rule For Easy Reference (AlphaNumeric) 48 | -v , --Verbose Print Finished Rule To Standard Out 49 | -f , --FileType Select Sample Set FileType choices are: unknown, exe, 50 | pdf, email, office, js-html 51 | ``` 52 | 53 | The blacklist.txt file in the /modules directory allows entry of one string per line; these strings will never appear in a rule generated by YaraGenerator. 54 | 55 | The regexblacklist.txt in the /modules directory allows entry of one Regular Expression per line. *Remember to use ^ and $ for the beginning and end of a string if you wish to match exactly.* YaraGenerator will disqualify from a Yara Rule any string that hits on any regex in the list. For instance, hypothetical entries like the following (illustrative patterns, not shipped defaults) would keep internal artifacts such as gateway hostnames or @yourco.com addresses out of generated rules: 56 | 
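```
^[a-zA-Z0-9._%+-]+@yourco\.com$
^corp-gateway-[0-9]+$
```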
57 | Example for a Specific Family of APT1 Malware: 58 | 59 | ``` 60 | python yaraGenerator.py ../greencat/ -r Win_Trojan_APT1_GreenCat -a "Chris Clark" -d "APT Trojan Comment Panda" -t "APT" -f "exe" 61 | 62 | [+] Generating Yara Rule Win_Trojan_APT1_GreenCat from files located in: ../greencat/ 63 | 64 | [+] Yara Rule Generated: Win_Trojan_APT1_GreenCat.yar 65 | 66 | [+] Files Examined: ['871cc547feb9dbec0285321068e392b8', '6570163cd34454b3d1476c134d44b9d9', '57e79f7df13c0cb01910d0c688fcd296'] 67 | [+] Author Credited: Chris Clark 68 | [+] Rule Description: APT Trojan Comment Panda 69 | [+] Rule Tags: APT 70 | 71 | [+] YaraGenerator (C) 2013 Chris@xenosec.org https://github.com/Xen0ph0n/YaraGenerator 72 | ``` 73 | Resulting Yara Rule: 74 | ``` 75 | rule Win_Trojan_APT_APT1_Greencat : APT 76 | { 77 | meta: 78 | author = "Chris Clark" 79 | date = "2013-06-04" 80 | description = "APT Trojan Comment Crew Greencat" 81 | hash0 = "57e79f7df13c0cb01910d0c688fcd296" 82 | hash1 = "871cc547feb9dbec0285321068e392b8" 83 | hash2 = "6570163cd34454b3d1476c134d44b9d9" 84 | sample_filetype = "exe" 85 | yaragenerator = "https://github.com/Xen0ph0n/YaraGenerator" 86 | strings: 87 | $string0 = "Ramdisk" 88 | $string1 = "Cache-Control:max-age" 89 | $string2 = "YYSSSSS" 90 | $string3 = "\\cmd.exe" 91 | $string4 = "Translation" wide 92 | $string5 = "CD-ROM" 93 | $string6 = "Mozilla/5.0" 94 | $string7 = "Volume on this computer:" 95 | $string8 = "pidrun" 96 | $string9 = "3@YAXPAX@Z" 97 | $string10 = "SMAgent.exe" wide 98 | $string11 = "Shell started successfully" 99 | $string12 = "Content-Length: %d" 100 | $string13 = "t4j SV3" 101 | $string14 = "Program started" 102 | $string15 = "Started already," 103 | $string16 = "SoundMAX service agent" wide 104 | condition: 105 | 16 of them 106 | } 107 | 108 | 109 | ``` 110 | ### Results 111 | 112 | GreenCat Rule: 113 | 114 | ``` 115 | 100% Hits on Test Samples: 116 | 117 | $ yara -rg Trojan_Win_GreenCat.yar greencat/ 118 | Trojan_Win_GreenCat [APT] ../greencat//8bf5a9e8d5bc1f44133c3f118fe8ca1701d9665a72b3893f509367905feb0a00 119 | Trojan_Win_GreenCat [APT] ../greencat//c196cac319e5c55e8169b6ed6930a10359b3db322abe8f00ed8cb83cf0888d3b 120 | Trojan_Win_GreenCat [APT] ../greencat//c23039cf2f859e659e59ec362277321fbcdac680e6d9bc93fc03c8971333c25e 121 | 122 | 100% True Positives On Other Samples In the APT1 Cadre which were detected as Green Cat By Other Yara Rules: 123 | 124 | $ yara -r Trojan_Win_GreenCat.yar . 
Win_Trojan_APT1_GreenCat [APT] ../../MalwareSamples/APT1Malware//1877a5d2f9c415109a8ac323f43be1dc10c546a72ab7207a96c6e6e71a132956 125 | Win_Trojan_APT1_GreenCat [APT] ../../MalwareSamples/APT1Malware//20ed6218575155517f19d4ce46a9addbf49dcadb8f5d7bd93efdccfe1925c7d0 126 | Win_Trojan_APT1_GreenCat [APT] ../../MalwareSamples/APT1Malware//4144820d9b31c4d3c54025a4368b32f727077c3ec253753360349a783846747f 127 | Win_Trojan_APT1_GreenCat [APT] ../../MalwareSamples/APT1Malware//4487b345f63d20c6b91eec8ee86c307911b1f2c3e29f337aa96a4a238bf2e87c 128 | Win_Trojan_APT1_GreenCat [APT] ../../MalwareSamples/APT1Malware//8bf5a9e8d5bc1f44133c3f118fe8ca1701d9665a72b3893f509367905feb0a00 129 | 
Win_Trojan_APT1_GreenCat [APT] ../../MalwareSamples/APT1Malware//c196cac319e5c55e8169b6ed6930a10359b3db322abe8f00ed8cb83cf0888d3b 130 | Win_Trojan_APT1_GreenCat [APT] ../../MalwareSamples/APT1Malware//c23039cf2f859e659e59ec362277321fbcdac680e6d9bc93fc03c8971333c25e 131 | Win_Trojan_APT1_GreenCat [APT] ../../MalwareSamples/APT1Malware//f76dd93b10fc173eaf901ff1fb00ff8a9e1f31e3bd86e00ff773b244b54292c5 132 | 133 | 100% True Negatives on clean files: 134 | 135 | $ yara -r Trojan_Win_GreenCat.yar ../../CleanFiles/ 136 | 137 | ``` 138 | 139 | ### Author & License 140 | 141 | YaraGenerator is copyrighted by Chris Clark 2013. Contact me at Chris@xenosec.org 142 | 143 | YaraGenerator is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 144 | YaraGenerator is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 145 | 146 | You should have received a copy of the GNU General Public License along with YaraGenerator. If not, see http://www.gnu.org/licenses/. 147 | 148 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Codex Gigas** is a malware DNA profiling search engine that discovers malware patterns and characteristics, assisting individuals who are interested in malware hunting. 2 | 3 | ![img](doc/47-preview.png?raw=true) 4 | 5 | # Codex Gigas 6 | Codex Gigas is a malware profiling search engine that allows malware hunters and analysts to truly interrogate the internals of malware and perform searches over a large number of file characteristics. For instance, instead of relying on file-level hashes, we can compute other features such as imported functions, strings, constants, file segments, code regions, or anything that is defined in the file type specification; that provides us with more than 142 possible searchable patterns, which can be combined. 7 | 8 | Read the [user guide](doc/user-guide.md) to learn how it works. 9 | 10 | ## Contents 11 | * [Configuration (optional)](#configuration-optional) 12 | * [MongoDB path](#mongodb-path) 13 | * [VirusTotal](#virustotal) 14 | * [Installation](#installation) 15 | * [Method 1: Download ready-to-use virtual machines](#method-1-download-ready-to-use-virtual-machines) 16 | * [Method 2: Installation without virtual machines](#method-2-installation-without-virtual-machines) 17 | * [Method 3: Manually build Codex Gigas Community Version](#method-3-manually-build-codex-gigas-community-version) 18 | * [Load files](#load-files) 19 | * [Method 1: files_to_load folder](#method-1-files_to_load-folder) 20 | * [Method 2: via upload API](#method-2-via-upload-api) 21 | * [Method 3: via loadFile python script.](#method-3-via-loadfile-python-script) 22 | * [APT-notes samples](#apt-notes-samples) 23 | * [Logs](#logs) 24 | * [Development](#development) 25 | * [Codex Gigas Thanks](#codex-gigas-thanks) 26 | * [License](#license) 27 | 28 | 29 | ## Configuration (optional) 30 | ### MongoDB path 31 | The default path of the Mongo database is the parent folder of ```codex-backend```. 
If you want to change that, edit the following line of ```pull-install.yml``` and ```docker-compose.yml``` before installation: 32 | ``` 33 | - ../mongo-data/:/data/db 34 | ``` 35 | 36 | ### VirusTotal 37 | VirusTotal is used to retrieve antivirus results for a file on request, or to download new files. You can add one private API key and one public API key. The private API key will only be used when required (downloading samples); the public API key will be used for downloading antivirus scans. You can add your own [VirusTotal API keys](https://www.virustotal.com/es-ar/documentation/public-api/) in ```src/config/secrets.py```. Then you should restart the container: 38 | ``` 39 | sudo docker-compose up 40 | ``` 41 | ## Installation 42 | There are three ways to install the CodexGigas Community version. We have ready-to-use virtual machines (the easiest way). We also provide pre-built docker images, so you can use CodexGigas without virtual machines. Lastly, you can manually build the Codex Gigas docker images. 43 | ### Method 1: Download ready-to-use virtual machines 44 | You can download your preferred VM file. Inside you'll find Codex Gigas running at startup on ```http://127.0.0.1:6100```. 45 | * [VMware](https://www.dropbox.com/s/9qn13x9d8eegpgr/codex_vmware.zip?dl=0) (sha1: 9C6B3F8F37C8BD119E5C8A07050CB28C1A7E2DF3) 46 | * [VirtualBox](https://www.dropbox.com/s/a6hxhkjpa8a3ek0/codex_vtbox.ova?dl=0) (sha1: 8289A8BEAF2D75A6D2B4E80ADEB943A806E26373) 47 | 48 | VMs password: codex 49 | 50 | ### Method 2: Installation without virtual machines 51 | First install [docker](https://www.docker.com) and [docker-compose](https://docs.docker.com/compose/), then: 52 | ``` 53 | mkdir codexgigas && cd codexgigas 54 | git clone https://github.com/codexgigassys/codex-backend 55 | cd codex-backend 56 | sudo docker-compose -f pull-install.yml up 57 | ``` 58 | This will download the pre-built docker images (about 2GB) and start them up. 59 | The next time you want to stop/start the containers: 60 | ``` 61 | sudo docker-compose stop 62 | sudo docker-compose start 63 | ``` 64 | ### Method 3: Manually build Codex Gigas Community Version 65 | If you don't want to use a virtual machine, you can manually install Codex Gigas on your system. 66 | First install [docker](https://www.docker.com) and [docker-compose](https://docs.docker.com/compose/), then: 67 | ``` 68 | git clone https://github.com/codexgigassys/codex-backend 69 | git clone https://github.com/codexgigassys/codex-frontend 70 | cd codex-backend/ 71 | # if you want to use a DB on a different host, copy default_config and edit it (optional) 72 | cp src/config/default_config.py src/config/secrets.py 73 | sudo docker-compose up 74 | ``` 75 | The next time you want to stop/start the containers: 76 | ``` 77 | sudo docker-compose stop 78 | sudo docker-compose start 79 | ``` 80 | If everything goes well, Codex Gigas should be up and running on ```http://127.0.0.1:6100```. 81 | 82 | ## Load files 83 | ### Method 1: files_to_load folder 84 | To load files on a mass scale, drop them into the ```files_to_load``` folder and execute the following command: 85 | ``` 86 | curl http://127.0.0.1:4500/api/v1/load_to_mongo 87 | ``` 88 | 89 | ### Method 2: via upload API 90 | It is possible to upload a file via POST: 91 | ``` 92 | curl -F file="@/home/user/somefile.exe" http://127.0.0.1:4500/api/v1/file/add 93 | ``` 94 | 95 | You can upload all the files under a folder with find+curl: 96 | ``` 97 | find . 
-type f -exec curl -F file="@{}" http://127.0.0.1:4500/api/v1/file/add \; 98 | ``` 99 | 100 | ### Method 3: via loadFile python script. 101 | 102 | 103 | 104 | 105 | 106 | ## APT-notes samples 107 | We have gathered 5437 [executable samples](https://www.dropbox.com/s/zhv2du99ehlmm24/APTnotes-Samples.zip?dl=0) (sha1: 6EA9BBFBB5FB0EB0D025221A522D907E6D4956A0) 108 | mentioned in APT reports over the last years. Ask for the zip password by sending a DM to [CodexGigasSys twitter](https://twitter.com/codexgigassys). Source: [https://github.com/aptnotes/data](https://github.com/aptnotes/data) 109 | 110 | ## Logs 111 | Since 2017-01-23, the logging system has been moved from the default docker logging system to a syslog container that uses the rsyslog daemon. To view logs, cd to the codex-backend folder and execute: 112 | ```sudo docker-compose exec syslog tail -f /var/log/messages``` 113 | (replace ```tail -f``` with ```cat```, ```less``` or whatever suits your needs) 114 | 115 | ## Development 116 | Wanna contribute? Codex Gigas Community is an open, BSD-licensed, collaborative development effort that heavily relies on contributions from the whole community. We welcome tickets, pull requests, feature suggestions and bug fixing. 117 | 118 | When developing new modules or patches, please try to comply with the general code style that we maintain across the project. When introducing new features or fixing significant bugs, please also include a summary and, if possible, comprehensive documentation in our guide. 119 | If you want to debug the app, it is easier to start it as: 120 | ``` 121 | sudo docker-compose run --service-ports --rm api 122 | ``` 123 | This way the app does not run in the background and you can use ```embed()``` from [IPython](https://en.wikipedia.org/wiki/IPython). 124 | 125 | Codex Gigas extracts the file metadata via plugins. Each plugin receives a file and returns a python dictionary that is saved in MongoDB. Plugins are located in ```src/PlugIns```. To add a new plugin for Windows executables, create a file in ```src/PlugIns/PE/``` and add the plugin name in ```Processors/PEProcessor.py``` and ```PlugIns/PE/__init__.py```. A minimal sketch of such a plugin is shown below. 126 | 
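The sketch mirrors the structure of the existing ```HeadersPlug```; the class name ```ExamplePlug```, the storage path ```particular_header.example``` and the returned key are illustrative placeholders, not part of the codebase:
```
# A hypothetical minimal PE plugin. Only PlugIn, PEFileModule and their
# methods come from this repo; everything else is illustrative.
from PlugIns.PlugIn import PlugIn
from Modules.PEFileModule import PEFileModule


class ExamplePlug(PlugIn):

    def __init__(self, sample=None):
        PlugIn.__init__(self, sample)

    def getPath(self):
        # dot-path under which the result is stored in the metadata document
        return "particular_header.example"

    def getName(self):
        return "example"

    def getVersion(self):
        return 1

    def process(self):
        pelib = self._getLibrary(PEFileModule().getName())
        if(pelib is None):
            return ""
        # return a plain python dictionary; it is saved to MongoDB as-is
        return {"entry_point": self._normalize(
            pelib.OPTIONAL_HEADER.AddressOfEntryPoint)}
```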
127 | ## Codex Gigas Thanks 128 | We would like to thank the authors of the following tools, coming from other projects: 129 | 130 | #### Projects 131 | * yarGen (Florian Roth) https://github.com/Neo23x0/yarGen 132 | * pefile (Ero Carrera) https://github.com/erocarrera/pefile 133 | * ssdeep (jollheef) https://github.com/jollheef/ssdeep 134 | 135 | ## License 136 | Copyright (c) 2016 Deloitte Argentina 137 | 138 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 139 | 140 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 141 | 142 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 143 | -------------------------------------------------------------------------------- /src/Launcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Deloitte Argentina. 2 | # This file is part of CodexGigas - https://github.com/codexgigassys/ 3 | # See the file 'LICENSE' for copying permission. 4 | # Functions to perform the analyses 5 | import os 6 | import time 7 | from czipfile import ZipFile 8 | from Cataloger import Cataloger 9 | from Processors.ProcessorFactory import * 10 | from PackageControl.PackageController import * 11 | from VersionControl.VersionController import * 12 | from MetaControl.MetaController import * 13 | from Utils.TimeLogger import TimeLogger 14 | from Sample import * 15 | import logging 16 | from env import envget 17 | from pymongo import MongoClient 18 | import gridfs 19 | from Utils.test import test 20 | 21 | 22 | 23 | class Launcher(): 24 | 25 | def __init__(self): 26 | formato = '[%(asctime)-15s][%(levelname)s] %(message)s' 27 | path = os.path.abspath(os.path.dirname(os.path.abspath(__file__))) 28 | logfile = os.path.join(path, "launcher.log") 29 | logging.basicConfig( 30 | format=formato, filename=logfile, level=logging.INFO) 31 | 32 | self.vc = VersionController() 33 | self.pc = PackageController() 34 | self.mdc = MetaController() 35 | 36 | def launchOnlyHashingByID(self, sample): 37 | sample.setPackageController(self.pc) 38 | sample.setMetaController(self.mdc) 39 | sample.setVersionController(self.vc) 40 | category = sample.getCategory() 41 | if(category is None): 42 | category = Cataloger().catalog(sample.getBinary()) 43 | logging.debug( 44 | "Category not found in DB, categorized as %s", str(category)) 45 | else: 46 | logging.debug( 47 | "Category found in DB, categorized as %s", str(category)) 48 | processor = ProcessorFactory().getHashProcessor(category, sample) 49 | result_dic = processor.process() 50 | result_version = processor.getVersion() 51 | 52 | if(len(result_version) > 0): 53 | logging.debug("Updating metadata") 54 | 55 | if(self.mdc.write(sample.getID(), result_dic) != 0): 56 | logging.error( 57 | "Error writing Metadata to DB, sample:%s", sample.getID()) 58 | return -1 59 | logging.debug("Metadata written to DB") 60 | 61 | self.vc.updateVersion(sample.getID(), result_version) 62 | logging.debug("Versions written to DB") 63 | else: 64 | 65 | logging.debug("Nothing to update") 66 | 67 | logging.debug("Analysis Finished OK") 68 | return 0 69 | 70 | def launchAnalysisByID(self, sample): 71 | logging.debug("Launching Analysis on sample:%s", sample.getID()) 72 | sample.setPackageController(self.pc) 73 | sample.setMetaController(self.mdc) 74 | sample.setVersionController(self.vc) 75 | 76 | category = sample.getCategory() 77 | if(category is None): 78 | category = Cataloger().catalog(sample.getBinary()) 79 | logging.debug( 80 | "Category not found in DB, categorized as %s", str(category)) 81 | else: 82 | logging.debug( 83 | "Category found in DB, categorized as %s", str(category)) 84 | 85 | processor = ProcessorFactory().createProcessor(category, sample) 86 | result_dic = processor.process() 87 | result_version = processor.getVersion() 88 | 89 | if(len(result_version) > 0): 90 | logging.debug("Updating metadata") 91 | 92 | 
if(self.mdc.write(sample.getID(), result_dic) != 0): 93 | logging.error( 94 | "Error writing Metadata to DB, sample:%s", sample.getID()) 95 | return -1 96 | logging.debug("Metadata written to DB") 97 | 98 | self.vc.updateVersion(sample.getID(), result_version) 99 | logging.debug("Versions written to DB") 100 | else: 101 | 102 | logging.debug("Nothing to update") 103 | 104 | logging.debug("Analysis Finished OK") 105 | return 0 106 | 107 | 108 | # ****************TEST_CODE****************** 109 | import sys  # used by the test code below 110 | import hashlib 111 | def testCode(): 112 | from Utils.Functions import recursive_read 113 | object = "./Test_files/" 114 | files = recursive_read(object) 115 | if(files is None): 116 | sys.exit() 117 | lc = Launcher() 118 | for fp in files: 119 | fd = open(fp, 'r') 120 | data = fd.read() 121 | file_id = hashlib.sha1(data).hexdigest() 122 | print("%s %s" % (fp, file_id)) 123 | lc.launchFileAnalitics((fp, data)) 124 | print("") 125 | print("") 126 | 127 | # ----------------------------------------------- 128 | 129 | 130 | def testCode2(): 131 | object = "../processed/VirusShare_00000.zip" 132 | # opening zipped package 133 | fd = open(object, 'r') 134 | zf = ZipFile(fd) 135 | names = zf.namelist() # name of compressed files 136 | 137 | lc = Launcher() 138 | count = 0 139 | reset = 0 140 | for filename in names: 141 | # print(filename) 142 | data = zf.read(filename, "infected") 143 | lc.launchFileAnalitics((filename, data)) 144 | reset += 1 145 | count += 1 146 | if(reset >= 1000): 147 | print(str(count) + " processed") 148 | reset = 0 149 | print(str(count) + " processed") 150 | 151 | # ---------------------------------------------- 152 | 153 | 154 | def testCode3(): 155 | object = "../DB/packages/fileindex" 156 | # opening the index 157 | fd = open(object, 'r') 158 | lc = Launcher() 159 | count = 0 160 | reset = 0 161 | while True: 162 | # start=time.time() 163 | rl = fd.readline() 164 | if(rl == ""): 165 | break 166 | data = rl.strip().split('|') 167 | # print(data) 168 | fd2 = open("../DB/packages/" + 169 | str(data[1]) + "/p" + str(data[2]) + ".index") 170 | fd2.seek(int(data[3])) 171 | rl2 = fd2.readline() 172 | data1 = rl2.strip().split('|') 173 | # print(data1) 174 | fd3 = open("../DB/packages/" + 175 | str(data[1]) + "/p" + str(data[2]) + ".paq") 176 | fd3.seek(int(data1[1])) 177 | datafin = fd3.read(int(data1[2])) 178 | # end=time.time() 179 | # print("search :"+str((end-start)*10000)) 180 | # start=time.time() 181 | lc.launchFileAnalitics((data[0], datafin)) 182 | # end=time.time() 183 | # print("analyze :"+str((end-start)*10000)) 184 | # print("") 185 | reset += 1 186 | count += 1 187 | if(reset >= 1000): 188 | print(str(count) + " processed") 189 | reset = 0 190 | 191 | print(str(count) + " processed") 192 | 193 | # ---------------------------------------------- 194 | 195 | 196 | def testCode4(): 197 | inicio = 10569000 198 | client = MongoClient(envget('files.host'), envget('files.port')) 199 | db = client[envget('db_files_name')] 200 | fs = gridfs.GridFS(db) 201 | res = fs.find(timeout=False).skip(inicio) 202 | lc = Launcher() 203 | count = inicio 204 | reset = 0 205 | for f in res: 206 | data = f.read() 207 | # print(f.filename,count) 208 | lc.launchFileAnalitics((f.filename, data)) 209 | reset += 1 210 | count += 1 211 | if(reset >= 1000): 212 | print(str(count) + " processed") 213 | reset = 0 214 | print(str(count) + " processed") 215 | 216 | # ---------------------------------------------- 217 | 218 | 219 | def testCode5(): 220 | lc = Launcher() 221 | sample = Sample() 222 | 
sample.setID("0358ab4e8595db846b709cf85d7b397d92230bef") 223 | # sample.setID("223e8761fbb93458140a3592096109501927ff64") 224 | sample.setStorageVersion({}) 225 | lc.launchAnalysisByID(sample) 226 | # print(sample.getCalculatedMetadata().getData()) 227 | # print(sample.getCalculatedVersion()) 228 | # print(sample.getStorageVersion()) 229 | 230 | # ---------------------------------------------- 231 | 232 | 233 | def testCode6(): 234 | inicio = 0 235 | client = MongoClient(envget('files.host'), envget('files.port')) 236 | db = client[envget('db_files_name')] 237 | fs = gridfs.GridFS(db) 238 | res = fs.find(timeout=False).skip(inicio) 239 | lc = Launcher() 240 | count = inicio 241 | reset = 0 242 | start = time.time() 243 | first = True 244 | for f in res: 245 | sam_id = f.filename 246 | sample = Sample() 247 | sample.setID(sam_id) 248 | sample.setStorageVersion({}) 249 | lc.launchAnalysisByID(sample) 250 | reset += 1 251 | count += 1 252 | if(reset >= 1000): 253 | print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()) 254 | ) + " processed:" + str(count / 1000) + "K") 255 | reset = 0 256 | print(str(count) + " processed") 257 | 258 | 259 | # ****************TEST_EXECUTE****************** 260 | 261 | 262 | test("-test_Launcher", testCode6) 263 | --------------------------------------------------------------------------------