├── .gitignore ├── LICENSE.txt ├── README.md ├── codox ├── css │ ├── default.css │ └── highlight.css ├── euphony.commands.analyzers.html ├── euphony.commands.clusterer.html ├── euphony.commands.importer.html ├── euphony.commands.parser.html ├── euphony.commands.sub.features.html ├── euphony.commands.sub.heuristics.html ├── euphony.commits.html ├── euphony.components.datomic.html ├── euphony.core.html ├── euphony.functions.counters.html ├── euphony.functions.metrics.html ├── euphony.functions.voters.html ├── euphony.queries.html ├── euphony.structs.cograph.html ├── euphony.structs.graph.html ├── euphony.structs.label.html ├── euphony.structs.pqueue.html ├── euphony.system.html ├── euphony.tasks.html ├── euphony.utils.cli.html ├── euphony.utils.db.html ├── euphony.utils.io.html ├── euphony.utils.log.html ├── index.html └── js │ ├── highlight.min.js │ ├── jquery.min.js │ └── page_effects.js ├── project.clj ├── resources ├── english.dict ├── schema.edn ├── seeds-min.edn ├── seeds.edn └── sources.txt ├── src └── euphony │ ├── commands │ ├── analyzers.clj │ ├── clusterer.clj │ ├── importer.clj │ ├── parser.clj │ └── sub │ │ ├── features.clj │ │ └── heuristics.clj │ ├── commits.clj │ ├── components │ └── datomic.clj │ ├── core.clj │ ├── functions │ ├── counters.clj │ ├── metrics.clj │ └── voters.clj │ ├── queries.clj │ ├── structs │ ├── cograph.clj │ ├── graph.clj │ ├── label.clj │ └── pqueue.clj │ ├── system.clj │ ├── tasks.clj │ └── utils │ ├── cli.clj │ ├── db.clj │ ├── io.clj │ └── log.clj ├── target └── uberjar │ ├── euphony.jar │ └── lib-euphony.jar └── test ├── data ├── cluster-datoms.edn ├── import-datoms.edn ├── parse-datoms.edn ├── reports.vt ├── results-datoms.edn └── truths.gt └── euphony ├── commands ├── analyzers_test.clj ├── clusterer_test.clj ├── importer_test.clj ├── parser_test.clj └── sub │ └── heuristics_test.clj ├── functions ├── counters_test.clj ├── metrics_test.clj └── voters_test.clj ├── queries_test.clj ├── structs ├── cograph_test.clj ├── 
graph_test.clj ├── label_test.clj └── pqueue_test.clj ├── test_helpers.clj └── test_system.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target/* 2 | !/target/uberjar/ 3 | classes/ 4 | stale/ 5 | /checkouts 6 | pom.xml 7 | pom.xml.asc 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.nrepl-history 12 | .hgignore 13 | .hg/ 14 | 15 | OLD/ 16 | scripts/ 17 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | https://opensource.org/licenses/LGPL-3.0 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # What is Euphony ? 2 | 3 | __Euphony is a unifier of malware labels__. 4 | 5 | From a list of [VirusTotal](https://www.virustotal.com/) reports, Euphony can parse malware labels and produce a single family per file. 6 | 7 | # Installation 8 | 9 | Euphony is available both as [a single jar](https://github.com/fmind/euphony/releases) and [from 10 | sources](https://github.com/fmind/euphony/). 11 | 12 | For end users, the single jar is recommended. 13 | 14 | # Usage 15 | 16 | $ java -jar euphony.jar [args] 17 | 18 | ## Options 19 | 20 | * -h, --help: Display a help summary with acceptable arguments and options. 21 | * -l, --log-level LEVEL: Set the log level of the program (default: warn) 22 | * -m, --max-turn VALUE: Set the maximum number of complete iteration for inference at the parsing stage. 23 | * -t, --threshold VALUE: Set the threshold value for the trimming operation at the clustering stage. 
24 | * -e, --export-dir DIR: Set the output directory of the program (default: current directory) 25 | * -f, --field FIELD: Set the label field to cluster and export (from: type, platform, family, 26 | default: family) 27 | * -r, --reports-file FILE: Provide a sequence of reports from VirusTotal formatted as JSON records 28 | (one per line). 29 | * -g, --ground-file FILE: Provide a ground-truth to evaluate the output formatted as JSON records. 30 | * -s, --seeds-file FILE: Provide a seeds file with some initial domain knowledge about malware 31 | formatted as an EDN structure 32 | (default: resources/seed-max.edn). 33 | * -d, --database-uri URI: Provide a database URI to run the program and persist the learning 34 | (default: no persistence). 35 | * -A, --export-all: export all of the information below 36 | * -E, --export-election: field frequency per malware signature 37 | * -O, --export-proposed: best candidate per malware signature 38 | * -P, --export-parse-rules: associations between label and field 39 | * -T, --export-parse-mapping: tokenization of malware labels 40 | * -V, --export-vendor-reports: output dataset after parsing 41 | * -G, --export-cluster-graph: output graph after clustering 42 | * -C, --export-cluster-rules: associations between raw field and clustered field 43 | * -D, --export-cluster-mapping: clustering of malware fields 44 | * -R, --export-cluster-reports: output dataset after clustering 45 | * -M, --export-malstats: statistics about malware files 46 | * -F, --export-famstats: statistics about malware families 47 | 48 | # Examples 49 | 50 | $ java -jar euphony.jar -e output-dir/ -r reports.vt -CPEO 51 | 52 | $ java -jar euphony.jar -e output-dir/ -r reports.vt -t 0.05 -CPEO 53 | 54 | $ java -jar euphony.jar -e output-dir/ -r reports.vt -f type -CPEO 55 | 56 | $ java -jar euphony.jar -e output-dir/ -r reports.vt -g truths.gt -CPEOMF 57 | 58 | ## Report file (with two items) 59 | 60 | {"positives": 2, "resource": 
"5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4", "verbose_msg": "Scan finished, information embedded", "scans": {"NANO-Antivirus": {"result": null, "version": "1.0.38.8984", "detected": false, "update": "20160713"}, "AVware": {"result": "Trojan.AndroidOS.Generic.A", "version": "1.5.0.42", "detected": true, "update": "20160713"}, "ESET-NOD32": {"result": "Android/Adrd.A", "version": "13792", "detected": true, "update": "20160712"}}, "sha1": "09b143b430e836c513279c0209b7229a4d29a18c", "total": 55, "scan_id": "5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4-1468430330", "permalink": "https://www.virustotal.com/file/5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4/analysis/1468430330/", "sha256": "5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4", "scan_date": "2016-07-13 17:18:50", "md5": "c05c25b769919fd7f1b12b4800e374b5", "response_code": 1} 61 | 62 | 63 | {"positives": 1, "resource": "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d", "verbose_msg": "Scan finished, information embedded", "scans": {"Tencent": {"result": "a.remote.adrd", "version": "1.0.0.1", "detected": true, "update": "20160707"}}, "sha1": "32cd5dbef434b926ce34e89f0d185fe8d1b5fdfb", "total": 54, "scan_id": "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d-1467894540", "permalink": "https://www.virustotal.com/file/2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d/analysis/1467894540/", "sha256": "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d", "scan_date": "2016-07-07 12:29:00", "md5": "39c1bfbb62687e1b1d2bc4d273600448", "response_code": 1} 64 | 65 | ## Ground-truth file (with two items) 66 | 67 | {"resource": "f63256cf4eef0a60fe56989b1474dd9b0b2bb580ce9fd262b18592bf0506f911", "name": "Adwo", "type": "adware", "platform": "android"} 68 | 69 | 70 | {"resource": "a9cbe3e3d446cea683c1e72f2994f40024afed1bb1186b27690ff21741046312", "name": "Dowgin", "type": "trojan", 
"platform": "linux"} 71 | -------------------------------------------------------------------------------- /codox/css/highlight.css: -------------------------------------------------------------------------------- 1 | /* 2 | github.com style (c) Vasily Polovnyov 3 | */ 4 | 5 | .hljs { 6 | display: block; 7 | overflow-x: auto; 8 | padding: 0.5em; 9 | color: #333; 10 | background: #f8f8f8; 11 | } 12 | 13 | .hljs-comment, 14 | .hljs-quote { 15 | color: #998; 16 | font-style: italic; 17 | } 18 | 19 | .hljs-keyword, 20 | .hljs-selector-tag, 21 | .hljs-subst { 22 | color: #333; 23 | font-weight: bold; 24 | } 25 | 26 | .hljs-number, 27 | .hljs-literal, 28 | .hljs-variable, 29 | .hljs-template-variable, 30 | .hljs-tag .hljs-attr { 31 | color: #008080; 32 | } 33 | 34 | .hljs-string, 35 | .hljs-doctag { 36 | color: #d14; 37 | } 38 | 39 | .hljs-title, 40 | .hljs-section, 41 | .hljs-selector-id { 42 | color: #900; 43 | font-weight: bold; 44 | } 45 | 46 | .hljs-subst { 47 | font-weight: normal; 48 | } 49 | 50 | .hljs-type, 51 | .hljs-class .hljs-title { 52 | color: #458; 53 | font-weight: bold; 54 | } 55 | 56 | .hljs-tag, 57 | .hljs-name, 58 | .hljs-attribute { 59 | color: #000080; 60 | font-weight: normal; 61 | } 62 | 63 | .hljs-regexp, 64 | .hljs-link { 65 | color: #009926; 66 | } 67 | 68 | .hljs-symbol, 69 | .hljs-bullet { 70 | color: #990073; 71 | } 72 | 73 | .hljs-built_in, 74 | .hljs-builtin-name { 75 | color: #0086b3; 76 | } 77 | 78 | .hljs-meta { 79 | color: #999; 80 | font-weight: bold; 81 | } 82 | 83 | .hljs-deletion { 84 | background: #fdd; 85 | } 86 | 87 | .hljs-addition { 88 | background: #dfd; 89 | } 90 | 91 | .hljs-emphasis { 92 | font-style: italic; 93 | } 94 | 95 | .hljs-strong { 96 | font-weight: bold; 97 | } 98 | -------------------------------------------------------------------------------- /codox/euphony.commands.clusterer.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.commands.clusterer documentation

euphony.commands.clusterer

FEATURES

results-clusters

(results-clusters graph & [{:keys [threshold], :or {threshold THRESHOLD}}])
Cluster a graph of antivirus results into named groups.
4 | 

results-graph

(results-graph results-seq & [{:keys [features weighter], :or {features FEATURES, weighter WEIGHTER}}])
Construct a graph of antivirus results with the given features and weighter.
5 | 

THRESHOLD

WEIGHTER

-------------------------------------------------------------------------------- /codox/euphony.commands.parser.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.commands.parser documentation

euphony.commands.parser

HEURISTICS

MAX-TURN

mem-setter

(mem-setter heuristic)
Construct a memory setter to change token fields using a commit function.
4 | 

parse

(parse conn labels & [{:keys [heuristics max-turn], :or {heuristics HEURISTICS, max-turn MAX-TURN}}])
Parse labels using a knowledge database and return a mapping of token assignments per label.
5 | 

pqueue

(pqueue f labels)
Construct a queue from a list of labels and a priority function.
6 | 

priority

(priority turn [label tokens :as entry])
Compute a priority from a turn and an entry.
7 | 
-------------------------------------------------------------------------------- /codox/euphony.commands.sub.features.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.commands.sub.features documentation

euphony.commands.sub.features

imbalance

(imbalance cograph [head tail :as edge])
Compute the imbalance between two av-labels (i.e. the ratio between their occurrences).
4 | 

incomplete

(incomplete cograph [head tail :as edge])
Compute the incompleteness between two av-labels (i.e. the excluded elements in the smaller set).
5 | 

ldistance

(ldistance cograph [[av-head label-head] [av-tail label-tail] :as edge])
Compute the label distance between two av-labels.
6 | 
-------------------------------------------------------------------------------- /codox/euphony.components.datomic.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.components.datomic documentation

euphony.components.datomic

install-schema

(install-schema conn schema-file)

install-seeds

(install-seeds conn seeds-file)

new-datomic

(new-datomic conf)
-------------------------------------------------------------------------------- /codox/euphony.core.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.core documentation

euphony.core

-main

(-main & args)

OPTIONS

system-conf

(system-conf options)

USAGE

-------------------------------------------------------------------------------- /codox/euphony.functions.metrics.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.functions.metrics documentation

euphony.functions.metrics

set-completeness

(set-completeness a b inter)
Compute the completeness of two sets given their cardinality and intersection.
4 | 

set-granularity

(set-granularity a b)
Compute the granularity of two sets given their cardinality.
5 | 

str-similarity

-------------------------------------------------------------------------------- /codox/euphony.functions.voters.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.functions.voters documentation

euphony.functions.voters

elect

(elect votes-index)
Select the most frequent item per entry through majority voting.
4 | When there is no majority, select the most frequent item globally.

index-elected

(index-elected global-votes)

index-votes

(index-votes)(index-votes index entry items)

merge-indexes

(merge-indexes)(merge-indexes & indexes)

merge-votes

(merge-votes)(merge-votes & votes)

vote

(vote index)
Compute the frequency of items per index entry.
5 | 

vote-and-elect

-------------------------------------------------------------------------------- /codox/euphony.structs.cograph.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.structs.cograph documentation

euphony.structs.cograph

co-occur

cograph

(cograph sequences)
Construct a co-occurrence graph from a list of sequences.
4 | 

occur

-------------------------------------------------------------------------------- /codox/euphony.structs.pqueue.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.structs.pqueue documentation

euphony.structs.pqueue

pqueue

(pqueue elements priorities)(pqueue associations)
Construct a priority queue from a list of elements and priorities.
4 | 
-------------------------------------------------------------------------------- /codox/euphony.system.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.system documentation

euphony.system

CONF

new-system

(new-system conf)

start

stop

with-system

macro

(with-system [binding conf] & body)
-------------------------------------------------------------------------------- /codox/euphony.utils.cli.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.utils.cli documentation

euphony.utils.cli

argc=-error

(argc=-error argc)
Build an error message about the number of command-line arguments.
4 | 

error-message

(error-message errors)
Build a single error message from a list of errors.
5 | 

feedback!

(feedback! usage options)(feedback! usage options errors)
Display a feedback message to the user.
6 | 

parse

(parse args options & {:keys [argc=]})
Parse a list of command line arguments given the possible options.
7 | 

usage-message

(usage-message usage options)
Build a usage message from a usage summary and an option summary.
8 | 
-------------------------------------------------------------------------------- /codox/euphony.utils.db.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.utils.db documentation

euphony.utils.db

db

entity

pull

q

transact

(transact conn tx-data)

with

-------------------------------------------------------------------------------- /codox/euphony.utils.io.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.utils.io documentation

euphony.utils.io

filepath

(filepath dir file)

json-line->struct

mkdir!

(mkdir! dir)

read-edn!

(read-edn! path)

read-json!

(read-json! path)

write-dot!

(write-dot! path graph)

write-json!

(write-json! path structure)
-------------------------------------------------------------------------------- /codox/euphony.utils.log.html: -------------------------------------------------------------------------------- 1 | 3 | euphony.utils.log documentation

euphony.utils.log

LEVELS

log

macro

(log level & args)

set-level!

with-level

macro

(with-level level & body)
-------------------------------------------------------------------------------- /codox/js/page_effects.js: -------------------------------------------------------------------------------- 1 | function visibleInParent(element) { 2 | var position = $(element).position().top 3 | return position > -50 && position < ($(element).offsetParent().height() - 50) 4 | } 5 | 6 | function hasFragment(link, fragment) { 7 | return $(link).attr("href").indexOf("#" + fragment) != -1 8 | } 9 | 10 | function findLinkByFragment(elements, fragment) { 11 | return $(elements).filter(function(i, e) { return hasFragment(e, fragment)}).first() 12 | } 13 | 14 | function scrollToCurrentVarLink(elements) { 15 | var elements = $(elements); 16 | var parent = elements.offsetParent(); 17 | 18 | if (elements.length == 0) return; 19 | 20 | var top = elements.first().position().top; 21 | var bottom = elements.last().position().top + elements.last().height(); 22 | 23 | if (top >= 0 && bottom <= parent.height()) return; 24 | 25 | if (top < 0) { 26 | parent.scrollTop(parent.scrollTop() + top); 27 | } 28 | else if (bottom > parent.height()) { 29 | parent.scrollTop(parent.scrollTop() + bottom - parent.height()); 30 | } 31 | } 32 | 33 | function setCurrentVarLink() { 34 | $('.secondary a').parent().removeClass('current') 35 | $('.anchor'). 36 | filter(function(index) { return visibleInParent(this) }). 37 | each(function(index, element) { 38 | findLinkByFragment(".secondary a", element.id). 39 | parent(). 
40 | addClass('current') 41 | }); 42 | scrollToCurrentVarLink('.secondary .current'); 43 | } 44 | 45 | var hasStorage = (function() { try { return localStorage.getItem } catch(e) {} }()) 46 | 47 | function scrollPositionId(element) { 48 | var directory = window.location.href.replace(/[^\/]+\.html$/, '') 49 | return 'scroll::' + $(element).attr('id') + '::' + directory 50 | } 51 | 52 | function storeScrollPosition(element) { 53 | if (!hasStorage) return; 54 | localStorage.setItem(scrollPositionId(element) + "::x", $(element).scrollLeft()) 55 | localStorage.setItem(scrollPositionId(element) + "::y", $(element).scrollTop()) 56 | } 57 | 58 | function recallScrollPosition(element) { 59 | if (!hasStorage) return; 60 | $(element).scrollLeft(localStorage.getItem(scrollPositionId(element) + "::x")) 61 | $(element).scrollTop(localStorage.getItem(scrollPositionId(element) + "::y")) 62 | } 63 | 64 | function persistScrollPosition(element) { 65 | recallScrollPosition(element) 66 | $(element).scroll(function() { storeScrollPosition(element) }) 67 | } 68 | 69 | function sidebarContentWidth(element) { 70 | var widths = $(element).find('.inner').map(function() { return $(this).innerWidth() }) 71 | return Math.max.apply(Math, widths) 72 | } 73 | 74 | function calculateSize(width, snap, margin, minimum) { 75 | if (width == 0) { 76 | return 0 77 | } 78 | else { 79 | return Math.max(minimum, (Math.ceil(width / snap) * snap) + (margin * 2)) 80 | } 81 | } 82 | 83 | function resizeSidebars() { 84 | var primaryWidth = sidebarContentWidth('.primary') 85 | var secondaryWidth = 0 86 | 87 | if ($('.secondary').length != 0) { 88 | secondaryWidth = sidebarContentWidth('.secondary') 89 | } 90 | 91 | // snap to grid 92 | primaryWidth = calculateSize(primaryWidth, 32, 13, 160) 93 | secondaryWidth = calculateSize(secondaryWidth, 32, 13, 160) 94 | 95 | $('.primary').css('width', primaryWidth) 96 | $('.secondary').css('width', secondaryWidth).css('left', primaryWidth + 1) 97 | 98 | if (secondaryWidth 
> 0) { 99 | $('#content').css('left', primaryWidth + secondaryWidth + 2) 100 | } 101 | else { 102 | $('#content').css('left', primaryWidth + 1) 103 | } 104 | } 105 | 106 | $(window).ready(resizeSidebars) 107 | $(window).ready(setCurrentVarLink) 108 | $(window).ready(function() { persistScrollPosition('.primary')}) 109 | $(window).ready(function() { 110 | $('#content').scroll(setCurrentVarLink) 111 | $(window).resize(setCurrentVarLink) 112 | }) 113 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject euphony "0.1.0" 2 | :description "A friendly translator in a world full of dangerous malware." 3 | :license {:name "GPL-3.0" :url "https://opensource.org/licenses/GPL-3.0"} 4 | :url "https://github.com/fmind/euphony" 5 | :dependencies [[org.clojure/clojure "1.8.0"] 6 | [org.clojure/math.combinatorics "0.1.4"] 7 | [org.clojure/core.match "0.3.0-alpha4"] 8 | [com.datomic/datomic-free "0.9.5561"] 9 | [com.stuartsierra/component "0.3.2"] 10 | [org.clojure/tools.cli "0.3.5"] 11 | [com.taoensso/timbre "4.8.0"] 12 | [aysylu/loom "0.6.0"] 13 | [instaparse "1.4.5"] 14 | [clj-fuzzy "0.3.3"] 15 | [cheshire "5.7.0"] 16 | [medley "0.8.4"]] 17 | :codox {:output-path "codox"} 18 | :main ^:skip-aot euphony.core 19 | :uberjar-name "euphony.jar" 20 | :jar-name "lib-euphony.jar" 21 | :target-path "target/%s" 22 | :jar-exclusions [#"dev.clj"] 23 | :profiles {:uberjar {:aot :all}}) 24 | -------------------------------------------------------------------------------- /resources/schema.edn: -------------------------------------------------------------------------------- 1 | [ 2 | ;; ANTIVIRUS SYSTEMS 3 | {:db/ident :antivirus.system/name 4 | :db/valueType :db.type/string 5 | :db/cardinality :db.cardinality/one 6 | :db/unique :db.unique/identity 7 | :db/doc "an antivirus name (e.g. 
avg)"} 8 | 9 | ;; ANTIVIRUS REPORTS 10 | {:db/ident :antivirus.report/resource 11 | :db/valueType :db.type/string 12 | :db/cardinality :db.cardinality/one 13 | :db/unique :db.unique/identity 14 | :db/doc "a malware signature (sha256)."} 15 | {:db/ident :antivirus.report/scan 16 | :db/valueType :db.type/ref 17 | :db/cardinality :db.cardinality/one 18 | :db/doc "a reference to a scan entity."} 19 | 20 | ;; ANTIVIRUS SCANS 21 | {:db/ident :antivirus.scan/id 22 | :db/valueType :db.type/string 23 | :db/cardinality :db.cardinality/one 24 | :db/unique :db.unique/identity 25 | :db/doc "any identifier for a scan"} 26 | {:db/ident :antivirus.scan/date 27 | :db/valueType :db.type/instant 28 | :db/cardinality :db.cardinality/one 29 | :db/doc "the date of the scan execution."} 30 | {:db/ident :antivirus.scan/results 31 | :db/valueType :db.type/ref 32 | :db/cardinality :db.cardinality/many 33 | :db/doc "a reference to result entities."} 34 | 35 | ;; ANTIVIRUS RESULTS 36 | {:db/ident :antivirus.result/id 37 | :db/valueType :db.type/string 38 | :db/cardinality :db.cardinality/one 39 | :db/unique :db.unique/identity 40 | :db/doc "any identifier for a result."} 41 | {:db/ident :antivirus.result/label 42 | :db/valueType :db.type/ref 43 | :db/cardinality :db.cardinality/one 44 | :db/index true 45 | :db/doc "a reference to an antivirus label."} 46 | {:db/ident :antivirus.result/system 47 | :db/valueType :db.type/ref 48 | :db/cardinality :db.cardinality/one 49 | :db/index true 50 | :db/doc "a reference to an antivirus system."} 51 | {:db/ident :antivirus.result/name-cluster 52 | :db/valueType :db.type/string 53 | :db/cardinality :db.cardinality/one 54 | :db/index true 55 | :db/doc "the proposed cluster name for the result."} 56 | {:db/ident :antivirus.result/type-cluster 57 | :db/valueType :db.type/string 58 | :db/cardinality :db.cardinality/one 59 | :db/index true 60 | :db/doc "the proposed cluster type for the result."} 61 | {:db/ident :antivirus.result/plat-cluster 62 | :db/valueType 
:db.type/string 63 | :db/cardinality :db.cardinality/one 64 | :db/index true 65 | :db/doc "the proposed cluster platform for the result."} 66 | 67 | ;; ANTIVIRUS LABELS 68 | {:db/ident :antivirus.label/label 69 | :db/valueType :db.type/string 70 | :db/cardinality :db.cardinality/one 71 | :db/unique :db.unique/identity 72 | :db/doc "an antivirus malware label."} 73 | {:db/ident :antivirus.label/words-pattern 74 | :db/valueType :db.type/string 75 | :db/cardinality :db.cardinality/one 76 | :db/index true 77 | :db/doc "the structure of the label words."} 78 | {:db/ident :antivirus.label/fields-pattern 79 | :db/valueType :db.type/string 80 | :db/cardinality :db.cardinality/one 81 | :db/index true 82 | :db/doc "the structure of the label fields."} 83 | {:db/ident :antivirus.label/name-part 84 | :db/valueType :db.type/string 85 | :db/cardinality :db.cardinality/one 86 | :db/index true 87 | :db/doc "the sub-part linked to the label name."} 88 | {:db/ident :antivirus.label/type-part 89 | :db/valueType :db.type/string 90 | :db/cardinality :db.cardinality/one 91 | :db/index true 92 | :db/doc "the sub-part linked to the label type."} 93 | {:db/ident :antivirus.label/plat-part 94 | :db/valueType :db.type/string 95 | :db/cardinality :db.cardinality/one 96 | :db/index true 97 | :db/doc "the sub-part linked to the label platform."} 98 | {:db/ident :antivirus.label/words 99 | :db/valueType :db.type/ref 100 | :db/cardinality :db.cardinality/many 101 | :db/doc "the reference to words contained in this label."} 102 | 103 | ;; ANTIVIRUS WORDS 104 | {:db/ident :antivirus.word/word 105 | :db/valueType :db.type/string 106 | :db/cardinality :db.cardinality/one 107 | :db/unique :db.unique/identity 108 | :db/doc "a word used by antivirus."} 109 | {:db/ident :antivirus.word/field 110 | :db/valueType :db.type/keyword 111 | :db/cardinality :db.cardinality/one 112 | :db/index true 113 | :db/doc "the final field associated to a word."} 114 | {:db/ident :antivirus.word/candidate-fields 115 | 
:db/valueType :db.type/keyword 116 | :db/cardinality :db.cardinality/many 117 | :db/noHistory true 118 | :db/doc "the candidate fields (for ambiguous words)."} 119 | 120 | ;; GROUND-TRUTH REPORTS 121 | {:db/ident :ground-truth/resource 122 | :db/valueType :db.type/string 123 | :db/cardinality :db.cardinality/one 124 | :db/unique :db.unique/identity 125 | :db/doc "a malware signature (sha256)."} 126 | {:db/ident :ground-truth/name 127 | :db/valueType :db.type/string 128 | :db/cardinality :db.cardinality/one 129 | :db/index true 130 | :db/doc "the truth about the malware name."} 131 | {:db/ident :ground-truth/type 132 | :db/valueType :db.type/string 133 | :db/cardinality :db.cardinality/one 134 | :db/index true 135 | :db/doc "the truth about the malware type."} 136 | {:db/ident :ground-truth/plat 137 | :db/valueType :db.type/string 138 | :db/cardinality :db.cardinality/one 139 | :db/index true 140 | :db/doc "the truth about the malware platform."}] 141 | -------------------------------------------------------------------------------- /resources/sources.txt: -------------------------------------------------------------------------------- 1 | - seed.edn: https://www.microsoft.com/security/portal/mmpc/shared/malwarenaming.aspx 2 | 3 | - english.dict: /usr/share/dict/american-english 4 | 5 | -------------------------------------------------------------------------------- /src/euphony/commands/analyzers.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.commands.analyzers 2 | (:require [clojure.set :as Set] 3 | [euphony.structs.label :as l] 4 | [medley.core :as m])) 5 | 6 | ; HELPERS 7 | 8 | (defn index->clusters 9 | "Convert an index of: resource -> family to a cluster of: family -> [resource ...]." 10 | [index] (->> (group-by val index) (m/map-vals (partial map first)) (m/map-vals set))) 11 | 12 | (defn humanize 13 | [index] (m/map-vals (fn [v] (if (ratio? 
v) (float v) v)) index)) 14 | 15 | ; STATISTICS 16 | 17 | (defn malstats [truths proposed] 18 | (for [[resource truth] truths 19 | :let [label (get proposed resource) 20 | match? (= label truth)]] 21 | {:resource resource, :truth truth, 22 | :label label, :match? match?})) 23 | 24 | (defn famstats [truths proposed] 25 | (letfn [(max-cluster [external? reference [family files]] 26 | (let [intersections (m/map-vals (comp count (partial Set/intersection files)) reference) 27 | [match inter] (apply max-key val intersections), matchfiles (get reference match)] 28 | {:external? external?, :family family, :family-card (count files) 29 | :inter inter :match match :match-card (count matchfiles)}))] 30 | (let [truths-clusters (index->clusters truths), proposed-clusters (index->clusters proposed)] 31 | (concat (for [c truths-clusters] (max-cluster true proposed-clusters c)) 32 | (for [c proposed-clusters] (max-cluster false truths-clusters c)))))) 33 | 34 | ; DATA ANALYSIS 35 | 36 | (defn analyze-parse [output] 37 | (let [assignments (vals output) 38 | tokens (apply concat assignments) 39 | seps (->> tokens (filter l/token-sep?)) 40 | words (->> tokens (filter l/token-word?)) 41 | ambiguous (->> words (filter l/token-has-many-fields?)) 42 | with-family (->> assignments (filter (fn [ts] (some #(l/token-is-this-field? :N %) ts))))] 43 | {:labels (count output) 44 | :with-family (count with-family) 45 | :distinct-seps (->> seps (map l/token-text) distinct count) 46 | :distinct-words (->> words (map l/token-text) distinct count) 47 | :ambiguous-words (->> ambiguous (map l/token-text) distinct count) 48 | :incomplete-assignments (->> assignments (remove l/tokens-assignment-complete?) 
count)})) 49 | 50 | (defn analyze-cluster [output] 51 | (let [results (keys output) 52 | antivirus (->> results (map l/av) distinct) 53 | cluster-families (->> (vals output) distinct) 54 | vendor-families (->> results (map l/label) distinct)] 55 | {:antivirus (count antivirus) 56 | :vendor-families (count vendor-families) 57 | :cluster-families (count cluster-families)})) 58 | 59 | (defn analyze-malstats [output] 60 | {:accuracy (/ (count (filter :match? output)) 61 | (count output))}) 62 | 63 | (defn analyze-famstats [output] 64 | (let [{externals true, internals false} (group-by :external? output) 65 | prec (/ (reduce + (map :inter internals)) 66 | (reduce + (map :family-card internals))) 67 | rec (/ (reduce + (map :inter externals)) 68 | (reduce + (map :family-card externals))) 69 | f1 (/ (* 2 prec rec) (+ prec rec))] 70 | {:proposed (count internals) :expected (count externals) 71 | :precision prec :recall rec :f1 f1})) 72 | -------------------------------------------------------------------------------- /src/euphony/commands/clusterer.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.commands.clusterer 2 | (:require [euphony.commands.sub.features :as fx] 3 | [euphony.structs 4 | [cograph :as cog] 5 | [graph :as g]])) 6 | 7 | ; DEFAULTS 8 | 9 | (def THRESHOLD 0.04) 10 | 11 | (def FEATURES {:ldistance fx/ldistance 12 | :imbalance fx/imbalance 13 | :incomplete fx/incomplete}) 14 | 15 | (def WEIGHTER (fn [cograph edge] 16 | (let [{:keys [incomplete imbalance ldistance]} (g/attrs cograph edge)] 17 | (float (+ incomplete (/ imbalance 10) (/ ldistance 100)))))) 18 | 19 | ; CONSTRUCTORS 20 | 21 | (defn results-graph 22 | "Construct a graph of antivirus results with the given features and weighter." 
23 | [results-seq & [{:keys [features weighter] :or {features FEATURES weighter WEIGHTER}}]] 24 | (-> (cog/cograph results-seq) (g/with-edges-features features) (g/with-weight weighter))) ;; use the caller-supplied options; :or falls back to FEATURES / WEIGHTER 25 | 26 | ; MAIN FUNCTIONS 27 | 28 | (defn results-clusters 29 | "Cluster a graph of antivirus results into named groups." 30 | [graph & [{:keys [threshold] :or {threshold THRESHOLD}}]] 31 | (reduce (fn [mapping nodes] ;; a component/cluster is a set of nodes 32 | (if (<= (count nodes) 1) ;; assoc singleton with themselves 33 | (let [[[av label :as node]] nodes] (assoc mapping node label)) 34 | (let [votes (map (fn [[av l :as n]] {l (cog/occur graph n)}) nodes) 35 | label (->> votes (apply merge-with +) (apply max-key val) key)] 36 | (apply assoc mapping (interleave nodes (repeat label)))))) 37 | (hash-map) (g/cluster graph threshold))) 38 | -------------------------------------------------------------------------------- /src/euphony/commands/importer.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.commands.importer 2 | (:require [clojure.java.io :as jio] 3 | [clojure.string :as Str] 4 | [euphony.structs.label :as l] 5 | [euphony.utils.db :as d] 6 | [euphony.utils.io :as io])) 7 | 8 | ; HELPERS 9 | 10 | (defn- parse-date [formatter date] 11 | (.parse formatter date)) 12 | 13 | (def ^:private to-date (partial parse-date (java.text.SimpleDateFormat. "yyyy-MM-dd HH:mm:ss"))) 14 | 15 | ; COMPOSABLES 16 | 17 | (defn truth>struct->datoms [{:strs [resource name type platform] :as struct}] 18 | (if (Str/blank? resource) [] 19 | [(cond-> {:ground-truth/resource resource} 20 | name (assoc :ground-truth/name (Str/lower-case name)) 21 | type (assoc :ground-truth/type (Str/lower-case type)) 22 | platform (assoc :ground-truth/plat (Str/lower-case platform)))])) 23 | 24 | (defn result>struct->datoms [[antivirus {:strs [result detected]}]] 25 | (if (or (Str/blank? antivirus) (Str/blank? 
result) (not detected)) [] 26 | (let [av (Str/lower-case antivirus), label (Str/lower-case result), rid (l/result->id [av label])] 27 | [{:db/id av :antivirus.system/name av} {:db/id label :antivirus.label/label label} 28 | {:db/id rid :antivirus.result/id rid :antivirus.result/system av :antivirus.result/label label}]))) 29 | 30 | (defn report>struct->datoms [{:strs [scan_id scan_date resource positives scans] :as struct}] 31 | (if (or (Str/blank? resource) (Str/blank? scan_id) (Str/blank? scan_date) (zero? positives)) [] 32 | (let [id (Str/lower-case scan_id), date (to-date scan_date), resource (Str/lower-case resource) 33 | results-datoms (mapcat result>struct->datoms scans)] 34 | (concat results-datoms 35 | [{:db/id id :antivirus.scan/id id :antivirus.scan/date date 36 | :antivirus.scan/results (->> results-datoms (filter :antivirus.result/id) (map :db/id))} 37 | {:antivirus.report/resource resource :antivirus.report/scan id :db/txInstant date}])))) 38 | 39 | ; COMPOSITIONS 40 | 41 | (def truths-in-json (comp truth>struct->datoms io/json-line->struct)) 42 | (def reports-in-json (comp report>struct->datoms io/json-line->struct)) 43 | 44 | ; MAIN FUNCTIONS 45 | 46 | (defn import-to-memory! 47 | "Import a file to memory using a pipeline." 48 | [pipeline file] 49 | (with-open [reader (jio/reader file)] 50 | (mapv pipeline (line-seq reader)))) 51 | 52 | (defn import-to-connection! 53 | "Import a file to connection using a pipeline." 
54 | [pipeline file conn] 55 | (with-open [reader (jio/reader file)] 56 | (reduce (fn [co line] (d/transact co (pipeline line))) 57 | conn (line-seq reader)))) 58 | -------------------------------------------------------------------------------- /src/euphony/commands/parser.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.commands.parser 2 | (:require [clojure.set :as Set] 3 | [euphony.commands.sub.heuristics :as hx] 4 | [euphony.commits :as c] 5 | [euphony.queries :as q] 6 | [euphony.structs.label :as l] 7 | [euphony.structs.pqueue :as p] 8 | [euphony.utils.db :as d])) 9 | 10 | ; DEFAULTS 11 | 12 | (def MAX-TURN 4) 13 | 14 | (def HEURISTICS [;; exact inference 15 | [hx/deduce-known-words {}] 16 | [hx/deduce-signature-tokens {:once true}] 17 | [hx/deduce-words-suffixed-by-ware {:once true}] 18 | [hx/deduce-words-between-parenthesis {:once true}] 19 | [hx/deduce-words-between-square-brackets {:once true}] 20 | ;; direct inferences 21 | [hx/infer-fields-by-elimination {}] 22 | [hx/infer-synonyms-from-known-platforms-and-types {:once true}] 23 | ;; delayed inferences 24 | [hx/infer-fields-from-compatible-patterns {:delay 3}] 25 | [hx/infer-name-from-last-one-unknown-token {:delay 1}] 26 | [hx/infer-name-from-last-two-unknown-tokens {:delay 1}] 27 | [hx/infer-words-in-english-sentence-at-beginning {:delay 1}]]) 28 | 29 | ; CONSTRUCTORS 30 | 31 | (defn priority 32 | "Compute a priority from a turn and an entry." 33 | [turn [label tokens :as entry]] 34 | (let [words (filter l/token-word? tokens) 35 | fields (mapcat l/token-fields words)] 36 | [turn (- (count fields) (count words))])) 37 | 38 | (defn pqueue 39 | "Construct a queue from a list of labels and a priority function." 
40 | [f labels] 41 | (let [tokens (map l/tokenize-and-domainize labels) 42 | entries (map vector labels tokens)] 43 | (p/pqueue entries (map f entries)))) 44 | 45 | (defn mem-setter 46 | "Construct a memory setter to change token fields using a commit function." 47 | [heuristic] (fn [token fields] (c/mem>token-fields heuristic token fields {:on-conflict->new true}))) 48 | 49 | ; INTERNAL FUNCTIONS 50 | 51 | (defn- search 52 | "Execute a sequence of heuristics to produce new findings, take into account heuristic options." 53 | [heuristics database label tokens turn] {:post [(= (count %) (count tokens))]} 54 | (reduce (fn [findings [h {:keys [delay once]}]] 55 | (if (or (and (some? once) (not= (- turn (or delay 0)) 0)) 56 | (and (some? delay) (< turn delay))) findings 57 | (h database label findings {:setter (mem-setter h)}))) 58 | tokens heuristics)) 59 | 60 | (defn- populate 61 | "Populate a connection with label words and words fields from a list of entry." 62 | [conn entries] 63 | (let [words-parts (future (->> entries (mapcat second) distinct 64 | (filter l/token-word?) (map l/token-parts)))] 65 | (as-> conn $ 66 | (reduce (fn [co [label tokens]] 67 | (let [pattern (l/w-patternize tokens) 68 | words (->> tokens (filter l/token-word?) (map l/token-text))] 69 | (c/db>label-words-pattern populate co label pattern words))) 70 | $ entries) 71 | (reduce (fn [co [_ word fields]] 72 | (c/db>word-fields populate co word fields)) 73 | $ @words-parts)))) 74 | 75 | (defn- combine 76 | "Combine new findings in database and in memory." 77 | [conn findings] {:post [(= (count (second %)) (count findings))]} 78 | (loop [findings findings, conn conn, tokens (vector)] 79 | (if (empty? findings) [conn tokens] ;;return case 80 | (let [[token & findings] findings] 81 | (if (l/token-sep? 
token) ;; separator case 82 | (recur findings conn (conj tokens token)) 83 | (let [db (d/db conn) 84 | [_ word mem-fields] (l/token-parts token) 85 | db-fields (-> (q/db>word db word) :antivirus.word/candidate-fields) 86 | intersection (Set/intersection mem-fields db-fields)] 87 | (recur findings 88 | (c/db>word-fields combine conn word mem-fields) 89 | (conj tokens (c/mem>token-fields combine token db-fields {:on-conflict->new true}))))))))) 90 | 91 | (defn- enrich 92 | "Enrich the database with new patterns/words." 93 | [conn label tokens] 94 | (as-> conn $ 95 | (c/db>label-fields-pattern enrich $ label (l/f-patternize tokens)) 96 | (reduce (fn [co [_ word fields]] (c/db>word-field enrich co word (first fields))) 97 | $ (->> tokens (filter l/token-word?) (map l/token-parts))))) 98 | 99 | ; MAIN FUNCTIONS 100 | 101 | (defn parse 102 | "Parse labels using a knowledge database and return a mapping of token assignments per label." 103 | [conn labels & [{:keys [heuristics max-turn] :or {heuristics HEURISTICS max-turn MAX-TURN}}]] 104 | (let [labels (filter l/label-valid? labels), queue (pqueue (partial priority 0) labels) 105 | conn (populate conn (keys queue)), complete (hash-map)] 106 | (loop [conn conn, queue queue, complete complete] 107 | (let [[[label tokens] [turn _]] (peek queue)] 108 | (if (or (empty? queue) (>= turn max-turn)) 109 | [conn (into complete (keys queue))] ;; return: conn and tokens 110 | (let [findings (search heuristics (d/db conn) label tokens turn) 111 | [conn tokens] (combine conn findings), entry [label tokens]] 112 | (if-not (l/tokens-assignment-complete? 
tokens) 113 | (recur conn (conj (pop queue) [entry (priority (inc turn) entry)]) complete) 114 | (recur (enrich conn label tokens) (pop queue) (assoc complete label tokens))))))))) 115 | -------------------------------------------------------------------------------- /src/euphony/commands/sub/features.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.commands.sub.features 2 | (:require [euphony.functions.metrics :as m] 3 | [euphony.structs.cograph :as cog])) 4 | 5 | ; EDGE FEATURES 6 | 7 | (defn ldistance 8 | "Compute the label distance between two av-labels." 9 | [cograph [[av-head label-head] [av-tail label-tail] :as edge]] {:post [(<= 0 % 1)]} 10 | (- 1 (m/str-similarity label-head label-tail))) 11 | 12 | (defn imbalance 13 | "Compute the imbalance between two av-labels (i.e. the ratio between their occurrences)." 14 | [cograph [head tail :as edge]] {:post [(<= 0 % 1)]} 15 | (- 1 (m/set-granularity (cog/occur cograph head) (cog/occur cograph tail)))) 16 | 17 | (defn incomplete 18 | "Compute the incompleteness between two av-labels (i.e. excluded elements in the smallest set)" 19 | [cograph [head tail :as edge]] {:post [(<= 0 % 1)]} 20 | (- 1 (m/set-completeness (cog/occur cograph head) 21 | (cog/occur cograph tail) 22 | (cog/co-occur cograph edge)))) 23 | -------------------------------------------------------------------------------- /src/euphony/commits.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.commits 2 | (:require [clojure.core.match :refer [match]] 3 | [clojure.set :as Set] 4 | [euphony.utils.db :as d] 5 | [euphony.queries :as q] 6 | [euphony.structs.label :as l] 7 | [euphony.utils.log :as log])) 8 | 9 | ; HELPERS 10 | 11 | (defn- f->str 12 | "Convert a Clojure function to a string." 13 | [f] (str \[ (-> f class .getSimpleName) \])) 14 | 15 | (defn- intersect? 16 | "Test if two sets intersect with each other." 17 | [old new] (not (empty? 
(Set/intersection old new)))) 18 | 19 | (defn- changelog 20 | "Create a log message to inform about a change." 21 | [source on id at from to & conflict] 22 | (log/log :debug (f->str source) on ":" id at ":" from "->" to 23 | (if conflict (str "with conflict: " conflict) ""))) 24 | 25 | ; MEMORY COMMITS 26 | 27 | (defn mem>token-fields 28 | [source token new-fields & [{:keys [on-conflict->new]}]] 29 | {:pre [(every? l/FIELDS new-fields) (set? new-fields)] 30 | :post [(not-empty (l/token-fields %))]} 31 | (let [m (partial changelog source "Mem.Token" (l/token-text token) "Fields") 32 | old (l/token-fields token), new new-fields, inter (Set/intersection old new) 33 | keep-old (fn [] token) 34 | keep-new (fn [] (l/token-set-fields token new)) 35 | keep-inter (fn [] (l/token-set-fields token inter))] 36 | (match [(empty? old) (empty? new) (= old inter) (intersect? old new)] 37 | [_ true _ _] (do (keep-old)) 38 | [_ _ true _] (do (keep-old)) 39 | [true _ _ _] (do (m old new) (keep-new)) 40 | [_ _ _ true] (do (m old inter) (keep-inter)) 41 | [_ _ _ _] (if on-conflict->new 42 | (do (m old new new) (keep-new)) 43 | (do (m old old new) (keep-old)))))) 44 | 45 | ; DATABASE COMMITS 46 | 47 | (defn db>word-field 48 | [source conn word new-field & [{:keys [on-conflict->new]}]] 49 | {:pre [(l/FIELDS new-field)]} 50 | (let [m (partial changelog source "Db.Word" word "Field") 51 | old (-> (q/db>word (d/db conn) word) :antivirus.word/field) 52 | new new-field 53 | keep-old (fn [] conn) 54 | keep-new (fn [] (d/transact conn [{:db/id [:antivirus.word/word word] :antivirus.word/field new}]))] 55 | (match [(nil? old) (nil? new) (= old new)] 56 | [_ true _] (do (keep-old)) 57 | [_ _ true] (do (keep-old)) 58 | [true _ _] (do (m old new) (keep-new)) 59 | [_ _ _] (if on-conflict->new 60 | (do (m old new new) (keep-new)) 61 | (do (m old old new) (keep-old)))))) 62 | 63 | (defn db>word-fields 64 | [source conn word new-fields & [{:keys [on-conflict->new]}]] 65 | {:pre [(every? 
l/FIELDS new-fields) (set? new-fields)] 66 | :post [(not-empty (-> (q/db>word (d/db %) word) :antivirus.word/candidate-fields))]} 67 | (let [m (partial changelog source "Db.Word" word "Fields") 68 | old (-> (q/db>word (d/db conn) word) :antivirus.word/candidate-fields) 69 | new new-fields 70 | inter (Set/intersection old new) 71 | keep-old (fn [] conn) 72 | keep-new (fn [] (d/transact conn [{:db/id [:antivirus.word/word word] :antivirus.word/candidate-fields new}])) 73 | keep-inter (fn [] (d/transact conn (for [field (Set/difference old inter)] 74 | [:db/retract [:antivirus.word/word word] :antivirus.word/candidate-fields field])))] 75 | (match [(empty? old) (empty? new) (= old new) (intersect? old new)] 76 | [_ true _ _] (do (keep-old)) 77 | [_ _ true _] (do (keep-old)) 78 | [true _ _ _] (do (m old new) (keep-new)) 79 | [_ _ _ true] (do (m old inter) (keep-inter)) 80 | [_ _ _ _] (if on-conflict->new 81 | (do (m old new new) (keep-new)) 82 | (do (m old old new) (keep-old)))))) 83 | 84 | (defn db>label-words-pattern 85 | [source conn label new-pattern new-words & [{:keys [on-conflict->new]}]] 86 | {:pre [(string? new-pattern) (not-empty new-words)]} 87 | (let [m (partial changelog source "Db.Label" label "Words-Pattern") 88 | old (-> (q/db>label (d/db conn) label) :antivirus.label/words-pattern) 89 | new new-pattern 90 | keep-old (fn [] conn) 91 | keep-new (fn [] (d/transact conn (concat (for [word new-words] {:db/id word :antivirus.word/word word}) 92 | [{:db/id [:antivirus.label/label label] :antivirus.label/words new-words 93 | :antivirus.label/words-pattern new}])))] 94 | (match [(nil? old) (nil? new) (= old new)] 95 | [_ true _] (do (keep-old)) 96 | [_ _ true] (do (keep-old)) 97 | [true _ _] (do (m old new) (keep-new)) 98 | [_ _ _] (if on-conflict->new 99 | (do (m old new new) (keep-new)) 100 | (do (m old old new) (keep-old)))))) 101 | 102 | (defn db>label-fields-pattern 103 | [source conn label new-pattern & [{:keys [on-conflict->new]}]] 104 | {:pre [(string? 
new-pattern)]} 105 | (let [m (partial changelog source "Db.Label" label "Fields-Pattern") 106 | old (-> (q/db>label (d/db conn) label) :antivirus.label/fields-pattern) 107 | new new-pattern 108 | keep-old (fn [] conn) 109 | keep-new (fn [] (d/transact conn [{:db/id [:antivirus.label/label label] :antivirus.label/fields-pattern new-pattern}]))] 110 | (match [(nil? old) (nil? new) (= old new)] 111 | [_ true _] (do (keep-old)) 112 | [_ _ true] (do (keep-old)) 113 | [true _ _] (do (m old new) (keep-new)) 114 | [_ _ _] (if on-conflict->new 115 | (do (m old new new) (keep-new)) 116 | (do (m old old new) (keep-old)))))) 117 | 118 | (defn db>label-vendor-attribute 119 | [source conn label attribute value & [{:keys [on-conflict->new]}]] 120 | {:pre [(#{:antivirus.label/name-part :antivirus.label/type-part :antivirus.label/plat-part} attribute) (string? value)]} 121 | (let [m (partial changelog source "db.label" label (name attribute)) 122 | old (-> (q/db>label (d/db conn) label) attribute) 123 | new value 124 | keep-old (fn [] conn) 125 | keep-new (fn [] (d/transact conn [{:db/id [:antivirus.label/label label] attribute value}]))] 126 | (match [(nil? old) (nil? new) (= old new)] 127 | [_ true _] (do (keep-old)) 128 | [_ _ true] (do (keep-old)) 129 | [true _ _] (do (m old new) (keep-new)) 130 | [_ _ _] (if on-conflict->new 131 | (do (m old new new) (keep-new)) 132 | (do (m old old new) (keep-old)))))) 133 | 134 | (defn db>result-cluster-attribute 135 | [source conn result attribute value & [{:keys [on-conflict->new]}]] 136 | {:pre [(#{:antivirus.result/name-cluster :antivirus.result/type-cluster :antivirus.result/plat-cluster} attribute) (string? value)]} 137 | (let [m (partial changelog source "db.result" result (name attribute)) 138 | old (-> (q/db>result (d/db conn) result) attribute) 139 | new value 140 | keep-old (fn [] conn) 141 | keep-new (fn [] (d/transact conn [{:db/id [:antivirus.result/id result] attribute value}]))] 142 | (match [(nil? old) (nil? 
new) (= old new)] 143 | [_ true _] (do (keep-old)) 144 | [_ _ true] (do (keep-old)) 145 | [true _ _] (do (m old new) (keep-new)) 146 | [_ _ _] (if on-conflict->new 147 | (do (m old new new) (keep-new)) 148 | (do (m old old new) (keep-old)))))) 149 | 150 | (defn db>vendors-from-tokens 151 | [source conn label tokens & [{:keys [name-strategy type-strategy plat-strategy] 152 | :or {name-strategy first 153 | type-strategy (comp l/words-concat sort) 154 | plat-strategy (comp l/words-concat sort)}}]] 155 | (letfn [(select [field tokens] 156 | (->> tokens 157 | (filter #(l/token-is-this-field? field %)) 158 | (map l/token-text) seq)) 159 | (commit-attribute [co attribute value] 160 | (if (nil? value) co 161 | (db>label-vendor-attribute source co label attribute value)))] 162 | (-> conn 163 | (commit-attribute :antivirus.label/name-part (some-> (select :N tokens) name-strategy)) 164 | (commit-attribute :antivirus.label/type-part (some-> (select :T tokens) type-strategy)) 165 | (commit-attribute :antivirus.label/plat-part (some-> (select :P tokens) plat-strategy))))) 166 | 167 | (defn db>cluster-attribute-from-cluster-mapping 168 | [source conn mapping vendor-field cluster-field & [options]] 169 | (let [reverse-index (-> (q/db>result->av_label vendor-field (d/db conn)) q/reverse-index)] 170 | (reduce (fn [out-co [avlabel cluster]] 171 | (reduce (fn [in-co res] 172 | (db>result-cluster-attribute source in-co res cluster-field cluster options)) 173 | out-co (get reverse-index avlabel []))) 174 | conn mapping))) 175 | -------------------------------------------------------------------------------- /src/euphony/components/datomic.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.components.datomic 2 | (:require [com.stuartsierra.component :as component] 3 | [datomic.api :as d] 4 | [euphony.utils.db :as Db] 5 | [euphony.utils.io :as io] 6 | [euphony.utils.log :as log])) 7 | 8 | ; HELPERS 9 | 10 | (defn install-schema [conn 
schema-file] 11 | (log/log :info "** installing schema") 12 | (d/transact conn (io/read-edn! schema-file))) 13 | 14 | (defn install-seeds [conn seeds-file] 15 | (log/log :info "** installing seeds") 16 | (d/transact conn (io/read-edn! seeds-file))) 17 | 18 | ; COMPONENT 19 | 20 | (defrecord Datomic [conn uri 21 | schema-file seeds-file 22 | reset-on-start reset-on-stop] 23 | component/Lifecycle 24 | (start [this] 25 | (log/log :info "Starting Datomic at:" uri) 26 | (when reset-on-start 27 | (log/log :info "* deleting database") 28 | (d/delete-database uri)) 29 | (let [new? (d/create-database uri) 30 | conn (d/connect uri)] 31 | (when new? 32 | (log/log :info "* creating database") 33 | (install-schema conn schema-file) 34 | (install-seeds conn seeds-file)) 35 | (assoc this :conn conn))) 36 | (stop [this] 37 | (log/log :info "Stopping Datomic:" uri) 38 | (when reset-on-stop 39 | (log/log :info "* deleting database") 40 | (d/delete-database uri)) 41 | (assoc this :conn nil))) ;; assoc nil instead of dissoc: dissoc on a record field would return a plain map, not a Datomic record 42 | 43 | (defn new-datomic [conf] 44 | (map->Datomic conf)) 45 | -------------------------------------------------------------------------------- /src/euphony/core.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.core 2 | (:gen-class) 3 | (:require 4 | [clojure.string :as Str] 5 | [euphony.tasks :as tasks] 6 | [euphony.utils 7 | [cli :as cli] 8 | [log :as log]])) 9 | 10 | ; SUMMARY 11 | 12 | (def USAGE "euphony [options]") 13 | 14 | (def OPTIONS 15 | [;; HELPERS 16 | ["-h" "--help" "Display a help summary with acceptable arguments and options."] 17 | ;; SETTINGS 18 | ["-l" "--log-level LEVEL" "Set the log level of the program." :default :warn :parse-fn #(keyword %) 19 | :validate [#(log/LEVELS %) (str "Must be in: " (->> log/LEVELS (map name) (Str/join ",")))]] ;; join (not interpose): str of a lazy seq prints the seq object, not its items 20 | ["-m" "--max-turn VALUE" "Set the maximum number of turns allowed for inference at the parsing stage." 21 | :parse-fn #(Integer/parseInt %) :default tasks/MAX-TURN :validate [#(pos? 
%) "Must be a natural number."]] 22 | ["-t" "--threshold VALUE" "Set the threshold value for the trimming operation at the clustering stage." 23 | :parse-fn #(Float/parseFloat %) :default tasks/THRESHOLD :validate [#(<= 0 % 1) "Must be float: 0 <= x <= 1."]] 24 | ["-e" "--export-dir DIR" "Set the output directory of the program." :default (System/getProperty "user.dir")] 25 | ["-f" "--field FIELD" "Set the label field to cluster and export." :default tasks/DEFAULT-FIELD :parse-fn #(keyword %) 26 | :validate [#(tasks/FIELDS %) (str "Must be in: " (->> tasks/FIELDS (map name) (Str/join ",")))]] 27 | ;; RESOURCES 28 | ["-r" "--reports-file FILE" "Provide a sequence of reports from VirusTotal formatted as JSON records."] 29 | ["-g" "--ground-file FILE" "Provide a ground-truth to evaluate the output formatted as CSV tuples."] 30 | ["-s" "--seeds-file FILE" "Provide a seeds file with some initial domain knowledge about malware."] 31 | ["-d" "--database-uri URI" "Provide a database URI to run the program and persist the learning."] 32 | ;; EXPORT FLAGS 33 | ["-A" "--export-all" "Export every information"] 34 | ["-E" "--export-election" "Export field frequency per malware signature"] 35 | ["-O" "--export-proposed" "Export best candidate per malware signature"] 36 | ["-P" "--export-parse-rules" "Export association between malware labels and fields"] 37 | ["-T" "--export-parse-mapping" "Export tokenization of malware labels into fields"] 38 | ["-V" "--export-vendor-reports" "Export the transformation dataset after parsing"] 39 | ["-G" "--export-cluster-graph" "Export the association graph after clustering"] 40 | ["-C" "--export-cluster-rules" "Export associations between raw and clustered fields"] 41 | ["-D" "--export-cluster-mapping" "Export the clustering of malware results"] 42 | ["-R" "--export-cluster-reports" "Export the transformation dataset after clustering"] ;; was a copy of the -V help text ("after parsing") 43 | ["-M" "--export-malstats" "Export statistics about malware files based on ground-truth"] 44 | ["-F" 
"--export-famstats" "Export statistics about malware families based on ground-truth"]]) 45 | 46 | ; MAIN FUNCTIONS 47 | 48 | (defn system-conf [options] 49 | (cond-> options 50 | (contains? options :database-uri) (update-in [:system :datomic] assoc :uri (options :database-uri)) 51 | (contains? options :seeds-file) (update-in [:system :datomic] assoc :seeds-file (options :seeds-file)))) 52 | 53 | (defn -main [& args] 54 | (let [{:keys [arguments options summary errors]} (cli/parse args OPTIONS)] 55 | (when-let [level (:log-level options)] (log/set-level! level)) 56 | (cond 57 | (contains? options :help) (do (cli/feedback! USAGE summary) (System/exit 0)) 58 | (not-empty errors) (do (cli/feedback! USAGE summary errors) (System/exit 1)) 59 | :else (do (tasks/make! tasks/all (system-conf options)) (System/exit 0))))) 60 | -------------------------------------------------------------------------------- /src/euphony/functions/counters.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.functions.counters 2 | (:require [clojure.core.reducers :as r] 3 | [clojure.math.combinatorics :as c])) 4 | 5 | ; REDUCERS 6 | 7 | (defn merge-counts 8 | ([] {}) 9 | ([& counts] (apply merge-with + counts))) 10 | 11 | (defn count-items 12 | ([] {}) 13 | ([counts item] (assoc counts item (inc (get counts item 0))))) 14 | 15 | ; COMPOSABLES 16 | 17 | (def unroll (r/mapcat identity)) ;; 1-dimension flatten 18 | 19 | (defn associations [n] 20 | (comp (r/map set) (r/mapcat #(c/combinations % n)))) 21 | 22 | ; MAIN FUNCTIONS 23 | 24 | (defn count-flat-items 25 | "Count items from a flat collection." 26 | [coll] (r/fold merge-counts count-items coll)) 27 | 28 | (defn count-nested-items 29 | "Count items from a list of collections." 30 | [colls] (count-flat-items (unroll colls))) 31 | 32 | (defn count-assocs-items 33 | "Count n associations from a list of collections." 
34 | [n colls] (count-flat-items ((associations n) colls))) 35 | -------------------------------------------------------------------------------- /src/euphony/functions/metrics.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.functions.metrics 2 | (:require [clj-fuzzy.metrics :refer [dice]])) 3 | 4 | ; SET METRICS 5 | 6 | (defn set-granularity 7 | "Compute the granularity of two sets given their cardinality." 8 | [a b] {:pre [(pos? (max a b))] :post [(<= 0 % 1)]} (/ (min a b) (max a b))) 9 | 10 | (defn set-completeness 11 | "Compute the completeness of two sets given their cardinality and intersection." 12 | [a b inter] {:pre [(pos? (min a b))] :post [(<= 0 % 1)]} (/ inter (min a b))) 13 | 14 | ; STRING METRICS 15 | 16 | (def str-similarity (memoize (fn [a b] {:post [(<= 0 % 1)]} (dice a b)))) 17 | -------------------------------------------------------------------------------- /src/euphony/functions/voters.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.functions.voters 2 | (:require [clojure.core.reducers :as r] 3 | [medley.core :as m])) 4 | 5 | ; REDUCERS 6 | 7 | (defn merge-votes 8 | ([] {}) 9 | ([& votes] (apply merge-with + votes))) 10 | 11 | (defn merge-indexes 12 | ([] {}) 13 | ([& indexes] (apply merge indexes))) 14 | 15 | (defn index-votes 16 | ([] {}) 17 | ([index entry items] (assoc index entry (frequencies items)))) 18 | 19 | (defn index-elected [global-votes] 20 | (fn ([] {}) 21 | ([index entry local-votes] 22 | (let [[_ valmax] (apply max-key val local-votes) 23 | short-list (->> (m/filter-vals #(= % valmax) local-votes) keys)] 24 | (assoc index entry 25 | (if (= (count short-list) 1) (first short-list) 26 | (->> short-list (sort-by #(get global-votes %)) last))))))) 27 | 28 | ; MAIN FUNCTIONS 29 | 30 | (defn vote 31 | "Compute the frequency of items per index entry." 
32 | [index] (r/fold merge-indexes index-votes index)) 33 | 34 | (defn elect 35 | "Select the most occurring item per entry through majority voting. 36 | When there is no majority, select the most occurring item globally." 37 | [votes-index] 38 | (let [global-votes (r/fold merge-votes (vals votes-index))] 39 | (r/fold merge-indexes (index-elected global-votes) votes-index))) 40 | 41 | (def vote-and-elect (comp elect vote)) 42 | -------------------------------------------------------------------------------- /src/euphony/queries.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.queries 2 | (:require [euphony.utils.db :as d] 3 | [medley.core :as m])) 4 | 5 | ; HELPERS 6 | 7 | (defn- by-key 8 | "Retrieve a database entity by its ident." 9 | [key db value] (d/entity db [key value])) 10 | 11 | (defn reverse-index 12 | "Construct a reverse index from an index." 13 | [index] (reduce (fn [idx [k v]] (update idx k conj v)) 14 | (empty index) (map reverse index))) 15 | 16 | ; MEMORY QUERIES 17 | 18 | (defn mem>truth->attribute [attribute records] 19 | (->> records 20 | (filter (fn [r] (and (contains? r :ground-truth/resource) (contains? 
r attribute)))) 21 | (map (fn [r] [(:ground-truth/resource r) (attribute r)])) 22 | (into (hash-map)))) 23 | 24 | ; DATABASE QUERIES 25 | 26 | (def db>word (partial by-key :antivirus.word/word)) 27 | (def db>label (partial by-key :antivirus.label/label)) 28 | (def db>resource (partial by-key :resource/resource)) 29 | (def db>antivirus (partial by-key :antivirus.system/name)) 30 | (def db>result (partial by-key :antivirus.result/id)) 31 | (def db>scan (partial by-key :antivirus.scan/id)) 32 | (def db>truth (partial by-key :ground-truth/resource)) 33 | (def db>report (partial by-key :antivirus.report/resource)) 34 | 35 | (defn db>word->field [db] 36 | (->> (d/q '[:find ?word ?field 37 | :where 38 | [?w :antivirus.word/word ?word] 39 | [?w :antivirus.word/field ?field]] 40 | db) 41 | (into (hash-map)))) 42 | 43 | (defn db>label->antivirus [l db] 44 | (d/q '[:find [?av ...] 45 | :in $ ?label 46 | :where 47 | [?l :antivirus.label/label ?label] 48 | [?r :antivirus.result/label ?l] 49 | [?r :antivirus.result/system ?a] 50 | [?a :antivirus.system/name ?av]] 51 | db l)) 52 | 53 | (defn db>label->attribute [attribute db] 54 | (->> (d/q '[:find ?label ?value 55 | :in $ ?attribute 56 | :where 57 | [?l :antivirus.label/label ?label] 58 | [?l ?attribute ?value]] 59 | db attribute) 60 | (into (sorted-map)))) 61 | 62 | (defn db>result->attribute [attribute db] 63 | (->> (d/q '[:find ?result ?value 64 | :in $ ?attribute 65 | :where 66 | [?r :antivirus.result/id ?result] 67 | [?r ?attribute ?value]] 68 | db attribute) 69 | (into (sorted-map)))) 70 | 71 | (defn db>truth->attribute [attribute db] 72 | (->> (d/q '[:find ?resource ?value 73 | :in $ ?attribute 74 | :where 75 | [?t :ground-truth/resource ?resource] 76 | [?t ?attribute ?value]] 77 | db attribute) 78 | (into (sorted-map)))) 79 | 80 | (defn db>result->av_label 81 | [label-field db] 82 | (->> (d/q '[:find ?result ?av ?label 83 | :in $ ?field 84 | :where 85 | [?r :antivirus.result/label ?l] 86 | [?r :antivirus.result/system 
?a] 87 | [?r :antivirus.result/id ?result] 88 | [?a :antivirus.system/name ?av] 89 | [?l ?field ?label]] 90 | db label-field) 91 | (group-by first) 92 | (m/map-vals (fn [[[r a l]]] [a l])))) 93 | 94 | (defn db>report->vendor-results [vendor-field db] 95 | (->> (d/q '[:find ?resource ?antivirus ?value 96 | :in $ ?attribute 97 | :where 98 | [?r :antivirus.report/resource ?resource] 99 | [?r :antivirus.report/scan ?s] 100 | [?s :antivirus.scan/results ?rs] 101 | [?rs :antivirus.result/system ?av] 102 | [?av :antivirus.system/name ?antivirus] 103 | [?rs :antivirus.result/label ?l] 104 | [?l ?attribute ?value]] 105 | db vendor-field) 106 | (group-by first) 107 | (m/map-vals (partial map (fn [[r a v]] [a v]))))) 108 | 109 | (defn db>report->cluster-results [cluster-field db] 110 | (->> (d/q '[:find ?resource ?antivirus ?value 111 | :in $ ?attribute 112 | :where 113 | [?r :antivirus.report/resource ?resource] 114 | [?r :antivirus.report/scan ?s] 115 | [?s :antivirus.scan/results ?rs] 116 | [?rs :antivirus.result/system ?av] 117 | [?av :antivirus.system/name ?antivirus] 118 | [?rs ?attribute ?value]] 119 | db cluster-field) 120 | (group-by first) 121 | (m/map-vals (partial map (fn [[r a v]] [a v]))))) 122 | 123 | (defn db>words-with-this-field [field db] 124 | (d/q '[:find [?word ...] 125 | :in $ ?field 126 | :where 127 | [?w :antivirus.word/field ?field] 128 | [?w :antivirus.word/word ?word]] 129 | db field)) 130 | 131 | (defn db>labels-with-unknown-fields-pattern [db] 132 | (d/q '[:find [?label ...] 133 | :where 134 | [?l :antivirus.label/label ?label] 135 | [(missing? $ ?l :antivirus.label/fields-pattern)]] 136 | db)) 137 | 138 | (defn db>fields-patterns-related-to-label [db l] 139 | (if-let [pattern (-> (db>label db l) :antivirus.label/words-pattern)] 140 | (d/q '[:find [?fields-pattern ...] 141 | :in $ ?words-pattern [?av ...] 
142 | :where 143 | [?l :antivirus.label/words-pattern ?words-pattern] 144 | [?l :antivirus.label/fields-pattern ?fields-pattern] 145 | [?r :antivirus.result/label ?l] 146 | [?r :antivirus.result/system ?a] 147 | [?a :antivirus.system/name ?av]] 148 | db pattern (db>label->antivirus l db)) 149 | [])) 150 | -------------------------------------------------------------------------------- /src/euphony/structs/cograph.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.structs.cograph 2 | (:require [euphony.functions.counters :as c] 3 | [euphony.structs.graph :as g])) 4 | 5 | ; CONSTRUCTORS 6 | 7 | (defn cograph 8 | "Construct a co-occurrence graph from a list of sequences." 9 | [sequences] 10 | (let [occurrences (future (c/count-nested-items sequences)) 11 | co-occurrences (c/count-assocs-items 2 sequences)] 12 | ;; co-occurrences keys are sets, graph edges are vecs 13 | (-> (apply g/graph (map (comp vec key) co-occurrences)) 14 | (g/with-edges-features {:co-occur (fn [graph edge] (get co-occurrences (set edge)))}) 15 | (g/with-nodes-features {:occur (fn [graph node] (get @occurrences node))})))) 16 | 17 | ; ATTRIBUTES 18 | 19 | (def occur (partial g/node-attr :occur)) 20 | (def co-occur (partial g/edge-attr :co-occur)) 21 | -------------------------------------------------------------------------------- /src/euphony/structs/graph.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.structs.graph 2 | (:require [loom 3 | [alg :as Alg] 4 | [attr :as Attr] 5 | [graph :as Graph] 6 | [io :as Io]])) 7 | 8 | ; ALIASES 9 | 10 | ;; on graph 11 | (def nodes Graph/nodes) 12 | (def edges Alg/distinct-edges) 13 | (def has-node? Graph/has-node?) 14 | (def has-edge? Graph/has-edge?) 
15 | (def remove-nodes Graph/remove-nodes) 16 | (def remove-edges Graph/remove-edges) 17 | (def components Alg/connected-components) 18 | 19 | ;; on node 20 | (def node-edges Graph/out-edges) 21 | (def node-degree Graph/out-degree) 22 | 23 | ;; on edge 24 | (def src Graph/src) 25 | (def dest Graph/dest) 26 | (def weight Graph/weight) 27 | 28 | ;; on attribute 29 | (def attrs Attr/attrs) 30 | (def add-attr Attr/add-attr) 31 | 32 | (defn node-attr 33 | "Get the attribute value of a node." 34 | [attribute graph node] 35 | (when (has-node? graph node) 36 | (Attr/attr graph node attribute))) 37 | 38 | (defn edge-attr 39 | "Get the attribute value of an edge." 40 | [attribute graph [head tail :as edge]] 41 | (when (has-edge? graph head tail) 42 | (Attr/attr graph head tail attribute))) 43 | 44 | ;; statistics 45 | (def loners Alg/loners) 46 | (def density Alg/density) 47 | 48 | ;; visualization 49 | (def view Io/view) 50 | 51 | ; CONSTRUCTORS 52 | 53 | (def graph Graph/graph) 54 | 55 | (defn- with-features 56 | "Template function to add element features as element attributes." 57 | [elements graph features] 58 | (reduce (fn [outer-graph element] 59 | (reduce (fn [inner-graph [feature-name feature-fn]] 60 | (add-attr inner-graph element feature-name 61 | (feature-fn inner-graph element))) 62 | outer-graph features)) 63 | graph (elements graph))) 64 | 65 | (def with-nodes-features (partial with-features nodes)) 66 | (def with-edges-features (partial with-features edges)) 67 | 68 | (defn with-same-attrs 69 | "Copy the attributes from a source graph to another graph." 70 | [source graph] (assoc graph :attrs (:attrs source))) 71 | 72 | (defn with-weight 73 | "Add a weight to each graph edge using a weight function." 74 | [graph weight-fn] 75 | ;; the weight of Loom graph cannot be changed. Thus, we need to copy attrs values. 
(letfn [(weighted-edge [graph f [head tail :as edge]] [head tail (f graph edge)])]
  (->> (edges graph)
       (map (partial weighted-edge graph weight-fn))
       (apply Graph/weighted-graph) (with-same-attrs graph))))

; SORTERS

(defn sort-nodes-by-degree
  "Return [node degree] pairs for every node, ordered by increasing degree."
  [graph]
  (sort-by second
           (map (fn [node] [node (node-degree graph node)])
                (nodes graph))))

(defn sort-edges-by-weight
  "Return [edge weight] pairs for every edge, ordered by increasing weight."
  [graph]
  (sort-by second
           (map (fn [edge] [edge (weight graph edge)])
                (edges graph))))

(defn- sort-elements-by-attribute
  "Return [element attributes] pairs ordered by (attribute attributes).

  `elements` is the function listing the graph elements to sort and
  `attribute` is applied to the attributes of each element to get its sort key."
  [elements attribute graph]
  (let [pairs (map (fn [element] [element (attrs graph element)])
                   (elements graph))]
    (sort-by (fn [[_ element-attrs]] (attribute element-attrs))
             pairs)))

(def sort-nodes-by-attr (partial sort-elements-by-attribute nodes))
(def sort-edges-by-attr (partial sort-elements-by-attribute edges))

; SELECTORS

(defn where-node-degree
  "Build a [graph node] selector that calls (pred degree value)."
  [pred value]
  (fn [graph node]
    (let [degree (node-degree graph node)]
      (pred degree value))))

(defn where-edge-weight
  "Build a [graph edge] selector that calls (pred weight value)."
  [pred value]
  (fn [graph edge]
    (let [edge-weight (weight graph edge)]
      (pred edge-weight value))))

(defn where-node-attr
  "Build a [graph node] selector that calls (pred attribute-value value)."
  [attribute pred value]
  (fn [graph node]
    (let [attr-value (node-attr attribute graph node)]
      (pred attr-value value))))

(defn where-edge-attr
  "Build a selector based on edge attribute."
[attribute pred value]
(fn [graph edge]
  (pred (edge-attr attribute graph edge) value)))

; SELECTERS

(defn select-node
  "Remove from the graph every node for which (pred graph node) is falsy."
  [pred graph]
  (let [rejected (remove (fn [node] (pred graph node)) (nodes graph))]
    (apply remove-nodes graph rejected)))

(defn select-edge
  "Remove from the graph every edge for which (pred graph edge) is falsy."
  [pred graph]
  (let [rejected (remove (fn [edge] (pred graph edge)) (edges graph))]
    (apply remove-edges graph rejected)))

; ALGORITHMS

(defn prune
  "Reduce a graph to the spanning tree computed by Alg/prim-mst,
  keeping the attributes of the original graph."
  [graph]
  (->> graph
       Alg/prim-mst
       (with-same-attrs graph)))

(defn trim
  "Keep only the edges whose weight is lower than or equal to the threshold."
  [graph threshold]
  (let [light-enough? (where-edge-weight <= threshold)]
    (select-edge light-enough? graph)))

(defn cluster
  "Prune the graph, trim it with the threshold and return its connected components."
  [graph threshold]
  (components (trim (prune graph) threshold)))

--------------------------------------------------------------------------------
/src/euphony/structs/label.clj:
--------------------------------------------------------------------------------
(ns euphony.structs.label
  (:require [clojure.string :as Str]
            [instaparse.core :as insta]))

; CONSTANTS

(defonce GRAMMAR
  (insta/parser
   "LABEL=(w|s)+
    w=#'\\p{Alnum}+'
    s=#'[\\p{Punct}\\p{Blank}]+'"))

(defonce FIELDS #{:P :T :N :I})

; HELPERS

(def ^:private my-get (fn [key obj] (get obj key)))

; WORDS

(defn word-name?
  "Check if a word is a possible malware name: at least 3 letters, letters only."
  [word]
  {:pre [(string? word)]}
  (some? (re-matches #"\p{Alpha}{3,}" word)))

(defn word-type?
  "Check if a word is a possible malware type: at least 2 letters, letters only."
  [word]
  {:pre [(string? word)]}
  (some? (re-matches #"\p{Alpha}{2,}" word)))

(defn word-platform?
"Check if a word is a possible malware platform."
[word] {:pre [(string? word)]} (boolean (and (re-matches #"\p{Alnum}{2,}" word)
                                             (re-find #"\p{Alpha}" word))))

(defn word-information?
  "Check if a word is a possible malware information: one or more alphanumerics."
  [word]
  {:pre [(string? word)]}
  (some? (re-matches #"\p{Alnum}+" word)))

(def words-concat (partial Str/join "++"))

; TOKENS

;; A token is read by position: 0 is its symbol, 1 its text, 2 its fields.
(defonce TOKEN-SYM-KEY 0)
(defonce TOKEN-TEXT-KEY 1)
(defonce TOKEN-FIELDS-KEY 2)
(def token-sym (partial my-get TOKEN-SYM-KEY))
(def token-text (partial my-get TOKEN-TEXT-KEY))
(def token-fields (partial my-get TOKEN-FIELDS-KEY))
(def token-parts (juxt token-sym token-text token-fields))

(defn- token-test-on
  "Extract a part of the token with `selector` and test it with `pred`.
  Always returns a boolean; a missing (falsy) part yields false."
  [selector pred token]
  (if-let [part (selector token)]
    (boolean (pred part))
    false))

(def token-test-on-sym (partial token-test-on token-sym))
(def token-test-on-text (partial token-test-on token-text))
(def token-test-on-fields (partial token-test-on token-fields))

(def token-sep? (partial token-test-on-sym #(= % :s)))
(def token-word? (partial token-test-on-sym #(= % :w)))

(defn token-is-this-field?
  "Test if the token fields are exactly the set holding the given field."
  [field token]
  (let [expected #{field}]
    (token-test-on-fields (fn [fields] (= fields expected)) token)))

(defn token-has-same-fields?
  "Test if the token fields are equal to the given fields."
  [fields token]
  (token-test-on-fields (fn [token-fs] (= token-fs fields)) token))

(defn token-contains-field?
  "Test if the given field is one of the token fields."
  [field token]
  (token-test-on-fields (fn [fields] (contains? fields field)) token))

(defn token-has-one-field?
  "Test if the token has exactly one field."
  [token]
  (token-test-on-fields (fn [fields] (= (count fields) 1)) token))

(defn token-has-many-fields?
  "Test if there are several token fields."
[token] (token-test-on-fields #(> (count %) 1) token))

(defn tokens-assignment-complete?
  "Test if every word token of the list has exactly one field."
  [tokens]
  (->> tokens
       (filter token-word?)
       (every? token-has-one-field?)))

(defn token-set-fields
  "Replace the fields of a word token; any other token is returned untouched.
  Throws an AssertionError when `fields` holds a value outside FIELDS."
  [token fields]
  (if (token-word? token)
    (if (every? FIELDS fields)
      (assoc token TOKEN-FIELDS-KEY fields)
      (throw (AssertionError. (str "unacceptable fields in: " fields))))
    token))

(defn map-token
  "Map f over the tokens accepted by pred. Tokens rejected by pred, and
  tokens for which f returns nil, are left unchanged."
  [pred f tokens]
  (letfn [(transform [token]
            (if (pred token)
              (or (f token) token)
              token))]
    (map transform tokens)))

(def map-token-sep (partial map-token token-sep?))
(def map-token-word (partial map-token token-word?))

(defn tokenize-with
  "Parse a label string with the grammar and return the parsed tokens,
  i.e. the parse result without its first element."
  [grammar label]
  {:pre [(string? label)]}
  (->> label
       (insta/parse grammar)
       rest))

(defn domainize-with
  "Give every word token its initial fields: the set of domain keys whose
  predicate accepts the token text. Separator tokens are left unchanged."
  [domain tokens]
  (let [fields-for (fn [text]
                     (into (hash-set)
                           (for [[field accepts?] domain
                                 :when (accepts? text)]
                             field)))
        assign     (fn [token]
                     (if-not (token-sep? token)
                       (token-set-fields token (fields-for (token-text token)))
                       token))]
    (map assign tokens)))

(defonce DOMAIN {:N word-name?
                 :T word-type?
                 :P word-platform?
                 :I word-information?})

(def tokenize (partial tokenize-with GRAMMAR))
(def domainize (partial domainize-with DOMAIN))
(def tokenize-and-domainize (comp domainize tokenize))

; LABELS

(defn label-valid?
  "Check that a label is a non-blank string with at least one alphanumeric
  character and nothing but alphanumerics, blanks and punctuation."
  [label] (boolean (and (string? label)
                        (not (Str/blank?
label))
                        (re-find #"[\p{Alnum}]" label)
                        (not (re-find #"[^\p{Alnum}\p{Blank}\p{Punct}]" label)))))

(defn label-find-words-by-regexp
  "Return the set of first capture groups of every match of `re` in `label`."
  [re label]
  (set (map second (re-seq re label))))

(def label-words-between-parenthesis (partial label-find-words-by-regexp #"\((\p{Alnum}+)\)"))
(def label-words-between-square-brackets (partial label-find-words-by-regexp #"\[(\p{Alnum}+)\]"))

; RESULTS

(def av first)
(def label second)

(defn result->id
  "Build the identifier of a result: lower-cased antivirus and label joined by \"++\"."
  [[av label]]
  {:pre [(not (Str/blank? av)) (not (Str/blank? label))]}
  (Str/join "++" [(Str/lower-case av) (Str/lower-case label)]))
; PATTERNS

(def pattern-word? (comp boolean (partial re-matches #"\p{Alpha}")))
(def pattern-sep? (comp boolean (partial re-matches #"[\p{Blank}\p{Punct}]")))

(def pattern-parts (comp (partial map token-text) tokenize))
(def pattern-words (comp (partial filter pattern-word?) pattern-parts))
(def pattern-keywords (comp (partial map keyword) pattern-words))

(defn pattern-compatible?
  "Test if a list of tokens is compatible with a field pattern: same number
  of words, and each pattern field is among the fields of the matching word token."
  [f-pattern tokens]
  (let [wanted    (pattern-keywords f-pattern)
        available (map token-fields (filter token-word? tokens))]
    (and (= (count available) (count wanted))
         (every? true? (map contains? available wanted)))))

(defn w-patternize
  "Turn a token list into its words pattern: separators keep their text and
  words are replaced by the name of their symbol."
  [tokens]
  (let [token->w-part (fn [token]
                        (let [sym (token-sym token)]
                          (case sym
                            :s (token-text token)
                            :w (name sym)
                            (throw (AssertionError. (str sym " is not a valid token symbol."))))))]
    (apply str (map token->w-part tokens))))

(defn f-patternize
  "Transform a label to a general pattern based on its fields and separators."
[tokens]
(letfn [(field->f-part [field]
          (assert (FIELDS field))
          (name field))
        (token->f-part [token]
          (case (token-sym token)
            :s (token-text token)
            :w (let [fields (token-fields token)]
                 (assert (= (count fields) 1))
                 (field->f-part (first fields)))))]
  (apply str (map token->f-part tokens))))

(defn pattern-set-tokens-fields
  "Assign to each word token the single field found at the same position in
  the field pattern. When the pattern is not compatible with the tokens, the
  tokens are returned unchanged."
  [f-pattern tokens]
  (if (pattern-compatible? f-pattern tokens)
    (let [parts  (pattern-parts f-pattern)
          assign (fn [token part]
                   (if-not (token-sep? token)
                     (token-set-fields token #{(keyword part)})
                     token))]
      ;; tokens and pattern parts are paired positionally
      (map assign tokens parts))
    tokens))

--------------------------------------------------------------------------------
/src/euphony/structs/pqueue.clj:
--------------------------------------------------------------------------------
(ns euphony.structs.pqueue
  (:require [clojure.data.priority-map :refer [priority-map]]))

; CONSTRUCTORS

(defn pqueue
  "Construct a priority queue from a list of elements and priorities."
([elements priorities] (pqueue (zipmap elements priorities)))
([associations] (into (priority-map) associations)))

--------------------------------------------------------------------------------
/src/euphony/system.clj:
--------------------------------------------------------------------------------
(ns euphony.system
  (:require [clojure.java.io :as jio]
            [com.stuartsierra.component :as component]
            [euphony.components.datomic :refer [new-datomic]]))

; DEFAULTS

;; Default configuration; new-system merges the caller's configuration over it.
(def CONF {:datomic {:schema-file (jio/resource "schema.edn")
                     :seeds-file (jio/resource "seeds.edn")
                     :uri "datomic:mem://euphony"}})

; ALIASES

(def start component/start-system)
(def stop component/stop-system)

; CONSTRUCTORS

(defn- components
  "Build the system map; its only component is :datomic, configured from (:datomic conf)."
  [conf]
  (component/system-map
   :datomic (new-datomic (:datomic conf))))

(defn- dependencies
  "Return the dependency map between components (currently empty)."
  [conf] {})

(defn new-system
  "Build a system from `conf` merged over CONF (each top-level section is merged)."
  [conf]
  (let [conf (merge-with merge CONF conf)]
    (component/system-using (components conf)
                            (dependencies conf))))

(defmacro with-system
  "Bind `binding` to a started system built from `conf`, evaluate `body`,
  and stop the system afterwards even when `body` throws."
  [[binding conf] & body]
  `(let [~binding (new-system ~conf)
         ~binding (start ~binding)]
     (try
       ~@body
       (finally
         (stop ~binding)))))

--------------------------------------------------------------------------------
/src/euphony/utils/cli.clj:
--------------------------------------------------------------------------------
(ns euphony.utils.cli
  (:require [clojure.string :as Str]
            [clojure.tools.cli :refer [parse-opts]]))

; MESSAGE FUNCTIONS

(defn argc=-error
  "Build an error message about the number of command-line arguments.
  `argc` is the number of arguments that was expected."
  ;; the message used to read "except exactly", a typo shown to the user
  [argc] (str "Failed to start: expected exactly " argc " arguments."))

(defn error-message
  "Build a single error message from a list of errors."
[errors] (str "ERRORS:" \newline (Str/join \newline errors) \newline))


(defn usage-message
  "Build a usage message from a usage summary and an option summary."
  [usage options] (str "USAGE: " usage \newline \newline
                       "OPTIONS: " \newline options))

; MAIN FUNCTIONS

(defn parse
  "Parse a list of command line arguments given the possible options.

  Returns the result of parse-opts. When the optional `:argc=` value is given
  and the number of positional arguments differs from it, an error message is
  added to the :errors of the result."
  [args options & {:keys [argc=]}]
  (let [result (parse-opts args options)]
    (cond-> result
      ;; check that the number of arguments matches argc= (optional)
      (and argc= (not= (count (:arguments result)) argc=)) (update :errors conj (argc=-error argc=)))))

(defn feedback!
  "Display a feedback message to the user.

  The usage message is printed on *out*. With `errors`, a non-empty error
  list is first printed on *err*."
  ([usage options]
   (println (usage-message usage options)))
  ([usage options errors]
   (binding [*out* *err*]
     (when (not-empty errors)
       (println (error-message errors))))
   (feedback! usage options)))

--------------------------------------------------------------------------------
/src/euphony/utils/db.clj:
--------------------------------------------------------------------------------
(ns euphony.utils.db
  (:require [datomic.api :as d]))

; MAIN FUNCTIONS

(def q d/q)
(def db d/db)
(def pull d/pull)
(def entity d/entity)
;; Like d/with, but returns only the :db-after value of its result.
(def with (comp :db-after d/with))

(defn transact
  "Transact `tx-data` on `conn` and return `conn`, so that calls can be chained.

  The future returned by d/transact is dereferenced, so a failed transaction
  throws here instead of being silently discarded."
  [conn tx-data]
  @(d/transact conn tx-data)
  conn)

--------------------------------------------------------------------------------
/src/euphony/utils/io.clj:
--------------------------------------------------------------------------------
(ns euphony.utils.io
  (:require [cheshire.core :as json]
            [clojure.edn :as edn]
            [clojure.java.io :as jio]
            [loom.io :as gio]))

; SYSTEM

(defn filepath
  "Build the file object for `file` inside `dir`."
  [dir file]
  (jio/file dir file))

(defn mkdir!
[dir]
(jio/make-parents dir)
(.mkdir (jio/as-file dir)))

; PARSERS

(def json-line->struct json/parse-string)

; READERS

(defn read-edn!
  "Read one EDN value from the resource at `path`; the reader is closed afterwards."
  [path]
  (with-open [source (java.io.PushbackReader. (jio/reader path))]
    (edn/read source)))

(defn read-json!
  "Parse the JSON content of the resource at `path`; the reader is closed afterwards."
  [path]
  (with-open [source (jio/reader path)]
    (json/parse-stream source)))

; WRITERS

(defn write-dot!
  "Write the output of gio/dot-str for `graph` to `path`."
  [path graph]
  (with-open [sink (jio/writer path)]
    (let [dot (gio/dot-str graph)]
      (.write sink dot))))

(defn write-json!
  "Write `structure` to `path` as pretty-printed JSON."
  [path structure]
  (with-open [sink (jio/writer path)]
    (json/generate-stream structure sink {:pretty true})))

--------------------------------------------------------------------------------
/src/euphony/utils/log.clj:
--------------------------------------------------------------------------------
(ns euphony.utils.log
  (:require [taoensso.timbre :as log]))

; DEFAULTS

(def LEVELS #{:debug :info :warn :error :fatal})

; CONFIGURATIONS

(def set-level! log/set-level!)

;; Evaluate `body` with log/*config* rebound to a copy of the current
;; configuration whose :level is `level`.
(defmacro with-level [level & body]
  `(binding [log/*config* (assoc log/*config* :level ~level)]
     ~@body))

; MAIN FUNCTIONS

;; Thin wrapper: expands to a log/log call with the given level and arguments.
(defmacro log [level & args]
  `(log/log ~level ~@args))

--------------------------------------------------------------------------------
/target/uberjar/euphony.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fmind/euphony/acd4b7efbebd5e11d8b69f87ca7194b945c41e1c/target/uberjar/euphony.jar

--------------------------------------------------------------------------------
/target/uberjar/lib-euphony.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fmind/euphony/acd4b7efbebd5e11d8b69f87ca7194b945c41e1c/target/uberjar/lib-euphony.jar

--------------------------------------------------------------------------------
/test/data/cluster-datoms.edn:
--------------------------------------------------------------------------------
[
{:db/id [:antivirus.result/id "av1++ads.adrd"] :antivirus.result/name-cluster "adrd" :antivirus.result/type-cluster "adware"}
{:db/id [:antivirus.result/id "av1++trj.dogwin"] :antivirus.result/name-cluster "dogwin" :antivirus.result/type-cluster "trojan"}
{:db/id [:antivirus.result/id "av2++adware.pjapps.a"] :antivirus.result/name-cluster "adrd" :antivirus.result/type-cluster "adware"}
{:db/id [:antivirus.result/id "av2++adware.pjapps.b"] :antivirus.result/name-cluster "adrd" :antivirus.result/type-cluster "adware"}
{:db/id [:antivirus.result/id "av2++trojan.koguo.a"] :antivirus.result/name-cluster "dogwin" :antivirus.result/type-cluster "trojan"}
{:db/id [:antivirus.result/id "av2++trojan.dogwin.a"] :antivirus.result/name-cluster "dogwin" :antivirus.result/type-cluster "trojan"}
{:db/id [:antivirus.result/id "av2++trojan.dogwin.b"] :antivirus.result/name-cluster "dogwin" :antivirus.result/type-cluster
"trojan"} 9 | {:db/id [:antivirus.result/id "av3++android.adrd.1"] :antivirus.result/name-cluster "adrd":antivirus.result/plat-cluster "android"} 10 | {:db/id [:antivirus.result/id "av3++android.koguo.1"] :antivirus.result/name-cluster "dogwin":antivirus.result/plat-cluster "android"} 11 | {:db/id [:antivirus.result/id "av3++android.pjapps.1"] :antivirus.result/name-cluster "pjapps":antivirus.result/plat-cluster "android"} 12 | {:db/id [:antivirus.result/id "av4++android.koguo.1"] :antivirus.result/name-cluster "koguo"} 13 | {:db/id [:antivirus.result/id "av4++trojan.dogwin.a"] :antivirus.result/name-cluster "dogwin"} 14 | {:db/id [:antivirus.result/id "av5++generic"] :antivirus.result/name-cluster "generic"} 15 | ] 16 | -------------------------------------------------------------------------------- /test/data/import-datoms.edn: -------------------------------------------------------------------------------- 1 | [ 2 | ;; ANTIVIRUS 3 | {:db/id "$av1" :antivirus.system/name "av1"} 4 | {:db/id "$av2" :antivirus.system/name "av2"} 5 | {:db/id "$av3" :antivirus.system/name "av3"} 6 | {:db/id "$av4" :antivirus.system/name "av4"} 7 | {:db/id "$av5" :antivirus.system/name "av5"} 8 | ;; LABELS 9 | {:db/id "$ads.adrd" :antivirus.label/label "ads.adrd"} 10 | {:db/id "$adware.pjapps.a" :antivirus.label/label "adware.pjapps.a"} 11 | {:db/id "$adware.pjapps.b" :antivirus.label/label "adware.pjapps.b"} 12 | {:db/id "$andr.adrd" :antivirus.label/label "andr.adrd"} 13 | {:db/id "$android.adrd.1" :antivirus.label/label "android.adrd.1"} 14 | {:db/id "$android.koguo.1" :antivirus.label/label "android.koguo.1"} 15 | {:db/id "$android.pjapps.1" :antivirus.label/label "android.pjapps.1"} 16 | {:db/id "$dogwin" :antivirus.label/label "dogwin"} 17 | {:db/id "$dogwin.b" :antivirus.label/label "dogwin.b"} 18 | {:db/id "$generic" :antivirus.label/label "generic"} 19 | {:db/id "$trj.dogwin" :antivirus.label/label "trj.dogwin"} 20 | {:db/id "$trojan.dogwin.a" :antivirus.label/label 
"trojan.dogwin.a"} 21 | {:db/id "$trojan.dogwin.b" :antivirus.label/label "trojan.dogwin.b"} 22 | {:db/id "$trojan.koguo.a" :antivirus.label/label "trojan.koguo.a"} 23 | ;; ;; RESULTS 24 | {:db/id "$av1++ads.adrd" :antivirus.result/id "av1++ads.adrd" :antivirus.result/system "$av1" :antivirus.result/label "$ads.adrd"} 25 | {:db/id "$av1++trj.dogwin" :antivirus.result/id "av1++trj.dogwin" :antivirus.result/system "$av1" :antivirus.result/label "$trj.dogwin"} 26 | {:db/id "$av2++adware.pjapps.a" :antivirus.result/id "av2++adware.pjapps.a" :antivirus.result/system "$av2" :antivirus.result/label "$adware.pjapps.a"} 27 | {:db/id "$av2++adware.pjapps.b" :antivirus.result/id "av2++adware.pjapps.b" :antivirus.result/system "$av2" :antivirus.result/label "$adware.pjapps.b"} 28 | {:db/id "$av2++trojan.dogwin.a" :antivirus.result/id "av2++trojan.dogwin.a" :antivirus.result/system "$av2" :antivirus.result/label "$trojan.dogwin.a"} 29 | {:db/id "$av2++trojan.dogwin.b" :antivirus.result/id "av2++trojan.dogwin.b" :antivirus.result/system "$av2" :antivirus.result/label "$trojan.dogwin.b"} 30 | {:db/id "$av2++trojan.koguo.a" :antivirus.result/id "av2++trojan.koguo.a" :antivirus.result/system "$av2" :antivirus.result/label "$trojan.koguo.a"} 31 | {:db/id "$av3++android.adrd.1" :antivirus.result/id "av3++android.adrd.1" :antivirus.result/system "$av3" :antivirus.result/label "$android.adrd.1"} 32 | {:db/id "$av3++android.koguo.1" :antivirus.result/id "av3++android.koguo.1" :antivirus.result/system "$av3" :antivirus.result/label "$android.koguo.1"} 33 | {:db/id "$av3++android.pjapps.1" :antivirus.result/id "av3++android.pjapps.1" :antivirus.result/system "$av3" :antivirus.result/label "$android.pjapps.1"} 34 | {:db/id "$av3++dogwin" :antivirus.result/id "av3++dogwin" :antivirus.result/system "$av3" :antivirus.result/label "$dogwin"} 35 | {:db/id "$av4++andr.adrd" :antivirus.result/id "av4++andr.adrd" :antivirus.result/system "$av4" :antivirus.result/label "$andr.adrd"} 36 | {:db/id 
"$av4++android.koguo.1" :antivirus.result/id "av4++android.koguo.1" :antivirus.result/system "$av4" :antivirus.result/label "$android.koguo.1"} 37 | {:db/id "$av4++dogwin.b" :antivirus.result/id "av4++dogwin.b" :antivirus.result/system "$av4" :antivirus.result/label "$dogwin.b"} 38 | {:db/id "$av4++trojan.dogwin.a" :antivirus.result/id "av4++trojan.dogwin.a" :antivirus.result/system "$av4" :antivirus.result/label "$trojan.dogwin.a"} 39 | {:db/id "$av5++generic" :antivirus.result/id "av5++generic" :antivirus.result/system "$av5" :antivirus.result/label "$generic"} 40 | ;; SCANS 41 | {:db/id "$s1" :antivirus.scan/id "s1" :antivirus.scan/date #inst "2017" 42 | :antivirus.scan/results ["$av1++trj.dogwin", "$av2++trojan.dogwin.a", "$av3++android.koguo.1", "$av4++android.koguo.1", "$av5++generic"]} 43 | {:db/id "$s2" :antivirus.scan/id "s2" :antivirus.scan/date #inst "2017" 44 | :antivirus.scan/results ["$av1++trj.dogwin", "$av2++trojan.koguo.a", "$av3++android.koguo.1", , "$av5++generic"]} 45 | {:db/id "$s3" :antivirus.scan/id "s3" :antivirus.scan/date #inst "2017" 46 | :antivirus.scan/results ["$av1++trj.dogwin", "$av2++trojan.dogwin.b", "$av3++dogwin", "$av4++dogwin.b", ]} 47 | {:db/id "$s4" :antivirus.scan/id "s4" :antivirus.scan/date #inst "2017" 48 | :antivirus.scan/results ["$av1++ads.adrd", "$av2++adware.pjapps.a", "$av3++android.pjapps.1", "$av4++trojan.dogwin.a", "$av5++generic"]} 49 | {:db/id "$s5" :antivirus.scan/id "s5" :antivirus.scan/date #inst "2017" 50 | :antivirus.scan/results ["$av1++ads.adrd", "$av2++adware.pjapps.b", "$av3++android.adrd.1", "$av4++andr.adrd", "$av5++generic"]} 51 | ;; REPORTS 52 | {:antivirus.report/resource "f1" :antivirus.report/scan "$s1"} 53 | {:antivirus.report/resource "f2" :antivirus.report/scan "$s2"} 54 | {:antivirus.report/resource "f3" :antivirus.report/scan "$s3"} 55 | {:antivirus.report/resource "f4" :antivirus.report/scan "$s4"} 56 | {:antivirus.report/resource "f5" :antivirus.report/scan "$s5"} 57 | ;; TRUTH 58 | 
{:ground-truth/resource "f1" :ground-truth/name "dogwin" :ground-truth/type "trojan" :ground-truth/plat "android"} 59 | {:ground-truth/resource "f2" :ground-truth/name "dogwin" :ground-truth/type "trojan" :ground-truth/plat "android"} 60 | {:ground-truth/resource "f3" :ground-truth/name "dogwin" :ground-truth/type "trojan" :ground-truth/plat "android"} 61 | {:ground-truth/resource "f4" :ground-truth/name "adrd" :ground-truth/type "adware" :ground-truth/plat "android"} 62 | {:ground-truth/resource "f5" :ground-truth/name "adrd" :ground-truth/type "adware" :ground-truth/plat "android"} 63 | ] 64 | -------------------------------------------------------------------------------- /test/data/parse-datoms.edn: -------------------------------------------------------------------------------- 1 | [ 2 | {:db/id [:antivirus.label/label "ads.adrd"] :antivirus.label/words-pattern "w.w" :antivirus.label/fields-pattern "T.N" :antivirus.label/name-part "adrd" :antivirus.label/type-part "ads"} 3 | {:db/id [:antivirus.label/label "adware.pjapps.a"] :antivirus.label/words-pattern "w.w.w" :antivirus.label/fields-pattern "T.N.I" :antivirus.label/name-part "pjapps" :antivirus.label/type-part "adware"} 4 | {:db/id [:antivirus.label/label "adware.pjapps.b"] :antivirus.label/words-pattern "w.w.w" :antivirus.label/fields-pattern "T.N.I" :antivirus.label/name-part "pjapps" :antivirus.label/type-part "adware"} 5 | {:db/id [:antivirus.label/label "andr.adrd"] :antivirus.label/words-pattern "w.w" :antivirus.label/fields-pattern "P.N" :antivirus.label/name-part "adrd" :antivirus.label/plat-part "andr"} 6 | {:db/id [:antivirus.label/label "android.adrd.1"] :antivirus.label/words-pattern "w.w.w" :antivirus.label/fields-pattern "P.N.I" :antivirus.label/name-part "adrd" :antivirus.label/plat-part "android"} 7 | {:db/id [:antivirus.label/label "android.koguo.1"] :antivirus.label/words-pattern "w.w.w" :antivirus.label/fields-pattern "P.N.I" :antivirus.label/name-part "koguo" :antivirus.label/plat-part 
"android"} 8 | {:db/id [:antivirus.label/label "android.pjapps.1"] :antivirus.label/words-pattern "w.w.w" :antivirus.label/fields-pattern "P.N.I" :antivirus.label/name-part "pjapps" :antivirus.label/plat-part "android"} 9 | {:db/id [:antivirus.label/label "dogwin"] :antivirus.label/words-pattern "w"} 10 | {:db/id [:antivirus.label/label "dogwin.b"] :antivirus.label/words-pattern "w.w"} 11 | {:db/id [:antivirus.label/label "generic"] :antivirus.label/words-pattern "w" :antivirus.label/fields-pattern "I" :antivirus.label/name-part "generic"} 12 | {:db/id [:antivirus.label/label "trj.dogwin"] :antivirus.label/words-pattern "w.w" :antivirus.label/fields-pattern "T.N" :antivirus.label/name-part "dogwin" :antivirus.label/type-part "trj"} 13 | {:db/id [:antivirus.label/label "trojan.dogwin.a"] :antivirus.label/words-pattern "w.w.w" :antivirus.label/fields-pattern "T.N.I" :antivirus.label/name-part "dogwin" :antivirus.label/type-part "trojan"} 14 | {:db/id [:antivirus.label/label "trojan.dogwin.b"] :antivirus.label/words-pattern "w.w.w" :antivirus.label/fields-pattern "T.N.I" :antivirus.label/name-part "dogwin" :antivirus.label/type-part "trojan"} 15 | {:db/id [:antivirus.label/label "trojan.koguo.a"] :antivirus.label/words-pattern "w.w.w" :antivirus.label/fields-pattern "T.N.I" :antivirus.label/name-part "koguo" :antivirus.label/type-part "trojan"} 16 | ] 17 | -------------------------------------------------------------------------------- /test/data/reports.vt: -------------------------------------------------------------------------------- 1 | {"positives": 2, "resource": "5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4", "verbose_msg": "Scan finished, information embedded", "scans": {"NANO-Antivirus": {"result": null, "version": "1.0.38.8984", "detected": false, "update": "20160713"}, "AVware": {"result": "Trojan.AndroidOS.Generic.A", "version": "1.5.0.42", "detected": true, "update": "20160713"}, "ESET-NOD32": {"result": "Android/Adrd.A", "version": 
"13792", "detected": true, "update": "20160712"}}, "sha1": "09b143b430e836c513279c0209b7229a4d29a18c", "total": 55, "scan_id": "5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4-1468430330", "permalink": "https://www.virustotal.com/file/5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4/analysis/1468430330/", "sha256": "5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4", "scan_date": "2016-07-13 17:18:50", "md5": "c05c25b769919fd7f1b12b4800e374b5", "response_code": 1} 2 | {"positives": 1, "resource": "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d", "verbose_msg": "Scan finished, information embedded", "scans": {"Tencent": {"result": "a.remote.adrd", "version": "1.0.0.1", "detected": true, "update": "20160707"}}, "sha1": "32cd5dbef434b926ce34e89f0d185fe8d1b5fdfb", "total": 54, "scan_id": "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d-1467894540", "permalink": "https://www.virustotal.com/file/2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d/analysis/1467894540/", "sha256": "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d", "scan_date": "2016-07-07 12:29:00", "md5": "39c1bfbb62687e1b1d2bc4d273600448", "response_code": 1} 3 | {"positives": 0, "resource": "b15228ebb493aa324d38a9b567e41505fcc6b6e179cdc79a36ec93dd0252f675", "verbose_msg": "Scan finished, information embedded", "scans": {"Antiy-AVL": {"result": null, "version": "1.0.0.1", "detected": false, "update": "20160713"}}, "sha1": "40156a176bb4554853f767bb6647fd0ac1925eac", "total": 55, "scan_id": "b15228ebb493aa324d38a9b567e41505fcc6b6e179cdc79a36ec93dd0252f675-1468430609", "permalink": "https://www.virustotal.com/file/b15228ebb493aa324d38a9b567e41505fcc6b6e179cdc79a36ec93dd0252f675/analysis/1468430609/", "sha256": "b15228ebb493aa324d38a9b567e41505fcc6b6e179cdc79a36ec93dd0252f675", "scan_date": "2016-07-13 17:23:29", "md5": "5239221623bf45f742aab926273df4eb", "response_code": 1} 4 | 
--------------------------------------------------------------------------------
/test/data/truths.gt:
--------------------------------------------------------------------------------
{"resource": "f63256cf4eef0a60fe56989b1474dd9b0b2bb580ce9fd262b18592bf0506f911", "name": "Adwo", "type": "adware", "platform": "android"}
{"resource": "a9cbe3e3d446cea683c1e72f2994f40024afed1bb1186b27690ff21741046312", "name": "Dowgin", "type": "trojan", "platform": "linux"}
{"resource": "9da56b0cb31d412a1ed20fb089f5364acf6b7c0a77c1774c202cd8ff6e13a1ad", "type": "ads"}
{"resource": "a0196e43aaf90bef85b9661ec23037c79d246d94e1100192295079529d529d97", "name": "SINGLETON:a0196e43aaf90bef85b9661ec23037c79d246d94e1100192295079529d529d97", "platform": "android"}

--------------------------------------------------------------------------------
/test/euphony/commands/analyzers_test.clj:
--------------------------------------------------------------------------------
(ns euphony.commands.analyzers-test
  (:require [clojure.test :as t]
            [euphony.commands.analyzers :refer :all]))

;; Fixture: expected family for each resource id.
(def TRUTHS {1 "A"
             2 "A"
             3 "A"
             4 "B"
             5 "B"
             6 "C"})

;; Fixture: proposed family for each resource id; it agrees with TRUTHS on
;; resources 1, 5 and 6 only.
(def PROPOS {1 "A"
             2 "B"
             3 "C"
             4 "A"
             5 "B"
             6 "C"})

;; Fixture: label string -> its tokens. A word token is [:w text fields] and
;; a separator token is [:s text].
(def PARSE {"android.ddlight"
            [[:w "android" #{:P}] [:s "."] [:w "ddlight" #{:N}]],
            "elf/backdoor.tyip-"
            [[:w "elf" #{:I :P :N}] [:s "/"] [:w "backdoor" #{:T}] [:s "."] [:w "tyip" #{:I :P :N}] [:s "-"]],
            "trojan.pjapps.bdokty"
            [[:w "trojan" #{:T}] [:s "."] [:w "pjapps" #{:N}] [:s "."] [:w "bdokty" #{:I :P}]],
            "android/basebridge"
            [[:w "android" #{:P}] [:s "/"] [:w "basebridge" #{:N}]],
            "elf/trojan.fccn-10"
            [[:w "elf" #{:I :P :N}] [:s "/"] [:w "trojan" #{:T}] [:s "."]
             [:w "fccn" #{:I :P :N}] [:s "-"] [:w "10" #{:I}]]})

;; Fixture: two-element key -> cluster name. The keys look like
;; [antivirus label] pairs -- TODO confirm against analyze-cluster.
(def CLUSTER {["av1" "l1"] "c1"
              ["av1" "l2"] "c2"
              ["av2" "l1"] "c1"
              ["av2" "la"] "c2"
              ["av3" "la"] "c1"})

;
; STATISTICS

;; malstats yields one entry per resource, with :match? true when the truth
;; and the proposed label are equal.
(t/deftest can-compute-malware-stats
  (t/is (= (malstats TRUTHS PROPOS)
           [{:resource 1, :truth "A", :label "A", :match? true}
            {:resource 2, :truth "A", :label "B", :match? false}
            {:resource 3, :truth "A", :label "C", :match? false}
            {:resource 4, :truth "B", :label "A", :match? false}
            {:resource 5, :truth "B", :label "B", :match? true}
            {:resource 6, :truth "C", :label "C", :match? true}])))

;; famstats yields six entries here: three with :external? true followed by
;; three with :external? false.
(t/deftest can-compute-family-stats
  (t/is (= (famstats TRUTHS PROPOS)
           [{:external? true, :family "A", :family-card 3, :inter 1, :match "C", :match-card 2}
            {:external? true, :family "B", :family-card 2, :inter 1, :match "B", :match-card 2}
            {:external? true, :family "C", :family-card 1, :inter 1, :match "C", :match-card 2}
            {:external? false, :family "A", :family-card 2, :inter 1, :match "B", :match-card 2}
            {:external? false, :family "B", :family-card 2, :inter 1, :match "B", :match-card 2}
            {:external? false, :family "C", :family-card 2, :inter 1, :match "C", :match-card 1}])))

; DATA ANALYSIS

;; Summary counters computed from the PARSE fixture.
(t/deftest can-analyze-parse
  (t/is (= (analyze-parse PARSE)
           {:labels 5,
            :with-family 3,
            :distinct-seps 3,
            :distinct-words 11,
            :ambiguous-words 4,
            :incomplete-assignments 3})))

;; Summary counters computed from the CLUSTER fixture.
(t/deftest can-analyze-cluster
  (t/is (= (analyze-cluster CLUSTER)
           {:antivirus 3
            :vendor-families 3
            :cluster-families 2})))

;; Three of the six malstats entries match, hence an accuracy of 1/2.
(t/deftest can-analyze-malstats-output
  (t/is (= (analyze-malstats (malstats TRUTHS PROPOS))
           {:accuracy 1/2})))

;; Summary computed from the famstats output of the same fixtures.
(t/deftest can-analyze-famstats-output
  (t/is (= (analyze-famstats (famstats TRUTHS PROPOS))
           {:proposed 3, :expected 3, :precision 1/2, :recall 1/2, :f1 1/2})))

--------------------------------------------------------------------------------
/test/euphony/commands/clusterer_test.clj:
--------------------------------------------------------------------------------
(ns euphony.commands.clusterer-test 2 | (:require [clojure.test :as t] 3 | [euphony.commands.clusterer :refer :all] 4 | [euphony.structs.graph :as g] 5 | [euphony.test-helpers :as th])) 6 | 7 | (def RESULTS '((["1" "java"] ["2" "java"] ["3" "java7"] ["4" "androjava"]) 8 | (["1" "java"] ["2" "jruby"] ["3" "ruby"] ["4" "generic"]) 9 | (["1" "java"] ["2" "java"] ["3" "java8"] ["4" "androjava"]) 10 | (["1" "c"] ["2" "csharp"] ["3" "csharpe"] ["4" "generic"]) 11 | (["1" "c"] ["2" "csharp"] ["3" "csharpe"]))) 12 | 13 | (def GRAPH (results-graph RESULTS)) 14 | (def OPTIONS {:threshold 0.055}) 15 | 16 | ; CONSTRUCTORS 17 | 18 | (t/deftest can-construct-results-graph 19 | (t/is (= (count (g/nodes GRAPH)) 11)) 20 | (t/is (= (count (g/edges GRAPH)) 21)) 21 | (t/is (th/set= (g/nodes GRAPH) 22 | #{["1" "c"] ["1" "java"] 23 | ["2" "csharp"] ["2" "java"] ["2" "jruby"] 24 | ["3" "csharpe"] ["3" "java7"] ["3" "java8"] ["3" "ruby"] 25 | ["4" "androjava"] ["4" "generic"]})) 26 | (t/is (th/set= (g/edges GRAPH) 27 | #{[["1" "c"] ["4" "generic"]] [["1" "java"] ["2" "jruby"]] 28 | [["1" "java"] ["4" "androjava"]] [["1" "java"] ["4" "generic"]] 29 | [["2" "csharp"] ["1" "c"]] [["2" "csharp"] ["3" "csharpe"]] [["2" "csharp"] ["4" "generic"]] 30 | [["2" "java"] ["1" "java"]] [["2" "java"] ["3" "java7"]] [["2" "java"] ["4" "androjava"]] 31 | [["2" "jruby"] ["4" "generic"]] 32 | [["3" "csharpe"] ["1" "c"]] [["3" "csharpe"] ["4" "generic"]] [["3" "java7"] ["1" "java"]] 33 | [["3" "java7"] ["4" "androjava"]] [["3" "java8"] ["1" "java"]] [["3" "java8"] ["2" "java"]] 34 | [["3" "java8"] ["4" "androjava"]] [["3" "ruby"] ["1" "java"]] [["3" "ruby"] ["2" "jruby"]] 35 | [["3" "ruby"] ["4" "generic"]]})) 36 | (t/is (every? 
(partial g/weight GRAPH) (g/edges GRAPH)))) 37 | 38 | ; MAIN FUNCTIONS 39 | 40 | (t/deftest can-cluster-results 41 | (let [mapping (results-clusters GRAPH OPTIONS)] 42 | (t/is (= mapping ; wrapped in t/is: a bare (= ...) is evaluated and discarded, so nothing was asserted 43 | {["1" "c"] "c", 44 | ["1" "java"] "java", 45 | ["2" "csharp"] "c", 46 | ["2" "java"] "java", 47 | ["2" "jruby"] "jruby", 48 | ["3" "csharpe"] "c", 49 | ["3" "java7"] "java", 50 | ["3" "java8"] "java", 51 | ["3" "ruby"] "jruby", 52 | ["4" "androjava"] "java", 53 | ["4" "generic"] "generic"})))) 54 | -------------------------------------------------------------------------------- /test/euphony/commands/importer_test.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.commands.importer-test 2 | (:require [clojure.test :as t] 3 | [euphony.commands.importer :refer :all] 4 | [euphony.test-system :as ts])) 5 | 6 | (t/use-fixtures :once ts/with-conn-initial) 7 | 8 | (def TRUTHS-FILE "test/data/truths.gt") 9 | (def REPORTS-FILE "test/data/reports.vt") 10 | 11 | ; MAIN FUNCTIONS 12 | 13 | (t/deftest can-import-truths-file 14 | (t/is (some? (import-to-connection! truths-in-json TRUTHS-FILE ts/*conn-initial*))) 15 | (t/is (= (import-to-memory!
truths-in-json TRUTHS-FILE) 16 | '[[{:ground-truth/resource "f63256cf4eef0a60fe56989b1474dd9b0b2bb580ce9fd262b18592bf0506f911", 17 | :ground-truth/name "adwo",:ground-truth/type "adware",:ground-truth/plat "android"}] 18 | [{:ground-truth/resource "a9cbe3e3d446cea683c1e72f2994f40024afed1bb1186b27690ff21741046312", 19 | :ground-truth/name "dowgin",:ground-truth/type "trojan",:ground-truth/plat "linux"}] 20 | [{:ground-truth/resource "9da56b0cb31d412a1ed20fb089f5364acf6b7c0a77c1774c202cd8ff6e13a1ad", 21 | :ground-truth/type "ads"}] 22 | [{:ground-truth/resource "a0196e43aaf90bef85b9661ec23037c79d246d94e1100192295079529d529d97", 23 | :ground-truth/name "singleton:a0196e43aaf90bef85b9661ec23037c79d246d94e1100192295079529d529d97", 24 | :ground-truth/plat "android"}] 25 | []]))) 26 | 27 | (t/deftest can-import-reports-file 28 | (t/is (some? (import-to-connection! reports-in-json REPORTS-FILE ts/*conn-initial*))) 29 | (t/is (= (import-to-memory! reports-in-json REPORTS-FILE) 30 | '[[{:db/id "avware", :antivirus.system/name "avware"} 31 | {:db/id "trojan.androidos.generic.a",:antivirus.label/label "trojan.androidos.generic.a"} 32 | {:db/id "avware++trojan.androidos.generic.a",:antivirus.result/id "avware++trojan.androidos.generic.a", 33 | :antivirus.result/system "avware",:antivirus.result/label "trojan.androidos.generic.a"} 34 | {:db/id "eset-nod32", :antivirus.system/name "eset-nod32"} 35 | {:db/id "android/adrd.a", :antivirus.label/label "android/adrd.a"} 36 | {:db/id "eset-nod32++android/adrd.a",:antivirus.result/id "eset-nod32++android/adrd.a", 37 | :antivirus.result/system "eset-nod32",:antivirus.result/label "android/adrd.a"} 38 | {:db/id "5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4-1468430330", 39 | :antivirus.scan/id "5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4-1468430330", 40 | :antivirus.scan/date #inst "2016-07-13T15:18:50.000-00:00", 41 | :antivirus.scan/results ("avware++trojan.androidos.generic.a" 
"eset-nod32++android/adrd.a")} 42 | {:antivirus.report/resource "5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4", 43 | :antivirus.report/scan "5e82d73a3b2d4df192d674729f9578c4081d5096d5e3641bf8b233e1bee248d4-1468430330", 44 | :db/txInstant #inst "2016-07-13T15:18:50.000-00:00"}] 45 | [{:db/id "tencent", :antivirus.system/name "tencent"} 46 | {:db/id "a.remote.adrd", :antivirus.label/label "a.remote.adrd"} 47 | {:db/id "tencent++a.remote.adrd",:antivirus.result/id "tencent++a.remote.adrd", 48 | :antivirus.result/system "tencent",:antivirus.result/label "a.remote.adrd"} 49 | {:db/id "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d-1467894540", 50 | :antivirus.scan/id "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d-1467894540", 51 | :antivirus.scan/date #inst "2016-07-07T10:29:00.000-00:00", 52 | :antivirus.scan/results ("tencent++a.remote.adrd")} 53 | {:antivirus.report/resource "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d", 54 | :antivirus.report/scan "2357651f3d15838330368dacf37252f1ff2362ce7fd84d42c175c4f3b65a8d8d-1467894540", 55 | :db/txInstant #inst "2016-07-07T10:29:00.000-00:00"}] 56 | []]))) 57 | -------------------------------------------------------------------------------- /test/euphony/functions/counters_test.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.functions.counters-test 2 | (:require [clojure.test :as t] 3 | [euphony.functions.counters :refer :all])) 4 | 5 | (def SCALARS-1D '("A" "B" "C" "A" "B" "A")) 6 | 7 | (def VECTORS-1D '([1 "A"] [2 "B"] [3 "C"] [1 "A"] [2 "B"] [1 "A"])) 8 | 9 | (def SCALARS-2D '(["A" "B" "C"] 10 | ["A" "B"] 11 | ["A"])) 12 | 13 | (def VECTORS-2D '([[1 "A"] [2 "B"] [3 "C"]] 14 | [[1 "A"] [2 "B"]] 15 | [[1 "A"]])) 16 | 17 | ; MAIN FUNCTIONS 18 | 19 | (t/deftest can-count-flat-items 20 | (t/is (= (count-flat-items SCALARS-1D) {"A" 3, "B" 2, "C" 1})) 21 | (t/is (= (count-flat-items VECTORS-1D) {[1 
"A"] 3, [2 "B"] 2, [3 "C"] 1}))) 22 | 23 | (t/deftest can-count-nested-items 24 | (t/is (= (count-nested-items SCALARS-2D) 25 | {"A" 3, "B" 2, "C" 1})) 26 | (t/is (= (count-nested-items VECTORS-2D) 27 | {[1 "A"] 3, [2 "B"] 2, [3 "C"] 1}))) 28 | 29 | (t/deftest can-count-assocs-items 30 | (t/is (= (count-assocs-items 2 SCALARS-2D) 31 | {#{"B" "A"} 2 32 | #{"C" "A"} 1 33 | #{"C" "B"} 1})) 34 | (t/is (= (count-assocs-items 2 VECTORS-2D) 35 | {#{[2 "B"] [1 "A"]} 2 36 | #{[1 "A"] [3 "C"]} 1 37 | #{[2 "B"] [3 "C"]} 1}))) 38 | -------------------------------------------------------------------------------- /test/euphony/functions/metrics_test.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.functions.metrics-test 2 | (:require [clojure.test :as t] 3 | [euphony.functions.metrics :refer :all])) 4 | 5 | ; SET METRICS 6 | 7 | (t/deftest can-compute-set-completeness 8 | (t/is (thrown? AssertionError (set-completeness 0 0 0))) 9 | (t/are [a b i, output] (= (set-completeness a b i) output) 10 | 5 5 5, 1 11 | 5 3 3, 1 12 | 3 5 3, 1 13 | 5 3 2, 2/3 14 | 3 5 2, 2/3 15 | 5 3 1, 1/3 16 | 3 5 1, 1/3 17 | 5 3 0, 0 18 | 3 5 0, 0 19 | 5 5 0, 0)) 20 | 21 | (t/deftest can-compute-set-granularity 22 | (t/is (thrown? 
AssertionError (set-granularity 0 0))) 23 | (t/are [a b, output] (= (set-granularity a b) output) 24 | 5 5, 1 25 | 5 3, 3/5 26 | 3 5, 3/5 27 | 5 1, 1/5 28 | 1 5, 1/5 29 | 0 5, 0 30 | 5 0, 0)) 31 | 32 | ; STRING METRICS 33 | 34 | (t/deftest can-compute-string-similarity 35 | (t/is (= (str-similarity "clojure" "clojure") 1.0)) 36 | (t/is (< 0.0 (str-similarity "clojure" "clojar") (str-similarity "clojure" "closure") 1.0)) 37 | (t/is (= (str-similarity "clojure" "php") 0.0))) 38 | -------------------------------------------------------------------------------- /test/euphony/functions/voters_test.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.functions.voters-test 2 | (:require [clojure.test :as t] 3 | [euphony.functions.voters :refer :all])) 4 | 5 | (defonce INDEX {"f1" ["dogwin", "dogwin", "dogwin", "generic", "koguo"] 6 | "f2" ["dogwin", "dogwin", "dogwin", "generic", ] 7 | "f3" ["dogwin", "generic", ] 8 | "f4" ["dogwin", "pjapps", "adrd", "adrd", "generic"] 9 | "f5" ["adrd", "adrd", "adrd", "generic", ]}) 10 | 11 | ; MAIN FUNCTIONS 12 | 13 | (t/deftest can-vote-from-index 14 | (t/is (= (vote INDEX) 15 | {"f1" {"dogwin" 3, "generic" 1, "koguo" 1}, 16 | "f2" {"dogwin" 3, "generic" 1}, 17 | "f3" {"dogwin" 1, "generic" 1}, 18 | "f4" {"dogwin" 1, "pjapps" 1, "adrd" 2, "generic" 1}, 19 | "f5" {"adrd" 3, "generic" 1}}))) 20 | 21 | (t/deftest can-elect-from-index 22 | (t/is (= (vote-and-elect INDEX) 23 | {"f1" "dogwin", 24 | "f2" "dogwin", 25 | "f3" "dogwin", 26 | "f4" "adrd", 27 | "f5" "adrd"}))) 28 | -------------------------------------------------------------------------------- /test/euphony/structs/cograph_test.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.structs.cograph-test 2 | (:require [clojure.test :as t] 3 | [euphony.structs 4 | [cograph :refer :all] 5 | [graph :as g]] 6 | [euphony.test-helpers :as th])) 7 | 8 | (defonce SEQUENCES-2 '(([1 :A] [2 :B] [3 
:C]) 9 | ([1 :A] [2 :B] [3 :D]) 10 | ([1 :A] [2 :E] [3 :C]))) 11 | 12 | (def COGRAPH (cograph SEQUENCES-2)) 13 | 14 | ; CONSTRUCTORS 15 | 16 | (t/deftest can-construct-cograph 17 | (t/is (th/set= (g/nodes COGRAPH) #{[1 :A] [2 :B] [3 :C] [3 :D] [2 :E]})) 18 | (t/is (th/set= (g/edges COGRAPH) 19 | #{[[1 :A] [3 :C]] [[2 :B] [1 :A]] 20 | [[2 :B] [3 :C]] [[2 :E] [1 :A]] 21 | [[2 :E] [3 :C]] [[3 :D] [1 :A]] 22 | [[3 :D] [2 :B]]}))) 23 | 24 | ; GETTERS 25 | 26 | (t/deftest can-get-occurrence 27 | (t/are [node, output] (= (occur COGRAPH node) output) 28 | [1 :A], 3 29 | [2 :B], 2 30 | [3 :C], 2 31 | [3 :D], 1 32 | [2 :E], 1 33 | ;; does not exist 34 | [5 :Z], nil)) 35 | 36 | (t/deftest can-get-co-occurrence 37 | (t/are [edge, output] (= (co-occur COGRAPH edge) output) 38 | [[2 :B] [3 :C]], 1 39 | [[3 :C] [2 :B]], 1 40 | [[1 :A] [2 :B]], 2 41 | [[2 :B] [1 :A]], 2 42 | ;; same head and tail 43 | [[1 :A] [1 :A]], nil 44 | ;; do not exist 45 | [[2 :E] [3 :D]], nil 46 | [[3 :D] [2 :E]], nil)) 47 | -------------------------------------------------------------------------------- /test/euphony/structs/graph_test.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.structs.graph-test 2 | (:require [clojure.test :as t] 3 | [euphony.structs.graph :refer :all] 4 | [euphony.test-helpers :as th])) 5 | 6 | (defonce EDGES [[\A \B] [\A \C] 7 | [\A \D] [\A \E] 8 | [\B \C] [\C \E] 9 | [\D \B]]) 10 | 11 | (def GRAPH (-> (apply graph EDGES) 12 | (with-nodes-features {:n (fn [g n] (int n))}) 13 | (with-weight (fn [g [h t]] (+ (int h) (int t)))) 14 | (with-edges-features {:e (fn [g [h t]] (* (int h) (int t)))}))) 15 | 16 | ; CONSTRUCTORS 17 | 18 | (t/deftest can-construct-unweighted-graph 19 | (t/is (th/set= (nodes GRAPH) #{\A \B \C \D \E})) 20 | (t/is (th/set= (edges GRAPH) #{[\A \B] [\A \C] [\A \D] [\A \E] [\B \C] [\B \D] [\C \E]}))) 21 | 22 | (t/deftest can-construct-weighted-graph 23 | (t/are [edge, output] (= (weight GRAPH edge) output) 24 |
[\B \C], 133 25 | [\C \B], 133 26 | [\A \B], 131 27 | [\B \A], 131 28 | ;; same head and tail 29 | [\A \A], nil 30 | ;; edges do not exist 31 | [\E \D], nil 32 | [\D \E], nil 33 | [\X \Y], nil)) 34 | 35 | ; SORTERS 36 | 37 | (t/deftest can-sort-nodes-by-degree 38 | (t/is (= (sort-nodes-by-degree GRAPH) 39 | [[\D 2] [\E 2] [\B 3] [\C 3] [\A 4]]))) 40 | 41 | (t/deftest can-sort-edges-by-weight 42 | (t/is (= (sort-edges-by-weight GRAPH) 43 | [[[\A \B] 131] 44 | [[\A \C] 132] 45 | [[\A \D] 133] 46 | [[\B \C] 133] 47 | [[\A \E] 134] 48 | [[\B \D] 134] 49 | [[\C \E] 136]]))) 50 | 51 | (t/deftest can-sort-nodes-by-attribute 52 | (t/is (= (sort-nodes-by-attr :n GRAPH) 53 | [[\A {:n 65}] [\B {:n 66}] [\C {:n 67}] [\D {:n 68}] [\E {:n 69}]]))) 54 | 55 | (t/deftest can-sort-edges-by-attribute 56 | (t/is (= (sort-edges-by-attr :e GRAPH) 57 | [[[\A \B] {:e 4290}] 58 | [[\A \C] {:e 4355}] 59 | [[\A \D] {:e 4420}] 60 | [[\B \C] {:e 4422}] 61 | [[\A \E] {:e 4485}] 62 | [[\B \D] {:e 4488}] 63 | [[\C \E] {:e 4623}]]))) 64 | 65 | ; SELECTERS 66 | 67 | (t/deftest can-select-by-node-degree 68 | (let [graph (select-node (where-node-degree >= 3) GRAPH)] 69 | (t/is (th/set= (nodes graph) #{\A \B \C})) 70 | (t/is (th/set= (edges graph) #{[\A \B] [\A \C] [\B \C]})))) 71 | 72 | (t/deftest can-select-by-edge-weight 73 | (let [graph (select-edge (where-edge-weight = 134) GRAPH)] 74 | (t/is (th/set= (nodes graph) (nodes GRAPH) #{\A \B \C \D \E})) 75 | (t/is (th/set= (edges graph) #{[\A \E] [\B \D]})))) 76 | 77 | (t/deftest can-select-nodes-by-attribute 78 | (let [graph (select-node (where-node-attr :n <= 67) GRAPH)] 79 | (t/is (th/set= (nodes graph) #{\A \B \C})) 80 | (t/is (th/set= (edges graph) #{[\A \B] [\A \C] [\B \C]})))) 81 | 82 | (t/deftest can-select-edges-by-attribute 83 | (let [graph (select-edge (where-edge-attr :e <= 4400) GRAPH)] 84 | (t/is (th/set= (nodes graph) (nodes GRAPH) #{\A \B \C \D \E})) 85 | (t/is (th/set= (edges graph) #{[\A \C] [\A \B]})))) 86 | 87 | ; ALGORITHMS 88 |
89 | (t/deftest can-prune-graph 90 | (let [graph (prune GRAPH)] 91 | (t/is (th/set= (nodes graph) (nodes GRAPH) #{\A \B \C \D \E})) 92 | (t/is (th/set= (edges graph) #{[\A \B] [\A \C] [\A \D] [\A \E]})))) 93 | 94 | (t/deftest can-trim-graph 95 | (let [graph (trim GRAPH 133)] 96 | (t/is (th/set= (nodes graph) (nodes GRAPH) #{\A \B \C \D \E})) 97 | (t/is (th/set= (edges graph) #{[\A \B] [\A \C] [\A \D] [\B \C]})))) 98 | 99 | (t/deftest can-cluster-graph 100 | (t/is (= (cluster GRAPH 132) [[\A \B \C] [\D] [\E]]))) 101 | -------------------------------------------------------------------------------- /test/euphony/structs/pqueue_test.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.structs.pqueue-test 2 | (:require [clojure.test :as t] 3 | [euphony.structs.pqueue :refer :all])) 4 | 5 | ; CONSTRUCTORS 6 | 7 | (t/deftest can-construct-priority-queue 8 | (let [q1 (pqueue {:a 5 :b 1 :c 2 :d 4 :e 3}) 9 | q2 (pqueue [:a :b :c :d :e] [5 1 2 4 3]) 10 | q3 (pqueue [[:a 5] [:b 1] [:c 2] [:d 4] [:e 3]])] 11 | (t/is (= q1 q2 q3 {:b 1 :c 2 :e 3 :d 4 :a 5})) 12 | (t/is (= (vals q1) (vals q2) (vals q3) [1 2 3 4 5])) 13 | (t/is (= (keys q1) (keys q2) (keys q3) [:b :c :e :d :a])))) 14 | -------------------------------------------------------------------------------- /test/euphony/test_helpers.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.test-helpers 2 | (:require [clojure.set :as Set])) 3 | 4 | (defn set= [& sets] 5 | (apply = (map set sets))) 6 | 7 | (defn subset? [a b] 8 | (Set/subset? 
(set a) (set b))) 9 | -------------------------------------------------------------------------------- /test/euphony/test_system.clj: -------------------------------------------------------------------------------- 1 | (ns euphony.test-system 2 | (:require [clojure.test :as t] 3 | [euphony.utils.db :as d] 4 | [euphony.system :as sys] 5 | [euphony.utils.io :as io] 6 | [clojure.java.io :as jio])) 7 | 8 | (def CONF {:datomic {:uri "datomic:mem://test" 9 | :reset-on-start true}}) 10 | 11 | (def PARSE-DATOMS "test/data/parse-datoms.edn") 12 | (def IMPORT-DATOMS "test/data/import-datoms.edn") 13 | (def CLUSTER-DATOMS "test/data/cluster-datoms.edn") 14 | (def RESULTS-DATOMS "test/data/results-datoms.edn") 15 | 16 | ; DYNAMIC VARS 17 | 18 | ;; main components 19 | (def ^:dynamic *sys*) 20 | 21 | ;; example datasets 22 | (def ^:dynamic *results*) 23 | 24 | ;; database + datoms 25 | (def ^:dynamic *conn-initial*) 26 | (def ^:dynamic *conn-after-parse*) 27 | (def ^:dynamic *conn-after-import*) 28 | (def ^:dynamic *conn-after-cluster*) 29 | (def ^:dynamic *conn-after-results*) 30 | 31 | ; FIXTURES FUNCTIONS 32 | 33 | (defn with-sys [f] 34 | (sys/with-system [system CONF] 35 | (binding [*sys* system] 36 | (f)))) 37 | 38 | (defn- conn-initial [f] 39 | (let [conn (-> *sys* :datomic :conn)] 40 | (binding [*conn-initial* conn] 41 | (f)))) 42 | 43 | (def with-conn-initial (t/compose-fixtures with-sys conn-initial)) 44 | 45 | (defn- after-import [f] 46 | (binding [*conn-after-import* (d/transact *conn-initial* (io/read-edn! IMPORT-DATOMS))] 47 | (f))) 48 | 49 | (def with-conn-after-import (t/compose-fixtures with-conn-initial after-import)) 50 | 51 | (defn- after-parse [f] 52 | (binding [*conn-after-parse* (d/transact *conn-after-import* (io/read-edn! PARSE-DATOMS))] 53 | (f))) 54 | 55 | (def with-conn-after-parse (t/compose-fixtures with-conn-after-import after-parse)) 56 | 57 | (defn- after-cluster [f] 58 | (binding [*conn-after-cluster* (d/transact *conn-after-parse* (io/read-edn! 
CLUSTER-DATOMS))] 59 | (f))) 60 | 61 | (def with-conn-after-cluster (t/compose-fixtures with-conn-after-parse after-cluster)) 62 | 63 | (defn with-results [f] 64 | (let [results-datoms (io/read-edn! RESULTS-DATOMS) 65 | to-tuple (juxt :antivirus.result/system :antivirus.result/label) 66 | results (->> results-datoms (filter :antivirus.result/label) (map to-tuple))] 67 | (binding [*results* results] 68 | (f)))) 69 | 70 | (defn- after-results [f] 71 | (let [results-datoms (io/read-edn! RESULTS-DATOMS)] 72 | (binding [*conn-after-results* (d/transact *conn-initial* results-datoms)] 73 | (f)))) 74 | 75 | (def with-conn-after-results (t/compose-fixtures with-conn-initial after-results)) 76 | --------------------------------------------------------------------------------