├── plugins
│   ├── __init__.py
│   ├── GeoIPASNum.dat
│   ├── dns.yapsy-plugin
│   ├── tld.yapsy-plugin
│   ├── classify.yapsy-plugin
│   ├── cymru.yapsy-plugin
│   ├── generic.yapsy-plugin
│   ├── ipwhois.yapsy-plugin
│   ├── modularity.yapsy-plugin
│   ├── maxmind.yapsy-plugin
│   ├── path_count.yapsy-plugin
│   ├── page_rank.yapsy-plugin
│   ├── titan.yapsy-plugin
│   ├── bayes_net.yapsy-plugin
│   ├── networkx.yapsy-plugin
│   ├── page_rank_2.yapsy-plugin
│   ├── neo4j.yapsy-plugin
│   ├── cymru_api.py
│   ├── classify.py
│   ├── generic.py
│   ├── modularity.py
│   ├── page_rank.py
│   ├── page_rank_2.py
│   ├── networkx.py
│   ├── dns.py
│   ├── maxmind.py
│   ├── path_count.py
│   ├── tld.py
│   ├── cymru.py
│   └── bayes_net.py
├── verum.cfg
├── minions
│   ├── alexa_1M.yapsy-plugin
│   ├── osint_bambenekconsulting_com.yapsy-plugin
│   ├── osint_bambenekconsulting_com_v2.yapsy-plugin
│   ├── edge_consolidator.yapsy-plugin
│   ├── alexa_1M.py
│   ├── osint_bambenekconsulting_com.py
│   └── edge_consolidator.py
├── examples
│   ├── plugin_template.yapsy-plugin
│   └── plugin_template.py
├── .gitignore
├── verum
│   ├── __init__.py
│   └── helper.py
├── ui.py
├── README.md
└── LICENSE

/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'v685573'
2 |
--------------------------------------------------------------------------------
/plugins/GeoIPASNum.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vz-risk/Verum/HEAD/plugins/GeoIPASNum.dat
--------------------------------------------------------------------------------
/verum.cfg:
--------------------------------------------------------------------------------
1 | [CORE]
2 | ; Plugins Folder
3 | Plugins = "~/Documents/Development/verum/plugins"
4 | ; Minions Folder
5 | Minions = "~/Documents/Development/verum/minions"
6 |
7 | [LOGGING]
8 | level = debug
9 | log = none
10 |
--------------------------------------------------------------------------------
/plugins/dns.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = DNS Enrichment
3 | Module = dns
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a domain name and returns the DNS-resolved IP address as networkx graph.
10 |
11 | [Configuration]
12 | Type = enrichment
13 | Cost = 3
14 | Speed = 3
15 | Inputs = domain
--------------------------------------------------------------------------------
/plugins/tld.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = TLD Enrichment
3 | Module = tld
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.
10 |
11 | [Configuration]
12 | Type = enrichment
13 | Cost = 1
14 | Speed = 1
15 | Inputs = domain
16 |
17 |
--------------------------------------------------------------------------------
/plugins/classify.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = classify
3 | Module = classify
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a target (key:value) and classification and returns a graph linking the two.
10 |
11 | [Configuration]
12 | Type = enrichment
13 | Inputs = any
14 |
15 | [Log]
16 | level = debug
17 | # file = ./logfile.log
--------------------------------------------------------------------------------
/plugins/cymru.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = Cymru Enrichment
3 | Module = cymru
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a list of IPs and returns ASN and BGP information as networkx graph.
10 |
11 | [Configuration]
12 | Type = enrichment
13 | Cost = 5
14 | Speed = 4
15 | Inputs = ip
16 | Cymru_Module = ./cymru_api.py
17 |
--------------------------------------------------------------------------------
/plugins/generic.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = generic
3 | Module = generic
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a described (key:value) and describing (key:value) and returns a graph linking the two.
10 |
11 | [Configuration]
12 | Type = enrichment
13 | Inputs = any
14 |
15 | [Log]
16 | level = debug
17 | # file = ./logfile.log
--------------------------------------------------------------------------------
/plugins/ipwhois.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = IP Whois Enrichment
3 | Module = ipwhois
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a whois record as a list of strings in a specific format and returns a networkx graph of the information.
10 |
11 | [Configuration]
12 | Type = enrichment
13 | Cost = 3
14 | Speed = 2
15 | Inputs = domain
--------------------------------------------------------------------------------
/plugins/modularity.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = Modularity
3 | Module = modularity
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a subgraph and returns a modularity-based partitioning (clustering) of the subgraph
10 |
11 | [Configuration]
12 | Type = score
13 | cost = 2
14 | speed = 2
15 |
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log
--------------------------------------------------------------------------------
/plugins/maxmind.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = Maxmind ASN Enrichment
3 | Module = maxmind
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes an IP address as string and returns the ASN of the IP address as networkx graph.
10 |
11 |
12 | [Configuration]
13 | Type = enrichment
14 | DAT_FILE = ./GeoIPASNum.dat
15 | Cost = 2
16 | Speed = 2
17 | Inputs = IP
--------------------------------------------------------------------------------
/plugins/path_count.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = PathCount
3 | Module = path_count
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a subgraph and topic and scores nodes based on the number of paths between the topic and the node
10 |
11 | [Configuration]
12 | Type = score
13 | cost = 4
14 | speed = 6
15 |
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log
--------------------------------------------------------------------------------
/minions/alexa_1M.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = Alexa Top 1M
3 | Module = alexa_1M
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Reads the alexa 1M and imports it into the intelligence graph.
10 |
11 | [Configuration]
12 | Type = minion
13 | Feed = http://s3.amazonaws.com/alexa-static/top-1m.csv.zip
14 | cost = 4
15 |
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log
--------------------------------------------------------------------------------
/plugins/page_rank.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = PageRank
3 | Module = page_rank
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a subgraph and topic and uses pagerank without any specific specialization to score the nodes in the subgraph.
10 |
11 | [Configuration]
12 | Type = score
13 | cost = 2
14 | speed = 2
15 |
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log
--------------------------------------------------------------------------------
/plugins/titan.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = TitanDB
3 | Module = titan
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Provides the ability to enrich a CAGS context graph stored in TitanDB.
10 |
11 | [Configuration]
12 | Type = interface
13 |
14 | [titanDB]
15 | host = localhost
16 | port = 8182
17 | graph = vzgraph
18 |
19 | [Log]
20 | level = debug
21 | # file = ./logfile.log
--------------------------------------------------------------------------------
/plugins/bayes_net.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = BayesNet
3 | Module = bayes_net
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a subgraph and topic, treats the subgraph as a bayesian inference network, assumes the topic true, and scores the other nodes.
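# Score plugins such as this one share a common calling shape; a minimal,
# hypothetical host-side sketch (the variable names are illustrative, but the
# configure()/score() signatures are those defined in modularity.py and
# page_rank.py later in this repo):
#
#     plugin = PluginOne()                                 # instantiated by yapsy
#     ptype, ok, name, desc, cost, speed = plugin.configure()
#     scores = plugin.score(subgraph, topic)               # -> {node: score, ...}
#
# The cost and speed values in [Configuration] below are the 1-10 self-ratings
# returned by configure().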
10 |
11 | [Configuration]
12 | Type = score
13 | cost = 8
14 | speed = 8
15 |
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log
--------------------------------------------------------------------------------
/examples/plugin_template.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name =
3 | Module =
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description =
10 |
11 | [Configuration]
12 | Type =
13 | =
14 | =
15 |
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log
--------------------------------------------------------------------------------
/plugins/networkx.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = Networkx Interface
3 | Module = networkx
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Reads a graph file into memory, stores the graph in a networkx graph in memory, and writes the graph back with .write()
10 |
11 | [Configuration]
12 | Type = interface
13 | context_graph_file = /tmp/verum.graphml
14 |
15 | [Log]
16 | level = debug
17 | # file = ./logfile.log
--------------------------------------------------------------------------------
/plugins/page_rank_2.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = PageRank2
3 | Module = page_rank_2
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Takes a subgraph and topic and uses pagerank with specific initialization values to score the nodes in the subgraph with respect to the topic.
10 |
11 | [Configuration]
12 | Type = score
13 | cost = 2
14 | speed = 2
15 |
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log
--------------------------------------------------------------------------------
/plugins/neo4j.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = Neo4j
3 | Module = neo4j
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Provides the ability to enrich a CAGS context graph stored in a Neo4j graph database.
10 |
11 | [Configuration]
12 | Type = interface
13 |
14 | [neo4j]
15 | host = localhost
16 | port = 7474
17 | username = neo4j
18 | password = neo4j1
19 |
20 | [Log]
21 | level = debug
22 | # file = ./logfile.log
--------------------------------------------------------------------------------
/minions/osint_bambenekconsulting_com.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = OSINT Bambenek Consulting
3 | Module = osint_bambenekconsulting_com
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Reads the feed at http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt and imports it into the intelligence graph.
10 |
11 | [Configuration]
12 | Type = minion
13 | Feed = http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt
14 | cost = 4
15 |
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log
--------------------------------------------------------------------------------
/minions/osint_bambenekconsulting_com_v2.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = OSINT Bambenek Consulting V2
3 | Module = osint_bambenekconsulting_com_v2
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.2
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Reads the feed at http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt and imports it into the intelligence graph.
10 |
11 | [Configuration]
12 | Type = minion
13 | Feed = http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt
14 | cost = 4
15 |
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log
--------------------------------------------------------------------------------
/minions/edge_consolidator.yapsy-plugin:
--------------------------------------------------------------------------------
1 | [Core]
2 | Name = Neo4j Edge Consolidator
3 | Module = edge_consolidator
4 |
5 | [Documentation]
6 | Author = Gabriel Bassett
7 | Version = 0.1
8 | Website = https://github.com/vz-risk/Verum
9 | Description = Randomly walks the graph. At each node, it consolidates edges by URI.
10 |
11 | [Configuration]
12 | Type = minion
13 | Jump = 0.9
14 | Cost = 2
15 | # will sleep sleep_time seconds in between nodes to slow things down
16 | sleep_time = 3
17 |
18 | [neo4j]
19 | host = localhost
20 | port = 7474
21 | username = neo4j
22 | password = neo4j1
23 |
24 | [Log]
25 | level = debug
26 | # file = ./logfile.log
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 |
25 | # PyInstaller
26 | # Usually these files are written by a python script from a template
27 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 |
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 |
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .cache
40 | nosetests.xml
41 | coverage.xml
42 |
43 | # Translations
44 | *.mo
45 | *.pot
46 |
47 | # Django stuff:
48 | *.log
49 |
50 | # Sphinx documentation
51 | docs/_build/
52 |
53 | # PyBuilder
54 | target/
55 |
56 | # pycharm
57 | .idea
58 | .idea/
59 |
--------------------------------------------------------------------------------
/verum/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Gabriel Bassett'
2 |
3 | '''
4 | Copyright 2014 Gabriel Bassett
5 |
6 | LICENSE:
7 | Licensed to the Apache Software Foundation (ASF) under one
8 | or more contributor license agreements. See the NOTICE file
9 | distributed with this work for additional information
10 | regarding copyright ownership.
The ASF licenses this file 11 | to you under the Apache License, Version 2.0 (the 12 | "License"); you may not use this file except in compliance 13 | with the License. You may obtain a copy of the License at 14 | 15 | http://www.apache.org/licenses/LICENSE-2.0 16 | 17 | Unless required by applicable law or agreed to in writing, 18 | software distributed under the License is distributed on an 19 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 20 | KIND, either express or implied. See the License for the 21 | specific language governing permissions and limitations 22 | under the License. 23 | ''' 24 | 25 | 26 | __all__ = [ 'app', 27 | 'helper' 28 | ] 29 | # Import the packages 30 | from app import app 31 | from helper import * 32 | -------------------------------------------------------------------------------- /ui.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | AUTHOR: Gabriel Bassett 4 | DATE: 12-17-2013 5 | DEPENDENCIES: a list of modules requiring installation 6 | Copyright 2014 Gabriel Bassett 7 | 8 | LICENSE: 9 | Licensed to the Apache Software Foundation (ASF) under one 10 | or more contributor license agreements. See the NOTICE file 11 | distributed with this work for additional information 12 | regarding copyright ownership. The ASF licenses this file 13 | to you under the Apache License, Version 2.0 (the 14 | "License"); you may not use this file except in compliance 15 | with the License. You may obtain a copy of the License at 16 | 17 | http://www.apache.org/licenses/LICENSE-2.0 18 | 19 | Unless required by applicable law or agreed to in writing, 20 | software distributed under the License is distributed on an 21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 22 | KIND, either express or implied. See the License for the 23 | specific language governing permissions and limitations 24 | under the License. 
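USAGE (a self-contained sketch of the argparse/logging pattern this script
builds below; the flags mirror the parser defined in this file, and the
example argument values are illustrative only):

    import argparse, logging

    parser = argparse.ArgumentParser(description='This script processes a graph.')
    parser.add_argument('-d', '--debug', action="store_const", dest="loglevel",
                        const=logging.DEBUG, default=logging.WARNING)
    parser.add_argument('-v', '--verbose', action="store_const", dest="loglevel",
                        const=logging.INFO)
    args = parser.parse_args(['-v'])          # as if run with: python ui.py -v
    logging.basicConfig(level=args.loglevel)  # INFO overrides the WARNING default

Because -d and -v share one destination, the most specific flag given on the
command line wins over the WARNING default.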
25 |
26 |
27 | DESCRIPTION:
28 | Command-line user interface stub for Verum. Connects to a neo4j context graph;
29 | plugin loading and enrichment cataloging are still TODO (see below).
30 | """
31 | # PRE-USER SETUP
32 | pass
33 |
34 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
35 |
36 |
37 | # USER VARIABLES
38 | NEODB = "http://192.168.121.134:7474/db/data"
39 |
40 |
41 | ########### NOT USER EDITABLE BELOW THIS POINT #################
42 |
43 |
44 | ## IMPORTS
45 | from py2neo import neo4j, cypher
46 | import networkx as nx
47 | import argparse
48 | import logging
49 |
50 | ## SETUP
51 | __author__ = "Gabriel Bassett"
52 | # Parse Arguments (should correspond to user variables)
53 | parser = argparse.ArgumentParser(description='This script processes a graph.')
54 | parser.add_argument('-d', '--debug',
55 |                     help='Print lots of debugging statements',
56 |                     action="store_const", dest="loglevel", const=logging.DEBUG,
57 |                     default=logging.WARNING
58 |                     )
59 | parser.add_argument('-v', '--verbose',
60 |                     help='Be verbose',
61 |                     action="store_const", dest="loglevel", const=logging.INFO
62 |                     )
63 | parser.add_argument('--log', help='Location of log file', default=None)
64 | # db is optional on the command line; it falls back to the NEODB default above
65 | parser.add_argument('db', nargs='?', help='URL of the neo4j graph database', default=NEODB)
66 | args = parser.parse_args()
67 | ## Set up Logging
68 | if args.log is not None:
69 |     logging.basicConfig(filename=args.log, level=args.loglevel)
70 | else:
71 |     logging.basicConfig(level=args.loglevel)
72 |
73 | # Connect to database (read the db argument before connecting)
74 | NEODB = args.db
75 | G = neo4j.GraphDatabaseService(NEODB)
76 | g = nx.DiGraph()
77 |
78 |
79 | ## EXECUTION
80 | # TODO: load plugins
81 | # TODO: catalog enrichments and provide a way to run against all enrichments which take similar input
82 |
83 |
84 |
85 | def main():
86 |     logging.info('Beginning main loop.')
87 |
88 |     logging.info('Ending main loop.')
89 |
90 | if __name__ == "__main__":
91 |     main()
92 |
--------------------------------------------------------------------------------
/plugins/cymru_api.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | """
4 | cymru_api.py
5 | """
6 | # From: https://gist.github.com/zakird/11196064
7 |
8 | import sys
9 | import os
10 | import socket
11 | import unittest
12 |
13 | class CymruIPtoASNResult(object):
14 |     def __init__(self, **kwargs):
15 |         for k, v in kwargs.iteritems():
16 |             setattr(self, k, v)
17 |
18 |     def __str__(self):
19 |         return "<CymruIPtoASNResult %s>" % self.ip_address
20 |
21 |     __repr__ = __str__
22 |
23 | class CymruIPtoASNService(object):
24 |     URL = "whois.cymru.com"
25 |
26 |     """Whois Netcat Action
27 |     begin enable bulk input mode (netcat only)
28 |     end exit the whois/netcat client (netcat only)
29 |     -p prefix include matching prefix
30 |     -q noprefix disable matching prefix (default)
31 |     -c countrycode include matching country code
32 |     -d nocountrycode disable country codes (default)
33 |     -n asname include asnames (default)
34 |     -o noasname disable asnames
35 |     -r registry display matching registry
36 |     -s noregistry disable registry display (default)
37 |     -a allocdate enable allocation date
38 |     -b noallocdate disable allocation date (default)
39 |     -t truncate truncate asnames (default)
40 |     -u notruncate do not truncate asnames
41 |     -v verbose enable all flags (-c -r -p -a -u -a)
42 |     -e header enable column headings (default)
43 |     -f noheader disable column headings
44 |     -w asnumber include asnumber column (default)
45 |     -x noasnumber disable asnumber column (will not work for IP mappings)
46 |     -h help this help message"""
47 |
48 |     def
__init__(self): 49 | self.__socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 50 | self.__socket.connect((self.URL, 43)) 51 | 52 | def _gen_query(self, queries): 53 | lines = [] 54 | lines.append("begin") 55 | lines.append("verbose") 56 | lines.extend(queries) 57 | lines.append("end\r\n") 58 | return "\n".join(lines) 59 | 60 | def _send_query(self, query): 61 | self.__socket.sendall(query) 62 | self.__socket.shutdown(socket.SHUT_WR) 63 | response = '' 64 | while True: 65 | r = self.__socket.recv(16) 66 | if r and r != '': 67 | response = ''.join((response, r)) 68 | else: 69 | break 70 | return response 71 | 72 | LABELS = ( 73 | 'as_number', 74 | 'ip_address', 75 | 'bgp_prefix', 76 | 'country', 77 | 'registry', 78 | 'allocated_at', 79 | 'as_name' 80 | ) 81 | 82 | def _parse_response(self, response): 83 | for line in response.split("\n"): 84 | if line.startswith("Bulk mode;") or line == '': 85 | continue 86 | else: 87 | clean = map(lambda v: v.rstrip().lstrip(), line.split('|')) 88 | yield CymruIPtoASNResult(**dict(zip(self.LABELS, clean))) 89 | 90 | def query(self, queries): 91 | query = self._gen_query(queries) 92 | response = self._send_query(query) 93 | results = self._parse_response(response) 94 | for r in results: 95 | yield r 96 | 97 | def query_one(self, query): 98 | return list(self.query([query,]))[0] 99 | 100 | class CymruIptoASNServiceTests(unittest.TestCase): 101 | def setUp(self): 102 | self.service = CymruIPtoASNService() 103 | 104 | def testOne(self): 105 | # expect the following: 106 | # ['3676', '128.255.1.1', '128.255.0.0/16', 'US', 'arin', '1987-06-05', 107 | # 'UIOWA-AS - University of Iowa'] 108 | r = self.service.query_one("128.255.1.1") 109 | self.assertEquals(r.as_number, '3676') 110 | self.assertEquals(r.ip_address, "128.255.1.1") 111 | self.assertEquals(r.country, "US") 112 | self.assertEquals(r.registry, "arin") 113 | self.assertEquals(r.as_name, "UIOWA-AS - University of Iowa") 114 | 115 | def testMultiple(self): 116 | rs = list(self.service.query(["128.255.1.1", "141.212.1.1"])) 117 | self.assertEquals(rs[0].as_number, '3676') 118 | self.assertEquals(rs[1].as_number, '36375') 119 | 120 | def testFailure(self): 121 | pass 122 | 123 | if __name__ == '__main__': 124 | unittest.main() -------------------------------------------------------------------------------- /plugins/classify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 
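A hypothetical host-side sketch of the configure() contract implemented by
this plugin (the field order matches the list returned by
PluginOne.configure() below; the registry dict is illustrative only):

    plugin = PluginOne()
    ptype, ok, name, description, inputs, cost, speed = plugin.configure()
    if ok and ptype == 'enrichment':
        # inputs == ['any'] for this plugin; cost and speed fall back to
        # 9999 when absent from the [Configuration] section.
        registry = {}
        for i in inputs:
            registry.setdefault(i, []).append((cost, speed, plugin))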
27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | pass 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | PLUGIN_CONFIG_FILE = "classify.yapsy-plugin" 40 | NAME = "classify" 41 | 42 | 43 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 44 | 45 | 46 | ## IMPORTS 47 | from yapsy.IPlugin import IPlugin 48 | import logging 49 | import networkx as nx 50 | from datetime import datetime # timedelta imported above 51 | import dateutil # to parse variable time strings 52 | import uuid 53 | import ConfigParser 54 | import inspect 55 | try: 56 | import tldextract 57 | module_import_success = True 58 | except: 59 | module_import_success = False 60 | logging.error("Module import failed. Please install the following module: tldextract.") 61 | raise 62 | 63 | 64 | ## SETUP 65 | loc = inspect.getfile(inspect.currentframe()) 66 | ind = loc.rfind("/") 67 | loc = loc[:ind+1] 68 | config = ConfigParser.SafeConfigParser() 69 | config.readfp(open(loc + PLUGIN_CONFIG_FILE)) 70 | 71 | if config.has_section('Core'): 72 | if 'name' in config.options('Core'): 73 | NAME = config.get('Core', 'name') 74 | if config.has_section('Log'): 75 | if 'level' in config.options('Log'): 76 | LOGLEVEL = config.get('Log', 'level') 77 | if 'file' in config.options('Log'): 78 | LOGFILE = config.get('Log', 'file') 79 | 80 | 81 | ## EXECUTION 82 | class PluginOne(IPlugin): 83 | inputs = None 84 | 85 | def __init__(self): 86 | pass 87 | 88 | def configure(self): 89 | """ 90 | 91 | :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 92 | """ 93 | config_options = config.options("Configuration") 94 | 95 | if 'cost' in config_options: 96 | cost = config.get('Configuration', 'cost') 97 | else: 98 | cost = 9999 99 | if 'speed' in config_options: 100 | speed = config.get('Configuration', 'speed') 101 | else: 102 | speed = 9999 103 | 104 | if config.has_section('Documentation') and 'description' in config.options('Documentation'): 105 | description = config.get('Documentation', 'description') 106 | else: 107 | logging.error("'Description not in config file.") 108 | return [None, False, NAME, None, cost, speed] 109 | 110 | if 'type' in config_options: 111 | plugin_type = config.get('Configuration', 'type') 112 | else: 113 | logging.error("'Type' not specified in config file.") 114 | return [None, False, NAME, description, None, cost, speed] 115 | 116 | if 'inputs' in config_options: 117 | self.inputs = config.get('Configuration', 'Inputs') 118 | self.inputs = [l.strip().lower() for l in self.inputs.split(",")] 119 | else: 120 | logging.error("No input types specified in config file.") 121 | return [plugin_type, False, NAME, description, None, cost, speed] 122 | 123 | return [plugin_type, True, NAME, description, self.inputs, cost, speed] 124 | 125 | 126 | def run(self, enrichment_dict, start_time="", confidence=1): 127 | """ dict, str -> networkx MultiDiGraph 128 | 129 | :param enrichment_dict: a dictionary of the form {'key': , 'value':, 'classification':} 130 | :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object. 131 | :param include_subdomain: Boolean value. Default False. 
If true, subdomain will be returned in enrichment graph 132 | :return: a networkx graph representing the sections of the domain 133 | """ 134 | key = enrichment_dict['key'] 135 | value = enrichment_dict['value'] 136 | classification = enrichment_dict['classification'] 137 | 138 | g = nx.MultiDiGraph() 139 | 140 | if type(start_time) is str: 141 | try: 142 | time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ") 143 | except: 144 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 145 | elif type(start_time) is datetime: 146 | time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") 147 | else: 148 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 149 | 150 | # Get or create target node 151 | target_uri = "class=attribute&key={0}&value={1}".format(key, value) 152 | g.add_node(target_uri, { 153 | 'class': 'attribute', 154 | 'key': key, 155 | "value": value, 156 | "start_time": time, 157 | "uri": target_uri 158 | }) 159 | 160 | # Get or create classification node 161 | classification_uri = "class=attribute&key={0}&value={1}".format("classification", classification) 162 | g.add_node(classification_uri, { 163 | 'class': 'attribute', 164 | 'key': "classification", 165 | "value": classification, 166 | "start_time": time, 167 | "uri": classification_uri 168 | }) 169 | 170 | 171 | # Link target to classification 172 | edge_attr = { 173 | "relationship": "describedBy", 174 | "start_time": time, 175 | "origin": "classification", 176 | "confidence": confidence 177 | } 178 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, target_uri) 179 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, classification_uri) 180 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 181 | rel_chain = "relationship" 182 | while rel_chain in edge_attr: 183 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 184 | rel_chain = edge_attr[rel_chain] 185 | if "origin" in edge_attr: 186 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 187 | edge_attr["uri"] = edge_uri 188 | g.add_edge(target_uri, classification_uri, edge_uri, edge_attr) 189 | 190 | return g -------------------------------------------------------------------------------- /plugins/generic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 
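Both this plugin and classify.py build node and edge URIs with the same
convention; a small self-contained illustration (the key/value pairs are
example data only):

    import uuid

    described_uri = "class=attribute&key={0}&value={1}".format("ip", "8.8.8.8")
    describing_uri = "class=attribute&key={0}&value={1}".format("asn", "15169")
    src = uuid.uuid3(uuid.NAMESPACE_URL, described_uri)
    dst = uuid.uuid3(uuid.NAMESPACE_URL, describing_uri)
    # note: 'destionation' is the (misspelled) key the plugins below actually emit
    edge_uri = "source={0}&destionation={1}".format(src, dst)
    edge_uri += "&relationship=describedBy&origin=generic"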
27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | pass 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | PLUGIN_CONFIG_FILE = "generic.yapsy-plugin" 40 | NAME = "generic" 41 | 42 | 43 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 44 | 45 | 46 | ## IMPORTS 47 | from yapsy.IPlugin import IPlugin 48 | import logging 49 | import networkx as nx 50 | from datetime import datetime # timedelta imported above 51 | import dateutil # to parse variable time strings 52 | import uuid 53 | import ConfigParser 54 | import inspect 55 | try: 56 | import tldextract 57 | module_import_success = True 58 | except: 59 | module_import_success = False 60 | logging.error("Module import failed. Please install the following module: tldextract.") 61 | raise 62 | 63 | 64 | ## SETUP 65 | loc = inspect.getfile(inspect.currentframe()) 66 | ind = loc.rfind("/") 67 | loc = loc[:ind+1] 68 | config = ConfigParser.SafeConfigParser() 69 | config.readfp(open(loc + PLUGIN_CONFIG_FILE)) 70 | 71 | if config.has_section('Core'): 72 | if 'name' in config.options('Core'): 73 | NAME = config.get('Core', 'name') 74 | if config.has_section('Log'): 75 | if 'level' in config.options('Log'): 76 | LOGLEVEL = config.get('Log', 'level') 77 | if 'file' in config.options('Log'): 78 | LOGFILE = config.get('Log', 'file') 79 | 80 | 81 | ## EXECUTION 82 | class PluginOne(IPlugin): 83 | inputs = None 84 | 85 | def __init__(self): 86 | pass 87 | 88 | 89 | def configure(self): 90 | """ 91 | 92 | :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 93 | """ 94 | config_options = config.options("Configuration") 95 | 96 | if 'cost' in config_options: 97 | cost = config.get('Configuration', 'cost') 98 | else: 99 | cost = 9999 100 | if 'speed' in config_options: 101 | speed = config.get('Configuration', 'speed') 102 | else: 103 | speed = 9999 104 | 105 | if config.has_section('Documentation') and 'description' in config.options('Documentation'): 106 | description = config.get('Documentation', 'description') 107 | else: 108 | logging.error("'Description not in config file.") 109 | return [None, False, NAME, None, cost, speed] 110 | 111 | if 'type' in config_options: 112 | plugin_type = config.get('Configuration', 'type') 113 | else: 114 | logging.error("'Type' not specified in config file.") 115 | return [None, False, NAME, description, None, cost, speed] 116 | 117 | if 'inputs' in config_options: 118 | self.inputs = config.get('Configuration', 'Inputs') 119 | self.inputs = [l.strip().lower() for l in self.inputs.split(",")] 120 | else: 121 | logging.error("No input types specified in config file.") 122 | return [plugin_type, False, NAME, description, None, cost, speed] 123 | 124 | return [plugin_type, True, NAME, description, self.inputs, cost, speed] 125 | 126 | 127 | def run(self, enrichment_dict, start_time="", confidence=1): 128 | """ dict, str -> networkx multiDiGraph 129 | 130 | :param enrichment_dict: a dictionary of the form {'key': , 'value':, 'describing_key':, 'describing_value':} 131 | :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object. 132 | :param include_subdomain: Boolean value. Default False. 
If true, subdomain will be returned in enrichment graph 133 | :return: a networkx graph representing the sections of the domain 134 | """ 135 | described_key = enrichment_dict['key'] 136 | described_value = enrichment_dict['value'] 137 | describing_key = enrichment_dict['describing_key'] 138 | describing_value = enrichment_dict['describing_value'] 139 | 140 | g = nx.MultiDiGraph() 141 | 142 | if type(start_time) is str: 143 | try: 144 | time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ") 145 | except: 146 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 147 | elif type(start_time) is datetime: 148 | time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") 149 | else: 150 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 151 | 152 | # Get or create target node 153 | described_uri = "class=attribute&key={0}&value={1}".format(described_key, described_value) 154 | g.add_node(described_uri, { 155 | 'class': 'attribute', 156 | 'key': described_key, 157 | "value": described_value, 158 | "start_time": time, 159 | "uri": described_uri 160 | }) 161 | 162 | # Get or create classification node 163 | describing_uri = "class=attribute&key={0}&value={1}".format(describing_key, describing_value) 164 | g.add_node(describing_uri , { 165 | 'class': 'attribute', 166 | 'key': describing_key, 167 | "value": describing_value, 168 | "start_time": time, 169 | "uri": describing_uri 170 | }) 171 | 172 | 173 | # Link target to classification 174 | edge_attr = { 175 | "relationship": "describedBy", 176 | "start_time": time, 177 | "origin": "generic", 178 | "confidence": confidence 179 | } 180 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, described_uri) 181 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, describing_uri ) 182 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 183 | rel_chain = "relationship" 184 | while rel_chain in edge_attr: 185 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 186 | rel_chain = edge_attr[rel_chain] 187 | if "origin" in edge_attr: 188 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 189 | edge_attr["uri"] = edge_uri 190 | g.add_edge(described_uri, describing_uri , edge_uri, edge_attr) 191 | 192 | return g -------------------------------------------------------------------------------- /plugins/modularity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 
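The multigraph_to_digraph() helper defined below collapses parallel edges
into single edges whose 'confidence' is the sum over the originals; a minimal
usage sketch with toy nodes (assumes networkx 1.x, as used by this repo):

    import networkx as nx

    mg = nx.MultiDiGraph()
    mg.add_edge('a', 'b', confidence=0.5)
    mg.add_edge('a', 'b')                  # no confidence -> counts as 1
    dg = PluginOne().multigraph_to_digraph(mg)
    print(dg['a']['b']['confidence'])      # 1.5 under the summing rule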
27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | pass 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | PLUGIN_CONFIG_FILE = "modularity.yapsy-plugin" 40 | NAME = "Modularity" 41 | 42 | 43 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 44 | 45 | 46 | ## IMPORTS 47 | from yapsy.IPlugin import IPlugin 48 | import logging 49 | import networkx as nx 50 | from datetime import datetime # timedelta imported above 51 | import uuid 52 | import ConfigParser 53 | import inspect 54 | try: 55 | import community 56 | module_import_success = True 57 | except: 58 | module_import_success = False 59 | import numpy as np 60 | 61 | ## SETUP 62 | loc = inspect.getfile(inspect.currentframe()) 63 | ind = loc.rfind("/") 64 | loc = loc[:ind+1] 65 | config = ConfigParser.SafeConfigParser() 66 | config.readfp(open(loc + PLUGIN_CONFIG_FILE)) 67 | 68 | if config.has_section('Core'): 69 | if 'name' in config.options('Core'): 70 | NAME = config.get('Core', 'name') 71 | if config.has_section('Log'): 72 | if 'level' in config.options('Log'): 73 | LOGLEVEL = config.get('Log', 'level') 74 | if 'file' in config.options('Log'): 75 | LOGFILE = config.get('Log', 'file') 76 | 77 | 78 | ## EXECUTION 79 | class PluginOne(IPlugin): 80 | # TODO: The init should contain anything to load modules or data files that should be variables of the plugin object 81 | def __init__(self): 82 | pass 83 | 84 | # TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly 85 | # TODO: Current layout is for an enrichment plugin 86 | # TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed] 87 | # TODO: interface [type, successful_load, name] 88 | # TODO: query [TBD] 89 | # TODO: minion [TBD] 90 | def configure(self): 91 | """ 92 | 93 | :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 94 | """ 95 | config_options = config.options("Configuration") 96 | 97 | if 'cost' in config_options: 98 | cost = config.get('Configuration', 'cost') 99 | else: 100 | cost = 9999 101 | if 'speed' in config_options: 102 | speed = config.get('Configuration', 'speed') 103 | else: 104 | speed = 9999 105 | 106 | if config.has_section('Documentation') and 'description' in config.options('Documentation'): 107 | description = config.get('Documentation', 'description') 108 | else: 109 | logging.error("'Description not in config file.") 110 | return [None, False, NAME, None, cost, speed] 111 | 112 | if 'type' in config_options: 113 | plugin_type = config.get('Configuration', 'type') 114 | else: 115 | logging.error("'Type' not specified in config file.") 116 | return [None, False, NAME, description, cost, speed] 117 | 118 | if not module_import_success: 119 | logging.error("Module import failure caused configuration failure.") 120 | return [plugin_type, False, NAME, description, cost, speed] 121 | else: 122 | return [plugin_type, True, NAME, description, cost, speed] 123 | 124 | 125 | 126 | def score(self, sg, *args, **xargs): # get_modularity_cluster 127 | """ 128 | 129 | :param sg: subgraph 130 | :return: A dictionary of the modularity scores of the nodes in the subgraph 131 | """ 132 | # args/xargs collected so that passing a topic doesn't mess things up 133 | 134 | # Convert to diGraph 135 | if sg.is_multigraph(): 136 | sg = 
self.multigraph_to_digraph(sg) 137 | # Convert to undirected 138 | sg = sg.to_undirected() 139 | 140 | return community.best_partition(sg) 141 | 142 | 143 | def multigraph_to_digraph(self, g): 144 | """ 145 | 146 | :param g: takes a networkx mulitgraph 147 | :return: returns a networkx digraph with edge weights representing the number of edges 148 | 149 | NOTE: This butchers duplicate edge properties. If converting to score, use original edges in output. 150 | """ 151 | G = nx.DiGraph() 152 | edge_attributes = {} 153 | 154 | # if g isn't really a multigraph, just return it 155 | if not g.is_multigraph(): 156 | return g 157 | 158 | # collapse down to a diagraph 159 | G.add_nodes_from(g.nodes(data=True)) 160 | G.add_edges_from(g.edges(data=True)) 161 | 162 | # for each edge, weight the confidence by the number of edges 163 | ''' 164 | # captures a multiple of the confidence on the edge in the output graph 165 | for edge in G.edges(): 166 | count = g.edges().count(edge) 167 | if "count" > 1: 168 | if "confidence" in G.edge[edge[0]][edge[1]]: 169 | G.edge[edge[0]][edge[1]]['confidence'] *= count 170 | else: 171 | G.edge[edge[0]][edge[1]]["confidence"] = count 172 | ''' 173 | # Captures every confidence 174 | for edge in G.edges(): 175 | confidence = 0 176 | for src_edge in g.edge[edge[0]][edge[1]].values(): 177 | confidence += src_edge.get('confidence', 1) 178 | G.edge[edge[0]][edge[1]]['confidence'] = confidence 179 | # # collapse down to a diagraph 180 | # G.add_nodes_from(g.nodes(data=True)) 181 | # G.add_edges_from(g.edges(data=True)) 182 | 183 | return G 184 | 185 | 186 | ### DISTANCE WEIGHTS ### 187 | def linear_weight(self, distance, ddp=.2): 188 | """ 189 | 190 | :param distance: distance from topic 191 | :param ddp: percentage to degrade 192 | :return: Linear weighting factor as float 193 | """ 194 | return 1 - (distance * ddp) 195 | 196 | 197 | def log_weight(self, distance, a=1, b=1, n=3, pwr=1): 198 | """ 199 | 200 | :param distance: distance: distance from topic 201 | :param a: constant to shape graph. Adjusts hight at 0 = a / (1 + b) 202 | :param b: constant to shape graph. 203 | :param n: constant to shape graph. 204 | :param pwr: constant to shape graph. 205 | :return: log weighting factor as float 206 | """ 207 | return a / (1 + b*np.exp((distance-n) * pwr)) 208 | 209 | 210 | def exponential_weight(self, distance, b=2): 211 | return np.exp(-distance/b) 212 | 213 | 214 | def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1): 215 | """ 216 | 217 | :param distance: distance from topic 218 | :param pwr: constant to shape graph. Higher = steeper decline 219 | :param b: constant to shape graph. lower = greater spread 220 | :return: normal weighting factor as float 221 | pwr = 2.5, a = 1, c = 0, b = 30 222 | """ 223 | return a * np.exp(-(distance + c)**pwr/b) -------------------------------------------------------------------------------- /plugins/page_rank.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. 
You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | pass 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | PLUGIN_CONFIG_FILE = "page_rank.yapsy-plugin" 40 | NAME = "PageRank" 41 | 42 | 43 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 44 | 45 | 46 | ## IMPORTS 47 | from yapsy.IPlugin import IPlugin 48 | import logging 49 | import networkx as nx 50 | from datetime import datetime # timedelta imported above 51 | import uuid 52 | import ConfigParser 53 | import inspect 54 | import numpy as np 55 | 56 | 57 | ## SETUP 58 | loc = inspect.getfile(inspect.currentframe()) 59 | ind = loc.rfind("/") 60 | loc = loc[:ind+1] 61 | config = ConfigParser.SafeConfigParser() 62 | config.readfp(open(loc + PLUGIN_CONFIG_FILE)) 63 | 64 | if config.has_section('Core'): 65 | if 'name' in config.options('Core'): 66 | NAME = config.get('Core', 'name') 67 | if config.has_section('Log'): 68 | if 'level' in config.options('Log'): 69 | LOGLEVEL = config.get('Log', 'level') 70 | if 'file' in config.options('Log'): 71 | LOGFILE = config.get('Log', 'file') 72 | 73 | 74 | ## EXECUTION 75 | class PluginOne(IPlugin): 76 | # TODO: The init should contain anything to load modules or data files that should be variables of the plugin object 77 | def __init__(self): 78 | pass 79 | 80 | # TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly 81 | # TODO: Current layout is for an enrichment plugin 82 | # TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed] 83 | # TODO: interface [type, successful_load, name] 84 | # TODO: query [TBD] 85 | # TODO: minion [TBD] 86 | def configure(self): 87 | """ 88 | 89 | :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 90 | """ 91 | config_options = config.options("Configuration") 92 | 93 | if 'cost' in config_options: 94 | cost = config.get('Configuration', 'cost') 95 | else: 96 | cost = 9999 97 | if 'speed' in config_options: 98 | speed = config.get('Configuration', 'speed') 99 | else: 100 | speed = 9999 101 | 102 | if config.has_section('Documentation') and 'description' in config.options('Documentation'): 103 | description = config.get('Documentation', 'description') 104 | else: 105 | logging.error("'Description not in config file.") 106 | return [None, False, NAME, None, cost, speed] 107 | 108 | if 'type' in config_options: 109 | plugin_type = config.get('Configuration', 'type') 110 | else: 111 | logging.error("'Type' not specified in config file.") 112 | return [None, False, NAME, description, cost, speed] 113 | 114 | return [plugin_type, True, NAME, description, cost, speed] 115 | 116 | 117 | def score(self, sg, *args, **xargs): # get_pagerank_probability 118 | """ 119 | 120 | :param sg: egocentric subgraph around topic in networkx format 121 | :param distance_degradation: A factor for degrading as distance from 
the topic increases 122 | :return: Dictionary of probabilities keyed by node 123 | """ 124 | # convert to digraph if needed 125 | if sg.is_multigraph(): 126 | sg = self.multigraph_to_digraph(sg) 127 | 128 | personalized = {} 129 | for node in sg.nodes(): 130 | # personalized[node] = linear_weight(sg.node[node]['topic_distance'], distance_degradation) 131 | # INSERT WEIGHTING FUNCTION BELOW 132 | personalized[node] = self.exponential_weight(sg.node[node]['topic_distance']) 133 | 134 | # return the pagerank scores 135 | return nx.pagerank(sg, personalization=personalized, weight='confidence') 136 | 137 | 138 | def multigraph_to_digraph(self, g): 139 | """ 140 | 141 | :param g: takes a networkx mulitgraph 142 | :return: returns a networkx digraph with edge weights representing the number of edges 143 | 144 | NOTE: This butchers duplicate edge properties. If converting to score, use original edges in output. 145 | """ 146 | G = nx.DiGraph() 147 | edge_attributes = {} 148 | 149 | # if g isn't really a multigraph, just return it 150 | if not g.is_multigraph(): 151 | return g 152 | 153 | # collapse down to a diagraph 154 | G.add_nodes_from(g.nodes(data=True)) 155 | G.add_edges_from(g.edges(data=True)) 156 | 157 | # for each edge, weight the confidence by the number of edges 158 | ''' 159 | # captures a multiple of the confidence on the edge in the output graph 160 | for edge in G.edges(): 161 | count = g.edges().count(edge) 162 | if "count" > 1: 163 | if "confidence" in G.edge[edge[0]][edge[1]]: 164 | G.edge[edge[0]][edge[1]]['confidence'] *= count 165 | else: 166 | G.edge[edge[0]][edge[1]]["confidence"] = count 167 | ''' 168 | # Captures every confidence 169 | for edge in G.edges(): 170 | confidence = 0 171 | for src_edge in g.edge[edge[0]][edge[1]].values(): 172 | confidence += src_edge.get('confidence', 1) 173 | G.edge[edge[0]][edge[1]]['confidence'] = confidence 174 | # # collapse down to a diagraph 175 | # G.add_nodes_from(g.nodes(data=True)) 176 | # G.add_edges_from(g.edges(data=True)) 177 | 178 | return G 179 | 180 | 181 | ### DISTANCE WEIGHTS ### 182 | def linear_weight(self, distance, ddp=.2): 183 | """ 184 | 185 | :param distance: distance from topic 186 | :param ddp: percentage to degrade 187 | :return: Linear weighting factor as float 188 | """ 189 | return 1 - (distance * ddp) 190 | 191 | 192 | def log_weight(self, distance, a=1, b=1, n=3, pwr=1): 193 | """ 194 | 195 | :param distance: distance: distance from topic 196 | :param a: constant to shape graph. Adjusts hight at 0 = a / (1 + b) 197 | :param b: constant to shape graph. 198 | :param n: constant to shape graph. 199 | :param pwr: constant to shape graph. 200 | :return: log weighting factor as float 201 | """ 202 | return a / (1 + b*np.exp((distance-n) * pwr)) 203 | 204 | 205 | def exponential_weight(self, distance, b=2): 206 | return np.exp(-distance/b) 207 | 208 | 209 | def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1): 210 | """ 211 | 212 | :param distance: distance from topic 213 | :param pwr: constant to shape graph. Higher = steeper decline 214 | :param b: constant to shape graph. 
lower = greater spread
215 |         :return: normal weighting factor as float
216 |         pwr = 2.5, a = 1, c = 0, b = 30
217 |         """
218 |         return a * np.exp(-(distance + c)**pwr/b)
--------------------------------------------------------------------------------
/plugins/page_rank_2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | __author__ = "Gabriel Bassett"
4 | """
5 | AUTHOR: {0}
6 | DATE:
7 | DEPENDENCIES:
8 | Copyright {0}
9 |
10 | LICENSE:
11 | Licensed to the Apache Software Foundation (ASF) under one
12 | or more contributor license agreements. See the NOTICE file
13 | distributed with this work for additional information
14 | regarding copyright ownership. The ASF licenses this file
15 | to you under the Apache License, Version 2.0 (the
16 | "License"); you may not use this file except in compliance
17 | with the License. You may obtain a copy of the License at
18 |
19 | http://www.apache.org/licenses/LICENSE-2.0
20 |
21 | Unless required by applicable law or agreed to in writing,
22 | software distributed under the License is distributed on an
23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
24 | KIND, either express or implied. See the License for the
25 | specific language governing permissions and limitations
26 | under the License.
27 |
28 | DESCRIPTION:
29 |
30 |
31 | """.format(__author__)
32 | # PRE-USER SETUP
33 | pass
34 |
35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
36 |
37 |
38 | # USER VARIABLES
39 | PLUGIN_CONFIG_FILE = "page_rank_2.yapsy-plugin"
40 | NAME = "PageRank2"
41 |
42 |
43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
44 |
45 |
46 | ## IMPORTS
47 | from yapsy.IPlugin import IPlugin
48 | import logging
49 | import networkx as nx
50 | from datetime import datetime
51 | import uuid
52 | import ConfigParser
53 | import inspect
54 | import numpy as np  # required by the distance-weight helpers below
55 |
56 | ## SETUP
57 | loc = inspect.getfile(inspect.currentframe())
58 | ind = loc.rfind("/")
59 | loc = loc[:ind+1]
60 | config = ConfigParser.SafeConfigParser()
61 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
62 |
63 | if config.has_section('Core'):
64 |     if 'name' in config.options('Core'):
65 |         NAME = config.get('Core', 'name')
66 | if config.has_section('Log'):
67 |     if 'level' in config.options('Log'):
68 |         LOGLEVEL = config.get('Log', 'level')
69 |     if 'file' in config.options('Log'):
70 |         LOGFILE = config.get('Log', 'file')
71 |
72 |
73 | ## EXECUTION
74 | class PluginOne(IPlugin):
75 |     # TODO: The init should contain anything to load modules or data files that should be variables of the plugin object
76 |     def __init__(self):
77 |         pass
78 |
79 |     # TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everything loaded correctly
80 |     # TODO: Current layout is for an enrichment plugin
81 |     # TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
82 |     # TODO: interface [type, successful_load, name]
83 |     # TODO: query [TBD]
84 |     # TODO: minion [TBD]
85 |     def configure(self):
86 |         """
87 |
88 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
89 |         """
90 |         config_options = config.options("Configuration")
91 |
92 |         if 'cost' in config_options:
93 |             cost = config.get('Configuration', 'cost')
94 |         else:
95 |             cost = 9999
96 |         if 'speed' in config_options:
97 |             speed =
config.get('Configuration', 'speed') 98 | else: 99 | speed = 9999 100 | 101 | if config.has_section('Documentation') and 'description' in config.options('Documentation'): 102 | description = config.get('Documentation', 'description') 103 | else: 104 | logging.error("'Description not in config file.") 105 | return [None, False, NAME, None, cost, speed] 106 | 107 | if 'type' in config_options: 108 | plugin_type = config.get('Configuration', 'type') 109 | else: 110 | logging.error("'Type' not specified in config file.") 111 | return [None, False, NAME, description, cost, speed] 112 | 113 | return [plugin_type, True, NAME, description, cost, speed] 114 | 115 | 116 | def score(self, sg, topic, personalization=None): # get_pagerank_probability_2 117 | """ 118 | 119 | :param sg: egocentric subgraph around topic in networkx format 120 | :param topic: A factor for degrading as distance from the topic increases 121 | :param personalization: Dictionary with key of a node and value of a node weight. If none specified, defaults to the linear weight of the 'topic_distance' feature of the nodes. The topic_distance is the topic for which the subgraph was generated. 122 | :return: Dictionary of probabilities keyed by node 123 | """ 124 | if sg.is_multigraph(): 125 | sg = self.multigraph_to_digraph(sg) 126 | 127 | if personalization == None: 128 | personalization = {} 129 | for node in sg.nodes(): 130 | # personalized[node] = linear_weight(sg.node[node]['topic_distance'], distance_degradation) 131 | # INSERT WEIGHTING FUNCTION BELOW 132 | personalization[node] = self.linear_weight(sg.node[node]['topic_distance']) 133 | 134 | # Build topic weights to start topic with all weight and always jump to topic 135 | 136 | topic_weight = 1/float(len(topic.nodes())) 137 | topic_weighted = {k if 1 else k: topic_weight if k in topic.nodes() else 0 for k in sg.nodes()} 138 | 139 | # return the pagerank scores 140 | return nx.pagerank(sg, 141 | personalization=personalization, 142 | weight='confidence', 143 | nstart=topic_weighted, 144 | dangling=topic_weighted) 145 | 146 | 147 | def multigraph_to_digraph(self, g): 148 | """ 149 | 150 | :param g: takes a networkx mulitgraph 151 | :return: returns a networkx digraph with edge weights representing the number of edges 152 | 153 | NOTE: This butchers duplicate edge properties. If converting to score, use original edges in output. 
154 | """ 155 | G = nx.DiGraph() 156 | edge_attributes = {} 157 | 158 | # if g isn't really a multigraph, just return it 159 | if not g.is_multigraph(): 160 | return g 161 | 162 | # collapse down to a diagraph 163 | G.add_nodes_from(g.nodes(data=True)) 164 | G.add_edges_from(g.edges(data=True)) 165 | 166 | # for each edge, weight the confidence by the number of edges 167 | ''' 168 | # captures a multiple of the confidence on the edge in the output graph 169 | for edge in G.edges(): 170 | count = g.edges().count(edge) 171 | if "count" > 1: 172 | if "confidence" in G.edge[edge[0]][edge[1]]: 173 | G.edge[edge[0]][edge[1]]['confidence'] *= count 174 | else: 175 | G.edge[edge[0]][edge[1]]["confidence"] = count 176 | ''' 177 | # Captures every confidence 178 | for edge in G.edges(): 179 | confidence = 0 180 | for src_edge in g.edge[edge[0]][edge[1]].values(): 181 | confidence += src_edge.get('confidence', 1) 182 | G.edge[edge[0]][edge[1]]['confidence'] = confidence 183 | # # collapse down to a diagraph 184 | # G.add_nodes_from(g.nodes(data=True)) 185 | # G.add_edges_from(g.edges(data=True)) 186 | 187 | return G 188 | 189 | 190 | ### DISTANCE WEIGHTS ### 191 | def linear_weight(self, distance, ddp=.2): 192 | """ 193 | 194 | :param distance: distance from topic 195 | :param ddp: percentage to degrade 196 | :return: Linear weighting factor as float 197 | """ 198 | return 1 - (distance * ddp) 199 | 200 | 201 | def log_weight(self, distance, a=1, b=1, n=3, pwr=1): 202 | """ 203 | 204 | :param distance: distance: distance from topic 205 | :param a: constant to shape graph. Adjusts hight at 0 = a / (1 + b) 206 | :param b: constant to shape graph. 207 | :param n: constant to shape graph. 208 | :param pwr: constant to shape graph. 209 | :return: log weighting factor as float 210 | """ 211 | return a / (1 + b*np.exp((distance-n) * pwr)) 212 | 213 | 214 | def exponential_weight(self, distance, b=2): 215 | return np.exp(-distance/b) 216 | 217 | 218 | def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1): 219 | """ 220 | 221 | :param distance: distance from topic 222 | :param pwr: constant to shape graph. Higher = steeper decline 223 | :param b: constant to shape graph. lower = greater spread 224 | :return: normal weighting factor as float 225 | pwr = 2.5, a = 1, c = 0, b = 30 226 | """ 227 | return a * np.exp(-(distance + c)**pwr/b) -------------------------------------------------------------------------------- /plugins/networkx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 
27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | pass 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | NX_CONFIG_FILE = "networkx.yapsy-plugin" 40 | NAME = "Networkx Interface" 41 | 42 | 43 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 44 | 45 | 46 | ## IMPORTS 47 | from yapsy.IPlugin import IPlugin 48 | import logging 49 | import networkx as nx 50 | from datetime import datetime # timedelta imported above 51 | import uuid 52 | import ConfigParser 53 | import inspect 54 | import os.path 55 | 56 | 57 | ## SETUP 58 | loc = inspect.getfile(inspect.currentframe()) 59 | ind = loc.rfind("/") 60 | loc = loc[:ind+1] 61 | config = ConfigParser.SafeConfigParser() 62 | config.readfp(open(loc + NX_CONFIG_FILE)) 63 | 64 | if config.has_section('Core'): 65 | if 'name' in config.options('Core'): 66 | NAME = config.get('Core', 'name') 67 | if config.has_section('Log'): 68 | if 'level' in config.options('Log'): 69 | LOGLEVEL = config.get('Log', 'level') 70 | if 'file' in config.options('Log'): 71 | LOGFILE = config.get('Log', 'file') 72 | 73 | ## EXECUTION 74 | class PluginOne(IPlugin): 75 | context_graph = nx.MultiDiGraph() 76 | context_graph_file = None 77 | 78 | def __init__(self): 79 | if 'context_graph_file' in config.options("Configuration"): 80 | self.context_graph_file = config.get('Configuration', 'context_graph_file') 81 | 82 | 83 | def configure(self): 84 | """ 85 | 86 | :return: return list of [type, successful_load, name] 87 | """ 88 | config_options = config.options("Configuration") 89 | 90 | if self.context_graph_file is not None and os.path.isfile(self.context_graph_file): 91 | try: 92 | self.context_graph = self.read_graph(self.context_graph_file) 93 | except: # fall back to the empty in-memory graph if the file can't be parsed 94 | pass 95 | else: 96 | logging.info("Networkx graph file not found for import.") 97 | 98 | if 'type' in config_options: 99 | plugin_type = config.get('Configuration', 'type') 100 | else: 101 | logging.error("'Type' not specified in config file.") 102 | return [None, False, NAME] 103 | 104 | return [plugin_type, True, NAME] 105 | 106 | 107 | def enrich(self, g): # Networkx 108 | """ 109 | 110 | :param g: networkx graph to be merged 111 | :return: Nonetype 112 | 113 | Note: Like the Neo4j interface, this import does not aggregate edges, which 114 | means they must be handled at query time. The current titan algorithm aggregates edges based on time on 115 | merge. 116 | """ 117 | for uri, data in g.nodes(data=True): 118 | # For each node: 119 | # Get node by URI 120 | # (should we double check that the class/key/value match?) 121 | # If it exists in the receiving graph, going to need to merge properties (replacing with newer) 122 | if uri in self.context_graph.nodes(): 123 | self.context_graph.node[uri].update(data) 124 | else: 125 | self.context_graph.add_node(uri, attr_dict=data) 126 | # For each edge: 127 | for edge in g.edges(data=True): 128 | # Add it (edge[2] is the edge's attribute dictionary) 129 | self.context_graph.add_edge(edge[0], edge[1], attr_dict=edge[2]) 130 | 131 | 
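    # Usage sketch (hypothetical variable names, not part of the plugin API):
    # assuming enrichment_graph is a networkx MultiDiGraph in the Verum schema
    # and topic_graph was built by Verum.create_topic():
    #
    #     iface = PluginOne()
    #     iface.enrich(enrichment_graph)  # merge into the in-memory context graph
    #     sg = iface.query(topic_graph, max_depth=2)  # egocentric subgraph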
132 | def query(self, topic, max_depth=4, config=None, dont_follow=['enrichment', 'classification']): 133 | """ 134 | :param topic: a graph to return the context of. At least one node ID in topic \ 135 | must be in full graph g to return any context. 136 | :param max_depth: The maximum distance from the topic to search 137 | :param config: The context graph to use if not using the one configured with the plugin 138 | :param dont_follow: A list of attribute types to not follow 139 | :return: subgraph in networkx format 140 | """ 141 | distances = dict() 142 | 143 | if config is None: 144 | config = self.context_graph 145 | 146 | # Convert topic from a graph into a set of nodes 147 | topic_nodes = set() 148 | for n, d in topic.nodes(data=True): 149 | topic_nodes.add("class={0}&key={1}&value={2}".format(d['class'], d['key'], d['value'])) 150 | 151 | nodes = topic_nodes.copy() 152 | 153 | for t in topic: 154 | # get all nodes within max_depth distance from each topic and add them to the set 155 | new_distances = nx.single_source_shortest_path_length(self.context_graph.to_undirected(), t, cutoff=max_depth) 156 | nodes = nodes.union(set(new_distances.keys())) 157 | 158 | # Update shortest distances from topic to node 159 | for n in new_distances.keys(): 160 | if n in distances: 161 | if new_distances[n] < distances[n]: 162 | distances[n] = new_distances[n] 163 | else: 164 | distances[n] = new_distances[n] 165 | 166 | # remove dont_follow nodes: 167 | nodes_to_remove = set() 168 | for n in nodes: 169 | if self.context_graph.node[n]['key'] in dont_follow: 170 | nodes_to_remove.add(n) 171 | nodes = nodes.difference(nodes_to_remove) 172 | 173 | # Get the subgraph represented by the nodes: 174 | g = nx.MultiDiGraph(self.context_graph.subgraph(nodes)) 175 | 176 | # Prune out non-relevant components by removing those that contain no topic nodes. 177 | # This gets rid of nodes that were found by following dont_follow nodes 178 | for component in nx.connected_components(g.to_undirected()): 179 | if len(topic_nodes.intersection(set(component))) <= 0: # if there's no overlap between the component and topic 180 | g.remove_nodes_from(component) # remove the component 181 | 182 | # add the topic distances to the subgraph 183 | for n in g.nodes(): 184 | g.node[n]['topic_distance'] = distances[n] 185 | 186 | return g 187 | 188 | 189 | def get_graph(self): 190 | return self.context_graph 191 | 192 | 193 | def write_graph(self, G=None, subgraph_file=None): 194 | if G is None: 195 | G = self.context_graph 196 | if subgraph_file is None: 197 | subgraph_file = self.context_graph_file 198 | logging.info("Writing graph.") 199 | # write the graph out, choosing the writer by file extension 200 | file_format = subgraph_file.split(".")[-1] 201 | if file_format == "graphml": 202 | nx.write_graphml(G, subgraph_file) 203 | elif file_format == "gml": 204 | nx.write_gml(G, subgraph_file) 205 | elif file_format == "gexf": 206 | nx.write_gexf(G, subgraph_file) 207 | elif file_format == "net": 208 | nx.write_pajek(G, subgraph_file) 209 | elif file_format == "yaml": 210 | nx.write_yaml(G, subgraph_file) 211 | elif file_format == "gpickle": 212 | nx.write_gpickle(G, subgraph_file) 213 | else: 214 | print "File format not found, writing graphml." 
215 | nx.write_graphml(G, subgraph_file) 216 | 217 | def read_graph(self, subgraph_file=None): 218 | if subgraph_file is None: 219 | subgraph_file = self.context_graph_file 220 | logging.info("Reading graph.") 221 | # read the graph in, choosing the parser by file extension 222 | file_format = subgraph_file.split(".")[-1] 223 | if file_format == "graphml": 224 | return nx.read_graphml(subgraph_file) 225 | elif file_format == "gml": 226 | return nx.read_gml(subgraph_file) 227 | elif file_format == "gexf": 228 | return nx.read_gexf(subgraph_file) 229 | elif file_format == "net": 230 | return nx.read_pajek(subgraph_file) 231 | elif file_format == "yaml": 232 | return nx.read_yaml(subgraph_file) 233 | elif file_format == "gpickle": 234 | return nx.read_gpickle(subgraph_file) 235 | else: 236 | logging.warning("File format not found, returning empty graph.") 237 | return nx.MultiDiGraph() -------------------------------------------------------------------------------- /plugins/dns.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | AUTHOR: Gabriel Bassett 4 | DATE: 11-22-2014 5 | DEPENDENCIES: a list of modules requiring installation 6 | Copyright 2014 Gabriel Bassett 7 | 8 | LICENSE: 9 | Licensed to the Apache Software Foundation (ASF) under one 10 | or more contributor license agreements. See the NOTICE file 11 | distributed with this work for additional information 12 | regarding copyright ownership. The ASF licenses this file 13 | to you under the Apache License, Version 2.0 (the 14 | "License"); you may not use this file except in compliance 15 | with the License. You may obtain a copy of the License at 16 | 17 | http://www.apache.org/licenses/LICENSE-2.0 18 | 19 | Unless required by applicable law or agreed to in writing, 20 | software distributed under the License is distributed on an 21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 22 | KIND, either express or implied. See the License for the 23 | specific language governing permissions and limitations 24 | under the License. 
25 | 26 | DESCRIPTION: 27 | Functions necessary to enrich the context graph 28 | 29 | """ 30 | # PRE-USER SETUP 31 | pass 32 | 33 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 34 | 35 | 36 | # USER VARIABLES 37 | DNS_CONFIG_FILE = "dns.yapsy-plugin" 38 | NAME = "DNS Enrichment" 39 | 40 | 41 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 42 | 43 | ## IMPORTS 44 | from yapsy.IPlugin import IPlugin 45 | import networkx as nx 46 | from datetime import datetime 47 | import dateutil # to parse variable time strings 48 | import socket 49 | import uuid 50 | import ConfigParser 51 | import logging 52 | import inspect 53 | try: 54 | import dns.resolver 55 | resolver_import = True 56 | except: 57 | resolver_import = False 58 | 59 | ## SETUP 60 | __author__ = "Gabriel Bassett" 61 | loc = inspect.getfile(inspect.currentframe()) 62 | ind = loc.rfind("/") 63 | loc = loc[:ind+1] 64 | config = ConfigParser.SafeConfigParser() 65 | config.readfp(open(loc + DNS_CONFIG_FILE)) 66 | 67 | if config.has_section('Core'): 68 | if 'name' in config.options('Core'): 69 | NAME = config.get('Core', 'name') 70 | 71 | ## EXECUTION 72 | class PluginOne(IPlugin): 73 | def __init__(self): 74 | pass 75 | 76 | def configure(self): 77 | """ 78 | 79 | :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 80 | """ 81 | config_options = config.options("Configuration") 82 | 83 | if 'cost' in config_options: 84 | cost = config.get('Configuration', 'cost') 85 | else: 86 | cost = 9999 87 | if 'speed' in config_options: 88 | speed = config.get('Configuration', 'speed') 89 | else: 90 | speed = 9999 91 | 92 | if 'type' in config_options: 93 | plugin_type = config.get('Configuration', 'type') 94 | else: 95 | logging.error("'Type' not specified in config file.") 96 | return [None, False, NAME, "Takes an IP string and returns the DNS resolved IP address as networkx graph.", None, cost, speed] 97 | 98 | if 'inputs' in config_options: 99 | inputs = config.get('Configuration', 'Inputs') 100 | inputs = [l.strip().lower() for l in inputs.split(",")] 101 | else: 102 | logging.error("No input types specified in config file.") 103 | return [plugin_type, False, NAME, "Takes an IP string and returns the DNS resolved IP address as networkx graph.", None, cost, speed] 104 | 105 | return [plugin_type, True, NAME, "Takes an IP string and returns the DNS resolved IP address as networkx graph.", inputs, cost, speed] 106 | 107 | 108 | def run(self, domain, start_time=""): 109 | """ str, str -> networkx multiDiGraph 110 | 111 | :param domain: a string containing a domain to lookup up 112 | :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object. 113 | :return: a networkx graph representing the response. 
114 | """ 115 | 116 | # Parse the start_time 117 | if type(start_time) is str: 118 | try: 119 | time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ") 120 | except: 121 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 122 | elif type(start_time) is datetime: 123 | time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") 124 | else: 125 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 126 | 127 | g = nx.MultiDiGraph() 128 | 129 | # Get or create Domain node 130 | domain_uri = "class=attribute&key={0}&value={1}".format("domain", domain) 131 | g.add_node(domain_uri, { 132 | 'class': 'attribute', 133 | 'key': "domain", 134 | "value": domain, 135 | "start_time": time, 136 | "uri": domain_uri 137 | }) 138 | 139 | # Try the DNS lookup and just return the domain if the lookup fails 140 | try: 141 | ip = socket.gethostbyname(domain) 142 | except socket.gaierror: 143 | return g 144 | 145 | # Get or create Enrichment node 146 | dns_uri = "class=attribute&key={0}&value={1}".format("enrichment", "dns") 147 | g.add_node(dns_uri, { 148 | 'class': 'attribute', 149 | 'key': "enrichment", 150 | "value": "dns", 151 | "start_time": time, 152 | "uri": dns_uri 153 | }) 154 | 155 | ip_uri = "class=attribute&key={0}&value={1}".format("ip", ip) 156 | g.add_node(ip_uri, { 157 | 'class': 'attribute', 158 | 'key': "ip", 159 | "value": ip, 160 | "start_time": time, 161 | "uri": ip_uri 162 | }) 163 | 164 | # Create edge from domain to ip node 165 | edge_attr = { 166 | "relationship": "describedBy", 167 | "start_time": time, 168 | "origin": "dns" 169 | } 170 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri) 171 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri) 172 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 173 | rel_chain = "relationship" 174 | while rel_chain in edge_attr: 175 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 176 | rel_chain = edge_attr[rel_chain] 177 | if "origin" in edge_attr: 178 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 179 | edge_attr["uri"] = edge_uri 180 | g.add_edge(domain_uri, ip_uri, edge_uri, {"start_time": time}) 181 | 182 | # Link domain to enrichment 183 | edge_attr = { 184 | "relationship": "describedBy", 185 | "start_time": time, 186 | "origin": "dns" 187 | } 188 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri) 189 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, dns_uri) 190 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 191 | rel_chain = "relationship" 192 | while rel_chain in edge_attr: 193 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 194 | rel_chain = edge_attr[rel_chain] 195 | if "origin" in edge_attr: 196 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 197 | edge_attr["uri"] = edge_uri 198 | g.add_edge(domain_uri, dns_uri, edge_uri, edge_attr) 199 | 200 | 201 | if resolver_import: 202 | # Get nameservers. (note, this can get cached ones, but the more complex answer at http://stackoverflow.com/questions/4066614/how-can-i-find-the-authoritative-dns-server-for-a-domain-using-dnspython didn't work.) 
203 | # If resolution fails, simply return the graph as is 204 | try: 205 | answers = dns.resolver.query(domain, 'NS') 206 | except dns.resolver.NoAnswer: 207 | return g 208 | 209 | for ns in answers: 210 | ns = ns.to_text().rstrip(".") 211 | 212 | # Create the nameserver node 213 | ns_uri = "class=attribute&key={0}&value={1}".format("domain", ns) 214 | g.add_node(ns_uri, { 215 | 'class': 'attribute', 216 | 'key': "domain", 217 | "value": ns, 218 | "start_time": time, 219 | "uri": ns_uri 220 | }) 221 | 222 | # Link it to the domain 223 | edge_attr = { 224 | "relationship": "describedBy", 225 | "start_time": time, 226 | "origin": "dns", 227 | "describedBy": "nameserver" 228 | } 229 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri) 230 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, ns_uri) 231 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 232 | rel_chain = "relationship" 233 | while rel_chain in edge_attr: 234 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 235 | rel_chain = edge_attr[rel_chain] 236 | if "origin" in edge_attr: 237 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 238 | edge_attr["uri"] = edge_uri 239 | g.add_edge(domain_uri, ns_uri, edge_uri, edge_attr) 240 | 241 | return g 242 | -------------------------------------------------------------------------------- /plugins/maxmind.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | AUTHOR: Gabriel Bassett 4 | DATE: 12-17-2013 5 | DEPENDENCIES: a list of modules requiring installation 6 | Copyright 2014 Gabriel Bassett 7 | 8 | LICENSE: 9 | Licensed to the Apache Software Foundation (ASF) under one 10 | or more contributor license agreements. See the NOTICE file 11 | distributed with this work for additional information 12 | regarding copyright ownership. The ASF licenses this file 13 | to you under the Apache License, Version 2.0 (the 14 | "License"); you may not use this file except in compliance 15 | with the License. You may obtain a copy of the License at 16 | 17 | http://www.apache.org/licenses/LICENSE-2.0 18 | 19 | Unless required by applicable law or agreed to in writing, 20 | software distributed under the License is distributed on an 21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 22 | KIND, either express or implied. See the License for the 23 | specific language governing permissions and limitations 24 | under the License. 25 | 26 | DESCRIPTION: 27 | Functions necessary to enrich the context graph 28 | 29 | """ 30 | # PRE-USER SETUP 31 | pass 32 | 33 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 34 | 35 | 36 | # USER VARIABLES 37 | MAXMIND_FILE = "./GeoIPASNum.dat" 38 | MAXMIND_CONFIG_FILE = "maxmind.yapsy-plugin" 39 | NAME = "Maxmind ASN Enrichment" 40 | 41 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 42 | 43 | 44 | ## IMPORTS 45 | from yapsy.IPlugin import IPlugin 46 | import logging 47 | from datetime import datetime # timedelta imported above 48 | import dateutil # to parse variable time strings 49 | import uuid 50 | import ConfigParser 51 | import os 52 | import inspect 53 | try: 54 | import networkx as nx 55 | import GeoIP 56 | import ipaddress 57 | module_import_success = True 58 | except: 59 | module_import_success = False 60 | logging.error("Module import failed. 
Please install the following modules: networkx, GeoIP, ipaddress.") 61 | raise 62 | 63 | ## SETUP 64 | __author__ = "Gabriel Bassett" 65 | loc = inspect.getfile(inspect.currentframe()) 66 | ind = loc.rfind("/") 67 | loc = loc[:ind+1] 68 | config = ConfigParser.SafeConfigParser() 69 | config.readfp(open(loc + MAXMIND_CONFIG_FILE)) 70 | 71 | if config.has_section('Core'): 72 | if 'name' in config.options('Core'): 73 | NAME = config.get('Core', 'name') 74 | 75 | ## EXECUTION 76 | class PluginOne(IPlugin): 77 | gi = None 78 | dat_file_success = False 79 | 80 | def __init__(self, conf=config, dat_file=MAXMIND_FILE): 81 | try: 82 | maxmind_file = config.get('Configuration', 'dat_file') 83 | if maxmind_file[0] != "/": 84 | maxmind_file = loc + maxmind_file 85 | #print maxmind_file # DEBUG 86 | self.gi = GeoIP.open(maxmind_file, GeoIP.GEOIP_STANDARD) 87 | self.dat_file_success = True 88 | except: 89 | pass 90 | if not self.dat_file_success: 91 | try: 92 | if dat_file[0] != "/": 93 | dat_file = loc + dat_file 94 | #print dat_file # DEBUG 95 | self.gi = GeoIP.open(dat_file, GeoIP.GEOIP_STANDARD) 96 | self.dat_file_success = True 97 | except: 98 | pass 99 | 100 | def configure(self): 101 | """ 102 | 103 | :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 104 | """ 105 | config_options = config.options("Configuration") 106 | 107 | if 'cost' in config_options: 108 | cost = config.get('Configuration', 'cost') 109 | else: 110 | cost = 9999 111 | if 'speed' in config_options: 112 | speed = config.get('Configuration', 'speed') 113 | else: 114 | speed = 9999 115 | 116 | if 'type' in config_options: 117 | plugin_type = config.get('Configuration', 'type') 118 | else: 119 | logging.error("'Type' not specified in config file.") 120 | return [None, False, NAME, "Takes an IP and returns the ASN of the IP.", None, cost, speed] 121 | 122 | if 'inputs' in config_options: 123 | inputs = config.get('Configuration', 'Inputs') 124 | inputs = [l.strip().lower() for l in inputs.split(",")] 125 | else: 126 | logging.error("No input types specified in config file.") 127 | return [plugin_type, False, NAME, "Takes an IP and returns the ASN of the IP.", None, cost, speed] 128 | 129 | if not self.dat_file_success: 130 | return [plugin_type, False, NAME, "Takes an IP and returns the ASN of the IP.", inputs, cost, speed] 131 | elif not module_import_success: 132 | logging.error("Module import failure caused configuration failure.") 133 | return [plugin_type, False, NAME, "Takes an IP and returns the ASN of the IP.", inputs, cost, speed] 134 | else: 135 | return [plugin_type, True, NAME, "Takes an IP and returns the ASN of the IP.", inputs, cost, speed] 136 | 137 | 138 | def run(self, ip, start_time=""): 139 | """ str, str -> networkx multiDiGraph 140 | 141 | :param ip: IP address to enrich in graph 142 | :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object. 
143 | :return: enrichment graph 144 | """ 145 | 146 | # Parse the start_time 147 | if type(start_time) is str: 148 | try: 149 | time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ") 150 | except: 151 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 152 | elif type(start_time) is datetime: 153 | time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") 154 | else: 155 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 156 | 157 | # Validate IP 158 | _ = ipaddress.ip_address(unicode(ip)) 159 | 160 | # open maxmind ASN data 161 | gi = self.gi 162 | 163 | g = nx.MultiDiGraph() 164 | # Create the maxmind ASN node 165 | maxmind_asn_uri = "class=attribute&key={0}&value={1}".format("enrichment", "maxmind_asn") # Move prefix assignment to merge_titan 166 | g.add_node(maxmind_asn_uri, { 167 | 'class': 'attribute', 168 | 'key': "enrichment", 169 | "value": "maxmind_asn", 170 | "start_time": time, 171 | "uri": maxmind_asn_uri 172 | }) 173 | 174 | # set IP URI 175 | ip_uri = "class=attribute&key={0}&value={1}".format("ip", ip) 176 | g.add_node(ip_uri, { 177 | 'class': 'attribute', 178 | 'key': "ip", 179 | "value": ip, 180 | "start_time": time, 181 | "uri": ip_uri 182 | }) 183 | 184 | # retrieve maxmind enrichment 185 | ASN = gi.name_by_addr(ip) 186 | 187 | #print ASN # DEBUG 188 | #print type(gi) # DEBUG 189 | #print ip # DEBUG 190 | 191 | if ASN: 192 | ASN = ASN.split(" ", 1) 193 | 194 | # create ASN node 195 | asn_uri = "class=attribute&key={0}&value={1}".format("asn", ASN[0][2:]) 196 | attributes = { 197 | 'class': 'attribute', 198 | 'key': 'asn', 199 | 'value': ASN[0][2:], 200 | "uri": asn_uri, 201 | "start_time": time 202 | } 203 | if len(ASN) > 1: 204 | attributes['owner'] = ASN[1] 205 | g.add_node(asn_uri, attributes) 206 | 207 | # link ip to ASN node 208 | edge_attr = { 209 | "relationship": "describedBy", 210 | "origin": "maxmind_enrichment", 211 | "start_time": time, 212 | } 213 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri) 214 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, asn_uri) 215 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 216 | rel_chain = "relationship" 217 | while rel_chain in edge_attr: 218 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 219 | rel_chain = edge_attr[rel_chain] 220 | if "origin" in edge_attr: 221 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 222 | edge_attr["uri"] = edge_uri 223 | g.add_edge(ip_uri, asn_uri, edge_uri, edge_attr) 224 | 225 | 226 | # link ip to maxmind enrichment 227 | edge_attr = { 228 | "relationship": "describedBy", 229 | "origin": "maxmind_enrichment", 230 | "start_time": time, 231 | } 232 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri) 233 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, maxmind_asn_uri) 234 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 235 | rel_chain = "relationship" 236 | while rel_chain in edge_attr: 237 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 238 | rel_chain = edge_attr[rel_chain] 239 | if "origin" in edge_attr: 240 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 241 | edge_attr["uri"] = edge_uri 242 | g.add_edge(ip_uri, maxmind_asn_uri, edge_uri, edge_attr) 243 | 244 | 245 | else: 246 | logging.debug("Maxmind miss on {0}".format(ip)) 247 | 248 | # Reuturn the data enriched graph 249 | return g 250 | -------------------------------------------------------------------------------- /plugins/path_count.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | pass 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | PLUGIN_CONFIG_FILE = "path_count.yapsy-plugin" 40 | NAME = "PathCount" 41 | 42 | 43 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 44 | 45 | 46 | ## IMPORTS 47 | from yapsy.IPlugin import IPlugin 48 | import logging 49 | import networkx as nx 50 | from datetime import datetime # timedelta imported above 51 | import uuid 52 | import ConfigParser 53 | import inspect 54 | import numpy as np 55 | 56 | 57 | ## SETUP 58 | loc = inspect.getfile(inspect.currentframe()) 59 | ind = loc.rfind("/") 60 | loc = loc[:ind+1] 61 | config = ConfigParser.SafeConfigParser() 62 | config.readfp(open(loc + PLUGIN_CONFIG_FILE)) 63 | 64 | if config.has_section('Core'): 65 | if 'name' in config.options('Core'): 66 | NAME = config.get('Core', 'name') 67 | if config.has_section('Log'): 68 | if 'level' in config.options('Log'): 69 | LOGLEVEL = config.get('Log', 'level') 70 | if 'file' in config.options('Log'): 71 | LOGFILE = config.get('Log', 'file') 72 | 73 | 74 | ## EXECUTION 75 | class PluginOne(IPlugin): 76 | # TODO: The init should contain anything to load modules or data files that should be variables of the plugin object 77 | def __init__(self): 78 | pass 79 | 80 | # TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly 81 | # TODO: Current layout is for an enrichment plugin 82 | # TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed] 83 | # TODO: interface [type, successful_load, name] 84 | # TODO: query [TBD] 85 | # TODO: minion [TBD] 86 | def configure(self): 87 | """ 88 | 89 | :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 90 | """ 91 | config_options = config.options("Configuration") 92 | 93 | if 'cost' in config_options: 94 | cost = config.get('Configuration', 'cost') 95 | else: 96 | cost = 9999 97 | if 'speed' in config_options: 98 | speed = config.get('Configuration', 'speed') 99 | else: 100 | speed = 9999 101 | 102 | if config.has_section('Documentation') and 'description' in config.options('Documentation'): 103 | description = config.get('Documentation', 'description') 104 | else: 105 | 
logging.error("'Description not in config file.") 106 | return [None, False, NAME, None, cost, speed] 107 | 108 | if 'type' in config_options: 109 | plugin_type = config.get('Configuration', 'type') 110 | else: 111 | logging.error("'Type' not specified in config file.") 112 | return [None, False, NAME, description, cost, speed] 113 | 114 | return [plugin_type, True, NAME, description, cost, speed] 115 | 116 | 117 | def score(self, sg, topic, max_depth=8): # get_path_count_probability 118 | """ 119 | 120 | :param sg: egocentric subgraph around topic in networkx format 121 | :param topic: graph of topics 122 | :param max_depth: maximum length of paths 123 | :return: Dictionary of probabilities keyed by node 124 | """ 125 | # THIS IS I CRITICAL PER the 1-1-1-1-t-3-9-1 graph 126 | # THIS WILL NOT TOLERATE LOOPS WITHOUT ADDITIONAL EFFORT 127 | targets = set(sg.nodes()).difference(set(topic.nodes())) 128 | paths = {} 129 | probabilities = {} 130 | 131 | # Create a meta node to represent the topic nodes 132 | # Based on https://gist.github.com/Zulko/7629206 133 | meta_node_uuid = str(uuid.uuid4()) 134 | 135 | sg.add_node(meta_node_uuid) # Add the 'merged' node 136 | 137 | for n1, n2, data in sg.edges(data=True): 138 | # For all edges related to one of the nodes to merge, 139 | # make an edge going to or coming from the `new gene`. 140 | if n1 in topic.nodes(): 141 | sg.add_edge(meta_node_uuid, n2, data=data) 142 | elif n2 in topic.nodes(): 143 | sg.add_edge(n1, meta_node_uuid, data=data) 144 | 145 | # retrieve all paths to all nodes 146 | for target in targets: 147 | paths[target] = nx.all_simple_paths(sg, meta_node_uuid, target, cutoff=max_depth) 148 | 149 | # Combine the multiple paths from multiple topics to a single score per node 150 | for target in targets: 151 | probabilities[target] = 0 152 | for path in paths[target]: 153 | # develop a weight based on the length of the path 154 | # INSERT WEIGHTING FUNCTION BELOW 155 | path_weight = self.normal_weight(len(path)) 156 | # Calculate the confidence in the path 157 | confidence = 1 158 | for node in path: 159 | if 'confidence' in sg.node[node]: 160 | confidence *= sg.node[node]['confidence'] 161 | # Sum the path score. The path's score is it's confidence multiplied by it's weight 162 | probabilities[target] += confidence * path_weight 163 | 164 | # Make the topic nodes the highest probabilities just to put them on top 165 | max_p = max(probabilities.values()) 166 | for node in topic.nodes(): 167 | probabilities[node] = max_p 168 | 169 | # TODO: Could normalize values to 1.... 170 | 171 | # remove the meta node 172 | sg.remove_node(meta_node_uuid) 173 | 174 | # return probabilities 175 | return probabilities 176 | 177 | 178 | def multigraph_to_digraph(self, g): 179 | """ 180 | 181 | :param g: takes a networkx mulitgraph 182 | :return: returns a networkx digraph with edge weights representing the number of edges 183 | 184 | NOTE: This butchers duplicate edge properties. If converting to score, use original edges in output. 
185 | """ 186 | G = nx.DiGraph() 187 | edge_attributes = {} 188 | 189 | # if g isn't really a multigraph, just return it 190 | if not g.is_multigraph(): 191 | return g 192 | 193 | # collapse down to a diagraph 194 | G.add_nodes_from(g.nodes(data=True)) 195 | G.add_edges_from(g.edges(data=True)) 196 | 197 | # for each edge, weight the confidence by the number of edges 198 | ''' 199 | # captures a multiple of the confidence on the edge in the output graph 200 | for edge in G.edges(): 201 | count = g.edges().count(edge) 202 | if "count" > 1: 203 | if "confidence" in G.edge[edge[0]][edge[1]]: 204 | G.edge[edge[0]][edge[1]]['confidence'] *= count 205 | else: 206 | G.edge[edge[0]][edge[1]]["confidence"] = count 207 | ''' 208 | # Captures every confidence 209 | for edge in G.edges(): 210 | confidence = 0 211 | for src_edge in g.edge[edge[0]][edge[1]].values(): 212 | confidence += src_edge.get('confidence', 1) 213 | G.edge[edge[0]][edge[1]]['confidence'] = confidence 214 | # # collapse down to a diagraph 215 | # G.add_nodes_from(g.nodes(data=True)) 216 | # G.add_edges_from(g.edges(data=True)) 217 | 218 | return G 219 | 220 | 221 | ### DISTANCE WEIGHTS ### 222 | def linear_weight(self, distance, ddp=.2): 223 | """ 224 | 225 | :param distance: distance from topic 226 | :param ddp: percentage to degrade 227 | :return: Linear weighting factor as float 228 | """ 229 | return 1 - (distance * ddp) 230 | 231 | 232 | def log_weight(self, distance, a=1, b=1, n=3, pwr=1): 233 | """ 234 | 235 | :param distance: distance: distance from topic 236 | :param a: constant to shape graph. Adjusts hight at 0 = a / (1 + b) 237 | :param b: constant to shape graph. 238 | :param n: constant to shape graph. 239 | :param pwr: constant to shape graph. 240 | :return: log weighting factor as float 241 | """ 242 | return a / (1 + b*np.exp((distance-n) * pwr)) 243 | 244 | 245 | def exponential_weight(self, distance, b=2): 246 | return np.exp(-distance/b) 247 | 248 | 249 | def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1): 250 | """ 251 | 252 | :param distance: distance from topic 253 | :param pwr: constant to shape graph. Higher = steeper decline 254 | :param b: constant to shape graph. lower = greater spread 255 | :return: normal weighting factor as float 256 | pwr = 2.5, a = 1, c = 0, b = 30 257 | """ 258 | return a * np.exp(-(distance + c)**pwr/b) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Verum 2 | ===== 3 | 4 | Implementation of Context-Graph algorithms for graph enrichment and querying. 5 | 6 | Context Graph Enrichment: 7 | cg_enrich.py provides functions to enrich the context graph. 8 | 9 | Context Graph Query: 10 | cg_query.py provides functions necessary to query the context graph for a specific topic. 11 | 12 | Context Graph Presentation: 13 | cg_present.py provides functions necessary to present the data to various clients. 14 | 15 | 16 | ## Installation 17 | 18 | Clone the Repository 19 | ``` 20 | git clone https://github.com/vz-risk/Verum.git 21 | ``` 22 | 23 | 24 | ## Usage 25 | Initialize storage. In this case, [neo4j] (http://neo4j.com/). 26 | 1. [Download neo4j] (http://neo4j.com/download/). 27 | 2. Unzip it, (if *nix or Mac OS X). 28 | 3. Run it, ('./bin/neo4j start' on *nix or Mac OS X). 
49 | 50 | Define some data to enrich: 51 | ``` 52 | ips = ['98.124.199.1', 53 | '178.62.219.229', 54 | '98.124.198.1', 55 | '209.216.10.148', 56 | '124.248.237.26', 57 | '134.170.185.211', 58 | '223.29.248.252', 59 | '117.18.73.98'] 60 | domains = ['81.java-se.com', 61 | 'stifie.com', 62 | 'microsoftor.com', 63 | 'pop1.java-sec.com', 64 | '*.mynethood.com', 65 | 'www.btipnow.com', 66 | '*.searchenginewatch.us.com', 67 | 'google3853ed273b89687a.mynethood.com', 68 | 'pop.java-sec.com', 69 | 'm-stone.co.jp', 70 | 'www.mynethood.com', 71 | 'jre76.java-sec.com', 72 | 'cdn.foxitsoftwares.com', 73 | 'u.java-se.com', 74 | 'bloger2.microsoftor.com', 75 | 'kai.jztok.com', 76 | 'ns1.searchenginewatch.us.com', 77 | '*.microsoftor.com', 78 | 's3m7ke.microsoftor.com', 79 | 'mynethood.com', 80 | 's3m7ker.microsoftor.com', 81 | 'officesoft.microsoftor.com', 82 | 'foxitsoftwares.com'] 83 | ips2 = ['107.160.143.10', 84 | '107.167.73.219', 85 | '148.163.104.35', 86 | '148.163.104.35', 87 | '184.164.70.204', 88 | '184.164.81.11', 89 | '216.244.93.247', 90 | '50.117.38.170', 91 | '50.117.38.170'] 92 | domains2 = ['4uexs.rxlijd.bbs.mythem.es', 93 | 'abdebassetbenhassen.org', 94 | 'acid.borec.cz', 95 | 'blogs.burlingtonfreepress.com', 96 | 'buysacramentoproperties.com', 97 | 'cancunluxurystyle.com', 98 | 'cate-rina.net', 99 | 'cdn.servehttp.com', 100 | 'chuamun.com', 101 | 'dayapramana.com', 102 | 'dirtychook.com', 103 | 'f1wot.bbs.mythem.es', 104 | 'fybic.com', 105 | 'gotoe3.tw', 106 | 'haft-honar.com', 107 | 'ichener-duwackstumbe.de', 108 | 'iotqduzgha.vtre.qvofj.qypvthu.loqu.forum.mythem.es', 109 | 'jigsore.nasky.net', 110 | 'kitsoft.ru', 111 | 'lytovp.istmein.de', 112 | 'meeting-rsvp.com', 113 | 'mignonfilet.com', 114 | 'myinfo.any-request-allowed.com', 115 | 'oceanspirit.com', 116 | 'opm-learning.org', 117 | 'opmsecurity.org', 118 | 'pejoratively.bloq.ro', 119 | 'subhashmadhu.com', 120 | 'tlvegan.com', 121 | 'tommyhumphreys.com', 122 | 'transcandence.com', 123 | 'travelingmu.com', 124 | 'tsv-albertshofen.net', 125 | 'universofoot.com.br', 126 | 'WDC-News-post.com', 127 | 'wdc-news-post.com', 128 | 'woodcreations.com.pk', 129 | 'xn--80aa4agmizb8a.xn--p1ai', 130 | 'yodotink.rjtp.nxrlecd.tcsq.qypvthu.loqu.forum.mythem.es'] 131 | ``` 132 | 133 | Run the following to test enrichment. 134 | ``` 135 | # Query IP & domain plugins 136 | print verum.get_enrichments(['ip']) 137 | print verum.get_enrichments(['domain']) 138 | # Query cheap IP plugins 139 | print verum.get_enrichments(['ip'], cost=3) 140 | # Query fast domain plugins 141 | print verum.get_enrichments(['domain'], speed=2) 142 | # Run maxmind enrichments of an IP 143 | import networkx as nx 144 | g = verum.run_enrichments(ips[0], 'ip', names=[u'Maxmind ASN Enrichment']) 145 | print nx.info(g) 146 | ``` 
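Each enrichment returns a standalone networkx graph. Because both plugins build node IDs as deterministic `class=...&key=...&value=...` URIs, graphs from different enrichments of the same datum can be merged locally before storage. A sketch (plugin names as shown above; `nx.compose` returns the union of both graphs):
```
import networkx as nx

g1 = verum.run_enrichments(ips[0], 'ip', names=[u'Maxmind ASN Enrichment'])
g2 = verum.run_enrichments(ips[0], 'ip', names=[u'Cymru Enrichment'])

# compose() unions nodes and edges; shared nodes line up because both
# plugins generate the same URI for the same IP.
combined = nx.compose(g1, g2)
print nx.info(combined)
```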
147 | 148 | Run the following to test querying. (Note: the storage interface modules expect graphs to be in a specific schema. If they are not, the interface module will raise an error when trying to store them.) 149 | ``` 150 | # (If you didn't create a graph above through an enrichment) 151 | g = Verum.create_topic({'ip': ['184.164.70.204', '184.164.81.11'], 'domain': ['WDC-News-post.com', 'wdc-news-post.com']}) 152 | ``` 153 | 154 | ``` 155 | # See what storage interfaces are configured 156 | print verum.get_interfaces(configured=True) 157 | # Set the storage interface 158 | verum.set_interface('Neo4j') 159 | # Store the graph in the storage interface 160 | verum.store_graph(g) 161 | ``` 162 | 163 | Finally, attempt to enrich multiple pieces of data to form a robust context graph: 164 | ``` 165 | # Enrich IPs 166 | for ip in ips + ips2: 167 | verum.store_graph(verum.run_enrichments(ip, 'ip', names=[u'Maxmind ASN Enrichment'])) 168 | # Enrich Domains (passing exceptions so if a plugin fails it doesn't stop the loop) 169 | for domain in domains + domains2: 170 | try: 171 | verum.store_graph(verum.run_enrichments(domain, 'domain', names=[u'DNS Enrichment', u'TLD Enrichment'])) 172 | except: 173 | pass 174 | # Bulk enrich IPs with Cymru 175 | verum.store_graph(verum.run_enrichments(ips + ips2, 'ip', names=[u'Cymru Enrichment'])) 176 | ``` 177 | 178 | Now open `http://localhost:7474/` in a browser and enter the Cypher Query: 179 | ``` 180 | MATCH (n:attribute {key:'ip', value:"98.124.198.1"}) 181 | RETURN n; 182 | ``` 183 | You can then visually explore the graph associated with that IP. 184 | 
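To see the enrichment context rather than the single node, a Cypher pattern like the following (a sketch; it assumes only the node properties shown above) returns the IP together with its immediate neighbors:
```
MATCH (n:attribute {key:'ip', value:"98.124.198.1"})-[r]-(m)
RETURN n, r, m;
```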
185 | We want to classify all these domains and IPs as malicious: 186 | ``` 187 | # Classify all IPs and Domains as Malicious 188 | for ip in ips + ips2: 189 | verum.store_graph(verum.classify.run({'key': 'ip', 'value': ip, 'classification': 'malice'})) 190 | for domain in domains + domains2: 191 | verum.store_graph(verum.classify.run({'key': 'domain', 'value': domain, 'classification': 'malice'})) 192 | ``` 193 | 194 | ### Querying 195 | 196 | Now that we have built an enriched context graph, we can query it. 197 | 198 | ``` 199 | # Find out if '117.18.73.98' is malicious 200 | # Create a topic to score 201 | topic = Verum.create_topic({"ip": '117.18.73.98'}) 202 | # Retrieve the subgraph associated with it 203 | sg = verum.run_query(topic) 204 | # List the configured scoring plugins available 205 | verum.get_scoring_plugins() 206 | # Set the default scoring plugin 207 | verum.set_scoring_plugin('PageRank2') 208 | # Check to ensure it was set 209 | verum.get_default_scoring_plugin() 210 | scores = verum.score_subgraph(topic, sg) 211 | print scores 212 | ``` 213 | 214 | ### Scoring 215 | To understand the scores, we can do some relative comparisons. We compare the malice score both to the topic and to other nodes, and see that the malice node is stronger than average but not overly strong. 216 | ``` 217 | # Compare the malice node to the average score 218 | Verum.compare_classifications(scores, {"class":"attribute", "key":"classification", "value":"malice"}, output="print") 219 | # Compare the malice node to the topic node 220 | Verum.compare_classifications(scores, {"class":"attribute", "key":"classification", "value":"malice"}, {"class":"attribute", "key":"ip", "value":"117.18.73.98"}, output="print") 221 | # Score the percentile of the malice score 222 | Verum.score_percentile(scores, {"class":"attribute", "key":"classification", "value":"malice"}, output="print") 223 | ``` 224 | 225 | Note: if you wanted to know what else is associated with malice, you could rescore the subgraph with the malice node as the topic and compare the node you are interested in (117.18.73.98 in our example) to the other nodes as above. 226 | 227 | 228 | ### Minions 229 | Minions are threaded algorithms that operate on the context graph in the background. They have access to the app object and so have more ability to work directly with the context graph than other plugins. 230 | 231 | First, run the following Cypher query to find out how many nodes are in your context graph. 232 | ``` 233 | start n=node(*) 234 | match n 235 | return count(n) 236 | ``` 237 | 238 | Start a simple minion which imports and enriches a threat intelligence feed. 239 | ``` 240 | # List configured minions 241 | verum.get_minions() 242 | # Start a minion 243 | verum.start_minions([u'OSINT Bambenek Consulting V2']) 244 | # Check if it's started 245 | verum.get_running_minions() 246 | ``` 247 | 248 | Check the number of nodes. It should be increasing. 249 | ``` 250 | start n=node(*) 251 | match n-[r]-() 252 | return count(distinct n), count(distinct r) 253 | ``` 254 | 255 | 256 | ## Contributing 257 | 1. Fork it! 258 | 2. Create your feature branch: `git checkout -b my-new-feature` 259 | 3. Commit your changes: `git commit -am 'Add some feature'` 260 | 4. Push to the branch: `git push origin my-new-feature` 261 | 5. Submit a pull request :D 262 | 263 | 264 | ## License 265 | 266 | Licensed to the Apache Software Foundation (ASF) under one 267 | or more contributor license agreements. See the NOTICE file 268 | distributed with this work for additional information 269 | regarding copyright ownership. The ASF licenses this file 270 | to you under the Apache License, Version 2.0 (the 271 | "License"); you may not use this file except in compliance 272 | with the License. You may obtain a copy of the License at 273 | http://www.apache.org/licenses/LICENSE-2.0 274 | Unless required by applicable law or agreed to in writing, 275 | software distributed under the License is distributed on an 276 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 277 | KIND, either express or implied. See the License for the 278 | specific language governing permissions and limitations 279 | under the License. -------------------------------------------------------------------------------- /plugins/tld.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | AUTHOR: Gabriel Bassett 4 | DATE: 12-17-2013 5 | DEPENDENCIES: a list of modules requiring installation 6 | Copyright 2014 Gabriel Bassett 7 | 8 | LICENSE: 9 | Licensed to the Apache Software Foundation (ASF) under one 10 | or more contributor license agreements. See the NOTICE file 11 | distributed with this work for additional information 12 | regarding copyright ownership. 
The ASF licenses this file 13 | to you under the Apache License, Version 2.0 (the 14 | "License"); you may not use this file except in compliance 15 | with the License. You may obtain a copy of the License at 16 | 17 | http://www.apache.org/licenses/LICENSE-2.0 18 | 19 | Unless required by applicable law or agreed to in writing, 20 | software distributed under the License is distributed on an 21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 22 | KIND, either express or implied. See the License for the 23 | specific language governing permissions and limitations 24 | under the License. 25 | 26 | DESCRIPTION: 27 | Functions necessary to enrich the context graph 28 | 29 | """ 30 | # PRE-USER SETUP 31 | pass 32 | 33 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 34 | 35 | 36 | # USER VARIABLES 37 | TLD_CONFIG_FILE = "tld.yapsy-plugin" 38 | NAME = "TLD Enrichment" 39 | 40 | 41 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 42 | 43 | 44 | ## IMPORTS 45 | from yapsy.IPlugin import IPlugin 46 | import logging 47 | import networkx as nx 48 | from datetime import datetime # timedelta imported above 49 | import dateutil # to parse variable time strings 50 | import uuid 51 | import ConfigParser 52 | import inspect 53 | try: 54 | import tldextract 55 | module_import_success = True 56 | except: 57 | module_import_success = False 58 | logging.error("Module import failed. Please install the following module: tldextract.") 59 | raise 60 | 61 | 62 | ## SETUP 63 | __author__ = "Gabriel Bassett" 64 | loc = inspect.getfile(inspect.currentframe()) 65 | ind = loc.rfind("/") 66 | loc = loc[:ind+1] 67 | config = ConfigParser.SafeConfigParser() 68 | config.readfp(open(loc + TLD_CONFIG_FILE)) 69 | 70 | if config.has_section('Core'): 71 | if 'name' in config.options('Core'): 72 | NAME = config.get('Core', 'name') 73 | 74 | ## EXECUTION 75 | class PluginOne(IPlugin): 76 | def __init__(self): 77 | pass 78 | 79 | def configure(self): 80 | """ 81 | 82 | :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 83 | """ 84 | config_options = config.options("Configuration") 85 | 86 | if 'cost' in config_options: 87 | cost = config.get('Configuration', 'cost') 88 | else: 89 | cost = 9999 90 | if 'speed' in config_options: 91 | speed = config.get('Configuration', 'speed') 92 | else: 93 | speed = 9999 94 | 95 | if 'type' in config_options: 96 | plugin_type = config.get('Configuration', 'Type') 97 | else: 98 | logging.error("'Type' not specified in config file.") 99 | return [None, False, NAME, "Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.", None, cost, speed] 100 | 101 | if 'inputs' in config_options: 102 | inputs = config.get('Configuration', 'Inputs') 103 | inputs = [l.strip().lower() for l in inputs.split(",")] 104 | else: 105 | logging.error("No input types specified in config file.") 106 | return [plugin_type, False, NAME, "Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.", None, cost, speed] 107 | 108 | if not module_import_success: 109 | logging.error("Module import failure caused configuration failure.") 110 | return [plugin_type, False, NAME, "Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.", inputs, cost, speed] 111 | else: 112 | return [plugin_type, True, NAME, "Takes a domain name and returns the top level domain, 
mid-domain, and sub-domain as networkx graph.", inputs, cost, speed] 113 | 114 | 115 | def run(self, domain, start_time="", include_subdomain=False): 116 | """ str, str -> networkx multiDiGraph 117 | 118 | :param domain: a string containing a domain to look up 119 | :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object. 120 | :param include_subdomain: Boolean value. Default False. If true, subdomain will be returned in enrichment graph 121 | :return: a networkx graph representing the sections of the domain 122 | """ 123 | # Parse the start_time 124 | if type(start_time) is str: 125 | try: 126 | time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ") 127 | except: 128 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 129 | elif type(start_time) is datetime: 130 | time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") 131 | else: 132 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 133 | 134 | ext = tldextract.extract(domain) 135 | 136 | g = nx.MultiDiGraph() 137 | 138 | # Get or create Domain node 139 | domain_uri = "class=attribute&key={0}&value={1}".format("domain", domain) 140 | g.add_node(domain_uri, { 141 | 'class': 'attribute', 142 | 'key': "domain", 143 | "value": domain, 144 | "start_time": time, 145 | "uri": domain_uri 146 | }) 147 | 148 | # Get or create Enrichment node 149 | tld_extract_uri = "class=attribute&key={0}&value={1}".format("enrichment", "tld_extract") 150 | g.add_node(tld_extract_uri, { 151 | 'class': 'attribute', 152 | 'key': "enrichment", 153 | "value": "tld_extract", 154 | "start_time": time, 155 | "uri": tld_extract_uri 156 | }) 157 | 158 | # Get or create TLD node 159 | tld_uri = "class=attribute&key={0}&value={1}".format("domain", ext.suffix) 160 | g.add_node(tld_uri, { 161 | 'class': 'attribute', 162 | 'key': "domain", 163 | "value": ext.suffix, 164 | "start_time": time, 165 | "uri": tld_uri 166 | }) 167 | 168 | # Link domain to tld 169 | edge_attr = { 170 | "relationship": "describedBy", 171 | "start_time": time, 172 | "origin": "tld_extract", 173 | "describedBy":"suffix" 174 | } 175 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri) 176 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, tld_uri) 177 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 178 | rel_chain = "relationship" 179 | while rel_chain in edge_attr: 180 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 181 | rel_chain = edge_attr[rel_chain] 182 | if "origin" in edge_attr: 183 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 184 | edge_attr["uri"] = edge_uri 185 | g.add_edge(domain_uri, tld_uri, edge_uri, edge_attr) 186 | 187 | 188 | # Get or create mid domain node 189 | mid_domain_uri = "class=attribute&key={0}&value={1}".format("domain", ext.domain) 190 | g.add_node(mid_domain_uri, { 191 | 'class': 'attribute', 192 | 'key': "domain", 193 | "value": ext.domain, 194 | "start_time": time, 195 | "uri": mid_domain_uri 196 | }) 197 | 198 | # Link domain to mid_domain 199 | edge_attr = { 200 | "relationship": "describedBy", 201 | "start_time": time, 202 | "origin": "tld_extract", 203 | "describedBy":"domain" 204 | } 205 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri) 206 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, mid_domain_uri) 207 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 208 | rel_chain = "relationship" 209 | while rel_chain in edge_attr: 210 | edge_uri = edge_uri + 
"&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 211 | rel_chain = edge_attr[rel_chain] 212 | if "origin" in edge_attr: 213 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 214 | edge_attr["uri"] = edge_uri 215 | g.add_edge(domain_uri, mid_domain_uri, edge_uri, edge_attr) 216 | 217 | 218 | # if including subdomains, create subdomain and node 219 | if include_subdomain: 220 | # Get or create mid domain node 221 | subdomain_uri = "class=attribute&key={0}&value={1}".format("domain", ext.subdomain) 222 | g.add_node(subdomain_uri, { 223 | 'class': 'attribute', 224 | 'key': "domain", 225 | "value": ext.domain, 226 | "start_time": time, 227 | "uri": subdomain_uri 228 | }) 229 | 230 | # Link domain to mid_domain 231 | edge_attr = { 232 | "relationship": "describedBy", 233 | "start_time": time, 234 | "origin": "tld_extract", 235 | "describedBy":"subdomain" 236 | } 237 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri) 238 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, subdomain_uri) 239 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 240 | rel_chain = "relationship" 241 | while rel_chain in edge_attr: 242 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 243 | rel_chain = edge_attr[rel_chain] 244 | if "origin" in edge_attr: 245 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 246 | edge_attr["uri"] = edge_uri 247 | g.add_edge(domain_uri, subdomain_uri, edge_uri, edge_attr) 248 | 249 | # Link domain to enrichment 250 | edge_attr = { 251 | "relationship": "describedBy", 252 | "start_time": time, 253 | "origin": "tld_extract" 254 | } 255 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri) 256 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, tld_extract_uri) 257 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 258 | rel_chain = "relationship" 259 | while rel_chain in edge_attr: 260 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 261 | rel_chain = edge_attr[rel_chain] 262 | if "origin" in edge_attr: 263 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 264 | edge_attr["uri"] = edge_uri 265 | g.add_edge(domain_uri, tld_extract_uri, edge_uri, edge_attr) 266 | 267 | return g -------------------------------------------------------------------------------- /plugins/cymru.py: -------------------------------------------------------------------------------- 1 | # TODO: Refactor as plugin 2 | #!/usr/bin/env python 3 | """ 4 | AUTHOR: Gabriel Bassett 5 | DATE: 12-17-2013 6 | DEPENDENCIES: a list of modules requiring installation 7 | Copyright 2014 Gabriel Bassett 8 | 9 | LICENSE: 10 | Licensed to the Apache Software Foundation (ASF) under one 11 | or more contributor license agreements. See the NOTICE file 12 | distributed with this work for additional information 13 | regarding copyright ownership. The ASF licenses this file 14 | to you under the Apache License, Version 2.0 (the 15 | "License"); you may not use this file except in compliance 16 | with the License. You may obtain a copy of the License at 17 | 18 | http://www.apache.org/licenses/LICENSE-2.0 19 | 20 | Unless required by applicable law or agreed to in writing, 21 | software distributed under the License is distributed on an 22 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | KIND, either express or implied. See the License for the 24 | specific language governing permissions and limitations 25 | under the License. 
-------------------------------------------------------------------------------- /plugins/cymru.py: -------------------------------------------------------------------------------- 1 | # TODO: Refactor as plugin 2 | #!/usr/bin/env python 3 | """ 4 | AUTHOR: Gabriel Bassett 5 | DATE: 12-17-2013 6 | DEPENDENCIES: a list of modules requiring installation 7 | Copyright 2014 Gabriel Bassett 8 | 9 | LICENSE: 10 | Licensed to the Apache Software Foundation (ASF) under one 11 | or more contributor license agreements. See the NOTICE file 12 | distributed with this work for additional information 13 | regarding copyright ownership. The ASF licenses this file 14 | to you under the Apache License, Version 2.0 (the 15 | "License"); you may not use this file except in compliance 16 | with the License. You may obtain a copy of the License at 17 | 18 | http://www.apache.org/licenses/LICENSE-2.0 19 | 20 | Unless required by applicable law or agreed to in writing, 21 | software distributed under the License is distributed on an 22 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 23 | KIND, either express or implied. See the License for the 24 | specific language governing permissions and limitations 25 | under the License. 26 | 27 | DESCRIPTION: 28 | Functions necessary to enrich the context graph 29 | 30 | """ 31 | # PRE-USER SETUP 32 | from datetime import timedelta 33 | 34 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 35 | 36 | 37 | # USER VARIABLES 38 | CYMRU_CONFIG_FILE = "cymru.yapsy-plugin" 39 | NAME = 'cymru' 40 | 41 | 42 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 43 | 44 | 45 | 46 | ## IMPORTS 47 | import networkx as nx 48 | from yapsy.IPlugin import IPlugin 49 | import logging 50 | import ConfigParser 51 | from datetime import datetime # timedelta imported above 52 | import dateutil.parser # to parse variable time strings 53 | import uuid 54 | import imp 55 | import ipaddress 56 | import inspect 57 | 58 | ## SETUP 59 | 60 | __author__ = "Gabriel Bassett" 61 | loc = inspect.getfile(inspect.currentframe()) 62 | i = loc.rfind("/") 63 | loc = loc[:i+1] 64 | config = ConfigParser.SafeConfigParser() 65 | config.readfp(open(loc + CYMRU_CONFIG_FILE)) 66 | 67 | if config.has_section('Core'): 68 | if 'name' in config.options('Core'): 69 | NAME = config.get('Core', 'name') 70 | if config.has_section('Configuration') and 'cymru_module' in config.options('Configuration'): 71 | cymru_file = config.get('Configuration', 'cymru_module') 72 | if cymru_file[0] != "/": 73 | cymru_file = loc + cymru_file 74 | i = cymru_file.rfind("/") 75 | cymru_dir = cymru_file[:i] 76 | cymru_module = cymru_file[i+1:-3] if cymru_file.endswith(".py") else cymru_file[i+1:] # strip(".py") would also eat trailing 'p'/'y' characters 77 | logging.debug(cymru_dir) 78 | logging.debug(cymru_module) 79 | logging.debug(cymru_file) 80 | 81 | try: 82 | fp, pathname, description = imp.find_module(cymru_module, [cymru_dir]) 83 | cymru_api = imp.load_module(cymru_module, fp, pathname, description) 84 | module_import_success = True 85 | except: 86 | module_import_success = False 87 | raise 88 | else: 89 | module_import_success = False 90 | 91 | ## EXECUTION 92 | class PluginOne(IPlugin): 93 | def __init__(self): 94 | pass 95 | 96 | def configure(self): 97 | """ 98 | 99 | :return: return list of [plugin_type, configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 100 | """ 101 | config_options = config.options("Configuration") 102 | 103 | if 'cost' in config_options: 104 | cost = config.get('Configuration', 'cost') 105 | else: 106 | cost = 9999 107 | if 'speed' in config_options: 108 | speed = config.get('Configuration', 'speed') 109 | else: 110 | speed = 9999 111 | 112 | if 'type' in config_options: 113 | plugin_type = config.get('Configuration', 'type') 114 | else: 115 | logging.error("'Type' not specified in config file.") 116 | return [None, False, NAME, "Takes a list of IPs and returns ASN and BGP information as a networkx graph.", None, cost, speed] 117 | 118 | if 'inputs' in config_options: 119 | inputs = config.get('Configuration', 'Inputs') 120 | inputs = [l.strip().lower() for l in inputs.split(",")] 121 | else: 122 | logging.error("No input types specified in config file.") 123 | return [plugin_type, False, NAME, "Takes a list of IPs and returns ASN and BGP information as a networkx graph.", None, cost, speed] 124 | 125 | if not module_import_success: 126 | logging.error("Module import failure caused configuration failure.") 127 | return [plugin_type, False, NAME, "Takes a list of IPs and returns ASN and BGP information as a networkx graph.", inputs, cost, speed] 128 | else: 129 | return [plugin_type, True, NAME, "Takes a list of IPs and returns ASN and BGP 
information as a networkx graph.", inputs, cost, speed] 130 | 131 | 132 | def run(self, ips, start_time = ""): 133 | """ str, str -> networkx multiDiGraph 134 | 135 | :param ips: list of IP addresses to enrich in the graph 136 | :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object. 137 | :return: subgraph 138 | 139 | Note: based on https://gist.github.com/zakird/11196064 140 | """ 141 | 142 | # Parse the start_time 143 | if type(start_time) is str: 144 | try: 145 | time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ") 146 | except: 147 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 148 | elif type(start_time) is datetime: 149 | time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") 150 | else: 151 | time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") 152 | 153 | 154 | # Since sometimes I just pass in an IP, we'll fix it here. 155 | if type(ips) == str: 156 | ips = [ips] 157 | 158 | # Validate IP 159 | for ip in ips: 160 | _ = ipaddress.ip_address(unicode(ip)) 161 | 162 | g = nx.MultiDiGraph() 163 | 164 | # Create cymru ASN enrichment node 165 | cymru_asn_uri = "class=attribute&key={0}&value={1}".format("enrichment", "cymru_asn_enrichment") 166 | attributes = { 167 | 'class': 'attribute', 168 | 'key': 'enrichment', 169 | "value": "cymru_asn_enrichment", 170 | 'uri': cymru_asn_uri, 171 | 'start_time': time 172 | } 173 | g.add_node(cymru_asn_uri, attributes) 174 | 175 | 176 | 177 | a = cymru_api.CymruIPtoASNService() 178 | 179 | for result in a.query(ips): 180 | try: 181 | t = dateutil.parser.parse(result.allocated_at).strftime("%Y-%m-%dT%H:%M:%SZ") 182 | except: 183 | t = time 184 | # Create ip's node 185 | ip_uri = "class=attribute&key={0}&value={1}".format("ip", result.ip_address) 186 | g.add_node(ip_uri, { 187 | 'class': 'attribute', 188 | 'key': "ip", 189 | "value": result.ip_address, 190 | "start_time": time, 191 | "uri": ip_uri 192 | }) 193 | 194 | # link to cymru ASN enrichment 195 | edge_attr = { 196 | "relationship": "describedBy", 197 | "origin": "cymru_asn_enrichment", 198 | "start_time": time, 199 | } 200 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri) 201 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, cymru_asn_uri) 202 | edge_uri = "source={0}&destination={1}".format(str(source_hash), str(dest_hash)) 203 | rel_chain = "relationship" 204 | while rel_chain in edge_attr: 205 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 206 | rel_chain = edge_attr[rel_chain] 207 | if "origin" in edge_attr: 208 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 209 | edge_attr["uri"] = edge_uri 210 | g.add_edge(ip_uri, cymru_asn_uri, edge_uri, edge_attr) 211 | 212 | 213 | # Create bgp prefix node 214 | bgp_uri = "class=attribute&key={0}&value={1}".format("bgp", result.bgp_prefix) 215 | attributes = { 216 | 'class': 'attribute', 217 | 'key': 'bgp', 218 | 'value': result.bgp_prefix, 219 | 'uri': bgp_uri, 220 | 'start_time': time 221 | } 222 | g.add_node(bgp_uri, attributes) 223 | 224 | # Link bgp prefix node to ip 225 | edge_attr = { 226 | "relationship": "describedBy", 227 | "origin": "cymru_asn_enrichment", 228 | "start_time": time, 229 | } 230 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri) 231 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, bgp_uri) 232 | edge_uri = "source={0}&destination={1}".format(str(source_hash), str(dest_hash)) 233 | rel_chain = "relationship" 234 | while rel_chain in edge_attr: 235 | edge_uri = edge_uri + 
"&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 236 | rel_chain = edge_attr[rel_chain] 237 | if "origin" in edge_attr: 238 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 239 | edge_attr["uri"] = edge_uri 240 | g.add_edge(ip_uri, bgp_uri, edge_uri, edge_attr) 241 | 242 | 243 | # create asn node 244 | asn_uri = "class=attribute&key={0}&value={1}".format("asn", result.as_number) 245 | attributes = { 246 | 'class': 'attribute', 247 | 'key': 'asn', 248 | 'value': result.as_number, 249 | 'uri': asn_uri, 250 | 'start_time': time 251 | } 252 | try: 253 | attributes['owner'] = result.as_name 254 | except: 255 | pass 256 | g.add_node(asn_uri, attributes) 257 | 258 | # link bgp prefix to asn node 259 | edge_attr = { 260 | "relationship": "describedBy", 261 | "origin": "cymru_asn_enrichment", 262 | "start_time": t, 263 | } 264 | source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri) 265 | dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, asn_uri) 266 | edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) 267 | rel_chain = "relationship" 268 | while rel_chain in edge_attr: 269 | edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain]) 270 | rel_chain = edge_attr[rel_chain] 271 | if "origin" in edge_attr: 272 | edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) 273 | edge_attr["uri"] = edge_uri 274 | g.add_edge(ip_uri, asn_uri, edge_uri, edge_attr) 275 | 276 | 277 | # Return the data enriched IP as a graph 278 | return g 279 | -------------------------------------------------------------------------------- /examples/plugin_template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | pass 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | PLUGIN_CONFIG_FILE = "plugin_template.yapsy-plugin" # CHANGEME 40 | NAME = "" # CHANGEME 41 | 42 | 43 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 44 | 45 | 46 | ## IMPORTS 47 | from yapsy.IPlugin import IPlugin 48 | import logging 49 | import networkx as nx 50 | from datetime import datetime # timedelta imported above 51 | import uuid 52 | import ConfigParser 53 | import inspect 54 | import threading 55 | """ 56 | try: 57 | import 58 | module_import_success = True 59 | except: 60 | module_import_success = False 61 | logging.error("Module import failed. 
-------------------------------------------------------------------------------- /examples/plugin_template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | pass 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | PLUGIN_CONFIG_FILE = "plugin_template.yapsy-plugin" # CHANGEME 40 | NAME = "" # CHANGEME 41 | 42 | 43 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 44 | 45 | 46 | ## IMPORTS 47 | from yapsy.IPlugin import IPlugin 48 | import logging 49 | import networkx as nx 50 | from datetime import datetime # timedelta imported above 51 | import uuid 52 | import ConfigParser 53 | import inspect 54 | import threading 55 | """ 56 | try: 57 | import 58 | module_import_success = True 59 | except: 60 | module_import_success = False 61 | logging.error("Module import failed. Please install the following module: .") 62 | """ 63 | 64 | ## SETUP 65 | loc = inspect.getfile(inspect.currentframe()) 66 | ind = loc.rfind("/") 67 | loc = loc[:ind+1] 68 | config = ConfigParser.SafeConfigParser() 69 | config.readfp(open(loc + PLUGIN_CONFIG_FILE)) 70 | 71 | if config.has_section('Core'): 72 | if 'name' in config.options('Core'): 73 | NAME = config.get('Core', 'name') 74 | if config.has_section('Log'): 75 | if 'level' in config.options('Log'): 76 | LOGLEVEL = config.get('Log', 'level') 77 | if 'file' in config.options('Log'): 78 | LOGFILE = config.get('Log', 'file') 79 | 80 | 81 | ## EXECUTION 82 | class PluginOne(IPlugin): 83 | inputs = None 84 | shutdown = False # Used to trigger shutdown of a minion 85 | thread = None # Set by start(); checked by isAlive() and stop() 86 | # CHANGEME: The init should contain anything to load modules or data files that should be variables of the plugin object 87 | def __init__(self): 88 | pass 89 | 90 | # CHANGEME: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everything loaded correctly 91 | # CHANGEME: Current layout is for an enrichment plugin 92 | # CHANGEME: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed] 93 | # CHANGEME: interface [type, successful_load, name] 94 | # CHANGEME: score [type, successful_load, name, description, cost, speed] 95 | # CHANGEME: minion [type, successful_load, name, description, cost] 96 | def configure(self): 97 | """ 98 | 99 | :return: return list of configuration variables starting with [plugin_type, successful_load, name, description, ] 100 | """ 101 | config_options = config.options("Configuration") 102 | 103 | # Cost and speed are not applicable to all plugin types 104 | """ 105 | if 'cost' in config_options: 106 | cost = config.get('Configuration', 'cost') 107 | else: 108 | cost = 9999 109 | if 'speed' in config_options: 110 | speed = config.get('Configuration', 'speed') 111 | else: 112 | speed = 9999 113 | """ 114 | 115 | if config.has_section('Documentation') and 'description' in config.options('Documentation'): 116 | description = config.get('Documentation', 'description') 117 | else: 118 | logging.error("'Description' not in config file.") 119 | return [None, False, NAME, None, cost, speed] 120 | 121 | if 'type' in config_options: 122 | plugin_type = config.get('Configuration', 'type') 123 | else: 124 | logging.error("'Type' not specified in config file.") 125 | return [None, False, NAME, description, None, cost, speed] 126 | 127 | # Inputs is only applicable to enrichment plugins 128 | """ 129 | if 'inputs' in config_options: 130 | self.inputs = config.get('Configuration', 'Inputs') 131 | self.inputs = [l.strip().lower() for l in self.inputs.split(",")] 132 | else: 133 | logging.error("No input types specified in config file.") 134 | return [plugin_type, False, NAME, description, None, cost, speed] 135 | """ 136 | 137 | # Module success is only applicable to plugins which import unique code 138 | """ 139 | if not module_import_success: 140 | logging.error("Module import failure caused configuration failure.") 141 | return [plugin_type, False, NAME, description, self.inputs, cost, speed] 142 | """ 143 | 144 | return [plugin_type, True, NAME, description, self.inputs, cost, speed] 145 | 146 | 147 | ############ GENERAL NOTES ############ 148 | # CHANGEME: All functions must implement a "configure()" function 149 | # CHANGEME: The correct type of execution function must be defined for the type of plugin 150 | ############ GENERAL NOTES ############ 151 | 
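# Editor's example (values taken from plugins/tld.yapsy-plugin, for illustration):
# a successfully loaded enrichment plugin's configure() returns a list shaped like
#   [plugin_type, successful_load, name, description, inputs, cost, speed]
# e.g. ['enrichment', True, 'TLD Enrichment',
#       'Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.',
#       ['domain'], '1', '1']
# Note that cost and speed come back as strings: ConfigParser.get() does not cast.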
152 | 153 | # CHANGEME: enrichment: run(<enrichment_target>, inputs, start_time, any other plugin-specific attributes-MUST HAVE DEFAULTS) 154 | # CHANGEME: Enrichment plugin specifics: 155 | # - Created nodes/edges must follow http://blog.infosecanalytics.com/2014/11/cyber-attack-graph-schema-cags-20.html 156 | # - The enrichment should include a node for the <enrichment_target> 157 | # - The enrichment should include a node for the enrichment, which is statically defined with a key of "enrichment" 158 | # - An edge should exist from the <enrichment_target> to the enrichment node, created at the end after enrichment 159 | # - Each enrichment datum should have a node 160 | # - An edge should exist from the <enrichment_target> to each enrichment datum 161 | # - The run function should then return a networkx directed multi-graph including the nodes and edges 162 | def run(self, enrichment_target, inputs=None, start_time=""): 163 | """ 164 | 165 | :param enrichment_target: a string containing a target to enrich 166 | :return: a networkx graph representing the enrichment of the target 167 | """ 168 | 169 | g = nx.MultiDiGraph() 170 | # TODO: Place enrichment in here 171 | 172 | return g 173 | 174 | 175 | # CHANGEME: interface: enrich(graph, any other plugin-specific attributes-MUST HAVE DEFAULTS) 176 | # CHANGEME: query(topic, max_depth, config, dont_follow, any other plugin-specific attributes-MUST HAVE DEFAULTS) 177 | # CHANGEME: Interface plugin specifics (see the sketch after enrich() below): 178 | # - In the most efficient way possible, merge nodes and edges into the storage medium 179 | # - Merger of nodes should be done based on matching key & value. 180 | # - URI should remain static for a given node. 181 | # - Start time should be updated to that of the sending graph 182 | # - Edges should be added w/o attempts to merge with edges in the storage back end 183 | # - When adding nodes it is highly recommended to keep a node-to-storage-id mapping keyed by the node 184 | # - URI. This will assist in bulk-adding the edges. 185 | # - Query specifics of interface plugins: 186 | # - In the most efficient way possible retrieve and return the merged subgraph (as a networkx graph) including all nodes and 187 | # - edges within the max_depth from any node in the topic graph from the storage backend graph. 188 | # - As a default, ['enrichment', 'classification'] should not be followed. 189 | # - The query function must add a 'topic_distance' property to all nodes. 190 | def enrich(self, g): 191 | """ 192 | 193 | :param g: networkx graph to be merged 194 | :return: Nonetype 195 | """ 196 | pass # TODO: Replace this with storage into a backend storage system 197 | 
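    # Editor's sketch of the merge logic described above, against a plain
    # networkx MultiDiGraph standing in for a real storage backend (an
    # assumption for illustration; self.context is a hypothetical attribute,
    # and real interface plugins target stores such as neo4j or titan):
    #
    #     def enrich(self, g):
    #         if not hasattr(self, 'context'):
    #             self.context = nx.MultiDiGraph()
    #         for uri, props in g.nodes(data=True):
    #             if uri in self.context:  # merge on key & value (both encoded in the URI)
    #                 self.context.node[uri]['start_time'] = props.get('start_time', '')
    #             else:
    #                 self.context.add_node(uri, props)
    #         for src, dst, key, props in g.edges(keys=True, data=True):
    #             self.context.add_edge(src, dst, key, props)  # edges added without merging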
198 | 199 | # CHANGEME: score: score(subgraph, topic, any other plugin-specific attributes-MUST HAVE DEFAULTS) 200 | # CHANGEME: Score plugin specifics: 201 | # - Scoring plugins should take a topic and networkx (sub)graph and return a dictionary keyed with the node (name) and with 202 | # - values of the score assigned to the node for the given topic. 203 | def score(self, sg, topic): # get_bayesian_network_probability 204 | """ 205 | 206 | :param sg: egocentric subgraph around topic in networkx format 207 | :param topic: graph of topics 208 | :return: Dictionary of probabilities keyed by node 209 | """ 210 | scores = dict() 211 | 212 | pass # TODO: Replace with code to score the subgraph with respect to the topic 213 | 214 | return scores 215 | 216 | 217 | 218 | # CHANGEME: minion: minion() 219 | # CHANGEME: start() 220 | # CHANGEME: stop() 221 | # CHANGEME: isAlive() 222 | # CHANGEME: Minion plugin specifics: 223 | # - Minions must exist in a separate directory to prevent them importing themselves when they import their own VERUM instance 224 | # - The minion configuration function must take an argument of the parent verum object. When not present, it shouldn't error but 225 | # - instead return with successful_load set to false and a logging.info message that the parent was not passed in. 226 | # - Must have 4 functions: minion(), start(), stop(), and isAlive() 227 | # - minion() is the function which will be threaded. **Make sure to create the new verum instance WITHIN this function 228 | # - to avoid SQLite errors!** 229 | # - start() creates the thread object as an attribute of the plugin class and starts it 230 | # - stop() stops the thread. Preferably with both a normal exit by setting a shutdown variable of the plugin class as well as a 231 | # - force stop option which removes the thread object 232 | # - isAlive() calls the thread isAlive() function and returns the status 233 | def minion(self, *args, **xargs): 234 | self.shutdown = False 235 | 236 | pass # TODO: Write the function which will be threaded to form the minion 237 | 238 | def start(self, *args, **xargs): 239 | self.thread = threading.Thread(target=self.minion, *args, **xargs) 240 | self.thread.start() 241 | 242 | def isAlive(self): 243 | if self.thread is None: 244 | return False 245 | else: 246 | return self.thread.isAlive() 247 | 248 | def stop(self, force=True): 249 | if force: 250 | self.thread = None # zero out thread 251 | else: 252 | self.shutdown = True # just don't iterate; the thread exits after its current sleep 253 | 254 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License.
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 203 | -------------------------------------------------------------------------------- /plugins/bayes_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | pass 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | PLUGIN_CONFIG_FILE = "bayes_net.yapsy-plugin" 40 | NAME = "BayesNet" 41 | 42 | 43 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 44 | 45 | 46 | ## IMPORTS 47 | from yapsy.IPlugin import IPlugin 48 | import logging 49 | import networkx as nx 50 | from datetime import datetime # timedelta imported above 51 | import uuid 52 | import ConfigParser 53 | import inspect 54 | from collections import defaultdict 55 | import random 56 | import numpy as np 57 | 58 | 59 | ## SETUP 60 | loc = inspect.getfile(inspect.currentframe()) 61 | ind = loc.rfind("/") 62 | loc = loc[:ind+1] 63 | config = ConfigParser.SafeConfigParser() 64 | config.readfp(open(loc + PLUGIN_CONFIG_FILE)) 65 | 66 | if config.has_section('Core'): 67 | if 'name' in config.options('Core'): 68 | NAME = config.get('Core', 'name') 69 | if config.has_section('Log'): 70 | if 'level' in config.options('Log'): 71 | LOGLEVEL = config.get('Log', 'level') 72 | if 'file' in config.options('Log'): 73 | LOGFILE = config.get('Log', 'file') 74 | 75 | 76 | ## EXECUTION 77 | class PluginOne(IPlugin): 78 | # TODO: The init should contain anything to load modules or data files that should be variables of the plugin object 79 | def __init__(self): 80 | pass 81 | 82 | # TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everything loaded correctly 83 | # TODO: Current layout is for an enrichment plugin 84 | # TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed] 85 | # TODO: interface [type, successful_load, name] 86 | # TODO: query [TBD] 87 | # TODO: minion [TBD] 88 | def configure(self): 89 | """ 90 | 91 | :return: return list of [plugin_type, configure success (bool), name, description, resource cost (1-10, 1=low), speed (1-10, 1=fast)] 92 | """ 93 | config_options = config.options("Configuration") 94 | 95 | if 'cost' in config_options: 96 | cost = config.get('Configuration', 'cost') 97 | else: 98 | cost = 9999 99 | if 'speed' in config_options: 100 | speed = config.get('Configuration', 'speed') 101 | else: 102 | speed = 9999 103 | 104 | if 
config.has_section('Documentation') and 'description' in config.options('Documentation'): 105 | description = config.get('Documentation', 'description') 106 | else: 107 | logging.error("'Description' not in config file.") 108 | return [None, False, NAME, None, cost, speed] 109 | 110 | if 'type' in config_options: 111 | plugin_type = config.get('Configuration', 'type') 112 | else: 113 | logging.error("'Type' not specified in config file.") 114 | return [None, False, NAME, description, cost, speed] 115 | 116 | return [plugin_type, True, NAME, description, cost, speed] 117 | 118 | 119 | def score(self, sg, topic): # get_bayesian_network_probability 120 | """ 121 | 122 | :param sg: egocentric subgraph around topic in networkx format 123 | :param topic: graph of topics 124 | :return: Dictionary of probabilities keyed by node 125 | 126 | NOTE: Will error on cycles in graph 127 | """ 128 | # Calculate the probability of each node given the topic nodes 129 | # TODO: Capture the context of relationships as well 130 | # TODO: Handle loops more elegantly than failing 131 | # TODO: handle the markov blanket 132 | 133 | # setup 134 | confidences = nx.get_edge_attributes(sg, 'confidence') 135 | probabilities = defaultdict(lambda: 0) 136 | queue = list() 137 | complete_history = random.sample(xrange(10000), 1000) # seed the stall detector with random values so early rounds see more than one unique value 138 | complete = set() 139 | 140 | for node in topic.nodes(): 141 | probabilities[node] = 1 # The topic nodes are by definition true 142 | complete.add(node) # The topic nodes are by definition complete 143 | for node in sg.nodes(): 144 | for successor in sg.successors(node): 145 | queue.append(successor) 146 | print "Starting probability loop" 147 | while len(queue) > 0: 148 | complete_history.pop(0) # slide the stall-detection window 149 | complete_history.append(len(complete)) 150 | if len(set(complete_history)) < 2: 151 | print "Error, nothing completed in 1000 rounds." 152 | print "Queue length is {0} with {1} unique values".format(len(queue), len(set(queue))) 153 | print "Complete is\n{0}".format(len(complete)) 154 | break 155 | node = queue.pop(0) 156 | if node not in complete: # Only process nodes not already complete 157 | ready_to_calculate = True 158 | for predecessor in sg.predecessors(node): 159 | if predecessor not in complete: 160 | queue.append(predecessor) # if the node is not complete, enqueue it 161 | ready_to_calculate = False # before we can complete a node, its predecessors must be complete 162 | if ready_to_calculate: 163 | try: 164 | # INSERT WEIGHTING FUNCTION BELOW 165 | cpt = np.array(self.normal_weight(sg.node[node]['topic_distance'])) 166 | except Exception as e: 167 | print "Node: {0}, Attributes: {1}".format(node, sg.node[node]) 168 | raise e 169 | for predecessor in sg.predecessors(node): 170 | # If an edge has a confidence, we use it. Otherwise we assume 100% 171 | if (predecessor, node) in confidences: 172 | confidence = confidences[(predecessor, node)] 173 | else: 174 | confidence = 1 175 | 176 | # Calculate the probability based on the bayesian network 177 | # Reference: http://cs.nyu.edu/faculty/davise/ai/bayesnet.html 178 | # Reference: http://en.wikipedia.org/wiki/Bayes'_theorem 179 | # Reference: http://en.wikipedia.org/wiki/Bayesian_network 180 | for i in range(2**len(sg.predecessors(node))): 181 | # double the rows 182 | cpt = np.vstack((cpt, cpt)) 183 | # create a list that is first half the complement of the probability and second half the probability 184 | new_col = [] 185 | for j in range(cpt.shape[0]): 186 | if j < cpt.shape[0] / float(2): 187 | new_col.append(1 - (confidence * probabilities[predecessor])) 188 | else: 189 | new_col.append(confidence * probabilities[predecessor]) 190 | # Add that column to the CPT 191 | cpt = np.column_stack((cpt, new_col)) 192 | 193 | # Remove first (all false) row as it should not be summed into the probability 194 | # This is in lieu of making the prior probability zero for that row 195 | cpt = np.delete(cpt, (0), axis=0) 196 | 197 | # sum the product of each column to get the node probability 198 | probabilities[node] = cpt.prod(axis=1).sum() 199 | queue = queue + sg.successors(node) # queue successors to the node 200 | complete.add(node) # add the node as completed 201 | 202 | else: # It's not ready to be completed 203 | queue.append(node) # requeue the node after its predecessors 204 | 205 | return probabilities 
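    # Editor's usage sketch (toy values, not from the repository): score a
    # two-node chain where topic node "t" describes node "n".
    #
    #     plugin = PluginOne()
    #     sg = nx.DiGraph()
    #     sg.add_node("t", {"topic_distance": 0})
    #     sg.add_node("n", {"topic_distance": 1})
    #     sg.add_edge("t", "n", {"confidence": 0.9})
    #     topic = nx.DiGraph()
    #     topic.add_node("t")
    #     probs = plugin.score(sg, topic)
    #     # probs["t"] == 1 by definition; probs["n"] is discounted by both the
    #     # edge confidence and the normal_weight() of its topic_distance.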
206 | 207 | 208 | def multigraph_to_digraph(self, g): 209 | """ 210 | 211 | :param g: takes a networkx multigraph 212 | :return: returns a networkx digraph with edge weights representing the number of edges 213 | 214 | NOTE: This butchers duplicate edge properties. If converting to score, use original edges in output. 215 | """ 216 | G = nx.DiGraph() 217 | edge_attributes = {} 218 | 219 | # if g isn't really a multigraph, just return it 220 | if not g.is_multigraph(): 221 | return g 222 | 223 | # collapse down to a digraph 224 | G.add_nodes_from(g.nodes(data=True)) 225 | G.add_edges_from(g.edges(data=True)) 226 | 227 | # for each edge, weight the confidence by the number of edges 228 | ''' 229 | # captures a multiple of the confidence on the edge in the output graph 230 | for edge in G.edges(): 231 | count = g.edges().count(edge) 232 | if count > 1: 233 | if "confidence" in G.edge[edge[0]][edge[1]]: 234 | G.edge[edge[0]][edge[1]]['confidence'] *= count 235 | else: 236 | G.edge[edge[0]][edge[1]]["confidence"] = count 237 | ''' 238 | # Captures every confidence 239 | for edge in G.edges(): 240 | confidence = 0 241 | for src_edge in g.edge[edge[0]][edge[1]].values(): 242 | confidence += src_edge.get('confidence', 1) 243 | G.edge[edge[0]][edge[1]]['confidence'] = confidence 244 | # # collapse down to a digraph 245 | # G.add_nodes_from(g.nodes(data=True)) 246 | # G.add_edges_from(g.edges(data=True)) 247 | 248 | return G 
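    # Editor's usage sketch: parallel edges collapse to a single edge whose
    # confidence is the sum of the originals (an absent confidence counts as 1).
    #
    #     mg = nx.MultiDiGraph()
    #     mg.add_edge("a", "b")                    # no confidence -> treated as 1
    #     mg.add_edge("a", "b", confidence=0.5)
    #     dg = PluginOne().multigraph_to_digraph(mg)
    #     dg.edge["a"]["b"]["confidence"]          # 1.5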
249 | 250 | 251 | ### DISTANCE WEIGHTS ### 252 | def linear_weight(self, distance, ddp=.2): 253 | """ 254 | 255 | :param distance: distance from topic 256 | :param ddp: percentage to degrade 257 | :return: Linear weighting factor as float 258 | """ 259 | return 1 - (distance * ddp) 260 | 261 | 262 | def log_weight(self, distance, a=1, b=1, n=3, pwr=1): 263 | """ 264 | 265 | :param distance: distance from topic 266 | :param a: constant to shape graph. Adjusts height at 0 = a / (1 + b) 267 | :param b: constant to shape graph. 268 | :param n: constant to shape graph. 269 | :param pwr: constant to shape graph. 270 | :return: log weighting factor as float 271 | """ 272 | return a / (1 + b*np.exp((distance-n) * pwr)) 273 | 274 | 275 | def exponential_weight(self, distance, b=2): 276 | return np.exp(-distance/float(b)) # float() so integer distances don't floor-divide under Python 2 277 | 278 | 279 | def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1): 280 | """ 281 | 282 | :param distance: distance from topic 283 | :param pwr: constant to shape graph. Higher = steeper decline 284 | :param b: constant to shape graph. Lower = greater spread (a scales the height; c shifts the curve) 285 | :return: normal weighting factor as float 286 | Alternative shaping constants: pwr = 2.5, a = 1, c = 0, b = 30 287 | """ 288 | return a * np.exp(-(distance + c)**pwr/float(b)) # float() avoids Python 2 integer division
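With the default constants, the four weighting functions decay as follows for distances 0 through 4 (values rounded, computed from the formulas above):

    linear_weight:      1.00, 0.80, 0.60, 0.40, 0.20
    log_weight:         0.95, 0.88, 0.73, 0.50, 0.27
    exponential_weight: 1.00, 0.61, 0.37, 0.22, 0.14
    normal_weight:      1.00, 0.74, 0.45, 0.22, 0.09

score() above uses normal_weight() as the prior for each node, so nodes more than a few hops from the topic contribute little probability mass.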
-------------------------------------------------------------------------------- /minions/alexa_1M.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Gabriel Bassett" 4 | """ 5 | AUTHOR: {0} 6 | DATE: 7 | DEPENDENCIES: 8 | Copyright {0} 9 | 10 | LICENSE: 11 | Licensed to the Apache Software Foundation (ASF) under one 12 | or more contributor license agreements. See the NOTICE file 13 | distributed with this work for additional information 14 | regarding copyright ownership. The ASF licenses this file 15 | to you under the Apache License, Version 2.0 (the 16 | "License"); you may not use this file except in compliance 17 | with the License. You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, 22 | software distributed under the License is distributed on an 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 24 | KIND, either express or implied. See the License for the 25 | specific language governing permissions and limitations 26 | under the License. 27 | 28 | DESCRIPTION: 29 | 30 | 31 | """.format(__author__) 32 | # PRE-USER SETUP 33 | from datetime import timedelta 34 | 35 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 36 | 37 | 38 | # USER VARIABLES 39 | PLUGIN_CONFIG_FILE = "alexa_1M.yapsy-plugin" 40 | NAME = "Alexa Top 1M" 41 | FEED = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip" 42 | SLEEP_TIME = 14400 # 4 hours in seconds 43 | REFRESH_TIME = timedelta(days=7) 44 | 45 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 46 | 47 | 48 | ## IMPORTS 49 | from yapsy.IPlugin import IPlugin 50 | import logging 51 | import networkx as nx 52 | from datetime import datetime # timedelta imported above 53 | import dateutil 54 | import uuid 55 | import ConfigParser 56 | import inspect 57 | import requests # for downloading the intel list 58 | import ipaddress # for validating ip addresses 59 | import time # for sleep 60 | import threading # import threading so minion doesn't block the app 61 | import imp # Importing imp to import verum 62 | import copy 63 | import tldextract # used for validating domains 64 | import zipfile # for unzipping the Alexa 1M file 65 | from StringIO import StringIO # for opening the Alexa 1M file in memory 66 | 67 | 68 | ## SETUP 69 | loc = inspect.getfile(inspect.currentframe()) 70 | ind = loc.rfind("/") 71 | loc = loc[:ind+1] 72 | config = ConfigParser.SafeConfigParser() 73 | config.readfp(open(loc + PLUGIN_CONFIG_FILE)) 74 | 75 | if config.has_section('Core'): 76 | if 'name' in config.options('Core'): 77 | NAME = config.get('Core', 'name') 78 | LOGLEVEL = logging.INFO # defaults, overridden by the [Log] section below 79 | LOGFILE = None 80 | if config.has_section('Log'): 81 | if 'level' in config.options('Log'): 82 | LOGLEVEL = config.get('Log', 'level') 83 | if 'file' in config.options('Log'): 84 | LOGFILE = config.get('Log', 'file') 85 | 86 | if LOGFILE: 87 | logging.basicConfig(filename=LOGFILE, level=LOGLEVEL) 88 | else: 89 | logging.basicConfig(level=LOGLEVEL) 90 | 91 | ## EXECUTION 92 | class PluginOne(IPlugin): 93 | thread = None 94 | app = None # The object instance 95 | Verum = None # the module 96 | today = datetime.strptime("1970", "%Y") # Date of the last completed import 97 | shutdown = False # Used to trigger shutdown of the minion 98 | parent = None # The parent instance of the verum app object 99 | 100 | # CHANGEME: The init should contain anything to load modules or data files that should be variables of the plugin object 101 | def __init__(self): 102 | """ 103 | 104 | """ 105 | pass 106 | 107 | # CHANGEME: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everything loaded correctly 108 | # CHANGEME: Current layout is for an enrichment plugin 109 | # CHANGEME: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed] 110 | # CHANGEME: interface [type, successful_load, name] 111 | # CHANGEME: score [type, successful_load, name, description, cost, speed] 112 | # CHANGEME: minion [TBD] 113 | def configure(self, parent=None): 114 | """ 115 | 116 | :param parent: the parent verum app instance 117 | :return: return list of [plugin_type, configure success (bool), name, description, resource cost (1-10, 1=low)] 118 | """ 119 | global FEED 120 | 121 | config_options = config.options("Configuration") 122 | 123 | if 'cost' in config_options: 124 | cost = config.get('Configuration', 'cost') 125 | else: 126 | cost = 9999 127 | 128 | if config.has_section('Documentation') and 'description' in config.options('Documentation'): 129 | description = config.get('Documentation', 'description') 130 | else: 131 | logging.error("'Description' not in config file.") 132 | return [None, False, NAME, None, cost] 133 | 134 | if 'type' in config_options: 135 | plugin_type = config.get('Configuration', 'type') 136 | else: 137 | logging.error("'Type' not specified in config file.") 138 | return [None, False, NAME, description, cost] 139 | 140 | # Module import success 141 | if parent is not None: 142 | self.parent = parent 143 | else: 144 | logging.info("Parent verum app instance not passed to minion. Please rerun, passing the parent object instance to successfully configure.") 145 | return [plugin_type, False, NAME, description, cost] 146 | 147 | if self.parent.loc is not None: 148 | # Import the verum module so that app features (such as the storage backend) can be accessed. 149 | fp, pathname, mod_description = imp.find_module("verum", [self.parent.loc]) 150 | self.Verum = imp.load_module("verum", fp, pathname, mod_description) 151 | else: 152 | logging.error("'verum' location not supplied to minion configuration function. Rerun with the location of the verum module specified.") 153 | return [plugin_type, False, NAME, description, cost] 154 | 155 | if 'feed' in config_options: 156 | FEED = config.get('Configuration', 'feed') 157 | else: 158 | logging.error("'Feed' not specified in config file.") 159 | return [plugin_type, False, NAME, description, cost] 160 | 161 | # Return success 162 | return [plugin_type, True, NAME, description, cost] 163 | 164 | 
165 | def minion(self, storage=None, *args, **xargs): 166 | self.app = self.Verum.app(self.parent.PluginFolder, None) 167 | # set storage 168 | if storage is None: 169 | storage = self.parent.storage 170 | self.app.set_interface(storage) 171 | 172 | # Check until stopped 173 | while not self.shutdown: 174 | # Check whether the refresh interval has passed; if not, sleep for a while, otherwise run the import 175 | # delta = datetime.utcnow() - self.today 176 | # if delta.days <= 0: 177 | if datetime.utcnow() <= self.today + REFRESH_TIME: 178 | time.sleep(SLEEP_TIME) 179 | else: 180 | logging.info("Starting daily {0} enrichment.".format(NAME)) 181 | 182 | # Create set of IPs for cymru enrichment 183 | ips = set() 184 | 185 | # Get the file 186 | r = requests.get(FEED) 187 | 188 | # Unzip the file 189 | z = zipfile.ZipFile(StringIO(r.content)) 190 | 191 | # get the time 192 | dt = datetime.utcnow() 193 | 194 | with z.open('top-1m.csv') as f: 195 | for line in f: 196 | try: 197 | line = line.strip().split(",") 198 | 199 | # Validate data in row 200 | ext = tldextract.extract(line[1]) 201 | if not ext.domain or not ext.suffix: 202 | # domain is not legitimate; skip it 203 | continue 204 | 205 | # classify benign and merge with current graph 206 | g = self.app.classify.run({'key': 'domain', 'value': line[1], 'classification': 'benign'}, confidence=1 - (int(line[0])-1)/float(1000000)) 207 | 208 | # enrich depending on type 209 | try: 210 | g = self.Verum.merge_graphs(g, self.app.run_enrichments(line[1], "domain", names=['TLD Enrichment'])) 211 | g = self.Verum.merge_graphs(g, self.app.run_enrichments(line[1], "domain", names=['DNS Enrichment'])) 212 | g = self.Verum.merge_graphs(g, self.app.run_enrichments(line[1], "domain", names=['IP Whois Enrichment'])) 213 | except Exception as e: 214 | logging.info("Enrichment of {0} failed due to {1}.".format(line[1], e)) 215 | #print "Enrichment of {0} failed due to {1}.".format(line[1], e) # DEBUG 216 | #raise 217 | pass 218 | 219 | # 
Collect IPs 220 | line_ips = set() 221 | for node, data in g.nodes(data=True): 222 | if data['key'] == 'ip': 223 | line_ips.add(data['value']) 224 | 225 | for ip in line_ips: 226 | try: 227 | g = self.Verum.merge_graphs(g, self.app.run_enrichments(ip, "ip", names=[u'Maxmind ASN Enrichment'])) 228 | except Exception as e: 229 | logging.info("Enrichment of {0} failed due to {1}.".format(ip, e)) 230 | pass 231 | 232 | try: 233 | self.app.store_graph(self.Verum.remove_non_ascii_from_graph(g)) 234 | except: 235 | print g.nodes(data=True) # DEBUG 236 | print g.edges(data=True) # DEBUG 237 | raise 238 | 239 | ips = ips.union(line_ips) 240 | # Do cymru enrichment 241 | if len(ips) >= 50: 242 | # validate IPs 243 | ips2 = set() 244 | for ip in ips: 245 | try: 246 | _ = ipaddress.ip_address(unicode(ip)) 247 | ips2.add(ip) 248 | except: 249 | pass 250 | ips = ips2 251 | del(ips2) 252 | try: 253 | self.app.store_graph(self.app.run_enrichments(ips, 'ip', names=[u'Cymru Enrichment'])) 254 | #print "Cymru enrichment complete." 255 | except Exception as e: 256 | logging.info("Cymru enrichment of {0} IPs failed due to {1}.".format(len(ips), e)) 257 | #print "Cymru enrichment of {0} IPs failed due to {1}.".format(len(ips), e) # DEBUG 258 | pass 259 | ips = set() 260 | 261 | except Exception as e: 262 | print line 263 | print e 264 | raise 265 | 266 | # Copy today's date to today 267 | self.today = datetime.utcnow() 268 | 269 | logging.info("Daily {0} enrichment complete.".format(NAME)) 270 | print "Daily {0} enrichment complete.".format(NAME) # DEBUG 271 | 272 | def start(self, *args, **xargs): 273 | self.shutdown = False 274 | self.thread = threading.Thread(target=self.minion, *args, **xargs) 275 | self.thread.start() 276 | 277 | def isAlive(self): 278 | if self.thread is None: 279 | return False 280 | else: 281 | return self.thread.isAlive() 282 | 283 | 284 | def stop(self, force=True): 285 | if force: 286 | self.thread = None # zero out thread 287 | else: 288 | self.shutdown = True # just don't iterate; may take up to SLEEP_TIME seconds to exit 289 | 
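A hedged sketch of how a loaded minion is driven; verum_app stands in for the parent verum application object and is an assumption for illustration:

    # plugin = PluginOne()
    # plugin.configure(parent=verum_app)  # -> [plugin_type, loaded, name, description, cost]
    # plugin.start()                      # runs minion() in a background thread
    # plugin.isAlive()                    # True while the feed loop is running
    # plugin.stop(force=False)            # graceful: sets shutdown, exits after the current sleep
    # plugin.stop(force=True)             # immediate: drops the thread object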
-------------------------------------------------------------------------------- /verum/helper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | AUTHOR: Gabriel Bassett 4 | DATE: <01-23-2015> 5 | DEPENDENCIES: 6 | Copyright 2015 Gabriel Bassett 7 | 8 | LICENSE: 9 | Licensed to the Apache Software Foundation (ASF) under one 10 | or more contributor license agreements. See the NOTICE file 11 | distributed with this work for additional information 12 | regarding copyright ownership. The ASF licenses this file 13 | to you under the Apache License, Version 2.0 (the 14 | "License"); you may not use this file except in compliance 15 | with the License. You may obtain a copy of the License at 16 | 17 | http://www.apache.org/licenses/LICENSE-2.0 18 | 19 | Unless required by applicable law or agreed to in writing, 20 | software distributed under the License is distributed on an 21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 22 | KIND, either express or implied. See the License for the 23 | specific language governing permissions and limitations 24 | under the License. 25 | 26 | DESCRIPTION: 27 | 28 | 29 | NOTES: 30 | 31 | 32 | ISSUES: 33 | 34 | 35 | TODO: 36 | 37 | 38 | """ 39 | # PRE-USER SETUP 40 | import logging 41 | 42 | ########### NOT USER EDITABLE ABOVE THIS POINT ################# 43 | 44 | 45 | # USER VARIABLES 46 | CONFIG_FILE = "" 47 | LOGLEVEL = logging.DEBUG 48 | LOG = None 49 | 50 | ########### NOT USER EDITABLE BELOW THIS POINT ################# 51 | 52 | 53 | ## IMPORTS 54 | 55 | import argparse 56 | import ConfigParser 57 | import networkx as nx 58 | import urlparse 59 | import numpy as np 60 | from scipy import stats # for percentile 61 | 62 | ## SETUP 63 | __author__ = "Gabriel Bassett" 64 | 65 | if __name__ == "__main__": 66 | # Parse Arguments (should correspond to user variables) 67 | parser = argparse.ArgumentParser(description='This script processes a graph.') 68 | parser.add_argument('-d', '--debug', 69 | help='Print lots of debugging statements', 70 | action="store_const", dest="loglevel", const=logging.DEBUG, 71 | default=LOGLEVEL 72 | ) 73 | parser.add_argument('-v', '--verbose', 74 | help='Be verbose', 75 | action="store_const", dest="loglevel", const=logging.INFO 76 | ) 77 | parser.add_argument('--log', help='Location of log file', default=LOG) 78 | parser.add_argument('--config', help='Location of the config file', default=CONFIG_FILE) 79 | args = parser.parse_args() 80 | # add config arguments 81 | if __name__ == "__main__": 82 | CONFIG_FILE = args.config 83 | try: 84 | config = ConfigParser.SafeConfigParser() 85 | config.readfp(open(CONFIG_FILE)) 86 | config_exists = True 87 | except: 88 | config_exists = False 89 | if config_exists: 90 | if config.has_section('LOGGING'): 91 | if 'level' in config.options('LOGGING'): 92 | level = config.get('LOGGING', 'level') 93 | if level == 'debug': 94 | loglevel = logging.DEBUG 95 | elif level == 'verbose': 96 | loglevel = logging.INFO 97 | else: 98 | loglevel = logging.WARNING 99 | else: 100 | loglevel = logging.WARNING 101 | if 'log' in config.options('LOGGING'): 102 | log = config.get('LOGGING', 'log') 103 | else: 104 | log = None 105 | 106 | 107 | ## Set up Logging 108 | if __name__ == "__main__": 109 | if args.log is not None: 110 | logging.basicConfig(filename=args.log, level=args.loglevel) 111 | else: 112 | logging.basicConfig(level=args.loglevel) 113 | # 114 | 115 | 116 | ## GLOBAL EXECUTION 117 | pass 118 | 119 | 120 | ## FUNCTION DEFINITION 121 | def create_topic(properties, prefix=""): 122 | """ 123 | 124 | :param properties: A dictionary of properties 125 | :param prefix: If nodes are stored with a prefix, it will be prepended to each node URI 126 | :return: A topic graph in networkx format with one node per property 127 | 128 | NOTE: If multiple values of a certain type, (e.g. multiple IPs) make the value of the type 129 | in the dictionary a list. 
130 | """ 131 | g = nx.DiGraph() 132 | 133 | if type(properties) == dict: 134 | iterator = properties.iteritems() 135 | else: 136 | iterator = iter(properties) 137 | 138 | 139 | for key, value in iterator: 140 | if type(value) in (list, set, np.ndarray): 141 | for v in value: 142 | node_uri = "{2}class=attribute&key={0}&value={1}".format(key, v, prefix) 143 | g.add_node(node_uri, { 144 | 'class': 'attribute', 145 | 'key': key, 146 | 'value': v, 147 | 'uri': node_uri 148 | }) 149 | else: 150 | node_uri = "{2}class=attribute&key={0}&value={1}".format(key, value, prefix) 151 | g.add_node(node_uri, { 152 | 'class': 'attribute', 153 | 'key': key, 154 | 'value': value, 155 | 'uri': node_uri 156 | }) 157 | 158 | return g 159 | 160 | 161 | def validate_uri(uri): 162 | """ 163 | 164 | :param uri: a URI string to be validated 165 | :return: bool true if valid, false if not 166 | """ 167 | # TODO: Validate the order properties are in (important for uri hash lookup) 168 | 169 | try: 170 | properties = urlparse.parse_qs(urlparse.urlparse(uri).query) 171 | except: 172 | return False 173 | if u'key' not in properties: 174 | return False 175 | elif len(properties[u'key']) != 1: 176 | return False 177 | if u'value' not in properties: 178 | return False 179 | elif len(properties[u'value']) != 1: 180 | return False 181 | if u'attribute' not in properties: 182 | return False 183 | elif len(properties[u'attribute']) != 1: 184 | return False 185 | # Nothing failed, return true 186 | return True 187 | 188 | 189 | def get_topic_distance(sg, topic): 190 | """ 191 | 192 | :param sg: an egocentric subgraph in networkx format 193 | :param topic: a networkx graph of nodes representing the topic 194 | :return: a dictionary of key node name and value distance as integer 195 | """ 196 | distances = dict() 197 | 198 | # get all the distances 199 | for tnode in topic.nodes(): 200 | if tnode in sg.nodes(): 201 | distances[tnode] = nx.shortest_path_length(sg, source=tnode) 202 | 203 | # get the smallest distance per key 204 | min_dist = dict() 205 | for key in distances: 206 | for node in distances[key]: 207 | if node not in min_dist: 208 | min_dist[node] = distances[key][node] 209 | elif distances[key][node] < min_dist[node]: 210 | min_dist[node] = distances[key][node] 211 | 212 | 213 | # Return the dict 214 | return min_dist 215 | 216 | 217 | def compare_classifications(scores, node1, node2=None, output="print"): 218 | """ 219 | 220 | :param scores: dictionary keyed by nodes and values of scores 221 | :param node1: dictionary of {"class":, "key":, "value":} 222 | :param node2: dictionary of {"class":, "key":, "value":}. If empty, score will be compared to the median 223 | :param output: string representing how to output the data. 
"print" to print it, dictionary otherwise 224 | :return: ratio of node 1 to node 2 scores normalized to the lower score as dictionary 225 | """ 226 | node1_uri = "class={0}&key={1}&value={2}".format(node1['class'], node1['key'], node1['value']) 227 | 228 | node1_score = scores[node1_uri] 229 | if node2 is None: 230 | node2_score = np.median(scores.values()) 231 | else: 232 | node2_uri = "class={0}&key={1}&value={2}".format(node2['class'], node2['key'], node2['value']) 233 | node2_score = scores[node2_uri] 234 | 235 | if node1_score > node2_score: 236 | larger = "node1" 237 | else: 238 | larger = "node2" 239 | 240 | if output == "print": 241 | if node2 is None: 242 | if larger == "node2": 243 | print "The ratio of node 1 ({0}:{1}) to the median ({2}) is {3}:{4}.".format(node1['key'], 244 | node1['value'], 245 | node2_score, 246 | round(node1_score/float(node1_score), 4), 247 | round(node2_score/float(node1_score), 4)) 248 | else: 249 | print "The ratio of node 1 ({0}:{1}) to the median ({2}) is {3}:{4}.".format(node1['key'], 250 | node1['value'], 251 | node2_score, 252 | round(node1_score/float(node2_score), 4), 253 | round(node2_score/float(node2_score), 4)) 254 | else: 255 | if larger == "node2": 256 | print "The ratio of node 1 ({0}:{1}) to node 2 ({2}:{3}) is {4}:{5}.".format(node1['key'], 257 | node1['value'], 258 | node2['key'], 259 | node2['value'], 260 | round(node1_score/float(node1_score), 4), 261 | round(node2_score/float(node1_score), 4)) 262 | else: 263 | print "The ratio of node 1 ({0}:{1}) to node 2 ({2}:{3}) is {4}:{5}.".format(node1['key'], 264 | node1['value'], 265 | node2['key'], 266 | node2['value'], 267 | round(node1_score/float(node2_score), 4), 268 | round(node2_score/float(node2_score), 4)) 269 | else: 270 | if larger == "node2": 271 | return {"node1": node1_score/float(node1_score), "node2":node2_score/float(node1_score)} 272 | else: 273 | return {"node1": node1_score/float(node2_score), "node2":node2_score/float(node2_score)} 274 | 275 | 276 | def score_percentile(scores, node, output="print"): 277 | """ 278 | 279 | :param scores: dictionary keyed by nodes and values of scores 280 | :param node1: dictionary of {"class":, "key":, "value":} 281 | :param output: string representing how to output the data. "print" to print it, dictionary otherwise 282 | :return: the percentile the node is in. 
        Higher means more likely.
    """
    node_uri = "class={0}&key={1}&value={2}".format(node['class'], node['key'], node['value'])

    p = stats.percentileofscore(scores.values(), scores[node_uri])

    if output == "print":
        print "The percentile of the node is {0}.".format(round(p, 4))
    else:
        return p


def merge_graphs(g1, g2):
    """

    :param g1: a networkx graph
    :param g2: a networkx graph to merge into g1
    :return: a new networkx graph with the union of both graphs' nodes and edges (g2's attributes win on conflict)
    """
    g = g1.copy()
    for node, props in g2.nodes(data=True):
        g.add_node(node, props)
    for edge in g2.edges(data=True):
        g.add_edge(edge[0], edge[1], attr_dict=edge[2])

    return g


def removeNonAscii(s): return u"".join(i for i in s if ord(i) < 128).encode('utf8')

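
# Illustrative sketch of the helper above: removeNonAscii(u"Ex\xe4mple") returns the
# str 'Exmple' -- code points >= 128 are dropped outright (not transliterated) and
# the remaining characters are returned UTF-8 encoded.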

def remove_non_ascii_from_graph(g):
    """ networkx graph -> networkx graph

    :param g: A networkx graph
    :return: a networkx graph with non-ascii removed from all node and edge attributes
    """
    # ascii safe node key and value
    for node, data in g.nodes(data=True):
        for attr in data.keys():
            if type(data[attr]) in (str, unicode):  # only strings need scrubbing
                data[attr] = removeNonAscii(data[attr])
        g.node[node] = data

    if type(g) in [nx.classes.multidigraph.MultiDiGraph, nx.classes.multigraph.MultiGraph]:
        for edge in g.edges(data=True, keys=True):
            edge_attr = edge[3]
            for attr in edge_attr:
                if type(edge_attr[attr]) is str:
                    edge_attr[attr] = removeNonAscii(edge_attr[attr])
            g.edge[edge[0]][edge[1]][edge[2]] = edge_attr
    else:
        for edge in g.edges(data=True):
            edge_attr = edge[2]
            for attr in edge_attr:
                if type(edge_attr[attr]) is str:
                    edge_attr[attr] = removeNonAscii(edge_attr[attr])
            g.edge[edge[0]][edge[1]] = edge_attr


    # return the scrubbed graph
    return g

## MAIN LOOP EXECUTION
def main():
    logging.info('Beginning main loop.')

    logging.info('Ending main loop.')

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/minions/osint_bambenekconsulting_com.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

__author__ = "Gabriel Bassett"
"""
AUTHOR: {0}
DATE:
DEPENDENCIES:
Copyright {0}

LICENSE:
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.

DESCRIPTION:


""".format(__author__)
# PRE-USER SETUP
pass

########### NOT USER EDITABLE ABOVE THIS POINT #################


# USER VARIABLES
PLUGIN_CONFIG_FILE = "osint_bambenekconsulting_com.yapsy-plugin"
NAME = "OSINT Bambenek Consulting"
keys = {u'IP': "ip", u'Domain': "domain", u'Nameserver IP': "ip", u'Nameserver': "domain"}
nameserver = {u'IP': False, u'Domain': False, u'Nameserver IP': True, u'Nameserver': True}
FEED = "http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt"
SLEEP_TIME = 14400  # 4 hours in seconds

########### NOT USER EDITABLE BELOW THIS POINT #################


## IMPORTS
from yapsy.IPlugin import IPlugin
import logging
import networkx as nx
from datetime import datetime
import dateutil.parser  # for parsing dates out of the feed
import uuid
import ConfigParser
import inspect
import pandas as pd  # for organizing the intel list data
import requests  # for downloading the intel list
import ipaddress  # for validating ip addresses
import time  # for sleep
import threading  # import threading so minion doesn't block the app
import imp  # Importing imp to import verum

## SETUP
loc = inspect.getfile(inspect.currentframe())
ind = loc.rfind("/")
loc = loc[:ind+1]
config = ConfigParser.SafeConfigParser()
config.readfp(open(loc + PLUGIN_CONFIG_FILE))

if config.has_section('Core'):
    if 'name' in config.options('Core'):
        NAME = config.get('Core', 'name')
if config.has_section('Log'):
    if 'level' in config.options('Log'):
        LOGLEVEL = config.get('Log', 'level')
    if 'file' in config.options('Log'):
        LOGFILE = config.get('Log', 'file')

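
# For reference, a minimal .yapsy-plugin file that the SETUP block above and
# configure() below could read might look like the sketch that follows. This is
# illustrative only, not the actual file shipped in minions/:
#
#     [Core]
#     Name = OSINT Bambenek Consulting
#     Module = osint_bambenekconsulting_com
#
#     [Documentation]
#     Description = Pulls the Bambenek Consulting C2 master list into the graph.
#
#     [Configuration]
#     Type = minion
#     Cost = 8
#     Feed = http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt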

## EXECUTION
class PluginOne(IPlugin):
    thread = None
    app = None  # The object instance
    Verum = None  # the module
    yesterday = pd.DataFrame(columns=("indicator", "context", "date", "source", "key", "threat"))  # Yesterday's data
    today = None  # Time of the last feed import
    shutdown = False  # Used to trigger shutdown of the minion
    parent = None  # The parent instance of the verum app object

    # CHANGEME: The init should contain anything to load modules or data files that should be variables of the plugin object
    def __init__(self):
        """

        """
        pass

    # CHANGEME: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everything loaded correctly
    # CHANGEME: Current layout is for an enrichment plugin
    # CHANGEME: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
    # CHANGEME: interface [type, successful_load, name]
    # CHANGEME: score [type, successful_load, name, description, cost, speed]
    # CHANGEME: minion [TBD]
    def configure(self, parent=None):
        """

        :param parent: The parent verum app instance
        :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
        """
        global FEED

        config_options = config.options("Configuration")

        if 'cost' in config_options:
            cost = config.get('Configuration', 'cost')
        else:
            cost = 9999

        if config.has_section('Documentation') and 'description' in config.options('Documentation'):
            description = config.get('Documentation', 'description')
        else:
            logging.error("'Description' not in config file.")
            return [None, False, NAME, None, cost]

        if 'type' in config_options:
            plugin_type = config.get('Configuration', 'type')
        else:
            logging.error("'Type' not specified in config file.")
            return [None, False, NAME, description, cost]

        # Module import success
        if parent is not None:
            self.parent = parent
        else:
            logging.info("Parent verum app instance not passed to minion. Please rerun, passing the parent object instance to successfully configure.")
            return [plugin_type, False, NAME, description, cost]

        if self.parent.loc is not None:
            # Import the verum module so that app features (such as the storage backend) can be accessed.
            fp, pathname, mod_description = imp.find_module("verum", [self.parent.loc])
            self.Verum = imp.load_module("verum", fp, pathname, mod_description)
        else:
            logging.error("'verum' location not supplied to minion configuration function. Rerun with the location of the verum module specified.")
            return [plugin_type, False, NAME, description, cost]

        if 'feed' in config_options:
            FEED = config.get('Configuration', 'feed')
        else:
            logging.error("'Feed' not specified in config file.")
            return [plugin_type, False, NAME, description, cost]

        # Return success
        return [plugin_type, True, NAME, description, cost]


    def minion(self, storage=None, *args, **xargs):
        self.app = self.Verum.app(self.parent.PluginFolder, None)
        # set storage
        if storage is None:
            storage = self.parent.storage
        self.app.set_interface(storage)

        # Check until stopped
        while not self.shutdown:
            # If the feed was already imported today, sleep; otherwise run the import
            if self.today is not None and datetime.utcnow().date() == self.today.date():
                time.sleep(SLEEP_TIME)
            else:
                # Get the file
                r = requests.get(FEED)

                # split it out
                feed = r.text.split("\n")

                df = pd.DataFrame(columns=("indicator", "context", "date", "source"))
                # load the feed into a dataframe line by line. I know it's slow.
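                # Each non-comment feed line is CSV with four fields:
                #     indicator,context,date,source
                # e.g. (an illustrative sketch, not real feed data):
                #     1.2.3.4,IP used by examplebot C&C,2015-08-20 19:05,<source url>
                # The 'context' field is split on ' used by ' into 'key' and 'threat' below.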
                for line in feed:
                    if line and line[0] != "#":
                        l = line.split(",")
                        if len(l) == 4:
                            df.loc[df.shape[0]] = l

                # Index([u'indicator', u'context', u'date', u'source', u'key', u'threat'], dtype='object')
                df = pd.concat([df, pd.DataFrame(df.context.str.split(' used by ', 1).tolist(), columns=['key', 'threat'])], axis=1)

                # Create list of IPs for cymru enrichment
                ips = set()

                for row in df.iterrows():
                    # Don't add it if it was added yesterday w/ same origination date
                    if not ((self.yesterday['indicator'] == row[1]['indicator']) &
                            (self.yesterday['date'] == row[1]['date']) &
                            (self.yesterday['key'] == row[1]['key']) &
                            (self.yesterday['threat'] == row[1]['threat'])).any():

                        g = nx.MultiDiGraph()

                        # convert date to correct format
                        dt = dateutil.parser.parse(row[1]['date']).strftime("%Y-%m-%dT%H:%M:%SZ")

                        # Add indicator to graph
                        ## (Must account for the different types of indicators)
                        key = keys[row[1]['key']]
                        target_uri = "class=attribute&key={0}&value={1}".format(key, row[1]['indicator'])
                        g.add_node(target_uri, {
                            'class': 'attribute',
                            'key': key,
                            "value": row[1]['indicator'],
                            "start_time": dt,
                            "uri": target_uri
                        })

                        # Add threat to list
                        if row[1]['threat'][-4:] == u' C&C':
                            CandC = True
                            threat = row[1]['threat'][:-4]
                        else:
                            CandC = False
                            threat = row[1]['threat']

                        # Threat node
                        threat_uri = "class=attribute&key={0}&value={1}".format("malware", threat)
                        g.add_node(threat_uri, {
                            'class': 'attribute',
                            'key': "malware",
                            "value": threat,
                            "start_time": dt,
                            "uri": threat_uri
                        })

                        # Threat Edge
                        edge_attr = {
                            "relationship": "describedBy",
                            "origin": row[1]['source'],
                            "start_time": dt,
                        }
                        # test for nameserver and update edge_attr
                        if nameserver[row[1]['key']]:
                            edge_attr['describedBy'] = 'nameserver'
                        source_hash = uuid.uuid3(uuid.NAMESPACE_URL, target_uri)
                        dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, threat_uri)
                        # NOTE: 'destionation' (sic) is the key used consistently for edge URIs throughout the codebase
                        edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
                        rel_chain = "relationship"
                        while rel_chain in edge_attr:
                            edge_uri = edge_uri + "&{0}={1}".format(rel_chain, edge_attr[rel_chain])
                            rel_chain = edge_attr[rel_chain]
                        if "origin" in edge_attr:
                            edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
                        edge_attr["uri"] = edge_uri
                        g.add_edge(target_uri, threat_uri, edge_uri, edge_attr)

                        # Add C&C to list if applicable
                        if CandC:
                            # C2 node
                            c2_uri = "class=attribute&key={0}&value={1}".format("classification", "c2")
                            g.add_node(c2_uri, {
                                'class': 'attribute',
                                'key': "classification",
                                "value": "c2",
                                "start_time": dt,
                                "uri": c2_uri
                            })

                            # C2 Edge
                            edge_attr = {
                                "relationship": "describedBy",
                                "origin": row[1]['source'],
                                "start_time": dt,
                            }
                            # test for nameserver and update edge_attr
                            if nameserver[row[1]['key']]:
                                edge_attr['describedBy'] = 'nameserver'
                            source_hash = uuid.uuid3(uuid.NAMESPACE_URL, target_uri)
                            dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, c2_uri)
                            edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
                            rel_chain = "relationship"
                            while rel_chain in edge_attr:
                                edge_uri = edge_uri + "&{0}={1}".format(rel_chain, edge_attr[rel_chain])
                                rel_chain = edge_attr[rel_chain]
                            if "origin" in edge_attr:
                                edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
                            edge_attr["uri"] = edge_uri
                            g.add_edge(target_uri, c2_uri, edge_uri, edge_attr)


                        # classify malicious and merge with current graph
                        g = self.Verum.merge_graphs(g, self.app.classify.run({'key': key, 'value': row[1]['indicator'], 'classification': 'malice'}))

                        # enrich depending on type
                        try:
                            g = self.Verum.merge_graphs(g, self.app.run_enrichments(row[1]['indicator'], key, names=[u'DNS Enrichment', u'TLD Enrichment', u'Maxmind ASN Enrichment', u'IP Whois Enrichment']))
                        except Exception as e:
                            #print "Enrichment of {0} failed due to {1}.".format(row[1]['indicator'], e)  # DEBUG
                            logging.info("Enrichment of {0} failed due to {1}.".format(row[1]['indicator'], e))
                            pass

                        # add to ip list if appropriate
                        if key == "ip":
                            try:
                                _ = ipaddress.ip_address(unicode(row[1]['indicator']))  # validate before queueing
                                ips.add(row[1]['indicator'])  # queue the raw IP for the batched Cymru enrichment below
                            except ValueError:
                                pass

                        try:
                            self.app.store_graph(self.Verum.remove_non_ascii_from_graph(g))
                        except:
                            print g.nodes(data=True)  # DEBUG
                            print g.edges(data=True)  # DEBUG
                            raise

                        if len(ips) >= 50:
                            # Do cymru enrichment in batches of 50 IPs
                            try:
                                self.app.store_graph(self.app.run_enrichments(ips, 'ip', names=[u'Cymru Enrichment']))
                            except:
                                logging.info("Cymru enrichment of {0} IPs failed.".format(len(ips)))
                                pass
                            ips = set()

                # Copy today's data to yesterday
                self.yesterday = df

                # Record the time of this import
                self.today = datetime.utcnow()


    def start(self, *args, **xargs):
        self.thread = threading.Thread(target=self.minion, *args, **xargs)
        self.thread.start()

    def isAlive(self):
        if self.thread is None:
            return False
        else:
            return self.thread.isAlive()


    def stop(self, force=True):
        if force:
            self.thread = None  # zero out thread
        else:
            self.shutdown = True  # just don't iterate. May take up to SLEEP_TIME seconds (4 hours) to exit.
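
# Typical lifecycle of this plugin (illustrative sketch; assumes `app` is a
# configured verum app instance that has discovered this plugin as `plugin`):
#     plugin.configure(app)      # wire in the parent app and its storage interface
#     plugin.start()             # launch minion() on a worker thread
#     plugin.isAlive()           # poll the worker thread
#     plugin.stop(force=False)   # request a graceful shutdown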
--------------------------------------------------------------------------------
/minions/edge_consolidator.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

__author__ = "Gabriel Bassett"
"""
AUTHOR: {0}
DATE:
DEPENDENCIES:
Copyright {0}

LICENSE:
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.

DESCRIPTION:


""".format(__author__)
# PRE-USER SETUP
pass

########### NOT USER EDITABLE ABOVE THIS POINT #################


# USER VARIABLES
PLUGIN_CONFIG_FILE = "edge_consolidator.yapsy-plugin"  # CHANGEME
NAME = "Neo4j Edge Consolidator"  # CHANGEME
JUMP = 0.9  # probability per iteration that the random walk teleports to a fresh random node
NEO4J_HOST = 'localhost'
NEO4J_PORT = '7474'
LOGFILE = None
USERNAME = None
PASSWORD = None
SLEEP_TIME = 5  # seconds between iterations

########### NOT USER EDITABLE BELOW THIS POINT #################


## IMPORTS
from yapsy.IPlugin import IPlugin
import logging
from collections import defaultdict  # used for storing duplicate edges
import networkx as nx
from datetime import datetime
import uuid
import ConfigParser
import inspect
import threading
try:
    from py2neo import Graph as py2neoGraph
    from py2neo import Node as py2neoNode
    from py2neo import Relationship as py2neoRelationship
    from py2neo import authenticate as py2neoAuthenticate
    neo_import = True
except ImportError:
    logging.error("Neo4j plugin did not load.")
    neo_import = False
import imp  # for verum import
import random  # for jumps
from time import sleep  # for sleeping between iterations

## SETUP
random.seed()

loc = inspect.getfile(inspect.currentframe())
ind = loc.rfind("/")
loc = loc[:ind+1]
config = ConfigParser.SafeConfigParser()
config.readfp(open(loc + PLUGIN_CONFIG_FILE))

if config.has_section('Core'):
    if 'name' in config.options('Core'):
        NAME = config.get('Core', 'name')
if config.has_section('Log'):
    if 'level' in config.options('Log'):
        LOGLEVEL = config.get('Log', 'level')
    if 'file' in config.options('Log'):
        LOGFILE = config.get('Log', 'file')
if config.has_section('neo4j'):
    if 'host' in config.options('neo4j'):
        NEO4J_HOST = config.get('neo4j', 'host')
    if 'port' in config.options('neo4j'):
        NEO4J_PORT = config.get('neo4j', 'port')
    if 'username' in config.options('neo4j'):
        USERNAME = config.get('neo4j', 'username')
    if 'password' in config.options('neo4j'):
        PASSWORD = config.get('neo4j', 'password')

## EXECUTION
class PluginOne(IPlugin):
    storage = None
    thread = None
    app = None  # The object instance
    Verum = None  # the module
    shutdown = False  # Used to trigger shutdown of the minion
    parent = None  # The parent instance of the verum app object
    neo4j_config = None
    sleep_time = SLEEP_TIME
    jump = JUMP

    # CHANGEME: The init should contain anything to load modules or data files that should be variables of the plugin object
    def __init__(self):
        pass

    # CHANGEME: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everything loaded correctly
    # CHANGEME: Current layout is for an enrichment plugin
    # CHANGEME: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
    # CHANGEME: interface [type, successful_load, name]
    # CHANGEME: score [type, successful_load, name, description, cost, speed]
    # CHANGEME: minion [type, successful_load, name, description, cost]
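    # For orientation (illustrative sketch): on success, configure() below returns
    # something like ['minion', True, 'Neo4j Edge Consolidator', <description string>, <cost>];
    # on failure the second element is False and later elements may be None or missing.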
    def configure(self, parent=None):
        """

        :param parent: The parent verum app instance
        :return: return list of configuration variables starting with [plugin_type, successful_load, name, description, cost]
        """
        config_options = config.options("Configuration")

        # Cost and speed are not applicable to all plugin types
        if 'cost' in config_options:
            cost = config.get('Configuration', 'cost')
        else:
            cost = 9999
        if 'jump' in config_options:
            self.jump = float(config.get('Configuration', 'jump'))
        if 'sleep_time' in config_options:
            self.sleep_time = float(config.get('Configuration', 'sleep_time'))


        if config.has_section('Documentation') and 'description' in config.options('Documentation'):
            description = config.get('Documentation', 'description')
        else:
            logging.error("'Description' not in config file.")
            return [None, False, NAME, None, cost]

        if 'type' in config_options:
            plugin_type = config.get('Configuration', 'type')
        else:
            logging.error("'Type' not specified in config file.")
            return [None, False, NAME, description, cost]

        # Module success is only applicable to plugins which import unique code
        if parent is not None:
            self.parent = parent
        else:
            logging.info("Parent verum app instance not passed to minion. Please rerun, passing the parent object instance to successfully configure.")
            return [plugin_type, False, NAME, description, cost]

        if self.parent.loc is not None:
            # Import the verum module so that app features (such as the storage backend) can be accessed.
            fp, pathname, mod_description = imp.find_module("verum", [self.parent.loc])
            self.Verum = imp.load_module("verum", fp, pathname, mod_description)
        else:
            logging.error("'verum' location not supplied to minion configuration function. Rerun with the location of the verum module specified.")
            return [plugin_type, False, NAME, description, cost]

        # Ensure a neo4j storage plugin
        if not neo_import:
            logging.error("Py2neo import failed. Ensure py2neo v2.* is installed.")
            return [plugin_type, False, NAME, description, cost]

        try:
            self.set_neo4j_config(NEO4J_HOST, NEO4J_PORT, USERNAME, PASSWORD)
        except Exception as e:
            logging.error("Neo4j configuration failed with error {0}. Check host, port, username, and password.".format(e))
            return [plugin_type, False, NAME, description, cost]


        return [plugin_type, True, NAME, description, cost]


    ############ GENERAL NOTES ############
    # CHANGEME: All functions must implement a "configuration()" function
    # CHANGEME: The correct type of execution function must be defined for the type of plugin
    ############ GENERAL NOTES ############


    # CHANGEME: minion: minion()
    # CHANGEME: start()
    # CHANGEME: stop()
    # CHANGEME: isAlive()
    # CHANGEME: Minion plugin specifics:
    # - Minions exist in a separate directory to prevent them importing themselves when they import their own VERUM instance
    # - The minion configuration function must take an argument of the parent verum object. When not present, it shouldn't error but
    # - instead return with successful_load set to false and a logging.info message that the parent was not passed in.
    # - Must have 4 functions: minion(), start(), stop(), and isAlive()
    # - minion() is the function which will be threaded. **Make sure to create the new verum instance WITHIN this function
    # - to avoid SQLite errors!**
    # - start() creates the thread object as an attribute of the plugin class and starts it
    # - stop() stops the thread. Preferably with both a normal exit by setting a shutdown variable of the plugin class as well as a
    # - force stop option which removes the thread object
    # - isAlive() calls the thread isAlive() function and returns the status
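    # Minimal shape of that contract (illustrative sketch only, not part of this plugin):
    #     class MinionTemplate(IPlugin):
    #         def minion(self, *args, **xargs):
    #             self.shutdown = False
    #             while not self.shutdown:
    #                 pass  # one unit of work per iteration, then sleep
    #         def start(self, *args, **xargs):
    #             self.thread = threading.Thread(target=self.minion, *args, **xargs)
    #             self.thread.start()
    #         def stop(self, force=True):
    #             if force: self.thread = None
    #             else: self.shutdown = True
    #         def isAlive(self):
    #             return self.thread is not None and self.thread.isAlive()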
    def minion(self, *args, **xargs):
        self.shutdown = False

        # Get graph
        neo_graph = py2neoGraph(self.neo4j_config)

        # Pick a uniformly random node that has at least one outgoing describedBy edge
        random_cypher = ''' MATCH (a)-[:describedBy]->()
                            RETURN a, rand() as r
                            ORDER BY r
                            LIMIT 1
                        '''

        # pick a random node
        records = neo_graph.cypher.execute(random_cypher)
        node = records[0][0]

        logging.info("first node to consolidate edges for is class: {0}, key: {1}, value: {2}".format(node.properties['class'], node.properties['key'], node.properties['value']))
        print "first node to consolidate edges for is class: {0}, key: {1}, value: {2}".format(node.properties['class'], node.properties['key'], node.properties['value'])  # DEBUG

        while not self.shutdown:
            edges = defaultdict(set)
            destinations = set()

            # get edges starting with the node
            for rel in node.match_outgoing():
                if 'uri' in rel.properties:
                    edge_uri = rel.properties['uri']
                else:
                    # SRC URI (nodes carry class/key/value properties)
                    if 'uri' in rel.start_node.properties:
                        source_uri = rel.start_node.properties['uri']
                    else:
                        source_uri = "class={0}&key={1}&value={2}".format(rel.start_node.properties['class'], rel.start_node.properties['key'], rel.start_node.properties['value'])

                    # DST URI
                    if 'uri' in rel.end_node.properties:
                        dest_uri = rel.end_node.properties['uri']
                    else:
                        dest_uri = "class={0}&key={1}&value={2}".format(rel.end_node.properties['class'], rel.end_node.properties['key'], rel.end_node.properties['value'])

                    # Remove non-ascii as it gums up uuid.
                    # NOTE: This shouldn't affect anything as it's just for the key in the edges dictionary
                    source_uri = self.Verum.removeNonAscii(source_uri)
                    dest_uri = self.Verum.removeNonAscii(dest_uri)

                    # Edge URI
                    source_hash = uuid.uuid3(uuid.NAMESPACE_URL, source_uri)
                    dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, dest_uri)

                    edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
                    rel_chain = "relationship"
                    while rel_chain in rel.properties:
                        edge_uri = edge_uri + "&{0}={1}".format(rel_chain, rel.properties[rel_chain])
                        rel_chain = rel.properties[rel_chain]
                    if "origin" in rel.properties:
                        edge_uri += "&{0}={1}".format("origin", rel.properties["origin"])

                # aggregate edges by dst, and uri
                edges[edge_uri].add(rel)  # WARNING: The use of URI here is vulnerable to values being out of order in the URI and edges not being removed.
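                # One way to harden this (an illustrative sketch, not implemented here):
                # canonicalize the query string before keying on it, e.g.
                #     import urlparse, urllib
                #     canonical = urllib.urlencode(sorted(urlparse.parse_qsl(edge_uri)))
                # so that equivalent edges always map to the same dictionary key.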

                # collect destinations to pick next node
                destinations.add(rel.end_node)

            time = datetime.utcnow()

            # SRC URI
            if 'uri' in node.properties:
                source_uri = node.properties['uri']
            else:
                source_uri = "class={0}&key={1}&value={2}".format(node.properties['class'], node.properties['key'], node.properties['value'])

            for edge_uri in edges:
                edge_list = list(edges[edge_uri])

                # DST URI
                if 'uri' in edge_list[0].end_node.properties:
                    dest_uri = edge_list[0].end_node.properties['uri']
                else:
                    dest_uri = "class={0}&key={1}&value={2}".format(edge_list[0].end_node.properties['class'], edge_list[0].end_node.properties['key'], edge_list[0].end_node.properties['value'])

                logging.debug("Removing {0} edges from node {1} to {2}.".format(len(edge_list[1:]), source_uri, dest_uri))
                #print "Removing {0} edges from node {1} to {2}.".format(len(edge_list[1:]), source_uri, dest_uri)  # DEBUG

                for edge in edge_list[1:]:
                    # keep earliest time as start
                    try:
                        edge_time = datetime.strptime(edge.properties['start_time'], "%Y-%m-%dT%H:%M:%SZ")
                        if time > edge_time:
                            time = edge_time
                    except (KeyError, ValueError):  # no start_time on the edge, or the time on it wasn't legit
                        pass
                    try:  # sometimes the edge is no longer there. Better to pass than fail.
                        # remove all but one edge of each group
                        edge.delete()
                    except:
                        pass
                # Update time on the remaining edge
                try:
                    edge_time = datetime.strptime(edge_list[0].properties['start_time'], "%Y-%m-%dT%H:%M:%SZ")
                except (KeyError, ValueError):
                    edge_time = datetime.utcnow()
                if 'start_time' not in edge_list[0].properties or time < edge_time:
                    edge_list[0].properties['start_time'] = time.strftime("%Y-%m-%dT%H:%M:%SZ")
                    edge_list[0].push()

                logging.debug("Keeping edge {0} from node {1} to node {2}.".format(edge_list[0].uri, source_uri, dest_uri))
                #print "Keeping edge {0} from node {1} to node {2}.".format(edge_list[0].uri, source_uri, dest_uri)  # DEBUG

            # Sleep to slow it down
            sleep(self.sleep_time)

            jump = random.random()

            # do the random walk
            if len(destinations) == 0 or jump <= self.jump:
                # pick a random node
                records = neo_graph.cypher.execute(random_cypher)
                node = records[0][0]
                logging.debug("Edge consolidation random walk jumped.")
            else:
                node = random.choice(list(destinations))  # random.choice needs a sequence, not a set
                logging.debug("Edge consolidation random walk didn't jump.")

            logging.info("Next node to consolidate edges for is class: {0}, key: {1}, value: {2}".format(node.properties['class'], node.properties['key'], node.properties['value']))
            #print "Next node to consolidate edges for is class: {0}, key: {1}, value: {2}".format(node.properties['class'], node.properties['key'], node.properties['value'])  # DEBUG

    def start(self, *args, **xargs):
        self.thread = threading.Thread(target=self.minion, *args, **xargs)
        self.thread.start()

    def isAlive(self):
        if self.thread is None:
            return False
        else:
            return self.thread.isAlive()

    def stop(self, force=True):
        if force:
            self.thread = None  # zero out thread
        else:
            self.shutdown = True  # just don't iterate. May take up to sleep_time seconds to exit.
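    # Illustrative usage sketch of the helper below:
    #     self.set_neo4j_config('localhost', '7474', 'neo4j', 'secret')
    # authenticates (when credentials are given) and points the minion at
    # http://neo4j:secret@localhost:7474/db/data/.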
    def set_neo4j_config(self, host, port, username=None, password=None):
        if username and password:
            py2neoAuthenticate("{0}:{1}".format(host, port), username, password)
            self.neo4j_config = "http://{2}:{3}@{0}:{1}/db/data/".format(host, port, username, password)
        else:
            self.neo4j_config = "http://{0}:{1}/db/data/".format(host, port)
--------------------------------------------------------------------------------