├── plugins
    ├── __init__.py
    ├── GeoIPASNum.dat
    ├── dns.yapsy-plugin
    ├── tld.yapsy-plugin
    ├── classify.yapsy-plugin
    ├── cymru.yapsy-plugin
    ├── generic.yapsy-plugin
    ├── ipwhois.yapsy-plugin
    ├── modularity.yapsy-plugin
    ├── maxmind.yapsy-plugin
    ├── path_count.yapsy-plugin
    ├── page_rank.yapsy-plugin
    ├── titan.yapsy-plugin
    ├── bayes_net.yapsy-plugin
    ├── networkx.yapsy-plugin
    ├── page_rank_2.yapsy-plugin
    ├── neo4j.yapsy-plugin
    ├── cymru_api.py
    ├── classify.py
    ├── generic.py
    ├── modularity.py
    ├── page_rank.py
    ├── page_rank_2.py
    ├── networkx.py
    ├── dns.py
    ├── maxmind.py
    ├── path_count.py
    ├── tld.py
    ├── cymru.py
    └── bayes_net.py
├── verum.cfg
├── minions
    ├── alexa_1M.yapsy-plugin
    ├── osint_bambenekconsulting_com.yapsy-plugin
    ├── osint_bambenekconsulting_com_v2.yapsy-plugin
    ├── edge_consolidator.yapsy-plugin
    ├── alexa_1M.py
    ├── osint_bambenekconsulting_com.py
    └── edge_consolidator.py
├── examples
    ├── plugin_template.yapsy-plugin
    └── plugin_template.py
├── .gitignore
├── verum
    ├── __init__.py
    └── helper.py
├── ui.py
├── README.md
└── LICENSE


/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'v685573'
2 | 


--------------------------------------------------------------------------------
/plugins/GeoIPASNum.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vz-risk/Verum/HEAD/plugins/GeoIPASNum.dat


--------------------------------------------------------------------------------
/verum.cfg:
--------------------------------------------------------------------------------
 1 | [CORE]
 2 | ; Plugins Folder
 3 | Plugins = "~/Documents/Deveopment/verum/plugins"
 4 | ; Minions FOlder
 5 | Minons = "~/Documents/Development/verum/minions"
 6 | 
 7 | [LOGGING]
 8 | level = debug
 9 | log = none
10 | 


--------------------------------------------------------------------------------
/plugins/dns.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = DNS Enrichment
 3 | Module = dns
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes an IP string and returns the DNS resolved IP address as networkx graph.
10 | 
11 | [Configuration]
12 | Type = enrichment
13 | Cost = 3
14 | Speed = 3
15 | Inputs = domain


--------------------------------------------------------------------------------
/plugins/tld.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = TLD Enrichment
 3 | Module = tld
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.
10 | 
11 | [Configuration]
12 | Type = enrichment
13 | Cost = 1
14 | Speed = 1
15 | Inputs = domain
16 | 
17 | 


--------------------------------------------------------------------------------
/plugins/classify.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = classify
 3 | Module = classify
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a target (key:value) and classification and returns a graph linking the two.
10 | 
11 | [Configuration]
12 | Type = enrichment
13 | Inputs = any
14 | 
15 | [Log]
16 | level = debug
17 | # file = ./logfile.log


--------------------------------------------------------------------------------
/plugins/cymru.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = Cymru Enrichment
 3 | Module = cymru
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a list of IPs and returns ASN and BGP information as networkx graph.
10 | 
11 | [Configuration]
12 | Type = enrichment
13 | Cost = 5
14 | Speed = 4
15 | Inputs = ip
16 | Cymru_Module = ./cymru_api.py
17 | 


--------------------------------------------------------------------------------
/plugins/generic.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = generic
 3 | Module = generic
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a described (key:value) and describing (key:value) and returns a graph linking the two.
10 | 
11 | [Configuration]
12 | Type = enrichment
13 | Inputs = any
14 | 
15 | [Log]
16 | level = debug
17 | # file = ./logfile.log


--------------------------------------------------------------------------------
/plugins/ipwhois.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = IP Whois Enrichment
 3 | Module = ipwhois
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a whois record as a list of strings in a specific format and returns a networkx graph of the information.
10 | 
11 | [Configuration]
12 | Type = enrichment
13 | Cost = 3
14 | Speed = 2
15 | Inputs = domain


--------------------------------------------------------------------------------
/plugins/modularity.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = Modularity
 3 | Module = modularity
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a subgraph and modularity-based partitioning (clustering) of the subgraph
10 | 
11 | [Configuration]
12 | Type = score
13 | cost = 2
14 | speed = 2
15 | 
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log


--------------------------------------------------------------------------------
/plugins/maxmind.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = Maxmind ASN Enrichment
 3 | Module = maxmind
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a domain name as string and returns the ASN of the IP address as networkx graph.
10 | 
11 | 
12 | [Configuration]
13 | Type = enrichment
14 | DAT_FILE = ./GeoIPASNum.dat
15 | Cost = 2
16 | Speed = 2
17 | Inputs = IP


--------------------------------------------------------------------------------
/plugins/path_count.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = PathCount
 3 | Module = path_count
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a subgraph and topic and scores nodes based on the number of paths between the topic and the node
10 | 
11 | [Configuration]
12 | Type = score
13 | cost = 4
14 | speed = 6
15 | 
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log


--------------------------------------------------------------------------------
/minions/alexa_1M.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = Alexa Top 1M
 3 | Module = alexa_1m
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Reads the alexa 1M and imports it into the intelligence graph.
10 | 
11 | [Configuration]
12 | Type = minion
13 | Feed = http://s3.amazonaws.com/alexa-static/top-1m.csv.zip
14 | cost = 4
15 | 
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log


--------------------------------------------------------------------------------
/plugins/page_rank.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = PageRank
 3 | Module = page_rank
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a subgraph and topic and uses pagerank without any specific specialization to score the nodes in the subgraph.
10 | 
11 | [Configuration]
12 | Type = score
13 | cost = 2
14 | speed = 2
15 | 
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log


--------------------------------------------------------------------------------
/plugins/titan.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = TitanDB
 3 | Module = titan
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Provides the ability to enrich a CAGS context graph stored in TitanDB.
10 | 
11 | [Configuration]
12 | Type = interface
13 | 
14 | [titanDB]
15 | host = localhost
16 | port = 8182
17 | graph = vzgraph
18 | 
19 | [Log]
20 | level = debug
21 | # file = ./logfile.log


--------------------------------------------------------------------------------
/plugins/bayes_net.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = BayesNet
 3 | Module = bayes_net
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a subgraph and topic, treats the subgraph as a bayesian inference network, assumes the topic true, and scores the other nodes.
10 | 
11 | [Configuration]
12 | Type = score
13 | cost = 8
14 | speed = 8
15 | 
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log


--------------------------------------------------------------------------------
/examples/plugin_template.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = <plugin name>
 3 | Module = <the file name of the associated plugin, minus the '.py'>
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = <description>
10 | 
11 | [Configuration]
12 | Type = <type: interface, enrichment, query, minion>
13 | <variable1> = <value 1>
14 | <variable2> = <value 2>
15 | 
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log


--------------------------------------------------------------------------------
/plugins/networkx.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = Networkx Interface
 3 | Module = networkx
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Reads a graph file into memory, stores the graph in a networkx graph in memory, and writes the graph back with .write()
10 | 
11 | [Configuration]
12 | Type = interface
13 | context_graph_file = /tmp/verum.graphml
14 | 
15 | [Log]
16 | level = debug
17 | # file = ./logfile.log


--------------------------------------------------------------------------------
/plugins/page_rank_2.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = PageRank2
 3 | Module = page_rank_2
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Takes a subgraph and topic and uses pagerank with specific initialization values to score the nodes in the subgraph with respect to the topic.
10 | 
11 | [Configuration]
12 | Type = score
13 | cost = 2
14 | speed = 2
15 | 
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log


--------------------------------------------------------------------------------
/plugins/neo4j.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = Neo4j
 3 | Module = neo4j
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Provides the ability to enrich a CAGS context graph stored in Neo4j graph database.
10 | 
11 | [Configuration]
12 | Type = interface
13 | 
14 | [neo4j]
15 | host = localhost
16 | port = 7474
17 | username = neo4j
18 | password = neo4j1
19 | 
20 | [Log]
21 | level = debug
22 | # file = ./logfile.log


--------------------------------------------------------------------------------
/minions/osint_bambenekconsulting_com.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = OSINT Bambenek Consulting
 3 | Module = osint_bambenekconsulting_com
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Reads the feed at http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt and import it into the intelligence graph.
10 | 
11 | [Configuration]
12 | Type = minion
13 | Feed = http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt
14 | cost = 4
15 | 
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log


--------------------------------------------------------------------------------
/minions/osint_bambenekconsulting_com_v2.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = OSINT Bambenek Consulting V2
 3 | Module = osint_bambenekconsulting_com_v2
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.2
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Reads the feed at http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt and import it into the intelligence graph.
10 | 
11 | [Configuration]
12 | Type = minion
13 | Feed = http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt
14 | cost = 4
15 | 
16 | [Log]
17 | level = debug
18 | # file = ./logfile.log


--------------------------------------------------------------------------------
/minions/edge_consolidator.yapsy-plugin:
--------------------------------------------------------------------------------
 1 | [Core]
 2 | Name = Neo4j Edge Consolidator
 3 | Module = edge_consolidator
 4 | 
 5 | [Documentation]
 6 | Author = Gabriel Bassett
 7 | Version = 0.1
 8 | Website = https://github.com/vz-risk/Verum
 9 | Description = Randomly walks the graph.  At each node, it consolidates edges by URI.
10 | 
11 | [Configuration]
12 | Type = minion
13 | Jump = 0.9
14 | Cost = 2
15 | # will sleep sleep_time seconds inbetween nodes to slow things down
16 | sleep_time = 3
17 | 
18 | [neo4j]
19 | host = localhost
20 | port = 7474
21 | username = neo4j
22 | password = neo4j1
23 | 
24 | [Log]
25 | level = debug
26 | # file = ./logfile.log


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | 
25 | # PyInstaller
26 | #  Usually these files are written by a python script from a template
27 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 | 
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 | 
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .cache
40 | nosetests.xml
41 | coverage.xml
42 | 
43 | # Translations
44 | *.mo
45 | *.pot
46 | 
47 | # Django stuff:
48 | *.log
49 | 
50 | # Sphinx documentation
51 | docs/_build/
52 | 
53 | # PyBuilder
54 | target/
55 | 
56 | # pycharm
57 | .idea
58 | .idea/
59 | 


--------------------------------------------------------------------------------
/verum/__init__.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Gabriel Bassett'
 2 | 
 3 | '''
 4 |  Copyright 2014 Gabriel Bassett
 5 | 
 6 |  LICENSE:
 7 | Licensed to the Apache Software Foundation (ASF) under one
 8 | or more contributor license agreements.  See the NOTICE file
 9 | distributed with this work for additional information
10 | regarding copyright ownership.  The ASF licenses this file
11 | to you under the Apache License, Version 2.0 (the
12 | "License"); you may not use this file except in compliance
13 | with the License.  You may obtain a copy of the License at
14 | 
15 |   http://www.apache.org/licenses/LICENSE-2.0
16 | 
17 | Unless required by applicable law or agreed to in writing,
18 | software distributed under the License is distributed on an
19 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 | KIND, either express or implied.  See the License for the
21 | specific language governing permissions and limitations
22 | under the License.
23 | '''
24 | 
25 | 
26 | __all__ = [ 'app',
27 |             'helper'
28 |           ]
29 | # Import the packages
30 | from app import app
31 | from helper import *
32 | 


--------------------------------------------------------------------------------
/ui.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 |  AUTHOR: Gabriel Bassett
 4 |  DATE: 12-17-2013
 5 |  DEPENDENCIES: a list of modules requiring installation
 6 |  Copyright 2014 Gabriel Bassett
 7 | 
 8 |  LICENSE:
 9 | Licensed to the Apache Software Foundation (ASF) under one
10 | or more contributor license agreements.  See the NOTICE file
11 | distributed with this work for additional information
12 | regarding copyright ownership.  The ASF licenses this file
13 | to you under the Apache License, Version 2.0 (the
14 | "License"); you may not use this file except in compliance
15 | with the License.  You may obtain a copy of the License at
16 | 
17 |   http://www.apache.org/licenses/LICENSE-2.0
18 | 
19 | Unless required by applicable law or agreed to in writing,
20 | software distributed under the License is distributed on an
21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
22 | KIND, either express or implied.  See the License for the
23 | specific language governing permissions and limitations
24 | under the License.
25 | '''
26 | 
27 |  DESCRIPTION:
28 |  A description of the software
29 | 
30 | """
31 | # PRE-USER SETUP
32 | pass
33 | 
34 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
35 | 
36 | 
37 | # USER VARIABLES
38 | NEODB = "http://192.168.121.134:7474/db/data"
39 | 
40 | 
41 | ########### NOT USER EDITABLE BELOW THIS POINT #################
42 | 
43 | 
44 | ## IMPORTS
45 | from py2neo import neo4j, cypher
46 | import networkx as nx
47 | import argparse
48 | import logging
49 | 
50 | ## SETUP
51 | __author__ = "Gabriel Bassett"
52 | # Parse Arguments (should correspond to user variables)
53 | parser = argparse.ArgumentParser(description='This script processes a graph.')
54 | parser.add_argument('-d', '--debug',
55 |                     help='Print lots of debugging statements',
56 |                     action="store_const", dest="loglevel", const=logging.DEBUG,
57 |                     default=logging.WARNING
58 |                    )
59 | parser.add_argument('-v', '--verbose',
60 |                     help='Be verbose',
61 |                     action="store_const", dest="loglevel", const=logging.INFO
62 |                    )
63 | parser.add_argument('--log', help='Location of log file', default=None)
64 | # <add arguments here>
65 | parser.add_argument('db', help='URL of the neo4j graph database', default=NEODB)
66 | #args = parser.parse_args()
67 | ## Set up Logging
68 | #if args.log is not None:
69 | #    logging.basicConfig(filename=args.log, level=args.loglevel)
70 | #else:
71 | #    logging.basicConfig(level=args.loglevel)
72 | # <add other setup here>
73 | # Connect to database
74 | G = neo4j.GraphDatabaseService(NEODB)
75 | g = nx.DiGraph()
76 | NEODB = args.db
77 | 
78 | 
79 | ## EXECUTION
80 | # TODO: load plugins
81 | # TODO: catalog enrichments and provide a way to run against all enrichments which take similar input
82 | 
83 | 
84 | 
85 | def main():
86 |     logging.info('Beginning main loop.')
87 | 
88 |     logging.info('Ending main loop.')
89 | 
90 | if __name__ == "__main__":
91 |     main()
92 | 


--------------------------------------------------------------------------------
/plugins/cymru_api.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # encoding: utf-8
  3 | """
  4 | cymru_api.py
  5 | """
  6 | #  From: https://gist.github.com/zakird/11196064
  7 | 
  8 | import sys
  9 | import os
 10 | import socket
 11 | import unittest
 12 | 
 13 | class CymruIPtoASNResult(object):
 14 |     def __init__(self, **kwargs):
 15 |         for k, v in kwargs.iteritems():
 16 |             setattr(self, k, v)
 17 | 
 18 |     def __str__(self):
 19 |         return "<CymruIPtoASNResult (%s)>" % self.ip_address
 20 | 
 21 |     __repr__ = __str__
 22 | 
 23 | class CymruIPtoASNService(object):
 24 |     URL = "whois.cymru.com"
 25 | 
 26 |     """Whois   Netcat          Action
 27 |             begin           enable bulk input mode          (netcat only)
 28 |             end             exit the whois/netcat client    (netcat only)
 29 |     -p      prefix          include matching prefix
 30 |     -q      noprefix        disable matching prefix (default)
 31 |     -c      countrycode     include matching country code
 32 |     -d      nocountrycode   disable country codes (default)
 33 |     -n      asname          include asnames (default)
 34 |     -o      noasname        disable asnames
 35 |     -r      registry        display matching registry
 36 |     -s      noregistry      disable registry display (default)
 37 |     -a      allocdate       enable allocation date
 38 |     -b      noallocdate     disable allocation date (default)
 39 |     -t      truncate        truncate asnames (default)
 40 |     -u      notruncate      do not truncate asnames
 41 |     -v      verbose         enable all flags (-c -r -p -a -u -a)
 42 |     -e      header          enable column headings (default)
 43 |     -f      noheader        disable column headings 
 44 |     -w      asnumber        include asnumber column (default)
 45 |     -x      noasnumber      disable asnumber column (will not work for IP mappings)
 46 |     -h      help            this help message"""
 47 | 
 48 |     def __init__(self):
 49 |         self.__socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 50 |         self.__socket.connect((self.URL, 43))
 51 | 
 52 |     def _gen_query(self, queries):
 53 |         lines = []
 54 |         lines.append("begin")
 55 |         lines.append("verbose")
 56 |         lines.extend(queries)
 57 |         lines.append("end\r\n")
 58 |         return "\n".join(lines)
 59 | 
 60 |     def _send_query(self, query):
 61 |         self.__socket.sendall(query)
 62 |         self.__socket.shutdown(socket.SHUT_WR)
 63 |         response = ''
 64 |         while True:
 65 |             r = self.__socket.recv(16)
 66 |             if r and r != '':
 67 |                 response = ''.join((response, r))
 68 |             else:
 69 |                 break
 70 |         return response
 71 | 
 72 |     LABELS = (
 73 |         'as_number',
 74 |         'ip_address',
 75 |         'bgp_prefix',
 76 |         'country',
 77 |         'registry',
 78 |         'allocated_at',
 79 |         'as_name'
 80 |     )
 81 | 
 82 |     def _parse_response(self, response):
 83 |         for line in response.split("\n"):
 84 |             if line.startswith("Bulk mode;") or line == '':
 85 |                 continue
 86 |             else:
 87 |                 clean = map(lambda v: v.rstrip().lstrip(), line.split('|'))
 88 |                 yield CymruIPtoASNResult(**dict(zip(self.LABELS, clean)))
 89 | 
 90 |     def query(self, queries):
 91 |         query = self._gen_query(queries)
 92 |         response = self._send_query(query)
 93 |         results = self._parse_response(response)
 94 |         for r in results:
 95 |             yield r
 96 | 
 97 |     def query_one(self, query):
 98 |         return list(self.query([query,]))[0]
 99 | 
100 | class CymruIptoASNServiceTests(unittest.TestCase):
101 |     def setUp(self):
102 |         self.service = CymruIPtoASNService()
103 | 
104 |     def testOne(self):
105 |         # expect the following:
106 |         # ['3676', '128.255.1.1', '128.255.0.0/16', 'US', 'arin', '1987-06-05',
107 |         # 'UIOWA-AS - University of Iowa']
108 |         r = self.service.query_one("128.255.1.1")
109 |         self.assertEquals(r.as_number, '3676')
110 |         self.assertEquals(r.ip_address, "128.255.1.1")
111 |         self.assertEquals(r.country, "US")
112 |         self.assertEquals(r.registry, "arin")
113 |         self.assertEquals(r.as_name, "UIOWA-AS - University of Iowa")
114 | 
115 |     def testMultiple(self):
116 |         rs = list(self.service.query(["128.255.1.1", "141.212.1.1"]))
117 |         self.assertEquals(rs[0].as_number, '3676')
118 |         self.assertEquals(rs[1].as_number, '36375')
119 | 
120 |     def testFailure(self):
121 |         pass
122 | 
123 | if __name__ == '__main__':
124 |     unittest.main()


--------------------------------------------------------------------------------
/plugins/classify.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "classify.yapsy-plugin"
 40 | NAME = "classify"
 41 | 
 42 | 
 43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | from yapsy.IPlugin import IPlugin
 48 | import logging
 49 | import networkx as nx
 50 | from datetime import datetime # timedelta imported above
 51 | import dateutil  # to parse variable time strings
 52 | import uuid
 53 | import ConfigParser
 54 | import inspect
 55 | try:
 56 |     import tldextract
 57 |     module_import_success = True
 58 | except:
 59 |     module_import_success = False
 60 |     logging.error("Module import failed.  Please install the following module: tldextract.")
 61 |     raise
 62 | 
 63 | 
 64 | ## SETUP
 65 | loc = inspect.getfile(inspect.currentframe())
 66 | ind = loc.rfind("/")
 67 | loc = loc[:ind+1]
 68 | config = ConfigParser.SafeConfigParser()
 69 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 70 | 
 71 | if config.has_section('Core'):
 72 |     if 'name' in config.options('Core'):
 73 |         NAME = config.get('Core', 'name')
 74 | if config.has_section('Log'):
 75 |     if 'level' in config.options('Log'):
 76 |         LOGLEVEL = config.get('Log', 'level')
 77 |     if 'file' in config.options('Log'):
 78 |         LOGFILE = config.get('Log', 'file')
 79 | 
 80 | 
 81 | ## EXECUTION
 82 | class PluginOne(IPlugin):
 83 |     inputs = None
 84 | 
 85 |     def __init__(self):
 86 |         pass
 87 | 
 88 |     def configure(self):
 89 |         """
 90 | 
 91 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
 92 |         """
 93 |         config_options = config.options("Configuration")
 94 | 
 95 |         if 'cost' in config_options:
 96 |             cost = config.get('Configuration', 'cost')
 97 |         else:
 98 |             cost = 9999
 99 |         if 'speed' in config_options:
100 |             speed = config.get('Configuration', 'speed')
101 |         else:
102 |             speed = 9999
103 | 
104 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
105 |             description = config.get('Documentation', 'description')
106 |         else:
107 |             logging.error("'Description not in config file.")
108 |             return [None, False, NAME, None, cost, speed]
109 | 
110 |         if 'type' in config_options:
111 |             plugin_type = config.get('Configuration', 'type')
112 |         else:
113 |             logging.error("'Type' not specified in config file.")
114 |             return [None, False, NAME, description, None, cost, speed]
115 | 
116 |         if 'inputs' in config_options:
117 |             self.inputs = config.get('Configuration', 'Inputs')
118 |             self.inputs = [l.strip().lower() for l in self.inputs.split(",")]
119 |         else:
120 |             logging.error("No input types specified in config file.")
121 |             return [plugin_type, False, NAME, description, None, cost, speed]
122 | 
123 |         return [plugin_type, True, NAME, description, self.inputs, cost, speed]
124 | 
125 | 
126 |     def run(self, enrichment_dict, start_time="", confidence=1):
127 |         """ dict, str -> networkx MultiDiGraph
128 | 
129 |         :param enrichment_dict: a dictionary of the form {'key': <key of atomic to classify>, 'value':<value of atomic to classify>, 'classification':<classification of atomic>}
130 |         :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object.
131 |         :param include_subdomain: Boolean value.  Default False.  If true, subdomain will be returned in enrichment graph
132 |         :return: a networkx graph representing the sections of the domain
133 |         """
134 |         key = enrichment_dict['key']
135 |         value = enrichment_dict['value']
136 |         classification = enrichment_dict['classification']
137 | 
138 |         g = nx.MultiDiGraph()
139 | 
140 |         if type(start_time) is str:
141 |             try:
142 |                 time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ")
143 |             except:
144 |                 time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
145 |         elif type(start_time) is datetime:
146 |             time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
147 |         else:
148 |             time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
149 | 
150 |         # Get or create target node
151 |         target_uri = "class=attribute&key={0}&value={1}".format(key, value)
152 |         g.add_node(target_uri, {
153 |             'class': 'attribute',
154 |             'key': key,
155 |             "value": value,
156 |             "start_time": time,
157 |             "uri": target_uri
158 |         })
159 | 
160 |         # Get or create classification node
161 |         classification_uri = "class=attribute&key={0}&value={1}".format("classification", classification)
162 |         g.add_node(classification_uri, {
163 |             'class': 'attribute',
164 |             'key': "classification",
165 |             "value": classification,
166 |             "start_time": time,
167 |             "uri": classification_uri
168 |         })
169 | 
170 | 
171 |         # Link target to classification
172 |         edge_attr = {
173 |             "relationship": "describedBy",
174 |             "start_time": time,
175 |             "origin": "classification",
176 |             "confidence": confidence
177 |         }
178 |         source_hash = uuid.uuid3(uuid.NAMESPACE_URL, target_uri)
179 |         dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, classification_uri)
180 |         edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
181 |         rel_chain = "relationship"
182 |         while rel_chain in edge_attr:
183 |             edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
184 |             rel_chain = edge_attr[rel_chain]
185 |         if "origin" in edge_attr:
186 |             edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
187 |         edge_attr["uri"] = edge_uri
188 |         g.add_edge(target_uri, classification_uri, edge_uri, edge_attr)
189 | 
190 |         return g


--------------------------------------------------------------------------------
/plugins/generic.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "generic.yapsy-plugin"
 40 | NAME = "generic"
 41 | 
 42 | 
 43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | from yapsy.IPlugin import IPlugin
 48 | import logging
 49 | import networkx as nx
 50 | from datetime import datetime # timedelta imported above
 51 | import dateutil  # to parse variable time strings
 52 | import uuid
 53 | import ConfigParser
 54 | import inspect
 55 | try:
 56 |     import tldextract
 57 |     module_import_success = True
 58 | except:
 59 |     module_import_success = False
 60 |     logging.error("Module import failed.  Please install the following module: tldextract.")
 61 |     raise
 62 | 
 63 | 
 64 | ## SETUP
 65 | loc = inspect.getfile(inspect.currentframe())
 66 | ind = loc.rfind("/")
 67 | loc = loc[:ind+1]
 68 | config = ConfigParser.SafeConfigParser()
 69 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 70 | 
 71 | if config.has_section('Core'):
 72 |     if 'name' in config.options('Core'):
 73 |         NAME = config.get('Core', 'name')
 74 | if config.has_section('Log'):
 75 |     if 'level' in config.options('Log'):
 76 |         LOGLEVEL = config.get('Log', 'level')
 77 |     if 'file' in config.options('Log'):
 78 |         LOGFILE = config.get('Log', 'file')
 79 | 
 80 | 
 81 | ## EXECUTION
 82 | class PluginOne(IPlugin):
 83 |     inputs = None
 84 | 
 85 |     def __init__(self):
 86 |         pass
 87 | 
 88 | 
 89 |     def configure(self):
 90 |         """
 91 | 
 92 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
 93 |         """
 94 |         config_options = config.options("Configuration")
 95 | 
 96 |         if 'cost' in config_options:
 97 |             cost = config.get('Configuration', 'cost')
 98 |         else:
 99 |             cost = 9999
100 |         if 'speed' in config_options:
101 |             speed = config.get('Configuration', 'speed')
102 |         else:
103 |             speed = 9999
104 | 
105 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
106 |             description = config.get('Documentation', 'description')
107 |         else:
108 |             logging.error("'Description not in config file.")
109 |             return [None, False, NAME, None, cost, speed]
110 | 
111 |         if 'type' in config_options:
112 |             plugin_type = config.get('Configuration', 'type')
113 |         else:
114 |             logging.error("'Type' not specified in config file.")
115 |             return [None, False, NAME, description, None, cost, speed]
116 | 
117 |         if 'inputs' in config_options:
118 |             self.inputs = config.get('Configuration', 'Inputs')
119 |             self.inputs = [l.strip().lower() for l in self.inputs.split(",")]
120 |         else:
121 |             logging.error("No input types specified in config file.")
122 |             return [plugin_type, False, NAME, description, None, cost, speed]
123 | 
124 |         return [plugin_type, True, NAME, description, self.inputs, cost, speed]
125 | 
126 | 
127 |     def run(self, enrichment_dict, start_time="", confidence=1):
128 |         """ dict, str -> networkx multiDiGraph
129 | 
130 |         :param enrichment_dict: a dictionary of the form {'key': <key of atomic to describe>, 'value':<value of atomic to describe>, 'describing_key':<key of describing atomic>, 'describing_value':<value of describing atomic>}
131 |         :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object.
132 |         :param include_subdomain: Boolean value.  Default False.  If true, subdomain will be returned in enrichment graph
133 |         :return: a networkx graph representing the sections of the domain
134 |         """
135 |         described_key = enrichment_dict['key']
136 |         described_value = enrichment_dict['value']
137 |         describing_key = enrichment_dict['describing_key']
138 |         describing_value = enrichment_dict['describing_value']
139 | 
140 |         g = nx.MultiDiGraph()
141 | 
142 |         if type(start_time) is str:
143 |             try:
144 |                 time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ")
145 |             except:
146 |                 time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
147 |         elif type(start_time) is datetime:
148 |             time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
149 |         else:
150 |             time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
151 | 
152 |         # Get or create target node
153 |         described_uri = "class=attribute&key={0}&value={1}".format(described_key, described_value)
154 |         g.add_node(described_uri, {
155 |             'class': 'attribute',
156 |             'key': described_key,
157 |             "value": described_value,
158 |             "start_time": time,
159 |             "uri": described_uri
160 |         })
161 | 
162 |         # Get or create classification node
163 |         describing_uri = "class=attribute&key={0}&value={1}".format(describing_key, describing_value)
164 |         g.add_node(describing_uri , {
165 |             'class': 'attribute',
166 |             'key': describing_key,
167 |             "value": describing_value,
168 |             "start_time": time,
169 |             "uri": describing_uri 
170 |         })
171 | 
172 | 
173 |         # Link target to classification
174 |         edge_attr = {
175 |             "relationship": "describedBy",
176 |             "start_time": time,
177 |             "origin": "generic",
178 |             "confidence": confidence
179 |         }
180 |         source_hash = uuid.uuid3(uuid.NAMESPACE_URL, described_uri)
181 |         dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, describing_uri )
182 |         edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
183 |         rel_chain = "relationship"
184 |         while rel_chain in edge_attr:
185 |             edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
186 |             rel_chain = edge_attr[rel_chain]
187 |         if "origin" in edge_attr:
188 |             edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
189 |         edge_attr["uri"] = edge_uri
190 |         g.add_edge(described_uri, describing_uri , edge_uri, edge_attr)
191 | 
192 |         return g


--------------------------------------------------------------------------------
/plugins/modularity.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "modularity.yapsy-plugin"
 40 | NAME = "Modularity"
 41 | 
 42 | 
 43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | from yapsy.IPlugin import IPlugin
 48 | import logging
 49 | import networkx as nx
 50 | from datetime import datetime # timedelta imported above
 51 | import uuid
 52 | import ConfigParser
 53 | import inspect
 54 | try:
 55 |     import community
 56 |     module_import_success = True
 57 | except:
 58 |     module_import_success = False
 59 | import numpy as np
 60 | 
 61 | ## SETUP
 62 | loc = inspect.getfile(inspect.currentframe())
 63 | ind = loc.rfind("/")
 64 | loc = loc[:ind+1]
 65 | config = ConfigParser.SafeConfigParser()
 66 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 67 | 
 68 | if config.has_section('Core'):
 69 |     if 'name' in config.options('Core'):
 70 |         NAME = config.get('Core', 'name')
 71 | if config.has_section('Log'):
 72 |     if 'level' in config.options('Log'):
 73 |         LOGLEVEL = config.get('Log', 'level')
 74 |     if 'file' in config.options('Log'):
 75 |         LOGFILE = config.get('Log', 'file')
 76 | 
 77 | 
 78 | ## EXECUTION
 79 | class PluginOne(IPlugin):
 80 |     #  TODO: The init should contain anything to load modules or data files that should be variables of the  plugin object
 81 |     def __init__(self):
 82 |         pass
 83 | 
 84 |     #  TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly
 85 |     #  TODO: Current  layout is for an enrichment plugin
 86 |     #  TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
 87 |     #  TODO: interface [type, successful_load, name]
 88 |     #  TODO: query [TBD]
 89 |     #  TODO: minion [TBD]
 90 |     def configure(self):
 91 |         """
 92 | 
 93 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
 94 |         """
 95 |         config_options = config.options("Configuration")
 96 | 
 97 |         if 'cost' in config_options:
 98 |             cost = config.get('Configuration', 'cost')
 99 |         else:
100 |             cost = 9999
101 |         if 'speed' in config_options:
102 |             speed = config.get('Configuration', 'speed')
103 |         else:
104 |             speed = 9999
105 | 
106 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
107 |             description = config.get('Documentation', 'description')
108 |         else:
109 |             logging.error("'Description not in config file.")
110 |             return [None, False, NAME, None, cost, speed]
111 | 
112 |         if 'type' in config_options:
113 |             plugin_type = config.get('Configuration', 'type')
114 |         else:
115 |             logging.error("'Type' not specified in config file.")
116 |             return [None, False, NAME, description, cost, speed]
117 | 
118 |         if not module_import_success:
119 |             logging.error("Module import failure caused configuration failure.")
120 |             return [plugin_type, False, NAME, description, cost, speed]
121 |         else:
122 |             return [plugin_type, True, NAME, description, cost, speed]
123 | 
124 | 
125 | 
126 |     def score(self, sg, *args, **xargs):  # get_modularity_cluster
127 |         """
128 | 
129 |         :param sg: subgraph
130 |         :return: A dictionary of the modularity scores of the nodes in the subgraph
131 |         """
132 |         # args/xargs collected so that passing a topic doesn't mess things up
133 | 
134 |         # Convert to diGraph
135 |         if sg.is_multigraph():
136 |             sg = self.multigraph_to_digraph(sg)
137 |         # Convert to undirected
138 |         sg = sg.to_undirected()
139 | 
140 |         return community.best_partition(sg)
141 | 
142 | 
143 |     def multigraph_to_digraph(self, g):
144 |         """
145 | 
146 |         :param g: takes a networkx mulitgraph
147 |         :return: returns a networkx digraph with edge weights representing the number of edges
148 | 
149 |         NOTE: This butchers duplicate edge properties.  If converting to score, use original edges in output.
150 |         """
151 |         G = nx.DiGraph()
152 |         edge_attributes = {}
153 | 
154 |         # if g isn't really a multigraph, just return it
155 |         if not g.is_multigraph():
156 |             return g
157 | 
158 |         # collapse down to a diagraph
159 |         G.add_nodes_from(g.nodes(data=True))
160 |         G.add_edges_from(g.edges(data=True))
161 | 
162 |         # for each edge, weight the confidence by the number of edges
163 |         '''
164 |         # captures a multiple of the confidence on the edge in the output graph
165 |         for edge in G.edges():
166 |             count = g.edges().count(edge)
167 |             if "count" > 1:
168 |                 if "confidence" in G.edge[edge[0]][edge[1]]:
169 |                     G.edge[edge[0]][edge[1]]['confidence'] *= count
170 |                 else:
171 |                     G.edge[edge[0]][edge[1]]["confidence"] = count
172 |         '''
173 |         # Captures every confidence
174 |         for edge in G.edges():
175 |             confidence = 0
176 |             for src_edge in g.edge[edge[0]][edge[1]].values():
177 |                 confidence += src_edge.get('confidence', 1)
178 |             G.edge[edge[0]][edge[1]]['confidence'] = confidence
179 |     #    # collapse down to a diagraph
180 |     #    G.add_nodes_from(g.nodes(data=True))
181 |     #    G.add_edges_from(g.edges(data=True))
182 | 
183 |         return G
184 | 
185 | 
186 |     ### DISTANCE WEIGHTS ###
187 |     def linear_weight(self, distance, ddp=.2):
188 |         """
189 | 
190 |         :param distance: distance from topic
191 |         :param ddp: percentage to degrade
192 |         :return: Linear weighting factor as float
193 |         """
194 |         return 1 - (distance * ddp)
195 | 
196 | 
197 |     def log_weight(self, distance, a=1, b=1, n=3, pwr=1):
198 |         """
199 | 
200 |         :param distance: distance: distance from topic
201 |         :param a: constant to shape graph. Adjusts hight at 0 = a / (1 + b)
202 |         :param b: constant to shape graph.
203 |         :param n: constant to shape graph.
204 |         :param pwr: constant to shape graph.
205 |         :return: log weighting factor as float
206 |         """
207 |         return a / (1 + b*np.exp((distance-n) * pwr))
208 | 
209 | 
210 |     def exponential_weight(self, distance, b=2):
211 |         return np.exp(-distance/b)
212 | 
213 | 
214 |     def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1):
215 |         """
216 | 
217 |         :param distance: distance from topic
218 |         :param pwr: constant to shape graph.  Higher = steeper decline
219 |         :param b: constant to shape graph.  lower = greater spread
220 |         :return: normal weighting factor as float
221 |         pwr = 2.5, a = 1, c = 0, b = 30
222 |         """
223 |         return a * np.exp(-(distance + c)**pwr/b)


--------------------------------------------------------------------------------
/plugins/page_rank.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "page_rank.yapsy-plugin"
 40 | NAME = "PageRank"
 41 | 
 42 | 
 43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | from yapsy.IPlugin import IPlugin
 48 | import logging
 49 | import networkx as nx
 50 | from datetime import datetime # timedelta imported above
 51 | import uuid
 52 | import ConfigParser
 53 | import inspect
 54 | import numpy as np
 55 | 
 56 | 
 57 | ## SETUP
 58 | loc = inspect.getfile(inspect.currentframe())
 59 | ind = loc.rfind("/")
 60 | loc = loc[:ind+1]
 61 | config = ConfigParser.SafeConfigParser()
 62 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 63 | 
 64 | if config.has_section('Core'):
 65 |     if 'name' in config.options('Core'):
 66 |         NAME = config.get('Core', 'name')
 67 | if config.has_section('Log'):
 68 |     if 'level' in config.options('Log'):
 69 |         LOGLEVEL = config.get('Log', 'level')
 70 |     if 'file' in config.options('Log'):
 71 |         LOGFILE = config.get('Log', 'file')
 72 | 
 73 | 
 74 | ## EXECUTION
 75 | class PluginOne(IPlugin):
 76 |     #  TODO: The init should contain anything to load modules or data files that should be variables of the  plugin object
 77 |     def __init__(self):
 78 |         pass
 79 | 
 80 |     #  TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly
 81 |     #  TODO: Current  layout is for an enrichment plugin
 82 |     #  TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
 83 |     #  TODO: interface [type, successful_load, name]
 84 |     #  TODO: query [TBD]
 85 |     #  TODO: minion [TBD]
 86 |     def configure(self):
 87 |         """
 88 | 
 89 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
 90 |         """
 91 |         config_options = config.options("Configuration")
 92 | 
 93 |         if 'cost' in config_options:
 94 |             cost = config.get('Configuration', 'cost')
 95 |         else:
 96 |             cost = 9999
 97 |         if 'speed' in config_options:
 98 |             speed = config.get('Configuration', 'speed')
 99 |         else:
100 |             speed = 9999
101 | 
102 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
103 |             description = config.get('Documentation', 'description')
104 |         else:
105 |             logging.error("'Description not in config file.")
106 |             return [None, False, NAME, None, cost, speed]
107 | 
108 |         if 'type' in config_options:
109 |             plugin_type = config.get('Configuration', 'type')
110 |         else:
111 |             logging.error("'Type' not specified in config file.")
112 |             return [None, False, NAME, description, cost, speed]
113 | 
114 |         return [plugin_type, True, NAME, description, cost, speed]
115 | 
116 | 
117 |     def score(self, sg, *args, **xargs):  # get_pagerank_probability
118 |         """
119 | 
120 |         :param sg: egocentric subgraph around topic in networkx format
121 |         :param distance_degradation: A factor for degrading as distance from the topic increases
122 |         :return: Dictionary of probabilities keyed by node
123 |         """
124 |         # convert to digraph if needed
125 |         if sg.is_multigraph():
126 |             sg = self.multigraph_to_digraph(sg)
127 | 
128 |         personalized = {}
129 |         for node in sg.nodes():
130 |     #        personalized[node] = linear_weight(sg.node[node]['topic_distance'], distance_degradation)
131 |             # INSERT WEIGHTING FUNCTION BELOW
132 |             personalized[node] = self.exponential_weight(sg.node[node]['topic_distance'])
133 | 
134 |         # return the pagerank scores
135 |         return nx.pagerank(sg, personalization=personalized, weight='confidence')
136 | 
137 | 
138 |     def multigraph_to_digraph(self, g):
139 |         """
140 | 
141 |         :param g: takes a networkx mulitgraph
142 |         :return: returns a networkx digraph with edge weights representing the number of edges
143 | 
144 |         NOTE: This butchers duplicate edge properties.  If converting to score, use original edges in output.
145 |         """
146 |         G = nx.DiGraph()
147 |         edge_attributes = {}
148 | 
149 |         # if g isn't really a multigraph, just return it
150 |         if not g.is_multigraph():
151 |             return g
152 | 
153 |         # collapse down to a diagraph
154 |         G.add_nodes_from(g.nodes(data=True))
155 |         G.add_edges_from(g.edges(data=True))
156 | 
157 |         # for each edge, weight the confidence by the number of edges
158 |         '''
159 |         # captures a multiple of the confidence on the edge in the output graph
160 |         for edge in G.edges():
161 |             count = g.edges().count(edge)
162 |             if "count" > 1:
163 |                 if "confidence" in G.edge[edge[0]][edge[1]]:
164 |                     G.edge[edge[0]][edge[1]]['confidence'] *= count
165 |                 else:
166 |                     G.edge[edge[0]][edge[1]]["confidence"] = count
167 |         '''
168 |         # Captures every confidence
169 |         for edge in G.edges():
170 |             confidence = 0
171 |             for src_edge in g.edge[edge[0]][edge[1]].values():
172 |                 confidence += src_edge.get('confidence', 1)
173 |             G.edge[edge[0]][edge[1]]['confidence'] = confidence
174 |     #    # collapse down to a diagraph
175 |     #    G.add_nodes_from(g.nodes(data=True))
176 |     #    G.add_edges_from(g.edges(data=True))
177 | 
178 |         return G
179 | 
180 | 
181 |     ### DISTANCE WEIGHTS ###
182 |     def linear_weight(self, distance, ddp=.2):
183 |         """
184 | 
185 |         :param distance: distance from topic
186 |         :param ddp: percentage to degrade
187 |         :return: Linear weighting factor as float
188 |         """
189 |         return 1 - (distance * ddp)
190 | 
191 | 
192 |     def log_weight(self, distance, a=1, b=1, n=3, pwr=1):
193 |         """
194 | 
195 |         :param distance: distance: distance from topic
196 |         :param a: constant to shape graph. Adjusts hight at 0 = a / (1 + b)
197 |         :param b: constant to shape graph.
198 |         :param n: constant to shape graph.
199 |         :param pwr: constant to shape graph.
200 |         :return: log weighting factor as float
201 |         """
202 |         return a / (1 + b*np.exp((distance-n) * pwr))
203 | 
204 | 
205 |     def exponential_weight(self, distance, b=2):
206 |         return np.exp(-distance/b)
207 | 
208 | 
209 |     def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1):
210 |         """
211 | 
212 |         :param distance: distance from topic
213 |         :param pwr: constant to shape graph.  Higher = steeper decline
214 |         :param b: constant to shape graph.  lower = greater spread
215 |         :return: normal weighting factor as float
216 |         pwr = 2.5, a = 1, c = 0, b = 30
217 |         """
218 |         return a * np.exp(-(distance + c)**pwr/b)


--------------------------------------------------------------------------------
/plugins/page_rank_2.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "page_rank_2.yapsy-plugin"
 40 | NAME = "PageRank2"
 41 | 
 42 | 
 43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | from yapsy.IPlugin import IPlugin
 48 | import logging
 49 | import networkx as nx
 50 | from datetime import datetime # timedelta imported above
 51 | import uuid
 52 | import ConfigParser
 53 | import inspect
 54 | 
 55 | 
 56 | ## SETUP
 57 | loc = inspect.getfile(inspect.currentframe())
 58 | ind = loc.rfind("/")
 59 | loc = loc[:ind+1]
 60 | config = ConfigParser.SafeConfigParser()
 61 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 62 | 
 63 | if config.has_section('Core'):
 64 |     if 'name' in config.options('Core'):
 65 |         NAME = config.get('Core', 'name')
 66 | if config.has_section('Log'):
 67 |     if 'level' in config.options('Log'):
 68 |         LOGLEVEL = config.get('Log', 'level')
 69 |     if 'file' in config.options('Log'):
 70 |         LOGFILE = config.get('Log', 'file')
 71 | 
 72 | 
 73 | ## EXECUTION
 74 | class PluginOne(IPlugin):
 75 |     #  TODO: The init should contain anything to load modules or data files that should be variables of the  plugin object
 76 |     def __init__(self):
 77 |         pass
 78 | 
 79 |     #  TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly
 80 |     #  TODO: Current  layout is for an enrichment plugin
 81 |     #  TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
 82 |     #  TODO: interface [type, successful_load, name]
 83 |     #  TODO: query [TBD]
 84 |     #  TODO: minion [TBD]
 85 |     def configure(self):
 86 |         """
 87 | 
 88 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
 89 |         """
 90 |         config_options = config.options("Configuration")
 91 | 
 92 |         if 'cost' in config_options:
 93 |             cost = config.get('Configuration', 'cost')
 94 |         else:
 95 |             cost = 9999
 96 |         if 'speed' in config_options:
 97 |             speed = config.get('Configuration', 'speed')
 98 |         else:
 99 |             speed = 9999
100 | 
101 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
102 |             description = config.get('Documentation', 'description')
103 |         else:
104 |             logging.error("'Description not in config file.")
105 |             return [None, False, NAME, None, cost, speed]
106 | 
107 |         if 'type' in config_options:
108 |             plugin_type = config.get('Configuration', 'type')
109 |         else:
110 |             logging.error("'Type' not specified in config file.")
111 |             return [None, False, NAME, description, cost, speed]
112 | 
113 |         return [plugin_type, True, NAME, description, cost, speed]
114 | 
115 | 
116 |     def score(self, sg, topic, personalization=None):  # get_pagerank_probability_2
117 |         """
118 | 
119 |         :param sg: egocentric subgraph around topic in networkx format
120 |         :param topic: A factor for degrading as distance from the topic increases
121 |         :param personalization: Dictionary with key of a node and value of a node weight.  If none specified, defaults to the linear weight of the 'topic_distance' feature of the nodes.  The topic_distance is the topic for which the subgraph was generated.
122 |         :return: Dictionary of probabilities keyed by node
123 |         """
124 |         if sg.is_multigraph():
125 |             sg = self.multigraph_to_digraph(sg)
126 | 
127 |         if personalization == None:
128 |             personalization = {}
129 |             for node in sg.nodes():
130 |         #        personalized[node] = linear_weight(sg.node[node]['topic_distance'], distance_degradation)
131 |                 # INSERT WEIGHTING FUNCTION BELOW
132 |                 personalization[node] = self.linear_weight(sg.node[node]['topic_distance'])
133 | 
134 |         # Build topic weights to start topic with all weight and always jump to topic
135 | 
136 |         topic_weight = 1/float(len(topic.nodes()))
137 |         topic_weighted = {k if 1 else k: topic_weight if k in topic.nodes() else 0 for k in sg.nodes()}
138 | 
139 |         # return the pagerank scores
140 |         return nx.pagerank(sg,
141 |                            personalization=personalization,
142 |                            weight='confidence',
143 |                            nstart=topic_weighted,
144 |                            dangling=topic_weighted)
145 | 
146 | 
147 |     def multigraph_to_digraph(self, g):
148 |         """
149 | 
150 |         :param g: takes a networkx mulitgraph
151 |         :return: returns a networkx digraph with edge weights representing the number of edges
152 | 
153 |         NOTE: This butchers duplicate edge properties.  If converting to score, use original edges in output.
154 |         """
155 |         G = nx.DiGraph()
156 |         edge_attributes = {}
157 | 
158 |         # if g isn't really a multigraph, just return it
159 |         if not g.is_multigraph():
160 |             return g
161 | 
162 |         # collapse down to a diagraph
163 |         G.add_nodes_from(g.nodes(data=True))
164 |         G.add_edges_from(g.edges(data=True))
165 | 
166 |         # for each edge, weight the confidence by the number of edges
167 |         '''
168 |         # captures a multiple of the confidence on the edge in the output graph
169 |         for edge in G.edges():
170 |             count = g.edges().count(edge)
171 |             if "count" > 1:
172 |                 if "confidence" in G.edge[edge[0]][edge[1]]:
173 |                     G.edge[edge[0]][edge[1]]['confidence'] *= count
174 |                 else:
175 |                     G.edge[edge[0]][edge[1]]["confidence"] = count
176 |         '''
177 |         # Captures every confidence
178 |         for edge in G.edges():
179 |             confidence = 0
180 |             for src_edge in g.edge[edge[0]][edge[1]].values():
181 |                 confidence += src_edge.get('confidence', 1)
182 |             G.edge[edge[0]][edge[1]]['confidence'] = confidence
183 |     #    # collapse down to a diagraph
184 |     #    G.add_nodes_from(g.nodes(data=True))
185 |     #    G.add_edges_from(g.edges(data=True))
186 | 
187 |         return G
188 | 
189 | 
190 |     ### DISTANCE WEIGHTS ###
191 |     def linear_weight(self, distance, ddp=.2):
192 |         """
193 | 
194 |         :param distance: distance from topic
195 |         :param ddp: percentage to degrade
196 |         :return: Linear weighting factor as float
197 |         """
198 |         return 1 - (distance * ddp)
199 | 
200 | 
201 |     def log_weight(self, distance, a=1, b=1, n=3, pwr=1):
202 |         """
203 | 
204 |         :param distance: distance: distance from topic
205 |         :param a: constant to shape graph. Adjusts hight at 0 = a / (1 + b)
206 |         :param b: constant to shape graph.
207 |         :param n: constant to shape graph.
208 |         :param pwr: constant to shape graph.
209 |         :return: log weighting factor as float
210 |         """
211 |         return a / (1 + b*np.exp((distance-n) * pwr))
212 | 
213 | 
214 |     def exponential_weight(self, distance, b=2):
215 |         return np.exp(-distance/b)
216 | 
217 | 
218 |     def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1):
219 |         """
220 | 
221 |         :param distance: distance from topic
222 |         :param pwr: constant to shape graph.  Higher = steeper decline
223 |         :param b: constant to shape graph.  lower = greater spread
224 |         :return: normal weighting factor as float
225 |         pwr = 2.5, a = 1, c = 0, b = 30
226 |         """
227 |         return a * np.exp(-(distance + c)**pwr/b)


--------------------------------------------------------------------------------
/plugins/networkx.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | NX_CONFIG_FILE = "networkx.yapsy-plugin"
 40 | NAME = "Networkx Interface"
 41 | 
 42 | 
 43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | from yapsy.IPlugin import IPlugin
 48 | import logging
 49 | import networkx as nx
 50 | from datetime import datetime # timedelta imported above
 51 | import uuid
 52 | import ConfigParser
 53 | import inspect
 54 | import os.path
 55 | 
 56 | 
 57 | ## SETUP
 58 | loc = inspect.getfile(inspect.currentframe())
 59 | ind = loc.rfind("/")
 60 | loc = loc[:ind+1]
 61 | config = ConfigParser.SafeConfigParser()
 62 | config.readfp(open(loc + NX_CONFIG_FILE))
 63 | 
 64 | if config.has_section('Core'):
 65 |     if 'name' in config.options('Core'):
 66 |         NAME = config.get('Core', 'name')
 67 | if config.has_section('Log'):
 68 |     if 'level' in config.options('Log'):
 69 |         LOGLEVEL = config.get('Log', 'level')
 70 |     if 'file' in config.options('Log'):
 71 |         LOGFILE = config.get('Log', 'file')
 72 | 
 73 | ## EXECUTION
 74 | class PluginOne(IPlugin):
 75 |     context_graph = nx.MultiDiGraph()
 76 |     context_graph_file = None
 77 | 
 78 |     def __init__(self):
 79 |         if 'context_graph_file' in config.options("Configuration"):
 80 |             self.context_graph_file = config.get('Configuration', 'context_graph_file')
 81 | 
 82 | 
 83 |     def configure(self):
 84 |         """
 85 | 
 86 |         :return: return list of [type, successful_load, name]
 87 |         """
 88 |         config_options = config.options("Configuration")
 89 | 
 90 |         if os.path.isfile(self.context_graph_file):
 91 |             try:
 92 |                 self.context_graph = self.read_graph(self.context_graph_file) 
 93 |             except:
 94 |                 pass
 95 |         else:
 96 |             logging.info("Networkx file not for import.")
 97 | 
 98 |         if 'type' in config_options:
 99 |             plugin_type = config.get('Configuration', 'type')
100 |         else:
101 |             logging.error("'Type' not specified in config file.")
102 |             return [None, False, NAME]
103 | 
104 |         return [plugin_type, True, NAME]
105 | 
106 | 
107 |     def enrich(self, g):  # Networkx
108 |         """
109 | 
110 |         :param g: networkx graph to be merged
111 |         :return: Nonetype
112 | 
113 |         Note: Neo4j operates differently from the current titan import.  The neo4j import does not aggregate edges which
114 |                means they must be handled at query time.  The current titan algorithm aggregates edges based on time on
115 |                merge.
116 |         """
117 |         for uri, data in g.nodes(data=True):
118 |         # For each node:
119 |             # Get node by URI
120 |             # (should we double check the the class/key/value match?)
121 |             # If it exists in the receiving graph, going to need to merge properties (replacing with newer)
122 |             if uri in self.context_graph.nodes():
123 |                 self.context_graph.node[uri].update(data)
124 |             else:
125 |                 self.context_graph.add_node(uri, attr_dict=data)
126 |         # For each edge:
127 |         for edge in g.edges(data=True):
128 |             # Add it
129 |             self.context_graph.add_edge(edge[0], edge[1], attr_dict=data)
130 | 
131 | 
132 |     def query(self, topic, max_depth=4, config=None, dont_follow=['enrichment', 'classification']):
133 |         """
134 |             :param topic: a  graph to return the context of.  At least one node ID in topic \
135 |              must be in full graph g to return any context.
136 |             :param max_depth: The maximum distance from the topic to search
137 |             :param config: The titanDB configuration to use if not using the one configured with the plugin
138 |             :param dont_follow: A list of attribute types to not follow
139 |             :return: subgraph in networkx format
140 |         """
141 |         distances = dict()
142 | 
143 |         if config is None:
144 |             config = self.context_graph
145 | 
146 |         # Conver topic from a graph into a set of nodes
147 |         topic_nodes = set()
148 |         for n, d in topic.nodes(data=True):
149 |             topic_nodes.add("class={0}&key={1}&value={2}".format(d['class'], d['key'], d['value']))
150 | 
151 |         nodes = topic_nodes.copy()
152 | 
153 |         for t in topic:
154 |             # get all nodes within max_depth distance from each topic and add them to the set
155 |             new_distances = nx.single_source_shortest_path_length(self.context_graph.to_undirected(), t, cutoff=max_depth)
156 |             nodes = nodes.union(set(new_distances.keys()))
157 | 
158 |             # Update shortest distances from topic to node
159 |             for n in new_distances.keys():
160 |                 if n in distances:
161 |                     if new_distances[n] < distances[n]:
162 |                         distances[n] = new_distances[n]
163 |                 else:
164 |                     distances[n] = new_distances[n]
165 | 
166 |         # remove dont_follow nodes:
167 |         nodes_to_remove = set()
168 |         for n in nodes:
169 |             if self.context_graph.node[n]['key'] in dont_follow:
170 |                 nodes_to_remove.add(n)
171 |         nodes = nodes.difference(nodes_to_remove)
172 | 
173 |         # Get the subgraph represented by the nodes:
174 |         g = nx.MultiDiGraph(self.context_graph.subgraph(nodes))
175 | 
176 |         # Prune out non-relevant components by removing those that contain no topic nodes.
177 |         #  This gets ride of nodes that were found by following dont_follow nodes
178 |         for component in nx.connected_components(g.to_undirected()):
179 |             if len(topic_nodes.intersection(set(component))) <= 0:  # if there's no overlap betweent the component and topic
180 |                 g.remove_nodes_from(component)  # remove the component
181 | 
182 |         # add the topic distances to the subgraph
183 |         for n in g.nodes():
184 |             g.node[n]['topic_distance'] = distances[n]
185 | 
186 |         return g
187 | 
188 | 
189 |     def get_graph(self):
190 |         return self.context_graph
191 | 
192 | 
193 |     def write_graph(self, G=None, subgraph_file=None):
194 |         if G is None:
195 |             G = self.context_graph
196 |         if subgraph_file is None:
197 |             subgraph_file = self.context_graph_file
198 |         logging.info("Writing graph.")
199 |         # write the graph out
200 |         file_format = subgraph_file.split(".")[-1]
201 |         if file_format == "graphml":
202 |             nx.write_graphml(G, subgraph_file)
203 |         elif file_format == "gml":
204 |             nx.write_gml(G, subgraph_file)
205 |         elif file_format == "gexf":
206 |             nx.write_gexf(G, subgraph_file)
207 |         elif file_format == "net":
208 |             nx.write_pajek(G, subgraph_file)
209 |         elif file_format == "yaml":
210 |             nx.write_yaml(G, subgraph_file)
211 |         elif file_format == "gpickle":
212 |             nx.write_gpickle(G, subgraph_file)
213 |         else:
214 |             print "File format not found, writing graphml."
215 |             nx.write_graphml(G, subgraph_file)
216 | 
217 |     def read_graph(self, subgraph_file=None):
218 |         if subgraph_file is None:
219 |             subraph_file = self.context_graph_file
220 |         logging.info("Writing graph.")
221 |         # write the graph out
222 |         file_format = subgraph_file.split(".")[-1]
223 |         if file_format == "graphml":
224 |             return nx.read_graphml(subgraph_file)
225 |         elif file_format == "gml":
226 |             return nx.read_gml(subgraph_file)
227 |         elif file_format == "gexf":
228 |             return nx.read_gexf(subgraph_file)
229 |         elif file_format == "net":
230 |             return nx.read_pajek(subgraph_file)
231 |         elif file_format == "yaml":
232 |             return nx.read_yaml(subgraph_file)
233 |         elif file_format == "gpickle":
234 |             return nx.read_gpickle(subgraph_file)
235 |         else:
236 |             logging.warning("File format not found, returning empty graph.")
237 |         return nx.MultiDiGraph()


--------------------------------------------------------------------------------
/plugins/dns.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 |  AUTHOR: Gabriel Bassett
  4 |  DATE: 11-22-2014
  5 |  DEPENDENCIES: a list of modules requiring installation
  6 |  Copyright 2014 Gabriel Bassett
  7 | 
  8 |  LICENSE:
  9 | Licensed to the Apache Software Foundation (ASF) under one
 10 | or more contributor license agreements.  See the NOTICE file
 11 | distributed with this work for additional information
 12 | regarding copyright ownership.  The ASF licenses this file
 13 | to you under the Apache License, Version 2.0 (the
 14 | "License"); you may not use this file except in compliance
 15 | with the License.  You may obtain a copy of the License at
 16 | 
 17 |   http://www.apache.org/licenses/LICENSE-2.0
 18 | 
 19 | Unless required by applicable law or agreed to in writing,
 20 | software distributed under the License is distributed on an
 21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 22 | KIND, either express or implied.  See the License for the
 23 | specific language governing permissions and limitations
 24 | under the License.
 25 | 
 26 |  DESCRIPTION:
 27 |  Functions necessary to enrich the context graph
 28 | 
 29 | """
 30 | # PRE-USER SETUP
 31 | pass
 32 | 
 33 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 34 | 
 35 | 
 36 | # USER VARIABLES
 37 | DNS_CONFIG_FILE = "dns.yapsy-plugin"
 38 | NAME = "DNS Enrichment"
 39 | 
 40 | 
 41 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 42 | 
 43 | ## IMPORTS
 44 | from yapsy.IPlugin import IPlugin
 45 | import networkx as nx
 46 | from datetime import datetime
 47 | import dateutil  # to parse variable time strings
 48 | import socket
 49 | import uuid
 50 | import ConfigParser
 51 | import logging
 52 | import inspect
 53 | try:
 54 |     import dns.resolver
 55 |     resolver_import = True
 56 | except:
 57 |     resolver_import = False
 58 | 
 59 | ## SETUP
 60 | __author__ = "Gabriel Bassett"
 61 | loc = inspect.getfile(inspect.currentframe())
 62 | ind = loc.rfind("/")
 63 | loc = loc[:ind+1]
 64 | config = ConfigParser.SafeConfigParser()
 65 | config.readfp(open(loc + DNS_CONFIG_FILE))
 66 | 
 67 | if config.has_section('Core'):
 68 |     if 'name' in config.options('Core'):
 69 |         NAME = config.get('Core', 'name')
 70 | 
 71 | ## EXECUTION
 72 | class PluginOne(IPlugin):
 73 |     def __init__(self):
 74 |         pass
 75 | 
 76 |     def configure(self):
 77 |         """
 78 | 
 79 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
 80 |         """
 81 |         config_options = config.options("Configuration")
 82 | 
 83 |         if 'cost' in config_options:
 84 |             cost = config.get('Configuration', 'cost')
 85 |         else:
 86 |             cost = 9999
 87 |         if 'speed' in config_options:
 88 |             speed = config.get('Configuration', 'speed')
 89 |         else:
 90 |             speed = 9999
 91 | 
 92 |         if 'type' in config_options:
 93 |             plugin_type = config.get('Configuration', 'type')
 94 |         else:
 95 |             logging.error("'Type' not specified in config file.")
 96 |             return [None, False, NAME, "Takes an IP string and returns the DNS resolved IP address as networkx graph.", None, cost, speed]
 97 | 
 98 |         if 'inputs' in config_options:
 99 |             inputs = config.get('Configuration', 'Inputs')
100 |             inputs = [l.strip().lower() for l in inputs.split(",")]
101 |         else:
102 |             logging.error("No input types specified in config file.")
103 |             return [plugin_type, False, NAME, "Takes an IP string and returns the DNS resolved IP address as networkx graph.", None, cost, speed]
104 | 
105 |         return [plugin_type, True, NAME, "Takes an IP string and returns the DNS resolved IP address as networkx graph.", inputs, cost, speed]
106 | 
107 | 
108 |     def run(self, domain, start_time=""):
109 |         """ str, str -> networkx multiDiGraph
110 | 
111 |         :param domain: a string containing a domain to lookup up
112 |         :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object.
113 |         :return: a networkx graph representing the response.
114 |         """
115 | 
116 |         # Parse the start_time
117 |         if type(start_time) is str:
118 |             try:
119 |                 time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ")
120 |             except:
121 |                 time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
122 |         elif type(start_time) is datetime:
123 |             time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
124 |         else:
125 |             time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
126 | 
127 |         g = nx.MultiDiGraph()
128 | 
129 |         # Get or create Domain node
130 |         domain_uri = "class=attribute&key={0}&value={1}".format("domain", domain)
131 |         g.add_node(domain_uri, {
132 |             'class': 'attribute',
133 |             'key': "domain",
134 |             "value": domain,
135 |             "start_time": time,
136 |             "uri": domain_uri
137 |         })
138 | 
139 |         # Try the DNS lookup and just return the domain if the lookup fails
140 |         try:
141 |             ip = socket.gethostbyname(domain)
142 |         except socket.gaierror:
143 |             return g
144 | 
145 |         # Get or create Enrichment node
146 |         dns_uri = "class=attribute&key={0}&value={1}".format("enrichment", "dns")
147 |         g.add_node(dns_uri, {
148 |             'class': 'attribute',
149 |             'key': "enrichment",
150 |             "value": "dns",
151 |             "start_time": time,
152 |             "uri": dns_uri
153 |         })
154 | 
155 |         ip_uri = "class=attribute&key={0}&value={1}".format("ip", ip)
156 |         g.add_node(ip_uri, {
157 |             'class': 'attribute',
158 |             'key': "ip",
159 |             "value": ip,
160 |             "start_time": time,
161 |             "uri": ip_uri
162 |         })
163 | 
164 |         # Create edge from domain to ip node
165 |         edge_attr = {
166 |             "relationship": "describedBy",
167 |             "start_time": time,
168 |             "origin": "dns"
169 |         }
170 |         source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri)
171 |         dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri)
172 |         edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
173 |         rel_chain = "relationship"
174 |         while rel_chain in edge_attr:
175 |             edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
176 |             rel_chain = edge_attr[rel_chain]
177 |         if "origin" in edge_attr:
178 |             edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
179 |         edge_attr["uri"] = edge_uri
180 |         g.add_edge(domain_uri, ip_uri, edge_uri, {"start_time": time})
181 | 
182 |         # Link domain to enrichment
183 |         edge_attr = {
184 |             "relationship": "describedBy",
185 |             "start_time": time,
186 |             "origin": "dns"
187 |         }
188 |         source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri)
189 |         dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, dns_uri)
190 |         edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
191 |         rel_chain = "relationship"
192 |         while rel_chain in edge_attr:
193 |             edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
194 |             rel_chain = edge_attr[rel_chain]
195 |         if "origin" in edge_attr:
196 |             edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
197 |         edge_attr["uri"] = edge_uri
198 |         g.add_edge(domain_uri, dns_uri, edge_uri, edge_attr)
199 | 
200 | 
201 |         if resolver_import:
202 |             # Get nameservers.  (note, this can get cached ones, but the more complex answer at http://stackoverflow.com/questions/4066614/how-can-i-find-the-authoritative-dns-server-for-a-domain-using-dnspython didn't work.)
203 |             # If resolution fails, simply return the graph as is
204 |             try:
205 |                 answers = dns.resolver.query(domain, 'NS')
206 |             except dns.resolver.NoAnswer:
207 |                 return g
208 | 
209 |             for ns in answers:
210 |                 ns = ns.to_text().rstrip(".")
211 | 
212 |                 # Create the nameserver node
213 |                 ns_uri = "class=attribute&key={0}&value={1}".format("domain", ns)
214 |                 g.add_node(ns_uri, {
215 |                     'class': 'attribute',
216 |                     'key': "domain",
217 |                     "value": ns,
218 |                     "start_time": time,
219 |                     "uri": ns_uri
220 |                 })
221 | 
222 |                 # Link it to the domain
223 |                 edge_attr = {
224 |                     "relationship": "describedBy",
225 |                     "start_time": time,
226 |                     "origin": "dns",
227 |                     "describedBy": "nameserver" 
228 |                 }
229 |                 source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri)
230 |                 dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, ns_uri)
231 |                 edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
232 |                 rel_chain = "relationship"
233 |                 while rel_chain in edge_attr:
234 |                     edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
235 |                     rel_chain = edge_attr[rel_chain]
236 |                 if "origin" in edge_attr:
237 |                     edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
238 |                 edge_attr["uri"] = edge_uri
239 |                 g.add_edge(domain_uri, ns_uri, edge_uri, edge_attr)
240 | 
241 |         return g
242 | 


--------------------------------------------------------------------------------
/plugins/maxmind.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 |  AUTHOR: Gabriel Bassett
  4 |  DATE: 12-17-2013
  5 |  DEPENDENCIES: a list of modules requiring installation
  6 |  Copyright 2014 Gabriel Bassett
  7 | 
  8 |  LICENSE:
  9 | Licensed to the Apache Software Foundation (ASF) under one
 10 | or more contributor license agreements.  See the NOTICE file
 11 | distributed with this work for additional information
 12 | regarding copyright ownership.  The ASF licenses this file
 13 | to you under the Apache License, Version 2.0 (the
 14 | "License"); you may not use this file except in compliance
 15 | with the License.  You may obtain a copy of the License at
 16 | 
 17 |   http://www.apache.org/licenses/LICENSE-2.0
 18 | 
 19 | Unless required by applicable law or agreed to in writing,
 20 | software distributed under the License is distributed on an
 21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 22 | KIND, either express or implied.  See the License for the
 23 | specific language governing permissions and limitations
 24 | under the License.
 25 | 
 26 |  DESCRIPTION:
 27 |  Functions necessary to enrich the context graph
 28 | 
 29 | """
 30 | # PRE-USER SETUP
 31 | pass
 32 | 
 33 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 34 | 
 35 | 
 36 | # USER VARIABLES
 37 | MAXMIND_FILE = "./GeoIPASNum.dat"
 38 | MAXMIND_CONFIG_FILE = "maxmind.yapsy-plugin"
 39 | NAME = "Maxmind ASN Enrichment"
 40 | 
 41 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 42 | 
 43 | 
 44 | ## IMPORTS
 45 | from yapsy.IPlugin import IPlugin
 46 | import logging
 47 | from datetime import datetime # timedelta imported above
 48 | import dateutil  # to parse variable time strings
 49 | import uuid
 50 | import ConfigParser
 51 | import os
 52 | import inspect
 53 | try:
 54 |     import networkx as nx
 55 |     import GeoIP
 56 |     import ipaddress
 57 |     module_import_success = True
 58 | except:
 59 |     module_import_success = False
 60 |     logging.error("Module import failed.  Please install the following modules: networkx, GeoIP, ipaddress.")
 61 |     raise
 62 | 
 63 | ## SETUP
 64 | __author__ = "Gabriel Bassett"
 65 | loc = inspect.getfile(inspect.currentframe())
 66 | ind = loc.rfind("/")
 67 | loc = loc[:ind+1]
 68 | config = ConfigParser.SafeConfigParser()
 69 | config.readfp(open(loc + MAXMIND_CONFIG_FILE))
 70 | 
 71 | if config.has_section('Core'):
 72 |     if 'name' in config.options('Core'):
 73 |         NAME = config.get('Core', 'name')
 74 | 
 75 | ## EXECUTION
 76 | class PluginOne(IPlugin):
 77 |     gi = None
 78 |     dat_file_success = False
 79 | 
 80 |     def __init__(self, conf=config, dat_file=MAXMIND_FILE):
 81 |         try:
 82 |             maxmind_file = config.get('Configuration', 'dat_file')
 83 |             if maxmind_file[0] != "/":
 84 |                 maxmind_file = loc + maxmind_file
 85 |             #print maxmind_file  # DEBUG
 86 |             self.gi = GeoIP.open(maxmind_file, GeoIP.GEOIP_STANDARD)
 87 |             self.dat_file_success = True
 88 |         except:
 89 |             pass
 90 |         if not self.dat_file_success:
 91 |             try:
 92 |                 if dat_file[0] != "/":
 93 |                     dat_file = loc + dat_file
 94 |                 #print dat_file  # DEBUG
 95 |                 self.gi = GeoIP.open(dat_file, GeoIP.GEOIP_STANDARD)
 96 |                 self.dat_file_success = True
 97 |             except:
 98 |                 pass
 99 | 
100 |     def configure(self):
101 |         """
102 | 
103 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
104 |         """
105 |         config_options = config.options("Configuration")
106 | 
107 |         if 'cost' in config_options:
108 |             cost = config.get('Configuration', 'cost')
109 |         else:
110 |             cost = 9999
111 |         if 'speed' in config_options:
112 |             speed = config.get('Configuration', 'speed')
113 |         else:
114 |             speed = 9999
115 | 
116 |         if 'type' in config_options:
117 |             plugin_type = config.get('Configuration', 'type')
118 |         else:
119 |             logging.error("'Type' not specified in config file.")
120 |             return [None, False, NAME, "Takes an IP and returns the ASN of the IP.", None, cost, speed]
121 | 
122 |         if 'inputs' in config_options:
123 |             inputs = config.get('Configuration', 'Inputs')
124 |             inputs = [l.strip().lower() for l in inputs.split(",")]
125 |         else:
126 |             logging.error("No input types specified in config file.")
127 |             return [plugin_type, False, NAME, "Takes an IP and returns the ASN of the IP.", None, cost, speed]
128 | 
129 |         if not self.dat_file_success:
130 |             return [plugin_type, False, NAME, "Takes an IP and returns the ASN of the IP.", inputs, cost, speed]
131 |         elif not module_import_success:
132 |             logging.error("Module import failure caused configuration failure.")
133 |             return [plugin_type, False, NAME, "Takes an IP and returns the ASN of the IP.", inputs, cost, speed]
134 |         else:
135 |             return [plugin_type, True, NAME, "Takes an IP and returns the ASN of the IP.", inputs, cost, speed]
136 | 
137 | 
138 |     def run(self, ip, start_time=""):
139 |         """ str, str -> networkx multiDiGraph
140 | 
141 |         :param ip: IP address to enrich in graph
142 |         :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object.
143 |         :return: enrichment graph
144 |         """
145 | 
146 |         # Parse the start_time
147 |         if type(start_time) is str:
148 |             try:
149 |                 time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ")
150 |             except:
151 |                 time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
152 |         elif type(start_time) is datetime:
153 |             time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
154 |         else:
155 |             time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
156 | 
157 |         # Validate IP
158 |         _ = ipaddress.ip_address(unicode(ip))
159 | 
160 |         # open maxmind ASN data
161 |         gi = self.gi
162 | 
163 |         g = nx.MultiDiGraph()
164 |         # Create the maxmind ASN node
165 |         maxmind_asn_uri = "class=attribute&key={0}&value={1}".format("enrichment", "maxmind_asn")  # Move prefix assignment to merge_titan
166 |         g.add_node(maxmind_asn_uri, {
167 |             'class': 'attribute',
168 |             'key': "enrichment",
169 |             "value": "maxmind_asn",
170 |             "start_time": time,
171 |             "uri": maxmind_asn_uri
172 |         })
173 | 
174 |         # set IP URI
175 |         ip_uri = "class=attribute&key={0}&value={1}".format("ip", ip)
176 |         g.add_node(ip_uri, {
177 |             'class': 'attribute',
178 |             'key': "ip",
179 |             "value": ip,
180 |             "start_time": time,
181 |             "uri": ip_uri
182 |         })
183 | 
184 |         # retrieve maxmind enrichment
185 |         ASN = gi.name_by_addr(ip)
186 | 
187 |         #print ASN  # DEBUG
188 |         #print type(gi)  # DEBUG
189 |         #print ip  # DEBUG
190 | 
191 |         if ASN:
192 |             ASN = ASN.split(" ", 1)
193 | 
194 |             # create ASN node
195 |             asn_uri = "class=attribute&key={0}&value={1}".format("asn", ASN[0][2:])
196 |             attributes = {
197 |                 'class': 'attribute',
198 |                 'key': 'asn',
199 |                 'value': ASN[0][2:],
200 |                 "uri": asn_uri,
201 |                 "start_time": time
202 |             }
203 |             if len(ASN) > 1:
204 |                 attributes['owner'] = ASN[1]
205 |             g.add_node(asn_uri, attributes)
206 | 
207 |             # link ip to ASN node
208 |             edge_attr = {
209 |                 "relationship": "describedBy",
210 |                 "origin": "maxmind_enrichment",
211 |                 "start_time": time,
212 |             }
213 |             source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri)
214 |             dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, asn_uri)
215 |             edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
216 |             rel_chain = "relationship"
217 |             while rel_chain in edge_attr:
218 |                 edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
219 |                 rel_chain = edge_attr[rel_chain]
220 |             if "origin" in edge_attr:
221 |                 edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
222 |             edge_attr["uri"] = edge_uri
223 |             g.add_edge(ip_uri, asn_uri, edge_uri, edge_attr)
224 | 
225 | 
226 |             # link ip to maxmind enrichment
227 |             edge_attr = {
228 |                 "relationship": "describedBy",
229 |                 "origin": "maxmind_enrichment",
230 |                 "start_time": time,
231 |             }
232 |             source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri)
233 |             dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, maxmind_asn_uri)
234 |             edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
235 |             rel_chain = "relationship"
236 |             while rel_chain in edge_attr:
237 |                 edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
238 |                 rel_chain = edge_attr[rel_chain]
239 |             if "origin" in edge_attr:
240 |                 edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
241 |             edge_attr["uri"] = edge_uri
242 |             g.add_edge(ip_uri, maxmind_asn_uri, edge_uri, edge_attr)
243 | 
244 | 
245 |         else:
246 |             logging.debug("Maxmind miss on {0}".format(ip))
247 | 
248 |         # Reuturn the data enriched graph
249 |         return g
250 | 


--------------------------------------------------------------------------------
/plugins/path_count.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "path_count.yapsy-plugin"
 40 | NAME = "PathCount"
 41 | 
 42 | 
 43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | from yapsy.IPlugin import IPlugin
 48 | import logging
 49 | import networkx as nx
 50 | from datetime import datetime # timedelta imported above
 51 | import uuid
 52 | import ConfigParser
 53 | import inspect
 54 | import numpy as np
 55 | 
 56 | 
 57 | ## SETUP
 58 | loc = inspect.getfile(inspect.currentframe())
 59 | ind = loc.rfind("/")
 60 | loc = loc[:ind+1]
 61 | config = ConfigParser.SafeConfigParser()
 62 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 63 | 
 64 | if config.has_section('Core'):
 65 |     if 'name' in config.options('Core'):
 66 |         NAME = config.get('Core', 'name')
 67 | if config.has_section('Log'):
 68 |     if 'level' in config.options('Log'):
 69 |         LOGLEVEL = config.get('Log', 'level')
 70 |     if 'file' in config.options('Log'):
 71 |         LOGFILE = config.get('Log', 'file')
 72 | 
 73 | 
 74 | ## EXECUTION
 75 | class PluginOne(IPlugin):
 76 |     #  TODO: The init should contain anything to load modules or data files that should be variables of the  plugin object
 77 |     def __init__(self):
 78 |         pass
 79 | 
 80 |     #  TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly
 81 |     #  TODO: Current  layout is for an enrichment plugin
 82 |     #  TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
 83 |     #  TODO: interface [type, successful_load, name]
 84 |     #  TODO: query [TBD]
 85 |     #  TODO: minion [TBD]
 86 |     def configure(self):
 87 |         """
 88 | 
 89 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
 90 |         """
 91 |         config_options = config.options("Configuration")
 92 | 
 93 |         if 'cost' in config_options:
 94 |             cost = config.get('Configuration', 'cost')
 95 |         else:
 96 |             cost = 9999
 97 |         if 'speed' in config_options:
 98 |             speed = config.get('Configuration', 'speed')
 99 |         else:
100 |             speed = 9999
101 | 
102 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
103 |             description = config.get('Documentation', 'description')
104 |         else:
105 |             logging.error("'Description not in config file.")
106 |             return [None, False, NAME, None, cost, speed]
107 | 
108 |         if 'type' in config_options:
109 |             plugin_type = config.get('Configuration', 'type')
110 |         else:
111 |             logging.error("'Type' not specified in config file.")
112 |             return [None, False, NAME, description, cost, speed]
113 | 
114 |         return [plugin_type, True, NAME, description, cost, speed]
115 | 
116 | 
117 |     def score(self, sg, topic, max_depth=8):  # get_path_count_probability
118 |         """
119 | 
120 |         :param sg: egocentric subgraph around topic in networkx format
121 |         :param topic: graph of topics
122 |         :param max_depth: maximum length of paths
123 |         :return: Dictionary of probabilities keyed by node
124 |         """
125 |         #  THIS IS I CRITICAL PER the 1-1-1-1-t-3-9-1 graph
126 |         #  THIS WILL NOT TOLERATE LOOPS WITHOUT ADDITIONAL EFFORT
127 |         targets = set(sg.nodes()).difference(set(topic.nodes()))
128 |         paths = {}
129 |         probabilities = {}
130 | 
131 |         # Create a meta node to represent the topic nodes
132 |         # Based on https://gist.github.com/Zulko/7629206
133 |         meta_node_uuid = str(uuid.uuid4())
134 | 
135 |         sg.add_node(meta_node_uuid)  # Add the 'merged' node
136 | 
137 |         for n1, n2, data in sg.edges(data=True):
138 |             # For all edges related to one of the nodes to merge,
139 |             # make an edge going to or coming from the `new gene`.
140 |             if n1 in topic.nodes():
141 |                 sg.add_edge(meta_node_uuid, n2, data=data)
142 |             elif n2 in topic.nodes():
143 |                 sg.add_edge(n1, meta_node_uuid, data=data)
144 | 
145 |         # retrieve all paths to all nodes
146 |         for target in targets:
147 |             paths[target] = nx.all_simple_paths(sg, meta_node_uuid, target, cutoff=max_depth)
148 | 
149 |         # Combine the multiple paths from multiple topics to a single score per node
150 |         for target in targets:
151 |             probabilities[target] = 0
152 |             for path in paths[target]:
153 |                 # develop a weight based on the length of the path
154 |                 # INSERT WEIGHTING FUNCTION BELOW
155 |                 path_weight = self.normal_weight(len(path))
156 |                 # Calculate the confidence in the path
157 |                 confidence = 1
158 |                 for node in path:
159 |                     if 'confidence' in sg.node[node]:
160 |                         confidence *= sg.node[node]['confidence']
161 |                 # Sum the path score.  The path's score is it's confidence multiplied by it's weight
162 |                 probabilities[target] += confidence * path_weight
163 | 
164 |         # Make the topic nodes the highest probabilities just to put them on top
165 |         max_p = max(probabilities.values())
166 |         for node in topic.nodes():
167 |             probabilities[node] = max_p
168 | 
169 |         # TODO: Could normalize values to 1....
170 | 
171 |         # remove the meta node
172 |         sg.remove_node(meta_node_uuid)
173 | 
174 |         # return probabilities
175 |         return probabilities
176 | 
177 | 
178 |     def multigraph_to_digraph(self, g):
179 |         """
180 | 
181 |         :param g: takes a networkx mulitgraph
182 |         :return: returns a networkx digraph with edge weights representing the number of edges
183 | 
184 |         NOTE: This butchers duplicate edge properties.  If converting to score, use original edges in output.
185 |         """
186 |         G = nx.DiGraph()
187 |         edge_attributes = {}
188 | 
189 |         # if g isn't really a multigraph, just return it
190 |         if not g.is_multigraph():
191 |             return g
192 | 
193 |         # collapse down to a diagraph
194 |         G.add_nodes_from(g.nodes(data=True))
195 |         G.add_edges_from(g.edges(data=True))
196 | 
197 |         # for each edge, weight the confidence by the number of edges
198 |         '''
199 |         # captures a multiple of the confidence on the edge in the output graph
200 |         for edge in G.edges():
201 |             count = g.edges().count(edge)
202 |             if "count" > 1:
203 |                 if "confidence" in G.edge[edge[0]][edge[1]]:
204 |                     G.edge[edge[0]][edge[1]]['confidence'] *= count
205 |                 else:
206 |                     G.edge[edge[0]][edge[1]]["confidence"] = count
207 |         '''
208 |         # Captures every confidence
209 |         for edge in G.edges():
210 |             confidence = 0
211 |             for src_edge in g.edge[edge[0]][edge[1]].values():
212 |                 confidence += src_edge.get('confidence', 1)
213 |             G.edge[edge[0]][edge[1]]['confidence'] = confidence
214 |     #    # collapse down to a diagraph
215 |     #    G.add_nodes_from(g.nodes(data=True))
216 |     #    G.add_edges_from(g.edges(data=True))
217 | 
218 |         return G
219 | 
220 | 
221 |     ### DISTANCE WEIGHTS ###
222 |     def linear_weight(self, distance, ddp=.2):
223 |         """
224 | 
225 |         :param distance: distance from topic
226 |         :param ddp: percentage to degrade
227 |         :return: Linear weighting factor as float
228 |         """
229 |         return 1 - (distance * ddp)
230 | 
231 | 
232 |     def log_weight(self, distance, a=1, b=1, n=3, pwr=1):
233 |         """
234 | 
235 |         :param distance: distance: distance from topic
236 |         :param a: constant to shape graph. Adjusts hight at 0 = a / (1 + b)
237 |         :param b: constant to shape graph.
238 |         :param n: constant to shape graph.
239 |         :param pwr: constant to shape graph.
240 |         :return: log weighting factor as float
241 |         """
242 |         return a / (1 + b*np.exp((distance-n) * pwr))
243 | 
244 | 
245 |     def exponential_weight(self, distance, b=2):
246 |         return np.exp(-distance/b)
247 | 
248 | 
249 |     def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1):
250 |         """
251 | 
252 |         :param distance: distance from topic
253 |         :param pwr: constant to shape graph.  Higher = steeper decline
254 |         :param b: constant to shape graph.  lower = greater spread
255 |         :return: normal weighting factor as float
256 |         pwr = 2.5, a = 1, c = 0, b = 30
257 |         """
258 |         return a * np.exp(-(distance + c)**pwr/b)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Verum
  2 | =====
  3 | 
  4 | Implementation of Context-Graph algorithms for graph enrichment and querying. 
  5 | 
  6 | Context Graph Enrichment:
  7 | cg_enrich.py provides functions to enrich the context graph.
  8 | 
  9 | Context Graph Query:
 10 | cg_query.py provides functions necessary to query the context graph for a specific topic.
 11 | 
 12 | Context Graph Presentation:
 13 | cg_present.py provides functions necessary to present the data to various clients.
 14 | 
 15 | 
 16 | ## Installation
 17 | 
 18 | Clone the Repository
 19 | ```
 20 | git clone https://github.com/vz-risk/Verum.git
 21 | ```
 22 | 
 23 | 
 24 | ## Usage
 25 | Initialize storage.  In this case, [neo4j] (http://neo4j.com/).
 26 | 1.  [Download neo4j] (http://neo4j.com/download/).
 27 | 2.  Unzip it, (if *nix or Mac OS X).
 28 | 3.  Run it, ('./bin/neo4j start' on *nix or Mac OS X).
 29 | 
 30 | (If using [TitanDB] (http://thinkaurelius.github.io/titan/), follow the [installation documentation] (http://s3.thinkaurelius.com/docs/titan/0.9.0-M2/getting-started.html#_downloading_titan_and_running_the_gremlin_shell) provided for Titan.)
 31 | 
 32 | ### Enrichment
 33 | 
 34 | Run the following within your python code or at a python console to initialize the package.
 35 | ```
 36 | # import imp to load verum
 37 | import imp
 38 | # set verum location
 39 | LOCATION = "~/Documents/Development/verum/"
 40 | # import verum
 41 | fp, pathname, description = imp.find_module("verum", [LOCATION])
 42 | Verum = imp.load_module("verum", fp, pathname, description)
 43 | # Load plugins. NOTE: if your directory is wrong, you won't receive an error but will see no individual plugins listed as successfully configured.
 44 | verum = Verum.app("~/Documents/Development/verum/plugins", "~/Documents/Development/verum/minions")
 45 | # display loaded plugins directly using yapsy
 46 | for plugin in verum.plugins.getAllPlugins():
 47 |     print plugin.name
 48 | ```
 49 | 
 50 | Define some data to enrich:
 51 | ```
 52 | ips = ['98.124.199.1',
 53 | '178.62.219.229',
 54 |  '98.124.198.1',
 55 |  '209.216.10.148',
 56 |  '124.248.237.26',
 57 |  '134.170.185.211',
 58 |  '223.29.248.252',
 59 |  '117.18.73.98']
 60 | domains = ['81.java-se.com',
 61 |  'stifie.com',
 62 |  'microsoftor.com',
 63 |  'pop1.java-sec.com',
 64 |  '*.mynethood.com',
 65 |  'www.btipnow.com',
 66 |  '*.searchenginewatch.us.com',
 67 |  'google3853ed273b89687a.mynethood.com',
 68 |  'pop.java-sec.com',
 69 |  'm-stone.co.jp',
 70 |  'www.mynethood.com',
 71 |  'jre76.java-sec.com',
 72 |  'cdn.foxitsoftwares.com',
 73 |  'u.java-se.com',
 74 |  'bloger2.microsoftor.com',
 75 |  'kai.jztok.com',
 76 |  'ns1.searchenginewatch.us.com',
 77 |  '*.microsoftor.com',
 78 |  's3m7ke.microsoftor.com',
 79 |  'mynethood.com',
 80 |  's3m7ker.microsoftor.com',
 81 |  'officesoft.microsoftor.com',
 82 |  'foxitsoftwares.com']
 83 | ips2 = ['107.160.143.10',
 84 |  '107.167.73.219',
 85 |  '148.163.104.35',
 86 |  '148.163.104.35',
 87 |  '184.164.70.204',
 88 |  '184.164.81.11',
 89 |  '216.244.93.247',
 90 |  '50.117.38.170',
 91 |  '50.117.38.170']
 92 | domains2 = ['4uexs.rxlijd.bbs.mythem.es',
 93 |  'abdebassetbenhassen.org',
 94 |  'acid.borec.cz',
 95 |  'blogs.burlingtonfreepress.com',
 96 |  'buysacramentoproperties.com',
 97 |  'cancunluxurystyle.com',
 98 |  'cate-rina.net',
 99 |  'cdn.servehttp.com',
100 |  'chuamun.com',
101 |  'dayapramana.com',
102 |  'dirtychook.com',
103 |  'f1wot.bbs.mythem.es',
104 |  'fybic.com',
105 |  'gotoe3.tw',
106 |  'haft-honar.com',
107 |  'ichener-duwackstumbe.de',
108 | 'iotqduzgha.vtre.qvofj.qypvthu.loqu.forum.mythem.es',
109 |  'jigsore.nasky.net',
110 |  'kitsoft.ru',
111 |  'lytovp.istmein.de',
112 |  'meeting-rsvp.com',
113 |  'mignonfilet.com',
114 |  'myinfo.any-request-allowed.com',
115 |  'oceanspirit.com',
116 |  'opm-learning.org',
117 |  'opmsecurity.org',
118 |  'pejoratively.bloq.ro',
119 |  'subhashmadhu.com',
120 |  'tlvegan.com',
121 |  'tommyhumphreys.com',
122 |  'transcandence.com',
123 |  'travelingmu.com',
124 |  'tsv-albertshofen.net',
125 |  'universofoot.com.br',
126 |  'WDC-News-post.com',
127 |  'wdc-news-post.com',
128 |  'woodcreations.com.pk',
129 |  'xn--80aa4agmizb8a.xn--p1ai',
130 | 'yodotink.rjtp.nxrlecd.tcsq.qypvthu.loqu.forum.mythem.es']
131 |  ```
132 | 
133 | Run the following to test enrichment.
134 | ```
135 | # Query IP & domain plugins
136 | print verum.get_enrichments(['ip'])
137 | print verum.get_enrichments(['domain'])
138 | # Query cheap IP plugins
139 | print verum.get_enrichments(['ip'], cost=3)
140 | # Query fast domain plugins
141 | print verum.get_enrichments(['domain'], speed=2)
142 | # Run maxmind enrichments of an IP
143 | import networkx as nx
144 | g = verum.run_enrichments(ips[0], 'ip', names=[u'Maxmind ASN Enrichment'])
145 | print nx.info(g)
146 | ```
147 | 
148 | Run the following to test querying.  (Note: the storage interface modules expect graphs to be in a specific schema.  If they are not, the interface module will error trying to store them.)
149 | ```
150 | # (If you didn't create a graph above through an enrichment)
151 | g = Verum.create_topic({'ip': ['184.164.70.204', '184.164.81.11'], 'domain': ['WDC-News-post.com', 'wdc-news-post.com']})
152 | ```
153 | 
154 | ```
155 | # See what storage interfaces are configured
156 | print verum.get_interfaces(configured=True)
157 | # Set the storage interface
158 | verum.set_interface('Neo4j')
159 | # Store the graph in the storage interface
160 | verum.store_graph(g)
161 | ```
162 | 
163 | Finally, Attempt to enrich multiple pieces of data to form a robust context graph:
164 | ```
165 | # Enrich IPs
166 | for ip in ips + ips2:
167 |     verum.store_graph(verum.run_enrichments(ip, 'ip', names=[u'Maxmind ASN Enrichment']))
168 | # Enrich Domains (passing exceptions so if a plugin fails it doesn't stop the loop)
169 | for domain in domains + domains2:
170 |     try:
171 |         verum.store_graph(verum.run_enrichments(domain, 'domain', names=[u'DNS Enrichment', u'TLD Enrichment']))
172 |     except:
173 |         pass
174 | # Bulk enrich IPs with Cymru
175 | verum.store_graph(verum.run_enrichments(ips + ips2, 'ip', names=[u'Cymru Enrichment']))
176 | ```
177 | 
178 | Now open `http://locahost:7474/` in a browser and enter the Cypher Query:
179 | ```
180 | MATCH (n:attribute {key:'ip', value:"98.124.198.1"}) 
181 | RETURN n;
182 | ```
183 | You can then visually explore the graph associated with that IP.
184 | 
185 | We want to classify all these domains and IPs as malicious:
186 | ```
187 | # Classify all IPs and Domains as Malicious
188 | for ip in ips + ips2:
189 |     verum.store_graph(verum.classify.run({'key': 'ip', 'value': ip, 'classification': 'malice'}))
190 | for domain in domains + domains2:
191 |     verum.store_graph(verum.classify.run({'key': 'domain', 'value': domain, 'classification': 'malice'}))
192 | ```
193 | 
194 | ### Querying
195 | 
196 | Now that we have built an enriched context graph, we can query it.
197 | 
198 | ```
199 | #Find out if < '117.18.73.98',> is malicious
200 | # Create a topic to score
201 | topic = Verum.create_topic({"ip": '117.18.73.98'})
202 | # Retrieve the subgraph associated with it
203 | sg = verum.run_query(topic)
204 | # List out configured scoring plugins available.
205 | verum.get_scoring_plugins()
206 | # Set the default scoring plugin
207 | verum.set_scoring_plugin('PageRank2')
208 | # Check to ensure it was set
209 | verum.get_default_scoring_plugin()
210 | scores = verum.score_subgraph(topic, sg)
211 | print scores
212 | ```
213 | 
214 | ### Scoring
215 | To understand the scores, we can do some relative comparisons.  We compare the malice score both to the topic as well as to other nodes and see that the malice node is stronger than average but not overly strong.
216 | ```
217 | # Compare the malice node to the average score
218 | Verum.compare_classifications(scores, {"class":"attribute", "key":"classification", "value":"malice"}, output="print")
219 | # Compare the malice node to the topic node
220 | Verum.compare_classifications(scores, {"class":"attribute", "key":"classification", "value":"malice"}, {"class":"attribute", "key":"ip", "value":"117.18.73.98"}, output="print")
221 | # Score the percentile of the malice score
222 | Verum.score_percentile(scores, {"class":"attribute", "key":"classification", "value":"malice"}, output="print")
223 | ```
224 | 
225 | Note, if you wanted to know about malice, you could rescore the subgraph with the malice node as the topic and compare the node you are interested in, (117.18.73.98 in our example), and compare it to the other nodes as above.
226 | 
227 | 
228 | ### Minions
229 | Minions are threaded algorithms that operate on the context graph in the background.  They have access to the app object so hare more ability to work directly with the context graph than other apps.
230 | 
231 | First, run the following cypher query to find out how many nodes are in your context graph.
232 | ```
233 | start n=node(*)
234 | match n
235 | return count(n)
236 | ```
237 | 
238 | Start a simple minion which imports and enriches a threat intelligence feed.
239 | ```
240 | # List configured minions
241 | verum.get_minions()
242 | # Start a minion
243 | verum.start_minions([u'OSINT Bambenek Consulting V2'])
244 | # Check if it's started
245 | verum.get_running_minions()
246 | ```
247 | 
248 | Check the number of nodes.  It should be increasing.
249 | ```
250 | start n=node(*)
251 | match n-[r]-()
252 | return count(distinct n), count(distinct r)
253 | ```
254 | 
255 | 
256 | ## Contributing
257 | 1. Fork it!
258 | 2. Create your feature branch: `git checkout -b my-new-feature`
259 | 3. Commit your changes: `git commit -am 'Add some feature'`
260 | 4. Push to the branch: `git push origin my-new-feature`
261 | 5. Submit a pull request :D
262 | 
263 | 
264 | ## License
265 | 
266 | Licensed to the Apache Software Foundation (ASF) under one
267 | or more contributor license agreements.  See the NOTICE file
268 | distributed with this work for additional information
269 | regarding copyright ownership.  The ASF licenses this file
270 | to you under the Apache License, Version 2.0 (the
271 | "License"); you may not use this file except in compliance
272 | with the License.  You may obtain a copy of the License at
273 |   http://www.apache.org/licenses/LICENSE-2.0
274 | Unless required by applicable law or agreed to in writing,
275 | software distributed under the License is distributed on an
276 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
277 | KIND, either express or implied.  See the License for the
278 | specific language governing permissions and limitations
279 | under the License.


--------------------------------------------------------------------------------
/plugins/tld.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 |  AUTHOR: Gabriel Bassett
  4 |  DATE: 12-17-2013
  5 |  DEPENDENCIES: a list of modules requiring installation
  6 |  Copyright 2014 Gabriel Bassett
  7 | 
  8 |  LICENSE:
  9 | Licensed to the Apache Software Foundation (ASF) under one
 10 | or more contributor license agreements.  See the NOTICE file
 11 | distributed with this work for additional information
 12 | regarding copyright ownership.  The ASF licenses this file
 13 | to you under the Apache License, Version 2.0 (the
 14 | "License"); you may not use this file except in compliance
 15 | with the License.  You may obtain a copy of the License at
 16 | 
 17 |   http://www.apache.org/licenses/LICENSE-2.0
 18 | 
 19 | Unless required by applicable law or agreed to in writing,
 20 | software distributed under the License is distributed on an
 21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 22 | KIND, either express or implied.  See the License for the
 23 | specific language governing permissions and limitations
 24 | under the License.
 25 | 
 26 |  DESCRIPTION:
 27 |  Functions necessary to enrich the context graph
 28 | 
 29 | """
 30 | # PRE-USER SETUP
 31 | pass
 32 | 
 33 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 34 | 
 35 | 
 36 | # USER VARIABLES
 37 | TLD_CONFIG_FILE = "tld.yapsy-plugin"
 38 | NAME = "TLD Enrichment"
 39 | 
 40 | 
 41 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 42 | 
 43 | 
 44 | ## IMPORTS
 45 | from yapsy.IPlugin import IPlugin
 46 | import logging
 47 | import networkx as nx
 48 | from datetime import datetime # timedelta imported above
 49 | import dateutil  # to parse variable time strings
 50 | import uuid
 51 | import ConfigParser
 52 | import inspect
 53 | try:
 54 |     import tldextract
 55 |     module_import_success = True
 56 | except:
 57 |     module_import_success = False
 58 |     logging.error("Module import failed.  Please install the following module: tldextract.")
 59 |     raise
 60 | 
 61 | 
 62 | ## SETUP
 63 | __author__ = "Gabriel Bassett"
 64 | loc = inspect.getfile(inspect.currentframe())
 65 | ind = loc.rfind("/")
 66 | loc = loc[:ind+1]
 67 | config = ConfigParser.SafeConfigParser()
 68 | config.readfp(open(loc + TLD_CONFIG_FILE))
 69 | 
 70 | if config.has_section('Core'):
 71 |     if 'name' in config.options('Core'):
 72 |         NAME = config.get('Core', 'name')
 73 | 
 74 | ## EXECUTION
 75 | class PluginOne(IPlugin):
 76 |     def __init__(self):
 77 |         pass
 78 | 
 79 |     def configure(self):
 80 |         """
 81 | 
 82 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
 83 |         """
 84 |         config_options = config.options("Configuration")
 85 | 
 86 |         if 'cost' in config_options:
 87 |             cost = config.get('Configuration', 'cost')
 88 |         else:
 89 |             cost = 9999
 90 |         if 'speed' in config_options:
 91 |             speed = config.get('Configuration', 'speed')
 92 |         else:
 93 |             speed = 9999
 94 | 
 95 |         if 'type' in config_options:
 96 |             plugin_type = config.get('Configuration', 'Type')
 97 |         else:
 98 |             logging.error("'Type' not specified in config file.")
 99 |             return [None, False, NAME, "Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.", None, cost, speed]
100 | 
101 |         if 'inputs' in config_options:
102 |             inputs = config.get('Configuration', 'Inputs')
103 |             inputs = [l.strip().lower() for l in inputs.split(",")]
104 |         else:
105 |             logging.error("No input types specified in config file.")
106 |             return [plugin_type, False, NAME, "Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.", None, cost, speed]
107 | 
108 |         if not module_import_success:
109 |             logging.error("Module import failure caused configuration failure.")
110 |             return [plugin_type, False, NAME, "Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.", inputs, cost, speed]
111 |         else:
112 |             return [plugin_type, True, NAME, "Takes a domain name and returns the top level domain, mid-domain, and sub-domain as networkx graph.", inputs, cost, speed]
113 | 
114 | 
115 |     def run(self, domain, start_time="", include_subdomain=False):
116 |         """ str, str -> networkx multiDiGraph
117 | 
118 |         :param domain: a string containing a domain to look up
119 |         :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object.
120 |         :param include_subdomain: Boolean value.  Default False.  If true, subdomain will be returned in enrichment graph
121 |         :return: a networkx graph representing the sections of the domain
122 |         """
123 |         # Parse the start_time
124 |         if type(start_time) is str:
125 |             try:
126 |                 time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ")
127 |             except:
128 |                 time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
129 |         elif type(start_time) is datetime:
130 |             time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
131 |         else:
132 |             time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
133 | 
134 |         ext = tldextract.extract(domain)
135 |         
136 |         g = nx.MultiDiGraph()
137 | 
138 |         # Get or create Domain node
139 |         domain_uri = "class=attribute&key={0}&value={1}".format("domain", domain)
140 |         g.add_node(domain_uri, {
141 |             'class': 'attribute',
142 |             'key': "domain",
143 |             "value": domain,
144 |             "start_time": time,
145 |             "uri": domain_uri
146 |         })
147 | 
148 |         # Get or create Enrichment node
149 |         tld_extract_uri = "class=attribute&key={0}&value={1}".format("enrichment", "tld_extract")
150 |         g.add_node(tld_extract_uri, {
151 |             'class': 'attribute',
152 |             'key': "enrichment",
153 |             "value": "tld_extract",
154 |             "start_time": time,
155 |             "uri": tld_extract_uri
156 |         })
157 | 
158 |         # Get or create TLD node
159 |         tld_uri = "class=attribute&key={0}&value={1}".format("domain", ext.suffix)
160 |         g.add_node(tld_uri, {
161 |             'class': 'attribute',
162 |             'key': "domain",
163 |             "value": ext.suffix,
164 |             "start_time": time,
165 |             "uri": tld_uri
166 |         })
167 | 
168 |         # Link domain to tld
169 |         edge_attr = {
170 |             "relationship": "describedBy",
171 |             "start_time": time,
172 |             "origin": "tld_extract",
173 |             "describedBy":"suffix"
174 |         }
175 |         source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri)
176 |         dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, tld_uri)
177 |         edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
178 |         rel_chain = "relationship"
179 |         while rel_chain in edge_attr:
180 |             edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
181 |             rel_chain = edge_attr[rel_chain]
182 |         if "origin" in edge_attr:
183 |             edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
184 |         edge_attr["uri"] = edge_uri
185 |         g.add_edge(domain_uri, tld_uri, edge_uri, edge_attr)
186 | 
187 | 
188 |         # Get or create mid domain node
189 |         mid_domain_uri = "class=attribute&key={0}&value={1}".format("domain", ext.domain)
190 |         g.add_node(mid_domain_uri, {
191 |             'class': 'attribute',
192 |             'key': "domain",
193 |             "value": ext.domain,
194 |             "start_time": time,
195 |             "uri": mid_domain_uri
196 |         })
197 | 
198 |         # Link domain to mid_domain
199 |         edge_attr = {
200 |             "relationship": "describedBy",
201 |             "start_time": time,
202 |             "origin": "tld_extract",
203 |             "describedBy":"domain"
204 |         }
205 |         source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri)
206 |         dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, mid_domain_uri)
207 |         edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
208 |         rel_chain = "relationship"
209 |         while rel_chain in edge_attr:
210 |             edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
211 |             rel_chain = edge_attr[rel_chain]
212 |         if "origin" in edge_attr:
213 |             edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
214 |         edge_attr["uri"] = edge_uri
215 |         g.add_edge(domain_uri, mid_domain_uri, edge_uri, edge_attr)
216 | 
217 | 
218 |         # if including subdomains, create subdomain and node
219 |         if include_subdomain:
220 |             # Get or create mid domain node
221 |             subdomain_uri = "class=attribute&key={0}&value={1}".format("domain", ext.subdomain)
222 |             g.add_node(subdomain_uri, {
223 |                 'class': 'attribute',
224 |                 'key': "domain",
225 |                 "value": ext.domain,
226 |                 "start_time": time,
227 |                 "uri": subdomain_uri
228 |             })
229 | 
230 |             # Link domain to mid_domain
231 |             edge_attr = {
232 |                 "relationship": "describedBy",
233 |                 "start_time": time,
234 |                 "origin": "tld_extract",
235 |                 "describedBy":"subdomain"
236 |             }
237 |             source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri)
238 |             dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, subdomain_uri)
239 |             edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
240 |             rel_chain = "relationship"
241 |             while rel_chain in edge_attr:
242 |                 edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
243 |                 rel_chain = edge_attr[rel_chain]
244 |             if "origin" in edge_attr:
245 |                 edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
246 |             edge_attr["uri"] = edge_uri
247 |             g.add_edge(domain_uri, subdomain_uri, edge_uri, edge_attr)
248 | 
249 |         # Link domain to enrichment
250 |         edge_attr = {
251 |             "relationship": "describedBy",
252 |             "start_time": time,
253 |             "origin": "tld_extract"
254 |         }
255 |         source_hash = uuid.uuid3(uuid.NAMESPACE_URL, domain_uri)
256 |         dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, tld_extract_uri)
257 |         edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
258 |         rel_chain = "relationship"
259 |         while rel_chain in edge_attr:
260 |             edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
261 |             rel_chain = edge_attr[rel_chain]
262 |         if "origin" in edge_attr:
263 |             edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
264 |         edge_attr["uri"] = edge_uri
265 |         g.add_edge(domain_uri, tld_extract_uri, edge_uri, edge_attr)
266 | 
267 |         return g


--------------------------------------------------------------------------------
/plugins/cymru.py:
--------------------------------------------------------------------------------
  1 | # TODO: Refactor as plugin
  2 | #!/usr/bin/env python
  3 | """
  4 |  AUTHOR: Gabriel Bassett
  5 |  DATE: 12-17-2013
  6 |  DEPENDENCIES: a list of modules requiring installation
  7 |  Copyright 2014 Gabriel Bassett
  8 | 
  9 |  LICENSE:
 10 | Licensed to the Apache Software Foundation (ASF) under one
 11 | or more contributor license agreements.  See the NOTICE file
 12 | distributed with this work for additional information
 13 | regarding copyright ownership.  The ASF licenses this file
 14 | to you under the Apache License, Version 2.0 (the
 15 | "License"); you may not use this file except in compliance
 16 | with the License.  You may obtain a copy of the License at
 17 | 
 18 |   http://www.apache.org/licenses/LICENSE-2.0
 19 | 
 20 | Unless required by applicable law or agreed to in writing,
 21 | software distributed under the License is distributed on an
 22 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 23 | KIND, either express or implied.  See the License for the
 24 | specific language governing permissions and limitations
 25 | under the License.
 26 | 
 27 |  DESCRIPTION:
 28 |  Functions necessary to enrich the context graph
 29 | 
 30 | """
 31 | # PRE-USER SETUP
 32 | from datetime import timedelta
 33 | 
 34 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 35 | 
 36 | 
 37 | # USER VARIABLES
 38 | CYMRU_CONFIG_FILE = "cymru.yapsy-plugin"
 39 | NAME = 'cymru'
 40 | 
 41 | 
 42 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 43 | 
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | import networkx as nx
 48 | from yapsy.IPlugin import IPlugin
 49 | import logging
 50 | import ConfigParser
 51 | from datetime import datetime # timedelta imported above
 52 | import dateutil  # to parse variable time strings
 53 | import uuid
 54 | import imp
 55 | import ipaddress
 56 | import inspect
 57 | 
 58 | ## SETUP
 59 | 
 60 | __author__ = "Gabriel Bassett"
 61 | loc = inspect.getfile(inspect.currentframe())
 62 | i = loc.rfind("/")
 63 | loc = loc[:i+1]
 64 | config = ConfigParser.SafeConfigParser()
 65 | config.readfp(open(loc + CYMRU_CONFIG_FILE))
 66 | 
 67 | if config.has_section('Core'):
 68 |     if 'name' in config.options('Core'):
 69 |         NAME= config.get('Core', 'name')
 70 | if config.has_section('Configuration') and 'cymru_module' in config.options('Configuration'):
 71 |     cymru_file = config.get('Configuration', 'cymru_module')
 72 |     if cymru_file[0] != "/":
 73 |         cymru_file = loc + cymru_file
 74 |     i = cymru_file.rfind("/")
 75 |     cymru_dir = cymru_file[:i]
 76 |     cymru_module = cymru_file[i+1:].strip(".py")
 77 |     with open("/tmp/output", 'w') as f:
 78 |         f.write(cymru_dir + "\n")
 79 |         f.write(cymru_module + "\n")
 80 |         f.write(cymru_file + "\n")
 81 |     try:
 82 |         fp, pathname, description = imp.find_module(cymru_module, [cymru_dir])
 83 |         cymru_api = imp.load_module(cymru_module, fp, pathname, description)
 84 |         module_import_success = True
 85 |     except:
 86 |         module_import_success = False
 87 |         raise
 88 | else:
 89 |     module_import_success = False
 90 | 
 91 | ## EXECUTION
 92 | class PluginOne(IPlugin):
 93 |     def __init__(self):
 94 |         pass
 95 | 
 96 |     def configure(self):
 97 |         """
 98 | 
 99 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
100 |         """
101 |         config_options = config.options("Configuration")
102 | 
103 |         if 'cost' in config_options:
104 |             cost = config.get('Configuration', 'cost')
105 |         else:
106 |             cost = 9999
107 |         if 'speed' in config_options:
108 |             speed = config.get('Configuration', 'speed')
109 |         else:
110 |             speed = 9999
111 | 
112 |         if 'type' in config_options:
113 |             plugin_type = config.get('Configuration', 'type')
114 |         else:
115 |             logging.error("'Type' not specified in config file.")
116 |             return [None, False, NAME, "Takes a list of IPs and returns ASN and BGP information as networkx graph of the information.", None, cost, speed]
117 | 
118 |         if 'inputs' in config_options:
119 |             inputs = config.get('Configuration', 'Inputs')
120 |             inputs = [l.strip().lower() for l in inputs.split(",")]
121 |         else:
122 |             logging.error("No input types specified in config file.")
123 |             return [plugin_type, False, NAME, "Takes a list of IPs and returns ASN and BGP information as networkx graph of the information.", None, cost, speed]
124 | 
125 |         if not module_import_success:
126 |             logging.error("Module import failure caused configuration failure.")
127 |             return [plugin_type, False, NAME, "Takes a list of IPs and returns ASN and BGP information as networkx graph of the information.", inputs, cost, speed]
128 |         else:
129 |             return [plugin_type, True, NAME, "Takes a list of IPs and returns ASN and BGP information as networkx graph of the information.", inputs, cost, speed]
130 | 
131 | 
132 |     def run(self, ips, start_time = ""):
133 |         """ str, str -> networkx multiDiGraph
134 | 
135 |         :param ips: list of IP addresses to enrich in the graph
136 |         :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object.
137 |         :return: subgraph
138 | 
139 |         Note: based on From https://gist.github.com/zakird/11196064
140 |         """
141 | 
142 |         # Parse the start_time
143 |         if type(start_time) is str:
144 |             try:
145 |                 time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ")
146 |             except:
147 |                 time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
148 |         elif type(start_time) is datetime:
149 |             time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
150 |         else:
151 |             time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
152 | 
153 | 
154 |         # Since sometimes I just pass in an IP, we'll fix it here.
155 |         if type(ips) == str:
156 |             ips = [ips]
157 | 
158 |         # Validate IP
159 |         for ip in ips:
160 |             _ = ipaddress.ip_address(unicode(ip))
161 | 
162 |         g = nx.MultiDiGraph()
163 | 
164 |         # Create cymru ASN enrichment node
165 |         cymru_asn_uri = "class=attribute&key={0}&value={1}".format("enrichment", "cymru_asn_enrichment")
166 |         attributes = {
167 |             'class': 'attribute',
168 |             'key': 'enrichment',
169 |             "value": "cymru_asn_enrichment",
170 |             'uri': cymru_asn_uri,
171 |             'start_time': time
172 |         }
173 |         g.add_node(cymru_asn_uri, attributes)
174 | 
175 |     #    print ips
176 | 
177 |         a = cymru_api.CymruIPtoASNService()
178 | 
179 |         for result in a.query(ips):
180 |             try:
181 |                 t = dateutil.parser(result.allocated_at).strftime("%Y-%m-%dT%H:%M:%SZ")
182 |             except:
183 |                 t = time
184 |             # Create ip's node
185 |             ip_uri = "class=attribute&key={0}&value={1}".format("ip", result.ip_address)
186 |             g.add_node(ip_uri, {
187 |                 'class': 'attribute',
188 |                 'key': "ip",
189 |                 "value": result.ip_address,
190 |                 "start_time": time,
191 |                 "uri": ip_uri
192 |             })
193 | 
194 |             # link to cymru ASN enrichment
195 |             edge_attr = {
196 |                 "relationship": "describedBy",
197 |                 "origin": "cymru_asn_enrichment",
198 |                 "start_time": time,
199 |             }
200 |             source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri)
201 |             dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, cymru_asn_uri)
202 |             edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
203 |             rel_chain = "relationship"
204 |             while rel_chain in edge_attr:
205 |                 edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
206 |                 rel_chain = edge_attr[rel_chain]
207 |             if "origin" in edge_attr:
208 |                 edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
209 |             edge_attr["uri"] = edge_uri
210 |             g.add_edge(ip_uri, cymru_asn_uri, edge_uri, edge_attr)
211 | 
212 | 
213 |             # Create bgp prefix node
214 |             bgp_uri = "class=attribute&key={0}&value={1}".format("bgp", result.bgp_prefix)
215 |             attributes = {
216 |                 'class': 'attribute',
217 |                 'key': 'bgp',
218 |                 'value': result.bgp_prefix,
219 |                 'uri': bgp_uri,
220 |                 'start_time': time
221 |             }
222 |             g.add_node(bgp_uri, attributes)
223 | 
224 |             # Link bgp prefix node to ip
225 |             edge_attr = {
226 |                 "relationship": "describedBy",
227 |                 "origin": "cymru_asn_enrichment",
228 |                 "start_time": time,
229 |             }
230 |             source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri)
231 |             dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, bgp_uri)
232 |             edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
233 |             rel_chain = "relationship"
234 |             while rel_chain in edge_attr:
235 |                 edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
236 |                 rel_chain = edge_attr[rel_chain]
237 |             if "origin" in edge_attr:
238 |                 edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
239 |             edge_attr["uri"] = edge_uri
240 |             g.add_edge(ip_uri, bgp_uri, edge_uri, edge_attr)
241 | 
242 | 
243 |             # create asn node
244 |             asn_uri = "class=attribute&key={0}&value={1}".format("asn", result.as_number)
245 |             attributes = {
246 |                 'class': 'attribute',
247 |                 'key': 'asn',
248 |                 'value': result.as_number,
249 |                 'uri': asn_uri,
250 |                 'start_time': time
251 |             }
252 |             try:
253 |                 attributes['owner'] = result.as_name
254 |             except:
255 |                 pass
256 |             g.add_node(asn_uri, attributes)
257 | 
258 |             # link bgp prefix to asn node
259 |             edge_attr = {
260 |                 "relationship": "describedBy",
261 |                 "origin": "cymru_asn_enrichment",
262 |                 "start_time": t,
263 |             }
264 |             source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri)
265 |             dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, asn_uri)
266 |             edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
267 |             rel_chain = "relationship"
268 |             while rel_chain in edge_attr:
269 |                 edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
270 |                 rel_chain = edge_attr[rel_chain]
271 |             if "origin" in edge_attr:
272 |                 edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
273 |             edge_attr["uri"] = edge_uri
274 |             g.add_edge(ip_uri, asn_uri, edge_uri, edge_attr)
275 | 
276 | 
277 |         # Return the data enriched IP as a graph
278 |         return g
279 | 


--------------------------------------------------------------------------------
/examples/plugin_template.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "plugin_template.yapsy-plugin"  # CHANGEME
 40 | NAME = "<NAME FROM CONFIG FILE AS BACKUP IF CONFIG FILE DOESN'T LOAD>"  # CHANGEME
 41 | 
 42 | 
 43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | from yapsy.IPlugin import IPlugin
 48 | import logging
 49 | import networkx as nx
 50 | from datetime import datetime # timedelta imported above
 51 | import uuid
 52 | import ConfigParser
 53 | import inspect
 54 | import threading
 55 | """
 56 | try:
 57 |     import <SOME UNIQUE MODULE>
 58 |     module_import_success = True
 59 | except:
 60 |     module_import_success = False
 61 |     logging.error("Module import failed.  Please install the following module: <SOME UNIQUE MODULE>.")
 62 | """
 63 | 
 64 | ## SETUP
 65 | loc = inspect.getfile(inspect.currentframe())
 66 | ind = loc.rfind("/")
 67 | loc = loc[:ind+1]
 68 | config = ConfigParser.SafeConfigParser()
 69 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 70 | 
 71 | if config.has_section('Core'):
 72 |     if 'name' in config.options('Core'):
 73 |         NAME = config.get('Core', 'name')
 74 | if config.has_section('Log'):
 75 |     if 'level' in config.options('Log'):
 76 |         LOGLEVEL = config.get('Log', 'level')
 77 |     if 'file' in config.options('Log'):
 78 |         LOGFILE = config.get('Log', 'file')
 79 | 
 80 | 
 81 | ## EXECUTION
 82 | class PluginOne(IPlugin):
 83 |     inputs = None
 84 |     shutdown = False  # Used to trigger shutdown of a minion
 85 | 
 86 |     #  CHANGEME: The init should contain anything to load modules or data files that should be variables of the  plugin object
 87 |     def __init__(self):
 88 |         pass
 89 | 
 90 |     #  CHANGEME: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly
 91 |     #  CHANGEME: Current  layout is for an enrichment plugin
 92 |     #  CHANGEME: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
 93 |     #  CHANGEME: interface [type, successful_load, name]
 94 |     #  CHANGEME: score [type, successful_load, name, description, cost, speed]
 95 |     #  CHANGEME: minion [type, successful_load, name, description, cost]
 96 |     def configure(self):
 97 |         """
 98 | 
 99 |         :return: return list of configuration variables starting with [plugin_type, successful_load, name, description, <PLUGIN TYPE SPECIFIC VALUES>]
100 |         """
101 |         config_options = config.options("Configuration")
102 | 
103 |         # Cost and speed are not applicable to all plugin types
104 |         """
105 |         if 'cost' in config_options:
106 |             cost = config.get('Configuration', 'cost')
107 |         else:
108 |             cost = 9999
109 |         if 'speed' in config_options:
110 |             speed = config.get('Configuration', 'speed')
111 |         else:
112 |             speed = 9999
113 |         """
114 | 
115 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
116 |             description = config.get('Configuration', 'type')
117 |         else:
118 |             logging.error("'Description not in config file.")
119 |             return [None, False, NAME, None, cost, speed]
120 | 
121 |         if 'type' in config_options:
122 |             plugin_type = config.get('Configuration', 'type')
123 |         else:
124 |             logging.error("'Type' not specified in config file.")
125 |             return [None, False, NAME, description, None, cost, speed]
126 | 
127 |         # Inputs is only applicable to enrichment plugins
128 |         """
129 |         if 'inputs' in config_options:
130 |             self.inputs = config.get('Configuration', 'Inputs')
131 |             self.inputs = [l.strip().lower() for l in self.inputs.split(",")]
132 |         else:
133 |             logging.error("No input types specified in config file.")
134 |             return [plugin_type, False, NAME, description, None, cost, speed]
135 |         """
136 | 
137 |         # Module success is only applicable to plugins which import unique code
138 |         """
139 |         if not module_import_success:
140 |             logging.error("Module import failure caused configuration failure.")
141 |             return [plugin_type, False, NAME, description, self.inputs, cost, speed]
142 |         """
143 | 
144 |         return [plugin_type, True, NAME, description, self.inputs, cost, speed]
145 | 
146 | 
147 |     ############  GENERAL NOTES ############
148 |     #  CHANGEME: All functions must implement a "configuration()" function
149 |     #  CHANGEME: The correct type of execution function must be defined for the type of plugin
150 |     ############  GENERAL NOTES ############
151 | 
152 | 
153 |     #  CHANGEME: enrichment: "run(<thing to enrich>, inputs, start_time, any other plugin-specific attributes-MUST HAVE DEFAULTS)
154 |     #  CHANGEME: Enrichment plugin specifics:
155 |     #  -     Created nodes/edges must follow http://blog.infosecanalytics.com/2014/11/cyber-attack-graph-schema-cags-20.html
156 |     #  -     The enrichment should include a node for the <thing to enrich>
157 |     #  -     The enrichment should include a node for the enrichment which is is statically defined & key of "enrichment"
158 |     #  -     An edge should exist from <thing to enrich> to the enrichment node, created at the end after enrichment
159 |     #  -     Each enrichment datum should have a node
160 |     #  -     An edge should exist from <thing to enrich> to each enrichment datum
161 |     #  -     The run function should then return a networkx directed multi-graph including the nodes and edges
162 |     def run(self, enrichment_target, inputs=None, start_time=""):
163 |         """
164 | 
165 |         :param enrichment_target: a string containing a target to enrich
166 |         :return: a networkx graph representing the sections of the domain
167 |         """
168 | 
169 | 
170 |         pass  # TODO: Place enrichment in here
171 | 
172 |         return g
173 | 
174 | 
175 |     #  CHANGEME: interface: enrich(graph, any other plugin-specific attributes-MUST HAVE DEFAULTS)
176 |     #  CHANGEME:            query(topic, max_depth, config, dont_follow, any other plugin-specific attributes-MUST HAVE DEFAULTS)
177 |     #  CHANGEME: Interface plugin specifics:
178 |     #  -     In the most efficient way possible, merge nodes and edges into the storage medium
179 |     #  -     Merger of nodes should be done based on matching key & value.
180 |     #  -     URI should remain static for a given node.
181 |     #  -     Start time should be updated to the sending graph
182 |     #  -     Edges should be added w/o attempts to merge with edges in the storage back end
183 |     #  -     When adding nodes it is highly recommended to keep a node-to-storage-id mapping with a key of the node
184 |     #  -       URI.  This will assist in bulk-adding the edges.
185 |     #  -     Query specifics of interface plugins:
186 |     #  -     In the most efficient way possible retrieve and return the merged subgraph (as a networkx graph) including all nodes and 
187 |     #  -     edges within the max_distance from any node in the topic graph from the storage backend graph.
188 |     #  -     As a default, ['enrichment', 'classification'] should not be followed.
189 |     #  -     The query function must add a 'topic_distance' property to all nodes.
190 |    def enrich(self, g):
191 |         """
192 | 
193 |         :param g: networkx graph to be merged
194 |         :return: Nonetype
195 |         """
196 |         pass  # TODO: Replace this with storage into a backend storage system
197 | 
198 | 
199 |     #  CHANGEME: score: score(subgraph, topic, any other plugin-specific attributes-MUST HAVE DEFAULTS)
200 |     #  CHANGEME: Score plugin specifics:
201 |     #  -     Scoring plugins should take a topic and networkx (sub)graph and return a dictionary keyed with the node (name) and with
202 |     #  -     values of the score assigned to the node for the given topic.
203 |     def score(self, sg, topic):  # get_bayesian_network_probability
204 |         """
205 | 
206 |         :param sg: egocentric subgraph around topic in networkx format
207 |         :param topic: graph of topics
208 |         :return: Dictionary of probabilities keyed by node
209 |         """
210 |         scores = dict() 
211 | 
212 |         pass  # TODO: Replace with code to score the subgraph with respect to the topic
213 | 
214 |         return scores
215 | 
216 | 
217 | 
218 |     #  CHANGEME: minion: minion() 
219 |     #  CHANGEME:        start() 
220 |     #  CHANGEME:        stop()
221 |     #  CHANGEME:        isAlive()
222 |     #  CHANGEME: Minion plugin specifics:
223 |     #  -     Minions fit exist in a separate directory to prevent them importing themselves when they import their own VERUM instance
224 |     #  -     The minion configuration function must take an argument of the parent verum object.  When not present, it shouldn't error but
225 |     #  -      instead return with successful_load set to false and a logging.info message that the parent was not passed in.
226 |     #  -     Must have 4 functions: minion(), start(), and stop() and isAlive()
227 |     #  -     minion() is the function which will be threaded.  **Make sure to call create the new verum instance WITHIN this function
228 |     #  -      to avoid SQLite errors!**
229 |     #  -     start() creates the thread object as an attribute of the plugin class and starts it
230 |     #  -     stop() stops the thread.  Preferably with both a normal exit by setting a shutdown variable of the plugin class as well as a 
231 |     #  -      force stop option which removes the thread object
232 |     #  -     isAlive() calls the thread isAlive() function and returns the status
233 |     def minion(self,  *args, **xargs):
234 |         self.shutdown = False
235 |         
236 |         pass  # TODO: Write the function which will be threaded to form the minion
237 | 
238 |     def start(self, *args, **xargs):
239 |         self.thread = threading.Thread(target=self.minion, *args, **xargs)
240 |         self.thread.start()
241 | 
242 |     def isAlive(self):
243 |         if self.thread is None:
244 |             return False
245 |         else:
246 |             return self.thread.isAlive()
247 | 
248 |     def stop(self, force=True):
249 |         if force:
250 |             self.thread = None  # zero out thread
251 |         else:
252 |             self.shutdown = False  # just dont' iterate.  May take up to (SLEEP_TIME) hours
253 | 
254 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
203 | 


--------------------------------------------------------------------------------
/plugins/bayes_net.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "bayes_net.yapsy-plugin"
 40 | NAME = "BayesNet"
 41 | 
 42 | 
 43 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 44 | 
 45 | 
 46 | ## IMPORTS
 47 | from yapsy.IPlugin import IPlugin
 48 | import logging
 49 | import networkx as nx
 50 | from datetime import datetime # timedelta imported above
 51 | import uuid
 52 | import ConfigParser
 53 | import inspect
 54 | from collections import defaultdict
 55 | import random
 56 | import numpy as np
 57 | 
 58 | 
 59 | ## SETUP
 60 | loc = inspect.getfile(inspect.currentframe())
 61 | ind = loc.rfind("/")
 62 | loc = loc[:ind+1]
 63 | config = ConfigParser.SafeConfigParser()
 64 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 65 | 
 66 | if config.has_section('Core'):
 67 |     if 'name' in config.options('Core'):
 68 |         NAME = config.get('Core', 'name')
 69 | if config.has_section('Log'):
 70 |     if 'level' in config.options('Log'):
 71 |         LOGLEVEL = config.get('Log', 'level')
 72 |     if 'file' in config.options('Log'):
 73 |         LOGFILE = config.get('Log', 'file')
 74 | 
 75 | 
 76 | ## EXECUTION
 77 | class PluginOne(IPlugin):
 78 |     #  TODO: The init should contain anything to load modules or data files that should be variables of the  plugin object
 79 |     def __init__(self):
 80 |         pass
 81 | 
 82 |     #  TODO: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly
 83 |     #  TODO: Current  layout is for an enrichment plugin
 84 |     #  TODO: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
 85 |     #  TODO: interface [type, successful_load, name]
 86 |     #  TODO: query [TBD]
 87 |     #  TODO: minion [TBD]
 88 |     def configure(self):
 89 |         """
 90 | 
 91 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
 92 |         """
 93 |         config_options = config.options("Configuration")
 94 | 
 95 |         if 'cost' in config_options:
 96 |             cost = config.get('Configuration', 'cost')
 97 |         else:
 98 |             cost = 9999
 99 |         if 'speed' in config_options:
100 |             speed = config.get('Configuration', 'speed')
101 |         else:
102 |             speed = 9999
103 | 
104 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
105 |             description = config.get('Documentation', 'description')
106 |         else:
107 |             logging.error("'Description not in config file.")
108 |             return [None, False, NAME, None, cost, speed]
109 | 
110 |         if 'type' in config_options:
111 |             plugin_type = config.get('Configuration', 'type')
112 |         else:
113 |             logging.error("'Type' not specified in config file.")
114 |             return [None, False, NAME, description, cost, speed]
115 | 
116 |         return [plugin_type, True, NAME, description, cost, speed]
117 | 
118 | 
119 |     def score(self, sg, topic):  # get_bayesian_network_probability
120 |         """
121 | 
122 |         :param sg: egocentric subgraph around topic in networkx format
123 |         :param topic: graph of topics
124 |         :return: Dictionary of probabilities keyed by node
125 | 
126 |         NOTE: Will error on cycles in graph
127 |         """
128 |         # Calculate the probability of each node given the topic nodes
129 |         # TODO: Capture the context of relationships as well
130 |         # TODO: Handle loops more elegantly than failing
131 |         # TODO: handle the markov blanket
132 | 
133 |         # setup
134 |         confidences = nx.get_edge_attributes(sg, 'confidence')
135 |         probabilities = defaultdict(lambda: 0)
136 |         queue = list()
137 |         complete_history = random.sample(xrange(10000), 1000)
138 |         complete = set()
139 | 
140 |         for node in topic.nodes():
141 |             probabilities[node] = 1  # The topic nodes are by definition true
142 |             complete.add(node)  # The topic nodes are by definition complete
143 |         for node in sg.nodes():
144 |             for successor in sg.successors(node):
145 |                 queue.append(successor)
146 |         print "Starting probability loop"
147 |         while len(queue) > 0:
148 |             temp = complete_history.pop(0)
149 |             complete_history.append(len(complete))
150 |             if len(set(complete_history)) < 2:
151 |                 print "Error, nothing completed in 1000 rounds."
152 |                 print "Queue length is {0} with {1} unique values".format(len(queue), len(set(queue)))
153 |                 print "Complete is\n{0}".format(len(complete))
154 |                 break
155 |             node = queue.pop(0)
156 |             if node not in complete:  # Only
157 |                 ready_to_calculate = True
158 |                 for predecessor in sg.predecessors(node):
159 |                     if predecessor not in complete:
160 |                         queue.append(predecessor)  # if the node is not complete, enqueue it
161 |                         ready_to_calculate = False  # before we can complete a node, it's predecessors must be complete
162 |                 if ready_to_calculate:
163 |                     try:
164 |                         # INSERT WEIGHTING FUNCTION BELOW
165 |                         cpt = np.array(self.normal_weight(sg.node[node]['topic_distance']))
166 |                     except Exception as e:
167 |                         print "Node: {0}, Attributes: {1}".format(node, sg.node[node])
168 |                         raise e
169 |                     for predecessor in sg.predecessors(node):
170 |                         # If an edge has a confidence, we use it.  Otherwise we assume 100%
171 |                         if (predecessor, node) in confidences:
172 | 
173 |                             confidence = confidences[(predecessor, node)]
174 |                         else:
175 |                             confidence = 1
176 |                     # Calculate the probability based on the bayesian network
177 |                     # Reference: http://cs.nyu.edu/faculty/davise/ai/bayesnet.html
178 |                     # Reference: http://en.wikipedia.org/wiki/Bayes'_theorem
179 |                     # Reference: http://en.wikipedia.org/wiki/Bayesian_network
180 |                     for i in range(2**len(sg.predecessors(node))):
181 |                         # double the rows
182 |                         cpt = np.vstack((cpt, cpt))
183 |                         # create a list that is first half the compliment of the probability and second half the probability
184 |                         new_col = []
185 |                         for j in range(cpt.shape[0]):
186 |                             if j < cpt.shape[0] / float(2):
187 |                                 new_col.append(1 - (confidence * probabilities[predecessor]))
188 |                             else:
189 |                                 new_col.append(confidence * probabilities[predecessor])
190 |                         # Add that column to the CPT
191 |                         cpt = np.column_stack((cpt, new_col))
192 | 
193 |                     # Remove first (all false) row as it should not be summed into the probability
194 |                     #  This is in leu of making the prior probability zero for that row
195 |                     cpt = np.delete(cpt, (0), axis=0)
196 | 
197 |                     # sum the product of each column to get the node probability
198 |                     probabilities[node] = cpt.prod(axis=1).sum()
199 |                     queue = queue + sg.successors(node)  # queue successors to the node
200 |                     complete.add(node)  # add the node as completed
201 | 
202 |                 else:  # It's not ready to be completed
203 |                     queue.append(node)  # requeue the node after it's predecessors
204 | 
205 |         return probabilities
206 | 
207 | 
208 |     def multigraph_to_digraph(self, g):
209 |         """
210 | 
211 |         :param g: takes a networkx mulitgraph
212 |         :return: returns a networkx digraph with edge weights representing the number of edges
213 | 
214 |         NOTE: This butchers duplicate edge properties.  If converting to score, use original edges in output.
215 |         """
216 |         G = nx.DiGraph()
217 |         edge_attributes = {}
218 | 
219 |         # if g isn't really a multigraph, just return it
220 |         if not g.is_multigraph():
221 |             return g
222 | 
223 |         # collapse down to a diagraph
224 |         G.add_nodes_from(g.nodes(data=True))
225 |         G.add_edges_from(g.edges(data=True))
226 | 
227 |         # for each edge, weight the confidence by the number of edges
228 |         '''
229 |         # captures a multiple of the confidence on the edge in the output graph
230 |         for edge in G.edges():
231 |             count = g.edges().count(edge)
232 |             if "count" > 1:
233 |                 if "confidence" in G.edge[edge[0]][edge[1]]:
234 |                     G.edge[edge[0]][edge[1]]['confidence'] *= count
235 |                 else:
236 |                     G.edge[edge[0]][edge[1]]["confidence"] = count
237 |         '''
238 |         # Captures every confidence
239 |         for edge in G.edges():
240 |             confidence = 0
241 |             for src_edge in g.edge[edge[0]][edge[1]].values():
242 |                 confidence += src_edge.get('confidence', 1)
243 |             G.edge[edge[0]][edge[1]]['confidence'] = confidence
244 |     #    # collapse down to a diagraph
245 |     #    G.add_nodes_from(g.nodes(data=True))
246 |     #    G.add_edges_from(g.edges(data=True))
247 | 
248 |         return G
249 | 
250 | 
251 |     ### DISTANCE WEIGHTS ###
252 |     def linear_weight(self, distance, ddp=.2):
253 |         """
254 | 
255 |         :param distance: distance from topic
256 |         :param ddp: percentage to degrade
257 |         :return: Linear weighting factor as float
258 |         """
259 |         return 1 - (distance * ddp)
260 | 
261 | 
262 |     def log_weight(self, distance, a=1, b=1, n=3, pwr=1):
263 |         """
264 | 
265 |         :param distance: distance: distance from topic
266 |         :param a: constant to shape graph. Adjusts hight at 0 = a / (1 + b)
267 |         :param b: constant to shape graph.
268 |         :param n: constant to shape graph.
269 |         :param pwr: constant to shape graph.
270 |         :return: log weighting factor as float
271 |         """
272 |         return a / (1 + b*np.exp((distance-n) * pwr))
273 | 
274 | 
275 |     def exponential_weight(self, distance, b=2):
276 |         return np.exp(-distance/b)
277 | 
278 | 
279 |     def normal_weight(self, distance, pwr=2, a=1.1, b=10, c=1):
280 |         """
281 | 
282 |         :param distance: distance from topic
283 |         :param pwr: constant to shape graph.  Higher = steeper decline
284 |         :param b: constant to shape graph.  lower = greater spread
285 |         :return: normal weighting factor as float
286 |         pwr = 2.5, a = 1, c = 0, b = 30
287 |         """
288 |         return a * np.exp(-(distance + c)**pwr/b)


--------------------------------------------------------------------------------
/minions/alexa_1M.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | from datetime import timedelta
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "alexa_1M.yapsy-plugin"
 40 | NAME = "Alexa Top 1M"
 41 | FEED = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
 42 | SLEEP_TIME = 14400  # 4 hours in seconds
 43 | REFRESH_TIME = timedelta(days=7)
 44 | 
 45 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 46 | 
 47 | 
 48 | ## IMPORTS
 49 | from yapsy.IPlugin import IPlugin
 50 | import logging
 51 | import networkx as nx
 52 | from datetime import datetime # timedelta imported above
 53 | import dateutil
 54 | import uuid
 55 | import ConfigParser
 56 | import inspect
 57 | import requests  # for downloading the intel list
 58 | import ipaddress  # for validating ip addresses
 59 | import time  # for sleep
 60 | import threading  # import threading so minion doesn't block the app
 61 | import imp  # Importing imp to import verum
 62 | import copy
 63 | import tldextract  # used for validating domains
 64 | import zipfile  # for unzipping alexis 1m file
 65 | from StringIO import StringIO  # for opening alexis 1m file in memory
 66 | 
 67 | 
 68 | ## SETUP
 69 | loc = inspect.getfile(inspect.currentframe())
 70 | ind = loc.rfind("/")
 71 | loc = loc[:ind+1]
 72 | config = ConfigParser.SafeConfigParser()
 73 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 74 | 
 75 | if config.has_section('Core'):
 76 |     if 'name' in config.options('Core'):
 77 |         NAME = config.get('Core', 'name')
 78 | if config.has_section('Log'):
 79 |     if 'level' in config.options('Log'):
 80 |         LOGLEVEL = config.get('Log', 'level')
 81 |     if 'file' in config.options('Log'):
 82 |         LOGFILE = config.get('Log', 'file')
 83 |     else:
 84 |         LOGFILE = None
 85 | 
 86 | if LOGFILE:
 87 |     logging.basicConfig(filename=LOGFILE, level=LOGLEVEL)
 88 | else:
 89 |     logging.basicConfig(level=LOGLEVEL)
 90 | 
 91 | ## EXECUTION
 92 | class PluginOne(IPlugin):
 93 |     thread = None
 94 |     app = None  # The object instance
 95 |     Verum = None  # the module
 96 |     today = datetime.strptime("1970", "%Y")  # Today's date
 97 |     shutdown = False  # Used to trigger shutdown of the minion
 98 |     parent = None  # The parent instance of the verum app object
 99 | 
100 |     #  CHANGEME: The init should contain anything to load modules or data files that should be variables of the  plugin object
101 |     def __init__(self):
102 |         """
103 | 
104 |         """
105 |         pass
106 | 
107 |     #  CHANGEME: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly
108 |     #  CHANGEME: Current  layout is for an enrichment plugin
109 |     #  CHANGEME: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
110 |     #  CHANGEME: interface [type, successful_load, name]
111 |     #  CHANGEME: score [type, successful_load, name, description, cost, speed]
112 |     #  CHANGEME: minion [TBD]
113 |     def configure(self, parent=None):
114 |         """
115 | 
116 |         :param verum: The directory of the verum module 
117 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
118 |         """
119 |         global FEED
120 | 
121 |         config_options = config.options("Configuration")
122 | 
123 |         if 'cost' in config_options:
124 |             cost = config.get('Configuration', 'cost')
125 |         else:
126 |             cost = 9999
127 | 
128 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
129 |             description = config.get('Documentation', 'description')
130 |         else:
131 |             logging.error("'Description not in config file.")
132 |             return [None, False, NAME, None, cost]
133 | 
134 |         if 'type' in config_options:
135 |             plugin_type = config.get('Configuration', 'type')
136 |         else:
137 |             logging.error("'Type' not specified in config file.")
138 |             return [None, False, NAME, description, cost]
139 | 
140 |         #  Module import success
141 |         if parent is not None:
142 |             self.parent = parent
143 |         else:
144 |             logging.info("Parent verum app instance not passed to minion.  Please rerun, passing the parent object instance to successfully configure.")
145 |             return [plugin_type, False, NAME, description, cost]
146 | 
147 |         if self.parent.loc is not None:
148 |             # Import the app object so that acces app features (such as the storage backend) can be used.
149 |             fp, pathname, mod_description = imp.find_module("verum", [self.parent.loc])
150 |             self.Verum = imp.load_module("verum", fp, pathname, mod_description)
151 |         else:
152 |             logging.error("'verum' location not supplied to minion configuration function.  Rerun with the location of the verum module specified.")
153 |             return [plugin_type, False, NAME, description, cost]
154 | 
155 |         if 'feed' in config_options:
156 |             FEED = config.get('Configuration', 'feed')
157 |         else:
158 |             logging.error("'Feed' not specified in config file.")
159 |             return [plugin_type, False, NAME, description, cost]
160 | 
161 |         # Return success
162 |         return [plugin_type, True, NAME, description, cost]
163 | 
164 | 
165 |     def minion(self,  storage=None, *args, **xargs):        
166 |         self.app = self.Verum.app(self.parent.PluginFolder, None)
167 |         # set storage
168 |         if storage is None:
169 |             storage = self.parent.storage
170 |         self.app.set_interface(storage)
171 | 
172 |         # Check until stopped
173 |         while not self.shutdown:
174 |             # Check to see if it's the same day, if it is, sleep for a while, otherwise run the import
175 | #            delta = datetime.utcnow() - self.today
176 | #            if delta.days <= 0:
177 |             if datatime.utcnow() <= self.today + REFRESH_TIME:
178 |                 time.sleep(SLEEP_TIME)
179 |             else:
180 |                 logging.info("Starting daily {0} enrichment.".format(NAME))
181 | 
182 |                # Create list of IPs for cymru enrichment
183 |                 ips = set()
184 | 
185 |                 # Get the file
186 |                 r = requests.get(FEED)
187 | 
188 |                 # Unzip the file
189 |                 z = zipfile.ZipFile(StringIO(r.content))
190 | 
191 |                 # get the time
192 |                 dt = datetime.utcnow()
193 | 
194 |                 with z.open('top-1m.csv') as f:
195 |                     for line in f:
196 |                         try:
197 |                             line = line.strip().split(",")
198 | 
199 |                             # Validate data in row
200 |                             ext = tldextract.extract(line[1])
201 |                             if not ext.domain or not ext.suffix:
202 |                                 # domain is not legitimate
203 |                                 next
204 | 
205 |                             # classify benign and merge with current graph
206 |                             g =  self.app.classify.run({'key': 'domain', 'value': line[1], 'classification': 'benign'}, confidence=1 - (int(line[0])-1)/float(1000000))
207 | 
208 |                             # enrich depending on type
209 |                             try:
210 |                                 g = self.Verum.merge_graphs(g, self.app.run_enrichments(line[1], "domain", names=['TLD Enrichment']))
211 |                                 g = self.Verum.merge_graphs(g, self.app.run_enrichments(line[1], "domain", names=['DNS Enrichment']))
212 |                                 g = self.Verum.merge_graphs(g, self.app.run_enrichments(line[1], "domain", names=['IP Whois Enrichment']))
213 |                             except Exception as e:
214 |                                 logging.info("Enrichment of {0} failed due to {1}.".format(line[1], e))
215 |                                 #print "Enrichment of {0} failed due to {1}.".format(domain, e)  # DEBUG
216 |                                 #raise
217 |                                 pass
218 | 
219 |                             # Collect IPs
220 |                             line_ips = set()
221 |                             for node, data in g.nodes(data=True):
222 |                                 if data['key'] == 'ip':
223 |                                     line_ips.add(data['value']) 
224 | 
225 |                             for ip in line_ips:
226 |                                 try:
227 |                                     g = self.Verum.merge_graphs(g, self.app.run_enrichments(ip, "ip", names=[u'Maxmind ASN Enrichment']))
228 |                                 except Exception as e:
229 |                                     logging.info("Enrichment of {0} failed due to {1}.".format(ip, e))
230 |                                     pass
231 | 
232 |                             try:
233 |                                 self.app.store_graph(self.Verum.remove_non_ascii_from_graph(g))
234 |                             except:
235 |                                 print g.nodes(data=True)  # DEBUG
236 |                                 print g.edges(data=True)  # DEBUG
237 |                                 raise
238 | 
239 |                             ips = ips.union(line_ips)
240 |                             # Do cymru enrichment
241 |                             if len(ips) >= 50:
242 |                                 # validate IPs
243 |                                 ips2 = set()
244 |                                 for ip in ips:
245 |                                     try:
246 |                                         _ = ipaddress.ip_address(unicode(ip))
247 |                                         ips2.add(ip)
248 |                                     except:
249 |                                         pass
250 |                                 ips = ips2
251 |                                 del(ips2)
252 |                                 try:
253 |                                     self.app.store_graph(self.app.run_enrichments(ips, 'ip', names=[u'Cymru Enrichment']))
254 |                                     #print "Cymru enrichment complete."
255 |                                 except Exception as e:
256 |                                     logging.info("Cymru enrichment of {0} IPs failed due to {1}.".format(len(ips), e))
257 |                                     #print "Cymru enrichment of {0} IPs failed due to {1}.".format(len(ips), e)  # DEBUG
258 |                                     pass
259 |                                 ips = set()
260 | 
261 |                         except Exception as e:
262 |                             print line
263 |                             print e
264 |                             raise
265 | 
266 |                 # Copy today's date to today
267 |                 self.today = datetime.utcnow()
268 | 
269 |                 logging.info("Daily {0} enrichment complete.".format(NAME))
270 |                 print "Daily {0} enrichment complete.".format(NAME)  # DEBUG
271 | 
272 |     def start(self, *args, **xargs):
273 |         self.shutdown = False
274 |         self.thread = threading.Thread(target=self.minion, *args, **xargs)
275 |         self.thread.start()
276 | 
277 |     def isAlive(self):
278 |         if self.thread is None:
279 |             return False
280 |         else:
281 |             return self.thread.isAlive()
282 | 
283 | 
284 |     def stop(self, force=True):
285 |         if force:
286 |             self.thread = None  # zero out thread
287 |         else:
288 |             self.shutdown = False  # just dont' iterate.  May take up to (SLEEP_TIME) hours
289 | 


--------------------------------------------------------------------------------
/verum/helper.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 |  AUTHOR: Gabriel Bassett
  4 |  DATE: <01-23-2015>
  5 |  DEPENDENCIES: <a list of modules requiring installation>
  6 |  Copyright 2015 Gabriel Bassett
  7 | 
  8 |  LICENSE:
  9 | Licensed to the Apache Software Foundation (ASF) under one
 10 | or more contributor license agreements.  See the NOTICE file
 11 | distributed with this work for additional information
 12 | regarding copyright ownership.  The ASF licenses this file
 13 | to you under the Apache License, Version 2.0 (the
 14 | "License"); you may not use this file except in compliance
 15 | with the License.  You may obtain a copy of the License at
 16 | 
 17 |   http://www.apache.org/licenses/LICENSE-2.0
 18 | 
 19 | Unless required by applicable law or agreed to in writing,
 20 | software distributed under the License is distributed on an
 21 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 22 | KIND, either express or implied.  See the License for the
 23 | specific language governing permissions and limitations
 24 | under the License.
 25 | 
 26 |  DESCRIPTION:
 27 |  <A description of the software>
 28 | 
 29 |  NOTES:
 30 |  <No Notes>
 31 | 
 32 |  ISSUES:
 33 |  <No Issues>
 34 | 
 35 |  TODO:
 36 |  <No TODO>
 37 | 
 38 | """
 39 | # PRE-USER SETUP
 40 | import logging
 41 | 
 42 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 43 | 
 44 | 
 45 | # USER VARIABLES
 46 | CONFIG_FILE = ""
 47 | LOGLEVEL = logging.DEBUG
 48 | LOG = None
 49 | 
 50 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 51 | 
 52 | 
 53 | ## IMPORTS
 54 | 
 55 | import argparse
 56 | import ConfigParser
 57 | import networkx as nx
 58 | import urlparse
 59 | import numpy as np
 60 | from scipy import stats  # for percentile
 61 | 
 62 | ## SETUP
 63 | __author__ = "Gabriel Bassett"
 64 | 
 65 | if __name__ == "__main__":
 66 |     # Parse Arguments (should correspond to user variables)
 67 |     parser = argparse.ArgumentParser(description='This script processes a graph.')
 68 |     parser.add_argument('-d', '--debug',
 69 |                         help='Print lots of debugging statements',
 70 |                         action="store_const", dest="loglevel", const=logging.DEBUG,
 71 |                         default=LOGLEVEL
 72 |                        )
 73 |     parser.add_argument('-v', '--verbose',
 74 |                         help='Be verbose',
 75 |                         action="store_const", dest="loglevel", const=logging.INFO
 76 |                        )
 77 |     parser.add_argument('--log', help='Location of log file', default=LOG)
 78 |     args = parser.parse_args()
 79 | 
 80 | # add config arguments
 81 | if __name__ == "__main__":
 82 |     CONFIG_FILE = args.config
 83 | try:
 84 |   config = ConfigParser.SafeConfigParser()
 85 |   config.readfp(open(CONFIG_FILE))
 86 |   config_exists = True
 87 | except:
 88 |   config_exists = False
 89 | if config_exists:
 90 |     if config.has_section('LOGGING'):
 91 |         if 'level' in config.options('LOGGING'):
 92 |             level = config.get('LOGGING', 'level')
 93 |             if level == 'debug':
 94 |                 loglevel = logging.DEBUG
 95 |             elif level == 'verbose':
 96 |                 loglevel = logging.INFO
 97 |             else:
 98 |                 loglevel = logging.WARNING
 99 |         else:
100 |             loglevel = logging.WARNING
101 |         if 'log' in config.options('LOGGING'):
102 |             log = config.get('LOGGING', 'log')
103 |         else:
104 |             log = None
105 | 
106 | 
107 | ## Set up Logging
108 | if __name__ == "__main__":
109 |     if args.log is not None:
110 |         logging.basicConfig(filename=args.log, level=args.loglevel)
111 |     else:
112 |         logging.basicConfig(level=args.loglevel)
113 | # <add other setup here>
114 | 
115 | 
116 | ## GLOBAL EXECUTION
117 | pass
118 | 
119 | 
120 | ## FUNCTION DEFINITION
121 | def create_topic(properties, prefix=""):
122 |     """
123 | 
124 |     :param properties: A dictionary of properties
125 |     :param prefix: If nodes are stored with a pref
126 |     :return: A topic graph in networkx format with one node per property
127 | 
128 |     NOTE: If multiple values of a certain type, (e.g. multiple IPs) make the value of the type
129 |            in the dictionary a list.
130 |     """
131 |     g = nx.DiGraph()
132 | 
133 |     if type(properties) == dict:
134 |         iterator = properties.iteritems()
135 |     else:
136 |         iterator = iter(properties)
137 | 
138 | 
139 |     for key, value in iterator:
140 |         if type(value) in (list, set, np.ndarray):
141 |             for v in value:
142 |                 node_uri = "{2}class=attribute&key={0}&value={1}".format(key, v, prefix)
143 |                 g.add_node(node_uri, {
144 |                     'class': 'attribute',
145 |                     'key': key,
146 |                     'value': v,
147 |                     'uri': node_uri
148 |                 })
149 |         else:
150 |             node_uri = "{2}class=attribute&key={0}&value={1}".format(key, value, prefix)
151 |             g.add_node(node_uri, {
152 |                 'class': 'attribute',
153 |                 'key': key,
154 |                 'value': value,
155 |                 'uri': node_uri
156 |             })
157 | 
158 |     return g
159 | 
160 | 
161 | def validate_uri(uri):
162 |     """
163 | 
164 |     :param uri: a URI string to be validated
165 |     :return: bool true if valid, false if not
166 |     """
167 |     # TODO: Validate the order properties are in (important for uri hash lookup)
168 | 
169 |     try:
170 |         properties = urlparse.parse_qs(urlparse.urlparse(uri).query)
171 |     except:
172 |         return False
173 |     if u'key' not in properties:
174 |         return False
175 |     elif len(properties[u'key']) != 1:
176 |         return False
177 |     if u'value' not in properties:
178 |         return False
179 |     elif len(properties[u'value']) != 1:
180 |         return False
181 |     if u'attribute' not in properties:
182 |         return False
183 |     elif len(properties[u'attribute']) != 1:
184 |         return False
185 |     # Nothing failed, return true
186 |     return True
187 | 
188 | 
189 | def get_topic_distance(sg, topic):
190 |     """
191 | 
192 |     :param sg: an egocentric subgraph in networkx format
193 |     :param topic: a networkx graph of nodes representing the topic
194 |     :return: a dictionary of key node name and value distance as integer
195 |     """
196 |     distances = dict()
197 | 
198 |     # get all the distances
199 |     for tnode in topic.nodes():
200 |         if tnode in sg.nodes():
201 |             distances[tnode] = nx.shortest_path_length(sg, source=tnode)
202 | 
203 |     # get the smallest distance per key
204 |     min_dist = dict()
205 |     for key in distances:
206 |         for node in distances[key]:
207 |             if node not in min_dist:
208 |                 min_dist[node] = distances[key][node]
209 |             elif distances[key][node] < min_dist[node]:
210 |                 min_dist[node] = distances[key][node]
211 | 
212 | 
213 |     # Return the dict
214 |     return min_dist
215 | 
216 | 
217 | def compare_classifications(scores, node1, node2=None, output="print"):
218 |     """
219 | 
220 |     :param scores: dictionary keyed by nodes and values of scores
221 |     :param node1: dictionary of {"class":<class>, "key":<key>, "value":<value>}
222 |     :param node2: dictionary of {"class":<class>, "key":<key>, "value":<value>}.  If empty, score will be compared to the median
223 |     :param output: string representing how to output the data.  "print" to print it, dictionary otherwise
224 |     :return: ratio of node 1 to node 2 scores normalized to the lower score as dictionary
225 |     """
226 |     node1_uri = "class={0}&key={1}&value={2}".format(node1['class'], node1['key'], node1['value'])
227 | 
228 |     node1_score = scores[node1_uri]
229 |     if node2 is None:
230 |         node2_score = np.median(scores.values())
231 |     else:
232 |         node2_uri = "class={0}&key={1}&value={2}".format(node2['class'], node2['key'], node2['value'])
233 |         node2_score = scores[node2_uri]
234 | 
235 |     if node1_score > node2_score:
236 |         larger = "node1"
237 |     else:
238 |         larger = "node2"
239 | 
240 |     if output == "print":
241 |         if node2 is None:
242 |             if larger == "node2":
243 |                 print "The ratio of node 1 ({0}:{1}) to the median ({2}) is {3}:{4}.".format(node1['key'],
244 |                                                                                              node1['value'],
245 |                                                                                              node2_score,
246 |                                                                                              round(node1_score/float(node1_score), 4),
247 |                                                                                              round(node2_score/float(node1_score), 4))
248 |             else:
249 |                 print "The ratio of node 1 ({0}:{1}) to the median ({2}) is {3}:{4}.".format(node1['key'],
250 |                                                                                              node1['value'],
251 |                                                                                              node2_score,
252 |                                                                                              round(node1_score/float(node2_score), 4),
253 |                                                                                              round(node2_score/float(node2_score), 4))   
254 |         else:
255 |             if larger == "node2":
256 |                 print "The ratio of node 1 ({0}:{1}) to node 2 ({2}:{3}) is {4}:{5}.".format(node1['key'],
257 |                                                                                              node1['value'],
258 |                                                                                              node2['key'],
259 |                                                                                              node2['value'],
260 |                                                                                              round(node1_score/float(node1_score), 4),
261 |                                                                                              round(node2_score/float(node1_score), 4))
262 |             else:
263 |                 print "The ratio of node 1 ({0}:{1}) to node 2 ({2}:{3}) is {4}:{5}.".format(node1['key'],
264 |                                                                                              node1['value'],
265 |                                                                                              node2['key'],
266 |                                                                                              node2['value'],
267 |                                                                                              round(node1_score/float(node2_score), 4),
268 |                                                                                              round(node2_score/float(node2_score), 4))        
269 |     else:
270 |         if larger == "node2":
271 |             return {"node1": node1_score/float(node1_score), "node2":node2_score/float(node1_score)}
272 |         else:
273 |             return {"node1": node1_score/float(node2_score), "node2":node2_score/float(node2_score)}
274 | 
275 | 
276 | def score_percentile(scores, node, output="print"):
277 |     """
278 | 
279 |     :param scores: dictionary keyed by nodes and values of scores
280 |     :param node1: dictionary of {"class":<class>, "key":<key>, "value":<value>}
281 |     :param output: string representing how to output the data.  "print" to print it, dictionary otherwise
282 |     :return: the percentile the node is in.  Higher means more likely.ff
283 |     """
284 |     node_uri = "class={0}&key={1}&value={2}".format(node['class'], node['key'], node['value'])
285 | 
286 |     p =stats.percentileofscore(scores.values(), scores[node_uri])
287 | 
288 |     if output == "print":
289 |         print "The percentile of the node is {0}.".format(round(p, 4))
290 |     else:
291 |         return p
292 | 
293 | 
294 | def merge_graphs(g1, g2):
295 |     """
296 | 
297 |     """
298 |     g = g1.copy()
299 |     for node, props in g2.nodes(data=True):
300 |         g.add_node(node, props)
301 |     for edge in g2.edges(data=True):
302 |         g.add_edge(edge[0], edge[1], attr_dict=edge[2])
303 | 
304 |     return g
305 | 
306 | 
307 | def removeNonAscii(s): return u"".join(i for i in s if ord(i)<128).encode('utf8')
308 | 
309 | 
310 | def remove_non_ascii_from_graph(g):
311 |     """ networkx graph -> networkx graph
312 | 
313 |     :param g: A networkx graph
314 |     :return: a networkx graph with nonAscii removed from all node and edge attributes
315 |     """
316 |     # ascii safe node key and value
317 |     for node, data in g.nodes(data=True):
318 |         for attr in data.keys():
319 |             data[attr] = removeNonAscii(data[attr])
320 |         g.node[node] = data
321 | 
322 |     if type(g) in [nx.classes.multidigraph.MultiDiGraph, nx.classes.multigraph.MultiGraph]:
323 |         for edge in g.edges(data=True, keys=True):
324 |             edge_attr = edge[3]
325 |             for attr in edge_attr:
326 |                 if type(edge_attr[attr]) is str:
327 |                     edge_attr[attr] = removeNonAscii(edge_attr[attr])
328 |             g.edge[edge[0]][edge[1]][edge[2]] = edge_attr
329 |     else:
330 |         for edge in g.edges(data=True):
331 |             edge_attr = edge[2]
332 |             for attr in edge_attr:
333 |                 if type(edge_attr[attr]) is str:
334 |                     edge_attr[attr] = removeNonAscii(edge_attr[attr])
335 |             g.edge[edge[0]][edge[1]] = edge_attr
336 | 
337 | 
338 |     # return the safed node
339 |     return g
340 | 
341 | ## MAIN LOOP EXECUTION
342 | def main():
343 |     logging.info('Beginning main loop.')
344 | 
345 |     logging.info('Ending main loop.')
346 | 
347 | if __name__ == "__main__":
348 |     main()


--------------------------------------------------------------------------------
/minions/osint_bambenekconsulting_com.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "osint_bambenekconsulting_com.yapsy-plugin"
 40 | NAME = "OSINT Bambenek Consulting"
 41 | keys = {u'IP': "ip", u'Domain': "domain", u'Nameserver IP': "ip", u'Nameserver': "domain"}
 42 | nameserver = {u'IP': False, u'Domain': False, u'Nameserver IP': True, u'Nameserver': True}
 43 | FEED = "http://osint.bambenekconsulting.com/feeds/c2-masterlist.txt"
 44 | SLEEP_TIME = 14400  # 4 hours in seconds
 45 | 
 46 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 47 | 
 48 | 
 49 | ## IMPORTS
 50 | from yapsy.IPlugin import IPlugin
 51 | import logging
 52 | import networkx as nx
 53 | from datetime import datetime # timedelta imported above
 54 | import dateutil
 55 | import uuid
 56 | import ConfigParser
 57 | import inspect
 58 | import pandas as pd  # for organizing the intel list data
 59 | import requests  # for downloading the intel list
 60 | import ipaddress  # for validating ip addresses
 61 | import time  # for sleep
 62 | import threading  # import threading so minion doesn't block the app
 63 | import imp  # Importing imp to import verum
 64 | 
 65 | ## SETUP
 66 | loc = inspect.getfile(inspect.currentframe())
 67 | ind = loc.rfind("/")
 68 | loc = loc[:ind+1]
 69 | config = ConfigParser.SafeConfigParser()
 70 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 71 | 
 72 | if config.has_section('Core'):
 73 |     if 'name' in config.options('Core'):
 74 |         NAME = config.get('Core', 'name')
 75 | if config.has_section('Log'):
 76 |     if 'level' in config.options('Log'):
 77 |         LOGLEVEL = config.get('Log', 'level')
 78 |     if 'file' in config.options('Log'):
 79 |         LOGFILE = config.get('Log', 'file')
 80 | 
 81 | 
 82 | ## EXECUTION
 83 | class PluginOne(IPlugin):
 84 |     thread = None
 85 |     app = None  # The object instance
 86 |     Verum = None  # the module
 87 |     yesterday = pd.DataFrame(columns=("indicator", "context", "date", "source", "key", "threat"))  # Yesterday's data
 88 |     today = None  # Today's date
 89 |     shutdown = False  # Used to trigger shutdown of hte minion
 90 |     parent = None  # The parent instance of the verum app object
 91 | 
 92 |     #  CHANGEME: The init should contain anything to load modules or data files that should be variables of the  plugin object
 93 |     def __init__(self):
 94 |         """
 95 | 
 96 |         """
 97 |         pass
 98 | 
 99 |     #  CHANGEME: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly
100 |     #  CHANGEME: Current  layout is for an enrichment plugin
101 |     #  CHANGEME: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
102 |     #  CHANGEME: interface [type, successful_load, name]
103 |     #  CHANGEME: score [type, successful_load, name, description, cost, speed]
104 |     #  CHANGEME: minion [TBD]
105 |     def configure(self, parent=None):
106 |         """
107 | 
108 |         :param verum: The directory of the verum module 
109 |         :return: return list of [configure success (bool), name, description, list of acceptable inputs, resource cost (1-10, 1=low), speed (1-10, 1=fast)]
110 |         """
111 |         global FEED
112 | 
113 |         config_options = config.options("Configuration")
114 | 
115 |         if 'cost' in config_options:
116 |             cost = config.get('Configuration', 'cost')
117 |         else:
118 |             cost = 9999
119 | 
120 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
121 |             description = config.get('Documentation', 'description')
122 |         else:
123 |             logging.error("'Description not in config file.")
124 |             return [None, False, NAME, None, cost]
125 | 
126 |         if 'type' in config_options:
127 |             plugin_type = config.get('Configuration', 'type')
128 |         else:
129 |             logging.error("'Type' not specified in config file.")
130 |             return [None, False, NAME, description, cost]
131 | 
132 |         #  Module import success
133 |         if parent is not None:
134 |             self.parent = parent
135 |         else:
136 |             logging.info("Parent verum app instance not passed to minion.  Please rerun, passing the parent object instance to successfully configure.")
137 |             return [plugin_type, False, NAME, description, cost]
138 | 
139 |         if self.parent.loc is not None:
140 |             # Import the app object so that acces app features (such as the storage backend) can be used.
141 |             fp, pathname, mod_description = imp.find_module("verum", [self.parent.loc])
142 |             self.Verum = imp.load_module("verum", fp, pathname, mod_description)
143 |         else:
144 |             logging.error("'verum' location not supplied to minion configuration function.  Rerun with the location of the verum module specified.")
145 |             return [plugin_type, False, NAME, description, cost]
146 | 
147 |         if 'feed' in config_options:
148 |             FEED = config.get('Configuration', 'feed')
149 |         else:
150 |             logging.error("'Feed' not specified in config file.")
151 |             return [plugin_type, False, NAME, description, cost]
152 | 
153 |         # Return success
154 |         return [plugin_type, True, NAME, description, cost]
155 | 
156 | 
157 |     def minion(self,  storage=None, *args, **xargs):
158 |         self.app = self.Verum.app(self.parent.PluginFolder, None)
159 |         # set storage
160 |         if storage is None:
161 |             storage = self.parent.storage
162 |         self.app.set_interface(storage)
163 | 
164 |         # Check until stopped
165 |         while not self.shutdown:
166 |             # Check to see if it's the same day, if it is, sleep for a while, otherwise run the import
167 |             if datetime.utcnow() == self.today:
168 |                 time.sleep(SLEEP_TIME)
169 |             else:
170 |                 # Get the file
171 |                 r = requests.get(FEED)
172 | 
173 |                 # split it out
174 |                 feed = r.text.split("\n")
175 | 
176 |                 df = pd.DataFrame(columns=("indicator", "context", "date", "source"))
177 |                 # load the feed into a dataframe line, by, line.  I know it's slow.  
178 |                 for line in feed:
179 |                     if line and line[0] != "#":
180 |                         l = line.split(",")
181 |                         if len(l) == 4:
182 |                             df.loc[df.shape[0]] = l
183 | 
184 |                 # Index([u'indicator', u'context', u'date', u'source', u'key', u'threat'], dtype='object')
185 |                 df = pd.concat([df, pd.DataFrame(df.context.str.split(' used by ',1).tolist(), columns = ['key','threat'])], axis=1)
186 | 
187 |                 # Create list of IPs for cymru enrichment
188 |                 ips = set()
189 | 
190 |                 for row in df.iterrows():
191 |                     # Don't add it if it was added yesterday w/ same origination date
192 |                     if not ((self.yesterday['indicator'] == row[1]['indicator']) & 
193 |                             (self.yesterday['date'] == row[1]['date']) & 
194 |                             (self.yesterday['key'] == row[1]['key']) & 
195 |                             (self.yesterday['threat'] == row[1]['threat'])).any():
196 | 
197 |                         g = nx.MultiDiGraph()
198 |                         
199 |                         # convert date to correct format
200 |                         dt = dateutil.parser.parse(row[1]['date']).strftime("%Y-%m-%dT%H:%M:%SZ")
201 | 
202 |                         # Add indicator to graph
203 |                         ## (Must account for the different types of indicators)
204 |                         key = keys[row[1]['key']]
205 |                         target_uri = "class=attribute&key={0}&value={1}".format(key, row[1]['indicator']) 
206 |                         g.add_node(target_uri, {
207 |                             'class': 'attribute',
208 |                             'key': key,
209 |                             "value": row[1]['indicator'],
210 |                             "start_time": dt,
211 |                             "uri": target_uri
212 |                         })
213 | 
214 |                         # Add threat to list
215 |                         if row[1]['threat'][-4:] == u' C&C':
216 |                             CandC = True
217 |                             threat = row[1]['threat'][:-4]
218 |                         else:
219 |                             CandC = False
220 |                             threat = row[1]['threat']
221 | 
222 |                         # Threat node
223 |                         threat_uri = "class=attribute&key={0}&value={1}".format("malware", threat) 
224 |                         g.add_node(threat_uri, {
225 |                             'class': 'attribute',
226 |                             'key': "malware",
227 |                             "value": threat,
228 |                             "start_time": dt,
229 |                             "uri": threat_uri
230 |                         })
231 | 
232 |                         # Threat Edge
233 |                         edge_attr = {
234 |                             "relationship": "describedBy",
235 |                             "origin": row[1]['source'],
236 |                             "start_time": dt,
237 |                         }
238 |                         # test for nameserver and update edge_attr
239 |                         if nameserver[row[1]['key']] == True:
240 |                             edge_attr['describedBy'] = 'nameserver'
241 |                         source_hash = uuid.uuid3(uuid.NAMESPACE_URL, target_uri)
242 |                         dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, threat_uri)
243 |                         edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
244 |                         rel_chain = "relationship"
245 |                         while rel_chain in edge_attr:
246 |                             edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
247 |                             rel_chain = edge_attr[rel_chain]
248 |                         if "origin" in edge_attr:
249 |                             edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
250 |                         edge_attr["uri"] = edge_uri
251 |                         g.add_edge(target_uri, threat_uri, edge_uri, edge_attr)
252 | 
253 |                         # Add C&C to list if applicable
254 |                         if CandC:
255 |                             # C2 node
256 |                             c2_uri = "class=attribute&key={0}&value={1}".format("classification", "c2") 
257 |                             g.add_node(c2_uri, {
258 |                                 'class': 'attribute',
259 |                                 'key': "classification",
260 |                                 "value": "c2",
261 |                                 "start_time": dt,
262 |                                 "uri": c2_uri
263 |                             })
264 | 
265 |                             # C2 Edge
266 |                             edge_attr = {
267 |                                 "relationship": "describedBy",
268 |                                 "origin": row[1]['source'],
269 |                                 "start_time": dt,
270 |                             }
271 |                             # test for nameserver and update edge_attr
272 |                             if nameserver[row[1]['key']] == True:
273 |                                 edge_attr['describedBy'] = 'nameserver'
274 |                             source_hash = uuid.uuid3(uuid.NAMESPACE_URL, target_uri)
275 |                             dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, c2_uri)
276 |                             edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
277 |                             rel_chain = "relationship"
278 |                             while rel_chain in edge_attr:
279 |                                 edge_uri = edge_uri + "&{0}={1}".format(rel_chain,edge_attr[rel_chain])
280 |                                 rel_chain = edge_attr[rel_chain]
281 |                             if "origin" in edge_attr:
282 |                                 edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
283 |                             edge_attr["uri"] = edge_uri
284 |                             g.add_edge(target_uri, c2_uri, edge_uri, edge_attr)
285 | 
286 | 
287 |                         # classify malicious and merge with current graph
288 |                         g = self.Verum.merge_graphs(g, self.app.classify.run({'key': key, 'value': row[1]['indicator'], 'classification': 'malice'}))
289 | 
290 |                         # enrich depending on type
291 |                         try:
292 |                             g = self.Verum.merge_graphs(g, self.app.run_enrichments(row[1]['indicator'], key, names=[u'DNS Enrichment', u'TLD Enrichment', u'Maxmind ASN Enrichment', 'IP Whois Enrichment']))
293 |                         except Exception as e:
294 |                             #print "Enrichment of {0} failed due to {1}.".format(row[1]['indicator'], e)  # DEBUG
295 |                             logging.info("Enrichment of {0} failed due to {1}.".format(row[1]['indicator'], e))
296 |                             pass
297 |                             
298 |                         # add to ip list if appropriate
299 |                         if key == "ip":
300 |                             try:
301 |                                 _ = ipaddress.ip_address(unicode(row[1]['indicator']))
302 |                                 ips.add(self.app.run_enrichments(row[1]['indicator']))
303 |                             except:
304 |                                 pass
305 | 
306 |                         try:
307 |                             self.app.store_graph(self.Verum.remove_non_ascii_from_graph(g))
308 |                         except:
309 |                             print g.nodes(data=True)  # DEBUG
310 |                             print g.edges(data=True)  # DEBUG
311 |                             raise
312 | 
313 |                     if len(ips) >= 50:
314 |                         # Do cymru enrichment
315 |                         try:
316 |                             self.app.store_graph(self.app.run_enrichments(ips, 'ip', names=[u'Cymru Enrichment']))
317 |                         except:
318 |                             logging.info("Cymru enrichment of {0} IPs failed.".format(len(ips)))
319 |                             pass
320 |                     ips = set()
321 |                 
322 |                 # Copy today's data to yesterday
323 |                 self.yesterday = df
324 | 
325 |                 # Copy today's date to today
326 |                 self.today = datetime.utcnow()
327 | 
328 | 
329 |     def start(self, *args, **xargs):
330 |         self.thread = threading.Thread(target=self.minion, *args, **xargs)
331 |         self.thread.start()
332 | 
333 |     def isAlive(self):
334 |         if self.thread is None:
335 |             return False
336 |         else:
337 |             return self.thread.isAlive()
338 | 
339 | 
340 |     def stop(self, force=True):
341 |         if force:
342 |             self.thread = None  # zero out thread
343 |         else:
344 |             self.shutdown = False  # just dont' iterate.  May take up to (SLEEP_TIME) hours
345 | 


--------------------------------------------------------------------------------
/minions/edge_consolidator.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | __author__ = "Gabriel Bassett"
  4 | """
  5 |  AUTHOR: {0}
  6 |  DATE: <DATE>
  7 |  DEPENDENCIES: <a list of modules requiring installation>
  8 |  Copyright <YEAR> {0}
  9 | 
 10 |  LICENSE:
 11 | Licensed to the Apache Software Foundation (ASF) under one
 12 | or more contributor license agreements.  See the NOTICE file
 13 | distributed with this work for additional information
 14 | regarding copyright ownership.  The ASF licenses this file
 15 | to you under the Apache License, Version 2.0 (the
 16 | "License"); you may not use this file except in compliance
 17 | with the License.  You may obtain a copy of the License at
 18 | 
 19 |   http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing,
 22 | software distributed under the License is distributed on an
 23 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 24 | KIND, either express or implied.  See the License for the
 25 | specific language governing permissions and limitations
 26 | under the License.
 27 | 
 28 |  DESCRIPTION:
 29 |  <ENTER DESCRIPTION>
 30 | 
 31 | """.format(__author__)
 32 | # PRE-USER SETUP
 33 | pass
 34 | 
 35 | ########### NOT USER EDITABLE ABOVE THIS POINT #################
 36 | 
 37 | 
 38 | # USER VARIABLES
 39 | PLUGIN_CONFIG_FILE = "edge_consolidator.yapsy-plugin"  # CHANGEME
 40 | NAME = "Neo4j Edge Consolidator"  # CHANGEME
 41 | JUMP = 0.9
 42 | NEO4J_HOST = 'localhost'
 43 | NEO4J_PORT = '7474'
 44 | LOGFILE = None
 45 | USERNAME = None
 46 | PASSWORD = None
 47 | SLEEP_TIME = 5
 48 | 
 49 | ########### NOT USER EDITABLE BELOW THIS POINT #################
 50 | 
 51 | 
 52 | ## IMPORTS
 53 | from yapsy.IPlugin import IPlugin
 54 | import logging
 55 | from collections import defaultdict  # used for storing duplicate edges
 56 | import networkx as nx
 57 | from datetime import datetime # timedelta imported above
 58 | import uuid
 59 | import ConfigParser
 60 | import inspect
 61 | import threading
 62 | try:
 63 |     from py2neo import Graph as py2neoGraph
 64 |     from py2neo import Node as py2neoNode
 65 |     from py2neo import Relationship as py2neoRelationship
 66 |     from py2neo import authenticate as py2neoAuthenticate
 67 |     neo_import = True
 68 | except:
 69 |     logging.error("Neo4j plugin did not load.")
 70 |     neo_import = False
 71 | import imp  # for verum import
 72 | import random  # for jumps
 73 | from time import sleep # for sleeping between iterations
 74 | 
 75 | ## SETUP
 76 | random.seed()
 77 | 
 78 | loc = inspect.getfile(inspect.currentframe())
 79 | ind = loc.rfind("/")
 80 | loc = loc[:ind+1]
 81 | config = ConfigParser.SafeConfigParser()
 82 | config.readfp(open(loc + PLUGIN_CONFIG_FILE))
 83 | 
 84 | if config.has_section('Core'):
 85 |     if 'name' in config.options('Core'):
 86 |         NAME = config.get('Core', 'name')
 87 | if config.has_section('Log'):
 88 |     if 'level' in config.options('Log'):
 89 |         LOGLEVEL = config.get('Log', 'level')
 90 |     if 'file' in config.options('Log'):
 91 |         LOGFILE = config.get('Log', 'file')
 92 | if config.has_section('neo4j'):
 93 |     if 'host' in config.options('neo4j'):
 94 |         NEO4J_HOST = config.get('neo4j', 'host')
 95 |     if 'port' in config.options('neo4j'):
 96 |         NEO4J_PORT = config.get('neo4j', 'port')
 97 |     if 'username' in config.options('neo4j'):
 98 |         USERNAME = config.get('neo4j', 'username')
 99 |     if 'password' in config.options('neo4j'):
100 |         PASSWORD = config.get('neo4j', 'password')
101 | 
102 | ## EXECUTION
103 | class PluginOne(IPlugin):
104 |     storage = None
105 |     thread = None
106 |     app = None  # The object instance
107 |     Verum = None  # the module
108 |     shutdown = False  # Used to trigger shutdown of the minion
109 |     parent = None  # The parent instance of the verum app object
110 |     neo4j_config = None
111 |     sleep_time = SLEEP_TIME
112 |     jump = JUMP
113 | 
114 |     #  CHANGEME: The init should contain anything to load modules or data files that should be variables of the  plugin object
115 |     def __init__(self):
116 |         pass
117 | 
118 |     #  CHANGEME: Configuration needs to set the values needed to identify the plugin in the plugin database as well as ensure everyhing loaded correctly
119 |     #  CHANGEME: Current  layout is for an enrichment plugin
120 |     #  CHANGEME: enrichment [type, successful_load, name, description, inputs to enrichment such as 'ip', cost, speed]
121 |     #  CHANGEME: interface [type, successful_load, name]
122 |     #  CHANGEME: score [type, successful_load, name, description, cost, speed]
123 |     #  CHANGEME: minion [type, successful_load, name, description, cost]
124 |     def configure(self, parent=None):
125 |         """
126 | 
127 |         :return: return list of configuration variables starting with [plugin_type, successful_load, name, description, <PLUGIN TYPE SPECIFIC VALUES>]
128 |         """
129 |         config_options = config.options("Configuration")
130 | 
131 |         # Cost and speed are not applicable to all plugin types
132 |         if 'cost' in config_options:
133 |             cost = config.get('Configuration', 'cost')
134 |         else:
135 |             cost = 9999
136 |         if 'jump' in config_options:
137 |             self.jump = config.get('Configuration', 'jump')
138 |         if 'sleep_time' in config_options:
139 |             self.sleep_time = float(config.get('Configuration', 'sleep_time'))
140 | 
141 | 
142 |         if config.has_section('Documentation') and 'description' in config.options('Documentation'):
143 |             description = config.get('Configuration', 'type')
144 |         else:
145 |             logging.error("'Description not in config file.")
146 |             return [None, False, NAME, cost]
147 | 
148 |         if 'type' in config_options:
149 |             plugin_type = config.get('Configuration', 'type')
150 |         else:
151 |             logging.error("'Type' not specified in config file.")
152 |             return [None, False, NAME, description, cost]
153 | 
154 |         # Module success is only applicable to plugins which import unique code
155 |         if parent is not None:
156 |             self.parent = parent
157 |         else:
158 |             logging.info("Parent verum app instance not passed to minion.  Please rerun, passing the parent object instance to successfully configure.")
159 |             return [plugin_type, False, NAME, description, cost]
160 | 
161 |         if self.parent.loc is not None:
162 |             # Import the app object so that acces app features (such as the storage backend) can be used.
163 |             fp, pathname, mod_description = imp.find_module("verum", [self.parent.loc])
164 |             self.Verum = imp.load_module("verum", fp, pathname, mod_description)
165 |         else:
166 |             logging.error("'verum' location not supplied to minion configuration function.  Rerun with the location of the verum module specified.")
167 |             return [plugin_type, False, NAME, description, cost]
168 | 
169 |         # Ensure a neo4j storage plugin
170 |         if not neo_import:
171 |             logging.error("Py2neo import failed.  Ensure py2neo v2.* is installed.")
172 |             return [plugin_type, False, NAME, description, cost]
173 | 
174 |         try:
175 |             self.set_neo4j_config(NEO4J_HOST, NEO4J_PORT, USERNAME, PASSWORD)
176 |         except Exception as e:
177 |             logging.error("Neo4j configuration failed with error {0}.  Check host, port, username, and password.".format(e))
178 |             return [plugin_type, False, NAME, description, cost]
179 | 
180 | 
181 |         return [plugin_type, True, NAME, description, cost]
182 | 
183 | 
184 |     ############  GENERAL NOTES ############
185 |     #  CHANGEME: All functions must implement a "configuration()" function
186 |     #  CHANGEME: The correct type of execution function must be defined for the type of plugin
187 |     ############  GENERAL NOTES ############
188 | 
189 | 
190 |     #  CHANGEME: minion: minion() 
191 |     #  CHANGEME:        start() 
192 |     #  CHANGEME:        stop()
193 |     #  CHANGEME:        isAlive()
194 |     #  CHANGEME: Minion plugin specifics:
195 |     #  -     Minions fit exist in a separate directory to prevent them importing themselves when they import their own VERUM instance
196 |     #  -     The minion configuration function must take an argument of the parent verum object.  When not present, it shouldn't error but
197 |     #  -      instead return with successful_load set to false and a logging.info message that the parent was not passed in.
198 |     #  -     Must have 4 functions: minion(), start(), and stop() and isAlive()
199 |     #  -     minion() is the function which will be threaded.  **Make sure to call create the new verum instance WITHIN this function
200 |     #  -      to avoid SQLite errors!**
201 |     #  -     start() creates the thread object as an attribute of the plugin class and starts it
202 |     #  -     stop() stops the thread.  Preferably with both a normal exit by setting a shutdown variable of the plugin class as well as a 
203 |     #  -      force stop option which removes the thread object
204 |     #  -     isAlive() calls the thread isAlive() function and returns the status
205 |     def minion(self,  *args, **xargs):
206 |         self.shutdown = False
207 | 
208 |         # Get graph
209 |         neo_graph = py2neoGraph(self.neo4j_config)
210 | 
211 |         random_cypher = ''' MATCH (a)-[:describedBy]->() 
212 |                             RETURN a, rand() as r
213 |                             ORDER BY r
214 |                             LIMIT 1
215 |                         '''
216 | 
217 |         # pick a random node
218 |         records = neo_graph.cypher.execute(random_cypher)
219 |         node = records[0][0]
220 | 
221 |         logging.info("first node to consolidate edges for is class: {0}, key: {1}, value: {2}".format(node.properties['class'], node.properties['key'], node.properties['value']))
222 |         print "first node to consolidate edges for is class: {0}, key: {1}, value: {2}".format(node.properties['class'], node.properties['key'], node.properties['value'])  # DEBUG
223 | 
224 |         while not self.shutdown:
225 |             edges = defaultdict(set)
226 |             destinations = set()
227 | 
228 |             # get edges starting with the node
229 |             for rel in node.match_outgoing():
230 |                 if 'uri' in rel.properties:
231 |                     edge_uri = rel.properties['uri']
232 |                 else:
233 |                     # SRC URI
234 |                     if 'uri' in rel.start_node.properties:
235 |                         source_uri = rel.start_node.properties['uri']
236 |                     else:
237 |                         source_uri = "class={0}&key={1}&value={2}".format(rel.start_node.properties['attribute'], rel.start_node.properties['key'], rel.start_node.properties['value'])
238 | 
239 |                     # DST URI
240 |                     if 'uri' in rel.end_node.properties:
241 |                         dest_uri = rel.end_node.properties['uri']
242 |                     else:
243 |                         dest_uri = "class={0}&key={1}&value={2}".format(rel.end_node.properties['attribute'], rel.end_node.properties['key'], rel.end_node.properties['value'])
244 | 
245 |                     # Remove non-ascii as it gumms up uuid.
246 |                     # NOTE: This shouldn't effect anything as it's just for the key in the edges dictionary
247 |                     source_uri = self.Verum.removeNonAscii(source_uri)
248 |                     dest_uri = self.Verum.removeNonAscii(dest_uri)
249 | 
250 |                     # Edge URI
251 |                     source_hash = uuid.uuid3(uuid.NAMESPACE_URL, source_uri)
252 |                     dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, dest_uri)
253 | 
254 |                     edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
255 |                     rel_chain = "relationship"
256 |                     while rel_chain in rel.properties:
257 |                         edge_uri = edge_uri + "&{0}={1}".format(rel_chain,rel.properties[rel_chain])
258 |                         rel_chain = rel.properties[rel_chain]
259 |                     if "origin" in rel.properties:
260 |                         edge_uri += "&{0}={1}".format("origin", rel.properties["origin"])
261 | 
262 |                 # aggregate edges by dst, and uri
263 |                 edges[edge_uri].add(rel)  # WARNING: The use of URI here is vulnerable to values being out of order in the URI and edges not being removed.
264 | 
265 |                 # collect destinations to pick next node
266 |                 destinations.add(rel.end_node)
267 | 
268 |             time = datetime.utcnow()
269 | 
270 |             # SRC URI
271 |             if 'uri' in node.properties:
272 |                 source_uri = node.properties['uri']
273 |             else:
274 |                 source_uri = "class={0}&key={1}&value={2}".format(node.properties['attribute'], node.properties['key'], node.properties['value'])
275 | 
276 |             for edge_uri in edges:
277 |                 edge_list = list(edges[edge_uri])
278 | 
279 |                 # DST URI
280 |                 if 'uri' in edge_list[0].end_node.properties:
281 |                     dest_uri = edge_list[0].end_node.properties['uri']
282 |                 else:
283 |                     dest_uri = "class={0}&key={1}&value={2}".format(edge_list[0].end_node.properties['attribute'], edge_list[0].end_node.properties['key'], edge_list[0].end_node.properties['value'])
284 | 
285 |                 logging.debug("Removing {0} edges from node {1} to {2}.".format(len(edge_list[1:]), source_uri, dest_uri))
286 |                 #print "Removing {0} edges from node {1} to {2}.".format(len(edge_list[1:]), source_uri, dest_uri)  # DEBUG
287 | 
288 |                 for edge in edge_list[1:]:
289 |                     # keep earliest time as start
290 |                     try:
291 |                         edge_time = datetime.strptime(edge.properties['start_time'], "%Y-%m-%dT%H:%M:%SZ")
292 |                         if 'start_time' in edge.properties and time > edge_time:
293 |                             time = edge_time
294 |                     except ValueError:  # The time on the node wasn't legit
295 |                         pass
296 |                     try:  # sometimes the edge is no longer there.  Better to pass than fail.
297 |                         #  remove all but one node of each group
298 |                         edge.delete()
299 |                     except:
300 |                         pass
301 |                 # Update time on remaining node
302 |                 try:
303 |                     edge_time = datetime.strptime(edge_list[0].properties['start_time'], "%Y-%m-%dT%H:%M:%SZ")
304 |                 except:
305 |                     edge_time = datetime.utcnow()
306 |                 if 'start_time' not in edge_list[0].properties or time < edge_time:
307 |                     edge_list[0].properties['start_time'] = time.strftime("%Y-%m-%dT%H:%M:%SZ")
308 |                     edge_list[0].push()
309 | 
310 |                 logging.debug("Keeping edge {0} from node {1} to node {2}.".format(edge_list[0].uri, source_uri, dest_uri))
311 |                 #print "Keeping edge {0} from node {1} to node {2}.".format(edge_list[0].uri, source_uri, dest_uri)  # DEBUG
312 | 
313 |             #  Sleep to slow it down
314 |             sleep(self.sleep_time)
315 | 
316 |             jump = random.random()
317 | 
318 |             # do the random walk
319 |             if len(destinations) == 0 or jump <= self.jump:
320 |                 # pick a random node
321 |                 records = neo_graph.cypher.execute(random_cypher)
322 |                 node = records[0][0]
323 |                 logging.debug("Edge consolidation random walk jumped.")
324 |             else:
325 |                 node = random.choice(destinations)
326 |                 logging.debug("Edge consolidation random walk didn't jumped.")
327 | 
328 |             logging.info("Next node to consolidate edges for is class: {0}, key: {1}, value: {2}".format(node.properties['class'], node.properties['key'], node.properties['value']))
329 |             #print "Next to consolidate edges for node is class: {0}, key: {1}, value: {2}".format(node.properties['class'], node.properties['key'], node.properties['value'])  # DEBUG
330 | 
331 |     def start(self, *args, **xargs):
332 |         self.thread = threading.Thread(target=self.minion, *args, **xargs)
333 |         self.thread.start()
334 | 
335 |     def isAlive(self):
336 |         if self.thread is None:
337 |             return False
338 |         else:
339 |             return self.thread.isAlive()
340 | 
341 |     def stop(self, force=True):
342 |         if force:
343 |             self.thread = None  # zero out thread
344 |         else:
345 |             self.shutdown = False  # just dont' iterate.  May take up to (SLEEP_TIME) hours
346 | 
347 |     def set_neo4j_config(self, host, port, username=None, password=None):
348 |         if username and password:
349 |             py2neoAuthenticate("{0}:{1}".format(host, port), username, password)
350 |             self.neo4j_config = "http://{2}:{3}@{0}:{1}/db/data/".format(host, port, username, password)
351 |         else:
352 |             self.neo4j_config = "http://{0}:{1}/db/data/".format(host, port)
353 | 


--------------------------------------------------------------------------------