├── resources ├── opensoc │ ├── config │ │ ├── etc │ │ │ ├── env │ │ │ │ ├── hdfs_connection.conf │ │ │ │ ├── es_connection.conf │ │ │ │ ├── mysql_connection.conf │ │ │ │ └── environment_common.conf │ │ │ └── whitelists │ │ │ │ └── known_hosts.conf │ │ └── topologies │ │ │ ├── bro │ │ │ ├── topology_identifier.conf │ │ │ ├── metrics.conf │ │ │ ├── alerts.xml │ │ │ ├── topology.conf │ │ │ └── features_enabled.conf │ │ │ ├── pcap │ │ │ ├── topology_identifier.conf │ │ │ ├── metrics.conf │ │ │ ├── features_enabled.conf │ │ │ └── topology.conf │ │ │ ├── environment_identifier.conf │ │ │ └── sourcefire │ │ │ ├── topology_identifier.conf │ │ │ ├── alerts.xml │ │ │ ├── metrics.conf │ │ │ ├── topology.conf │ │ │ └── features_enabled.conf │ ├── hbase_ip_whitelist.rb │ ├── hbase-site.xml │ └── geo.sql ├── upstart-supervisor.conf ├── hbase │ ├── supervisor-master.conf │ ├── supervisor-regionserver.conf │ └── hbase-site.xml ├── hadoop │ ├── supervisor-namenode.conf │ ├── supervisor-resourcemanager.conf │ ├── supervisor-datanode.conf │ ├── core-site.xml │ ├── mapred-site.xml │ ├── yarn-site.xml │ └── hdfs-site.xml ├── elasticsearch │ ├── supervisor-elasticsearch.conf │ ├── elasticsearch.yml │ └── elasticsearch-client.yml ├── zookeeper │ ├── supervisor-zookeeper.conf │ └── log4j.properties ├── hive │ ├── supervisor-hive-metastore.conf │ ├── hive-user.sql │ └── hive-site.xml ├── storm │ ├── supervisor-worker.conf │ └── supervisor-nimbus-ui.conf ├── kafka │ ├── supervisor-kafka.conf │ └── server.properties └── supervisord.conf ├── .gitignore ├── scripts ├── setup-java.sh ├── setup-geo-enrichment.sh ├── setup-hbase.sh ├── closest-mirror.py ├── init-hadoop.sh ├── setup-kafka.sh ├── setup-os.sh ├── setup-hive.sh ├── setup-storm.sh ├── setup-elasticsearch.sh ├── setup-zookeeper.sh ├── common.sh └── setup-hadoop.sh ├── Vagrantfile ├── README.md └── fabfile.py /resources/opensoc/config/etc/env/hdfs_connection.conf: -------------------------------------------------------------------------------- 1 | bolt.hdfs.IP=node1 2 | bolt.hdfs.port=9000 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant/* 2 | resources/jre* 3 | resources/tmp/* 4 | resources/opensoc/*.jar 5 | .ssh_config 6 | -------------------------------------------------------------------------------- /resources/opensoc/config/etc/env/es_connection.conf: -------------------------------------------------------------------------------- 1 | es.ip=node1 2 | es.port=9300 3 | es.clustername=opensoc-vagrant -------------------------------------------------------------------------------- /resources/opensoc/config/etc/env/mysql_connection.conf: -------------------------------------------------------------------------------- 1 | mysql.ip=node1 2 | mysql.port=0 3 | mysql.username=hive 4 | mysql.password=hive123 -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/bro/topology_identifier.conf: -------------------------------------------------------------------------------- 1 | #Each topology must have a unique identifier. This setting is required 2 | 3 | topology.id=bro 4 | instance.id=B001 -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/pcap/topology_identifier.conf: -------------------------------------------------------------------------------- 1 | #Each topology must have a unique identifier. 
This setting is required 2 | 3 | topology.id=pcap 4 | instance.id=P001 -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/environment_identifier.conf: -------------------------------------------------------------------------------- 1 | #This file identifies the cluster instance 2 | 3 | customer.id=vagrant 4 | datacenter.id=quick 5 | instance.id=start 6 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/topology_identifier.conf: -------------------------------------------------------------------------------- 1 | #Each topology must have a unique identifier. This setting is required 2 | 3 | topology.id=sourcefire 4 | instance.id=S001 -------------------------------------------------------------------------------- /resources/opensoc/config/etc/env/environment_common.conf: -------------------------------------------------------------------------------- 1 | kafka.zk.port=2181 2 | kafka.zk.list=node2,node3,node4 3 | kafka.zk=node2:2181,node3:2181,node4:2181 4 | kafka.br=node2:9092,node3:9092,node4:9092 -------------------------------------------------------------------------------- /resources/upstart-supervisor.conf: -------------------------------------------------------------------------------- 1 | description "supervisor" 2 | 3 | start on runlevel [2345] 4 | stop on runlevel [!2345] 5 | 6 | exec /usr/bin/supervisord --configuration /etc/supervisord.conf --nodaemon -------------------------------------------------------------------------------- /resources/opensoc/hbase_ip_whitelist.rb: -------------------------------------------------------------------------------- 1 | create "ip_whitelist", "ip" 2 | put "ip_whitelist", "10.0.0.0/8", "ip", "y" 3 | put "ip_whitelist", "192.168.0.0/16", "ip", "y" 4 | put "ip_whitelist", "172.16.0.0/12", "ip", "y" 5 | create "pcap", "t" 6 | exit -------------------------------------------------------------------------------- /resources/hbase/supervisor-master.conf: -------------------------------------------------------------------------------- 1 | [program:master] 2 | command=/opt/hbase/bin/hbase master start 3 | directory=/opt/hbase 4 | stdout_logfile=/var/log/hbase/master-stdout.log 5 | stderr_logfile=/var/log/hbase/master-stderr.log 6 | environment = JAVA_HOME=/usr/java/default 7 | -------------------------------------------------------------------------------- /resources/opensoc/config/etc/whitelists/known_hosts.conf: -------------------------------------------------------------------------------- 1 | 10.1.128.236={"local":"YES", "type":"webserver", "asset_value" : "important"} 2 | 10.1.128.237={"local":"UNKNOWN", "type":"unknown", "asset_value" : "important"} 3 | 10.60.10.254={"local":"YES", "type":"printer", "asset_value" : "important"} -------------------------------------------------------------------------------- /resources/hbase/supervisor-regionserver.conf: -------------------------------------------------------------------------------- 1 | [program:regionserver] 2 | command=/opt/hbase/bin/hbase regionserver start 3 | directory=/opt/hbase 4 | stdout_logfile=/var/log/hbase/regionserver-stdout.log 5 | stderr_logfile=/var/log/hbase/regionserver-stderr.log 6 | environment = JAVA_HOME=/usr/java/default 7 | -------------------------------------------------------------------------------- /resources/hadoop/supervisor-namenode.conf: -------------------------------------------------------------------------------- 1 | 
[program:namenode] 2 | command = /opt/hadoop/bin/hdfs --config /opt/hadoop/etc/hadoop namenode 3 | stdout_logfile = /var/log/hadoop/namenode.stdout 4 | stderr_logfile = /var/log/hadoop/namenode.stderr 5 | autostart = false 6 | environment = JAVA_HOME=/usr/java/default 7 | -------------------------------------------------------------------------------- /resources/elasticsearch/supervisor-elasticsearch.conf: -------------------------------------------------------------------------------- 1 | [program:elasticsearch] 2 | command=/opt/elasticsearch/bin/elasticsearch 3 | directory=/opt/elasticsearch 4 | stdout_logfile=/var/log/elasticsearch/stdout.log 5 | stderr_logfile=/var/log/elasticsearch/stderr.log 6 | environment=JAVA_HOME=/usr/java/default,ES_HEAP=256mb 7 | -------------------------------------------------------------------------------- /resources/zookeeper/supervisor-zookeeper.conf: -------------------------------------------------------------------------------- 1 | [program:zookeeper] 2 | command=/opt/zookeeper/bin/zkServer.sh start-foreground 3 | directory=/opt/zookeeper 4 | stdout_logfile=/var/log/zookeeper/stdout.log 5 | stderr_logfile=/var/log/zookeeper/stderr.log 6 | redirect_stderr=true 7 | environment = JAVA_HOME=/usr/java/default 8 | -------------------------------------------------------------------------------- /resources/hadoop/supervisor-resourcemanager.conf: -------------------------------------------------------------------------------- 1 | [program:resourcemanager] 2 | command = /opt/hadoop/bin/yarn --config /opt/hadoop/etc/hadoop resourcemanager 3 | stdout_logfile = /var/log/hadoop/resourcemanager.stdout 4 | stderr_logfile = /var/log/hadoop/resourcemanager.stderr 5 | autostart = false 6 | environment = JAVA_HOME=/usr/java/default 7 | -------------------------------------------------------------------------------- /resources/hive/supervisor-hive-metastore.conf: -------------------------------------------------------------------------------- 1 | [program:hive-metastore] 2 | command=/opt/hive/bin/hive --service metastore 3 | directory=/opt/hive 4 | stdout_logfile=/var/log/hive/metastore-stdout.log 5 | stderr_logfile=/var/log/hive/metastore-stderr.log 6 | redirect_stderr=true 7 | environment = JAVA_HOME=/usr/java/default,HADOOP_HOME=/opt/hadoop 8 | -------------------------------------------------------------------------------- /resources/storm/supervisor-worker.conf: -------------------------------------------------------------------------------- 1 | [program:storm-supervisor] 2 | command=/opt/storm/bin/storm supervisor 3 | directory=/opt/storm 4 | autostart=true 5 | autorestart=true 6 | stdout_logfile=/var/log/storm/supervisor-stdout.log 7 | stderr_logfile=/var/log/storm/supervisor-stderr.log 8 | environment = JAVA_HOME=/usr/java/default 9 | 10 | -------------------------------------------------------------------------------- /resources/hive/hive-user.sql: -------------------------------------------------------------------------------- 1 | CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive123'; 2 | GRANT ALL PRIVILEGES ON *.* TO 'hive'@'localhost'; 3 | CREATE USER 'hive'@'%' IDENTIFIED BY 'hive123'; 4 | GRANT ALL PRIVILEGES ON *.* TO 'hive'@'%'; 5 | CREATE USER 'hive'@'node1' IDENTIFIED BY 'hive123'; 6 | GRANT ALL PRIVILEGES ON *.* TO 'hive'@'node1'; 7 | FLUSH PRIVILEGES; -------------------------------------------------------------------------------- /resources/kafka/supervisor-kafka.conf: -------------------------------------------------------------------------------- 1 
| [program:kafka] 2 | command=/opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/server.properties 3 | directory=/opt/kafka 4 | user=root 5 | autostart=true 6 | autorestart=true 7 | stdout_logfile=/var/log/kafka/stdout.log 8 | stderr_logfile=/var/log/kafka/stderr.log 9 | environment = JAVA_HOME=/usr/java/default 10 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/alerts.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | .*message.* 5 | {"type":"alert","priority":5, "title":"Sourcefire Alert", "body": 6 | "Alert triggered by sourcefire"} 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /resources/supervisord.conf: -------------------------------------------------------------------------------- 1 | [unix_http_server] 2 | file=/var/run/supervisor.sock 3 | 4 | [supervisord] 5 | pidfile=/var/run/supervisord.pid 6 | logfile=/var/log/supervisor/supervisord.log 7 | childlogdir=/var/log/supervisor 8 | 9 | [rpcinterface:supervisor] 10 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 11 | 12 | [supervisorctl] 13 | serverurl=unix:///var/run/supervisor.sock 14 | 15 | [include] 16 | files = /etc/supervisor.d/*.conf -------------------------------------------------------------------------------- /resources/hadoop/supervisor-datanode.conf: -------------------------------------------------------------------------------- 1 | [program:datanode] 2 | command = /opt/hadoop/bin/hdfs --config /opt/hadoop/etc/hadoop datanode 3 | stdout_logfile = /var/log/hadoop/datanode.stdout 4 | stderr_logfile = /var/log/hadoop/datanode.stderr 5 | autostart = false 6 | environment = JAVA_HOME=/usr/java/default 7 | 8 | [program:nodemanager] 9 | command = /opt/hadoop/bin/yarn --config /opt/hadoop/etc/hadoop nodemanager 10 | stdout_logfile = /var/log/hadoop/nodemanager.stdout 11 | stderr_logfile = /var/log/hadoop/nodemanager.stderr 12 | autostart = false 13 | environment = JAVA_HOME=/usr/java/default 14 | -------------------------------------------------------------------------------- /resources/storm/supervisor-nimbus-ui.conf: -------------------------------------------------------------------------------- 1 | [program:storm-ui] 2 | command=/opt/storm/bin/storm ui 3 | directory=/opt/storm 4 | autostart=true 5 | autorestart=true 6 | stdout_logfile=/var/log/storm/ui-stdout.log 7 | stderr_logfile=/var/log/storm/ui-stderr.log 8 | environment = JAVA_HOME=/usr/java/default 9 | 10 | 11 | [program:storm-nimbus] 12 | command=/opt/storm/bin/storm nimbus 13 | directory=/opt/storm 14 | autostart=true 15 | autorestart=true 16 | stdout_logfile=/var/log/storm/nimbus-stdout.log 17 | stderr_logfile=/var/log/storm/nimbus-stderr.log 18 | environment = JAVA_HOME=/usr/java/default 19 | -------------------------------------------------------------------------------- /scripts/setup-java.sh: -------------------------------------------------------------------------------- 1 | source "/vagrant/scripts/common.sh" 2 | 3 | function installJava { 4 | 5 | rpm -q jre 6 | if [ $? 
-eq 0 ]; then 7 | echo "Java is already installed" 8 | else 9 | echo "install ${JRE_RPM}" 10 | rpm -i /vagrant/resources/$JRE_RPM 11 | fi 12 | } 13 | 14 | function setupEnvVars { 15 | echo "creating java environment variables" 16 | echo export JAVA_HOME=/usr/java/default >> /etc/profile.d/java.sh 17 | echo export PATH=\${JAVA_HOME}/bin:\${PATH} >> /etc/profile.d/java.sh 18 | } 19 | 20 | echo "Setting Up Java" 21 | installJava 22 | setupEnvVars 23 | -------------------------------------------------------------------------------- /scripts/setup-geo-enrichment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | 6 | function downloadGeoData { 7 | 8 | downloadFile http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip GeoLiteCity-latest.zip 9 | geo_folder=`unzip -l $TARBALL | grep -m 1 -o -E GeoLiteCity_[0-9]{8}` 10 | cd /tmp && unzip $TARBALL 11 | 12 | } 13 | 14 | function provisionMySql { 15 | 16 | sed "s/__GEO_FOLDER__/${geo_folder}/" /vagrant/resources/opensoc/geo.sql > /tmp/geo.sql 17 | mysql -u root < /tmp/geo.sql 18 | } 19 | 20 | echo "Setting up Geo Enrichment Data" 21 | downloadGeoData 22 | provisionMySql 23 | -------------------------------------------------------------------------------- /scripts/setup-hbase.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts t:r: option; do 6 | case $option in 7 | t) TOTAL_NODES=$OPTARG;; 8 | r) HBASE_ROLE=$OPTARG;; 9 | esac 10 | done 11 | 12 | function installHbase { 13 | downloadApacheFile hbase $HBASE_VERSION_NUM "${HBASE_VERSION}-bin.tar.gz" 14 | 15 | tar -oxzf $TARBALL -C /opt 16 | safeSymLink "/opt/${HBASE_VERSION}" /opt/hbase 17 | 18 | mkdir -p /var/log/hbase 19 | } 20 | 21 | function configureHbase { 22 | 23 | generateZkStringNoPorts $TOTAL_NODES 24 | sed "s/__ZK_QUORUM__/${ZK_STRING_NOPORTS}/" /vagrant/resources/hbase/hbase-site.xml > /opt/hbase/conf/hbase-site.xml 25 | cp "/vagrant/resources/hbase/supervisor-${HBASE_ROLE}.conf" /etc/supervisor.d/hbase.conf 26 | } 27 | 28 | echo "Setting up HBase" 29 | installHbase 30 | configureHbase 31 | 32 | -------------------------------------------------------------------------------- /scripts/closest-mirror.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Print the closest apache mirror for the given project 3 | 4 | import urllib2, json, argparse, os 5 | 6 | parser = argparse.ArgumentParser(description='gets the closest Apache Mirror for a project') 7 | parser.add_argument('project', help='project to get the mirror for') 8 | parser.add_argument('-v', '--version', help='project version') 9 | parser.add_argument('-f', '--file', help='filename of binary') 10 | 11 | args = parser.parse_args() 12 | 13 | closer_url = 'http://www.apache.org/dyn/closer.cgi/{0}/?as_json=1'.format(args.project) 14 | 15 | response = json.loads(urllib2.urlopen(closer_url).read()) 16 | 17 | 18 | path = response['path_info'] 19 | 20 | if args.version: 21 | path = os.path.join(path, args.version) 22 | 23 | if args.file: 24 | path = os.path.join(path, args.file) 25 | 26 | print response['preferred'] + path 27 | 28 | 29 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/bro/metrics.conf: 
-------------------------------------------------------------------------------- 1 | #reporters 2 | com.opensoc.metrics.reporter.graphite=false 3 | com.opensoc.metrics.reporter.console=false 4 | com.opensoc.metrics.reporter.jmx=false 5 | 6 | #Graphite Addresses 7 | 8 | com.opensoc.metrics.graphite.address=localhost 9 | com.opensoc.metrics.graphite.port=2023 10 | 11 | #TelemetryParserBolt 12 | com.opensoc.metrics.TelemetryParserBolt.acks=false 13 | com.opensoc.metrics.TelemetryParserBolt.emits=false 14 | com.opensoc.metrics.TelemetryParserBolt.fails=false 15 | 16 | 17 | #GenericEnrichmentBolt 18 | com.opensoc.metrics.GenericEnrichmentBolt.acks=false 19 | com.opensoc.metrics.GenericEnrichmentBolt.emits=false 20 | com.opensoc.metrics.GenericEnrichmentBolt.fails=false 21 | 22 | 23 | #TelemetryIndexingBolt 24 | com.opensoc.metrics.TelemetryIndexingBolt.acks=false 25 | com.opensoc.metrics.TelemetryIndexingBolt.emits=false 26 | com.opensoc.metrics.TelemetryIndexingBolt.fails=false 27 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/pcap/metrics.conf: -------------------------------------------------------------------------------- 1 | #reporters 2 | com.opensoc.metrics.reporter.graphite=false 3 | com.opensoc.metrics.reporter.console=false 4 | com.opensoc.metrics.reporter.jmx=false 5 | 6 | #Graphite Addresses 7 | 8 | com.opensoc.metrics.graphite.address=localhost 9 | com.opensoc.metrics.graphite.port=2023 10 | 11 | #TelemetryParserBolt 12 | com.opensoc.metrics.TelemetryParserBolt.acks=false 13 | com.opensoc.metrics.TelemetryParserBolt.emits=false 14 | com.opensoc.metrics.TelemetryParserBolt.fails=false 15 | 16 | 17 | #GenericEnrichmentBolt 18 | com.opensoc.metrics.GenericEnrichmentBolt.acks=false 19 | com.opensoc.metrics.GenericEnrichmentBolt.emits=false 20 | com.opensoc.metrics.GenericEnrichmentBolt.fails=false 21 | 22 | 23 | #TelemetryIndexingBolt 24 | com.opensoc.metrics.TelemetryIndexingBolt.acks=false 25 | com.opensoc.metrics.TelemetryIndexingBolt.emits=false 26 | com.opensoc.metrics.TelemetryIndexingBolt.fails=false 27 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/metrics.conf: -------------------------------------------------------------------------------- 1 | #reporters 2 | com.opensoc.metrics.reporter.graphite=false 3 | com.opensoc.metrics.reporter.console=false 4 | com.opensoc.metrics.reporter.jmx=false 5 | 6 | #Graphite Addresses 7 | 8 | com.opensoc.metrics.graphite.address=localhost 9 | com.opensoc.metrics.graphite.port=2023 10 | 11 | #TelemetryParserBolt 12 | com.opensoc.metrics.TelemetryParserBolt.acks=false 13 | com.opensoc.metrics.TelemetryParserBolt.emits=false 14 | com.opensoc.metrics.TelemetryParserBolt.fails=false 15 | 16 | 17 | #GenericEnrichmentBolt 18 | com.opensoc.metrics.GenericEnrichmentBolt.acks=false 19 | com.opensoc.metrics.GenericEnrichmentBolt.emits=false 20 | com.opensoc.metrics.GenericEnrichmentBolt.fails=false 21 | 22 | 23 | #TelemetryIndexingBolt 24 | com.opensoc.metrics.TelemetryIndexingBolt.acks=false 25 | com.opensoc.metrics.TelemetryIndexingBolt.emits=false 26 | com.opensoc.metrics.TelemetryIndexingBolt.fails=false 27 | -------------------------------------------------------------------------------- /resources/hadoop/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | fs.default.name 22 | hdfs://node1:9000 23 | 24 | 
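Note that fs.default.name above has to agree with bolt.hdfs.IP/bolt.hdfs.port in /resources/opensoc/config/etc/env/hdfs_connection.conf and with bolt.hdfs.file.system.url in the topology configs. A minimal sanity check after vagrant up, assuming the /opt/hadoop layout created by the scripts in this repo (the check itself is illustrative and not part of the provisioning scripts): # run from the host once node1 is provisioned; exits non-zero if the NameNode at hdfs://node1:9000 is not answering vagrant ssh node1 -c "/opt/hadoop/bin/hdfs dfs -ls hdfs://node1:9000/"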
-------------------------------------------------------------------------------- /resources/hadoop/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | mapreduce.framework.name 22 | yarn 23 | 24 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/bro/alerts.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | .*host\"\:\{"ip_dst_addr\"\:\{\},\"ip_src_addr\"\:\{\}.* 5 | {"type":"error","priority":5, "title":"No Local Hostname Present", "body": 6 | "We don't have a record for source or destination IPs in our internal database."} 7 | 8 | 9 | 10 | .*whois\"\:\{\"tld\"\:\{\}.* 11 | {"type":"warning","priority":10, "title":"Whois domain unknown", "body": 12 | "Could not locate whois information for tld"} 13 | 14 | 15 | ^((?!country\"\:\"US\").)*$ 16 | {"type":"warning","priority":10, "title":"NOT US IP", "body": "Communication contains a non-US IP"} 17 | 18 | 19 | .*geo.* 20 | {"type":"error","priority":1, "title":"test", "body": "test alert"} 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /resources/hive/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | javax.jdo.option.ConnectionURL 4 | jdbc:mysql://node1:3306/hivemeta?createDatabaseIfNotExist=true 5 | 6 | 7 | javax.jdo.option.ConnectionDriverName 8 | com.mysql.jdbc.Driver 9 | 10 | 11 | javax.jdo.option.ConnectionUserName 12 | hive 13 | 14 | 15 | javax.jdo.option.ConnectionPassword 16 | hive123 17 | 18 | 19 | hive.server2.thrift.bind.host 20 | 0.0.0.0 21 | 22 | 23 | hadoop.bin.path 24 | /opt/hadoop/bin 25 | 26 | 27 | hadoop.config.dir 28 | /opt/hadoop/etc/hadoop 29 | 30 | -------------------------------------------------------------------------------- /scripts/init-hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts r: option; do 6 | case $option in 7 | r) ROLE=$OPTARG;; 8 | esac 9 | done 10 | 11 | function startHadoopRole { 12 | ps -ef | grep -v grep | grep -v vagrant | grep $1 13 | if [ $? -ne 0 ]; then 14 | /opt/hadoop/sbin/hadoop-daemon.sh --config /opt/hadoop/etc/hadoop --script hdfs start $1 15 | fi 16 | } 17 | 18 | function startYarnRole { 19 | ps -ef | grep -v grep | grep -v vagrant | grep $1 20 | if [ $? 
-ne 0 ]; then 21 | /opt/hadoop/sbin/yarn-daemon.sh --config /opt/hadoop/etc/hadoop start $1 22 | fi 23 | } 24 | function formatHdfs { 25 | /opt/hadoop/bin/hdfs namenode -format vagrant -nonInteractive 26 | 27 | } 28 | 29 | echo "Starting Hadoop" 30 | 31 | if [ "${ROLE}" == "namenode" ]; then 32 | formatHdfs 33 | startHadoopRole $ROLE 34 | startYarnRole "resourcemanager" 35 | elif [ "${ROLE}" == "datanode" ]; then 36 | startHadoopRole $ROLE 37 | startYarnRole "nodemanager" 38 | fi -------------------------------------------------------------------------------- /resources/hadoop/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | yarn.nodemanager.aux-services 18 | mapreduce_shuffle 19 | 20 | 21 | 22 | yarn.resourcemanager.hostname 23 | node1 24 | 25 | 26 | 27 | yarn.resourcemanager.bind-host 28 | 0.0.0.0 29 | 30 | -------------------------------------------------------------------------------- /scripts/setup-kafka.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts t: option; do 6 | case $option in 7 | t) TOTAL_NODES=$OPTARG;; 8 | esac 9 | done 10 | 11 | function installKafka { 12 | downloadApacheFile kafka ${KAFKA_VERSION_NUM} "${KAFKA_VERSION}.tgz" 13 | 14 | tar -oxzf $TARBALL -C /opt 15 | safeSymLink "/opt/${KAFKA_VERSION}/" /opt/kafka 16 | 17 | mkdir -p /var/lib/kafka-logs 18 | mkdir -p /var/log/kafka 19 | } 20 | 21 | function configureKafka { 22 | echo "Configuring Kafka" 23 | # copy over config with static properties 24 | cp /vagrant/resources/kafka/server.properties /opt/kafka/config/ 25 | 26 | # echo in dynamic ones 27 | echo "broker.id=${NODE_NUMBER}" >> /opt/kafka/config/server.properties 28 | 29 | generateZkString $TOTAL_NODES 30 | 31 | echo "zookeeper.connect=${ZK_STRING}" >> /opt/kafka/config/server.properties 32 | 33 | cp /vagrant/resources/kafka/supervisor-kafka.conf /etc/supervisor.d/kafka.conf 34 | } 35 | 36 | 37 | echo "Setting up Kafka" 38 | installKafka 39 | configureKafka -------------------------------------------------------------------------------- /resources/hadoop/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | dfs.replication 22 | 1 23 | 24 | 25 | 26 | dfs.name.dir 27 | file:///var/lib/hadoop/hdfs/namenode 28 | 29 | 30 | 31 | dfs.data.dir 32 | file:///var/lib/hadoop/hdfs/datanode 33 | 34 | -------------------------------------------------------------------------------- /scripts/setup-os.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while getopts t: option; do 4 | case $option in 5 | t) TOTAL_NODES=$OPTARG;; 6 | esac 7 | done 8 | 9 | function disableFirewall { 10 | echo "Disabling the Firewall" 11 | service iptables save 12 | service iptables stop 13 | chkconfig iptables off 14 | } 15 | 16 | function writeHostFile { 17 | echo "setting up /etc/hosts file" 18 | 19 | echo "127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4" > /etc/hosts 20 | echo "::1 localhost localhost.localdomain localhost6 localhost6.localdomain6" >> /etc/hosts 21 | 22 | for i in $(seq 1 $TOTAL_NODES); do 23 | echo "10.0.0.10${i} node${i}" >> /etc/hosts 24 | done 25 | } 26 | 27 | function installDependencies { 28 | echo "Installing Supervisor" 29 | yum install -y epel-release 30 | yum install -y python-pip unzip 31 | 32 | pip install supervisor
33 | pip install argparse 34 | 35 | cp /vagrant/resources/supervisord.conf /etc/supervisord.conf 36 | cp /vagrant/resources/upstart-supervisor.conf /etc/init/supervisor.conf 37 | 38 | mkdir -p /etc/supervisor.d 39 | mkdir -p /var/log/supervisor 40 | } 41 | 42 | function installNtpd { 43 | yum install -y ntp 44 | 45 | ntpdate 0.pool.ntp.org 46 | 47 | service ntpd start 48 | chkconfig ntpd on 49 | } 50 | 51 | disableFirewall 52 | writeHostFile 53 | installDependencies -------------------------------------------------------------------------------- /resources/hbase/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.rootdir 26 | hdfs://node1:9000/hbase 27 | 28 | 29 | hbase.cluster.distributed 30 | true 31 | 32 | 33 | hbase.zookeeper.quorum 34 | __ZK_QUORUM__ 35 | 36 | 37 | zookeeper.znode.parent 38 | /hbase-unsecure 39 | 40 | -------------------------------------------------------------------------------- /resources/opensoc/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.rootdir 26 | hdfs://node1:9000/hbase 27 | 28 | 29 | hbase.cluster.distributed 30 | true 31 | 32 | 33 | hbase.zookeeper.quorum 34 | node2:2181,node3:2181,node4:2181 35 | 36 | 37 | zookeeper.znode.parent 38 | /hbase-unsecure 39 | 40 | -------------------------------------------------------------------------------- /scripts/setup-hive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | function installHive { 6 | 7 | downloadApacheFile hive $HIVE_VERSION "apache-${HIVE_VERSION}-bin.tar.gz" 8 | 9 | tar -oxzf $TARBALL -C /opt 10 | safeSymLink "/opt/apache-${HIVE_VERSION}-bin/" /opt/hive 11 | 12 | mkdir -p /var/log/hive 13 | 14 | cp /vagrant/resources/hive/supervisor-hive-metastore.conf /etc/supervisor.d/hive-metastore.conf 15 | 16 | } 17 | 18 | function installMySql { 19 | yum install -y mysql-server mysql-connector-java 20 | 21 | chkconfig mysqld on 22 | service mysqld start 23 | 24 | safeSymLink /usr/share/java/mysql-connector-java.jar /opt/hive/lib/mysql-connector-java.jar 25 | 26 | echo "Setting up mysql user" 27 | if mysql -u root mysql -e "select User from user where User='hive';" | grep hive; then 28 | echo "hive user exists..." 29 | else 30 | mysql -u root < /vagrant/resources/hive/hive-user.sql 31 | fi 32 | 33 | echo "Setting up metastore schema" 34 | if mysql -u root -e "show databases like 'hivemeta';" | grep hivemeta; then 35 | echo "metastore table exists..."
36 | else 37 | mysql -u root -e "CREATE DATABASE hivemeta;" 38 | cd /opt/hive/scripts/metastore/upgrade/mysql && mysql -u hive -phive123 hivemeta < hive-schema-1.2.0.mysql.sql 39 | fi 40 | } 41 | 42 | function configureHive { 43 | 44 | cp /vagrant/resources/hive/hive-site.xml /opt/hive/conf/ 45 | } 46 | 47 | echo "Setting up Hive" 48 | installHive 49 | installMySql 50 | configureHive -------------------------------------------------------------------------------- /scripts/setup-storm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | 6 | while getopts t:r: option; do 7 | case $option in 8 | t) TOTAL_NODES=$OPTARG;; 9 | r) STORM_ROLE=$OPTARG;; 10 | esac 11 | done 12 | 13 | 14 | function installStorm { 15 | downloadApacheFile storm ${STORM_VERSION} "${STORM_VERSION}.tar.gz" 16 | 17 | tar -oxzf $TARBALL -C /opt 18 | safeSymLink "/opt/${STORM_VERSION}" /opt/storm 19 | 20 | mkdir -p /var/log/storm 21 | } 22 | 23 | function configureStorm { 24 | echo "Configuring Storm" 25 | 26 | echo "storm.zookeeper.servers:" >> /opt/storm/conf/storm.yaml 27 | for i in $(seq 2 $TOTAL_NODES); do 28 | echo " - node${i}" >> /opt/storm/conf/storm.yaml 29 | done 30 | 31 | echo "nimbus.host: node1" >> /opt/storm/conf/storm.yaml 32 | echo "java.library.path: /usr/local/lib:/opt/local/lib:/usr/lib:/opt/hadoop/lib/native:/usr/lib64" >> /opt/storm/conf/storm.yaml 33 | echo "LD_LIBRARY_PATH:/usr/local/lib:/opt/local/lib:/usr/lib:/opt/hadoop/lib/native:/usr/lib64" >> /opt/storm/conf/storm_env.ini 34 | 35 | } 36 | 37 | function setupNimbus { 38 | echo "Setting up Storm Nimbus" 39 | 40 | cp /vagrant/resources/storm/supervisor-nimbus-ui.conf /etc/supervisor.d/storm.conf 41 | } 42 | 43 | function setupSupervisor { 44 | echo "Setting up Storm Supervisor" 45 | 46 | cp /vagrant/resources/storm/supervisor-worker.conf /etc/supervisor.d/storm.conf 47 | } 48 | 49 | echo "Setting up Storm" 50 | installStorm 51 | configureStorm 52 | 53 | 54 | case $STORM_ROLE in 55 | nimbus) setupNimbus;; 56 | supervisor) setupSupervisor;; 57 | esac 58 | -------------------------------------------------------------------------------- /scripts/setup-elasticsearch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts ci: option; do 6 | case $option in 7 | c) ES_CLIENT=yes;; 8 | i) IP_ADDR=$OPTARG;; 9 | esac 10 | done 11 | 12 | function installElasticsearch { 13 | 14 | downloadFile "https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz" "elasticsearch-${ES_VERSION}.tar.gz" 15 | 16 | tar -oxf $TARBALL -C /opt 17 | safeSymLink "/opt/elasticsearch-${ES_VERSION}" /opt/elasticsearch 18 | 19 | mkdir -p /var/lib/elasticsearch 20 | mkdir -p /var/log/elasticsearch 21 | mkdir -p /opt/elasticsearch/plugins 22 | } 23 | 24 | function configureElasticsearch { 25 | 26 | hostname=`hostname -f` 27 | if [ -z "${ES_CLIENT}" ]; then 28 | echo "Configuring elasticsearch as a normal node" 29 | sed "s/__HOSTNAME__/${hostname}/" /vagrant/resources/elasticsearch/elasticsearch.yml | sed "s/__IP_ADDR__/${IP_ADDR}/" > /opt/elasticsearch/config/elasticsearch.yml 30 | else 31 | echo "Configuring elasticsearch as a client" 32 | sed "s/__HOSTNAME__/${hostname}/" /vagrant/resources/elasticsearch/elasticsearch-client.yml | sed "s/__IP_ADDR__/${IP_ADDR}/" > /opt/elasticsearch/config/elasticsearch.yml 33 | fi 34 | 35 | if [ ! 
-e /opt/elasticsearch/plugins/kopf ]; then 36 | echo "Installing kopf plugin" 37 | /opt/elasticsearch/bin/plugin --install lmenezes/elasticsearch-kopf/1.5.3 38 | fi 39 | 40 | cp /vagrant/resources/elasticsearch/supervisor-elasticsearch.conf /etc/supervisor.d/elasticsearch.conf 41 | 42 | } 43 | echo "Setting up Elasticsearch" 44 | installElasticsearch 45 | configureElasticsearch -------------------------------------------------------------------------------- /scripts/setup-zookeeper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts t: option; do 6 | case $option in 7 | t) TOTAL_NODES=$OPTARG;; 8 | esac 9 | done 10 | 11 | function installZookeeper { 12 | downloadApacheFile zookeeper ${ZOOKEEPER_VERSION} "${ZOOKEEPER_VERSION}.tar.gz" 13 | 14 | tar -oxzf $TARBALL -C /opt 15 | safeSymLink "/opt/${ZOOKEEPER_VERSION}/" /opt/zookeeper 16 | 17 | mkdir -p /var/lib/zookeeper 18 | mkdir -p /var/log/zookeeper 19 | 20 | echo "0 0 * * * /usr/local/bin/zookeeper_cleanup" >> /etc/crontab 21 | 22 | echo "cd /opt/zookeeper" > /usr/local/bin/zookeeper_cleanup 23 | echo "echo `date` > /root/last_zk_cleanup" >> /usr/local/bin/zookeeper_cleanup 24 | echo "bin/zkCleanup.sh /var/lib/zookeeper -n 5 >> /root/last_zk_cleanup" >> /usr/local/bin/zookeeper_cleanup 25 | 26 | chmod +x /usr/local/bin/zookeeper_cleanup 27 | 28 | echo $NODE_NUMBER > /var/lib/zookeeper/myid 29 | } 30 | 31 | function configureZookeeper { 32 | 33 | echo "Configuring Zookeeper..." 34 | echo "tickTime=2000" > /opt/zookeeper/conf/zoo.cfg 35 | echo "initLimit=10" >> /opt/zookeeper/conf/zoo.cfg 36 | echo "syncLimit=5" >> /opt/zookeeper/conf/zoo.cfg 37 | echo "dataDir=/var/lib/zookeeper" >> /opt/zookeeper/conf/zoo.cfg 38 | echo "clientPort=2181" >> /opt/zookeeper/conf/zoo.cfg 39 | 40 | for i in $(seq 1 $TOTAL_NODES); do 41 | echo "server.${i}=node${i}:2888:3888" >> /opt/zookeeper/conf/zoo.cfg 42 | done 43 | 44 | cp /vagrant/resources/zookeeper/supervisor-zookeeper.conf /etc/supervisor.d/zookeeper.conf 45 | } 46 | 47 | echo "Setting up Zookeeper" 48 | 49 | installZookeeper 50 | configureZookeeper -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/pcap/features_enabled.conf: -------------------------------------------------------------------------------- 1 | #Enable and disable features for each topology 2 | 3 | #Feature: Kafka spout 4 | ##Feature Description: Acts as a Kafka consumer. 
Takes messages from a Kafka topic and ingests them into a topology 5 | 6 | spout.kafka.name=KafkaSpout 7 | spout.kafka.enabled=true 8 | spout.kafka.num.tasks=1 9 | spout.kafka.parallelism.hint=1 10 | 11 | #Feature: Parser Bolt 12 | ##Feature Description: Parses telemetry from its native format into a native JSON 13 | 14 | parser.bolt.name=ParserBolt 15 | bolt.parser.enabled=true 16 | bolt.parser.num.tasks=1 17 | bolt.parser.parallelism.hint=1 18 | 19 | #Feature: Indexer 20 | ##Feature Description: Indexes telemetry messages in ElasticSearch or Solr 21 | 22 | bolt.indexing.name=IndexBolt 23 | bolt.indexing.enabled=true 24 | bolt.indexing.num.tasks=1 25 | bolt.indexing.parallelism.hint=1 26 | 27 | #Feature: Error Indexer 28 | ##Feature Description: Indexes error messages in ElasticSearch or Solr 29 | 30 | bolt.error.indexing.name=ErrorIndexBolt 31 | bolt.error.indexing.enabled=true 32 | bolt.error.indexing.num.tasks=1 33 | bolt.error.indexing.parallelism.hint=1 34 | 35 | #Feature: HDFS Bolt 36 | ##Feature Description: Writes telemetry messages into HDFS 37 | 38 | bolt.hdfs.name=HDFSBolt 39 | bolt.hdfs.enabled=false 40 | bolt.hdfs.num.tasks=4 41 | bolt.hdfs.parallelism.hint=4 42 | 43 | bolt.hbase.name=HBaseBolt 44 | bolt.hbase.enabled=true 45 | bolt.hbase.num.tasks=1 46 | bolt.hbase.parallelism.hint=1 47 | 48 | 49 | # unused stuff 50 | bolt.enrichment.host.enabled=false 51 | bolt.enrichment.geo.enabled=false 52 | bolt.enrichment.whois.enabled=false 53 | bolt.enrichment.cif.enabled=false 54 | bolt.enrichment.threat.enabled=false 55 | bolt.alerts.enabled=false 56 | bolt.alerts.indexing.enabled=false 57 | bolt.kafka.enabled=false 58 | 59 | -------------------------------------------------------------------------------- /resources/opensoc/geo.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS GEO; 2 | 3 | USE GEO; 4 | 5 | DROP TABLE IF EXISTS `blocks`; 6 | CREATE TABLE `blocks` ( `startIPNum` int(10) unsigned NOT NULL,`endIPNum` int(10) unsigned NOT NULL,`locID` 7 | int(10) unsigned NOT NULL, PRIMARY KEY (`startIPNum`,`endIPNum`) ) 8 | ENGINE=MyISAM DEFAULT CHARSET=latin1 PACK_KEYS=1 DELAY_KEY_WRITE=1; 9 | 10 | DROP TABLE IF EXISTS `location`; 11 | CREATE TABLE `location` (`locID` int(10) unsigned NOT NULL,`country` char(2) default NULL,`region` char(2) 12 | default NULL,`city` varchar(45) default NULL,`postalCode` char(7) default NULL,`latitude` double default 13 | NULL,`longitude` double default NULL,`dmaCode` char(3) default NULL,`areaCode` char(3) default NULL,PRIMARY KEY 14 | (`locID`),KEY `Index_Country` (`country`) ) ENGINE=MyISAM DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED; 15 | 16 | load data infile '/tmp/__GEO_FOLDER__/GeoLiteCity-Blocks.csv' into table `blocks` fields terminated by ',' optionally enclosed by 17 | '"' lines terminated by '\n' ignore 2 lines; 18 | 19 | load data infile '/tmp/__GEO_FOLDER__/GeoLiteCity-Location.csv' into table `location` fields terminated by ',' optionally enclosed 20 | by '"' lines terminated by '\n' ignore 2 lines; 21 | 22 | DELIMITER $$ 23 | DROP FUNCTION IF EXISTS `IPTOLOCID` $$ 24 | CREATE FUNCTION `IPTOLOCID`( ip VARCHAR(15)) RETURNS int(10) unsigned 25 | BEGIN 26 | DECLARE ipn INTEGER UNSIGNED; 27 | DECLARE locID_var INTEGER; 28 | IF ip LIKE '192.168.%' OR ip LIKE '10.%' THEN RETURN 0; 29 | END IF; 30 | SET ipn = INET_ATON(ip); 31 | SELECT locID INTO locID_var FROM `blocks` INNER JOIN (SELECT MAX(startIPNum) AS start FROM `blocks` WHERE startIPNum <= ipn) AS s ON (startIPNum = 
s.start) WHERE endIPNum >= ipn; 32 | RETURN locID_var; 33 | END 34 | $$ 35 | DELIMITER ; -------------------------------------------------------------------------------- /scripts/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | JRE_RPM=jre-7u79-linux-x64.rpm 4 | HADOOP_VERSION=hadoop-2.6.0 5 | ZOOKEEPER_VERSION=zookeeper-3.4.6 6 | KAFKA_SCALA_VERSION=2.9.2 7 | KAFKA_VERSION_NUM=0.8.1.1 8 | KAFKA_VERSION="kafka_${KAFKA_SCALA_VERSION}-${KAFKA_VERSION_NUM}" 9 | STORM_VERSION=apache-storm-0.9.4 10 | HBASE_VERSION_NUM=0.98.13 11 | HBASE_VERSION=hbase-"${HBASE_VERSION_NUM}-hadoop2" 12 | HIVE_VERSION=hive-1.2.0 13 | ES_VERSION=1.5.2 14 | 15 | # So we don't need to pass in i to the scripts 16 | NODE_NUMBER=`hostname | tr -d node` 17 | 18 | 19 | function downloadFile { 20 | 21 | url="${1}" 22 | filename="${2}" 23 | 24 | tmp_dir="/vagrant/resources/tmp/" 25 | cached_file="${tmp_dir}${filename}" 26 | 27 | if [ ! -e $cached_file ]; then 28 | echo "Downloading ${filename} from ${url} to ${cached_file}" 29 | echo "This will take some time. Please be patient..." 30 | wget -nv -P $tmp_dir $url 31 | fi 32 | 33 | TARBALL=$cached_file 34 | } 35 | 36 | function downloadApacheFile { 37 | 38 | project="${1}" 39 | version="${2}" 40 | filename="${3}" 41 | 42 | closest_url=`python /vagrant/scripts/closest-mirror.py ${project} -v ${version} -f ${filename}` 43 | 44 | downloadFile $closest_url $filename 45 | } 46 | 47 | function join { 48 | local IFS="$1"; shift; echo "$*" 49 | } 50 | 51 | function generateZkString { 52 | # Yes it's ugly, but so is bash :) 53 | ZK_STRING=`python -c "print ','.join([ 'node{0}:2181'.format(x) for x in range(2,${1}+1)])"` 54 | } 55 | 56 | function generateZkStringNoPorts { 57 | ZK_STRING_NOPORTS=`python -c "print ','.join([ 'node{0}'.format(x) for x in range(2,${1}+1)])"` 58 | } 59 | 60 | function safeSymLink { 61 | target=$1 62 | symlink=$2 63 | 64 | if [ -e $symlink ]; then 65 | echo "${symlink} exists. Deleting."
66 | rm $symlink 67 | fi 68 | 69 | ln -s $target $symlink 70 | -------------------------------------------------------------------------------- /scripts/setup-hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts r:t: option; do 6 | case $option in 7 | t) TOTAL_NODES=$OPTARG;; 8 | r) ROLE=$OPTARG;; 9 | esac 10 | done 11 | 12 | function installHadoop { 13 | 14 | downloadApacheFile hadoop/common $HADOOP_VERSION "${HADOOP_VERSION}.tar.gz" 15 | 16 | tar -oxzf $TARBALL -C /opt 17 | safeSymLink "/opt/${HADOOP_VERSION}/" /opt/hadoop 18 | 19 | mkdir -p /var/lib/hadoop/hdfs/namenode 20 | mkdir -p /var/lib/hadoop/hdfs/datanode 21 | mkdir -p /var/log/hadoop 22 | mkdir -p /opt/hadoop/logs 23 | 24 | # needed for writing to HDFS 25 | yum install -y snappy snappy-devel 26 | 27 | } 28 | 29 | function configureHadoop { 30 | HADOOP_RESOURCE_DIR=/vagrant/resources/hadoop 31 | for file in `ls ${HADOOP_RESOURCE_DIR}/*.xml`; do 32 | echo "Copying ${file}" 33 | cp $file /opt/hadoop/etc/hadoop 34 | done 35 | 36 | echo "Setting slaves file" 37 | for i in $(seq 2 $TOTAL_NODES); do 38 | echo "node${i}" >> /opt/hadoop/etc/hadoop/slaves 39 | done 40 | 41 | echo "export JAVA_LIBRARY_PATH=\${JAVA_LIBRARY_PATH}:/usr/lib/hadoop/lib/native:/usr/lib64" >> /opt/hadoop/etc/hadoop/hadoop-env.sh 42 | } 43 | 44 | function configureNameNode { 45 | echo "Copying over Supervisor config for namenode and resourcemanager" 46 | cp /vagrant/resources/hadoop/supervisor-namenode.conf /etc/supervisor.d/namenode.conf 47 | cp /vagrant/resources/hadoop/supervisor-resourcemanager.conf /etc/supervisor.d/resourcemanager.conf 48 | } 49 | 50 | function configureDataNode { 51 | echo "Copying over Supervisor config for datanode" 52 | cp /vagrant/resources/hadoop/supervisor-datanode.conf /etc/supervisor.d/datanode.conf 53 | } 54 | 55 | echo "Setting up Hadoop" 56 | installHadoop 57 | configureHadoop 58 | 59 | if [ "${ROLE}" == "namenode" ]; then 60 | configureNameNode 61 | elif [ "${ROLE}" == "datanode" ]; then 62 | configureDataNode 63 | fi -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/bro/topology.conf: -------------------------------------------------------------------------------- 1 | include = ../../etc/env/environment_common.conf 2 | include = ../../etc/env/es_connection.conf 3 | include = ../../etc/env/hdfs_connection.conf 4 | include = ../../etc/env/mysql_connection.conf 5 | include = metrics.conf 6 | include = features_enabled.conf 7 | 8 | #Global Properties 9 | 10 | debug.mode=true 11 | local.mode=true 12 | num.workers=1 13 | num.ackers=1 14 | 15 | #Standard 5-tuple fields 16 | 17 | source.ip=ip_src_addr 18 | source.port=ip_src_port 19 | dest.ip=ip_dst_addr 20 | dest.port=ip_dst_port 21 | protocol=protocol 22 | 23 | #Test Spout 24 | spout.test.parallelism.repeat=false 25 | 26 | #Kafka Spout 27 | spout.kafka.topic=bro_raw 28 | 29 | #Parsing Bolt 30 | bolt.parser.adapter=com.opensoc.parsing.parsers.BasicBroParser 31 | source.include.protocols=snmp,http,ftp,ssh,ssl,dns,socks,dnp3,smtp,dhcp,modbus,radius,irc 32 | source.exclude.protocols=x509,files,app_stats 33 | 34 | #GeoEnrichment 35 | 36 | bolt.enrichment.geo.enrichment_tag=geo 37 | bolt.enrichment.geo.adapter.table=GEO 38 | bolt.enrichment.geo.MAX_CACHE_SIZE_OBJECTS_NUM=10000 39 | bolt.enrichment.geo.MAX_TIME_RETAIN_MINUTES=10 40 | bolt.enrichment.geo.fields=ip_src_addr,ip_dst_addr 41 | 42 |
#Indexing Bolt 43 | bolt.indexing.indexname=bro_index 44 | bolt.indexing.timestamp=yyyy.MM.dd 45 | bolt.indexing.documentname=bro_doc 46 | bolt.indexing.bulk=200 47 | bolt.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 48 | 49 | 50 | #Error Indexing Bolt 51 | bolt.error.indexing.indexname=error 52 | bolt.error.indexing.timestamp=yyyy.MM 53 | bolt.error.indexing.documentname=bro_error 54 | bolt.error.indexing.bulk=1 55 | bolt.error.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 56 | 57 | #HDFS Bolt 58 | bolt.hdfs.batch.size=5000 59 | bolt.hdfs.field.delimiter=| 60 | bolt.hdfs.file.rotation.size.in.mb=5 61 | bolt.hdfs.file.system.url=hdfs://node1:9000 62 | bolt.hdfs.wip.file.path=/bro/wip 63 | bolt.hdfs.finished.file.path=/bro/rotated 64 | bolt.hdfs.compression.codec.class=org.apache.hadoop.io.compress.SnappyCodec -------------------------------------------------------------------------------- /resources/zookeeper/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | zookeeper.root.logger=INFO, CONSOLE, ROLLINGFILE 3 | zookeeper.console.threshold=INFO 4 | zookeeper.log.dir=/var/log/zookeeper 5 | zookeeper.log.file=zookeeper.log 6 | zookeeper.log.threshold=DEBUG 7 | zookeeper.tracelog.dir=/var/log/zookeeper 8 | zookeeper.tracelog.file=zookeeper_trace.log 9 | 10 | # 11 | # ZooKeeper Logging Configuration 12 | # 13 | 14 | # Format is " (, )+ 15 | 16 | # DEFAULT: console appender only 17 | log4j.rootLogger=${zookeeper.root.logger} 18 | 19 | # Example with rolling log file 20 | #log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE 21 | 22 | # Example with rolling log file and tracing 23 | #log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE 24 | 25 | # 26 | # Log INFO level and above messages to the console 27 | # 28 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 29 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} 30 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 31 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 32 | 33 | # 34 | # Add ROLLINGFILE to rootLogger to get log file output 35 | # Log DEBUG level and above messages to a log file 36 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender 37 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold} 38 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file} 39 | 40 | # Max log file size of 10MB 41 | log4j.appender.ROLLINGFILE.MaxFileSize=10MB 42 | # uncomment the next line to limit number of backup files 43 | #log4j.appender.ROLLINGFILE.MaxBackupIndex=10 44 | 45 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout 46 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 47 | 48 | 49 | # 50 | # Add TRACEFILE to rootLogger to get log file output 51 | # Log DEBUG level and above messages to a log file 52 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender 53 | log4j.appender.TRACEFILE.Threshold=TRACE 54 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file} 55 | 56 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout 57 | ### Notice we are including log4j's NDC here (%x) 58 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n 
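The values at the top of this log4j.properties are defaults meant to be overridden with JVM system properties; out of the box zkServer.sh forces zookeeper.root.logger to INFO,CONSOLE, which is why the comments above talk about adding ROLLINGFILE to the root logger. A minimal sketch of turning the rolling file appender on, assuming the stock zkServer.sh shipped with zookeeper-3.4.6 (it honors the ZOO_LOG_DIR and ZOO_LOG4J_PROP environment variables); the supervisor-zookeeper.conf in this repo does not set these itself: # illustrative: route logs through the ROLLINGFILE appender defined above into /var/log/zookeeper ZOO_LOG_DIR=/var/log/zookeeper ZOO_LOG4J_PROP="INFO,ROLLINGFILE" /opt/zookeeper/bin/zkServer.sh start-foreground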
-------------------------------------------------------------------------------- /resources/opensoc/config/topologies/pcap/topology.conf: -------------------------------------------------------------------------------- 1 | include = ../../etc/env/environment_common.conf 2 | include = ../../etc/env/es_connection.conf 3 | include = ../../etc/env/hdfs_connection.conf 4 | include = ../../etc/env/mysql_connection.conf 5 | include = metrics.conf 6 | include = features_enabled.conf 7 | 8 | #Global Properties 9 | 10 | debug.mode=true 11 | local.mode=true 12 | num.workers=1 13 | num.ackers=1 14 | 15 | #Standard 5-tuple fields 16 | 17 | source.ip=ip_src_addr 18 | source.port=ip_src_port 19 | dest.ip=ip_dst_addr 20 | dest.port=ip_dst_port 21 | protocol=protocol 22 | 23 | #Kafka Spout 24 | spout.kafka.buffer.size.bytes=1024000 25 | spout.kafka.consumer.id=pcap.kafka 26 | spout.kafka.fetch.size.bytes=1024 27 | spout.kafka.forcefromstart=false 28 | spout.kafka.socket.timeout.ms=600000 29 | spout.kafka.start.offset.time=-1 30 | spout.kafka.zk.root=/storm/topology/pcap/kafka 31 | spout.kafka.topic=pcap_raw 32 | 33 | #Parser Bolt 34 | bolt.parser.enabled=true 35 | bolt.parser.num.of.key.chars.to.use.for.shuffle.grouping=6 36 | bolt.parser.ts.precision=MICRO 37 | 38 | #Test Spout 39 | spout.test.parallelism.repeat=false 40 | 41 | #Kafka Spout 42 | spout.kafka.topic=pcap_raw 43 | 44 | #Indexing Bolt 45 | bolt.indexing.indexname=pcap 46 | bolt.indexing.timestamp=yyyy.MM.dd.HH 47 | bolt.indexing.documentname=pcap_doc 48 | bolt.indexing.bulk=1 49 | bolt.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 50 | 51 | #Error Indexing Bolt 52 | bolt.error.indexing.indexname=error 53 | bolt.error.indexing.timestamp=yyyy.MM 54 | bolt.error.indexing.documentname=pcap_error 55 | bolt.error.indexing.bulk=1 56 | bolt.error.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 57 | 58 | #HDFS Bolt 59 | bolt.hdfs.batch.size=5000 60 | bolt.hdfs.field.delimiter=| 61 | bolt.hdfs.file.rotation.size.in.mb=5 62 | bolt.hdfs.file.system.url=hdfs://node1:9000 63 | bolt.hdfs.wip.file.path=/pcap/wip 64 | bolt.hdfs.finished.file.path=/pcap/rotated 65 | bolt.hdfs.compression.codec.class=org.apache.hadoop.io.compress.SnappyCodec 66 | 67 | #HBase Bolt 68 | bolt.hbase.table.name=pcap 69 | ## Define the hbase table columns in the form <columnFamily1>:<column1>,<column2>,<column3>|<columnFamily2>:<column1>,<column2>|.......
70 | bolt.hbase.table.fields=t:pcap 71 | bolt.hbase.table.key.tuple.field.name=pcap_id 72 | bolt.hbase.table.timestamp.tuple.field.name=timestamp 73 | bolt.hbase.enable.batching=false 74 | bolt.hbase.write.buffer.size.in.bytes=2000000 75 | bolt.hbase.durability=SKIP_WAL 76 | bolt.hbase.partitioner.region.info.refresh.interval.mins=60 77 | 78 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/topology.conf: -------------------------------------------------------------------------------- 1 | include = ../../etc/env/environment_common.conf 2 | include = ../../etc/env/es_connection.conf 3 | include = ../../etc/env/hdfs_connection.conf 4 | include = ../../etc/env/mysql_connection.conf 5 | include = metrics.conf 6 | include = features_enabled.conf 7 | 8 | #Global Properties 9 | 10 | debug.mode=true 11 | local.mode=true 12 | num.workers=1 13 | num.ackers=1 14 | #Standard 5-tuple fields 15 | 16 | source.ip=ip_src_addr 17 | source.port=ip_src_port 18 | dest.ip=ip_dst_addr 19 | dest.port=ip_dst_port 20 | protocol=protocol 21 | 22 | #Test Spout 23 | spout.test.parallelism.repeat=false 24 | 25 | #Kafka Spout 26 | spout.kafka.topic=sourcefire_raw 27 | 28 | #Parser Bolt 29 | bolt.parser.adapter=com.opensoc.parsing.parsers.BasicSourcefireParser 30 | 31 | #GeoEnrichment 32 | 33 | bolt.enrichment.geo.enrichment_tag=geo 34 | bolt.enrichment.geo.adapter.table=GEO 35 | bolt.enrichment.geo.MAX_CACHE_SIZE_OBJECTS_NUM=100 36 | bolt.enrichment.geo.MAX_TIME_RETAIN_MINUTES=10 37 | bolt.enrichment.geo.fields=ip_src_addr,ip_dst_addr 38 | 39 | #Indexing Bolt 40 | bolt.indexing.indexname=sourcefire_index 41 | bolt.indexing.timestamp=yyyy.MM.dd 42 | bolt.indexing.documentname=sourcefire_doc 43 | bolt.indexing.bulk=1 44 | bolt.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 45 | 46 | #Alerts Indexing Bolt 47 | bolt.alerts.indexing.indexname=alert 48 | bolt.alerts.indexing.timestamp=yyyy.MM.dd 49 | bolt.alerts.indexing.documentname=sourcefire_alert 50 | bolt.alerts.indexing.bulk=1 51 | bolt.alerts.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 52 | 53 | #Error Indexing Bolt 54 | bolt.error.indexing.indexname=error 55 | bolt.error.indexing.timestamp=yyyy.MM 56 | bolt.error.indexing.documentname=sourcefire_error 57 | bolt.error.indexing.bulk=1 58 | bolt.error.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 59 | 60 | #Alerts Bolt 61 | bolt.alerts.adapter=com.opensoc.alerts.adapters.AllAlertAdapter 62 | com.opensoc.alerts.adapters.AllAlertAdapter.whitelist_table_name = ip_whitelist 63 | com.opensoc.alerts.adapters.AllAlertAdapter.blacklist_table_name = ip_blacklist 64 | com.opensoc.alerts.adapters.AllAlertAdapter.quorum=node2,node3,node4 65 | com.opensoc.alerts.adapters.AllAlertAdapter.port=2181 66 | com.opensoc.alerts.adapters.AllAlertAdapter._MAX_CACHE_SIZE_OBJECTS_NUM=25 67 | com.opensoc.alerts.adapters.AllAlertAdapter._MAX_TIME_RETAIN_MINUTES=10 68 | 69 | #HDFS Bolt 70 | bolt.hdfs.batch.size=5000 71 | bolt.hdfs.field.delimiter=| 72 | bolt.hdfs.file.rotation.size.in.mb=5 73 | bolt.hdfs.file.system.url=hdfs://node1:9000 74 | bolt.hdfs.wip.file.path=/sourcefire/wip 75 | bolt.hdfs.finished.file.path=/sourcefire/rotated 76 | bolt.hdfs.compression.codec.class=org.apache.hadoop.io.compress.SnappyCodec 77 | 78 | #Kafka Bolt 79 | bolt.kafka.topic=sourcefire_enriched 80 | -------------------------------------------------------------------------------- 
/resources/opensoc/config/topologies/bro/features_enabled.conf: -------------------------------------------------------------------------------- 1 | #Enable and disable features for each topology 2 | 3 | #Feature: Test spout 4 | ##Feature Description: Reads telemetry from file and ingests it into topology. Used for testing or bulk loading the topology 5 | 6 | spout.test.name=TestSpout 7 | spout.test.enabled=false 8 | spout.test.num.tasks=1 9 | spout.test.parallelism.hint=1 10 | 11 | #Feature: Kafka spout 12 | ##Feature Description: Acts as a Kafka consumer. Takes messages from a Kafka topic and ingests them into a topology 13 | 14 | spout.kafka.name=KafkaSpout 15 | spout.kafka.enabled=true 16 | spout.kafka.num.tasks=1 17 | spout.kafka.parallelism.hint=1 18 | 19 | #Feature: Parser Bolt 20 | ##Feature Description: Parses telemetry from its native format into a native JSON 21 | 22 | parser.bolt.name=ParserBolt 23 | bolt.parser.name=ParserBolt 24 | bolt.parser.enabled=true 25 | bolt.parser.num.tasks=1 26 | bolt.parser.parallelism.hint=1 27 | 28 | #Feature: Host Enrichment 29 | ##Feature Description: Appends information about known hosts to a telemetry message 30 | 31 | bolt.enrichment.host.name=HostEnrichment 32 | bolt.enrichment.host.enabled=false 33 | bolt.enrichment.host.num.tasks=1 34 | bolt.enrichment.host.parallelism.hint=1 35 | 36 | #Feature: Geo Enrichment 37 | ##Feature Description: Appends geo information about known non-local IPs to a telemetry message 38 | 39 | bolt.enrichment.geo.name=GeoEnrichment 40 | bolt.enrichment.geo.enabled=true 41 | bolt.enrichment.geo.num.tasks=1 42 | bolt.enrichment.geo.parallelism.hint=1 43 | 44 | #Feature: Whois Enrichment 45 | ##Feature Description: Appends whois information about known domains to a telemetry message 46 | 47 | bolt.enrichment.whois.name=WhoisEnrichment 48 | bolt.enrichment.whois.enabled=false 49 | bolt.enrichment.whois.num.tasks=1 50 | bolt.enrichment.whois.parallelism.hint=1 51 | 52 | #Feature: CIF Enrichment 53 | ##Feature Description: Appends information from CIF threat intelligence feeds to a telemetry message 54 | 55 | bolt.enrichment.cif.name=CIFBolt 56 | bolt.enrichment.cif.enabled=false 57 | bolt.enrichment.cif.num.tasks=1 58 | bolt.enrichment.cif.parallelism.hint=1 59 | 60 | #Feature: Threat Enrichment 61 | ##Feature Description: Appends information from Threat intelligence feeds to a telemetry message 62 | 63 | bolt.enrichment.threat.name=ThreatBolt 64 | bolt.enrichment.threat.enabled=false 65 | bolt.enrichment.threat.num.tasks=1 66 | bolt.enrichment.threat.parallelism.hint=1 67 | 68 | #Feature: Rules-Based Alerts 69 | ##Feature Description: Tags messages with rules-based alerts 70 | 71 | bolt.alerts.name=Alerts 72 | bolt.alerts.enabled=false 73 | bolt.alerts.num.tasks=1 74 | bolt.alerts.parallelism.hint=1 75 | 76 | #Feature: Indexer 77 | ##Feature Description: Indexes telemetry messages in ElasticSearch or Solr 78 | 79 | bolt.indexing.name=IndexBolt 80 | bolt.indexing.enabled=true 81 | bolt.indexing.num.tasks=1 82 | bolt.indexing.parallelism.hint=1 83 | 84 | #Feature: Alerts Indexer 85 | ##Feature Description: Indexes alert messages in ElasticSearch or Solr 86 | 87 | bolt.alerts.indexing.name=AlertIndexBolt 88 | bolt.alerts.indexing.enabled=false 89 | bolt.alerts.indexing.num.tasks=1 90 | bolt.alerts.indexing.parallelism.hint=1 91 | 92 | #Feature: Error Indexer 93 | ##Feature Description: Indexes error messages in ElasticSearch or Solr 94 | 95 | bolt.error.indexing.name=ErrorIndexBolt 96 | bolt.error.indexing.enabled=true 
97 | bolt.error.indexing.num.tasks=1 98 | bolt.error.indexing.parallelism.hint=1 99 | 100 | #Feature: Kafka Bolt 101 | ##Feature Description: Writes telemetry messages back into a Kafka topic 102 | 103 | bolt.kafka.name=KafkaBolt 104 | bolt.kafka.enabled=false 105 | bolt.kafka.num.tasks=1 106 | bolt.kafka.parallelism.hint=1 107 | 108 | #Feature: HDFS Bolt 109 | ##Feature Description: Writes telemetry messages into HDFS 110 | 111 | bolt.hdfs.name=HDFSBolt 112 | bolt.hdfs.enabled=false 113 | bolt.hdfs.num.tasks=1 114 | bolt.hdfs.parallelism.hint=1 115 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/features_enabled.conf: -------------------------------------------------------------------------------- 1 | #Enable and disable features for each topology 2 | 3 | #Feature: Test spout 4 | ##Feature Description: Reads telemetry from file and ingests it into topology. Used for testing or bulk loading the topology 5 | 6 | spout.test.name=TestSpout 7 | spout.test.enabled=false 8 | spout.test.num.tasks=1 9 | spout.test.parallelism.hint=1 10 | 11 | #Feature: Kafka spout 12 | ##Feature Description: Acts as a Kafka consumer. Takes messages from a Kafka topic and ingests them into a topology 13 | 14 | spout.kafka.name=KafkaSpout 15 | spout.kafka.enabled=true 16 | spout.kafka.num.tasks=1 17 | spout.kafka.parallelism.hint=1 18 | 19 | #Feature: Parser Bolt 20 | ##Feature Description: Parses telemetry from its native format into a native JSON 21 | 22 | parser.bolt.name=ParserBolt 23 | bolt.parser.name=ParserBolt 24 | bolt.parser.enabled=true 25 | bolt.parser.num.tasks=1 26 | bolt.parser.parallelism.hint=1 27 | 28 | #Feature: Host Enrichment 29 | ##Feature Description: Appends information about known hosts to a telemetry message 30 | 31 | bolt.enrichment.host.name=HostEnrichment 32 | bolt.enrichment.host.enabled=false 33 | bolt.enrichment.host.num.tasks=1 34 | bolt.enrichment.host.parallelism.hint=1 35 | 36 | #Feature: Geo Enrichment 37 | ##Feature Description: Appends geo information about known non-local IPs to a telemetry message 38 | 39 | bolt.enrichment.geo.name=GeoEnrichment 40 | bolt.enrichment.geo.enabled=true 41 | bolt.enrichment.geo.num.tasks=1 42 | bolt.enrichment.geo.parallelism.hint=1 43 | 44 | #Feature: Whois Enrichment 45 | ##Feature Description: Appends whois information about known domains to a telemetry message 46 | 47 | bolt.enrichment.whois.name=WhoisEnrichment 48 | bolt.enrichment.whois.enabled=false 49 | bolt.enrichment.whois.num.tasks=1 50 | bolt.enrichment.whois.parallelism.hint=1 51 | 52 | #Feature: CIF Enrichment 53 | ##Feature Description: Appends information from CIF threat intelligence feeds to a telemetry message 54 | 55 | bolt.enrichment.cif.name=CIFBolt 56 | bolt.enrichment.cif.enabled=false 57 | bolt.enrichment.cif.num.tasks=1 58 | bolt.enrichment.cif.parallelism.hint=1 59 | 60 | #Feature: Threat Enrichment 61 | ##Feature Description: Appends information from Threat intelligence feeds to a telemetry message 62 | 63 | bolt.enrichment.threat.name=ThreatBolt 64 | bolt.enrichment.threat.enabled=false 65 | bolt.enrichment.threat.num.tasks=1 66 | bolt.enrichment.threat.parallelism.hint=1 67 | 68 | #Feature: Rules-Based Alerts 69 | ##Feature Description: Tags messages with rules-based alerts 70 | 71 | bolt.alerts.name=Alerts 72 | bolt.alerts.enabled=true 73 | bolt.alerts.num.tasks=1 74 | bolt.alerts.parallelism.hint=1 75 | 76 | #Feature: Indexer 77 | ##Feature Description: Indexes telemetry messages in 
ElasticSearch or Solr 78 | 79 | bolt.indexing.name=IndexBolt 80 | bolt.indexing.enabled=true 81 | bolt.indexing.num.tasks=1 82 | bolt.indexing.parallelism.hint=1 83 | 84 | #Feature: Alerts Indexer 85 | ##Feature Description: Indexes alert messages in ElasticSearch or Solr 86 | 87 | bolt.alerts.indexing.name=AlertIndexBolt 88 | bolt.alerts.indexing.enabled=true 89 | bolt.alerts.indexing.num.tasks=1 90 | bolt.alerts.indexing.parallelism.hint=1 91 | 92 | #Feature: Error Indexer 93 | ##Feature Description: Indexes error messages in ElasticSearch or Solr 94 | 95 | bolt.error.indexing.name=ErrorIndexBolt 96 | bolt.error.indexing.enabled=true 97 | bolt.error.indexing.num.tasks=1 98 | bolt.error.indexing.parallelism.hint=1 99 | 100 | #Feature: Kafka Bolt 101 | ##Feature Description: Writes telemetry messages back into a Kafka topic 102 | 103 | bolt.kafka.name=KafkaBolt 104 | bolt.kafka.enabled=false 105 | bolt.kafka.num.tasks=1 106 | bolt.kafka.parallelism.hint=1 107 | 108 | #Feature: HDFS Bolt 109 | ##Feature Description: Writes telemetry messages into HDFS 110 | 111 | bolt.hdfs.name=HDFSBolt 112 | bolt.hdfs.enabled=false 113 | bolt.hdfs.num.tasks=1 114 | bolt.hdfs.parallelism.hint=1 115 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | Vagrant.require_version ">= 1.4.3" 2 | VAGRANTFILE_API_VERSION = "2" 3 | 4 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 5 | numNodes = 4 6 | r = numNodes..1 7 | (r.first).downto(r.last).each do |i| 8 | config.vm.define "node#{i}" do |node| 9 | node.vm.box = "chef/centos-6.5" 10 | node.vm.provider "virtualbox" do |v| 11 | v.name = "node#{i}" 12 | v.customize ["modifyvm", :id, "--memory", "1024"] 13 | end 14 | node.vm.network :private_network, ip: "10.0.0.10#{i}" 15 | 16 | # base setup 17 | node.vm.hostname = "node#{i}" 18 | 19 | node.vm.provision "shell" do |s| 20 | s.path = "scripts/setup-os.sh" 21 | s.args = "-t #{numNodes}" 22 | end 23 | 24 | node.vm.provision "shell", path: "scripts/setup-java.sh" 25 | 26 | if i == 1 27 | # namenode 28 | node.vm.provision "shell" do |s| 29 | s.path = "scripts/setup-hadoop.sh" 30 | s.args = "-r namenode -t #{numNodes}" 31 | end 32 | node.vm.network "forwarded_port", guest: 50070, host: 50070 33 | node.vm.network "forwarded_port", guest: 8088, host:8088 34 | 35 | # storm nimbus 36 | node.vm.provision "shell" do |s| 37 | s.path = "scripts/setup-storm.sh" 38 | s.args = "-r nimbus -t #{numNodes}" 39 | end 40 | node.vm.network "forwarded_port", guest: 8080, host: 8080 41 | 42 | # hbase master 43 | node.vm.provision "shell" do |s| 44 | s.path = "scripts/setup-hbase.sh" 45 | s.args = "-r master -t #{numNodes}" 46 | end 47 | node.vm.network "forwarded_port", guest: 60010, host: 60010 48 | 49 | # hive 50 | node.vm.provision "shell" do |s| 51 | s.path = "scripts/setup-hive.sh" 52 | end 53 | 54 | node.vm.provision "shell" do |s| 55 | s.path = "scripts/setup-elasticsearch.sh" 56 | s.args = "-c -i 10.0.0.10#{i}" 57 | end 58 | node.vm.network "forwarded_port", guest: 9200, host:9200 59 | 60 | # setup mysql for geo enrichment 61 | node.vm.provision "shell", path: "scripts/setup-geo-enrichment.sh" 62 | else 63 | # zookeeper 64 | node.vm.provision "shell" do |s| 65 | s.path = "scripts/setup-zookeeper.sh" 66 | s.args = "-t #{numNodes}" 67 | end 68 | # datanode 69 | node.vm.provision "shell" do |s| 70 | s.path = "scripts/setup-hadoop.sh" 71 | s.args = "-r datanode -t #{numNodes}" 72 | end 73 | # hbase 
regionserver 74 | node.vm.provision "shell" do |s| 75 | s.path = "scripts/setup-hbase.sh" 76 | s.args = "-r regionserver -t #{numNodes}" 77 | end 78 | # kafka broker 79 | node.vm.provision "shell" do |s| 80 | s.path = "scripts/setup-kafka.sh" 81 | s.args = "-t #{numNodes}" 82 | end 83 | # storm supervisor 84 | node.vm.provision "shell" do |s| 85 | s.path = "scripts/setup-storm.sh" 86 | s.args = "-r supervisor -t #{numNodes}" 87 | end 88 | # elasticsearch 89 | node.vm.provision "shell" do |s| 90 | s.path = "scripts/setup-elasticsearch.sh" 91 | s.args = "-i 10.0.0.10#{i}" 92 | end 93 | # reload supervisord 94 | end 95 | 96 | #After everything is provisioned, start Supervisor 97 | node.vm.provision "shell", inline: "pgrep supervisord || start supervisor" 98 | end 99 | end 100 | end 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenSOC Vagrant 2 | 3 | A collection of shell scripts and a Vagrantfile for building an OpenSOC cluster. There are two primary goals we hope to achieve with this project: 4 | 5 | * Create a turnkey OpenSOC cluster to allow users to play with OpenSOC with minimal setup 6 | * Provide a disposable environment where developers can run and test OpenSOC topologies. 7 | 8 | To accomplish this, we have provided a collection of bash scripts that are orchestrated using [Vagrant](https://www.vagrantup.com/) and [Fabric](http://www.fabfile.org/). Both of these tools should be installed prior to using this project. 9 | 10 | ## Inspiration 11 | 12 | Credit to https://github.com/vangj/vagrant-hadoop-2.4.1-spark-1.0.1 for the inspiration; this project is heavily influenced by that one. 13 | 14 | ## Quick Start 15 | 16 | If you don't want to bother with the details of the cluster, and just want to see OpenSOC, place an RPM for Oracle's JVM in `resources/` and edit `common.sh` to set `JRE_RPM` to the name of the RPM. Then run: 17 | 18 | ``` 19 | vagrant up 20 | fab vagrant quickstart 21 | ``` 22 | 23 | Finally, point your browser at https://localhost:8443 24 | 25 | This should get you a running OpenSOC cluster with Bro, Snort, and PCAP. If you are looking to customize the setup or run your own topologies, see the sections below on running the cluster and running an OpenSOC topology. 26 | 27 | ## Advanced Setup 28 | 29 | If you are interested in tweaking the underlying cluster, running your own OpenSOC topology, or just want to understand how it all works, this section will break down how the cluster is started and how topologies can be run. 30 | 31 | ## Running the Cluster 32 | 33 | To get the cluster up and running, do the following: 34 | 35 | * Place an RPM for Oracle's JVM in `resources/` and edit `common.sh` to set `JRE_RPM` to the name of the RPM 36 | * Run `vagrant up` 37 | * Run `fab vagrant postsetup` 38 | 39 | The `vagrant up` command will build the VMs for the cluster and install all dependencies, which include: 40 | 41 | * Hadoop 2.6 42 | * HBase 0.98 43 | * Kafka 0.8.1.1 44 | * Zookeeper 3.4.6 45 | * Hive 1.2.0 46 | * Elasticsearch 1.5.2 47 | * Storm 0.9.4 48 | 49 | After this, the `fab vagrant postsetup` command will run a handful of tasks that need to occur after the cluster is running, but before it can be used.
These are: 50 | 51 | * Formatting HDFS 52 | * Starting Hadoop cluster 53 | * Starting HBase cluster 54 | * Setting up the HBase whitelist table with RFC 1918 addresses 55 | 56 | ## Running an OpenSOC Topology 57 | 58 | After provisioning the cluster as described above, you can use some more Fabric tasks to run a topology. Before you start, you should have the following: 59 | 60 | * the opensoc-streaming repo cloned locally 61 | * a copy of OpenSOC configs in resources/opensoc/OpenSOC_Configs 62 | 63 | Then you can run `fab vagrant start_topology:` which will do the following: 64 | 65 | * cd into the opensoc-streaming repo, and run `mvn clean package` 66 | * copy the newly built OpenSOC-Topologies.jar to resources/opensoc, where it will be available to the VMs 67 | * Submit `` and the topology jar to Nimbus 68 | 69 | If your topology is pulling data from Kafka, you can create a topic with the Fabric task `fab vagrant create_topic:` 70 | 71 | ## Virtual Machines 72 | 73 | By default, 4 VMs will be created. They are named node1, node2, node3, and node4. Here is a breakdown of what services run where: 74 | 75 | * node1 76 | * HDFS Namenode 77 | * YARN Resourcemanager 78 | * Storm Nimbus and UI 79 | * HBase Master 80 | * Elasticsearch Master 81 | * MySQL (Hive metastore and geo enrichment store) 82 | 83 | * node2-4 84 | * Kafka Broker 85 | * Zookeeper 86 | * HDFS Datanode 87 | * YARN Nodemanager 88 | * Storm Supervisor 89 | * HBase Regionserver 90 | * Elasticsearch Data Node 91 | 92 | ## Port Forwarding 93 | 94 | Some services' UIs are forwarded to localhost for ease of use. You can find the following services forwarded by default: 95 | 96 | * HDFS - localhost:50070 -> node1:50070 97 | * HBase - localhost:60010 -> node1:60010 98 | * Storm UI - localhost:8080 -> node1:8080 99 | * Elasticsearch - localhost:9200 -> node1:9200 100 | * OpenSOC-UI - localhost:8443 -> node1:443 101 | 102 | ## Progress 103 | 104 | Here is a list of what will be provisioned via Vagrant and its current status: 105 | 106 | * Java - DONE 107 | * Zookeeper - DONE 108 | * HDFS/Yarn - DONE 109 | * Kafka - DONE 110 | * Storm - DONE 111 | * HBase - DONE 112 | * Hive - DONE 113 | * Elasticsearch - DONE 114 | * GeoIP Enrichment Data - DONE 115 | * OpenSOC UI 116 | * OpenSOC Storm Topologies 117 | 118 | -------------------------------------------------------------------------------- /resources/kafka/server.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker.
This must be set to a unique integer for each broker. 20 | #broker.id=0 21 | 22 | ############################# Socket Server Settings ############################# 23 | 24 | # The port the socket server listens on 25 | port=9092 26 | 27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces 28 | #host.name=localhost 29 | 30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the 31 | # value for "host.name" if configured. Otherwise, it will use the value returned from 32 | # java.net.InetAddress.getCanonicalHostName(). 33 | #advertised.host.name= 34 | 35 | # The port to publish to ZooKeeper for clients to use. If this is not set, 36 | # it will publish the same port that the broker binds to. 37 | #advertised.port= 38 | 39 | # The number of threads handling network requests 40 | num.network.threads=2 41 | 42 | # The number of threads doing disk I/O 43 | num.io.threads=8 44 | 45 | # The send buffer (SO_SNDBUF) used by the socket server 46 | socket.send.buffer.bytes=1048576 47 | 48 | # The receive buffer (SO_RCVBUF) used by the socket server 49 | socket.receive.buffer.bytes=1048576 50 | 51 | # The maximum size of a request that the socket server will accept (protection against OOM) 52 | socket.request.max.bytes=104857600 53 | 54 | 55 | ############################# Log Basics ############################# 56 | 57 | # A comma seperated list of directories under which to store log files 58 | log.dirs=/var/lib/kafka-logs 59 | 60 | # The default number of log partitions per topic. More partitions allow greater 61 | # parallelism for consumption, but this will also result in more files across 62 | # the brokers. 63 | num.partitions=1 64 | 65 | ############################# Log Flush Policy ############################# 66 | 67 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 68 | # the OS cache lazily. The following configurations control the flush of data to disk. 69 | # There are a few important trade-offs here: 70 | # 1. Durability: Unflushed data may be lost if you are not using replication. 71 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 72 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 73 | # The settings below allow one to configure the flush policy to flush data after a period of time or 74 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 75 | 76 | # The number of messages to accept before forcing a flush of data to disk 77 | #log.flush.interval.messages=10000 78 | 79 | # The maximum amount of time a message can sit in a log before we force a flush 80 | #log.flush.interval.ms=1000 81 | 82 | ############################# Log Retention Policy ############################# 83 | 84 | # The following configurations control the disposal of log segments. The policy can 85 | # be set to delete segments after a period of time, or after a given size has accumulated. 86 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 87 | # from the end of the log. 88 | 89 | # The minimum age of a log file to be eligible for deletion 90 | log.retention.hours=168 91 | 92 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 93 | # segments don't drop below log.retention.bytes. 
94 | #log.retention.bytes=1073741824 95 | 96 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 97 | log.segment.bytes=536870912 98 | 99 | # The interval at which log segments are checked to see if they can be deleted according 100 | # to the retention policies 101 | log.retention.check.interval.ms=60000 102 | 103 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. 104 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 105 | log.cleaner.enable=false 106 | 107 | ############################# Zookeeper ############################# 108 | 109 | # Zookeeper connection string (see zookeeper docs for details). 110 | # This is a comma separated host:port pairs, each corresponding to a zk 111 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 112 | # You can also append an optional chroot string to the urls to specify the 113 | # root directory for all kafka znodes. 114 | #zookeeper.connect=localhost:2181 115 | 116 | # Timeout in ms for connecting to zookeeper 117 | zookeeper.connection.timeout.ms=1000000 118 | -------------------------------------------------------------------------------- /fabfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import xml.etree.ElementTree as ETree 4 | 5 | from fabric.api import env, local, run, sudo, execute, hosts 6 | from fabric.context_managers import shell_env, lcd, cd 7 | from fabric.colors import yellow, green 8 | 9 | # configure fabric to talk to the VMs 10 | temp_ssh_config = '.ssh_config' 11 | 12 | def vagrant(): 13 | '''sets up fabric environment to work with vagrant VMs''' 14 | with open(temp_ssh_config, 'w') as f: 15 | f.write(local('vagrant ssh-config', capture=True)) 16 | 17 | global total_nodes 18 | total_nodes = int(local('vagrant status | grep node | wc -l', capture=True)) 19 | 20 | env.user = 'vagrant' 21 | env.use_ssh_config = True 22 | env.ssh_config_path = temp_ssh_config 23 | 24 | @hosts('node1') 25 | def format_namenode(): 26 | '''Formats namenode on node1''' 27 | with shell_env(JAVA_HOME='/usr/java/default'): 28 | sudo('/opt/hadoop/bin/hdfs namenode -format vagrant -nonInteractive', warn_only=True) 29 | 30 | 31 | def supervisorctl_start(process): 32 | '''Start a process managed by supervisor''' 33 | sudo('supervisorctl start {0}'.format(process)) 34 | 35 | def supervisorctl_stop(process): 36 | '''Stop a process managed by supervisor''' 37 | sudo('supervisorctl stop {0}'.format(process)) 38 | 39 | 40 | def postsetup(): 41 | '''Perform post vagrant up tasks on cluster''' 42 | execute(format_namenode) 43 | execute(supervisorctl_start, 'namenode', host='node1') 44 | execute(supervisorctl_start, 'resourcemanager', host='node1') 45 | execute(supervisorctl_start, 'master', host='node1') 46 | for x in range(2,total_nodes+1): 47 | execute(supervisorctl_start, 'datanode', host='node{0}'.format(x)) 48 | execute(supervisorctl_start, 'nodemanager', host='node{0}'.format(x)) 49 | execute(supervisorctl_start, 'regionserver', host='node{0}'.format(x)) 50 | 51 | execute(init_ip_whitelist,host='node1') 52 | 53 | def supervisorctl_reread_update(): 54 | sudo('supervisorctl reread') 55 | sudo('supervisorctl update') 56 | 57 | def update_supervisor(): 58 | execute(supervisorctl_reread_update, hosts=['node{0}'.format(x) for x in range(1,total_nodes+1)]) 59 | 
60 | def supervisorctl_status(): 61 | sudo('supervisorctl status') 62 | 63 | def status(): 64 | execute(supervisorctl_status, hosts=['node{0}'.format(x) for x in range(1,total_nodes+1)]) 65 | 66 | def init_ip_whitelist(): 67 | run('/opt/hbase/bin/hbase shell /vagrant/resources/opensoc/hbase_ip_whitelist.rb') 68 | 69 | 70 | @hosts('node2') 71 | def create_topic(topic, partitions=1, replication_factor=1): 72 | run('/opt/kafka/bin/kafka-topics.sh --zookeeper localhost --create --topic {0} --partitions {1} --replication-factor {2}'.format( 73 | topic, 74 | partitions, 75 | replication_factor 76 | )) 77 | 78 | def get_topologies(repo='../opensoc-streaming'): 79 | '''Build and fetch a new OpenSOC topology jar from repo (default: ../opensoc-streaming)''' 80 | 81 | pom_file = os.path.join(repo, 'pom.xml') 82 | pom = ETree.parse(pom_file) 83 | version = pom.getroot().find('{http://maven.apache.org/POM/4.0.0}version').text 84 | rev = local("git log | head -1 | cut -d ' ' -f 2 | cut -c1-11", capture=True) 85 | 86 | topology_jar = os.path.join( 87 | repo, 88 | 'OpenSOC-Topologies', 89 | 'target', 90 | 'OpenSOC-Topologies-{0}.jar'.format(version) 91 | ) 92 | 93 | vagrant_jar = 'OpenSOC-Topologies-{0}-{1}.jar'.format(version, rev) 94 | vagrant_jar_path = os.path.join('resources/opensoc', vagrant_jar) 95 | 96 | if os.path.exists(vagrant_jar_path): 97 | print yellow('{0} already exists. Not building a new jar.'.format(vagrant_jar_path)) 98 | print yellow('Remove the existing jar and run this command again to build a fresh jar.') 99 | return vagrant_jar 100 | 101 | with lcd(repo): 102 | local('mvn clean package') 103 | 104 | local('cp {0} {1}'.format( 105 | topology_jar, 106 | vagrant_jar_path 107 | )) 108 | 109 | return vagrant_jar 110 | 111 | @hosts('node1') 112 | def start_topology(topology, repo=None, local_mode=False, config_path='/vagrant/opensoc/OpenSOC_Configs/', generator_spout=False): 113 | '''Builds and copies a fresh topology jar from a locally cloned opensoc-streaming and submits it to storm''' 114 | 115 | if repo is not None: 116 | jar = get_topologies(repo) 117 | else: 118 | jar = get_topologies() 119 | 120 | if local_mode: 121 | local_mode='true' 122 | else: 123 | local_mode='false' 124 | 125 | if generator_spout: 126 | generator_spout='true' 127 | else: 128 | generator_spout='false' 129 | 130 | with cd('/vagrant/resources/opensoc/'): 131 | run('/opt/storm/bin/storm jar {0} {1} -local_mode {2} -config_path {3} -generator_spout {4}'.format( 132 | jar, 133 | topology, 134 | local_mode, 135 | config_path, 136 | generator_spout 137 | )) 138 | 139 | def quickstart(): 140 | '''Start OpenSOC with bro, snort, and pcap''' 141 | # run post setup tasks 142 | postsetup() 143 | 144 | # clone opensoc-streaming if its not here locally 145 | if not os.path.exists('../opensoc-streaming'): 146 | with lcd('../'): 147 | local('git clone https://github.com/OpenSOC/opensoc-streaming.git') 148 | else: 149 | print green('Found a copy of opensoc-streaming in ../opensoc-streaming.') 150 | 151 | for top in ['bro', 'sourcefire', 'pcap']: 152 | 153 | topic = '{0}_raw'.format(top) 154 | # create kafka topic 155 | execute(create_topic, topic, host='node2') 156 | 157 | # launch topology 158 | topology = 'com.opensoc.topology.{0}'.format(top.capitalize()) 159 | execute(start_topology, topology, config_path='config/') 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /resources/elasticsearch/elasticsearch.yml: 
-------------------------------------------------------------------------------- 1 | ################################### Cluster ################################### 2 | 3 | # Cluster name identifies your cluster for auto-discovery. If you're running 4 | # multiple clusters on the same network, make sure you're using unique names. 5 | # 6 | cluster.name: "opensoc-vagrant" 7 | 8 | 9 | #################################### Node ##################################### 10 | 11 | # Node names are generated dynamically on startup, so you're relieved 12 | # from configuring them manually. You can tie this node to a specific name: 13 | # 14 | node.name: "__HOSTNAME__" 15 | 16 | # Every node can be configured to allow or deny being eligible as the master, 17 | # and to allow or deny to store the data. 18 | # 19 | # Allow this node to be eligible as a master node (enabled by default): 20 | # 21 | #node.master: true 22 | # 23 | # Allow this node to store data (enabled by default): 24 | # 25 | #node.data: true 26 | 27 | # You can exploit these settings to design advanced cluster topologies. 28 | # 29 | # 1. You want this node to never become a master node, only to hold data. 30 | # This will be the "workhorse" of your cluster. 31 | # 32 | node.master: false 33 | node.data: true 34 | # 35 | # 2. You want this node to only serve as a master: to not store any data and 36 | # to have free resources. This will be the "coordinator" of your cluster. 37 | # 38 | #node.master: true 39 | #node.data: false 40 | # 41 | # 3. You want this node to be neither master nor data node, but 42 | # to act as a "search load balancer" (fetching data from nodes, 43 | # aggregating results, etc.) 44 | # 45 | #node.master: false 46 | #node.data: false 47 | 48 | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the 49 | # Node Info API [http://localhost:9200/_nodes] or GUI tools 50 | # such as , 51 | # , 52 | # and 53 | # to inspect the cluster state. 54 | 55 | # A node can have generic attributes associated with it, which can later be used 56 | # for customized shard allocation filtering, or allocation awareness. An attribute 57 | # is a simple key value pair, similar to node.key: value, here is an example: 58 | # 59 | #node.rack: rack314 60 | 61 | # By default, multiple nodes are allowed to start from the same installation location 62 | # to disable it, set the following: 63 | #node.max_local_storage_nodes: 1 64 | 65 | 66 | #################################### Index #################################### 67 | 68 | # You can set a number of options (such as shard/replica options, mapping 69 | # or analyzer definitions, translog settings, ...) for indices globally, 70 | # in this file. 71 | # 72 | # Note, that it makes more sense to configure index settings specifically for 73 | # a certain index, either when creating it or by using the index templates API. 74 | # 75 | # See and 76 | # 77 | # for more information. 78 | 79 | # Set the number of shards (splits) of an index (5 by default): 80 | # 81 | #index.number_of_shards: 5 82 | 83 | # Set the number of replicas (additional copies) of an index (1 by default): 84 | # 85 | #index.number_of_replicas: 1 86 | 87 | # Note, that for development on a local machine, with small indices, it usually 88 | # makes sense to "disable" the distributed features: 89 | # 90 | index.number_of_shards: 1 91 | index.number_of_replicas: 0 92 | 93 | # These settings directly affect the performance of index and search operations 94 | # in your cluster. 
Assuming you have enough machines to hold shards and 95 | # replicas, the rule of thumb is: 96 | # 97 | # 1. Having more *shards* enhances the _indexing_ performance and allows to 98 | # _distribute_ a big index across machines. 99 | # 2. Having more *replicas* enhances the _search_ performance and improves the 100 | # cluster _availability_. 101 | # 102 | # The "number_of_shards" is a one-time setting for an index. 103 | # 104 | # The "number_of_replicas" can be increased or decreased anytime, 105 | # by using the Index Update Settings API. 106 | # 107 | # Elasticsearch takes care about load balancing, relocating, gathering the 108 | # results from nodes, etc. Experiment with different settings to fine-tune 109 | # your setup. 110 | 111 | # Use the Index Status API () to inspect 112 | # the index status. 113 | 114 | 115 | #################################### Paths #################################### 116 | 117 | # Path to directory containing configuration (this file and logging.yml): 118 | # 119 | path.conf: /opt/elasticsearch/config 120 | 121 | # Path to directory where to store index data allocated for this node. 122 | # 123 | path.data: /var/lib/elasticsearch 124 | # 125 | # Can optionally include more than one location, causing data to be striped across 126 | # the locations (a la RAID 0) on a file level, favouring locations with most free 127 | # space on creation. For example: 128 | # 129 | #path.data: /path/to/data1,/path/to/data2 130 | 131 | # Path to temporary files: 132 | # 133 | #path.work: /path/to/work 134 | 135 | # Path to log files: 136 | # 137 | path.logs: /var/log/elasticsearch 138 | 139 | # Path to where plugins are installed: 140 | # 141 | path.plugins: /opt/elasticsearch/plugins 142 | 143 | 144 | #################################### Plugin ################################### 145 | 146 | # If a plugin listed here is not installed for current node, the node will not start. 147 | # 148 | #plugin.mandatory: mapper-attachments,lang-groovy 149 | 150 | 151 | ################################### Memory #################################### 152 | 153 | # Elasticsearch performs poorly when JVM starts swapping: you should ensure that 154 | # it _never_ swaps. 155 | # 156 | # Set this property to true to lock the memory: 157 | # 158 | #bootstrap.mlockall: true 159 | 160 | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set 161 | # to the same value, and that the machine has enough memory to allocate 162 | # for Elasticsearch, leaving enough memory for the operating system itself. 163 | # 164 | # You should also make sure that the Elasticsearch process is allowed to lock 165 | # the memory, eg. by using `ulimit -l unlimited`. 166 | 167 | 168 | ############################## Network And HTTP ############################### 169 | 170 | # Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens 171 | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node 172 | # communication. (the range means that if the port is busy, it will automatically 173 | # try the next port). 174 | 175 | # Set the bind address specifically (IPv4 or IPv6): 176 | # 177 | #network.bind_host: 192.168.0.1 178 | 179 | # Set the address other nodes will use to communicate with this node. If not 180 | # set, it is automatically derived. It must point to an actual IP address. 
181 | # 182 | network.publish_host: __IP_ADDR__ 183 | 184 | # Set both 'bind_host' and 'publish_host': 185 | # 186 | #network.host: 192.168.0.1 187 | 188 | # Set a custom port for the node to node communication (9300 by default): 189 | # 190 | #transport.tcp.port: 9300 191 | 192 | # Enable compression for all communication between nodes (disabled by default): 193 | # 194 | #transport.tcp.compress: true 195 | 196 | # Set a custom port to listen for HTTP traffic: 197 | # 198 | #http.port: 9200 199 | 200 | # Set a custom allowed content length: 201 | # 202 | #http.max_content_length: 100mb 203 | 204 | # Disable HTTP completely: 205 | # 206 | #http.enabled: false 207 | 208 | 209 | ################################### Gateway ################################### 210 | 211 | # The gateway allows for persisting the cluster state between full cluster 212 | # restarts. Every change to the state (such as adding an index) will be stored 213 | # in the gateway, and when the cluster starts up for the first time, 214 | # it will read its state from the gateway. 215 | 216 | # There are several types of gateway implementations. For more information, see 217 | # . 218 | 219 | # The default gateway type is the "local" gateway (recommended): 220 | # 221 | #gateway.type: local 222 | 223 | # Settings below control how and when to start the initial recovery process on 224 | # a full cluster restart (to reuse as much local data as possible when using shared 225 | # gateway). 226 | 227 | # Allow recovery process after N nodes in a cluster are up: 228 | # 229 | #gateway.recover_after_nodes: 1 230 | 231 | # Set the timeout to initiate the recovery process, once the N nodes 232 | # from previous setting are up (accepts time value): 233 | # 234 | #gateway.recover_after_time: 5m 235 | 236 | # Set how many nodes are expected in this cluster. Once these N nodes 237 | # are up (and recover_after_nodes is met), begin recovery process immediately 238 | # (without waiting for recover_after_time to expire): 239 | # 240 | #gateway.expected_nodes: 2 241 | 242 | 243 | ############################# Recovery Throttling ############################# 244 | 245 | # These settings allow to control the process of shards allocation between 246 | # nodes during initial recovery, replica allocation, rebalancing, 247 | # or when adding and removing nodes. 248 | 249 | # Set the number of concurrent recoveries happening on a node: 250 | # 251 | # 1. During the initial recovery 252 | # 253 | #cluster.routing.allocation.node_initial_primaries_recoveries: 4 254 | # 255 | # 2. During adding/removing nodes, rebalancing, etc 256 | # 257 | #cluster.routing.allocation.node_concurrent_recoveries: 2 258 | 259 | # Set to throttle throughput when recovering (eg. 100mb, by default 20mb): 260 | # 261 | #indices.recovery.max_bytes_per_sec: 20mb 262 | 263 | # Set to limit the number of open concurrent streams when 264 | # recovering a shard from a peer: 265 | # 266 | #indices.recovery.concurrent_streams: 5 267 | 268 | 269 | ################################## Discovery ################################## 270 | 271 | # Discovery infrastructure ensures nodes can be found within a cluster 272 | # and master node is elected. Multicast discovery is the default. 273 | 274 | # Set to ensure a node sees N other master eligible nodes to be considered 275 | # operational within the cluster. This should be set to a quorum/majority of 276 | # the master-eligible nodes in the cluster. 
277 | # 278 | #discovery.zen.minimum_master_nodes: 1 279 | 280 | # Set the time to wait for ping responses from other nodes when discovering. 281 | # Set this option to a higher value on a slow or congested network 282 | # to minimize discovery failures: 283 | # 284 | #discovery.zen.ping.timeout: 3s 285 | 286 | # For more information, see 287 | # 288 | 289 | # Unicast discovery allows to explicitly control which nodes will be used 290 | # to discover the cluster. It can be used when multicast is not present, 291 | # or to restrict the cluster communication-wise. 292 | # 293 | # 1. Disable multicast discovery (enabled by default): 294 | # 295 | discovery.zen.ping.multicast.enabled: false 296 | # 297 | # 2. Configure an initial list of master nodes in the cluster 298 | # to perform discovery when new nodes (master or data) are started: 299 | # 300 | discovery.zen.ping.unicast.hosts: ["node1"] 301 | 302 | # EC2 discovery allows to use AWS EC2 API in order to perform discovery. 303 | # 304 | # You have to install the cloud-aws plugin for enabling the EC2 discovery. 305 | # 306 | # For more information, see 307 | # 308 | # 309 | # See 310 | # for a step-by-step tutorial. 311 | 312 | # GCE discovery allows to use Google Compute Engine API in order to perform discovery. 313 | # 314 | # You have to install the cloud-gce plugin for enabling the GCE discovery. 315 | # 316 | # For more information, see . 317 | 318 | # Azure discovery allows to use Azure API in order to perform discovery. 319 | # 320 | # You have to install the cloud-azure plugin for enabling the Azure discovery. 321 | # 322 | # For more information, see . 323 | 324 | ################################## Slow Log ################################## 325 | 326 | # Shard level query and fetch threshold logging. 327 | 328 | #index.search.slowlog.threshold.query.warn: 10s 329 | #index.search.slowlog.threshold.query.info: 5s 330 | #index.search.slowlog.threshold.query.debug: 2s 331 | #index.search.slowlog.threshold.query.trace: 500ms 332 | 333 | #index.search.slowlog.threshold.fetch.warn: 1s 334 | #index.search.slowlog.threshold.fetch.info: 800ms 335 | #index.search.slowlog.threshold.fetch.debug: 500ms 336 | #index.search.slowlog.threshold.fetch.trace: 200ms 337 | 338 | #index.indexing.slowlog.threshold.index.warn: 10s 339 | #index.indexing.slowlog.threshold.index.info: 5s 340 | #index.indexing.slowlog.threshold.index.debug: 2s 341 | #index.indexing.slowlog.threshold.index.trace: 500ms 342 | 343 | ################################## GC Logging ################################ 344 | 345 | #monitor.jvm.gc.young.warn: 1000ms 346 | #monitor.jvm.gc.young.info: 700ms 347 | #monitor.jvm.gc.young.debug: 400ms 348 | 349 | #monitor.jvm.gc.old.warn: 10s 350 | #monitor.jvm.gc.old.info: 5s 351 | #monitor.jvm.gc.old.debug: 2s 352 | 353 | ################################## Security ################################ 354 | 355 | # Uncomment if you want to enable JSONP as a valid return transport on the 356 | # http server. With this enabled, it may pose a security risk, so disabling 357 | # it unless you need it is recommended (it is disabled by default). 358 | # 359 | #http.jsonp.enable: true -------------------------------------------------------------------------------- /resources/elasticsearch/elasticsearch-client.yml: -------------------------------------------------------------------------------- 1 | ################################### Cluster ################################### 2 | 3 | # Cluster name identifies your cluster for auto-discovery. 
If you're running 4 | # multiple clusters on the same network, make sure you're using unique names. 5 | # 6 | cluster.name: "opensoc-vagrant" 7 | 8 | 9 | #################################### Node ##################################### 10 | 11 | # Node names are generated dynamically on startup, so you're relieved 12 | # from configuring them manually. You can tie this node to a specific name: 13 | # 14 | node.name: "__HOSTNAME__" 15 | 16 | # Every node can be configured to allow or deny being eligible as the master, 17 | # and to allow or deny to store the data. 18 | # 19 | # Allow this node to be eligible as a master node (enabled by default): 20 | # 21 | #node.master: true 22 | # 23 | # Allow this node to store data (enabled by default): 24 | # 25 | #node.data: true 26 | 27 | # You can exploit these settings to design advanced cluster topologies. 28 | # 29 | # 1. You want this node to never become a master node, only to hold data. 30 | # This will be the "workhorse" of your cluster. 31 | # 32 | #node.master: false 33 | #node.data: true 34 | # 35 | # 2. You want this node to only serve as a master: to not store any data and 36 | # to have free resources. This will be the "coordinator" of your cluster. 37 | # 38 | node.master: true 39 | node.data: false 40 | # 41 | # 3. You want this node to be neither master nor data node, but 42 | # to act as a "search load balancer" (fetching data from nodes, 43 | # aggregating results, etc.) 44 | # 45 | # node.master: false 46 | # node.data: false 47 | 48 | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the 49 | # Node Info API [http://localhost:9200/_nodes] or GUI tools 50 | # such as , 51 | # , 52 | # and 53 | # to inspect the cluster state. 54 | 55 | # A node can have generic attributes associated with it, which can later be used 56 | # for customized shard allocation filtering, or allocation awareness. An attribute 57 | # is a simple key value pair, similar to node.key: value, here is an example: 58 | # 59 | #node.rack: rack314 60 | 61 | # By default, multiple nodes are allowed to start from the same installation location 62 | # to disable it, set the following: 63 | #node.max_local_storage_nodes: 1 64 | 65 | 66 | #################################### Index #################################### 67 | 68 | # You can set a number of options (such as shard/replica options, mapping 69 | # or analyzer definitions, translog settings, ...) for indices globally, 70 | # in this file. 71 | # 72 | # Note, that it makes more sense to configure index settings specifically for 73 | # a certain index, either when creating it or by using the index templates API. 74 | # 75 | # See and 76 | # 77 | # for more information. 78 | 79 | # Set the number of shards (splits) of an index (5 by default): 80 | # 81 | #index.number_of_shards: 5 82 | 83 | # Set the number of replicas (additional copies) of an index (1 by default): 84 | # 85 | #index.number_of_replicas: 1 86 | 87 | # Note, that for development on a local machine, with small indices, it usually 88 | # makes sense to "disable" the distributed features: 89 | # 90 | index.number_of_shards: 1 91 | index.number_of_replicas: 0 92 | 93 | # These settings directly affect the performance of index and search operations 94 | # in your cluster. Assuming you have enough machines to hold shards and 95 | # replicas, the rule of thumb is: 96 | # 97 | # 1. Having more *shards* enhances the _indexing_ performance and allows to 98 | # _distribute_ a big index across machines. 99 | # 2. 
Having more *replicas* enhances the _search_ performance and improves the 100 | # cluster _availability_. 101 | # 102 | # The "number_of_shards" is a one-time setting for an index. 103 | # 104 | # The "number_of_replicas" can be increased or decreased anytime, 105 | # by using the Index Update Settings API. 106 | # 107 | # Elasticsearch takes care about load balancing, relocating, gathering the 108 | # results from nodes, etc. Experiment with different settings to fine-tune 109 | # your setup. 110 | 111 | # Use the Index Status API () to inspect 112 | # the index status. 113 | 114 | 115 | #################################### Paths #################################### 116 | 117 | # Path to directory containing configuration (this file and logging.yml): 118 | # 119 | path.conf: /opt/elasticsearch/config 120 | 121 | # Path to directory where to store index data allocated for this node. 122 | # 123 | path.data: /var/lib/elasticsearch 124 | # 125 | # Can optionally include more than one location, causing data to be striped across 126 | # the locations (a la RAID 0) on a file level, favouring locations with most free 127 | # space on creation. For example: 128 | # 129 | #path.data: /path/to/data1,/path/to/data2 130 | 131 | # Path to temporary files: 132 | # 133 | #path.work: /path/to/work 134 | 135 | # Path to log files: 136 | # 137 | path.logs: /var/log/elasticsearch 138 | 139 | # Path to where plugins are installed: 140 | # 141 | path.plugins: /opt/elasticsearch/plugins 142 | 143 | 144 | #################################### Plugin ################################### 145 | 146 | # If a plugin listed here is not installed for current node, the node will not start. 147 | # 148 | #plugin.mandatory: mapper-attachments,lang-groovy 149 | 150 | 151 | ################################### Memory #################################### 152 | 153 | # Elasticsearch performs poorly when JVM starts swapping: you should ensure that 154 | # it _never_ swaps. 155 | # 156 | # Set this property to true to lock the memory: 157 | # 158 | #bootstrap.mlockall: true 159 | 160 | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set 161 | # to the same value, and that the machine has enough memory to allocate 162 | # for Elasticsearch, leaving enough memory for the operating system itself. 163 | # 164 | # You should also make sure that the Elasticsearch process is allowed to lock 165 | # the memory, eg. by using `ulimit -l unlimited`. 166 | 167 | 168 | ############################## Network And HTTP ############################### 169 | 170 | # Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens 171 | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node 172 | # communication. (the range means that if the port is busy, it will automatically 173 | # try the next port). 174 | 175 | # Set the bind address specifically (IPv4 or IPv6): 176 | # 177 | #network.bind_host: 192.168.0.1 178 | 179 | # Set the address other nodes will use to communicate with this node. If not 180 | # set, it is automatically derived. It must point to an actual IP address. 
181 | # 182 | network.publish_host: __IP_ADDR__ 183 | 184 | # Set both 'bind_host' and 'publish_host': 185 | # 186 | #network.host: 192.168.0.1 187 | 188 | # Set a custom port for the node to node communication (9300 by default): 189 | # 190 | #transport.tcp.port: 9300 191 | 192 | # Enable compression for all communication between nodes (disabled by default): 193 | # 194 | #transport.tcp.compress: true 195 | 196 | # Set a custom port to listen for HTTP traffic: 197 | # 198 | #http.port: 9200 199 | 200 | # Set a custom allowed content length: 201 | # 202 | #http.max_content_length: 100mb 203 | 204 | # Disable HTTP completely: 205 | # 206 | #http.enabled: false 207 | 208 | 209 | ################################### Gateway ################################### 210 | 211 | # The gateway allows for persisting the cluster state between full cluster 212 | # restarts. Every change to the state (such as adding an index) will be stored 213 | # in the gateway, and when the cluster starts up for the first time, 214 | # it will read its state from the gateway. 215 | 216 | # There are several types of gateway implementations. For more information, see 217 | # . 218 | 219 | # The default gateway type is the "local" gateway (recommended): 220 | # 221 | #gateway.type: local 222 | 223 | # Settings below control how and when to start the initial recovery process on 224 | # a full cluster restart (to reuse as much local data as possible when using shared 225 | # gateway). 226 | 227 | # Allow recovery process after N nodes in a cluster are up: 228 | # 229 | #gateway.recover_after_nodes: 1 230 | 231 | # Set the timeout to initiate the recovery process, once the N nodes 232 | # from previous setting are up (accepts time value): 233 | # 234 | #gateway.recover_after_time: 5m 235 | 236 | # Set how many nodes are expected in this cluster. Once these N nodes 237 | # are up (and recover_after_nodes is met), begin recovery process immediately 238 | # (without waiting for recover_after_time to expire): 239 | # 240 | #gateway.expected_nodes: 2 241 | 242 | 243 | ############################# Recovery Throttling ############################# 244 | 245 | # These settings allow to control the process of shards allocation between 246 | # nodes during initial recovery, replica allocation, rebalancing, 247 | # or when adding and removing nodes. 248 | 249 | # Set the number of concurrent recoveries happening on a node: 250 | # 251 | # 1. During the initial recovery 252 | # 253 | #cluster.routing.allocation.node_initial_primaries_recoveries: 4 254 | # 255 | # 2. During adding/removing nodes, rebalancing, etc 256 | # 257 | #cluster.routing.allocation.node_concurrent_recoveries: 2 258 | 259 | # Set to throttle throughput when recovering (eg. 100mb, by default 20mb): 260 | # 261 | #indices.recovery.max_bytes_per_sec: 20mb 262 | 263 | # Set to limit the number of open concurrent streams when 264 | # recovering a shard from a peer: 265 | # 266 | #indices.recovery.concurrent_streams: 5 267 | 268 | 269 | ################################## Discovery ################################## 270 | 271 | # Discovery infrastructure ensures nodes can be found within a cluster 272 | # and master node is elected. Multicast discovery is the default. 273 | 274 | # Set to ensure a node sees N other master eligible nodes to be considered 275 | # operational within the cluster. This should be set to a quorum/majority of 276 | # the master-eligible nodes in the cluster. 
277 | # 278 | #discovery.zen.minimum_master_nodes: 1 279 | 280 | # Set the time to wait for ping responses from other nodes when discovering. 281 | # Set this option to a higher value on a slow or congested network 282 | # to minimize discovery failures: 283 | # 284 | #discovery.zen.ping.timeout: 3s 285 | 286 | # For more information, see 287 | # 288 | 289 | # Unicast discovery allows to explicitly control which nodes will be used 290 | # to discover the cluster. It can be used when multicast is not present, 291 | # or to restrict the cluster communication-wise. 292 | # 293 | # 1. Disable multicast discovery (enabled by default): 294 | # 295 | discovery.zen.ping.multicast.enabled: false 296 | # 297 | # 2. Configure an initial list of master nodes in the cluster 298 | # to perform discovery when new nodes (master or data) are started: 299 | # 300 | discovery.zen.ping.unicast.hosts: ["node1"] 301 | 302 | # EC2 discovery allows to use AWS EC2 API in order to perform discovery. 303 | # 304 | # You have to install the cloud-aws plugin for enabling the EC2 discovery. 305 | # 306 | # For more information, see 307 | # 308 | # 309 | # See 310 | # for a step-by-step tutorial. 311 | 312 | # GCE discovery allows to use Google Compute Engine API in order to perform discovery. 313 | # 314 | # You have to install the cloud-gce plugin for enabling the GCE discovery. 315 | # 316 | # For more information, see . 317 | 318 | # Azure discovery allows to use Azure API in order to perform discovery. 319 | # 320 | # You have to install the cloud-azure plugin for enabling the Azure discovery. 321 | # 322 | # For more information, see . 323 | 324 | ################################## Slow Log ################################## 325 | 326 | # Shard level query and fetch threshold logging. 327 | 328 | #index.search.slowlog.threshold.query.warn: 10s 329 | #index.search.slowlog.threshold.query.info: 5s 330 | #index.search.slowlog.threshold.query.debug: 2s 331 | #index.search.slowlog.threshold.query.trace: 500ms 332 | 333 | #index.search.slowlog.threshold.fetch.warn: 1s 334 | #index.search.slowlog.threshold.fetch.info: 800ms 335 | #index.search.slowlog.threshold.fetch.debug: 500ms 336 | #index.search.slowlog.threshold.fetch.trace: 200ms 337 | 338 | #index.indexing.slowlog.threshold.index.warn: 10s 339 | #index.indexing.slowlog.threshold.index.info: 5s 340 | #index.indexing.slowlog.threshold.index.debug: 2s 341 | #index.indexing.slowlog.threshold.index.trace: 500ms 342 | 343 | ################################## GC Logging ################################ 344 | 345 | #monitor.jvm.gc.young.warn: 1000ms 346 | #monitor.jvm.gc.young.info: 700ms 347 | #monitor.jvm.gc.young.debug: 400ms 348 | 349 | #monitor.jvm.gc.old.warn: 10s 350 | #monitor.jvm.gc.old.info: 5s 351 | #monitor.jvm.gc.old.debug: 2s 352 | 353 | ################################## Security ################################ 354 | 355 | # Uncomment if you want to enable JSONP as a valid return transport on the 356 | # http server. With this enabled, it may pose a security risk, so disabling 357 | # it unless you need it is recommended (it is disabled by default). 358 | # 359 | #http.jsonp.enable: true --------------------------------------------------------------------------------
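Both Elasticsearch configs above leave `node.name` and `network.publish_host` as `__HOSTNAME__` and `__IP_ADDR__` placeholders; per the Vagrantfile, `scripts/setup-elasticsearch.sh` runs on every node with `-i 10.0.0.10#{i}` (plus `-c` on node1) and is presumably what substitutes the real per-node values. Below is a minimal sketch of that substitution; it is written in Python for illustration (the repo's actual script is shell), and the template and output paths are assumptions.

```
#!/usr/bin/python
# Minimal sketch of the __HOSTNAME__ / __IP_ADDR__ substitution; this is
# not the repo's actual setup-elasticsearch.sh, and the example paths in
# the usage comment below are assumptions.
import socket
import sys


def render_es_config(template_path, output_path, ip_addr, hostname=None):
    '''Fill the placeholders in an Elasticsearch config template.'''
    hostname = hostname or socket.gethostname()
    with open(template_path) as f:
        rendered = (f.read()
                    .replace('__HOSTNAME__', hostname)
                    .replace('__IP_ADDR__', ip_addr))
    with open(output_path, 'w') as f:
        f.write(rendered)


if __name__ == '__main__':
    # e.g.: render_es_config.py resources/elasticsearch/elasticsearch.yml \
    #           /opt/elasticsearch/config/elasticsearch.yml 10.0.0.102
    render_es_config(sys.argv[1], sys.argv[2], sys.argv[3])
```

The two templates differ mainly in node roles: `elasticsearch.yml` (`node.master: false`, `node.data: true`) fits the data nodes on node2-4, while `elasticsearch-client.yml` (`node.master: true`, `node.data: false`) fits node1, which presumably corresponds to the `-c` flag and is the node whose port 9200 is forwarded to the host.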