├── resources
│   ├── opensoc
│   │   ├── config
│   │   │   ├── etc
│   │   │   │   ├── env
│   │   │   │   │   ├── hdfs_connection.conf
│   │   │   │   │   ├── es_connection.conf
│   │   │   │   │   ├── mysql_connection.conf
│   │   │   │   │   └── environment_common.conf
│   │   │   │   └── whitelists
│   │   │   │       └── known_hosts.conf
│   │   │   └── topologies
│   │   │       ├── bro
│   │   │       │   ├── topology_identifier.conf
│   │   │       │   ├── metrics.conf
│   │   │       │   ├── alerts.xml
│   │   │       │   ├── topology.conf
│   │   │       │   └── features_enabled.conf
│   │   │       ├── pcap
│   │   │       │   ├── topology_identifier.conf
│   │   │       │   ├── metrics.conf
│   │   │       │   ├── features_enabled.conf
│   │   │       │   └── topology.conf
│   │   │       ├── environment_identifier.conf
│   │   │       └── sourcefire
│   │   │           ├── topology_identifier.conf
│   │   │           ├── alerts.xml
│   │   │           ├── metrics.conf
│   │   │           ├── topology.conf
│   │   │           └── features_enabled.conf
│   │   ├── hbase_ip_whitelist.rb
│   │   ├── hbase-site.xml
│   │   └── geo.sql
│   ├── upstart-supervisor.conf
│   ├── hbase
│   │   ├── supervisor-master.conf
│   │   ├── supervisor-regionserver.conf
│   │   └── hbase-site.xml
│   ├── hadoop
│   │   ├── supervisor-namenode.conf
│   │   ├── supervisor-resourcemanager.conf
│   │   ├── supervisor-datanode.conf
│   │   ├── core-site.xml
│   │   ├── mapred-site.xml
│   │   ├── yarn-site.xml
│   │   └── hdfs-site.xml
│   ├── elasticsearch
│   │   ├── supervisor-elasticsearch.conf
│   │   ├── elasticsearch.yml
│   │   └── elasticsearch-client.yml
│   ├── zookeeper
│   │   ├── supervisor-zookeeper.conf
│   │   └── log4j.properties
│   ├── hive
│   │   ├── supervisor-hive-metastore.conf
│   │   ├── hive-user.sql
│   │   └── hive-site.xml
│   ├── storm
│   │   ├── supervisor-worker.conf
│   │   └── supervisor-nimbus-ui.conf
│   ├── kafka
│   │   ├── supervisor-kafka.conf
│   │   └── server.properties
│   └── supervisord.conf
├── .gitignore
├── scripts
│   ├── setup-java.sh
│   ├── setup-geo-enrichment.sh
│   ├── setup-hbase.sh
│   ├── closest-mirror.py
│   ├── init-hadoop.sh
│   ├── setup-kafka.sh
│   ├── setup-os.sh
│   ├── setup-hive.sh
│   ├── setup-storm.sh
│   ├── setup-elasticsearch.sh
│   ├── setup-zookeeper.sh
│   ├── common.sh
│   └── setup-hadoop.sh
├── Vagrantfile
├── README.md
└── fabfile.py
/resources/opensoc/config/etc/env/hdfs_connection.conf:
--------------------------------------------------------------------------------
1 | bolt.hdfs.IP=node1
2 | bolt.hdfs.port=9000
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vagrant/*
2 | resources/jre*
3 | resources/tmp/*
4 | resources/opensoc/*.jar
5 | .ssh_config
6 |
--------------------------------------------------------------------------------
/resources/opensoc/config/etc/env/es_connection.conf:
--------------------------------------------------------------------------------
1 | es.ip=node1
2 | es.port=9300
3 | es.clustername=opensoc-vagrant
--------------------------------------------------------------------------------
/resources/opensoc/config/etc/env/mysql_connection.conf:
--------------------------------------------------------------------------------
1 | mysql.ip=node1
2 | mysql.port=0
3 | mysql.username=hive
4 | mysql.password=hive123
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/bro/topology_identifier.conf:
--------------------------------------------------------------------------------
1 | #Each topology must have a unique identifier. This setting is required
2 |
3 | topology.id=bro
4 | instance.id=B001
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/pcap/topology_identifier.conf:
--------------------------------------------------------------------------------
1 | #Each topology must have a unique identifier. This setting is required
2 |
3 | topology.id=pcap
4 | instance.id=P001
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/environment_identifier.conf:
--------------------------------------------------------------------------------
1 | #This file identifies the cluster instance
2 |
3 | customer.id=vagrant
4 | datacenter.id=quick
5 | instance.id=start
6 |
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/sourcefire/topology_identifier.conf:
--------------------------------------------------------------------------------
1 | #Each topology must have a unique identifier. This setting is required
2 |
3 | topology.id=sourcefire
4 | instance.id=S001
--------------------------------------------------------------------------------
/resources/opensoc/config/etc/env/environment_common.conf:
--------------------------------------------------------------------------------
1 | kafka.zk.port=2181
2 | kafka.zk.list=node2,node3,node4
3 | kafka.zk=node2:2181,node3:2181,node4:2181
4 | kafka.br=node2:9092,node3:9092,node4:9092
--------------------------------------------------------------------------------
/resources/upstart-supervisor.conf:
--------------------------------------------------------------------------------
1 | description "supervisor"
2 |
3 | start on runlevel [2345]
4 | stop on runlevel [!2345]
5 |
6 | exec /usr/bin/supervisord --configuration /etc/supervisord.conf --nodaemon
--------------------------------------------------------------------------------
/resources/opensoc/hbase_ip_whitelist.rb:
--------------------------------------------------------------------------------
1 | create "ip_whitelist", "ip"
2 | put "ip_whitelist", "10.0.0.0/8", "ip", "y"
3 | put "ip_whitelist", "192.168.0.0/16", "ip", "y"
4 | put "ip_whitelist", "172.16.0.0/12", "ip", "y"
5 | create "pcap", "t"
6 | exit
--------------------------------------------------------------------------------
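
The whitelist script above is written for the HBase shell. A minimal sketch of how it would typically be run — the invocation itself is not part of this snapshot, so the path and command below are an assumption:

    # Hypothetical invocation: feed the script to the HBase shell on a node with HBase installed
    /opt/hbase/bin/hbase shell /vagrant/resources/opensoc/hbase_ip_whitelist.rb
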
/resources/hbase/supervisor-master.conf:
--------------------------------------------------------------------------------
1 | [program:master]
2 | command=/opt/hbase/bin/hbase master start
3 | directory=/opt/hbase
4 | stdout_logfile=/var/log/hbase/master-stdout.log
5 | stderr_logfile=/var/log/hbase/master-stderr.log
6 | environment = JAVA_HOME=/usr/java/default
7 |
--------------------------------------------------------------------------------
/resources/opensoc/config/etc/whitelists/known_hosts.conf:
--------------------------------------------------------------------------------
1 | 10.1.128.236={"local":"YES", "type":"webserver", "asset_value" : "important"}
2 | 10.1.128.237={"local":"UNKNOWN", "type":"unknown", "asset_value" : "important"}
3 | 10.60.10.254={"local":"YES", "type":"printer", "asset_value" : "important"}
--------------------------------------------------------------------------------
/resources/hbase/supervisor-regionserver.conf:
--------------------------------------------------------------------------------
1 | [program:regionserver]
2 | command=/opt/hbase/bin/hbase regionserver start
3 | directory=/opt/hbase
4 | stdout_logfile=/var/log/hbase/regionserver-stdout.log
5 | stderr_logfile=/var/log/hbase/regionserver-stderr.log
6 | environment = JAVA_HOME=/usr/java/default
7 |
--------------------------------------------------------------------------------
/resources/hadoop/supervisor-namenode.conf:
--------------------------------------------------------------------------------
1 | [program:namenode]
2 | command = /opt/hadoop/bin/hdfs --config /opt/hadoop/etc/hadoop namenode
3 | stdout_logfile = /var/log/hadoop/namenode.stdout
4 | stderr_logfile = /var/log/hadoop/namenode.stderr
5 | autostart = false
6 | environment = JAVA_HOME=/usr/java/default
7 |
--------------------------------------------------------------------------------
/resources/elasticsearch/supervisor-elasticsearch.conf:
--------------------------------------------------------------------------------
1 | [program:elasticsearch]
2 | command=/opt/elasticsearch/bin/elasticsearch
3 | directory=/opt/elasticsearch
4 | stdout_logfile=/var/log/elasticsearch/stdout.log
5 | stderr_logfile=/var/log/elasticsearch/stderr.log
6 | environment=JAVA_HOME=/usr/java/default,ES_HEAP=256mb
7 |
--------------------------------------------------------------------------------
/resources/zookeeper/supervisor-zookeeper.conf:
--------------------------------------------------------------------------------
1 | [program:zookeeper]
2 | command=/opt/zookeeper/bin/zkServer.sh start-foreground
3 | directory=/opt/zookeeper
4 | stdout_logfile=/var/log/zookeeper/stdout.log
5 | stderr_logfile=/var/log/zookeeper/stderr.log
6 | redirect_stderr=true
7 | environment = JAVA_HOME=/usr/java/default
8 |
--------------------------------------------------------------------------------
/resources/hadoop/supervisor-resourcemanager.conf:
--------------------------------------------------------------------------------
1 | [program:resourcemanager]
2 | command = /opt/hadoop/bin/yarn --config /opt/hadoop/etc/hadoop resourcemanager
3 | stdout_logfile = /var/log/hadoop/resourcemanager.stdout
4 | stderr_logfile = /var/log/hadoop/resourcemanager.stderr
5 | autostart = false
6 | environment = JAVA_HOME=/usr/java/default
7 |
--------------------------------------------------------------------------------
/resources/hive/supervisor-hive-metastore.conf:
--------------------------------------------------------------------------------
1 | [program:hive-metastore]
2 | command=/opt/hive/bin/hive --service metastore
3 | directory=/opt/hive
4 | stdout_logfile=/var/log/hive/metastore-stdout.log
5 | stderr_logfile=/var/log/hive/metastore-stderr.log
6 | redirect_stderr=true
7 | environment = JAVA_HOME=/usr/java/default,HADOOP_HOME=/opt/hadoop
8 |
--------------------------------------------------------------------------------
/resources/storm/supervisor-worker.conf:
--------------------------------------------------------------------------------
1 | [program:storm-supervisor]
2 | command=/opt/storm/bin/storm supervisor
3 | directory=/opt/storm
4 | autostart=true
5 | autorestart=true
6 | stdout_logfile=/var/log/storm/supervisor-stdout.log
7 | stderr_logfile=/var/log/storm/supervisor-stderr.log
8 | environment = JAVA_HOME=/usr/java/default
9 |
10 |
--------------------------------------------------------------------------------
/resources/hive/hive-user.sql:
--------------------------------------------------------------------------------
1 | CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive123';
2 | GRANT ALL PRIVILEGES ON *.* TO 'hive'@'localhost';
3 | CREATE USER 'hive'@'%' IDENTIFIED BY 'hive123';
4 | GRANT ALL PRIVILEGES ON *.* TO 'hive'@'%';
5 | CREATE USER 'hive'@'node1' IDENTIFIED BY 'hive123';
6 | GRANT ALL PRIVILEGES ON *.* TO 'hive'@'node1';
7 | FLUSH PRIVILEGES;
--------------------------------------------------------------------------------
/resources/kafka/supervisor-kafka.conf:
--------------------------------------------------------------------------------
1 | [program:kafka]
2 | command=/opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/server.properties
3 | directory=/opt/kafka
4 | user=root
5 | autostart=true
6 | autorestart=true
7 | stdout_logfile=/var/log/kafka/stdout.log
8 | stderr_logfile=/var/log/kafka/stderr.log
9 | environment = JAVA_HOME=/usr/java/default
10 |
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/sourcefire/alerts.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | .*message.*
5 | {"type":"alert","priority":5, "title":"Sourcefire Alert", "body":
6 | "Alert triggered by sourcefire"}
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/resources/supervisord.conf:
--------------------------------------------------------------------------------
1 | [unix_http_server]
2 | file=/var/run/supervisor.sock
3 |
4 | [supervisord]
5 | pidfile=/var/run/supervisord.pid
6 | logfile=/var/log/supervisor/supervisord.log
7 | childlogdir=/var/log/supervisor
8 |
9 | [rpcinterface:supervisor]
10 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
11 |
12 | [supervisorctl]
13 | serverurl=unix:///var/run/supervisor.sock
14 |
15 | [include]
16 | files = /etc/supervisor.d/*.conf
--------------------------------------------------------------------------------
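
Because every service in this cluster is managed as a supervisord program, the same config can be used with supervisorctl to inspect or restart them. A small sketch, assuming the file above is installed at /etc/supervisord.conf:

    # List the state of all programs defined under /etc/supervisor.d/*.conf
    supervisorctl -c /etc/supervisord.conf status
    # Restart a single program, e.g. kafka
    supervisorctl -c /etc/supervisord.conf restart kafka
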
/resources/hadoop/supervisor-datanode.conf:
--------------------------------------------------------------------------------
1 | [program:datanode]
2 | command = /opt/hadoop/bin/hdfs --config /opt/hadoop/etc/hadoop datanode
3 | stdout_logfile = /var/log/hadoop/datanode.stdout
4 | stderr_logfile = /var/log/hadoop/datanode.stderr
5 | autostart = false
6 | environment = JAVA_HOME=/usr/java/default
7 |
8 | [program:nodemanager]
9 | command = /opt/hadoop/bin/yarn --config /opt/hadoop/etc/hadoop nodemanager
10 | stdout_logfile = /var/log/hadoop/nodemanager.stdout
11 | stderr_logfile = /var/log/hadoop/nodemanager.stderr
12 | autostart = false
13 | environment = JAVA_HOME=/usr/java/default
14 |
--------------------------------------------------------------------------------
/resources/storm/supervisor-nimbus-ui.conf:
--------------------------------------------------------------------------------
1 | [program:storm-ui]
2 | command=/opt/storm/bin/storm ui
3 | directory=/opt/storm
4 | autostart=true
5 | autorestart=true
6 | stdout_logfile=/var/log/storm/ui-stdout.log
7 | stderr_logfile=/var/log/storm/ui-stderr.log
8 | environment = JAVA_HOME=/usr/java/default
9 |
10 |
11 | [program:storm-nimbus]
12 | command=/opt/storm/bin/storm nimbus
13 | directory=/opt/storm
14 | autostart=true
15 | autorestart=true
16 | stdout_logfile=/var/log/storm/nimbus-stdout.log
17 | stderr_logfile=/var/log/storm/nimbus-stderr.log
18 | environment = JAVA_HOME=/usr/java/default
19 |
--------------------------------------------------------------------------------
/scripts/setup-java.sh:
--------------------------------------------------------------------------------
1 | source "/vagrant/scripts/common.sh"
2 |
3 | function installJava {
4 |
5 | rpm -q jre
6 | if [ $? -eq 0 ]; then
7 | echo "Java is already installed"
8 | else
9 | echo "install ${JRE_RPM}"
10 | rpm -i /vagrant/resources/$JRE_RPM
11 | fi
12 | }
13 |
14 | function setupEnvVars {
15 | echo "creating java environment variables"
16 | echo export JAVA_HOME=/usr/java/default >> /etc/profile.d/java.sh
17 | echo export PATH=\${JAVA_HOME}/bin:\${PATH} >> /etc/profile.d/java.sh
18 | }
19 |
20 | echo "Setting Up Java"
21 | installJava
22 | setupEnvVars
23 |
--------------------------------------------------------------------------------
/scripts/setup-geo-enrichment.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "/vagrant/scripts/common.sh"
4 |
5 |
6 | function downloadGeoData {
7 |
8 | downloadFile http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip GeoLiteCity-latest.zip
9 | geo_folder=`unzip -l $TARBALL | grep -m 1 -o -E GeoLiteCity_[0-9]{8}`
10 | cd /tmp && unzip $TARBALL
11 |
12 | }
13 |
14 | function provisionMySql {
15 |
16 | sed "s/__GEO_FOLDER__/${geo_folder}/" /vagrant/resources/opensoc/geo.sql > /tmp/geo.sql
17 | mysql -u root < /tmp/geo.sql
18 | }
19 |
20 | echo "Setting up Geo Enrichment Data"
21 | downloadGeoData
22 | provisionMySql
23 |
--------------------------------------------------------------------------------
/scripts/setup-hbase.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "/vagrant/scripts/common.sh"
4 |
5 | while getopts t:r: option; do
6 | case $option in
7 | t) TOTAL_NODES=$OPTARG;;
8 | r) HBASE_ROLE=$OPTARG;;
9 | esac
10 | done
11 |
12 | function installHbase {
13 | downloadApacheFile hbase $HBASE_VERSION_NUM "${HBASE_VERSION}-bin.tar.gz"
14 |
15 | tar -oxzf $TARBALL -C /opt
16 | safeSymLink "/opt/${HBASE_VERSION}" /opt/hbase
17 |
18 | mkdir -p /var/log/hbase
19 | }
20 |
21 | function configureHbase {
22 |
23 | generateZkStringNoPorts $TOTAL_NODES
24 | sed "s/__ZK_QUORUM__/${ZK_STRING_NOPORTS}/" /vagrant/resources/hbase/hbase-site.xml > /opt/hbase/conf/hbase-site.xml
25 | cp "/vagrant/resources/hbase/supervisor-${HBASE_ROLE}.conf" /etc/supervisor.d/hbase.conf
26 | }
27 |
28 | echo "Setting up HBase"
29 | installHbase
30 | configureHbase
31 |
32 |
--------------------------------------------------------------------------------
/scripts/closest-mirror.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # Print the closest apache mirror for the given project
3 |
4 | import urllib2, json, argparse, os
5 |
6 | parser = argparse.ArgumentParser(description='gets the closest Apache Mirror for a project')
7 | parser.add_argument('project', help='project to get the mirror for')
8 | parser.add_argument('-v', '--version', help='project version')
9 | parser.add_argument('-f', '--file', help='filename of binary')
10 |
11 | args = parser.parse_args()
12 |
13 | closer_url = 'http://www.apache.org/dyn/closer.cgi/{0}/?as_json=1'.format(args.project)
14 |
15 | response = json.loads(urllib2.urlopen(closer_url).read())
16 |
17 |
18 | path = response['path_info']
19 |
20 | if args.version:
21 | path = os.path.join(path, args.version)
22 |
23 | if args.file:
24 | path = os.path.join(path, args.file)
25 |
26 | print response['preferred'] + path
27 |
28 |
29 |
--------------------------------------------------------------------------------
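
A usage sketch mirroring how common.sh invokes this script; the version and filename are the Kafka values from common.sh, and the printed URL is illustrative since the preferred mirror varies:

    python /vagrant/scripts/closest-mirror.py kafka -v 0.8.1.1 -f kafka_2.9.2-0.8.1.1.tgz
    # -> http://apache.mirrors.example.org/kafka/0.8.1.1/kafka_2.9.2-0.8.1.1.tgz
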
/resources/opensoc/config/topologies/bro/metrics.conf:
--------------------------------------------------------------------------------
1 | #reporters
2 | com.opensoc.metrics.reporter.graphite=false
3 | com.opensoc.metrics.reporter.console=false
4 | com.opensoc.metrics.reporter.jmx=false
5 |
6 | #Graphite Addresses
7 |
8 | com.opensoc.metrics.graphite.address=localhost
9 | com.opensoc.metrics.graphite.port=2023
10 |
11 | #TelemetryParserBolt
12 | com.opensoc.metrics.TelemetryParserBolt.acks=false
13 | com.opensoc.metrics.TelemetryParserBolt.emits=false
14 | com.opensoc.metrics.TelemetryParserBolt.fails=false
15 |
16 |
17 | #GenericEnrichmentBolt
18 | com.opensoc.metrics.GenericEnrichmentBolt.acks=false
19 | com.opensoc.metrics.GenericEnrichmentBolt.emits=false
20 | com.opensoc.metrics.GenericEnrichmentBolt.fails=false
21 |
22 |
23 | #TelemetryIndexingBolt
24 | com.opensoc.metrics.TelemetryIndexingBolt.acks=false
25 | com.opensoc.metrics.TelemetryIndexingBolt.emits=false
26 | com.opensoc.metrics.TelemetryIndexingBolt.fails=false
27 |
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/pcap/metrics.conf:
--------------------------------------------------------------------------------
1 | #reporters
2 | com.opensoc.metrics.reporter.graphite=false
3 | com.opensoc.metrics.reporter.console=false
4 | com.opensoc.metrics.reporter.jmx=false
5 |
6 | #Graphite Addresses
7 |
8 | com.opensoc.metrics.graphite.address=localhost
9 | com.opensoc.metrics.graphite.port=2023
10 |
11 | #TelemetryParserBolt
12 | com.opensoc.metrics.TelemetryParserBolt.acks=false
13 | com.opensoc.metrics.TelemetryParserBolt.emits=false
14 | com.opensoc.metrics.TelemetryParserBolt.fails=false
15 |
16 |
17 | #GenericEnrichmentBolt
18 | com.opensoc.metrics.GenericEnrichmentBolt.acks=false
19 | com.opensoc.metrics.GenericEnrichmentBolt.emits=false
20 | com.opensoc.metrics.GenericEnrichmentBolt.fails=false
21 |
22 |
23 | #TelemetryIndexingBolt
24 | com.opensoc.metrics.TelemetryIndexingBolt.acks=false
25 | com.opensoc.metrics.TelemetryIndexingBolt.emits=false
26 | com.opensoc.metrics.TelemetryIndexingBolt.fails=false
27 |
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/sourcefire/metrics.conf:
--------------------------------------------------------------------------------
1 | #reporters
2 | com.opensoc.metrics.reporter.graphite=false
3 | com.opensoc.metrics.reporter.console=false
4 | com.opensoc.metrics.reporter.jmx=false
5 |
6 | #Graphite Addresses
7 |
8 | com.opensoc.metrics.graphite.address=localhost
9 | com.opensoc.metrics.graphite.port=2023
10 |
11 | #TelemetryParserBolt
12 | com.opensoc.metrics.TelemetryParserBolt.acks=false
13 | com.opensoc.metrics.TelemetryParserBolt.emits=false
14 | com.opensoc.metrics.TelemetryParserBolt.fails=false
15 |
16 |
17 | #GenericEnrichmentBolt
18 | com.opensoc.metrics.GenericEnrichmentBolt.acks=false
19 | com.opensoc.metrics.GenericEnrichmentBolt.emits=false
20 | com.opensoc.metrics.GenericEnrichmentBolt.fails=false
21 |
22 |
23 | #TelemetryIndexingBolt
24 | com.opensoc.metrics.TelemetryIndexingBolt.acks=false
25 | com.opensoc.metrics.TelemetryIndexingBolt.emits=false
26 | com.opensoc.metrics.TelemetryIndexingBolt.fails=false
27 |
--------------------------------------------------------------------------------
/resources/hadoop/core-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |   <property>
4 |     <name>fs.default.name</name>
5 |     <value>hdfs://node1:9000</value>
6 |   </property>
7 | </configuration>
--------------------------------------------------------------------------------
/resources/hadoop/mapred-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |   <property>
4 |     <name>mapreduce.framework.name</name>
5 |     <value>yarn</value>
6 |   </property>
7 | </configuration>
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/bro/alerts.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | .*host\"\:\{"ip_dst_addr\"\:\{\},\"ip_src_addr\"\:\{\}.*
5 | {"type":"error","priority":5, "title":"No Local Hostname Present", "body":
6 | "We don't have a record for source or destination IPs in our internal database."}
7 |
8 |
9 |
10 | .*whois\"\:\{\"tld\"\:\{\}.*
11 | {"type":"warning","priority":10, "title":"Whois domain unknown", "body":
12 | "Could not locate whois information for tld"}
13 |
14 |
15 | ^((?!country\"\:\"US\").)*$
16 | {"type":"warning","priority":10, "title":"NOT US IP", "body": "Communication contains a non-US IP"}
17 |
18 |
19 | .*geo.*
20 | {"type":"error","priority":1, "title":"test", "body": "test alert"}
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/resources/hive/hive-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |   <property>
4 |     <name>javax.jdo.option.ConnectionURL</name>
5 |     <value>jdbc:mysql://node1:3306/hivemeta?createDatabaseIfNotExist=true</value>
6 |   </property>
7 |   <property>
8 |     <name>javax.jdo.option.ConnectionDriverName</name>
9 |     <value>com.mysql.jdbc.Driver</value>
10 |   </property>
11 |   <property>
12 |     <name>javax.jdo.option.ConnectionUserName</name>
13 |     <value>hive</value>
14 |   </property>
15 |   <property>
16 |     <name>javax.jdo.option.ConnectionPassword</name>
17 |     <value>hive123</value>
18 |   </property>
19 |   <property>
20 |     <name>hive.server2.thrift.bind.host</name>
21 |     <value>0.0.0.0</value>
22 |   </property>
23 |   <property>
24 |     <name>hadoop.bin.path</name>
25 |     <value>/opt/hadoop/bin</value>
26 |   </property>
27 |   <property>
28 |     <name>hadoop.config.dir</name>
29 |     <value>/opt/hadoop/etc/hadoop</value>
30 |   </property>
31 | </configuration>
--------------------------------------------------------------------------------
/scripts/init-hadoop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "/vagrant/scripts/common.sh"
4 |
5 | while getopts r: option; do
6 | case $option in
7 | r) ROLE=$OPTARG;;
8 | esac
9 | done
10 |
11 | function startHadoopRole {
12 | ps -ef | grep -v grep | grep -v vagrant | grep $1
13 | if [ $? -ne 0 ]; then
14 | /opt/hadoop/sbin/hadoop-daemon.sh --config /opt/hadoop/etc/hadoop --script hdfs start $1
15 | fi
16 | }
17 |
18 | function startYarnRole {
19 | ps -ef | grep -v grep | grep -v vagrant | grep $1
20 | if [ $? -ne 0 ]; then
21 | /opt/hadoop/sbin/yarn-daemon.sh --config /opt/hadoop/etc/hadoop start $1
22 | fi
23 | }
24 | function formatHdfs {
25 | /opt/hadoop/bin/hdfs namenode -format vagrant -nonInteractive
26 |
27 | }
28 |
29 | echo "Starting Hadoop"
30 |
31 | if [ "${ROLE}" == "namenode" ]; then
32 | formatHdfs
33 | startHadoopRole $ROLE
34 | startYarnRole "resourcemanager"
35 | elif [ "${ROLE}" == "datanode" ]; then
36 | startHadoopRole $ROLE
37 | startYarnRole "nodemanager"
38 | fi
--------------------------------------------------------------------------------
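
A hedged invocation sketch: in this setup the script would normally be called by the Vagrant provisioner (not shown in this snapshot), so running it by hand as below is an assumption:

    # On node1 (namenode + resourcemanager)
    bash /vagrant/scripts/init-hadoop.sh -r namenode
    # On worker nodes (datanode + nodemanager)
    bash /vagrant/scripts/init-hadoop.sh -r datanode
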
/resources/hadoop/yarn-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |   <property>
4 |     <name>yarn.nodemanager.aux-services</name>
5 |     <value>mapreduce_shuffle</value>
6 |   </property>
7 |   <property>
8 |     <name>yarn.resourcemanager.hostname</name>
9 |     <value>node1</value>
10 |   </property>
11 |   <property>
12 |     <name>yarn.resourcemanager.bind-host</name>
13 |     <value>0.0.0.0</value>
14 |   </property>
15 | </configuration>
--------------------------------------------------------------------------------
/scripts/setup-kafka.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "/vagrant/scripts/common.sh"
4 |
5 | while getopts t: option; do
6 | case $option in
7 | t) TOTAL_NODES=$OPTARG;;
8 | esac
9 | done
10 |
11 | function installKafka {
12 | downloadApacheFile kafka ${KAFKA_VERSION_NUM} "${KAFKA_VERSION}.tgz"
13 |
14 | tar -oxzf $TARBALL -C /opt
15 | safeSymLink "/opt/${KAFKA_VERSION}/" /opt/kafka
16 |
17 | mkdir -p /var/lib/kafka-logs
18 | mkdir -p /var/log/kafka
19 | }
20 |
21 | function configureKafka {
22 | echo "Configuring Kafka"
23 | # copy over config with static properties
24 | cp /vagrant/resources/kafka/server.properties /opt/kafka/config/
25 |
26 | # echo in dynamic ones
27 | echo "broker.id=${NODE_NUMBER}" >> /opt/kafka/config/server.properties
28 |
29 | generateZkString $TOTAL_NODES
30 |
31 | echo "zookeeper.connect=${ZK_STRING}" >> /opt/kafka/config/server.properties
32 |
33 | cp /vagrant/resources/kafka/supervisor-kafka.conf /etc/supervisor.d/kafka.conf
34 | }
35 |
36 |
37 | echo "Setting up Kafka"
38 | installKafka
39 | configureKafka
--------------------------------------------------------------------------------
/resources/hadoop/hdfs-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |   <property>
4 |     <name>dfs.replication</name>
5 |     <value>1</value>
6 |   </property>
7 |   <property>
8 |     <name>dfs.name.dir</name>
9 |     <value>file:///var/lib/hadoop/hdfs/namenode</value>
10 |   </property>
11 |   <property>
12 |     <name>dfs.data.dir</name>
13 |     <value>file:///var/lib/hadoop/hdfs/datanode</value>
14 |   </property>
15 | </configuration>
--------------------------------------------------------------------------------
/scripts/setup-os.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | while getopts t: option; do
4 | case $option in
5 | t) TOTAL_NODES=$OPTARG;;
6 | esac
7 | done
8 |
9 | function disableFirewall {
10 | echo "Disabling the Firewall"
11 | service iptables save
12 | service iptables stop
13 | chkconfig iptables off
14 | }
15 |
16 | function writeHostFile {
17 | echo "setting up /etc/hosts file"
18 |
19 | echo "127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4" > /etc/hosts
20 | echo "::1 localhost localhost.localdomain localhost6 localhost6.localdomain6" >> /etc/hosts
21 |
22 | for i in $(seq 1 $TOTAL_NODES); do
23 | echo "10.0.0.10${i} node${i}" >> /etc/hosts
24 | done
25 | }
26 |
27 | function installDependencies {
28 | echo "Installing Supervisor"
29 | yum install -y epel-release
30 | yum install -y python-pip unzip
31 |
32 | pip install supervisor
33 | pip install argparse
34 |
35 | cp /vagrant/resources/supervisord.conf /etc/supervisord.conf
36 | cp /vagrant/resources/upstart-supervisor.conf /etc/init/supervisor.conf
37 |
38 | mkdir -p /etc/supervisor.d
39 | mkdir -p /var/log/supervisor
40 | }
41 |
42 | function installNtpd {
43 | yum install -y ntp
44 |
45 | ntpdate 0.pool.ntp.org
46 |
47 | service ntpd start
48 | chkconfig ntpd on
49 | }
50 |
51 | disableFirewall
52 | writeHostFile
53 | installDependencies
--------------------------------------------------------------------------------
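
For illustration, with a hypothetical four-node cluster (-t 4) writeHostFile produces an /etc/hosts along these lines:

    127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
    ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
    10.0.0.101 node1
    10.0.0.102 node2
    10.0.0.103 node3
    10.0.0.104 node4
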
/resources/hbase/hbase-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |   <property>
4 |     <name>hbase.rootdir</name>
5 |     <value>hdfs://node1:9000/hbase</value>
6 |   </property>
7 |   <property>
8 |     <name>hbase.cluster.distributed</name>
9 |     <value>true</value>
10 |   </property>
11 |   <property>
12 |     <name>hbase.zookeeper.quorum</name>
13 |     <value>__ZK_QUORUM__</value>
14 |   </property>
15 |   <property>
16 |     <name>zookeeper.znode.parent</name>
17 |     <value>/hbase-unsecure</value>
18 |   </property>
19 | </configuration>
--------------------------------------------------------------------------------
/resources/opensoc/hbase-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |   <property>
4 |     <name>hbase.rootdir</name>
5 |     <value>hdfs://node1:9000/hbase</value>
6 |   </property>
7 |   <property>
8 |     <name>hbase.cluster.distributed</name>
9 |     <value>true</value>
10 |   </property>
11 |   <property>
12 |     <name>hbase.zookeeper.quorum</name>
13 |     <value>node2:2181,node3:2181,node4:2181</value>
14 |   </property>
15 |   <property>
16 |     <name>zookeeper.znode.parent</name>
17 |     <value>/hbase-unsecure</value>
18 |   </property>
19 | </configuration>
--------------------------------------------------------------------------------
/scripts/setup-hive.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "/vagrant/scripts/common.sh"
4 |
5 | function installHive {
6 |
7 | downloadApacheFile hive $HIVE_VERSION "apache-${HIVE_VERSION}-bin.tar.gz"
8 |
9 | tar -oxzf $TARBALL -C /opt
10 | safeSymLink "/opt/apache-${HIVE_VERSION}-bin/" /opt/hive
11 |
12 | mkdir -p /var/log/hive
13 |
14 | cp /vagrant/resources/hive/supervisor-hive-metastore.conf /etc/supervisor.d/hive-metastore.conf
15 |
16 | }
17 |
18 | function installMySql {
19 | yum install -y mysql-server mysql-connector-java
20 |
21 | chkconfig mysqld on
22 | service mysqld start
23 |
24 | safeSymLink /usr/share/java/mysql-connector-java.jar /opt/hive/lib/mysql-connector-java.jar
25 |
26 | echo "Setting up mysql user"
27 | if mysql -u root mysql -e "select User from user where User='hive';" | grep hive; then
28 | echo "hive user exists..."
29 | else
30 | mysql -u root < /vagrant/resources/hive/hive-user.sql
31 | fi
32 |
33 | echo "Setting up metastore schema"
34 | if mysql -u root -e "show databases like 'hivemeta';" | grep hivemeta; then
35 | echo "metastore table exists..."
36 | else
37 | mysql -u root -e "CREATE DATABASE hivemeta;"
38 | cd /opt/hive/scripts/metastore/upgrade/mysql && mysql -u hive -phive123 hivemeta < hive-schema-1.2.0.mysql.sql
39 | fi
40 | }
41 |
42 | function configureHive {
43 |
44 | cp /vagrant/resources/hive/hive-site.xml /opt/hive/conf/
45 | }
46 |
47 | echo "Setting up Hive"
48 | installHive
49 | installMySql
50 | configureHive
--------------------------------------------------------------------------------
/scripts/setup-storm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "/vagrant/scripts/common.sh"
4 |
5 |
6 | while getopts t:r: option; do
7 | case $option in
8 | t) TOTAL_NODES=$OPTARG;;
9 | r) STORM_ROLE=$OPTARG;;
10 | esac
11 | done
12 |
13 |
14 | function installStorm {
15 | downloadApacheFile storm ${STORM_VERSION} "${STORM_VERSION}.tar.gz"
16 |
17 | tar -oxzf $TARBALL -C /opt
18 | safeSymLink "/opt/${STORM_VERSION}" /opt/storm
19 |
20 | mkdir -p /var/log/storm
21 | }
22 |
23 | function configureStorm {
24 | echo "Configuring Storm"
25 |
26 | echo "storm.zookeeper.servers:" >> /opt/storm/conf/storm.yaml
27 | for i in $(seq 2 $TOTAL_NODES); do
28 | echo " - node${i}" >> /opt/storm/conf/storm.yaml
29 | done
30 |
31 | echo "nimbus.host: node1" >> /opt/storm/conf/storm.yaml
32 | echo "java.library.path: /usr/local/lib:/opt/local/lib:/usr/lib:/opt/hadoop/lib/native:/usr/lib64" >> /opt/storm/conf/storm.yaml
33 | echo "LD_LIBRARY_PATH:/usr/local/lib:/opt/local/lib:/usr/lib:/opt/hadoop/lib/native:/usr/lib64" >> /opt/storm/conf/storm_env.ini
34 |
35 | }
36 |
37 | function setupNimbus {
38 | echo "Setting up Storm Nimbus"
39 |
40 | cp /vagrant/resources/storm/supervisor-nimbus-ui.conf /etc/supervisor.d/storm.conf
41 | }
42 |
43 | function setupSupervisor {
44 | echo "Setting up Storm Supervisor"
45 |
46 | cp /vagrant/resources/storm/supervisor-worker.conf /etc/supervisor.d/storm.conf
47 | }
48 |
49 | echo "Setting up Storm"
50 | installStorm
51 | configureStorm
52 |
53 |
54 | case $STORM_ROLE in
55 | nimbus) setupNimbus;;
56 | supervisor) setupSupervisor;;
57 | esac
58 |
--------------------------------------------------------------------------------
/scripts/setup-elasticsearch.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "/vagrant/scripts/common.sh"
4 |
5 | while getopts ci: option; do
6 | case $option in
7 | c) ES_CLIENT=yes;;
8 | i) IP_ADDR=$OPTARG;;
9 | esac
10 | done
11 |
12 | function installElasticsearch {
13 |
14 | downloadFile "https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz" "elasticsearch-${ES_VERSION}.tar.gz"
15 |
16 | tar -oxf $TARBALL -C /opt
17 | safeSymLink "/opt/elasticsearch-${ES_VERSION}" /opt/elasticsearch
18 |
19 | mkdir -p /var/lib/elasticsearch
20 | mkdir -p /var/log/elasticsearch
21 | mkdir -p /opt/elasticsearch/plugins
22 | }
23 |
24 | function configureElasticsearch {
25 |
26 | hostname=`hostname -f`
27 | if [ -z "${ES_CLIENT}" ]; then
28 | echo "Configuring elasticsearch as a normal node"
29 | sed "s/__HOSTNAME__/${hostname}/" /vagrant/resources/elasticsearch/elasticsearch.yml | sed "s/__IP_ADDR__/${IP_ADDR}/" > /opt/elasticsearch/config/elasticsearch.yml
30 | else
31 | echo "Configuring elasticsearch as a client"
32 | sed "s/__HOSTNAME__/${hostname}/" /vagrant/resources/elasticsearch/elasticsearch-client.yml | sed "s/__IP_ADDR__/${IP_ADDR}/" > /opt/elasticsearch/config/elasticsearch.yml
33 | fi
34 |
35 | if [ ! -e /opt/elasticsearch/plugins/kopf ]; then
36 | echo "Installing kopf plugin"
37 | /opt/elasticsearch/bin/plugin --install lmenezes/elasticsearch-kopf/1.5.3
38 | fi
39 |
40 | cp /vagrant/resources/elasticsearch/supervisor-elasticsearch.conf /etc/supervisor.d/elasticsearch.conf
41 |
42 | }
43 | echo "Setting up Elasticsearch"
44 | installElasticsearch
45 | configureElasticsearch
--------------------------------------------------------------------------------
/scripts/setup-zookeeper.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "/vagrant/scripts/common.sh"
4 |
5 | while getopts t: option; do
6 | case $option in
7 | t) TOTAL_NODES=$OPTARG;;
8 | esac
9 | done
10 |
11 | function installZookeeper {
12 | downloadApacheFile zookeeper ${ZOOKEEPER_VERSION} "${ZOOKEEPER_VERSION}.tar.gz"
13 |
14 | tar -oxzf $TARBALL -C /opt
15 | safeSymLink "/opt/${ZOOKEEPER_VERSION}/" /opt/zookeeper
16 |
17 | mkdir -p /var/lib/zookeeper
18 | mkdir -p /var/log/zookeeper
19 |
20 | echo "0 0 * * * /usr/local/bin/zookeeper_cleanup" >> /etc/crontab
21 |
22 | echo "cd /opt/zookeeper" > /usr/local/bin/zookeeper_cleanup
23 | echo "echo `date` > /root/last_zk_cleanup" >> /usr/local/bin/zookeeper_cleanup
24 | echo "bin/zkCleanup.sh /var/lib/zookeeper -n 5 >> /root/last_zk_cleanup" >> /usr/local/bin/zookeeper_cleanup
25 |
26 | chmod +x /usr/local/bin/zookeeper_cleanup
27 |
28 | echo $NODE_NUMBER > /var/lib/zookeeper/myid
29 | }
30 |
31 | function configureZookeeper {
32 |
33 | echo "Configuring Zookeeper..."
34 | echo "tickTime=2000" > /opt/zookeeper/conf/zoo.cfg
35 | echo "initLimit=10" >> /opt/zookeeper/conf/zoo.cfg
36 | echo "syncLimit=5" >> /opt/zookeeper/conf/zoo.cfg
37 | echo "dataDir=/var/lib/zookeeper" >> /opt/zookeeper/conf/zoo.cfg
38 | echo "clientPort=2181" >> /opt/zookeeper/conf/zoo.cfg
39 |
40 | for i in $(seq 1 $TOTAL_NODES); do
41 | echo "server.${i}=node${i}:2888:3888" >> /opt/zookeeper/conf/zoo.cfg
42 | done
43 |
44 | cp /vagrant/resources/zookeeper/supervisor-zookeeper.conf /etc/supervisor.d/zookeeper.conf
45 | }
46 |
47 | echo "Setting up Zookeeper"
48 |
49 | installZookeeper
50 | configureZookeeper
--------------------------------------------------------------------------------
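
As an illustration, with a hypothetical four-node cluster (-t 4) configureZookeeper writes a zoo.cfg roughly like this:

    tickTime=2000
    initLimit=10
    syncLimit=5
    dataDir=/var/lib/zookeeper
    clientPort=2181
    server.1=node1:2888:3888
    server.2=node2:2888:3888
    server.3=node3:2888:3888
    server.4=node4:2888:3888
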
/resources/opensoc/config/topologies/pcap/features_enabled.conf:
--------------------------------------------------------------------------------
1 | #Enable and disable features for each topology
2 |
3 | #Feature: Kafka spout
4 | ##Feature Description: Acts as a Kafka consumer. Takes messages from a Kafka topic and ingests them into a topology
5 |
6 | spout.kafka.name=KafkaSpout
7 | spout.kafka.enabled=true
8 | spout.kafka.num.tasks=1
9 | spout.kafka.parallelism.hint=1
10 |
11 | #Feature: Parser Bolt
12 | ##Feature Description: Parses telemetry from its native format into a native JSON
13 |
14 | parser.bolt.name=ParserBolt
15 | bolt.parser.enabled=true
16 | bolt.parser.num.tasks=1
17 | bolt.parser.parallelism.hint=1
18 |
19 | #Feature: Indexer
20 | ##Feature Description: Indexes telemetry messages in ElasticSearch or Solr
21 |
22 | bolt.indexing.name=IndexBolt
23 | bolt.indexing.enabled=true
24 | bolt.indexing.num.tasks=1
25 | bolt.indexing.parallelism.hint=1
26 |
27 | #Feature: Error Indexer
28 | ##Feature Description: Indexes error messages in ElasticSearch or Solr
29 |
30 | bolt.error.indexing.name=ErrorIndexBolt
31 | bolt.error.indexing.enabled=true
32 | bolt.error.indexing.num.tasks=1
33 | bolt.error.indexing.parallelism.hint=1
34 |
35 | #Feature: HDFS Bolt
36 | ##Feature Description: Writes telemetry messages into HDFS
37 |
38 | bolt.hdfs.name=HDFSBolt
39 | bolt.hdfs.enabled=false
40 | bolt.hdfs.num.tasks=4
41 | bolt.hdfs.parallelism.hint=4
42 |
43 | bolt.hbase.name=HBaseBolt
44 | bolt.hbase.enabled=true
45 | bolt.hbase.num.tasks=1
46 | bolt.hbase.parallelism.hint=1
47 |
48 |
49 | # unused stuff
50 | bolt.enrichment.host.enabled=false
51 | bolt.enrichment.geo.enabled=false
52 | bolt.enrichment.whois.enabled=false
53 | bolt.enrichment.cif.enabled=false
54 | bolt.enrichment.threat.enabled=false
55 | bolt.alerts.enabled=false
56 | bolt.alerts.indexing.enabled=false
57 | bolt.kafka.enabled=false
58 |
59 |
--------------------------------------------------------------------------------
/resources/opensoc/geo.sql:
--------------------------------------------------------------------------------
1 | CREATE DATABASE IF NOT EXISTS GEO;
2 |
3 | USE GEO;
4 |
5 | DROP TABLE IF EXISTS `blocks`;
6 | CREATE TABLE `blocks` ( `startIPNum` int(10) unsigned NOT NULL,`endIPNum` int(10) unsigned NOT NULL,`locID`
7 | int(10) unsigned NOT NULL, PRIMARY KEY (`startIPNum`,`endIPNum`) )
8 | ENGINE=MyISAM DEFAULT CHARSET=latin1 PACK_KEYS=1 DELAY_KEY_WRITE=1;
9 |
10 | DROP TABLE IF EXISTS `location`;
11 | CREATE TABLE `location` (`locID` int(10) unsigned NOT NULL,`country` char(2) default NULL,`region` char(2)
12 | default NULL,`city` varchar(45) default NULL,`postalCode` char(7) default NULL,`latitude` double default
13 | NULL,`longitude` double default NULL,`dmaCode` char(3) default NULL,`areaCode` char(3) default NULL,PRIMARY KEY
14 | (`locID`),KEY `Index_Country` (`country`) ) ENGINE=MyISAM DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED;
15 |
16 | load data infile '/tmp/__GEO_FOLDER__/GeoLiteCity-Blocks.csv' into table `blocks` fields terminated by ',' optionally enclosed by
17 | '"' lines terminated by '\n' ignore 2 lines;
18 |
19 | load data infile '/tmp/__GEO_FOLDER__/GeoLiteCity-Location.csv' into table `location` fields terminated by ',' optionally enclosed
20 | by '"' lines terminated by '\n' ignore 2 lines;
21 |
22 | DELIMITER $$
23 | DROP FUNCTION IF EXISTS `IPTOLOCID` $$
24 | CREATE FUNCTION `IPTOLOCID`( ip VARCHAR(15)) RETURNS int(10) unsigned
25 | BEGIN
26 | DECLARE ipn INTEGER UNSIGNED;
27 | DECLARE locID_var INTEGER;
28 | IF ip LIKE '192.168.%' OR ip LIKE '10.%' THEN RETURN 0;
29 | END IF;
30 | SET ipn = INET_ATON(ip);
31 | SELECT locID INTO locID_var FROM `blocks` INNER JOIN (SELECT MAX(startIPNum) AS start FROM `blocks` WHERE startIPNum <= ipn) AS s ON (startIPNum = s.start) WHERE endIPNum >= ipn;
32 | RETURN locID_var;
33 | END
34 | $$
35 | DELIMITER ;
--------------------------------------------------------------------------------
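
Once the GeoLite CSVs are loaded, the IPTOLOCID function can be joined against the location table to resolve an IP. A minimal sketch; the IP is an arbitrary example and the query assumes the load data statements above succeeded:

    mysql -u root GEO -e "SELECT country, city FROM location WHERE locID = IPTOLOCID('93.184.216.34');"
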
/scripts/common.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | JRE_RPM=jre-7u79-linux-x64.rpm
4 | HADOOP_VERSION=hadoop-2.6.0
5 | ZOOKEEPER_VERSION=zookeeper-3.4.6
6 | KAFKA_SCALA_VERSION=2.9.2
7 | KAFKA_VERSION_NUM=0.8.1.1
8 | KAFKA_VERSION="kafka_${KAFKA_SCALA_VERSION}-${KAFKA_VERSION_NUM}"
9 | STORM_VERSION=apache-storm-0.9.4
10 | HBASE_VERSION_NUM=0.98.13
11 | HBASE_VERSION=hbase-"${HBASE_VERSION_NUM}-hadoop2"
12 | HIVE_VERSION=hive-1.2.0
13 | ES_VERSION=1.5.2
14 |
15 | # Derived from the hostname so the node number doesn't need to be passed in to the scripts
16 | NODE_NUMBER=`hostname | tr -d node`
17 |
18 |
19 | function downloadFile {
20 |
21 | url="${1}"
22 | filename="${2}"
23 |
24 | tmp_dir="/vagrant/resources/tmp/"
25 | cached_file="${tmp_dir}${filename}"
26 |
27 | if [ ! -e $cached_file ]; then
28 | echo "Downloading ${filename} from ${url} to ${cached_file}"
29 | echo "This will take some time. Please be patient..."
30 | wget -nv -P $tmp_dir $url
31 | fi
32 |
33 | TARBALL=$cached_file
34 | }
35 |
36 | function downloadApacheFile {
37 |
38 | project="${1}"
39 | version="${2}"
40 | filename="${3}"
41 |
42 | closest_url=`python /vagrant/scripts/closest-mirror.py ${project} -v ${version} -f ${filename}`
43 |
44 | downloadFile $closest_url $filename
45 | }
46 |
47 | function join {
48 | local IFS="$1"; shift; echo "$*"
49 | }
50 |
51 | function generateZkString {
52 | # Yes, it's ugly, but so is bash :)
53 | ZK_STRING=`python -c "print ','.join([ 'node{0}:2181'.format(x) for x in range(2,${1}+1)])"`
54 | }
55 |
56 | function generateZkStringNoPorts {
57 | ZK_STRING_NOPORTS=`python -c "print ','.join([ 'node{0}'.format(x) for x in range(2,${1}+1)])"`
58 | }
59 |
60 | function safeSymLink {
61 | target=$1
62 | symlink=$2
63 |
64 | if [ -e $symlink ]; then
65 | echo "${symlink} exists. Deleting."
66 | rm $symlink
67 | fi
68 |
69 | ln -s $target $symlink
70 | }
71 |
--------------------------------------------------------------------------------
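
For reference, a short sketch of how the provisioning scripts use these helpers. The values correspond to a hypothetical four-node cluster; the snippet is illustrative and not part of the repo:

    #!/bin/bash
    source "/vagrant/scripts/common.sh"

    # Resolve a mirror and download/cache the Kafka tarball into /vagrant/resources/tmp/
    downloadApacheFile kafka ${KAFKA_VERSION_NUM} "${KAFKA_VERSION}.tgz"
    echo "cached at ${TARBALL}"

    # Build the ZooKeeper connect strings used by Kafka and HBase
    generateZkString 4          # ZK_STRING=node2:2181,node3:2181,node4:2181
    generateZkStringNoPorts 4   # ZK_STRING_NOPORTS=node2,node3,node4
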
/scripts/setup-hadoop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source "/vagrant/scripts/common.sh"
4 |
5 | while getopts r:t: option; do
6 | case $option in
7 | t) TOTAL_NODES=$OPTARG;;
8 | r) ROLE=$OPTARG;;
9 | esac
10 | done
11 |
12 | function installHadoop {
13 |
14 | downloadApacheFile hadoop/common $HADOOP_VERSION "${HADOOP_VERSION}.tar.gz"
15 |
16 | tar -oxzf $TARBALL -C /opt
17 | safeSymLink "/opt/${HADOOP_VERSION}/" /opt/hadoop
18 |
19 | mkdir -p /var/lib/hadoop/hdfs/namenode
20 | mkdir -p /var/lib/hadoop/hdfs/datanode
21 | mkdir -p /var/log/hadoop
22 | mkdir -p /opt/hadoop/logs
23 |
24 | # needed for writing to HDFS
25 | yum install -y snappy snappy-devel
26 |
27 | }
28 |
29 | function configureHadoop {
30 | HADOOP_RESOURCE_DIR=/vagrant/resources/hadoop
31 | for file in `ls ${HADOOP_RESOURCE_DIR}/*.xml`; do
32 | echo "Copying ${file}"
33 | cp $file /opt/hadoop/etc/hadoop
34 | done
35 |
36 | echo "Setting slaves file"
37 | for i in $(seq 2 $TOTAL_NODES); do
38 | echo "node${i}" >> /opt/hadoop/etc/hadoop/slaves
39 | done
40 |
41 | echo "export JAVA_LIBRARY_PATH=\${JAVA_LIBRARY_PATH}:/usr/lib/hadoop/lib/native:/usr/lib64" >> /opt/hadoop/etc/hadoop/hadoop-env.sh
42 | }
43 |
44 | function configureNameNode {
45 | echo "Copying over Supervisor config for namenode and resourcemanager"
46 | cp /vagrant/resources/hadoop/supervisor-namenode.conf /etc/supervisor.d/namenode.conf
47 | cp /vagrant/resources/hadoop/supervisor-resourcemanager.conf /etc/supervisor.d/resourcemanager.conf
48 | }
49 |
50 | function configureDataNode {
51 | echo "Copying over Supervisor config for datenode"
52 | cp /vagrant/resources/hadoop/supervisor-datanode.conf /etc/supervisor.d/datanode.conf
53 | }
54 |
55 | echo "Setting up Hadoop"
56 | installHadoop
57 | configureHadoop
58 |
59 | if [ "${ROLE}" == "namenode" ]; then
60 | configureNameNode
61 | elif [ "${ROLE}" == "datanode" ]; then
62 | configureDataNode
63 | fi
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/bro/topology.conf:
--------------------------------------------------------------------------------
1 | include = ../../etc/env/environment_common.conf
2 | include = ../../etc/env/es_connection.conf
3 | include = ../../etc/env/hdfs_connection.conf
4 | include = ../../etc/env/mysql_connection.conf
5 | include = metrics.conf
6 | include = features_enabled.conf
7 |
8 | #Global Properties
9 |
10 | debug.mode=true
11 | local.mode=true
12 | num.workers=1
13 | num.ackers=1
14 |
15 | #Standard 5-tuple fields
16 |
17 | source.ip=ip_src_addr
18 | source.port=ip_src_port
19 | dest.ip=ip_dst_addr
20 | dest.port=ip_dst_port
21 | protocol=protocol
22 |
23 | #Test Spout
24 | spout.test.parallelism.repeat=false
25 |
26 | #Kafka Spout
27 | spout.kafka.topic=bro_raw
28 |
29 | #Parsing Bolt
30 | bolt.parser.adapter=com.opensoc.parsing.parsers.BasicBroParser
31 | source.include.protocols=snmp,http,ftp,ssh,ssl,dns,socks,dnp3,smtp,dhcp,modbus,radius,irc
32 | source.exclude.protocols=x509,files,app_stats
33 |
34 | #GeoEnrichment
35 |
36 | bolt.enrichment.geo.enrichment_tag=geo
37 | bolt.enrichment.geo.adapter.table=GEO
38 | bolt.enrichment.geo.MAX_CACHE_SIZE_OBJECTS_NUM=10000
39 | bolt.enrichment.geo.MAX_TIME_RETAIN_MINUTES=10
40 | bolt.enrichment.geo.fields=ip_src_addr,ip_dst_addr
41 |
42 | #Indexing Bolt
43 | bolt.indexing.indexname=bro_index
44 | bolt.indexing.timestamp=yyyy.MM.dd
45 | bolt.indexing.documentname=bro_doc
46 | bolt.indexing.bulk=200
47 | bolt.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter
48 |
49 |
50 | #Error Indexing Bolt
51 | bolt.error.indexing.indexname=error
52 | bolt.error.indexing.timestamp=yyyy.MM
53 | bolt.error.indexing.documentname=bro_error
54 | bolt.error.indexing.bulk=1
55 | bolt.error.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter
56 |
57 | #HDFS Bolt
58 | bolt.hdfs.batch.size=5000
59 | bolt.hdfs.field.delimiter=|
60 | bolt.hdfs.file.rotation.size.in.mb=5
61 | bolt.hdfs.file.system.url=hdfs://node1:9000
62 | bolt.hdfs.wip.file.path=/bro/wip
63 | bolt.hdfs.finished.file.path=/bro/rotated
64 | bolt.hdfs.compression.codec.class=org.apache.hadoop.io.compress.SnappyCodec
--------------------------------------------------------------------------------
/resources/zookeeper/log4j.properties:
--------------------------------------------------------------------------------
1 | # Define some default values that can be overridden by system properties
2 | zookeeper.root.logger=INFO, CONSOLE, ROLLINGFILE
3 | zookeeper.console.threshold=INFO
4 | zookeeper.log.dir=/var/log/zookeeper
5 | zookeeper.log.file=zookeeper.log
6 | zookeeper.log.threshold=DEBUG
7 | zookeeper.tracelog.dir=/var/log/zookeeper
8 | zookeeper.tracelog.file=zookeeper_trace.log
9 |
10 | #
11 | # ZooKeeper Logging Configuration
12 | #
13 |
14 | # Format is "<default threshold> (, <appender>)+
15 |
16 | # DEFAULT: console appender only
17 | log4j.rootLogger=${zookeeper.root.logger}
18 |
19 | # Example with rolling log file
20 | #log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE
21 |
22 | # Example with rolling log file and tracing
23 | #log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE
24 |
25 | #
26 | # Log INFO level and above messages to the console
27 | #
28 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
29 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold}
30 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
31 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
32 |
33 | #
34 | # Add ROLLINGFILE to rootLogger to get log file output
35 | # Log DEBUG level and above messages to a log file
36 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender
37 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold}
38 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file}
39 |
40 | # Max log file size of 10MB
41 | log4j.appender.ROLLINGFILE.MaxFileSize=10MB
42 | # uncomment the next line to limit number of backup files
43 | #log4j.appender.ROLLINGFILE.MaxBackupIndex=10
44 |
45 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout
46 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
47 |
48 |
49 | #
50 | # Add TRACEFILE to rootLogger to get log file output
51 | # Log DEBUG level and above messages to a log file
52 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender
53 | log4j.appender.TRACEFILE.Threshold=TRACE
54 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file}
55 |
56 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout
57 | ### Notice we are including log4j's NDC here (%x)
58 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/pcap/topology.conf:
--------------------------------------------------------------------------------
1 | include = ../../etc/env/environment_common.conf
2 | include = ../../etc/env/es_connection.conf
3 | include = ../../etc/env/hdfs_connection.conf
4 | include = ../../etc/env/mysql_connection.conf
5 | include = metrics.conf
6 | include = features_enabled.conf
7 |
8 | #Global Properties
9 |
10 | debug.mode=true
11 | local.mode=true
12 | num.workers=1
13 | num.ackers=1
14 |
15 | #Standard 5-tuple fields
16 |
17 | source.ip=ip_src_addr
18 | source.port=ip_src_port
19 | dest.ip=ip_dst_addr
20 | dest.port=ip_dst_port
21 | protocol=protocol
22 |
23 | #Kafka Spout
24 | spout.kafka.buffer.size.bytes=1024000
25 | spout.kafka.consumer.id=pcap.kafka
26 | spout.kafka.fetch.size.bytes=1024
27 | spout.kafka.forcefromstart=false
28 | spout.kafka.socket.timeout.ms=600000
29 | spout.kafka.start.offset.time=-1
30 | spout.kafka.zk.root=/storm/topology/pcap/kafka
31 | spout.kafka.topic=pcap_raw
32 |
33 | #Parser Bolt
34 | bolt.parser.enabled=true
35 | bolt.parser.num.of.key.chars.to.use.for.shuffle.grouping=6
36 | bolt.parser.ts.precision=MICRO
37 |
38 | #Test Spout
39 | spout.test.parallelism.repeat=false
40 |
41 | #Kafka Spout
42 | spout.kafka.topic=pcap_raw
43 |
44 | #Indexing Bolt
45 | bolt.indexing.indexname=pcap
46 | bolt.indexing.timestamp=yyyy.MM.dd.HH
47 | bolt.indexing.documentname=pcap_doc
48 | bolt.indexing.bulk=1
49 | bolt.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter
50 |
51 | #Error Indexing Bolt
52 | bolt.error.indexing.indexname=error
53 | bolt.error.indexing.timestamp=yyyy.MM
54 | bolt.error.indexing.documentname=pcap_error
55 | bolt.error.indexing.bulk=1
56 | bolt.error.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter
57 |
58 | #HDFS Bolt
59 | bolt.hdfs.batch.size=5000
60 | bolt.hdfs.field.delimiter=|
61 | bolt.hdfs.file.rotation.size.in.mb=5
62 | bolt.hdfs.file.system.url=hdfs://node1:9000
63 | bolt.hdfs.wip.file.path=/pcap/wip
64 | bolt.hdfs.finished.file.path=/pcap/rotated
65 | bolt.hdfs.compression.codec.class=org.apache.hadoop.io.compress.SnappyCodec
66 |
67 | #HBase Bolt
68 | bolt.hbase.table.name=pcap
69 | ## Define the hbase table columns in the form <columnFamily1>:<col1>,<col2>|<columnFamily2>:<col1>|.......
70 | bolt.hbase.table.fields=t:pcap
71 | bolt.hbase.table.key.tuple.field.name=pcap_id
72 | bolt.hbase.table.timestamp.tuple.field.name=timestamp
73 | bolt.hbase.enable.batching=false
74 | bolt.hbase.write.buffer.size.in.bytes=2000000
75 | bolt.hbase.durability=SKIP_WAL
76 | bolt.hbase.partitioner.region.info.refresh.interval.mins=60
77 |
78 |
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/sourcefire/topology.conf:
--------------------------------------------------------------------------------
1 | include = ../../etc/env/environment_common.conf
2 | include = ../../etc/env/es_connection.conf
3 | include = ../../etc/env/hdfs_connection.conf
4 | include = ../../etc/env/mysql_connection.conf
5 | include = metrics.conf
6 | include = features_enabled.conf
7 |
8 | #Global Properties
9 |
10 | debug.mode=true
11 | local.mode=true
12 | num.workers=1
13 | num.ackers=1
14 | #Standard 5-tuple fields
15 |
16 | source.ip=ip_src_addr
17 | source.port=ip_src_port
18 | dest.ip=ip_dst_addr
19 | dest.port=ip_dst_port
20 | protocol=protocol
21 |
22 | #Test Spout
23 | spout.test.parallelism.repeat=false
24 |
25 | #Kafka Spout
26 | spout.kafka.topic=sourcefire_raw
27 |
28 | #Parser Bolt
29 | bolt.parser.adapter=com.opensoc.parsing.parsers.BasicSourcefireParser
30 |
31 | #GeoEnrichment
32 |
33 | bolt.enrichment.geo.enrichment_tag=geo
34 | bolt.enrichment.geo.adapter.table=GEO
35 | bolt.enrichment.geo.MAX_CACHE_SIZE_OBJECTS_NUM=100
36 | bolt.enrichment.geo.MAX_TIME_RETAIN_MINUTES=10
37 | bolt.enrichment.geo.fields=ip_src_addr,ip_dst_addr
38 |
39 | #Indexing Bolt
40 | bolt.indexing.indexname=sourcefire_index
41 | bolt.indexing.timestamp=yyyy.MM.dd
42 | bolt.indexing.documentname=sourcefire_doc
43 | bolt.indexing.bulk=1
44 | bolt.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter
45 |
46 | #Alerts Indexing Bolt
47 | bolt.alerts.indexing.indexname=alert
48 | bolt.alerts.indexing.timestamp=yyyy.MM.dd
49 | bolt.alerts.indexing.documentname=sourcefire_alert
50 | bolt.alerts.indexing.bulk=1
51 | bolt.alerts.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter
52 |
53 | #Error Indexing Bolt
54 | bolt.error.indexing.indexname=error
55 | bolt.error.indexing.timestamp=yyyy.MM
56 | bolt.error.indexing.documentname=sourcefire_error
57 | bolt.error.indexing.bulk=1
58 | bolt.error.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter
59 |
60 | #Alerts Bolt
61 | bolt.alerts.adapter=com.opensoc.alerts.adapters.AllAlertAdapter
62 | com.opensoc.alerts.adapters.AllAlertAdapter.whitelist_table_name = ip_whitelist
63 | com.opensoc.alerts.adapters.AllAlertAdapter.blacklist_table_name = ip_blacklist
64 | com.opensoc.alerts.adapters.AllAlertAdapter.quorum=node2,node3,node4
65 | com.opensoc.alerts.adapters.AllAlertAdapter.port=2181
66 | com.opensoc.alerts.adapters.AllAlertAdapter._MAX_CACHE_SIZE_OBJECTS_NUM=25
67 | com.opensoc.alerts.adapters.AllAlertAdapter._MAX_TIME_RETAIN_MINUTES=10
68 |
69 | #HDFS Bolt
70 | bolt.hdfs.batch.size=5000
71 | bolt.hdfs.field.delimiter=|
72 | bolt.hdfs.file.rotation.size.in.mb=5
73 | bolt.hdfs.file.system.url=hdfs://node1:9000
74 | bolt.hdfs.wip.file.path=/sourcefire/wip
75 | bolt.hdfs.finished.file.path=/sourcefire/rotated
76 | bolt.hdfs.compression.codec.class=org.apache.hadoop.io.compress.SnappyCodec
77 |
78 | #Kafka Bolt
79 | bolt.kafka.topic=sourcefire_enriched
80 |
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/bro/features_enabled.conf:
--------------------------------------------------------------------------------
1 | #Enable and disable features for each topology
2 |
3 | #Feature: Test spout
4 | ##Feature Description: Reads telemetry from file and ingests it into topology. Used for testing or bulk loading the topology
5 |
6 | spout.test.name=TestSpout
7 | spout.test.enabled=false
8 | spout.test.num.tasks=1
9 | spout.test.parallelism.hint=1
10 |
11 | #Feature: Kafka spout
12 | ##Feature Description: Acts as a Kafka consumer. Takes messages from a Kafka topic and ingests them into a topology
13 |
14 | spout.kafka.name=KafkaSpout
15 | spout.kafka.enabled=true
16 | spout.kafka.num.tasks=1
17 | spout.kafka.parallelism.hint=1
18 |
19 | #Feature: Parser Bolt
20 | ##Feature Description: Parses telemetry from its native format into a native JSON
21 |
22 | parser.bolt.name=ParserBolt
23 | bolt.parser.name=ParserBolt
24 | bolt.parser.enabled=true
25 | bolt.parser.num.tasks=1
26 | bolt.parser.parallelism.hint=1
27 |
28 | #Feature: Host Enrichment
29 | ##Feature Description: Appends information about known hosts to a telemetry message
30 |
31 | bolt.enrichment.host.name=HostEnrichment
32 | bolt.enrichment.host.enabled=false
33 | bolt.enrichment.host.num.tasks=1
34 | bolt.enrichment.host.parallelism.hint=1
35 |
36 | #Feature: Geo Enrichment
37 | ##Feature Description: Appends geo information about known non-local IPs to a telemetry message
38 |
39 | bolt.enrichment.geo.name=GeoEnrichment
40 | bolt.enrichment.geo.enabled=true
41 | bolt.enrichment.geo.num.tasks=1
42 | bolt.enrichment.geo.parallelism.hint=1
43 |
44 | #Feature: Whois Enrichment
45 | ##Feature Description: Appends whois information about known domains to a telemetry message
46 |
47 | bolt.enrichment.whois.name=WhoisEnrichment
48 | bolt.enrichment.whois.enabled=false
49 | bolt.enrichment.whois.num.tasks=1
50 | bolt.enrichment.whois.parallelism.hint=1
51 |
52 | #Feature: CIF Enrichment
53 | ##Feature Description: Appends information from CIF threat intelligence feeds to a telemetry message
54 |
55 | bolt.enrichment.cif.name=CIFBolt
56 | bolt.enrichment.cif.enabled=false
57 | bolt.enrichment.cif.num.tasks=1
58 | bolt.enrichment.cif.parallelism.hint=1
59 |
60 | #Feature: Threat Enrichment
61 | ##Feature Description: Appends information from Threat intelligence feeds to a telemetry message
62 |
63 | bolt.enrichment.threat.name=ThreatBolt
64 | bolt.enrichment.threat.enabled=false
65 | bolt.enrichment.threat.num.tasks=1
66 | bolt.enrichment.threat.parallelism.hint=1
67 |
68 | #Feature: Rules-Based Alerts
69 | ##Feature Description: Tags messages with rules-based alerts
70 |
71 | bolt.alerts.name=Alerts
72 | bolt.alerts.enabled=false
73 | bolt.alerts.num.tasks=1
74 | bolt.alerts.parallelism.hint=1
75 |
76 | #Feature: Indexer
77 | ##Feature Description: Indexes telemetry messages in ElasticSearch or Solr
78 |
79 | bolt.indexing.name=IndexBolt
80 | bolt.indexing.enabled=true
81 | bolt.indexing.num.tasks=1
82 | bolt.indexing.parallelism.hint=1
83 |
84 | #Feature: Alerts Indexer
85 | ##Feature Description: Indexes alert messages in ElasticSearch or Solr
86 |
87 | bolt.alerts.indexing.name=AlertIndexBolt
88 | bolt.alerts.indexing.enabled=false
89 | bolt.alerts.indexing.num.tasks=1
90 | bolt.alerts.indexing.parallelism.hint=1
91 |
92 | #Feature: Error Indexer
93 | ##Feature Description: Indexes error messages in ElasticSearch or Solr
94 |
95 | bolt.error.indexing.name=ErrorIndexBolt
96 | bolt.error.indexing.enabled=true
97 | bolt.error.indexing.num.tasks=1
98 | bolt.error.indexing.parallelism.hint=1
99 |
100 | #Feature: Kafka Bolt
101 | ##Feature Description: Writes telemetry messages back into a Kafka topic
102 |
103 | bolt.kafka.name=KafkaBolt
104 | bolt.kafka.enabled=false
105 | bolt.kafka.num.tasks=1
106 | bolt.kafka.parallelism.hint=1
107 |
108 | #Feature: HDFS Bolt
109 | ##Feature Description: Writes telemetry messages into HDFS
110 |
111 | bolt.hdfs.name=HDFSBolt
112 | bolt.hdfs.enabled=false
113 | bolt.hdfs.num.tasks=1
114 | bolt.hdfs.parallelism.hint=1
115 |
--------------------------------------------------------------------------------
/resources/opensoc/config/topologies/sourcefire/features_enabled.conf:
--------------------------------------------------------------------------------
1 | #Enable and disable features for each topology
2 |
3 | #Feature: Test spout
4 | ##Feature Description: Reads telemetry from a file and ingests it into the topology. Used for testing or bulk loading the topology
5 |
6 | spout.test.name=TestSpout
7 | spout.test.enabled=false
8 | spout.test.num.tasks=1
9 | spout.test.parallelism.hint=1
10 |
11 | #Feature: Kafka spout
12 | ##Feature Description: Acts as a Kafka consumer. Takes messages from a Kafka topic and ingests them into a topology
13 |
14 | spout.kafka.name=KafkaSpout
15 | spout.kafka.enabled=true
16 | spout.kafka.num.tasks=1
17 | spout.kafka.parallelism.hint=1
18 |
19 | #Feature: Parser Bolt
20 | ##Feature Description: Parses telemetry from its native format into JSON
21 |
22 | parser.bolt.name=ParserBolt
23 | bolt.parser.name=ParserBolt
24 | bolt.parser.enabled=true
25 | bolt.parser.num.tasks=1
26 | bolt.parser.parallelism.hint=1
27 |
28 | #Feature: Host Enrichment
29 | ##Feature Description: Appends information about known hosts to a telemetry message
30 |
31 | bolt.enrichment.host.name=HostEnrichment
32 | bolt.enrichment.host.enabled=false
33 | bolt.enrichment.host.num.tasks=1
34 | bolt.enrichment.host.parallelism.hint=1
35 |
36 | #Feature: Geo Enrichment
37 | ##Feature Description: Appends geo information about known non-local IPs to a telemetry message
38 |
39 | bolt.enrichment.geo.name=GeoEnrichment
40 | bolt.enrichment.geo.enabled=true
41 | bolt.enrichment.geo.num.tasks=1
42 | bolt.enrichment.geo.parallelism.hint=1
43 |
44 | #Feature: Whois Enrichment
45 | ##Feature Description: Appends whois information about known domains to a telemetry message
46 |
47 | bolt.enrichment.whois.name=WhoisEnrichment
48 | bolt.enrichment.whois.enabled=false
49 | bolt.enrichment.whois.num.tasks=1
50 | bolt.enrichment.whois.parallelism.hint=1
51 |
52 | #Feature: CIF Enrichment
53 | ##Feature Description: Appends information from CIF threat intelligence feeds to a telemetry message
54 |
55 | bolt.enrichment.cif.name=CIFBolt
56 | bolt.enrichment.cif.enabled=false
57 | bolt.enrichment.cif.num.tasks=1
58 | bolt.enrichment.cif.parallelism.hint=1
59 |
60 | #Feature: Threat Enrichment
61 | ##Feature Description: Appends information from Threat intelligence feeds to a telemetry message
62 |
63 | bolt.enrichment.threat.name=ThreatBolt
64 | bolt.enrichment.threat.enabled=false
65 | bolt.enrichment.threat.num.tasks=1
66 | bolt.enrichment.threat.parallelism.hint=1
67 |
68 | #Feature: Rules-Based Alerts
69 | ##Feature Description: Tags messages with rules-based alerts
70 |
71 | bolt.alerts.name=Alerts
72 | bolt.alerts.enabled=true
73 | bolt.alerts.num.tasks=1
74 | bolt.alerts.parallelism.hint=1
75 |
76 | #Feature: Indexer
77 | ##Feature Description: Indexes telemetry messages in ElasticSearch or Solr
78 |
79 | bolt.indexing.name=IndexBolt
80 | bolt.indexing.enabled=true
81 | bolt.indexing.num.tasks=1
82 | bolt.indexing.parallelism.hint=1
83 |
84 | #Feature: Alerts Indexer
85 | ##Feature Description: Indexes alert messages in ElasticSearch or Solr
86 |
87 | bolt.alerts.indexing.name=AlertIndexBolt
88 | bolt.alerts.indexing.enabled=true
89 | bolt.alerts.indexing.num.tasks=1
90 | bolt.alerts.indexing.parallelism.hint=1
91 |
92 | #Feature: Error Indexer
93 | ##Feature Description: Indexes error messages in ElasticSearch or Solr
94 |
95 | bolt.error.indexing.name=ErrorIndexBolt
96 | bolt.error.indexing.enabled=true
97 | bolt.error.indexing.num.tasks=1
98 | bolt.error.indexing.parallelism.hint=1
99 |
100 | #Feature: Kafka Bolt
101 | ##Feature Description: Writes telemetry messages back into a Kafka topic
102 |
103 | bolt.kafka.name=KafkaBolt
104 | bolt.kafka.enabled=false
105 | bolt.kafka.num.tasks=1
106 | bolt.kafka.parallelism.hint=1
107 |
108 | #Feature: HDFS Bolt
109 | ##Feature Description: Writes telemetry messages into HDFS
110 |
111 | bolt.hdfs.name=HDFSBolt
112 | bolt.hdfs.enabled=false
113 | bolt.hdfs.num.tasks=1
114 | bolt.hdfs.parallelism.hint=1
115 |
--------------------------------------------------------------------------------
/Vagrantfile:
--------------------------------------------------------------------------------
1 | Vagrant.require_version ">= 1.4.3"
2 | VAGRANTFILE_API_VERSION = "2"
3 |
4 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
5 | numNodes = 4
6 |   # create nodes in reverse order (node4 first, node1 last)
7 |   numNodes.downto(1).each do |i|
8 | config.vm.define "node#{i}" do |node|
9 | node.vm.box = "chef/centos-6.5"
10 | node.vm.provider "virtualbox" do |v|
11 | v.name = "node#{i}"
12 | v.customize ["modifyvm", :id, "--memory", "1024"]
13 | end
14 | node.vm.network :private_network, ip: "10.0.0.10#{i}"
15 |
16 | # base setup
17 | node.vm.hostname = "node#{i}"
18 |
19 | node.vm.provision "shell" do |s|
20 | s.path = "scripts/setup-os.sh"
21 | s.args = "-t #{numNodes}"
22 | end
23 |
24 | node.vm.provision "shell", path: "scripts/setup-java.sh"
25 |
26 | if i == 1
27 | # namenode
28 | node.vm.provision "shell" do |s|
29 | s.path = "scripts/setup-hadoop.sh"
30 | s.args = "-r namenode -t #{numNodes}"
31 | end
32 | node.vm.network "forwarded_port", guest: 50070, host: 50070
33 |         node.vm.network "forwarded_port", guest: 8088, host: 8088
34 |
35 | # storm nimbus
36 | node.vm.provision "shell" do |s|
37 | s.path = "scripts/setup-storm.sh"
38 | s.args = "-r nimbus -t #{numNodes}"
39 | end
40 | node.vm.network "forwarded_port", guest: 8080, host: 8080
41 |
42 | # hbase master
43 | node.vm.provision "shell" do |s|
44 | s.path = "scripts/setup-hbase.sh"
45 | s.args = "-r master -t #{numNodes}"
46 | end
47 | node.vm.network "forwarded_port", guest: 60010, host: 60010
48 |
49 | # hive
50 | node.vm.provision "shell" do |s|
51 | s.path = "scripts/setup-hive.sh"
52 | end
53 |
54 | node.vm.provision "shell" do |s|
55 | s.path = "scripts/setup-elasticsearch.sh"
56 | s.args = "-c -i 10.0.0.10#{i}"
57 | end
58 |         node.vm.network "forwarded_port", guest: 9200, host: 9200
59 |
60 | # setup mysql for geo enrichment
61 | node.vm.provision "shell", path: "scripts/setup-geo-enrichment.sh"
62 | else
63 | # zookeeper
64 | node.vm.provision "shell" do |s|
65 | s.path = "scripts/setup-zookeeper.sh"
66 | s.args = "-t #{numNodes}"
67 | end
68 | # datanode
69 | node.vm.provision "shell" do |s|
70 | s.path = "scripts/setup-hadoop.sh"
71 | s.args = "-r datanode -t #{numNodes}"
72 | end
73 | # hbase regionserver
74 | node.vm.provision "shell" do |s|
75 | s.path = "scripts/setup-hbase.sh"
76 | s.args = "-r regionserver -t #{numNodes}"
77 | end
78 | # kafka broker
79 | node.vm.provision "shell" do |s|
80 | s.path = "scripts/setup-kafka.sh"
81 | s.args = "-t #{numNodes}"
82 | end
83 | # storm supervisor
84 | node.vm.provision "shell" do |s|
85 | s.path = "scripts/setup-storm.sh"
86 | s.args = "-r supervisor -t #{numNodes}"
87 | end
88 | # elasticsearch
89 | node.vm.provision "shell" do |s|
90 | s.path = "scripts/setup-elasticsearch.sh"
91 | s.args = "-i 10.0.0.10#{i}"
92 | end
93 | # reload supervisord
94 | end
95 |
96 | #After everything is provisioned, start Supervisor
97 | node.vm.provision "shell", inline: "pgrep supervisord || start supervisor"
98 | end
99 | end
100 | end
101 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OpenSOC Vagrant
2 |
3 | A collection of shell scripts and a Vagrantfile for building an OpenSOC cluster. There are two primary goals we hope to achieve with this project:
4 |
5 | * Create a turnkey OpenSOC cluster to allow users to play with OpenSOC with minimal setup
6 | * Provide a disposable environment where developers can run and test OpenSOC topologies.
7 |
8 | To accomplish this, we have provided a collection of bash scripts that are orchestrated using [Vagrant](https://www.vagrantup.com/) and [Fabric](http://www.fabfile.org/). Both of these tools should be installed prior to using this project.
9 |
10 | ## Inspiration
11 |
12 | Credit to https://github.com/vangj/vagrant-hadoop-2.4.1-spark-1.0.1 for the inspiration for this. This project is heavily influenced by that one.
13 |
14 | ## Quick Start
15 |
16 | If you don't want to bother with the details of the cluster and just want to see OpenSOC, place an RPM for Oracle's JVM in `resources/` and edit `common.sh` to set `JRE_RPM` to the name of the RPM. Then run:
17 |
18 | ```
19 | vagrant up
20 | fab vagrant quickstart
21 | ```
22 |
23 | Finally, point your browser at https://localhost:8443
24 |
25 | This should get you a running OpenSOC cluster with Bro, Snort, and PCAP. If you are looking to customize the setup or run your own topologies, see the sections below on running the cluster and running an OpenSOC topology.
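
The `common.sh` edit mentioned above is a single variable assignment. A minimal sketch, assuming a hypothetical RPM filename:

```
# scripts/common.sh -- point JRE_RPM at the RPM you placed in resources/
JRE_RPM=jre-8u65-linux-x64.rpm
```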
26 |
27 | ## Advanced Setup
28 |
29 | If you are interested in tweaking the underlying cluster, running your own OpenSOC topology, or just want to understand how it all works, this section breaks down how the cluster is started and how topologies are run.
30 |
31 | ## Running the cluster
32 |
33 | To get the cluster up and running, do the following:
34 |
35 | * Place an RPM for Oracle's JVM in `resources/` and edit `common.sh` to set `JRE_RPM` to the name of the RPM
36 | * Run `vagrant up`
37 | * Run `fab vagrant postsetup`
38 |
39 | The `vagrant up` command will build the VMs for the cluster and install all dependencies, which include:
40 |
41 | * Hadoop 2.6
42 | * Hbase 0.98
43 | * Kafka 0.8.1.1
44 | * Zookeeper 3.4.6
45 | * Hive 1.2.0
46 | * Elasticsearch 1.5.2
47 | * Storm 0.9.4
48 |
49 | After this, the `fab vagrant postsetup` command will run a handful of tasks that need to occur after the cluster is running, but before it can be used. These are:
50 |
51 | * Formatting HDFS
52 | * Starting Hadoop cluster
53 | * Starting HBase cluster
54 | * Setting up the HBase whitelist table with RFC 1918 addresses
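
If you want to confirm that everything started, the fabfile also provides a `status` task that runs `supervisorctl status` on every node:

```
fab vagrant status
```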
55 |
56 | ## Running an OpenSOC Topology
57 |
58 | After provisioning the cluster as described above, you can use additional Fabric tasks to run a topology. Before you start, you should have the following:
59 |
60 | * opensoc-streaming repo cloned locally
61 | * a copy of OpenSOC configs in resources/opensoc/OpenSOC_Configs
62 |
63 | Then you can run `fab vagrant start_topology:`, which will do the following (see the example after this list):
64 |
65 | * cd into the opensoc-streaming repo, and run `mvn clean package`
66 | * copy the newly built OpenSOC-Topologies.jar to resources/opensoc, where it will be available to the VMs
67 | * submit `` and the topology jar to Nimbus
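
For example, to build and submit the Bro topology with the configs bundled in this repo (mirroring what the quickstart task does), you could run:

```
fab vagrant start_topology:com.opensoc.topology.Bro,config_path=config/
```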
68 |
69 | If your topology is pulling data from Kafka, you can create a topic with the Fabric task `fab vagrant create_topic:`.
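
For example, to create the `bro_raw` topic that the Bro topology consumes:

```
fab vagrant create_topic:bro_raw
```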
70 |
71 | ## Virtual Machines
72 |
73 | By default, 4 VMs will be created. They are named node1, node2, node3, and node4. Here is a breakdown of what services run where:
74 |
75 | * node1
76 | * HDFS Namenode
77 | * Yarn Resourcemanager
78 | * Storm Nimbus and UI
79 | * HBase Master
80 | * Elasticsearch Master
81 | * MySql (Hive metastore and geo enrichment store)
82 |
83 | * node2-4
84 | * Kafka Broker
85 | * Zookeeper
86 | * HDFS Datanode
87 | * YARN Nodemanager
88 | * Storm Supervisor
89 | * HBase Regionserver
90 | * Elasticsearch Data Nodes
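
If you need a shell on any of these machines, the standard Vagrant workflow applies, for example:

```
vagrant ssh node1
```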
91 |
92 | ## Port Forwarding
93 |
94 | Some services' UIs are forwarded to localhost for ease of use. The following ports are forwarded by default:
95 |
96 | * HDFS - localhost:50070 -> node1:50070
97 | * Hbase - localhost:60010 -> node1:60010
98 | * Storm UI - localhost:8080 -> node1:8080
99 | * Elasticsearch - localhost:9200 -> node1:9200
100 | * OpenSOC-UI - localhost:8443 -> node1:443
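
Once the cluster is running, these should be reachable from the host machine; for example, to check Elasticsearch through its forwarded port:

```
curl 'http://localhost:9200/_cluster/health?pretty'
```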
101 |
102 | ## Progress
103 |
104 | Here is a list of what will be provisioned via Vagrant and its current status:
105 |
106 | * Java - DONE
107 | * Zookeeper - DONE
108 | * HDFS/Yarn - DONE
109 | * Kafka - DONE
110 | * Storm - DONE
111 | * Hbase - DONE
112 | * Hive - DONE
113 | * Elasticsearch - DONE
114 | * GeoIP Enrichment Data - DONE
115 | * OpenSOC UI
116 | * OpenSOC Storm Topologies
117 |
118 |
--------------------------------------------------------------------------------
/resources/kafka/server.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # see kafka.server.KafkaConfig for additional details and defaults
16 |
17 | ############################# Server Basics #############################
18 |
19 | # The id of the broker. This must be set to a unique integer for each broker.
20 | #broker.id=0
21 |
22 | ############################# Socket Server Settings #############################
23 |
24 | # The port the socket server listens on
25 | port=9092
26 |
27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces
28 | #host.name=localhost
29 |
30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the
31 | # value for "host.name" if configured. Otherwise, it will use the value returned from
32 | # java.net.InetAddress.getCanonicalHostName().
33 | #advertised.host.name=
34 |
35 | # The port to publish to ZooKeeper for clients to use. If this is not set,
36 | # it will publish the same port that the broker binds to.
37 | #advertised.port=
38 |
39 | # The number of threads handling network requests
40 | num.network.threads=2
41 |
42 | # The number of threads doing disk I/O
43 | num.io.threads=8
44 |
45 | # The send buffer (SO_SNDBUF) used by the socket server
46 | socket.send.buffer.bytes=1048576
47 |
48 | # The receive buffer (SO_RCVBUF) used by the socket server
49 | socket.receive.buffer.bytes=1048576
50 |
51 | # The maximum size of a request that the socket server will accept (protection against OOM)
52 | socket.request.max.bytes=104857600
53 |
54 |
55 | ############################# Log Basics #############################
56 |
57 | # A comma-separated list of directories under which to store log files
58 | log.dirs=/var/lib/kafka-logs
59 |
60 | # The default number of log partitions per topic. More partitions allow greater
61 | # parallelism for consumption, but this will also result in more files across
62 | # the brokers.
63 | num.partitions=1
64 |
65 | ############################# Log Flush Policy #############################
66 |
67 | # Messages are immediately written to the filesystem but by default we only fsync() to sync
68 | # the OS cache lazily. The following configurations control the flush of data to disk.
69 | # There are a few important trade-offs here:
70 | # 1. Durability: Unflushed data may be lost if you are not using replication.
71 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
72 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
73 | # The settings below allow one to configure the flush policy to flush data after a period of time or
74 | # every N messages (or both). This can be done globally and overridden on a per-topic basis.
75 |
76 | # The number of messages to accept before forcing a flush of data to disk
77 | #log.flush.interval.messages=10000
78 |
79 | # The maximum amount of time a message can sit in a log before we force a flush
80 | #log.flush.interval.ms=1000
81 |
82 | ############################# Log Retention Policy #############################
83 |
84 | # The following configurations control the disposal of log segments. The policy can
85 | # be set to delete segments after a period of time, or after a given size has accumulated.
86 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
87 | # from the end of the log.
88 |
89 | # The minimum age of a log file to be eligible for deletion
90 | log.retention.hours=168
91 |
92 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
93 | # segments don't drop below log.retention.bytes.
94 | #log.retention.bytes=1073741824
95 |
96 | # The maximum size of a log segment file. When this size is reached a new log segment will be created.
97 | log.segment.bytes=536870912
98 |
99 | # The interval at which log segments are checked to see if they can be deleted according
100 | # to the retention policies
101 | log.retention.check.interval.ms=60000
102 |
103 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires.
104 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction.
105 | log.cleaner.enable=false
106 |
107 | ############################# Zookeeper #############################
108 |
109 | # Zookeeper connection string (see zookeeper docs for details).
110 | # This is a comma separated host:port pairs, each corresponding to a zk
111 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
112 | # You can also append an optional chroot string to the urls to specify the
113 | # root directory for all kafka znodes.
114 | #zookeeper.connect=localhost:2181
115 |
116 | # Timeout in ms for connecting to zookeeper
117 | zookeeper.connection.timeout.ms=1000000
118 |
--------------------------------------------------------------------------------
/fabfile.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import os
3 | import xml.etree.ElementTree as ETree
4 |
5 | from fabric.api import env, local, run, sudo, execute, hosts
6 | from fabric.context_managers import shell_env, lcd, cd
7 | from fabric.colors import yellow, green
8 |
9 | # configure fabric to talk to the VMs
10 | temp_ssh_config = '.ssh_config'
11 |
12 | def vagrant():
13 | '''sets up fabric environment to work with vagrant VMs'''
14 | with open(temp_ssh_config, 'w') as f:
15 | f.write(local('vagrant ssh-config', capture=True))
16 |
17 | global total_nodes
18 | total_nodes = int(local('vagrant status | grep node | wc -l', capture=True))
19 |
20 | env.user = 'vagrant'
21 | env.use_ssh_config = True
22 | env.ssh_config_path = temp_ssh_config
23 |
24 | @hosts('node1')
25 | def format_namenode():
26 | '''Formats namenode on node1'''
27 | with shell_env(JAVA_HOME='/usr/java/default'):
28 | sudo('/opt/hadoop/bin/hdfs namenode -format vagrant -nonInteractive', warn_only=True)
29 |
30 |
31 | def supervisorctl_start(process):
32 | '''Start a process managed by supervisor'''
33 | sudo('supervisorctl start {0}'.format(process))
34 |
35 | def supervisorctl_stop(process):
36 | '''Stop a process managed by supervisor'''
37 | sudo('supervisorctl stop {0}'.format(process))
38 |
39 |
40 | def postsetup():
41 | '''Perform post vagrant up tasks on cluster'''
42 | execute(format_namenode)
43 | execute(supervisorctl_start, 'namenode', host='node1')
44 | execute(supervisorctl_start, 'resourcemanager', host='node1')
45 | execute(supervisorctl_start, 'master', host='node1')
46 | for x in range(2,total_nodes+1):
47 | execute(supervisorctl_start, 'datanode', host='node{0}'.format(x))
48 | execute(supervisorctl_start, 'nodemanager', host='node{0}'.format(x))
49 | execute(supervisorctl_start, 'regionserver', host='node{0}'.format(x))
50 |
51 |     execute(init_ip_whitelist, host='node1')
52 |
53 | def supervisorctl_reread_update():
54 | sudo('supervisorctl reread')
55 | sudo('supervisorctl update')
56 |
57 | def update_supervisor():
58 | execute(supervisorctl_reread_update, hosts=['node{0}'.format(x) for x in range(1,total_nodes+1)])
59 |
60 | def supervisorctl_status():
61 | sudo('supervisorctl status')
62 |
63 | def status():
64 | execute(supervisorctl_status, hosts=['node{0}'.format(x) for x in range(1,total_nodes+1)])
65 |
66 | def init_ip_whitelist():
67 | run('/opt/hbase/bin/hbase shell /vagrant/resources/opensoc/hbase_ip_whitelist.rb')
68 |
69 |
70 | @hosts('node2')
71 | def create_topic(topic, partitions=1, replication_factor=1):
72 | run('/opt/kafka/bin/kafka-topics.sh --zookeeper localhost --create --topic {0} --partitions {1} --replication-factor {2}'.format(
73 | topic,
74 | partitions,
75 | replication_factor
76 | ))
77 |
78 | def get_topologies(repo='../opensoc-streaming'):
79 | '''Build and fetch a new OpenSOC topology jar from repo (default: ../opensoc-streaming)'''
80 |
81 | pom_file = os.path.join(repo, 'pom.xml')
82 | pom = ETree.parse(pom_file)
83 | version = pom.getroot().find('{http://maven.apache.org/POM/4.0.0}version').text
84 | rev = local("git log | head -1 | cut -d ' ' -f 2 | cut -c1-11", capture=True)
85 |
86 | topology_jar = os.path.join(
87 | repo,
88 | 'OpenSOC-Topologies',
89 | 'target',
90 | 'OpenSOC-Topologies-{0}.jar'.format(version)
91 | )
92 |
93 | vagrant_jar = 'OpenSOC-Topologies-{0}-{1}.jar'.format(version, rev)
94 | vagrant_jar_path = os.path.join('resources/opensoc', vagrant_jar)
95 |
96 | if os.path.exists(vagrant_jar_path):
97 | print yellow('{0} already exists. Not building a new jar.'.format(vagrant_jar_path))
98 | print yellow('Remove the existing jar and run this command again to build a fresh jar.')
99 | return vagrant_jar
100 |
101 | with lcd(repo):
102 | local('mvn clean package')
103 |
104 | local('cp {0} {1}'.format(
105 | topology_jar,
106 | vagrant_jar_path
107 | ))
108 |
109 | return vagrant_jar
110 |
111 | @hosts('node1')
112 | def start_topology(topology, repo=None, local_mode=False, config_path='/vagrant/opensoc/OpenSOC_Configs/', generator_spout=False):
113 | '''Builds and copies a fresh topology jar from a locally cloned opensoc-streaming and submits it to storm'''
114 |
115 | if repo is not None:
116 | jar = get_topologies(repo)
117 | else:
118 | jar = get_topologies()
119 |
120 | if local_mode:
121 | local_mode='true'
122 | else:
123 | local_mode='false'
124 |
125 | if generator_spout:
126 | generator_spout='true'
127 | else:
128 | generator_spout='false'
129 |
130 | with cd('/vagrant/resources/opensoc/'):
131 | run('/opt/storm/bin/storm jar {0} {1} -local_mode {2} -config_path {3} -generator_spout {4}'.format(
132 | jar,
133 | topology,
134 | local_mode,
135 | config_path,
136 | generator_spout
137 | ))
138 |
139 | def quickstart():
140 | '''Start OpenSOC with bro, snort, and pcap'''
141 | # run post setup tasks
142 | postsetup()
143 |
144 |     # clone opensoc-streaming if it's not already cloned locally
145 | if not os.path.exists('../opensoc-streaming'):
146 | with lcd('../'):
147 | local('git clone https://github.com/OpenSOC/opensoc-streaming.git')
148 | else:
149 | print green('Found a copy of opensoc-streaming in ../opensoc-streaming.')
150 |
151 | for top in ['bro', 'sourcefire', 'pcap']:
152 |
153 | topic = '{0}_raw'.format(top)
154 | # create kafka topic
155 | execute(create_topic, topic, host='node2')
156 |
157 | # launch topology
158 | topology = 'com.opensoc.topology.{0}'.format(top.capitalize())
159 | execute(start_topology, topology, config_path='config/')
160 |
161 |
162 |
163 |
164 |
--------------------------------------------------------------------------------
/resources/elasticsearch/elasticsearch.yml:
--------------------------------------------------------------------------------
1 | ################################### Cluster ###################################
2 |
3 | # Cluster name identifies your cluster for auto-discovery. If you're running
4 | # multiple clusters on the same network, make sure you're using unique names.
5 | #
6 | cluster.name: "opensoc-vagrant"
7 |
8 |
9 | #################################### Node #####################################
10 |
11 | # Node names are generated dynamically on startup, so you're relieved
12 | # from configuring them manually. You can tie this node to a specific name:
13 | #
14 | node.name: "__HOSTNAME__"
15 |
16 | # Every node can be configured to allow or deny being eligible as the master,
17 | # and to allow or deny to store the data.
18 | #
19 | # Allow this node to be eligible as a master node (enabled by default):
20 | #
21 | #node.master: true
22 | #
23 | # Allow this node to store data (enabled by default):
24 | #
25 | #node.data: true
26 |
27 | # You can exploit these settings to design advanced cluster topologies.
28 | #
29 | # 1. You want this node to never become a master node, only to hold data.
30 | # This will be the "workhorse" of your cluster.
31 | #
32 | node.master: false
33 | node.data: true
34 | #
35 | # 2. You want this node to only serve as a master: to not store any data and
36 | # to have free resources. This will be the "coordinator" of your cluster.
37 | #
38 | #node.master: true
39 | #node.data: false
40 | #
41 | # 3. You want this node to be neither master nor data node, but
42 | # to act as a "search load balancer" (fetching data from nodes,
43 | # aggregating results, etc.)
44 | #
45 | #node.master: false
46 | #node.data: false
47 |
48 | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the
49 | # Node Info API [http://localhost:9200/_nodes] or GUI tools
50 | # such as ,
51 | # ,
52 | # and
53 | # to inspect the cluster state.
54 |
55 | # A node can have generic attributes associated with it, which can later be used
56 | # for customized shard allocation filtering, or allocation awareness. An attribute
57 | # is a simple key value pair, similar to node.key: value, here is an example:
58 | #
59 | #node.rack: rack314
60 |
61 | # By default, multiple nodes are allowed to start from the same installation location
62 | # to disable it, set the following:
63 | #node.max_local_storage_nodes: 1
64 |
65 |
66 | #################################### Index ####################################
67 |
68 | # You can set a number of options (such as shard/replica options, mapping
69 | # or analyzer definitions, translog settings, ...) for indices globally,
70 | # in this file.
71 | #
72 | # Note, that it makes more sense to configure index settings specifically for
73 | # a certain index, either when creating it or by using the index templates API.
74 | #
75 | # See and
76 | #
77 | # for more information.
78 |
79 | # Set the number of shards (splits) of an index (5 by default):
80 | #
81 | #index.number_of_shards: 5
82 |
83 | # Set the number of replicas (additional copies) of an index (1 by default):
84 | #
85 | #index.number_of_replicas: 1
86 |
87 | # Note, that for development on a local machine, with small indices, it usually
88 | # makes sense to "disable" the distributed features:
89 | #
90 | index.number_of_shards: 1
91 | index.number_of_replicas: 0
92 |
93 | # These settings directly affect the performance of index and search operations
94 | # in your cluster. Assuming you have enough machines to hold shards and
95 | # replicas, the rule of thumb is:
96 | #
97 | # 1. Having more *shards* enhances the _indexing_ performance and allows to
98 | # _distribute_ a big index across machines.
99 | # 2. Having more *replicas* enhances the _search_ performance and improves the
100 | # cluster _availability_.
101 | #
102 | # The "number_of_shards" is a one-time setting for an index.
103 | #
104 | # The "number_of_replicas" can be increased or decreased anytime,
105 | # by using the Index Update Settings API.
106 | #
107 | # Elasticsearch takes care about load balancing, relocating, gathering the
108 | # results from nodes, etc. Experiment with different settings to fine-tune
109 | # your setup.
110 |
111 | # Use the Index Status API () to inspect
112 | # the index status.
113 |
114 |
115 | #################################### Paths ####################################
116 |
117 | # Path to directory containing configuration (this file and logging.yml):
118 | #
119 | path.conf: /opt/elasticsearch/config
120 |
121 | # Path to directory where to store index data allocated for this node.
122 | #
123 | path.data: /var/lib/elasticsearch
124 | #
125 | # Can optionally include more than one location, causing data to be striped across
126 | # the locations (a la RAID 0) on a file level, favouring locations with most free
127 | # space on creation. For example:
128 | #
129 | #path.data: /path/to/data1,/path/to/data2
130 |
131 | # Path to temporary files:
132 | #
133 | #path.work: /path/to/work
134 |
135 | # Path to log files:
136 | #
137 | path.logs: /var/log/elasticsearch
138 |
139 | # Path to where plugins are installed:
140 | #
141 | path.plugins: /opt/elasticsearch/plugins
142 |
143 |
144 | #################################### Plugin ###################################
145 |
146 | # If a plugin listed here is not installed for current node, the node will not start.
147 | #
148 | #plugin.mandatory: mapper-attachments,lang-groovy
149 |
150 |
151 | ################################### Memory ####################################
152 |
153 | # Elasticsearch performs poorly when JVM starts swapping: you should ensure that
154 | # it _never_ swaps.
155 | #
156 | # Set this property to true to lock the memory:
157 | #
158 | #bootstrap.mlockall: true
159 |
160 | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set
161 | # to the same value, and that the machine has enough memory to allocate
162 | # for Elasticsearch, leaving enough memory for the operating system itself.
163 | #
164 | # You should also make sure that the Elasticsearch process is allowed to lock
165 | # the memory, eg. by using `ulimit -l unlimited`.
166 |
167 |
168 | ############################## Network And HTTP ###############################
169 |
170 | # Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens
171 | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node
172 | # communication. (the range means that if the port is busy, it will automatically
173 | # try the next port).
174 |
175 | # Set the bind address specifically (IPv4 or IPv6):
176 | #
177 | #network.bind_host: 192.168.0.1
178 |
179 | # Set the address other nodes will use to communicate with this node. If not
180 | # set, it is automatically derived. It must point to an actual IP address.
181 | #
182 | network.publish_host: __IP_ADDR__
183 |
184 | # Set both 'bind_host' and 'publish_host':
185 | #
186 | #network.host: 192.168.0.1
187 |
188 | # Set a custom port for the node to node communication (9300 by default):
189 | #
190 | #transport.tcp.port: 9300
191 |
192 | # Enable compression for all communication between nodes (disabled by default):
193 | #
194 | #transport.tcp.compress: true
195 |
196 | # Set a custom port to listen for HTTP traffic:
197 | #
198 | #http.port: 9200
199 |
200 | # Set a custom allowed content length:
201 | #
202 | #http.max_content_length: 100mb
203 |
204 | # Disable HTTP completely:
205 | #
206 | #http.enabled: false
207 |
208 |
209 | ################################### Gateway ###################################
210 |
211 | # The gateway allows for persisting the cluster state between full cluster
212 | # restarts. Every change to the state (such as adding an index) will be stored
213 | # in the gateway, and when the cluster starts up for the first time,
214 | # it will read its state from the gateway.
215 |
216 | # There are several types of gateway implementations. For more information, see
217 | # .
218 |
219 | # The default gateway type is the "local" gateway (recommended):
220 | #
221 | #gateway.type: local
222 |
223 | # Settings below control how and when to start the initial recovery process on
224 | # a full cluster restart (to reuse as much local data as possible when using shared
225 | # gateway).
226 |
227 | # Allow recovery process after N nodes in a cluster are up:
228 | #
229 | #gateway.recover_after_nodes: 1
230 |
231 | # Set the timeout to initiate the recovery process, once the N nodes
232 | # from previous setting are up (accepts time value):
233 | #
234 | #gateway.recover_after_time: 5m
235 |
236 | # Set how many nodes are expected in this cluster. Once these N nodes
237 | # are up (and recover_after_nodes is met), begin recovery process immediately
238 | # (without waiting for recover_after_time to expire):
239 | #
240 | #gateway.expected_nodes: 2
241 |
242 |
243 | ############################# Recovery Throttling #############################
244 |
245 | # These settings allow to control the process of shards allocation between
246 | # nodes during initial recovery, replica allocation, rebalancing,
247 | # or when adding and removing nodes.
248 |
249 | # Set the number of concurrent recoveries happening on a node:
250 | #
251 | # 1. During the initial recovery
252 | #
253 | #cluster.routing.allocation.node_initial_primaries_recoveries: 4
254 | #
255 | # 2. During adding/removing nodes, rebalancing, etc
256 | #
257 | #cluster.routing.allocation.node_concurrent_recoveries: 2
258 |
259 | # Set to throttle throughput when recovering (eg. 100mb, by default 20mb):
260 | #
261 | #indices.recovery.max_bytes_per_sec: 20mb
262 |
263 | # Set to limit the number of open concurrent streams when
264 | # recovering a shard from a peer:
265 | #
266 | #indices.recovery.concurrent_streams: 5
267 |
268 |
269 | ################################## Discovery ##################################
270 |
271 | # Discovery infrastructure ensures nodes can be found within a cluster
272 | # and master node is elected. Multicast discovery is the default.
273 |
274 | # Set to ensure a node sees N other master eligible nodes to be considered
275 | # operational within the cluster. This should be set to a quorum/majority of
276 | # the master-eligible nodes in the cluster.
277 | #
278 | #discovery.zen.minimum_master_nodes: 1
279 |
280 | # Set the time to wait for ping responses from other nodes when discovering.
281 | # Set this option to a higher value on a slow or congested network
282 | # to minimize discovery failures:
283 | #
284 | #discovery.zen.ping.timeout: 3s
285 |
286 | # For more information, see
287 | #
288 |
289 | # Unicast discovery allows to explicitly control which nodes will be used
290 | # to discover the cluster. It can be used when multicast is not present,
291 | # or to restrict the cluster communication-wise.
292 | #
293 | # 1. Disable multicast discovery (enabled by default):
294 | #
295 | discovery.zen.ping.multicast.enabled: false
296 | #
297 | # 2. Configure an initial list of master nodes in the cluster
298 | # to perform discovery when new nodes (master or data) are started:
299 | #
300 | discovery.zen.ping.unicast.hosts: ["node1"]
301 |
302 | # EC2 discovery allows to use AWS EC2 API in order to perform discovery.
303 | #
304 | # You have to install the cloud-aws plugin for enabling the EC2 discovery.
305 | #
306 | # For more information, see
307 | #
308 | #
309 | # See
310 | # for a step-by-step tutorial.
311 |
312 | # GCE discovery allows to use Google Compute Engine API in order to perform discovery.
313 | #
314 | # You have to install the cloud-gce plugin for enabling the GCE discovery.
315 | #
316 | # For more information, see .
317 |
318 | # Azure discovery allows to use Azure API in order to perform discovery.
319 | #
320 | # You have to install the cloud-azure plugin for enabling the Azure discovery.
321 | #
322 | # For more information, see .
323 |
324 | ################################## Slow Log ##################################
325 |
326 | # Shard level query and fetch threshold logging.
327 |
328 | #index.search.slowlog.threshold.query.warn: 10s
329 | #index.search.slowlog.threshold.query.info: 5s
330 | #index.search.slowlog.threshold.query.debug: 2s
331 | #index.search.slowlog.threshold.query.trace: 500ms
332 |
333 | #index.search.slowlog.threshold.fetch.warn: 1s
334 | #index.search.slowlog.threshold.fetch.info: 800ms
335 | #index.search.slowlog.threshold.fetch.debug: 500ms
336 | #index.search.slowlog.threshold.fetch.trace: 200ms
337 |
338 | #index.indexing.slowlog.threshold.index.warn: 10s
339 | #index.indexing.slowlog.threshold.index.info: 5s
340 | #index.indexing.slowlog.threshold.index.debug: 2s
341 | #index.indexing.slowlog.threshold.index.trace: 500ms
342 |
343 | ################################## GC Logging ################################
344 |
345 | #monitor.jvm.gc.young.warn: 1000ms
346 | #monitor.jvm.gc.young.info: 700ms
347 | #monitor.jvm.gc.young.debug: 400ms
348 |
349 | #monitor.jvm.gc.old.warn: 10s
350 | #monitor.jvm.gc.old.info: 5s
351 | #monitor.jvm.gc.old.debug: 2s
352 |
353 | ################################## Security ################################
354 |
355 | # Uncomment if you want to enable JSONP as a valid return transport on the
356 | # http server. With this enabled, it may pose a security risk, so disabling
357 | # it unless you need it is recommended (it is disabled by default).
358 | #
359 | #http.jsonp.enable: true
--------------------------------------------------------------------------------
/resources/elasticsearch/elasticsearch-client.yml:
--------------------------------------------------------------------------------
1 | ################################### Cluster ###################################
2 |
3 | # Cluster name identifies your cluster for auto-discovery. If you're running
4 | # multiple clusters on the same network, make sure you're using unique names.
5 | #
6 | cluster.name: "opensoc-vagrant"
7 |
8 |
9 | #################################### Node #####################################
10 |
11 | # Node names are generated dynamically on startup, so you're relieved
12 | # from configuring them manually. You can tie this node to a specific name:
13 | #
14 | node.name: "__HOSTNAME__"
15 |
16 | # Every node can be configured to allow or deny being eligible as the master,
17 | # and to allow or deny to store the data.
18 | #
19 | # Allow this node to be eligible as a master node (enabled by default):
20 | #
21 | #node.master: true
22 | #
23 | # Allow this node to store data (enabled by default):
24 | #
25 | #node.data: true
26 |
27 | # You can exploit these settings to design advanced cluster topologies.
28 | #
29 | # 1. You want this node to never become a master node, only to hold data.
30 | # This will be the "workhorse" of your cluster.
31 | #
32 | #node.master: false
33 | #node.data: true
34 | #
35 | # 2. You want this node to only serve as a master: to not store any data and
36 | # to have free resources. This will be the "coordinator" of your cluster.
37 | #
38 | node.master: true
39 | node.data: false
40 | #
41 | # 3. You want this node to be neither master nor data node, but
42 | # to act as a "search load balancer" (fetching data from nodes,
43 | # aggregating results, etc.)
44 | #
45 | # node.master: false
46 | # node.data: false
47 |
48 | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the
49 | # Node Info API [http://localhost:9200/_nodes] or GUI tools
50 | # such as ,
51 | # ,
52 | # and
53 | # to inspect the cluster state.
54 |
55 | # A node can have generic attributes associated with it, which can later be used
56 | # for customized shard allocation filtering, or allocation awareness. An attribute
57 | # is a simple key value pair, similar to node.key: value, here is an example:
58 | #
59 | #node.rack: rack314
60 |
61 | # By default, multiple nodes are allowed to start from the same installation location
62 | # to disable it, set the following:
63 | #node.max_local_storage_nodes: 1
64 |
65 |
66 | #################################### Index ####################################
67 |
68 | # You can set a number of options (such as shard/replica options, mapping
69 | # or analyzer definitions, translog settings, ...) for indices globally,
70 | # in this file.
71 | #
72 | # Note, that it makes more sense to configure index settings specifically for
73 | # a certain index, either when creating it or by using the index templates API.
74 | #
75 | # See and
76 | #
77 | # for more information.
78 |
79 | # Set the number of shards (splits) of an index (5 by default):
80 | #
81 | #index.number_of_shards: 5
82 |
83 | # Set the number of replicas (additional copies) of an index (1 by default):
84 | #
85 | #index.number_of_replicas: 1
86 |
87 | # Note, that for development on a local machine, with small indices, it usually
88 | # makes sense to "disable" the distributed features:
89 | #
90 | index.number_of_shards: 1
91 | index.number_of_replicas: 0
92 |
93 | # These settings directly affect the performance of index and search operations
94 | # in your cluster. Assuming you have enough machines to hold shards and
95 | # replicas, the rule of thumb is:
96 | #
97 | # 1. Having more *shards* enhances the _indexing_ performance and allows to
98 | # _distribute_ a big index across machines.
99 | # 2. Having more *replicas* enhances the _search_ performance and improves the
100 | # cluster _availability_.
101 | #
102 | # The "number_of_shards" is a one-time setting for an index.
103 | #
104 | # The "number_of_replicas" can be increased or decreased anytime,
105 | # by using the Index Update Settings API.
106 | #
107 | # Elasticsearch takes care about load balancing, relocating, gathering the
108 | # results from nodes, etc. Experiment with different settings to fine-tune
109 | # your setup.
110 |
111 | # Use the Index Status API () to inspect
112 | # the index status.
113 |
114 |
115 | #################################### Paths ####################################
116 |
117 | # Path to directory containing configuration (this file and logging.yml):
118 | #
119 | path.conf: /opt/elasticsearch/config
120 |
121 | # Path to directory where to store index data allocated for this node.
122 | #
123 | path.data: /var/lib/elasticsearch
124 | #
125 | # Can optionally include more than one location, causing data to be striped across
126 | # the locations (a la RAID 0) on a file level, favouring locations with most free
127 | # space on creation. For example:
128 | #
129 | #path.data: /path/to/data1,/path/to/data2
130 |
131 | # Path to temporary files:
132 | #
133 | #path.work: /path/to/work
134 |
135 | # Path to log files:
136 | #
137 | path.logs: /var/log/elasticsearch
138 |
139 | # Path to where plugins are installed:
140 | #
141 | path.plugins: /opt/elasticsearch/plugins
142 |
143 |
144 | #################################### Plugin ###################################
145 |
146 | # If a plugin listed here is not installed for current node, the node will not start.
147 | #
148 | #plugin.mandatory: mapper-attachments,lang-groovy
149 |
150 |
151 | ################################### Memory ####################################
152 |
153 | # Elasticsearch performs poorly when JVM starts swapping: you should ensure that
154 | # it _never_ swaps.
155 | #
156 | # Set this property to true to lock the memory:
157 | #
158 | #bootstrap.mlockall: true
159 |
160 | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set
161 | # to the same value, and that the machine has enough memory to allocate
162 | # for Elasticsearch, leaving enough memory for the operating system itself.
163 | #
164 | # You should also make sure that the Elasticsearch process is allowed to lock
165 | # the memory, eg. by using `ulimit -l unlimited`.
166 |
167 |
168 | ############################## Network And HTTP ###############################
169 |
170 | # Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens
171 | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node
172 | # communication. (the range means that if the port is busy, it will automatically
173 | # try the next port).
174 |
175 | # Set the bind address specifically (IPv4 or IPv6):
176 | #
177 | #network.bind_host: 192.168.0.1
178 |
179 | # Set the address other nodes will use to communicate with this node. If not
180 | # set, it is automatically derived. It must point to an actual IP address.
181 | #
182 | network.publish_host: __IP_ADDR__
183 |
184 | # Set both 'bind_host' and 'publish_host':
185 | #
186 | #network.host: 192.168.0.1
187 |
188 | # Set a custom port for the node to node communication (9300 by default):
189 | #
190 | #transport.tcp.port: 9300
191 |
192 | # Enable compression for all communication between nodes (disabled by default):
193 | #
194 | #transport.tcp.compress: true
195 |
196 | # Set a custom port to listen for HTTP traffic:
197 | #
198 | #http.port: 9200
199 |
200 | # Set a custom allowed content length:
201 | #
202 | #http.max_content_length: 100mb
203 |
204 | # Disable HTTP completely:
205 | #
206 | #http.enabled: false
207 |
208 |
209 | ################################### Gateway ###################################
210 |
211 | # The gateway allows for persisting the cluster state between full cluster
212 | # restarts. Every change to the state (such as adding an index) will be stored
213 | # in the gateway, and when the cluster starts up for the first time,
214 | # it will read its state from the gateway.
215 |
216 | # There are several types of gateway implementations. For more information, see
217 | # .
218 |
219 | # The default gateway type is the "local" gateway (recommended):
220 | #
221 | #gateway.type: local
222 |
223 | # Settings below control how and when to start the initial recovery process on
224 | # a full cluster restart (to reuse as much local data as possible when using shared
225 | # gateway).
226 |
227 | # Allow recovery process after N nodes in a cluster are up:
228 | #
229 | #gateway.recover_after_nodes: 1
230 |
231 | # Set the timeout to initiate the recovery process, once the N nodes
232 | # from previous setting are up (accepts time value):
233 | #
234 | #gateway.recover_after_time: 5m
235 |
236 | # Set how many nodes are expected in this cluster. Once these N nodes
237 | # are up (and recover_after_nodes is met), begin recovery process immediately
238 | # (without waiting for recover_after_time to expire):
239 | #
240 | #gateway.expected_nodes: 2
241 |
242 |
243 | ############################# Recovery Throttling #############################
244 |
245 | # These settings allow to control the process of shards allocation between
246 | # nodes during initial recovery, replica allocation, rebalancing,
247 | # or when adding and removing nodes.
248 |
249 | # Set the number of concurrent recoveries happening on a node:
250 | #
251 | # 1. During the initial recovery
252 | #
253 | #cluster.routing.allocation.node_initial_primaries_recoveries: 4
254 | #
255 | # 2. During adding/removing nodes, rebalancing, etc
256 | #
257 | #cluster.routing.allocation.node_concurrent_recoveries: 2
258 |
259 | # Set to throttle throughput when recovering (eg. 100mb, by default 20mb):
260 | #
261 | #indices.recovery.max_bytes_per_sec: 20mb
262 |
263 | # Set to limit the number of open concurrent streams when
264 | # recovering a shard from a peer:
265 | #
266 | #indices.recovery.concurrent_streams: 5
267 |
268 |
269 | ################################## Discovery ##################################
270 |
271 | # Discovery infrastructure ensures nodes can be found within a cluster
272 | # and master node is elected. Multicast discovery is the default.
273 |
274 | # Set to ensure a node sees N other master eligible nodes to be considered
275 | # operational within the cluster. This should be set to a quorum/majority of
276 | # the master-eligible nodes in the cluster.
277 | #
278 | #discovery.zen.minimum_master_nodes: 1
279 |
280 | # Set the time to wait for ping responses from other nodes when discovering.
281 | # Set this option to a higher value on a slow or congested network
282 | # to minimize discovery failures:
283 | #
284 | #discovery.zen.ping.timeout: 3s
285 |
286 | # For more information, see
287 | #
288 |
289 | # Unicast discovery allows to explicitly control which nodes will be used
290 | # to discover the cluster. It can be used when multicast is not present,
291 | # or to restrict the cluster communication-wise.
292 | #
293 | # 1. Disable multicast discovery (enabled by default):
294 | #
295 | discovery.zen.ping.multicast.enabled: false
296 | #
297 | # 2. Configure an initial list of master nodes in the cluster
298 | # to perform discovery when new nodes (master or data) are started:
299 | #
300 | discovery.zen.ping.unicast.hosts: ["node1"]
301 |
302 | # EC2 discovery allows to use AWS EC2 API in order to perform discovery.
303 | #
304 | # You have to install the cloud-aws plugin for enabling the EC2 discovery.
305 | #
306 | # For more information, see
307 | #
308 | #
309 | # See
310 | # for a step-by-step tutorial.
311 |
312 | # GCE discovery allows to use Google Compute Engine API in order to perform discovery.
313 | #
314 | # You have to install the cloud-gce plugin for enabling the GCE discovery.
315 | #
316 | # For more information, see .
317 |
318 | # Azure discovery allows to use Azure API in order to perform discovery.
319 | #
320 | # You have to install the cloud-azure plugin for enabling the Azure discovery.
321 | #
322 | # For more information, see .
323 |
324 | ################################## Slow Log ##################################
325 |
326 | # Shard level query and fetch threshold logging.
327 |
328 | #index.search.slowlog.threshold.query.warn: 10s
329 | #index.search.slowlog.threshold.query.info: 5s
330 | #index.search.slowlog.threshold.query.debug: 2s
331 | #index.search.slowlog.threshold.query.trace: 500ms
332 |
333 | #index.search.slowlog.threshold.fetch.warn: 1s
334 | #index.search.slowlog.threshold.fetch.info: 800ms
335 | #index.search.slowlog.threshold.fetch.debug: 500ms
336 | #index.search.slowlog.threshold.fetch.trace: 200ms
337 |
338 | #index.indexing.slowlog.threshold.index.warn: 10s
339 | #index.indexing.slowlog.threshold.index.info: 5s
340 | #index.indexing.slowlog.threshold.index.debug: 2s
341 | #index.indexing.slowlog.threshold.index.trace: 500ms
342 |
343 | ################################## GC Logging ################################
344 |
345 | #monitor.jvm.gc.young.warn: 1000ms
346 | #monitor.jvm.gc.young.info: 700ms
347 | #monitor.jvm.gc.young.debug: 400ms
348 |
349 | #monitor.jvm.gc.old.warn: 10s
350 | #monitor.jvm.gc.old.info: 5s
351 | #monitor.jvm.gc.old.debug: 2s
352 |
353 | ################################## Security ################################
354 |
355 | # Uncomment if you want to enable JSONP as a valid return transport on the
356 | # http server. With this enabled, it may pose a security risk, so disabling
357 | # it unless you need it is recommended (it is disabled by default).
358 | #
359 | #http.jsonp.enable: true
--------------------------------------------------------------------------------