├── resources ├── opensoc │ ├── config │ │ ├── etc │ │ │ ├── env │ │ │ │ ├── hdfs_connection.conf │ │ │ │ ├── es_connection.conf │ │ │ │ ├── mysql_connection.conf │ │ │ │ └── environment_common.conf │ │ │ └── whitelists │ │ │ │ └── known_hosts.conf │ │ └── topologies │ │ │ ├── bro │ │ │ ├── topology_identifier.conf │ │ │ ├── metrics.conf │ │ │ ├── alerts.xml │ │ │ ├── topology.conf │ │ │ └── features_enabled.conf │ │ │ ├── pcap │ │ │ ├── topology_identifier.conf │ │ │ ├── metrics.conf │ │ │ ├── features_enabled.conf │ │ │ └── topology.conf │ │ │ ├── environment_identifier.conf │ │ │ └── sourcefire │ │ │ ├── topology_identifier.conf │ │ │ ├── alerts.xml │ │ │ ├── metrics.conf │ │ │ ├── topology.conf │ │ │ └── features_enabled.conf │ ├── hbase_ip_whitelist.rb │ ├── hbase-site.xml │ └── geo.sql ├── upstart-supervisor.conf ├── hbase │ ├── supervisor-master.conf │ ├── supervisor-regionserver.conf │ └── hbase-site.xml ├── hadoop │ ├── supervisor-namenode.conf │ ├── supervisor-resourcemanager.conf │ ├── supervisor-datanode.conf │ ├── core-site.xml │ ├── mapred-site.xml │ ├── yarn-site.xml │ └── hdfs-site.xml ├── elasticsearch │ ├── supervisor-elasticsearch.conf │ ├── elasticsearch.yml │ └── elasticsearch-client.yml ├── zookeeper │ ├── supervisor-zookeeper.conf │ └── log4j.properties ├── hive │ ├── supervisor-hive-metastore.conf │ ├── hive-user.sql │ └── hive-site.xml ├── storm │ ├── supervisor-worker.conf │ └── supervisor-nimbus-ui.conf ├── kafka │ ├── supervisor-kafka.conf │ └── server.properties └── supervisord.conf ├── .gitignore ├── scripts ├── setup-java.sh ├── setup-geo-enrichment.sh ├── setup-hbase.sh ├── closest-mirror.py ├── init-hadoop.sh ├── setup-kafka.sh ├── setup-os.sh ├── setup-hive.sh ├── setup-storm.sh ├── setup-elasticsearch.sh ├── setup-zookeeper.sh ├── common.sh └── setup-hadoop.sh ├── Vagrantfile ├── README.md └── fabfile.py /resources/opensoc/config/etc/env/hdfs_connection.conf: -------------------------------------------------------------------------------- 1 | bolt.hdfs.IP=node1 2 | bolt.hdfs.port=9000 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant/* 2 | resources/jre* 3 | resources/tmp/* 4 | resources/opensoc/*.jar 5 | .ssh_config 6 | -------------------------------------------------------------------------------- /resources/opensoc/config/etc/env/es_connection.conf: -------------------------------------------------------------------------------- 1 | es.ip=node1 2 | es.port=9300 3 | es.clustername=opensoc-vagrant -------------------------------------------------------------------------------- /resources/opensoc/config/etc/env/mysql_connection.conf: -------------------------------------------------------------------------------- 1 | mysql.ip=node1 2 | mysql.port=0 3 | mysql.username=hive 4 | mysql.password=hive123 -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/bro/topology_identifier.conf: -------------------------------------------------------------------------------- 1 | #Each topology must have a unique identifier. This setting is required 2 | 3 | topology.id=bro 4 | instance.id=B001 -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/pcap/topology_identifier.conf: -------------------------------------------------------------------------------- 1 | #Each topology must have a unique identifier. 
This setting is required 2 | 3 | topology.id=pcap 4 | instance.id=P001 -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/environment_identifier.conf: -------------------------------------------------------------------------------- 1 | #This file identifies the cluster instance 2 | 3 | customer.id=vagrant 4 | datacenter.id=quick 5 | instance.id=start 6 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/topology_identifier.conf: -------------------------------------------------------------------------------- 1 | #Each topology must have a unique identifier. This setting is required 2 | 3 | topology.id=sourcefire 4 | instance.id=S001 -------------------------------------------------------------------------------- /resources/opensoc/config/etc/env/environment_common.conf: -------------------------------------------------------------------------------- 1 | kafka.zk.port=2181 2 | kafka.zk.list=node2,node3,node4 3 | kafka.zk=node2:2181,node3:2181,node4:2181 4 | kafka.br=node2:9092,node3:9092,node4:9092 -------------------------------------------------------------------------------- /resources/upstart-supervisor.conf: -------------------------------------------------------------------------------- 1 | description "supervisor" 2 | 3 | start on runlevel [2345] 4 | stop on runlevel [!2345] 5 | 6 | exec /usr/bin/supervisord --configuration /etc/supervisord.conf --nodaemon -------------------------------------------------------------------------------- /resources/opensoc/hbase_ip_whitelist.rb: -------------------------------------------------------------------------------- 1 | create "ip_whitelist", "ip" 2 | put "ip_whitelist", "10.0.0.0/8", "ip", "y" 3 | put "ip_whitelist", "192.168.0.0/16", "ip", "y" 4 | put "ip_whitelist", "172.16.0.0/12", "ip", "y" 5 | create "pcap", "t" 6 | exit -------------------------------------------------------------------------------- /resources/hbase/supervisor-master.conf: -------------------------------------------------------------------------------- 1 | [program:master] 2 | command=/opt/hbase/bin/hbase master start 3 | directory=/opt/hbase 4 | stdout_logfile=/var/log/hbase/master-stdout.log 5 | stderr_logfile=/var/log/hbase/master-stderr.log 6 | environment = JAVA_HOME=/usr/java/default 7 | -------------------------------------------------------------------------------- /resources/opensoc/config/etc/whitelists/known_hosts.conf: -------------------------------------------------------------------------------- 1 | 10.1.128.236={"local":"YES", "type":"webserver", "asset_value" : "important"} 2 | 10.1.128.237={"local":"UNKNOWN", "type":"unknown", "asset_value" : "important"} 3 | 10.60.10.254={"local":"YES", "type":"printer", "asset_value" : "important"} -------------------------------------------------------------------------------- /resources/hbase/supervisor-regionserver.conf: -------------------------------------------------------------------------------- 1 | [program:regionserver] 2 | command=/opt/hbase/bin/hbase regionserver start 3 | directory=/opt/hbase 4 | stdout_logfile=/var/log/hbase/regionserver-stdout.log 5 | stderr_logfile=/var/log/hbase/regionserver-stderr.log 6 | environment = JAVA_HOME=/usr/java/default 7 | -------------------------------------------------------------------------------- /resources/hadoop/supervisor-namenode.conf: -------------------------------------------------------------------------------- 1 | 
[program:namenode] 2 | command = /opt/hadoop/bin/hdfs --config /opt/hadoop/etc/hadoop namenode 3 | stdout_logfile = /var/log/hadoop/namenode.stdout 4 | stderr_logfile = /var/log/hadoop/namenode.stderr 5 | autostart = false 6 | environment = JAVA_HOME=/usr/java/default 7 | -------------------------------------------------------------------------------- /resources/elasticsearch/supervisor-elasticsearch.conf: -------------------------------------------------------------------------------- 1 | [program:elasticsearch] 2 | command=/opt/elasticsearch/bin/elasticsearch 3 | directory=/opt/elasticsearch 4 | stdout_logfile=/var/log/elasticsearch/stdout.log 5 | stderr_logfile=/var/log/elasticsearch/stderr.log 6 | environment=JAVA_HOME=/usr/java/default,ES_HEAP=256mb 7 | -------------------------------------------------------------------------------- /resources/zookeeper/supervisor-zookeeper.conf: -------------------------------------------------------------------------------- 1 | [program:zookeeper] 2 | command=/opt/zookeeper/bin/zkServer.sh start-foreground 3 | directory=/opt/zookeeper 4 | stdout_logfile=/var/log/zookeeper/stdout.log 5 | stderr_logfile=/var/log/zookeeper/stderr.log 6 | redirect_stderr=true 7 | environment = JAVA_HOME=/usr/java/default 8 | -------------------------------------------------------------------------------- /resources/hadoop/supervisor-resourcemanager.conf: -------------------------------------------------------------------------------- 1 | [program:resourcemanager] 2 | command = /opt/hadoop/bin/yarn --config /opt/hadoop/etc/hadoop resourcemanager 3 | stdout_logfile = /var/log/hadoop/resourcemanager.stdout 4 | stderr_logfile = /var/log/hadoop/resourcemanager.stderr 5 | autostart = false 6 | environment = JAVA_HOME=/usr/java/default 7 | -------------------------------------------------------------------------------- /resources/hive/supervisor-hive-metastore.conf: -------------------------------------------------------------------------------- 1 | [program:hive-metastore] 2 | command=/opt/hive/bin/hive --service metastore 3 | directory=/opt/hive 4 | stdout_logfile=/var/log/hive/metastore-stdout.log 5 | stderr_logfile=/var/log/hive/metastore-stderr.log 6 | redirect_stderr=true 7 | environment = JAVA_HOME=/usr/java/default,HADOOP_HOME=/opt/hadoop 8 | -------------------------------------------------------------------------------- /resources/storm/supervisor-worker.conf: -------------------------------------------------------------------------------- 1 | [program:storm-supervisor] 2 | command=/opt/storm/bin/storm supervisor 3 | directory=/opt/storm 4 | autostart=true 5 | autorestart=true 6 | stdout_logfile=/var/log/storm/supervisor-stdout.log 7 | stderr_logfile=/var/log/storm/supervisor-stderr.log 8 | environment = JAVA_HOME=/usr/java/default 9 | 10 | -------------------------------------------------------------------------------- /resources/hive/hive-user.sql: -------------------------------------------------------------------------------- 1 | CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive123'; 2 | GRANT ALL PRIVILEGES ON *.* TO 'hive'@'localhost'; 3 | CREATE USER 'hive'@'%' IDENTIFIED BY 'hive123'; 4 | GRANT ALL PRIVILEGES ON *.* TO 'hive'@'%'; 5 | CREATE USER 'hive'@'node1' IDENTIFIED BY 'hive123'; 6 | GRANT ALL PRIVILEGES ON *.* TO 'hive'@'node1'; 7 | FLUSH PRIVILEGES; -------------------------------------------------------------------------------- /resources/kafka/supervisor-kafka.conf: -------------------------------------------------------------------------------- 1 
| [program:kafka] 2 | command=/opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/server.properties 3 | directory=/opt/kafka 4 | user=root 5 | autostart=true 6 | autorestart=true 7 | stdout_logfile=/var/log/kafka/stdout.log 8 | stderr_logfile=/var/log/kafka/stderr.log 9 | environment = JAVA_HOME=/usr/java/default 10 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/alerts.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | .*message.* 5 | {"type":"alert","priority":5, "title":"Sourcefire Alert", "body": 6 | "Alert triggered by sourcefire"} 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /resources/supervisord.conf: -------------------------------------------------------------------------------- 1 | [unix_http_server] 2 | file=/var/run/supervisor.sock 3 | 4 | [supervisord] 5 | pidfile=/var/run/supervisord.pid 6 | logfile=/var/log/supervisor/supervisord.log 7 | childlogdir=/var/log/supervisor 8 | 9 | [rpcinterface:supervisor] 10 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 11 | 12 | [supervisorctl] 13 | serverurl=unix:///var/run/supervisor.sock 14 | 15 | [include] 16 | files = /etc/supervisor.d/*.conf -------------------------------------------------------------------------------- /resources/hadoop/supervisor-datanode.conf: -------------------------------------------------------------------------------- 1 | [program:datanode] 2 | command = /opt/hadoop/bin/hdfs --config /opt/hadoop/etc/hadoop datanode 3 | stdout_logfile = /var/log/hadoop/datanode.stdout 4 | stderr_logfile = /var/log/hadoop/datanode.stderr 5 | autostart = false 6 | environment = JAVA_HOME=/usr/java/default 7 | 8 | [program:nodemanager] 9 | command = /opt/hadoop/bin/yarn --config /opt/hadoop/etc/hadoop nodemanager 10 | stdout_logfile = /var/log/hadoop/nodemanager.stdout 11 | stderr_logfile = /var/log/hadoop/nodemanager.stderr 12 | autostart = false 13 | environment = JAVA_HOME=/usr/java/default 14 | -------------------------------------------------------------------------------- /resources/storm/supervisor-nimbus-ui.conf: -------------------------------------------------------------------------------- 1 | [program:storm-ui] 2 | command=/opt/storm/bin/storm ui 3 | directory=/opt/storm 4 | autostart=true 5 | autorestart=true 6 | stdout_logfile=/var/log/storm/ui-stdout.log 7 | stderr_logfile=/var/log/storm/ui-stderr.log 8 | environment = JAVA_HOME=/usr/java/default 9 | 10 | 11 | [program:storm-nimbus] 12 | command=/opt/storm/bin/storm nimbus 13 | directory=/opt/storm 14 | autostart=true 15 | autorestart=true 16 | stdout_logfile=/var/log/storm/nimbus-stdout.log 17 | stderr_logfile=/var/log/storm/nimbus-stderr.log 18 | environment = JAVA_HOME=/usr/java/default 19 | -------------------------------------------------------------------------------- /scripts/setup-java.sh: -------------------------------------------------------------------------------- 1 | source "/vagrant/scripts/common.sh" 2 | 3 | function installJava { 4 | 5 | rpm -q jre 6 | if [ $? 
-eq 0 ]; then 7 | echo "Java is already installed" 8 | else 9 | echo "install ${JRE_RPM}" 10 | rpm -i /vagrant/resources/$JRE_RPM 11 | fi 12 | } 13 | 14 | function setupEnvVars { 15 | echo "creating java environment variables" 16 | echo export JAVA_HOME=/usr/java/default >> /etc/profile.d/java.sh 17 | echo export PATH=\${JAVA_HOME}/bin:\${PATH} >> /etc/profile.d/java.sh 18 | } 19 | 20 | echo "Setting Up Java" 21 | installJava 22 | setupEnvVars 23 | -------------------------------------------------------------------------------- /scripts/setup-geo-enrichment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | 6 | function downloadGeoData { 7 | 8 | downloadFile http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity-latest.zip GeoLiteCity-latest.zip 9 | geo_folder=`unzip -l $TARBALL | grep -m 1 -o -E GeoLiteCity_[0-9]{8}` 10 | cd /tmp && unzip $TARBALL 11 | 12 | } 13 | 14 | function provisionMySql { 15 | 16 | sed "s/__GEO_FOLDER__/${geo_folder}/" /vagrant/resources/opensoc/geo.sql > /tmp/geo.sql 17 | mysql -u root < /tmp/geo.sql 18 | } 19 | 20 | echo "Setting up Geo Enrichment Data" 21 | downloadGeoData 22 | provisionMySql 23 | -------------------------------------------------------------------------------- /scripts/setup-hbase.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts t:r: option; do 6 | case $option in 7 | t) TOTAL_NODES=$OPTARG;; 8 | r) HBASE_ROLE=$OPTARG;; 9 | esac 10 | done 11 | 12 | function installHbase { 13 | downloadApacheFile hbase $HBASE_VERSION_NUM "${HBASE_VERSION}-bin.tar.gz" 14 | 15 | tar -oxzf $TARBALL -C /opt 16 | safeSymLink "/opt/${HBASE_VERSION}" /opt/hbase 17 | 18 | mkdir -p /var/log/hbase 19 | } 20 | 21 | function configureHbase { 22 | 23 | generateZkStringNoPorts $TOTAL_NODES 24 | sed "s/__ZK_QUORUM__/${ZK_STRING_NOPORTS}/" /vagrant/resources/hbase/hbase-site.xml > /opt/hbase/conf/hbase-site.xml 25 | cp "/vagrant/resources/hbase/supervisor-${HBASE_ROLE}.conf" /etc/supervisor.d/hbase.conf 26 | } 27 | 28 | echo "Setting up HBase" 29 | installHbase 30 | configureHbase 31 | 32 | -------------------------------------------------------------------------------- /scripts/closest-mirror.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Print the closest apache mirror for the given project 3 | 4 | import urllib2, json, argparse, os 5 | 6 | parser = argparse.ArgumentParser(description='gets the closest Apache Mirror for a project') 7 | parser.add_argument('project', help='project to get the mirror for') 8 | parser.add_argument('-v', '--version', help='project version') 9 | parser.add_argument('-f', '--file', help='filename of binary') 10 | 11 | args = parser.parse_args() 12 | 13 | closer_url = 'http://www.apache.org/dyn/closer.cgi/{0}/?as_json=1'.format(args.project) 14 | 15 | response = json.loads(urllib2.urlopen(closer_url).read()) 16 | 17 | 18 | path = response['path_info'] 19 | 20 | if args.version: 21 | path = os.path.join(path, args.version) 22 | 23 | if args.file: 24 | path = os.path.join(path, args.file) 25 | 26 | print response['preferred'] + path 27 | 28 | 29 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/bro/metrics.conf: 
-------------------------------------------------------------------------------- 1 | #reporters 2 | com.opensoc.metrics.reporter.graphite=false 3 | com.opensoc.metrics.reporter.console=false 4 | com.opensoc.metrics.reporter.jmx=false 5 | 6 | #Graphite Addresses 7 | 8 | com.opensoc.metrics.graphite.address=localhost 9 | com.opensoc.metrics.graphite.port=2023 10 | 11 | #TelemetryParserBolt 12 | com.opensoc.metrics.TelemetryParserBolt.acks=false 13 | com.opensoc.metrics.TelemetryParserBolt.emits=false 14 | com.opensoc.metrics.TelemetryParserBolt.fails=false 15 | 16 | 17 | #GenericEnrichmentBolt 18 | com.opensoc.metrics.GenericEnrichmentBolt.acks=false 19 | com.opensoc.metrics.GenericEnrichmentBolt.emits=false 20 | com.opensoc.metrics.GenericEnrichmentBolt.fails=false 21 | 22 | 23 | #TelemetryIndexingBolt 24 | com.opensoc.metrics.TelemetryIndexingBolt.acks=false 25 | com.opensoc.metrics.TelemetryIndexingBolt.emits=false 26 | com.opensoc.metrics.TelemetryIndexingBolt.fails=false 27 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/pcap/metrics.conf: -------------------------------------------------------------------------------- 1 | #reporters 2 | com.opensoc.metrics.reporter.graphite=false 3 | com.opensoc.metrics.reporter.console=false 4 | com.opensoc.metrics.reporter.jmx=false 5 | 6 | #Graphite Addresses 7 | 8 | com.opensoc.metrics.graphite.address=localhost 9 | com.opensoc.metrics.graphite.port=2023 10 | 11 | #TelemetryParserBolt 12 | com.opensoc.metrics.TelemetryParserBolt.acks=false 13 | com.opensoc.metrics.TelemetryParserBolt.emits=false 14 | com.opensoc.metrics.TelemetryParserBolt.fails=false 15 | 16 | 17 | #GenericEnrichmentBolt 18 | com.opensoc.metrics.GenericEnrichmentBolt.acks=false 19 | com.opensoc.metrics.GenericEnrichmentBolt.emits=false 20 | com.opensoc.metrics.GenericEnrichmentBolt.fails=false 21 | 22 | 23 | #TelemetryIndexingBolt 24 | com.opensoc.metrics.TelemetryIndexingBolt.acks=false 25 | com.opensoc.metrics.TelemetryIndexingBolt.emits=false 26 | com.opensoc.metrics.TelemetryIndexingBolt.fails=false 27 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/metrics.conf: -------------------------------------------------------------------------------- 1 | #reporters 2 | com.opensoc.metrics.reporter.graphite=false 3 | com.opensoc.metrics.reporter.console=false 4 | com.opensoc.metrics.reporter.jmx=false 5 | 6 | #Graphite Addresses 7 | 8 | com.opensoc.metrics.graphite.address=localhost 9 | com.opensoc.metrics.graphite.port=2023 10 | 11 | #TelemetryParserBolt 12 | com.opensoc.metrics.TelemetryParserBolt.acks=false 13 | com.opensoc.metrics.TelemetryParserBolt.emits=false 14 | com.opensoc.metrics.TelemetryParserBolt.fails=false 15 | 16 | 17 | #GenericEnrichmentBolt 18 | com.opensoc.metrics.GenericEnrichmentBolt.acks=false 19 | com.opensoc.metrics.GenericEnrichmentBolt.emits=false 20 | com.opensoc.metrics.GenericEnrichmentBolt.fails=false 21 | 22 | 23 | #TelemetryIndexingBolt 24 | com.opensoc.metrics.TelemetryIndexingBolt.acks=false 25 | com.opensoc.metrics.TelemetryIndexingBolt.emits=false 26 | com.opensoc.metrics.TelemetryIndexingBolt.fails=false 27 | -------------------------------------------------------------------------------- /resources/hadoop/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | fs.default.name 22 | hdfs://node1:9000 23 | 24 | 
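Note that fs.default.name above has to agree with bolt.hdfs.IP/bolt.hdfs.port in /resources/opensoc/config/etc/env/hdfs_connection.conf and with bolt.hdfs.file.system.url in the topology configs. A minimal sanity check after vagrant up, assuming the /opt/hadoop layout created by the scripts in this repo (the check itself is illustrative and not part of the provisioning scripts): # run from the host once node1 is provisioned; exits non-zero if the NameNode at hdfs://node1:9000 is not answering vagrant ssh node1 -c "/opt/hadoop/bin/hdfs dfs -ls hdfs://node1:9000/"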
-------------------------------------------------------------------------------- /resources/hadoop/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | mapreduce.framework.name 22 | yarn 23 | 24 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/bro/alerts.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | .*host\"\:\{"ip_dst_addr\"\:\{\},\"ip_src_addr\"\:\{\}.* 5 | {"type":"error","priority":5, "title":"No Local Hostname Present", "body": 6 | "We don't have a record for source or destination IPs in our internal database."} 7 | 8 | 9 | 10 | .*whois\"\:\{\"tld\"\:\{\}.* 11 | {"type":"warning","priority":10, "title":"Whois domain unknown", "body": 12 | "Could not locate whois information for tld"} 13 | 14 | 15 | ^((?!country\"\:\"US\").)*$ 16 | {"type":"warning","priority":10, "title":"NOT US IP", "body": "Communication contains a non-US IP"} 17 | 18 | 19 | .*geo.* 20 | {"type":"error","priority":1, "title":"test", "body": "test alert"} 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /resources/hive/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | javax.jdo.option.ConnectionURL 4 | jdbc:mysql://node1:3306/hivemeta?createDatabaseIfNotExist=true 5 | 6 | 7 | javax.jdo.option.ConnectionDriverName 8 | com.mysql.jdbc.Driver 9 | 10 | 11 | javax.jdo.option.ConnectionUserName 12 | hive 13 | 14 | 15 | javax.jdo.option.ConnectionPassword 16 | hive123 17 | 18 | 19 | hive.server2.thrift.bind.host 20 | 0.0.0.0 21 | 22 | 23 | hadoop.bin.path 24 | /opt/hadoop/bin 25 | 26 | 27 | hadoop.config.dir 28 | /opt/hadoop/etc/hadoop 29 | 30 | -------------------------------------------------------------------------------- /scripts/init-hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts r: option; do 6 | case $option in 7 | r) ROLE=$OPTARG;; 8 | esac 9 | done 10 | 11 | function startHadoopRole { 12 | ps -ef | grep -v grep | grep -v vagrant | grep $1 13 | if [ $? -ne 0 ]; then 14 | /opt/hadoop/sbin/hadoop-daemon.sh --config /opt/hadoop/etc/hadoop --script hdfs start $1 15 | fi 16 | } 17 | 18 | function startYarnRole { 19 | ps -ef | grep -v grep | grep -v vagrant | grep $1 20 | if [ $? 
-ne 0 ]; then 21 | /opt/hadoop/sbin/yarn-daemon.sh --config /opt/hadoop/etc/hadoop start $1 22 | fi 23 | } 24 | function formatHdfs { 25 | /opt/hadoop/bin/hdfs namenode -format vagrant -nonInteractive 26 | 27 | } 28 | 29 | echo "Starting Hadoop" 30 | 31 | if [ "${ROLE}" == "namenode" ]; then 32 | formatHdfs 33 | startHadoopRole $ROLE 34 | startYarnRole "resourcemanager" 35 | elif [ "${ROLE}" == "datanode" ]; then 36 | startHadoopRole $ROLE 37 | startYarnRole "nodemanager" 38 | fi -------------------------------------------------------------------------------- /resources/hadoop/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | yarn.nodemanager.aux-services 18 | mapreduce_shuffle 19 | 20 | 21 | 22 | yarn.resourcemanager.hostname 23 | node1 24 | 25 | 26 | 27 | yarn.resourcemanager.bind-host 28 | 0.0.0.0 29 | 30 | -------------------------------------------------------------------------------- /scripts/setup-kafka.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts t: option; do 6 | case $option in 7 | t) TOTAL_NODES=$OPTARG;; 8 | esac 9 | done 10 | 11 | function installKafka { 12 | downloadApacheFile kafka ${KAFKA_VERSION_NUM} "${KAFKA_VERSION}.tgz" 13 | 14 | tar -oxzf $TARBALL -C /opt 15 | safeSymLink "/opt/${KAFKA_VERSION}/" /opt/kafka 16 | 17 | mkdir -p /var/lib/kafka-logs 18 | mkdir -p /var/log/kafka 19 | } 20 | 21 | function configureKafka { 22 | echo "Configuring Kafka" 23 | # copy over config with static properties 24 | cp /vagrant/resources/kafka/server.properties /opt/kafka/config/ 25 | 26 | # echo in dynamic ones 27 | echo "broker.id=${NODE_NUMBER}" >> /opt/kafka/config/server.properties 28 | 29 | generateZkString $TOTAL_NODES 30 | 31 | echo "zookeeper.connect=${ZK_STRING}" >> /opt/kafka/config/server.properties 32 | 33 | cp /vagrant/resources/kafka/supervisor-kafka.conf /etc/supervisor.d/kafka.conf 34 | } 35 | 36 | 37 | echo "Setting up Kafka" 38 | installKafka 39 | configureKafka -------------------------------------------------------------------------------- /resources/hadoop/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | dfs.replication 22 | 1 23 | 24 | 25 | 26 | dfs.name.dir 27 | file:///var/lib/hadoop/hdfs/namenode 28 | 29 | 30 | 31 | dfs.data.dir 32 | file:///var/lib/hadoop/hdfs/datanode 33 | 34 | -------------------------------------------------------------------------------- /scripts/setup-os.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while getopts t: option; do 4 | case $option in 5 | t) TOTAL_NODES=$OPTARG;; 6 | esac 7 | done 8 | 9 | function disableFirewall { 10 | echo "Disabling the Firewall" 11 | service iptables save 12 | service iptables stop 13 | chkconfig iptables off 14 | } 15 | 16 | function writeHostFile { 17 | echo "setting up /etc/hosts file" 18 | 19 | echo "127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4" > /etc/hosts 20 | echo "::1 localhost localhost.localdomain localhost6 localhost6.localdomain6" >> /etc/hosts 21 | 22 | for i in $(seq 1 $TOTAL_NODES); do 23 | echo "10.0.0.10${i} node${i}" >> /etc/hosts 24 | done 25 | } 26 | 27 | function installDependencies { 28 | echo "Installing Supervisor" 29 | yum install -y epel-release 30 | yum install -y python-pip unzip 31 | 32 | pip install supervisor
33 | pip install argparse 34 | 35 | cp /vagrant/resources/supervisord.conf /etc/supervisord.conf 36 | cp /vagrant/resources/upstart-supervisor.conf /etc/init/supervisor.conf 37 | 38 | mkdir -p /etc/supervisor.d 39 | mkdir -p /var/log/supervisor 40 | } 41 | 42 | function installNtpd { 43 | yum install -y ntp 44 | 45 | ntpdate 0.pool.ntp.org 46 | 47 | service ntpd start 48 | chkconfig ntpd on 49 | } 50 | 51 | disableFirewall 52 | writeHostFile 53 | installDependencies -------------------------------------------------------------------------------- /resources/hbase/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.rootdir 26 | hdfs://node1:9000/hbase 27 | 28 | 29 | hbase.cluster.distributed 30 | true 31 | 32 | 33 | hbase.zookeeper.quorum 34 | __ZK_QUORUM__ 35 | 36 | 37 | zookeeper.znode.parent 38 | /hbase-unsecure 39 | 40 | -------------------------------------------------------------------------------- /resources/opensoc/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.rootdir 26 | hdfs://node1:9000/hbase 27 | 28 | 29 | hbase.cluster.distributed 30 | true 31 | 32 | 33 | hbase.zookeeper.quorum 34 | node2:2181,node3:2181,node4:2181 35 | 36 | 37 | zookeeper.znode.parent 38 | /hbase-unsecure 39 | 40 | -------------------------------------------------------------------------------- /scripts/setup-hive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | function installHive { 6 | 7 | downloadApacheFile hive $HIVE_VERSION "apache-${HIVE_VERSION}-bin.tar.gz" 8 | 9 | tar -oxzf $TARBALL -C /opt 10 | safeSymLink "/opt/apache-${HIVE_VERSION}-bin/" /opt/hive 11 | 12 | mkdir -p /var/log/hive 13 | 14 | cp /vagrant/resources/hive/supervisor-hive-metastore.conf /etc/supervisor.d/hive-metastore.conf 15 | 16 | } 17 | 18 | function installMySql { 19 | yum install -y mysql-server mysql-connector-java 20 | 21 | chkconfig mysqld on 22 | service mysqld start 23 | 24 | safeSymLink /usr/share/java/mysql-connector-java.jar /opt/hive/lib/mysql-connector-java.jar 25 | 26 | echo "Setting up mysql user" 27 | if mysql -u root mysql -e "select User from user where User='hive';" | grep hive; then 28 | echo "hive user exists..." 29 | else 30 | mysql -u root < /vagrant/resources/hive/hive-user.sql 31 | fi 32 | 33 | echo "Setting up metastore schema" 34 | if mysql -u root -e "show databases like 'hivemeta';" | grep hivemeta; then 35 | echo "metastore table exists..."
36 | else 37 | mysql -u root -e "CREATE DATABASE hivemeta;" 38 | cd /opt/hive/scripts/metastore/upgrade/mysql && mysql -u hive -phive123 hivemeta < hive-schema-1.2.0.mysql.sql 39 | fi 40 | } 41 | 42 | function configureHive { 43 | 44 | cp /vagrant/resources/hive/hive-site.xml /opt/hive/conf/ 45 | } 46 | 47 | echo "Setting up Hive" 48 | installHive 49 | installMySql 50 | configureHive -------------------------------------------------------------------------------- /scripts/setup-storm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | 6 | while getopts t:r: option; do 7 | case $option in 8 | t) TOTAL_NODES=$OPTARG;; 9 | r) STORM_ROLE=$OPTARG;; 10 | esac 11 | done 12 | 13 | 14 | function installStorm { 15 | downloadApacheFile storm ${STORM_VERSION} "${STORM_VERSION}.tar.gz" 16 | 17 | tar -oxzf $TARBALL -C /opt 18 | safeSymLink "/opt/${STORM_VERSION}" /opt/storm 19 | 20 | mkdir -p /var/log/storm 21 | } 22 | 23 | function configureStorm { 24 | echo "Configuring Storm" 25 | 26 | echo "storm.zookeeper.servers:" >> /opt/storm/conf/storm.yaml 27 | for i in $(seq 2 $TOTAL_NODES); do 28 | echo " - node${i}" >> /opt/storm/conf/storm.yaml 29 | done 30 | 31 | echo "nimbus.host: node1" >> /opt/storm/conf/storm.yaml 32 | echo "java.library.path: /usr/local/lib:/opt/local/lib:/usr/lib:/opt/hadoop/lib/native:/usr/lib64" >> /opt/storm/conf/storm.yaml 33 | echo "LD_LIBRARY_PATH:/usr/local/lib:/opt/local/lib:/usr/lib:/opt/hadoop/lib/native:/usr/lib64" >> /opt/storm/conf/storm_env.ini 34 | 35 | } 36 | 37 | function setupNimbus { 38 | echo "Setting up Storm Nimbus" 39 | 40 | cp /vagrant/resources/storm/supervisor-nimbus-ui.conf /etc/supervisor.d/storm.conf 41 | } 42 | 43 | function setupSupervisor { 44 | echo "Setting up Storm Supervisor" 45 | 46 | cp /vagrant/resources/storm/supervisor-worker.conf /etc/supervisor.d/storm.conf 47 | } 48 | 49 | echo "Setting up Storm" 50 | installStorm 51 | configureStorm 52 | 53 | 54 | case $STORM_ROLE in 55 | nimbus) setupNimbus;; 56 | supervisor) setupSupervisor;; 57 | esac 58 | -------------------------------------------------------------------------------- /scripts/setup-elasticsearch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts ci: option; do 6 | case $option in 7 | c) ES_CLIENT=yes;; 8 | i) IP_ADDR=$OPTARG;; 9 | esac 10 | done 11 | 12 | function installElasticsearch { 13 | 14 | downloadFile "https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz" "elasticsearch-${ES_VERSION}.tar.gz" 15 | 16 | tar -oxf $TARBALL -C /opt 17 | safeSymLink "/opt/elasticsearch-${ES_VERSION}" /opt/elasticsearch 18 | 19 | mkdir -p /var/lib/elasticsearch 20 | mkdir -p /var/log/elasticsearch 21 | mkdir -p /opt/elasticsearch/plugins 22 | } 23 | 24 | function configureElasticsearch { 25 | 26 | hostname=`hostname -f` 27 | if [ -z "${ES_CLIENT}" ]; then 28 | echo "Configuring elasticsearch as a normal node" 29 | sed "s/__HOSTNAME__/${hostname}/" /vagrant/resources/elasticsearch/elasticsearch.yml | sed "s/__IP_ADDR__/${IP_ADDR}/" > /opt/elasticsearch/config/elasticsearch.yml 30 | else 31 | echo "Configuring elasticsearch as a client" 32 | sed "s/__HOSTNAME__/${hostname}/" /vagrant/resources/elasticsearch/elasticsearch-client.yml | sed "s/__IP_ADDR__/${IP_ADDR}/" > /opt/elasticsearch/config/elasticsearch.yml 33 | fi 34 | 35 | if [ ! 
-e /opt/elasticsearch/plugins/kopf ]; then 36 | echo "Installing kopf plugin" 37 | /opt/elasticsearch/bin/plugin --install lmenezes/elasticsearch-kopf/1.5.3 38 | fi 39 | 40 | cp /vagrant/resources/elasticsearch/supervisor-elasticsearch.conf /etc/supervisor.d/elasticsearch.conf 41 | 42 | } 43 | echo "Setting up Elasticsearch" 44 | installElasticsearch 45 | configureElasticsearch -------------------------------------------------------------------------------- /scripts/setup-zookeeper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts t: option; do 6 | case $option in 7 | t) TOTAL_NODES=$OPTARG;; 8 | esac 9 | done 10 | 11 | function installZookeeper { 12 | downloadApacheFile zookeeper ${ZOOKEEPER_VERSION} "${ZOOKEEPER_VERSION}.tar.gz" 13 | 14 | tar -oxzf $TARBALL -C /opt 15 | safeSymLink "/opt/${ZOOKEEPER_VERSION}/" /opt/zookeeper 16 | 17 | mkdir -p /var/lib/zookeeper 18 | mkdir -p /var/log/zookeeper 19 | 20 | echo "0 0 * * * /usr/local/bin/zookeeper_cleanup" >> /etc/crontab 21 | 22 | echo "cd /opt/zookeeper" > /usr/local/bin/zookeeper_cleanup 23 | echo "echo `date` > /root/last_zk_cleanup" >> /usr/local/bin/zookeeper_cleanup 24 | echo "bin/zkCleanup.sh /var/lib/zookeeper -n 5 >> /root/last_zk_cleanup" >> /usr/local/bin/zookeeper_cleanup 25 | 26 | chmod +x /usr/local/bin/zookeeper_cleanup 27 | 28 | echo $NODE_NUMBER > /var/lib/zookeeper/myid 29 | } 30 | 31 | function configureZookeeper { 32 | 33 | echo "Configuring Zookeeper..." 34 | echo "tickTime=2000" > /opt/zookeeper/conf/zoo.cfg 35 | echo "initLimit=10" >> /opt/zookeeper/conf/zoo.cfg 36 | echo "syncLimit=5" >> /opt/zookeeper/conf/zoo.cfg 37 | echo "dataDir=/var/lib/zookeeper" >> /opt/zookeeper/conf/zoo.cfg 38 | echo "clientPort=2181" >> /opt/zookeeper/conf/zoo.cfg 39 | 40 | for i in $(seq 1 $TOTAL_NODES); do 41 | echo "server.${i}=node${i}:2888:3888" >> /opt/zookeeper/conf/zoo.cfg 42 | done 43 | 44 | cp /vagrant/resources/zookeeper/supervisor-zookeeper.conf /etc/supervisor.d/zookeeper.conf 45 | } 46 | 47 | echo "Setting up Zookeeper" 48 | 49 | installZookeeper 50 | configureZookeeper -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/pcap/features_enabled.conf: -------------------------------------------------------------------------------- 1 | #Enable and disable features for each topology 2 | 3 | #Feature: Kafka spout 4 | ##Feature Description: Acts as a Kafka consumer. 
Takes messages from a Kafka topic and ingests them into a topology 5 | 6 | spout.kafka.name=KafkaSpout 7 | spout.kafka.enabled=true 8 | spout.kafka.num.tasks=1 9 | spout.kafka.parallelism.hint=1 10 | 11 | #Feature: Parser Bolt 12 | ##Feature Description: Parses telemetry from its native format into a native JSON 13 | 14 | parser.bolt.name=ParserBolt 15 | bolt.parser.enabled=true 16 | bolt.parser.num.tasks=1 17 | bolt.parser.parallelism.hint=1 18 | 19 | #Feature: Indexer 20 | ##Feature Description: Indexes telemetry messages in ElasticSearch or Solr 21 | 22 | bolt.indexing.name=IndexBolt 23 | bolt.indexing.enabled=true 24 | bolt.indexing.num.tasks=1 25 | bolt.indexing.parallelism.hint=1 26 | 27 | #Feature: Error Indexer 28 | ##Feature Description: Indexes error messages in ElasticSearch or Solr 29 | 30 | bolt.error.indexing.name=ErrorIndexBolt 31 | bolt.error.indexing.enabled=true 32 | bolt.error.indexing.num.tasks=1 33 | bolt.error.indexing.parallelism.hint=1 34 | 35 | #Feature: HDFS Bolt 36 | ##Feature Description: Writes telemetry messages into HDFS 37 | 38 | bolt.hdfs.name=HDFSBolt 39 | bolt.hdfs.enabled=false 40 | bolt.hdfs.num.tasks=4 41 | bolt.hdfs.parallelism.hint=4 42 | 43 | bolt.hbase.name=HBaseBolt 44 | bolt.hbase.enabled=true 45 | bolt.hbase.num.tasks=1 46 | bolt.hbase.parallelism.hint=1 47 | 48 | 49 | # unused stuff 50 | bolt.enrichment.host.enabled=false 51 | bolt.enrichment.geo.enabled=false 52 | bolt.enrichment.whois.enabled=false 53 | bolt.enrichment.cif.enabled=false 54 | bolt.enrichment.threat.enabled=false 55 | bolt.alerts.enabled=false 56 | bolt.alerts.indexing.enabled=false 57 | bolt.kafka.enabled=false 58 | 59 | -------------------------------------------------------------------------------- /resources/opensoc/geo.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS GEO; 2 | 3 | USE GEO; 4 | 5 | DROP TABLE IF EXISTS `blocks`; 6 | CREATE TABLE `blocks` ( `startIPNum` int(10) unsigned NOT NULL,`endIPNum` int(10) unsigned NOT NULL,`locID` 7 | int(10) unsigned NOT NULL, PRIMARY KEY (`startIPNum`,`endIPNum`) ) 8 | ENGINE=MyISAM DEFAULT CHARSET=latin1 PACK_KEYS=1 DELAY_KEY_WRITE=1; 9 | 10 | DROP TABLE IF EXISTS `location`; 11 | CREATE TABLE `location` (`locID` int(10) unsigned NOT NULL,`country` char(2) default NULL,`region` char(2) 12 | default NULL,`city` varchar(45) default NULL,`postalCode` char(7) default NULL,`latitude` double default 13 | NULL,`longitude` double default NULL,`dmaCode` char(3) default NULL,`areaCode` char(3) default NULL,PRIMARY KEY 14 | (`locID`),KEY `Index_Country` (`country`) ) ENGINE=MyISAM DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED; 15 | 16 | load data infile '/tmp/__GEO_FOLDER__/GeoLiteCity-Blocks.csv' into table `blocks` fields terminated by ',' optionally enclosed by 17 | '"' lines terminated by '\n' ignore 2 lines; 18 | 19 | load data infile '/tmp/__GEO_FOLDER__/GeoLiteCity-Location.csv' into table `location` fields terminated by ',' optionally enclosed 20 | by '"' lines terminated by '\n' ignore 2 lines; 21 | 22 | DELIMITER $$ 23 | DROP FUNCTION IF EXISTS `IPTOLOCID` $$ 24 | CREATE FUNCTION `IPTOLOCID`( ip VARCHAR(15)) RETURNS int(10) unsigned 25 | BEGIN 26 | DECLARE ipn INTEGER UNSIGNED; 27 | DECLARE locID_var INTEGER; 28 | IF ip LIKE '192.168.%' OR ip LIKE '10.%' THEN RETURN 0; 29 | END IF; 30 | SET ipn = INET_ATON(ip); 31 | SELECT locID INTO locID_var FROM `blocks` INNER JOIN (SELECT MAX(startIPNum) AS start FROM `blocks` WHERE startIPNum <= ipn) AS s ON (startIPNum = 
s.start) WHERE endIPNum >= ipn; 32 | RETURN locID_var; 33 | END 34 | $$ 35 | DELIMITER ; -------------------------------------------------------------------------------- /scripts/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | JRE_RPM=jre-7u79-linux-x64.rpm 4 | HADOOP_VERSION=hadoop-2.6.0 5 | ZOOKEEPER_VERSION=zookeeper-3.4.6 6 | KAFKA_SCALA_VERSION=2.9.2 7 | KAFKA_VERSION_NUM=0.8.1.1 8 | KAFKA_VERSION="kafka_${KAFKA_SCALA_VERSION}-${KAFKA_VERSION_NUM}" 9 | STORM_VERSION=apache-storm-0.9.4 10 | HBASE_VERSION_NUM=0.98.13 11 | HBASE_VERSION=hbase-"${HBASE_VERSION_NUM}-hadoop2" 12 | HIVE_VERSION=hive-1.2.0 13 | ES_VERSION=1.5.2 14 | 15 | # So we don't need to pass in i to the scripts 16 | NODE_NUMBER=`hostname | tr -d node` 17 | 18 | 19 | function downloadFile { 20 | 21 | url="${1}" 22 | filename="${2}" 23 | 24 | tmp_dir="/vagrant/resources/tmp/" 25 | cached_file="${tmp_dir}${filename}" 26 | 27 | if [ ! -e $cached_file ]; then 28 | echo "Downloading ${filename} from ${url} to ${cached_file}" 29 | echo "This will take some time. Please be patient..." 30 | wget -nv -P $tmp_dir $url 31 | fi 32 | 33 | TARBALL=$cached_file 34 | } 35 | 36 | function downloadApacheFile { 37 | 38 | project="${1}" 39 | version="${2}" 40 | filename="${3}" 41 | 42 | closest_url=`python /vagrant/scripts/closest-mirror.py ${project} -v ${version} -f ${filename}` 43 | 44 | downloadFile $closest_url $filename 45 | } 46 | 47 | function join { 48 | local IFS="$1"; shift; echo "$*" 49 | } 50 | 51 | function generateZkString { 52 | # Yes it's ugly, but so is bash :) 53 | ZK_STRING=`python -c "print ','.join([ 'node{0}:2181'.format(x) for x in range(2,${1}+1)])"` 54 | } 55 | 56 | function generateZkStringNoPorts { 57 | ZK_STRING_NOPORTS=`python -c "print ','.join([ 'node{0}'.format(x) for x in range(2,${1}+1)])"` 58 | } 59 | 60 | function safeSymLink { 61 | target=$1 62 | symlink=$2 63 | 64 | if [ -e $symlink ]; then 65 | echo "${symlink} exists. Deleting."
66 | rm $symlink 67 | fi 68 | 69 | ln -s $target $symlink 70 | -------------------------------------------------------------------------------- /scripts/setup-hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source "/vagrant/scripts/common.sh" 4 | 5 | while getopts r:t: option; do 6 | case $option in 7 | t) TOTAL_NODES=$OPTARG;; 8 | r) ROLE=$OPTARG;; 9 | esac 10 | done 11 | 12 | function installHadoop { 13 | 14 | downloadApacheFile hadoop/common $HADOOP_VERSION "${HADOOP_VERSION}.tar.gz" 15 | 16 | tar -oxzf $TARBALL -C /opt 17 | safeSymLink "/opt/${HADOOP_VERSION}/" /opt/hadoop 18 | 19 | mkdir -p /var/lib/hadoop/hdfs/namenode 20 | mkdir -p /var/lib/hadoop/hdfs/datanode 21 | mkdir -p /var/log/hadoop 22 | mkdir -p /opt/hadoop/logs 23 | 24 | # needed for writing to HDFS 25 | yum install -y snappy snappy-devel 26 | 27 | } 28 | 29 | function configureHadoop { 30 | HADOOP_RESOURCE_DIR=/vagrant/resources/hadoop 31 | for file in `ls ${HADOOP_RESOURCE_DIR}/*.xml`; do 32 | echo "Copying ${file}" 33 | cp $file /opt/hadoop/etc/hadoop 34 | done 35 | 36 | echo "Setting slaves file" 37 | for i in $(seq 2 $TOTAL_NODES); do 38 | echo "node${i}" >> /opt/hadoop/etc/hadoop/slaves 39 | done 40 | 41 | echo "export JAVA_LIBRARY_PATH=\${JAVA_LIBRARY_PATH}:/usr/lib/hadoop/lib/native:/usr/lib64" >> /opt/hadoop/etc/hadoop/hadoop-env.sh 42 | } 43 | 44 | function configureNameNode { 45 | echo "Copying over Supervisor config for namenode and resourcemanager" 46 | cp /vagrant/resources/hadoop/supervisor-namenode.conf /etc/supervisor.d/namenode.conf 47 | cp /vagrant/resources/hadoop/supervisor-resourcemanager.conf /etc/supervisor.d/resourcemanager.conf 48 | } 49 | 50 | function configureDataNode { 51 | echo "Copying over Supervisor config for datanode" 52 | cp /vagrant/resources/hadoop/supervisor-datanode.conf /etc/supervisor.d/datanode.conf 53 | } 54 | 55 | echo "Setting up Hadoop" 56 | installHadoop 57 | configureHadoop 58 | 59 | if [ "${ROLE}" == "namenode" ]; then 60 | configureNameNode 61 | elif [ "${ROLE}" == "datanode" ]; then 62 | configureDataNode 63 | fi -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/bro/topology.conf: -------------------------------------------------------------------------------- 1 | include = ../../etc/env/environment_common.conf 2 | include = ../../etc/env/es_connection.conf 3 | include = ../../etc/env/hdfs_connection.conf 4 | include = ../../etc/env/mysql_connection.conf 5 | include = metrics.conf 6 | include = features_enabled.conf 7 | 8 | #Global Properties 9 | 10 | debug.mode=true 11 | local.mode=true 12 | num.workers=1 13 | num.ackers=1 14 | 15 | #Standard 5-tuple fields 16 | 17 | source.ip=ip_src_addr 18 | source.port=ip_src_port 19 | dest.ip=ip_dst_addr 20 | dest.port=ip_dst_port 21 | protocol=protocol 22 | 23 | #Test Spout 24 | spout.test.parallelism.repeat=false 25 | 26 | #Kafka Spout 27 | spout.kafka.topic=bro_raw 28 | 29 | #Parsing Bolt 30 | bolt.parser.adapter=com.opensoc.parsing.parsers.BasicBroParser 31 | source.include.protocols=snmp,http,ftp,ssh,ssl,dns,socks,dnp3,smtp,dhcp,modbus,radius,irc 32 | source.exclude.protocols=x509,files,app_stats 33 | 34 | #GeoEnrichment 35 | 36 | bolt.enrichment.geo.enrichment_tag=geo 37 | bolt.enrichment.geo.adapter.table=GEO 38 | bolt.enrichment.geo.MAX_CACHE_SIZE_OBJECTS_NUM=10000 39 | bolt.enrichment.geo.MAX_TIME_RETAIN_MINUTES=10 40 | bolt.enrichment.geo.fields=ip_src_addr,ip_dst_addr 41 | 42 |
#Indexing Bolt 43 | bolt.indexing.indexname=bro_index 44 | bolt.indexing.timestamp=yyyy.MM.dd 45 | bolt.indexing.documentname=bro_doc 46 | bolt.indexing.bulk=200 47 | bolt.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 48 | 49 | 50 | #Error Indexing Bolt 51 | bolt.error.indexing.indexname=error 52 | bolt.error.indexing.timestamp=yyyy.MM 53 | bolt.error.indexing.documentname=bro_error 54 | bolt.error.indexing.bulk=1 55 | bolt.error.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 56 | 57 | #HDFS Bolt 58 | bolt.hdfs.batch.size=5000 59 | bolt.hdfs.field.delimiter=| 60 | bolt.hdfs.file.rotation.size.in.mb=5 61 | bolt.hdfs.file.system.url=hdfs://node1:9000 62 | bolt.hdfs.wip.file.path=/bro/wip 63 | bolt.hdfs.finished.file.path=/bro/rotated 64 | bolt.hdfs.compression.codec.class=org.apache.hadoop.io.compress.SnappyCodec -------------------------------------------------------------------------------- /resources/zookeeper/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | zookeeper.root.logger=INFO, CONSOLE, ROLLINGFILE 3 | zookeeper.console.threshold=INFO 4 | zookeeper.log.dir=/var/log/zookeeper 5 | zookeeper.log.file=zookeeper.log 6 | zookeeper.log.threshold=DEBUG 7 | zookeeper.tracelog.dir=/var/log/zookeeper 8 | zookeeper.tracelog.file=zookeeper_trace.log 9 | 10 | # 11 | # ZooKeeper Logging Configuration 12 | # 13 | 14 | # Format is " (, )+ 15 | 16 | # DEFAULT: console appender only 17 | log4j.rootLogger=${zookeeper.root.logger} 18 | 19 | # Example with rolling log file 20 | #log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE 21 | 22 | # Example with rolling log file and tracing 23 | #log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE 24 | 25 | # 26 | # Log INFO level and above messages to the console 27 | # 28 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 29 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} 30 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 31 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 32 | 33 | # 34 | # Add ROLLINGFILE to rootLogger to get log file output 35 | # Log DEBUG level and above messages to a log file 36 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender 37 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold} 38 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file} 39 | 40 | # Max log file size of 10MB 41 | log4j.appender.ROLLINGFILE.MaxFileSize=10MB 42 | # uncomment the next line to limit number of backup files 43 | #log4j.appender.ROLLINGFILE.MaxBackupIndex=10 44 | 45 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout 46 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 47 | 48 | 49 | # 50 | # Add TRACEFILE to rootLogger to get log file output 51 | # Log DEBUG level and above messages to a log file 52 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender 53 | log4j.appender.TRACEFILE.Threshold=TRACE 54 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file} 55 | 56 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout 57 | ### Notice we are including log4j's NDC here (%x) 58 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n 
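The values at the top of this log4j.properties are defaults meant to be overridden with JVM system properties; out of the box zkServer.sh forces zookeeper.root.logger to INFO,CONSOLE, which is why the comments above talk about adding ROLLINGFILE to the root logger. A minimal sketch of turning the rolling file appender on, assuming the stock zkServer.sh shipped with zookeeper-3.4.6 (it honors the ZOO_LOG_DIR and ZOO_LOG4J_PROP environment variables); the supervisor-zookeeper.conf in this repo does not set these itself: # illustrative: route logs through the ROLLINGFILE appender defined above into /var/log/zookeeper ZOO_LOG_DIR=/var/log/zookeeper ZOO_LOG4J_PROP="INFO,ROLLINGFILE" /opt/zookeeper/bin/zkServer.sh start-foreground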
-------------------------------------------------------------------------------- /resources/opensoc/config/topologies/pcap/topology.conf: -------------------------------------------------------------------------------- 1 | include = ../../etc/env/environment_common.conf 2 | include = ../../etc/env/es_connection.conf 3 | include = ../../etc/env/hdfs_connection.conf 4 | include = ../../etc/env/mysql_connection.conf 5 | include = metrics.conf 6 | include = features_enabled.conf 7 | 8 | #Global Properties 9 | 10 | debug.mode=true 11 | local.mode=true 12 | num.workers=1 13 | num.ackers=1 14 | 15 | #Standard 5-tuple fields 16 | 17 | source.ip=ip_src_addr 18 | source.port=ip_src_port 19 | dest.ip=ip_dst_addr 20 | dest.port=ip_dst_port 21 | protocol=protocol 22 | 23 | #Kafka Spout 24 | spout.kafka.buffer.size.bytes=1024000 25 | spout.kafka.consumer.id=pcap.kafka 26 | spout.kafka.fetch.size.bytes=1024 27 | spout.kafka.forcefromstart=false 28 | spout.kafka.socket.timeout.ms=600000 29 | spout.kafka.start.offset.time=-1 30 | spout.kafka.zk.root=/storm/topology/pcap/kafka 31 | spout.kafka.topic=pcap_raw 32 | 33 | #Parser Bolt 34 | bolt.parser.enabled=true 35 | bolt.parser.num.of.key.chars.to.use.for.shuffle.grouping=6 36 | bolt.parser.ts.precision=MICRO 37 | 38 | #Test Spout 39 | spout.test.parallelism.repeat=false 40 | 41 | #Kafka Spout 42 | spout.kafka.topic=pcap_raw 43 | 44 | #Indexing Bolt 45 | bolt.indexing.indexname=pcap 46 | bolt.indexing.timestamp=yyyy.MM.dd.HH 47 | bolt.indexing.documentname=pcap_doc 48 | bolt.indexing.bulk=1 49 | bolt.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 50 | 51 | #Error Indexing Bolt 52 | bolt.error.indexing.indexname=error 53 | bolt.error.indexing.timestamp=yyyy.MM 54 | bolt.error.indexing.documentname=pcap_error 55 | bolt.error.indexing.bulk=1 56 | bolt.error.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 57 | 58 | #HDFS Bolt 59 | bolt.hdfs.batch.size=5000 60 | bolt.hdfs.field.delimiter=| 61 | bolt.hdfs.file.rotation.size.in.mb=5 62 | bolt.hdfs.file.system.url=hdfs://node1:9000 63 | bolt.hdfs.wip.file.path=/pcap/wip 64 | bolt.hdfs.finished.file.path=/pcap/rotated 65 | bolt.hdfs.compression.codec.class=org.apache.hadoop.io.compress.SnappyCodec 66 | 67 | #HBase Bolt 68 | bolt.hbase.table.name=pcap 69 | ## Define the hbase table columns in the form <columnFamily1>:<column1>,<column2>,<column3>|<columnFamily2>:<column1>,<column2>|.......
70 | bolt.hbase.table.fields=t:pcap 71 | bolt.hbase.table.key.tuple.field.name=pcap_id 72 | bolt.hbase.table.timestamp.tuple.field.name=timestamp 73 | bolt.hbase.enable.batching=false 74 | bolt.hbase.write.buffer.size.in.bytes=2000000 75 | bolt.hbase.durability=SKIP_WAL 76 | bolt.hbase.partitioner.region.info.refresh.interval.mins=60 77 | 78 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/topology.conf: -------------------------------------------------------------------------------- 1 | include = ../../etc/env/environment_common.conf 2 | include = ../../etc/env/es_connection.conf 3 | include = ../../etc/env/hdfs_connection.conf 4 | include = ../../etc/env/mysql_connection.conf 5 | include = metrics.conf 6 | include = features_enabled.conf 7 | 8 | #Global Properties 9 | 10 | debug.mode=true 11 | local.mode=true 12 | num.workers=1 13 | num.ackers=1 14 | #Standard 5-tuple fields 15 | 16 | source.ip=ip_src_addr 17 | source.port=ip_src_port 18 | dest.ip=ip_dst_addr 19 | dest.port=ip_dst_port 20 | protocol=protocol 21 | 22 | #Test Spout 23 | spout.test.parallelism.repeat=false 24 | 25 | #Kafka Spout 26 | spout.kafka.topic=sourcefire_raw 27 | 28 | #Parser Bolt 29 | bolt.parser.adapter=com.opensoc.parsing.parsers.BasicSourcefireParser 30 | 31 | #GeoEnrichment 32 | 33 | bolt.enrichment.geo.enrichment_tag=geo 34 | bolt.enrichment.geo.adapter.table=GEO 35 | bolt.enrichment.geo.MAX_CACHE_SIZE_OBJECTS_NUM=100 36 | bolt.enrichment.geo.MAX_TIME_RETAIN_MINUTES=10 37 | bolt.enrichment.geo.fields=ip_src_addr,ip_dst_addr 38 | 39 | #Indexing Bolt 40 | bolt.indexing.indexname=sourcefire_index 41 | bolt.indexing.timestamp=yyyy.MM.dd 42 | bolt.indexing.documentname=sourcefire_doc 43 | bolt.indexing.bulk=1 44 | bolt.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 45 | 46 | #Alerts Indexing Bolt 47 | bolt.alerts.indexing.indexname=alert 48 | bolt.alerts.indexing.timestamp=yyyy.MM.dd 49 | bolt.alerts.indexing.documentname=sourcefire_alert 50 | bolt.alerts.indexing.bulk=1 51 | bolt.alerts.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 52 | 53 | #Error Indexing Bolt 54 | bolt.error.indexing.indexname=error 55 | bolt.error.indexing.timestamp=yyyy.MM 56 | bolt.error.indexing.documentname=sourcefire_error 57 | bolt.error.indexing.bulk=1 58 | bolt.error.indexing.adapter=com.opensoc.indexing.adapters.ESTimedRotatingAdapter 59 | 60 | #Alerts Bolt 61 | bolt.alerts.adapter=com.opensoc.alerts.adapters.AllAlertAdapter 62 | com.opensoc.alerts.adapters.AllAlertAdapter.whitelist_table_name = ip_whitelist 63 | com.opensoc.alerts.adapters.AllAlertAdapter.blacklist_table_name = ip_blacklist 64 | com.opensoc.alerts.adapters.AllAlertAdapter.quorum=node2,node3,node4 65 | com.opensoc.alerts.adapters.AllAlertAdapter.port=2181 66 | com.opensoc.alerts.adapters.AllAlertAdapter._MAX_CACHE_SIZE_OBJECTS_NUM=25 67 | com.opensoc.alerts.adapters.AllAlertAdapter._MAX_TIME_RETAIN_MINUTES=10 68 | 69 | #HDFS Bolt 70 | bolt.hdfs.batch.size=5000 71 | bolt.hdfs.field.delimiter=| 72 | bolt.hdfs.file.rotation.size.in.mb=5 73 | bolt.hdfs.file.system.url=hdfs://node1:9000 74 | bolt.hdfs.wip.file.path=/sourcefire/wip 75 | bolt.hdfs.finished.file.path=/sourcefire/rotated 76 | bolt.hdfs.compression.codec.class=org.apache.hadoop.io.compress.SnappyCodec 77 | 78 | #Kafka Bolt 79 | bolt.kafka.topic=sourcefire_enriched 80 | -------------------------------------------------------------------------------- 
/resources/opensoc/config/topologies/bro/features_enabled.conf: -------------------------------------------------------------------------------- 1 | #Enable and disable features for each topology 2 | 3 | #Feature: Test spout 4 | ##Feature Description: Reads telemetry from file and ingests it into topology. Used for testing or bulk loading the topology 5 | 6 | spout.test.name=TestSpout 7 | spout.test.enabled=false 8 | spout.test.num.tasks=1 9 | spout.test.parallelism.hint=1 10 | 11 | #Feature: Kafka spout 12 | ##Feature Description: Acts as a Kafka consumer. Takes messages from a Kafka topic and ingests them into a topology 13 | 14 | spout.kafka.name=KafkaSpout 15 | spout.kafka.enabled=true 16 | spout.kafka.num.tasks=1 17 | spout.kafka.parallelism.hint=1 18 | 19 | #Feature: Parser Bolt 20 | ##Feature Description: Parses telemetry from its native format into a native JSON 21 | 22 | parser.bolt.name=ParserBolt 23 | bolt.parser.name=ParserBolt 24 | bolt.parser.enabled=true 25 | bolt.parser.num.tasks=1 26 | bolt.parser.parallelism.hint=1 27 | 28 | #Feature: Host Enrichment 29 | ##Feature Description: Appends information about known hosts to a telemetry message 30 | 31 | bolt.enrichment.host.name=HostEnrichment 32 | bolt.enrichment.host.enabled=false 33 | bolt.enrichment.host.num.tasks=1 34 | bolt.enrichment.host.parallelism.hint=1 35 | 36 | #Feature: Geo Enrichment 37 | ##Feature Description: Appends geo information about known non-local IPs to a telemetry message 38 | 39 | bolt.enrichment.geo.name=GeoEnrichment 40 | bolt.enrichment.geo.enabled=true 41 | bolt.enrichment.geo.num.tasks=1 42 | bolt.enrichment.geo.parallelism.hint=1 43 | 44 | #Feature: Whois Enrichment 45 | ##Feature Description: Appends whois information about known domains to a telemetry message 46 | 47 | bolt.enrichment.whois.name=WhoisEnrichment 48 | bolt.enrichment.whois.enabled=false 49 | bolt.enrichment.whois.num.tasks=1 50 | bolt.enrichment.whois.parallelism.hint=1 51 | 52 | #Feature: CIF Enrichment 53 | ##Feature Description: Appends information from CIF threat intelligence feeds to a telemetry message 54 | 55 | bolt.enrichment.cif.name=CIFBolt 56 | bolt.enrichment.cif.enabled=false 57 | bolt.enrichment.cif.num.tasks=1 58 | bolt.enrichment.cif.parallelism.hint=1 59 | 60 | #Feature: Threat Enrichment 61 | ##Feature Description: Appends information from Threat intelligence feeds to a telemetry message 62 | 63 | bolt.enrichment.threat.name=ThreatBolt 64 | bolt.enrichment.threat.enabled=false 65 | bolt.enrichment.threat.num.tasks=1 66 | bolt.enrichment.threat.parallelism.hint=1 67 | 68 | #Feature: Rules-Based Alerts 69 | ##Feature Description: Tags messages with rules-based alerts 70 | 71 | bolt.alerts.name=Alerts 72 | bolt.alerts.enabled=false 73 | bolt.alerts.num.tasks=1 74 | bolt.alerts.parallelism.hint=1 75 | 76 | #Feature: Indexer 77 | ##Feature Description: Indexes telemetry messages in ElasticSearch or Solr 78 | 79 | bolt.indexing.name=IndexBolt 80 | bolt.indexing.enabled=true 81 | bolt.indexing.num.tasks=1 82 | bolt.indexing.parallelism.hint=1 83 | 84 | #Feature: Alerts Indexer 85 | ##Feature Description: Indexes alert messages in ElasticSearch or Solr 86 | 87 | bolt.alerts.indexing.name=AlertIndexBolt 88 | bolt.alerts.indexing.enabled=false 89 | bolt.alerts.indexing.num.tasks=1 90 | bolt.alerts.indexing.parallelism.hint=1 91 | 92 | #Feature: Error Indexer 93 | ##Feature Description: Indexes error messages in ElasticSearch or Solr 94 | 95 | bolt.error.indexing.name=ErrorIndexBolt 96 | bolt.error.indexing.enabled=true 
97 | bolt.error.indexing.num.tasks=1 98 | bolt.error.indexing.parallelism.hint=1 99 | 100 | #Feature: Kafka Bolt 101 | ##Feature Description: Writes telemetry messages back into a Kafka topic 102 | 103 | bolt.kafka.name=KafkaBolt 104 | bolt.kafka.enabled=false 105 | bolt.kafka.num.tasks=1 106 | bolt.kafka.parallelism.hint=1 107 | 108 | #Feature: HDFS Bolt 109 | ##Feature Description: Writes telemetry messages into HDFS 110 | 111 | bolt.hdfs.name=HDFSBolt 112 | bolt.hdfs.enabled=false 113 | bolt.hdfs.num.tasks=1 114 | bolt.hdfs.parallelism.hint=1 115 | -------------------------------------------------------------------------------- /resources/opensoc/config/topologies/sourcefire/features_enabled.conf: -------------------------------------------------------------------------------- 1 | #Enable and disable features for each topology 2 | 3 | #Feature: Test spout 4 | ##Feature Description: Reads telemetry from file and ingests it into topology. Used for testing or bulk loading the topology 5 | 6 | spout.test.name=TestSpout 7 | spout.test.enabled=false 8 | spout.test.num.tasks=1 9 | spout.test.parallelism.hint=1 10 | 11 | #Feature: Kafka spout 12 | ##Feature Description: Acts as a Kafka consumer. Takes messages from a Kafka topic and ingests them into a topology 13 | 14 | spout.kafka.name=KafkaSpout 15 | spout.kafka.enabled=true 16 | spout.kafka.num.tasks=1 17 | spout.kafka.parallelism.hint=1 18 | 19 | #Feature: Parser Bolt 20 | ##Feature Description: Parses telemetry from its native format into a native JSON 21 | 22 | parser.bolt.name=ParserBolt 23 | bolt.parser.name=ParserBolt 24 | bolt.parser.enabled=true 25 | bolt.parser.num.tasks=1 26 | bolt.parser.parallelism.hint=1 27 | 28 | #Feature: Host Enrichment 29 | ##Feature Description: Appends information about known hosts to a telemetry message 30 | 31 | bolt.enrichment.host.name=HostEnrichment 32 | bolt.enrichment.host.enabled=false 33 | bolt.enrichment.host.num.tasks=1 34 | bolt.enrichment.host.parallelism.hint=1 35 | 36 | #Feature: Geo Enrichment 37 | ##Feature Description: Appends geo information about known non-local IPs to a telemetry message 38 | 39 | bolt.enrichment.geo.name=GeoEnrichment 40 | bolt.enrichment.geo.enabled=true 41 | bolt.enrichment.geo.num.tasks=1 42 | bolt.enrichment.geo.parallelism.hint=1 43 | 44 | #Feature: Whois Enrichment 45 | ##Feature Description: Appends whois information about known domains to a telemetry message 46 | 47 | bolt.enrichment.whois.name=WhoisEnrichment 48 | bolt.enrichment.whois.enabled=false 49 | bolt.enrichment.whois.num.tasks=1 50 | bolt.enrichment.whois.parallelism.hint=1 51 | 52 | #Feature: CIF Enrichment 53 | ##Feature Description: Appends information from CIF threat intelligence feeds to a telemetry message 54 | 55 | bolt.enrichment.cif.name=CIFBolt 56 | bolt.enrichment.cif.enabled=false 57 | bolt.enrichment.cif.num.tasks=1 58 | bolt.enrichment.cif.parallelism.hint=1 59 | 60 | #Feature: Threat Enrichment 61 | ##Feature Description: Appends information from Threat intelligence feeds to a telemetry message 62 | 63 | bolt.enrichment.threat.name=ThreatBolt 64 | bolt.enrichment.threat.enabled=false 65 | bolt.enrichment.threat.num.tasks=1 66 | bolt.enrichment.threat.parallelism.hint=1 67 | 68 | #Feature: Rules-Based Alerts 69 | ##Feature Description: Tags messages with rules-based alerts 70 | 71 | bolt.alerts.name=Alerts 72 | bolt.alerts.enabled=true 73 | bolt.alerts.num.tasks=1 74 | bolt.alerts.parallelism.hint=1 75 | 76 | #Feature: Indexer 77 | ##Feature Description: Indexes telemetry messages in 
ElasticSearch or Solr 78 | 79 | bolt.indexing.name=IndexBolt 80 | bolt.indexing.enabled=true 81 | bolt.indexing.num.tasks=1 82 | bolt.indexing.parallelism.hint=1 83 | 84 | #Feature: Alerts Indexer 85 | ##Feature Description: Indexes alert messages in ElasticSearch or Solr 86 | 87 | bolt.alerts.indexing.name=AlertIndexBolt 88 | bolt.alerts.indexing.enabled=true 89 | bolt.alerts.indexing.num.tasks=1 90 | bolt.alerts.indexing.parallelism.hint=1 91 | 92 | #Feature: Error Indexer 93 | ##Feature Description: Indexes error messages in ElasticSearch or Solr 94 | 95 | bolt.error.indexing.name=ErrorIndexBolt 96 | bolt.error.indexing.enabled=true 97 | bolt.error.indexing.num.tasks=1 98 | bolt.error.indexing.parallelism.hint=1 99 | 100 | #Feature: Kafka Bolt 101 | ##Feature Description: Writes telemetry messages back into a Kafka topic 102 | 103 | bolt.kafka.name=KafkaBolt 104 | bolt.kafka.enabled=false 105 | bolt.kafka.num.tasks=1 106 | bolt.kafka.parallelism.hint=1 107 | 108 | #Feature: HDFS Bolt 109 | ##Feature Description: Writes telemetry messages into HDFS 110 | 111 | bolt.hdfs.name=HDFSBolt 112 | bolt.hdfs.enabled=false 113 | bolt.hdfs.num.tasks=1 114 | bolt.hdfs.parallelism.hint=1 115 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | Vagrant.require_version ">= 1.4.3" 2 | VAGRANTFILE_API_VERSION = "2" 3 | 4 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 5 | numNodes = 4 6 | r = numNodes..1 7 | (r.first).downto(r.last).each do |i| 8 | config.vm.define "node#{i}" do |node| 9 | node.vm.box = "chef/centos-6.5" 10 | node.vm.provider "virtualbox" do |v| 11 | v.name = "node#{i}" 12 | v.customize ["modifyvm", :id, "--memory", "1024"] 13 | end 14 | node.vm.network :private_network, ip: "10.0.0.10#{i}" 15 | 16 | # base setup 17 | node.vm.hostname = "node#{i}" 18 | 19 | node.vm.provision "shell" do |s| 20 | s.path = "scripts/setup-os.sh" 21 | s.args = "-t #{numNodes}" 22 | end 23 | 24 | node.vm.provision "shell", path: "scripts/setup-java.sh" 25 | 26 | if i == 1 27 | # namenode 28 | node.vm.provision "shell" do |s| 29 | s.path = "scripts/setup-hadoop.sh" 30 | s.args = "-r namenode -t #{numNodes}" 31 | end 32 | node.vm.network "forwarded_port", guest: 50070, host: 50070 33 | node.vm.network "forwarded_port", guest: 8088, host:8088 34 | 35 | # storm nimbus 36 | node.vm.provision "shell" do |s| 37 | s.path = "scripts/setup-storm.sh" 38 | s.args = "-r nimbus -t #{numNodes}" 39 | end 40 | node.vm.network "forwarded_port", guest: 8080, host: 8080 41 | 42 | # hbase master 43 | node.vm.provision "shell" do |s| 44 | s.path = "scripts/setup-hbase.sh" 45 | s.args = "-r master -t #{numNodes}" 46 | end 47 | node.vm.network "forwarded_port", guest: 60010, host: 60010 48 | 49 | # hive 50 | node.vm.provision "shell" do |s| 51 | s.path = "scripts/setup-hive.sh" 52 | end 53 | 54 | node.vm.provision "shell" do |s| 55 | s.path = "scripts/setup-elasticsearch.sh" 56 | s.args = "-c -i 10.0.0.10#{i}" 57 | end 58 | node.vm.network "forwarded_port", guest: 9200, host:9200 59 | 60 | # setup mysql for geo enrichment 61 | node.vm.provision "shell", path: "scripts/setup-geo-enrichment.sh" 62 | else 63 | # zookeeper 64 | node.vm.provision "shell" do |s| 65 | s.path = "scripts/setup-zookeeper.sh" 66 | s.args = "-t #{numNodes}" 67 | end 68 | # datanode 69 | node.vm.provision "shell" do |s| 70 | s.path = "scripts/setup-hadoop.sh" 71 | s.args = "-r datanode -t #{numNodes}" 72 | end 73 | # hbase 
regionserver 74 | node.vm.provision "shell" do |s| 75 | s.path = "scripts/setup-hbase.sh" 76 | s.args = "-r regionserver -t #{numNodes}" 77 | end 78 | # kafka broker 79 | node.vm.provision "shell" do |s| 80 | s.path = "scripts/setup-kafka.sh" 81 | s.args = "-t #{numNodes}" 82 | end 83 | # storm supervisor 84 | node.vm.provision "shell" do |s| 85 | s.path = "scripts/setup-storm.sh" 86 | s.args = "-r supervisor -t #{numNodes}" 87 | end 88 | # elasticsearch 89 | node.vm.provision "shell" do |s| 90 | s.path = "scripts/setup-elasticsearch.sh" 91 | s.args = "-i 10.0.0.10#{i}" 92 | end 93 | # reload supervisord 94 | end 95 | 96 | #After everything is provisioned, start Supervisor 97 | node.vm.provision "shell", inline: "pgrep supervisord || start supervisor" 98 | end 99 | end 100 | end 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenSOC Vagrant 2 | 3 | A collection of shell scripts and a Vagrantfile for building an OpenSOC cluster. There are two primary goals we hope to achieve with this project: 4 | 5 | * Create a turnkey OpenSOC cluster to allow users to play with OpenSOC with minimal setup 6 | * Provide a disposable environment where developers can run and test OpenSOC topologies. 7 | 8 | To accomplish this, we have provided a collection of bash scripts that are orchestrated using [Vagrant](https://www.vagrantup.com/) and [Fabric](http://www.fabfile.org/). Both of these tools should be installed prior to using this project. 9 | 10 | ## Inspiration 11 | 12 | Credit to https://github.com/vangj/vagrant-hadoop-2.4.1-spark-1.0.1 for the inspiration; this project is heavily influenced by that one. 13 | 14 | ## Quick Start 15 | 16 | If you don't want to bother with the details of the cluster, and just want to see OpenSOC, place an RPM for Oracle's JVM in `resources/` and edit `common.sh` to set `JRE_RPM` to the name of the RPM. Then run: 17 | 18 | ``` 19 | vagrant up 20 | fab vagrant quickstart 21 | ``` 22 | 23 | Finally, point your browser at https://localhost:8443 24 | 25 | This should get you a running OpenSOC cluster with Bro, Snort, and PCAP. If you are looking to customize the setup or run your own topologies, see the sections below on running the cluster and running an OpenSOC topology. 26 | 27 | ## Advanced Setup 28 | 29 | If you are interested in tweaking the underlying cluster, running your own OpenSOC topology, or just want to understand how it all works, this section will break down how the cluster is started and how topologies can be run. 30 | 31 | ## Running the Cluster 32 | 33 | To get the cluster up and running, do the following: 34 | 35 | * Place an RPM for Oracle's JVM in `resources/` and edit `common.sh` to set `JRE_RPM` to the name of the RPM 36 | * Run `vagrant up` 37 | * Run `fab vagrant postsetup` 38 | 39 | The `vagrant up` command will build the VMs for the cluster and install all dependencies, which include: 40 | 41 | * Hadoop 2.6 42 | * HBase 0.98 43 | * Kafka 0.8.1.1 44 | * Zookeeper 3.4.6 45 | * Hive 1.2.0 46 | * Elasticsearch 1.5.2 47 | * Storm 0.9.4 48 | 49 | After this, the `fab vagrant postsetup` command will run a handful of tasks that need to occur after the cluster is running, but before it can be used.
These are: 50 | 51 | * Formatting HDFS 52 | * Starting Hadoop cluster 53 | * Starting HBase cluster 54 | * Setting up the HBase whitelist table with RFC 1918 addresses 55 | 56 | ## Running an OpenSOC Topology 57 | 58 | After provisioning the cluster as described above, you can use some more Fabric tasks to run a topology. Before you start, you should have the following: 59 | 60 | * the opensoc-streaming repo cloned locally 61 | * a copy of OpenSOC configs in resources/opensoc/OpenSOC_Configs 62 | 63 | Then you can run `fab vagrant start_topology:` which will do the following: 64 | 65 | * cd into the opensoc-streaming repo, and run `mvn clean package` 66 | * copy the newly built OpenSOC-Topologies.jar to resources/opensoc, where it will be available to the VMs 67 | * Submit `` and the topology jar to Nimbus 68 | 69 | If your topology is pulling data from Kafka, you can create a topic with the Fabric task `fab vagrant create_topic:` 70 | 71 | ## Virtual Machines 72 | 73 | By default, 4 VMs will be created. They are named node1, node2, node3, and node4. Here is a breakdown of what services run where: 74 | 75 | * node1 76 | * HDFS Namenode 77 | * YARN Resourcemanager 78 | * Storm Nimbus and UI 79 | * HBase Master 80 | * Elasticsearch Master 81 | * MySQL (Hive metastore and geo enrichment store) 82 | 83 | * node2-4 84 | * Kafka Broker 85 | * Zookeeper 86 | * HDFS Datanode 87 | * YARN Nodemanager 88 | * Storm Supervisor 89 | * HBase Regionserver 90 | * Elasticsearch Data Node 91 | 92 | ## Port Forwarding 93 | 94 | Some services' UIs are forwarded to localhost for ease of use. You can find the following services forwarded by default: 95 | 96 | * HDFS - localhost:50070 -> node1:50070 97 | * HBase - localhost:60010 -> node1:60010 98 | * Storm UI - localhost:8080 -> node1:8080 99 | * Elasticsearch - localhost:9200 -> node1:9200 100 | * OpenSOC-UI - localhost:8443 -> node1:443 101 | 102 | ## Progress 103 | 104 | Here is a list of what will be provisioned via Vagrant and its current status: 105 | 106 | * Java - DONE 107 | * Zookeeper - DONE 108 | * HDFS/Yarn - DONE 109 | * Kafka - DONE 110 | * Storm - DONE 111 | * HBase - DONE 112 | * Hive - DONE 113 | * Elasticsearch - DONE 114 | * GeoIP Enrichment Data - DONE 115 | * OpenSOC UI 116 | * OpenSOC Storm Topologies 117 | 118 | -------------------------------------------------------------------------------- /resources/kafka/server.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker.
This must be set to a unique integer for each broker. 20 | #broker.id=0 21 | 22 | ############################# Socket Server Settings ############################# 23 | 24 | # The port the socket server listens on 25 | port=9092 26 | 27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces 28 | #host.name=localhost 29 | 30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the 31 | # value for "host.name" if configured. Otherwise, it will use the value returned from 32 | # java.net.InetAddress.getCanonicalHostName(). 33 | #advertised.host.name= 34 | 35 | # The port to publish to ZooKeeper for clients to use. If this is not set, 36 | # it will publish the same port that the broker binds to. 37 | #advertised.port= 38 | 39 | # The number of threads handling network requests 40 | num.network.threads=2 41 | 42 | # The number of threads doing disk I/O 43 | num.io.threads=8 44 | 45 | # The send buffer (SO_SNDBUF) used by the socket server 46 | socket.send.buffer.bytes=1048576 47 | 48 | # The receive buffer (SO_RCVBUF) used by the socket server 49 | socket.receive.buffer.bytes=1048576 50 | 51 | # The maximum size of a request that the socket server will accept (protection against OOM) 52 | socket.request.max.bytes=104857600 53 | 54 | 55 | ############################# Log Basics ############################# 56 | 57 | # A comma seperated list of directories under which to store log files 58 | log.dirs=/var/lib/kafka-logs 59 | 60 | # The default number of log partitions per topic. More partitions allow greater 61 | # parallelism for consumption, but this will also result in more files across 62 | # the brokers. 63 | num.partitions=1 64 | 65 | ############################# Log Flush Policy ############################# 66 | 67 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 68 | # the OS cache lazily. The following configurations control the flush of data to disk. 69 | # There are a few important trade-offs here: 70 | # 1. Durability: Unflushed data may be lost if you are not using replication. 71 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 72 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 73 | # The settings below allow one to configure the flush policy to flush data after a period of time or 74 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 75 | 76 | # The number of messages to accept before forcing a flush of data to disk 77 | #log.flush.interval.messages=10000 78 | 79 | # The maximum amount of time a message can sit in a log before we force a flush 80 | #log.flush.interval.ms=1000 81 | 82 | ############################# Log Retention Policy ############################# 83 | 84 | # The following configurations control the disposal of log segments. The policy can 85 | # be set to delete segments after a period of time, or after a given size has accumulated. 86 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 87 | # from the end of the log. 88 | 89 | # The minimum age of a log file to be eligible for deletion 90 | log.retention.hours=168 91 | 92 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 93 | # segments don't drop below log.retention.bytes. 
94 | #log.retention.bytes=1073741824 95 | 96 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 97 | log.segment.bytes=536870912 98 | 99 | # The interval at which log segments are checked to see if they can be deleted according 100 | # to the retention policies 101 | log.retention.check.interval.ms=60000 102 | 103 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. 104 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 105 | log.cleaner.enable=false 106 | 107 | ############################# Zookeeper ############################# 108 | 109 | # Zookeeper connection string (see zookeeper docs for details). 110 | # This is a comma separated host:port pairs, each corresponding to a zk 111 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 112 | # You can also append an optional chroot string to the urls to specify the 113 | # root directory for all kafka znodes. 114 | #zookeeper.connect=localhost:2181 115 | 116 | # Timeout in ms for connecting to zookeeper 117 | zookeeper.connection.timeout.ms=1000000 118 | -------------------------------------------------------------------------------- /fabfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import xml.etree.ElementTree as ETree 4 | 5 | from fabric.api import env, local, run, sudo, execute, hosts 6 | from fabric.context_managers import shell_env, lcd, cd 7 | from fabric.colors import yellow, green 8 | 9 | # configure fabric to talk to the VMs 10 | temp_ssh_config = '.ssh_config' 11 | 12 | def vagrant(): 13 | '''sets up fabric environment to work with vagrant VMs''' 14 | with open(temp_ssh_config, 'w') as f: 15 | f.write(local('vagrant ssh-config', capture=True)) 16 | 17 | global total_nodes 18 | total_nodes = int(local('vagrant status | grep node | wc -l', capture=True)) 19 | 20 | env.user = 'vagrant' 21 | env.use_ssh_config = True 22 | env.ssh_config_path = temp_ssh_config 23 | 24 | @hosts('node1') 25 | def format_namenode(): 26 | '''Formats namenode on node1''' 27 | with shell_env(JAVA_HOME='/usr/java/default'): 28 | sudo('/opt/hadoop/bin/hdfs namenode -format vagrant -nonInteractive', warn_only=True) 29 | 30 | 31 | def supervisorctl_start(process): 32 | '''Start a process managed by supervisor''' 33 | sudo('supervisorctl start {0}'.format(process)) 34 | 35 | def supervisorctl_stop(process): 36 | '''Stop a process managed by supervisor''' 37 | sudo('supervisorctl stop {0}'.format(process)) 38 | 39 | 40 | def postsetup(): 41 | '''Perform post vagrant up tasks on cluster''' 42 | execute(format_namenode) 43 | execute(supervisorctl_start, 'namenode', host='node1') 44 | execute(supervisorctl_start, 'resourcemanager', host='node1') 45 | execute(supervisorctl_start, 'master', host='node1') 46 | for x in range(2,total_nodes+1): 47 | execute(supervisorctl_start, 'datanode', host='node{0}'.format(x)) 48 | execute(supervisorctl_start, 'nodemanager', host='node{0}'.format(x)) 49 | execute(supervisorctl_start, 'regionserver', host='node{0}'.format(x)) 50 | 51 | execute(init_ip_whitelist,host='node1') 52 | 53 | def supervisorctl_reread_update(): 54 | sudo('supervisorctl reread') 55 | sudo('supervisorctl update') 56 | 57 | def update_supervisor(): 58 | execute(supervisorctl_reread_update, hosts=['node{0}'.format(x) for x in range(1,total_nodes+1)]) 59 | 
60 | def supervisorctl_status(): 61 | sudo('supervisorctl status') 62 | 63 | def status(): 64 | execute(supervisorctl_status, hosts=['node{0}'.format(x) for x in range(1,total_nodes+1)]) 65 | 66 | def init_ip_whitelist(): 67 | run('/opt/hbase/bin/hbase shell /vagrant/resources/opensoc/hbase_ip_whitelist.rb') 68 | 69 | 70 | @hosts('node2') 71 | def create_topic(topic, partitions=1, replication_factor=1): 72 | run('/opt/kafka/bin/kafka-topics.sh --zookeeper localhost --create --topic {0} --partitions {1} --replication-factor {2}'.format( 73 | topic, 74 | partitions, 75 | replication_factor 76 | )) 77 | 78 | def get_topologies(repo='../opensoc-streaming'): 79 | '''Build and fetch a new OpenSOC topology jar from repo (default: ../opensoc-streaming)''' 80 | 81 | pom_file = os.path.join(repo, 'pom.xml') 82 | pom = ETree.parse(pom_file) 83 | version = pom.getroot().find('{http://maven.apache.org/POM/4.0.0}version').text 84 | rev = local("git log | head -1 | cut -d ' ' -f 2 | cut -c1-11", capture=True) 85 | 86 | topology_jar = os.path.join( 87 | repo, 88 | 'OpenSOC-Topologies', 89 | 'target', 90 | 'OpenSOC-Topologies-{0}.jar'.format(version) 91 | ) 92 | 93 | vagrant_jar = 'OpenSOC-Topologies-{0}-{1}.jar'.format(version, rev) 94 | vagrant_jar_path = os.path.join('resources/opensoc', vagrant_jar) 95 | 96 | if os.path.exists(vagrant_jar_path): 97 | print yellow('{0} already exists. Not building a new jar.'.format(vagrant_jar_path)) 98 | print yellow('Remove the existing jar and run this command again to build a fresh jar.') 99 | return vagrant_jar 100 | 101 | with lcd(repo): 102 | local('mvn clean package') 103 | 104 | local('cp {0} {1}'.format( 105 | topology_jar, 106 | vagrant_jar_path 107 | )) 108 | 109 | return vagrant_jar 110 | 111 | @hosts('node1') 112 | def start_topology(topology, repo=None, local_mode=False, config_path='/vagrant/opensoc/OpenSOC_Configs/', generator_spout=False): 113 | '''Builds and copies a fresh topology jar from a locally cloned opensoc-streaming and submits it to storm''' 114 | 115 | if repo is not None: 116 | jar = get_topologies(repo) 117 | else: 118 | jar = get_topologies() 119 | 120 | if local_mode: 121 | local_mode='true' 122 | else: 123 | local_mode='false' 124 | 125 | if generator_spout: 126 | generator_spout='true' 127 | else: 128 | generator_spout='false' 129 | 130 | with cd('/vagrant/resources/opensoc/'): 131 | run('/opt/storm/bin/storm jar {0} {1} -local_mode {2} -config_path {3} -generator_spout {4}'.format( 132 | jar, 133 | topology, 134 | local_mode, 135 | config_path, 136 | generator_spout 137 | )) 138 | 139 | def quickstart(): 140 | '''Start OpenSOC with bro, snort, and pcap''' 141 | # run post setup tasks 142 | postsetup() 143 | 144 | # clone opensoc-streaming if its not here locally 145 | if not os.path.exists('../opensoc-streaming'): 146 | with lcd('../'): 147 | local('git clone https://github.com/OpenSOC/opensoc-streaming.git') 148 | else: 149 | print green('Found a copy of opensoc-streaming in ../opensoc-streaming.') 150 | 151 | for top in ['bro', 'sourcefire', 'pcap']: 152 | 153 | topic = '{0}_raw'.format(top) 154 | # create kafka topic 155 | execute(create_topic, topic, host='node2') 156 | 157 | # launch topology 158 | topology = 'com.opensoc.topology.{0}'.format(top.capitalize()) 159 | execute(start_topology, topology, config_path='config/') 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /resources/elasticsearch/elasticsearch.yml: 
-------------------------------------------------------------------------------- 1 | ################################### Cluster ################################### 2 | 3 | # Cluster name identifies your cluster for auto-discovery. If you're running 4 | # multiple clusters on the same network, make sure you're using unique names. 5 | # 6 | cluster.name: "opensoc-vagrant" 7 | 8 | 9 | #################################### Node ##################################### 10 | 11 | # Node names are generated dynamically on startup, so you're relieved 12 | # from configuring them manually. You can tie this node to a specific name: 13 | # 14 | node.name: "__HOSTNAME__" 15 | 16 | # Every node can be configured to allow or deny being eligible as the master, 17 | # and to allow or deny to store the data. 18 | # 19 | # Allow this node to be eligible as a master node (enabled by default): 20 | # 21 | #node.master: true 22 | # 23 | # Allow this node to store data (enabled by default): 24 | # 25 | #node.data: true 26 | 27 | # You can exploit these settings to design advanced cluster topologies. 28 | # 29 | # 1. You want this node to never become a master node, only to hold data. 30 | # This will be the "workhorse" of your cluster. 31 | # 32 | node.master: false 33 | node.data: true 34 | # 35 | # 2. You want this node to only serve as a master: to not store any data and 36 | # to have free resources. This will be the "coordinator" of your cluster. 37 | # 38 | #node.master: true 39 | #node.data: false 40 | # 41 | # 3. You want this node to be neither master nor data node, but 42 | # to act as a "search load balancer" (fetching data from nodes, 43 | # aggregating results, etc.) 44 | # 45 | #node.master: false 46 | #node.data: false 47 | 48 | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the 49 | # Node Info API [http://localhost:9200/_nodes] or GUI tools 50 | # such as , 51 | # , 52 | # and 53 | # to inspect the cluster state. 54 | 55 | # A node can have generic attributes associated with it, which can later be used 56 | # for customized shard allocation filtering, or allocation awareness. An attribute 57 | # is a simple key value pair, similar to node.key: value, here is an example: 58 | # 59 | #node.rack: rack314 60 | 61 | # By default, multiple nodes are allowed to start from the same installation location 62 | # to disable it, set the following: 63 | #node.max_local_storage_nodes: 1 64 | 65 | 66 | #################################### Index #################################### 67 | 68 | # You can set a number of options (such as shard/replica options, mapping 69 | # or analyzer definitions, translog settings, ...) for indices globally, 70 | # in this file. 71 | # 72 | # Note, that it makes more sense to configure index settings specifically for 73 | # a certain index, either when creating it or by using the index templates API. 74 | # 75 | # See and 76 | # 77 | # for more information. 78 | 79 | # Set the number of shards (splits) of an index (5 by default): 80 | # 81 | #index.number_of_shards: 5 82 | 83 | # Set the number of replicas (additional copies) of an index (1 by default): 84 | # 85 | #index.number_of_replicas: 1 86 | 87 | # Note, that for development on a local machine, with small indices, it usually 88 | # makes sense to "disable" the distributed features: 89 | # 90 | index.number_of_shards: 1 91 | index.number_of_replicas: 0 92 | 93 | # These settings directly affect the performance of index and search operations 94 | # in your cluster. 
Assuming you have enough machines to hold shards and 95 | # replicas, the rule of thumb is: 96 | # 97 | # 1. Having more *shards* enhances the _indexing_ performance and allows to 98 | # _distribute_ a big index across machines. 99 | # 2. Having more *replicas* enhances the _search_ performance and improves the 100 | # cluster _availability_. 101 | # 102 | # The "number_of_shards" is a one-time setting for an index. 103 | # 104 | # The "number_of_replicas" can be increased or decreased anytime, 105 | # by using the Index Update Settings API. 106 | # 107 | # Elasticsearch takes care about load balancing, relocating, gathering the 108 | # results from nodes, etc. Experiment with different settings to fine-tune 109 | # your setup. 110 | 111 | # Use the Index Status API () to inspect 112 | # the index status. 113 | 114 | 115 | #################################### Paths #################################### 116 | 117 | # Path to directory containing configuration (this file and logging.yml): 118 | # 119 | path.conf: /opt/elasticsearch/config 120 | 121 | # Path to directory where to store index data allocated for this node. 122 | # 123 | path.data: /var/lib/elasticsearch 124 | # 125 | # Can optionally include more than one location, causing data to be striped across 126 | # the locations (a la RAID 0) on a file level, favouring locations with most free 127 | # space on creation. For example: 128 | # 129 | #path.data: /path/to/data1,/path/to/data2 130 | 131 | # Path to temporary files: 132 | # 133 | #path.work: /path/to/work 134 | 135 | # Path to log files: 136 | # 137 | path.logs: /var/log/elasticsearch 138 | 139 | # Path to where plugins are installed: 140 | # 141 | path.plugins: /opt/elasticsearch/plugins 142 | 143 | 144 | #################################### Plugin ################################### 145 | 146 | # If a plugin listed here is not installed for current node, the node will not start. 147 | # 148 | #plugin.mandatory: mapper-attachments,lang-groovy 149 | 150 | 151 | ################################### Memory #################################### 152 | 153 | # Elasticsearch performs poorly when JVM starts swapping: you should ensure that 154 | # it _never_ swaps. 155 | # 156 | # Set this property to true to lock the memory: 157 | # 158 | #bootstrap.mlockall: true 159 | 160 | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set 161 | # to the same value, and that the machine has enough memory to allocate 162 | # for Elasticsearch, leaving enough memory for the operating system itself. 163 | # 164 | # You should also make sure that the Elasticsearch process is allowed to lock 165 | # the memory, eg. by using `ulimit -l unlimited`. 166 | 167 | 168 | ############################## Network And HTTP ############################### 169 | 170 | # Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens 171 | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node 172 | # communication. (the range means that if the port is busy, it will automatically 173 | # try the next port). 174 | 175 | # Set the bind address specifically (IPv4 or IPv6): 176 | # 177 | #network.bind_host: 192.168.0.1 178 | 179 | # Set the address other nodes will use to communicate with this node. If not 180 | # set, it is automatically derived. It must point to an actual IP address. 
181 | # 182 | network.publish_host: __IP_ADDR__ 183 | 184 | # Set both 'bind_host' and 'publish_host': 185 | # 186 | #network.host: 192.168.0.1 187 | 188 | # Set a custom port for the node to node communication (9300 by default): 189 | # 190 | #transport.tcp.port: 9300 191 | 192 | # Enable compression for all communication between nodes (disabled by default): 193 | # 194 | #transport.tcp.compress: true 195 | 196 | # Set a custom port to listen for HTTP traffic: 197 | # 198 | #http.port: 9200 199 | 200 | # Set a custom allowed content length: 201 | # 202 | #http.max_content_length: 100mb 203 | 204 | # Disable HTTP completely: 205 | # 206 | #http.enabled: false 207 | 208 | 209 | ################################### Gateway ################################### 210 | 211 | # The gateway allows for persisting the cluster state between full cluster 212 | # restarts. Every change to the state (such as adding an index) will be stored 213 | # in the gateway, and when the cluster starts up for the first time, 214 | # it will read its state from the gateway. 215 | 216 | # There are several types of gateway implementations. For more information, see 217 | # . 218 | 219 | # The default gateway type is the "local" gateway (recommended): 220 | # 221 | #gateway.type: local 222 | 223 | # Settings below control how and when to start the initial recovery process on 224 | # a full cluster restart (to reuse as much local data as possible when using shared 225 | # gateway). 226 | 227 | # Allow recovery process after N nodes in a cluster are up: 228 | # 229 | #gateway.recover_after_nodes: 1 230 | 231 | # Set the timeout to initiate the recovery process, once the N nodes 232 | # from previous setting are up (accepts time value): 233 | # 234 | #gateway.recover_after_time: 5m 235 | 236 | # Set how many nodes are expected in this cluster. Once these N nodes 237 | # are up (and recover_after_nodes is met), begin recovery process immediately 238 | # (without waiting for recover_after_time to expire): 239 | # 240 | #gateway.expected_nodes: 2 241 | 242 | 243 | ############################# Recovery Throttling ############################# 244 | 245 | # These settings allow to control the process of shards allocation between 246 | # nodes during initial recovery, replica allocation, rebalancing, 247 | # or when adding and removing nodes. 248 | 249 | # Set the number of concurrent recoveries happening on a node: 250 | # 251 | # 1. During the initial recovery 252 | # 253 | #cluster.routing.allocation.node_initial_primaries_recoveries: 4 254 | # 255 | # 2. During adding/removing nodes, rebalancing, etc 256 | # 257 | #cluster.routing.allocation.node_concurrent_recoveries: 2 258 | 259 | # Set to throttle throughput when recovering (eg. 100mb, by default 20mb): 260 | # 261 | #indices.recovery.max_bytes_per_sec: 20mb 262 | 263 | # Set to limit the number of open concurrent streams when 264 | # recovering a shard from a peer: 265 | # 266 | #indices.recovery.concurrent_streams: 5 267 | 268 | 269 | ################################## Discovery ################################## 270 | 271 | # Discovery infrastructure ensures nodes can be found within a cluster 272 | # and master node is elected. Multicast discovery is the default. 273 | 274 | # Set to ensure a node sees N other master eligible nodes to be considered 275 | # operational within the cluster. This should be set to a quorum/majority of 276 | # the master-eligible nodes in the cluster. 
277 | # 278 | #discovery.zen.minimum_master_nodes: 1 279 | 280 | # Set the time to wait for ping responses from other nodes when discovering. 281 | # Set this option to a higher value on a slow or congested network 282 | # to minimize discovery failures: 283 | # 284 | #discovery.zen.ping.timeout: 3s 285 | 286 | # For more information, see 287 | # 288 | 289 | # Unicast discovery allows to explicitly control which nodes will be used 290 | # to discover the cluster. It can be used when multicast is not present, 291 | # or to restrict the cluster communication-wise. 292 | # 293 | # 1. Disable multicast discovery (enabled by default): 294 | # 295 | discovery.zen.ping.multicast.enabled: false 296 | # 297 | # 2. Configure an initial list of master nodes in the cluster 298 | # to perform discovery when new nodes (master or data) are started: 299 | # 300 | discovery.zen.ping.unicast.hosts: ["node1"] 301 | 302 | # EC2 discovery allows to use AWS EC2 API in order to perform discovery. 303 | # 304 | # You have to install the cloud-aws plugin for enabling the EC2 discovery. 305 | # 306 | # For more information, see 307 | # 308 | # 309 | # See 310 | # for a step-by-step tutorial. 311 | 312 | # GCE discovery allows to use Google Compute Engine API in order to perform discovery. 313 | # 314 | # You have to install the cloud-gce plugin for enabling the GCE discovery. 315 | # 316 | # For more information, see . 317 | 318 | # Azure discovery allows to use Azure API in order to perform discovery. 319 | # 320 | # You have to install the cloud-azure plugin for enabling the Azure discovery. 321 | # 322 | # For more information, see . 323 | 324 | ################################## Slow Log ################################## 325 | 326 | # Shard level query and fetch threshold logging. 327 | 328 | #index.search.slowlog.threshold.query.warn: 10s 329 | #index.search.slowlog.threshold.query.info: 5s 330 | #index.search.slowlog.threshold.query.debug: 2s 331 | #index.search.slowlog.threshold.query.trace: 500ms 332 | 333 | #index.search.slowlog.threshold.fetch.warn: 1s 334 | #index.search.slowlog.threshold.fetch.info: 800ms 335 | #index.search.slowlog.threshold.fetch.debug: 500ms 336 | #index.search.slowlog.threshold.fetch.trace: 200ms 337 | 338 | #index.indexing.slowlog.threshold.index.warn: 10s 339 | #index.indexing.slowlog.threshold.index.info: 5s 340 | #index.indexing.slowlog.threshold.index.debug: 2s 341 | #index.indexing.slowlog.threshold.index.trace: 500ms 342 | 343 | ################################## GC Logging ################################ 344 | 345 | #monitor.jvm.gc.young.warn: 1000ms 346 | #monitor.jvm.gc.young.info: 700ms 347 | #monitor.jvm.gc.young.debug: 400ms 348 | 349 | #monitor.jvm.gc.old.warn: 10s 350 | #monitor.jvm.gc.old.info: 5s 351 | #monitor.jvm.gc.old.debug: 2s 352 | 353 | ################################## Security ################################ 354 | 355 | # Uncomment if you want to enable JSONP as a valid return transport on the 356 | # http server. With this enabled, it may pose a security risk, so disabling 357 | # it unless you need it is recommended (it is disabled by default). 358 | # 359 | #http.jsonp.enable: true -------------------------------------------------------------------------------- /resources/elasticsearch/elasticsearch-client.yml: -------------------------------------------------------------------------------- 1 | ################################### Cluster ################################### 2 | 3 | # Cluster name identifies your cluster for auto-discovery. 
If you're running 4 | # multiple clusters on the same network, make sure you're using unique names. 5 | # 6 | cluster.name: "opensoc-vagrant" 7 | 8 | 9 | #################################### Node ##################################### 10 | 11 | # Node names are generated dynamically on startup, so you're relieved 12 | # from configuring them manually. You can tie this node to a specific name: 13 | # 14 | node.name: "__HOSTNAME__" 15 | 16 | # Every node can be configured to allow or deny being eligible as the master, 17 | # and to allow or deny to store the data. 18 | # 19 | # Allow this node to be eligible as a master node (enabled by default): 20 | # 21 | #node.master: true 22 | # 23 | # Allow this node to store data (enabled by default): 24 | # 25 | #node.data: true 26 | 27 | # You can exploit these settings to design advanced cluster topologies. 28 | # 29 | # 1. You want this node to never become a master node, only to hold data. 30 | # This will be the "workhorse" of your cluster. 31 | # 32 | #node.master: false 33 | #node.data: true 34 | # 35 | # 2. You want this node to only serve as a master: to not store any data and 36 | # to have free resources. This will be the "coordinator" of your cluster. 37 | # 38 | node.master: true 39 | node.data: false 40 | # 41 | # 3. You want this node to be neither master nor data node, but 42 | # to act as a "search load balancer" (fetching data from nodes, 43 | # aggregating results, etc.) 44 | # 45 | # node.master: false 46 | # node.data: false 47 | 48 | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the 49 | # Node Info API [http://localhost:9200/_nodes] or GUI tools 50 | # such as , 51 | # , 52 | # and 53 | # to inspect the cluster state. 54 | 55 | # A node can have generic attributes associated with it, which can later be used 56 | # for customized shard allocation filtering, or allocation awareness. An attribute 57 | # is a simple key value pair, similar to node.key: value, here is an example: 58 | # 59 | #node.rack: rack314 60 | 61 | # By default, multiple nodes are allowed to start from the same installation location 62 | # to disable it, set the following: 63 | #node.max_local_storage_nodes: 1 64 | 65 | 66 | #################################### Index #################################### 67 | 68 | # You can set a number of options (such as shard/replica options, mapping 69 | # or analyzer definitions, translog settings, ...) for indices globally, 70 | # in this file. 71 | # 72 | # Note, that it makes more sense to configure index settings specifically for 73 | # a certain index, either when creating it or by using the index templates API. 74 | # 75 | # See and 76 | # 77 | # for more information. 78 | 79 | # Set the number of shards (splits) of an index (5 by default): 80 | # 81 | #index.number_of_shards: 5 82 | 83 | # Set the number of replicas (additional copies) of an index (1 by default): 84 | # 85 | #index.number_of_replicas: 1 86 | 87 | # Note, that for development on a local machine, with small indices, it usually 88 | # makes sense to "disable" the distributed features: 89 | # 90 | index.number_of_shards: 1 91 | index.number_of_replicas: 0 92 | 93 | # These settings directly affect the performance of index and search operations 94 | # in your cluster. Assuming you have enough machines to hold shards and 95 | # replicas, the rule of thumb is: 96 | # 97 | # 1. Having more *shards* enhances the _indexing_ performance and allows to 98 | # _distribute_ a big index across machines. 99 | # 2. 
Having more *replicas* enhances the _search_ performance and improves the 100 | # cluster _availability_. 101 | # 102 | # The "number_of_shards" is a one-time setting for an index. 103 | # 104 | # The "number_of_replicas" can be increased or decreased anytime, 105 | # by using the Index Update Settings API. 106 | # 107 | # Elasticsearch takes care about load balancing, relocating, gathering the 108 | # results from nodes, etc. Experiment with different settings to fine-tune 109 | # your setup. 110 | 111 | # Use the Index Status API () to inspect 112 | # the index status. 113 | 114 | 115 | #################################### Paths #################################### 116 | 117 | # Path to directory containing configuration (this file and logging.yml): 118 | # 119 | path.conf: /opt/elasticsearch/config 120 | 121 | # Path to directory where to store index data allocated for this node. 122 | # 123 | path.data: /var/lib/elasticsearch 124 | # 125 | # Can optionally include more than one location, causing data to be striped across 126 | # the locations (a la RAID 0) on a file level, favouring locations with most free 127 | # space on creation. For example: 128 | # 129 | #path.data: /path/to/data1,/path/to/data2 130 | 131 | # Path to temporary files: 132 | # 133 | #path.work: /path/to/work 134 | 135 | # Path to log files: 136 | # 137 | path.logs: /var/log/elasticsearch 138 | 139 | # Path to where plugins are installed: 140 | # 141 | path.plugins: /opt/elasticsearch/plugins 142 | 143 | 144 | #################################### Plugin ################################### 145 | 146 | # If a plugin listed here is not installed for current node, the node will not start. 147 | # 148 | #plugin.mandatory: mapper-attachments,lang-groovy 149 | 150 | 151 | ################################### Memory #################################### 152 | 153 | # Elasticsearch performs poorly when JVM starts swapping: you should ensure that 154 | # it _never_ swaps. 155 | # 156 | # Set this property to true to lock the memory: 157 | # 158 | #bootstrap.mlockall: true 159 | 160 | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set 161 | # to the same value, and that the machine has enough memory to allocate 162 | # for Elasticsearch, leaving enough memory for the operating system itself. 163 | # 164 | # You should also make sure that the Elasticsearch process is allowed to lock 165 | # the memory, eg. by using `ulimit -l unlimited`. 166 | 167 | 168 | ############################## Network And HTTP ############################### 169 | 170 | # Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens 171 | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node 172 | # communication. (the range means that if the port is busy, it will automatically 173 | # try the next port). 174 | 175 | # Set the bind address specifically (IPv4 or IPv6): 176 | # 177 | #network.bind_host: 192.168.0.1 178 | 179 | # Set the address other nodes will use to communicate with this node. If not 180 | # set, it is automatically derived. It must point to an actual IP address. 
181 | # 182 | network.publish_host: __IP_ADDR__ 183 | 184 | # Set both 'bind_host' and 'publish_host': 185 | # 186 | #network.host: 192.168.0.1 187 | 188 | # Set a custom port for the node to node communication (9300 by default): 189 | # 190 | #transport.tcp.port: 9300 191 | 192 | # Enable compression for all communication between nodes (disabled by default): 193 | # 194 | #transport.tcp.compress: true 195 | 196 | # Set a custom port to listen for HTTP traffic: 197 | # 198 | #http.port: 9200 199 | 200 | # Set a custom allowed content length: 201 | # 202 | #http.max_content_length: 100mb 203 | 204 | # Disable HTTP completely: 205 | # 206 | #http.enabled: false 207 | 208 | 209 | ################################### Gateway ################################### 210 | 211 | # The gateway allows for persisting the cluster state between full cluster 212 | # restarts. Every change to the state (such as adding an index) will be stored 213 | # in the gateway, and when the cluster starts up for the first time, 214 | # it will read its state from the gateway. 215 | 216 | # There are several types of gateway implementations. For more information, see 217 | # . 218 | 219 | # The default gateway type is the "local" gateway (recommended): 220 | # 221 | #gateway.type: local 222 | 223 | # Settings below control how and when to start the initial recovery process on 224 | # a full cluster restart (to reuse as much local data as possible when using shared 225 | # gateway). 226 | 227 | # Allow recovery process after N nodes in a cluster are up: 228 | # 229 | #gateway.recover_after_nodes: 1 230 | 231 | # Set the timeout to initiate the recovery process, once the N nodes 232 | # from previous setting are up (accepts time value): 233 | # 234 | #gateway.recover_after_time: 5m 235 | 236 | # Set how many nodes are expected in this cluster. Once these N nodes 237 | # are up (and recover_after_nodes is met), begin recovery process immediately 238 | # (without waiting for recover_after_time to expire): 239 | # 240 | #gateway.expected_nodes: 2 241 | 242 | 243 | ############################# Recovery Throttling ############################# 244 | 245 | # These settings allow to control the process of shards allocation between 246 | # nodes during initial recovery, replica allocation, rebalancing, 247 | # or when adding and removing nodes. 248 | 249 | # Set the number of concurrent recoveries happening on a node: 250 | # 251 | # 1. During the initial recovery 252 | # 253 | #cluster.routing.allocation.node_initial_primaries_recoveries: 4 254 | # 255 | # 2. During adding/removing nodes, rebalancing, etc 256 | # 257 | #cluster.routing.allocation.node_concurrent_recoveries: 2 258 | 259 | # Set to throttle throughput when recovering (eg. 100mb, by default 20mb): 260 | # 261 | #indices.recovery.max_bytes_per_sec: 20mb 262 | 263 | # Set to limit the number of open concurrent streams when 264 | # recovering a shard from a peer: 265 | # 266 | #indices.recovery.concurrent_streams: 5 267 | 268 | 269 | ################################## Discovery ################################## 270 | 271 | # Discovery infrastructure ensures nodes can be found within a cluster 272 | # and master node is elected. Multicast discovery is the default. 273 | 274 | # Set to ensure a node sees N other master eligible nodes to be considered 275 | # operational within the cluster. This should be set to a quorum/majority of 276 | # the master-eligible nodes in the cluster. 
277 | # 278 | #discovery.zen.minimum_master_nodes: 1 279 | 280 | # Set the time to wait for ping responses from other nodes when discovering. 281 | # Set this option to a higher value on a slow or congested network 282 | # to minimize discovery failures: 283 | # 284 | #discovery.zen.ping.timeout: 3s 285 | 286 | # For more information, see 287 | # 288 | 289 | # Unicast discovery allows to explicitly control which nodes will be used 290 | # to discover the cluster. It can be used when multicast is not present, 291 | # or to restrict the cluster communication-wise. 292 | # 293 | # 1. Disable multicast discovery (enabled by default): 294 | # 295 | discovery.zen.ping.multicast.enabled: false 296 | # 297 | # 2. Configure an initial list of master nodes in the cluster 298 | # to perform discovery when new nodes (master or data) are started: 299 | # 300 | discovery.zen.ping.unicast.hosts: ["node1"] 301 | 302 | # EC2 discovery allows to use AWS EC2 API in order to perform discovery. 303 | # 304 | # You have to install the cloud-aws plugin for enabling the EC2 discovery. 305 | # 306 | # For more information, see 307 | # 308 | # 309 | # See 310 | # for a step-by-step tutorial. 311 | 312 | # GCE discovery allows to use Google Compute Engine API in order to perform discovery. 313 | # 314 | # You have to install the cloud-gce plugin for enabling the GCE discovery. 315 | # 316 | # For more information, see . 317 | 318 | # Azure discovery allows to use Azure API in order to perform discovery. 319 | # 320 | # You have to install the cloud-azure plugin for enabling the Azure discovery. 321 | # 322 | # For more information, see . 323 | 324 | ################################## Slow Log ################################## 325 | 326 | # Shard level query and fetch threshold logging. 327 | 328 | #index.search.slowlog.threshold.query.warn: 10s 329 | #index.search.slowlog.threshold.query.info: 5s 330 | #index.search.slowlog.threshold.query.debug: 2s 331 | #index.search.slowlog.threshold.query.trace: 500ms 332 | 333 | #index.search.slowlog.threshold.fetch.warn: 1s 334 | #index.search.slowlog.threshold.fetch.info: 800ms 335 | #index.search.slowlog.threshold.fetch.debug: 500ms 336 | #index.search.slowlog.threshold.fetch.trace: 200ms 337 | 338 | #index.indexing.slowlog.threshold.index.warn: 10s 339 | #index.indexing.slowlog.threshold.index.info: 5s 340 | #index.indexing.slowlog.threshold.index.debug: 2s 341 | #index.indexing.slowlog.threshold.index.trace: 500ms 342 | 343 | ################################## GC Logging ################################ 344 | 345 | #monitor.jvm.gc.young.warn: 1000ms 346 | #monitor.jvm.gc.young.info: 700ms 347 | #monitor.jvm.gc.young.debug: 400ms 348 | 349 | #monitor.jvm.gc.old.warn: 10s 350 | #monitor.jvm.gc.old.info: 5s 351 | #monitor.jvm.gc.old.debug: 2s 352 | 353 | ################################## Security ################################ 354 | 355 | # Uncomment if you want to enable JSONP as a valid return transport on the 356 | # http server. With this enabled, it may pose a security risk, so disabling 357 | # it unless you need it is recommended (it is disabled by default). 358 | # 359 | #http.jsonp.enable: true --------------------------------------------------------------------------------
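Both Elasticsearch configs above leave `node.name` and `network.publish_host` as `__HOSTNAME__` and `__IP_ADDR__` placeholders; per the Vagrantfile, `scripts/setup-elasticsearch.sh` runs on every node with `-i 10.0.0.10#{i}` (plus `-c` on node1) and is presumably what substitutes the real per-node values. Below is a minimal sketch of that substitution; it is written in Python for illustration (the repo's actual script is shell), and the template and output paths are assumptions.

```
#!/usr/bin/python
# Minimal sketch of the __HOSTNAME__ / __IP_ADDR__ substitution; this is
# not the repo's actual setup-elasticsearch.sh, and the example paths in
# the usage comment below are assumptions.
import socket
import sys


def render_es_config(template_path, output_path, ip_addr, hostname=None):
    '''Fill the placeholders in an Elasticsearch config template.'''
    hostname = hostname or socket.gethostname()
    with open(template_path) as f:
        rendered = (f.read()
                    .replace('__HOSTNAME__', hostname)
                    .replace('__IP_ADDR__', ip_addr))
    with open(output_path, 'w') as f:
        f.write(rendered)


if __name__ == '__main__':
    # e.g.: render_es_config.py resources/elasticsearch/elasticsearch.yml \
    #           /opt/elasticsearch/config/elasticsearch.yml 10.0.0.102
    render_es_config(sys.argv[1], sys.argv[2], sys.argv[3])
```

The two templates differ mainly in node roles: `elasticsearch.yml` (`node.master: false`, `node.data: true`) fits the data nodes on node2-4, while `elasticsearch-client.yml` (`node.master: true`, `node.data: false`) fits node1, which presumably corresponds to the `-c` flag and is the node whose port 9200 is forwarded to the host.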