├── README.md ├── docker-compose.yml └── elasticsearch ├── Dockerfile ├── docker-entrypoint.sh └── run.py /README.md: -------------------------------------------------------------------------------- 1 | # docker-elasticsearch-cluster 2 | 1. Install Docker 3 | 2. Install Docker Compose 4 | 3. Run `docker-compose up` 5 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | ################ ELASTICSEARCH CONTAINERS ################ 2 | # es-master 3 | esmaster: 4 | build: ./elasticsearch 5 | environment: 6 | NODE_NAME: master 7 | IS_MASTER_NODE: true 8 | IS_DATA_NODE: false 9 | # CONTAINER_HOST_ADDRESS: 172.17.42.1 10 | # List of Unicast hosts, Marvel targets format: host:port,host:port 11 | UNICAST_HOSTS: 172.17.42.1:9300 12 | MARVEL_TARGETS: 172.17.42.1:9200 13 | ports: 14 | - "9200:9200" 15 | - "9300:9300" 16 | volumes: 17 | # - /data/elasticsearch:/usr/share/elasticsearch/data 18 | - /etc/timezone:/etc/timezone:ro 19 | - /etc/localtime:/etc/localtime:ro 20 | privileged: true 21 | restart: always 22 | 23 | # es-data-node-1 24 | esdatanode1: 25 | build: ./elasticsearch 26 | environment: 27 | NODE_NAME: node1 28 | IS_MASTER_NODE: false 29 | # CONTAINER_HOST_ADDRESS: 172.17.42.1 30 | # List of Unicast hosts, Marvel targets format: host:port,host:port 31 | UNICAST_HOSTS: 172.17.42.1:9300 32 | MARVEL_TARGETS: 172.17.42.1:9200 33 | ports: 34 | - "19200:9200" 35 | - "19300:9300" 36 | volumes: 37 | - /etc/timezone:/etc/timezone:ro 38 | - /etc/localtime:/etc/localtime:ro 39 | privileged: true 40 | restart: always 41 | 42 | # es-data-node-2 43 | esdatanode2: 44 | build: ./elasticsearch 45 | environment: 46 | NODE_NAME: node2 47 | IS_MASTER_NODE: false 48 | # CONTAINER_HOST_ADDRESS: 172.17.42.1 49 | # List of Unicast hosts, Marvel targets format: host:port,host:port 50 | UNICAST_HOSTS: 172.17.42.1:9300 51 | MARVEL_TARGETS: 
172.17.42.1:9200 52 | ports: 53 | - "29200:9200" 54 | - "29300:9300" 55 | volumes: 56 | - /etc/timezone:/etc/timezone:ro 57 | - /etc/localtime:/etc/localtime:ro 58 | privileged: true 59 | restart: always 60 | 61 | # es-data-node-3 62 | esdatanode3: 63 | build: ./elasticsearch 64 | environment: 65 | NODE_NAME: node3 66 | IS_MASTER_NODE: false 67 | # CONTAINER_HOST_ADDRESS: 172.17.42.1 68 | # List of Unicast hosts, Marvel targets format: host:port,host:port 69 | UNICAST_HOSTS: 172.17.42.1:9300 70 | MARVEL_TARGETS: 172.17.42.1:9200 71 | ports: 72 | - "39200:9200" 73 | - "39300:9300" 74 | volumes: 75 | - /etc/timezone:/etc/timezone:ro 76 | - /etc/localtime:/etc/localtime:ro 77 | privileged: true 78 | restart: always -------------------------------------------------------------------------------- /elasticsearch/Dockerfile: -------------------------------------------------------------------------------- 1 | #image: elasticsearch:1.7.1 2 | FROM java:8-jre 3 | 4 | MAINTAINER Nhat Nguyen 5 | 6 | ENV DEBIAN_FRONTEND noninteractive 7 | 8 | # grab gosu for easy step-down from root 9 | RUN gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 10 | RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/1.2/gosu-$(dpkg --print-architecture)" \ 11 | && curl -o /usr/local/bin/gosu.asc -SL "https://github.com/tianon/gosu/releases/download/1.2/gosu-$(dpkg --print-architecture).asc" \ 12 | && gpg --verify /usr/local/bin/gosu.asc \ 13 | && rm /usr/local/bin/gosu.asc \ 14 | && chmod +x /usr/local/bin/gosu 15 | 16 | # Python YAML is required to generate ElasticSearch's configuration. Maven & JDK are 17 | # needed to build the elasticsearch-zookeeper plugin. 
# Install Python YAML, required by run.py to generate the Elasticsearch
# configuration at container start-up.
RUN apt-get update &&\
    apt-get -y install python-yaml &&\
    apt-get clean && rm -rf /var/lib/apt/lists/*

RUN apt-key adv --keyserver ha.pool.sks-keyservers.net --recv-keys 46095ACC8548582C1A2699A9D27D666CD88E42B4

ENV ELASTICSEARCH_VERSION 1.7.1

RUN echo "deb http://packages.elasticsearch.org/elasticsearch/${ELASTICSEARCH_VERSION%.*}/debian stable main" > /etc/apt/sources.list.d/elasticsearch.list

ENV PATH /usr/share/elasticsearch/bin:$PATH

# -y is required: without it apt-get waits for interactive confirmation and
# the image build hangs.
RUN apt-get update \
    && apt-get -y install elasticsearch=$ELASTICSEARCH_VERSION \
    && mkdir -p /usr/share/elasticsearch/config \
    && rm -rf /var/lib/apt/lists/*

# COPY config /usr/share/elasticsearch/config

# Install all plugins in a single layer:
# - ZooKeeper discovery plugin, from the pre-built release zip.
#   (Building it from source would require git, maven and a JDK, none of
#   which are installed in this image.)
# - Marvel monitoring agent.
# - AWS Cloud plugin (zone/region awareness, S3 snapshots).
RUN plugin -v -url https://github.com/grmblfrz/elasticsearch-zookeeper/releases/download/v1.7.1/elasticsearch-zookeeper-1.7.1.zip -install zookeeper &&\
    plugin -v -i elasticsearch/marvel/latest &&\
    plugin -v -i elasticsearch/elasticsearch-cloud-aws/2.7.0

VOLUME /usr/share/elasticsearch/data

# run.py generates elasticsearch.yml / logging.yml; docker-entrypoint.sh runs
# it and then drops privileges to the elasticsearch user.
COPY run.py /
COPY docker-entrypoint.sh /

ENTRYPOINT ["/docker-entrypoint.sh"]

# Java clients talk to the cluster over port 9300, using the native Elasticsearch transport protocol
# All other languages can communicate with Elasticsearch
over port 9200 using a RESTful API 62 | EXPOSE 9200 9300 63 | 64 | ############# Run this on host machine ############# 65 | # Elasticsearch also uses a mix of NioFS and MMapFS for the various files. Ensure that the maximum map count so that there is ample virtual memory available for mmapped files. 66 | # RUN sysctl -w vm.max_map_count=262144 67 | 68 | CMD ["elasticsearch"] -------------------------------------------------------------------------------- /elasticsearch/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create log folder 4 | if [ ! -d "/var/log/docker/elasticsearch" ]; then 5 | mkdir -p /var/log/docker/elasticsearch 6 | chown -R elasticsearch:elasticsearch /var/log/docker/elasticsearch 7 | fi 8 | 9 | set -e 10 | 11 | # Generate configuration 12 | ./run.py 13 | 14 | # Require privileged 15 | # allow user running Elasticsearch to lock memory 16 | # to be able to set mlockall=true 17 | ulimit -l unlimited 18 | 19 | # Add elasticsearch as command if needed 20 | if [ "${1:0:1}" = '-' ]; then 21 | set -- elasticsearch "$@" 22 | fi 23 | 24 | # Drop root privileges if we are running elasticsearch 25 | if [ "$1" = 'elasticsearch' ]; then 26 | # Change the ownership of /usr/share/elasticsearch/data to elasticsearch 27 | chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/data 28 | exec gosu elasticsearch "$@" 29 | fi 30 | 31 | # As argument is not related to elasticsearch, 32 | # then assume that user wants to run his own process, 33 | # for example a `bash` shell to explore this image 34 | exec "$@" -------------------------------------------------------------------------------- /elasticsearch/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Inspire from SignalFuse, Inc. 4 | # https://github.com/signalfuse/docker-elasticsearch 5 | 6 | # Generate configs for ElasticSearch. 
# Requires python-yaml for configuration writing.

import os
import socket  # noqa: F401 -- kept for the host-name based node-name option below
import yaml


ELASTICSEARCH_CONFIG_FILE = '/usr/share/elasticsearch/config/elasticsearch.yml'
ELASTICSEARCH_LOGGING_CONFIG = '/usr/share/elasticsearch/config/logging.yml'
DEFAULT_ELASTICSEARCH_ZONE = 'ether'
LOG_PATTERN = "%d{yyyy'-'MM'-'dd'T'HH:mm:ss.SSSXXX} %-5p [%-35.35t] [%-36.36c]: %m%n"


def env_as_bool(k, default=True):
    """Read environment variable `k` as a boolean.

    Returns True only when the variable is set to the string 'true'
    (case-insensitive); any other set value yields False.  When the
    variable is unset, `default` is returned.
    """
    if k in os.environ:
        return os.environ[k].lower() == 'true'
    return default


def env_as_int(k, default):
    """Read environment variable `k` as an int, falling back to `default`."""
    return int(os.environ.get(k, default))


def env_as_list(k):
    """Read a comma-separated environment variable as a list of strings.

    Empty entries are dropped, so an unset or empty variable yields [].
    (The previous inline form passed a *list* as the .get() default and
    crashed with AttributeError on .split(',') when the variable was unset.)
    """
    return [item for item in os.environ.get(k, '').split(',') if item]


# Prepare the YAML configuration and write it.
with open(ELASTICSEARCH_CONFIG_FILE, 'w+') as conf:
    data = {
        'cluster': {
            'name': os.environ.get('CLUSTER_NAME',
                                   '{}-elasticsearch'.format('local')),
            # Use the zone attribute for shard allocation awareness: a shard
            # and its replica will not be allocated in the same zone, except
            # when the cluster has fewer zone values than shard copies.
            'routing.allocation.awareness.attributes': 'zone',

            # At any point in time only 2 shards are allowed to be rebalancing.
            'routing.allocation.cluster_concurrent_rebalance': 2,

            # Take free disk space into account when allocating shards:
            # stop allocating new shards to a node above the low watermark...
            'routing.allocation.disk.threshold_enabled': True,
            'routing.allocation.disk.watermark.low': '85%',
            # ...and relocate shards away from a node above the high watermark.
            'routing.allocation.disk.watermark.high': '90%',
        },

        # Node configuration
        'node': {
            # 'name': socket.gethostname(),
            'name': os.environ.get('NODE_NAME'),
            'zone': os.environ.get('ZONE_NAME', DEFAULT_ELASTICSEARCH_ZONE),
            # Allow this node to store data (enabled by default).
            'data': env_as_bool('IS_DATA_NODE'),
            # Set to false for a data-only node that never becomes master.
            'master': env_as_bool('IS_MASTER_NODE'),
        },

        'path': {
            # May contain several comma-separated locations; locations with
            # the most free space are favoured on creation.
            'data': os.environ.get('PATH_DATA',
                                   '/usr/share/elasticsearch/data').split(','),
            'logs': '/var/log/docker/elasticsearch',
        },

        'gateway': {
            # After the first node is up, wait 5 minutes for the others to
            # come back before recovering, so local data need not move.
            'recover_after_time': '5m',
        },

        # Network and discovery.
        'network': {
            # Address other nodes will use to communicate with this node.
            # NOTE(review): when CONTAINER_HOST_ADDRESS is unset this dumps
            # `null`; Elasticsearch then falls back to auto-detection.
            'publish_host': os.environ.get('CONTAINER_HOST_ADDRESS'),
        },

        # Index/replica configuration
        'index': {
            'number_of_replicas': env_as_int('NUM_INDEX_REPLICAS', 1),
            'number_of_shards': env_as_int('NUM_INDEX_SHARDS', 5),

            # Slow-query log thresholds.
            'search.slowlog.threshold.query.warn': '10s',
            'search.slowlog.threshold.query.info': '5s',
            'search.slowlog.threshold.query.debug': '2s',
            'search.slowlog.threshold.query.trace': '500ms',

            'search.slowlog.threshold.fetch.warn': '1s',
            'search.slowlog.threshold.fetch.info': '800ms',
            'search.slowlog.threshold.fetch.debug': '500ms',
            'search.slowlog.threshold.fetch.trace': '200ms',

            'indexing.slowlog.threshold.index.warn': '10s',
            'indexing.slowlog.threshold.index.info': '5s',
            'indexing.slowlog.threshold.index.debug': '2s',
            'indexing.slowlog.threshold.index.trace': '500ms',
        },

        # Port for node-to-node communication (9300 by default).
        # Cast to int so the YAML type matches whether or not the env var is set.
        'transport.tcp.port': env_as_int('TRANSPORT_TCP_PORT', 9300),

        'http': {
            # Enable the REST API, which all non-Java clients use.
            'enabled': env_as_bool('HTTP_ENABLED'),
            'port': env_as_int('HTTP_PORT', 9200),
        },

        'discovery': {
            'type': 'zen',

            # Multicast does not work across Docker hosts; rely on unicast.
            'zen.ping.multicast.enabled': False,

            # BUG FIX: previously `.get('UNICAST_HOSTS', []).split(',')`
            # raised AttributeError when the variable was unset.
            'zen.ping.unicast.hosts': env_as_list('UNICAST_HOSTS'),

            # Fault detection settings
            'zen.fd.ping_interval': '15s',
            'zen.fd.ping_timeout': '30s',
            'zen.fd.ping_retries': 5,
        },

        # Marvel plugin configuration.
        'marvel': {
            'agent': {
                # Enable Marvel agent reporting from this instance.  Disable
                # on the monitoring instance to avoid self-monitoring.
                'enabled': env_as_bool('MARVEL_ENABLED'),

                # TODO: improve this, ideally we want to figure this out
                # automatically.  Stats go to the first host in the list,
                # failing over to the next host(s) if it is unreachable.
                'exporter.es.hosts':
                    os.environ.get('MARVEL_TARGETS', 'localhost:9200').split(','),
            },
        },

        # AWS Cloud plugin configuration.
        'cloud': {
            'aws': {
                'region': os.environ.get('ZONE_NAME', DEFAULT_ELASTICSEARCH_ZONE),
            },
        },

        ################## OPTIMIZATION ##################

        # Disable dynamic scripting to prevent remote code execution
        # (http://bouk.co/blog/elasticsearch-rce/).  Fine-grained settings
        # separate inline and indexed scripts.
        'script': {
            'inline': 'off',
            'indexed': 'off',
        },

        'bootstrap': {
            # Lock the process address space into RAM so Elasticsearch memory
            # is never swapped out (requires `ulimit -l unlimited`).
            'mlockall': True,
        },

        'action': {
            # Disallow deleting indices via wildcards or _all.
            'destructive_requires_name': True,
        },

        'indices': {
            # Field-data cache, used mainly when sorting or faceting on a
            # field; it loads all field values into memory.
            'fielddata.cache.size': '25%',

            # Make index requests more efficient.
            'cluster.send_refresh_mapping': False,
        },
    }

    # .get() returns None for a missing key instead of raising KeyError.
    if os.environ.get('AWS_ACCESS_KEY') and os.environ.get('AWS_SECRET_KEY'):
        data['cloud']['aws'].update({
            'access_key': os.environ['AWS_ACCESS_KEY'],
            'secret_key': os.environ['AWS_SECRET_KEY'],
        })

    yaml.dump(data, conf, default_flow_style=False)

# Setup the logging configuration.
with open(ELASTICSEARCH_LOGGING_CONFIG, 'w+') as conf:
    yaml.dump({
        'es.logger.level': 'INFO',

        # Send everything to the rolling-file appender R.
        'rootLogger': '${es.logger.level},R',
        'logger': {
            # Log action execution errors.
            'action': 'DEBUG',

            # The AWS SDK logs too much under the default INFO level.
            'com.amazonaws': 'WARN',
        },
        'appender': {
            'R': {
                'type': 'rollingFile',
                'File': '${path.logs}/%s.log' % 'elasticsearch',
                'MaxFileSize': '100MB',
                'MaxBackupIndex': '10',
                'layout': {
                    'type': 'pattern',
                    'ConversionPattern': LOG_PATTERN,
                },
            },
        },
    }, conf, default_flow_style=False)