├── handlers ├── main.yml └── restart_elasticsearch.yml ├── defaults └── main.yml ├── vars └── main.yml ├── templates ├── elasticsearch_repo.j2 └── elasticsearch.yml.j2 ├── README.md ├── tasks └── main.yml ├── LICENSE └── meta └── main.yml /handlers/main.yml: -------------------------------------------------------------------------------- 1 | - include: restart_elasticsearch.yml 2 | -------------------------------------------------------------------------------- /defaults/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # defaults file for include.elasticsearch 3 | -------------------------------------------------------------------------------- /handlers/restart_elasticsearch.yml: -------------------------------------------------------------------------------- 1 | - name: restart elasticsearch 2 | service: name=elasticsearch state=restarted enabled=yes 3 | -------------------------------------------------------------------------------- /vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | elasticsearch_version: "1.0"  # quoted: a bare 1.0 is a YAML float, so e.g. 1.10 would silently collapse to 1.1 in the repo baseurl 4 | elasticsearch_conf_dir: /usr/local/etc/elasticsearch 5 | elasticsearch_conf_file: elasticsearch.yml 6 | 7 | openjdk_version: "1.7.0"  # quoted for consistency; used to build the yum package name -------------------------------------------------------------------------------- /templates/elasticsearch_repo.j2: -------------------------------------------------------------------------------- 1 | # {{ ansible_managed }} 2 | 3 | [elasticsearch-{{ elasticsearch_version }}] 4 | name=Elasticsearch repository for {{ elasticsearch_version }}.x packages 5 | baseurl=http://packages.elasticsearch.org/elasticsearch/{{ elasticsearch_version }}/centos 6 | gpgcheck=1 7 | gpgkey=http://packages.elasticsearch.org/GPG-KEY-elasticsearch 8 | enabled=1 9 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | Tiny Elasticsearch setup on CentOS 2 | ================================== 3 | 4 | This tiny Elasticsearch setup is nothing more than this... install, configure and run elasticsearch. 5 | 6 | Requirements 7 | ------------ 8 | 9 | None. 10 | 11 | Role Variables 12 | -------------- 13 | 14 | Change variables under `vars/main.yml`. All variables used in this role are mentioned here. 15 | 16 | Dependencies 17 | ------------ 18 | 19 | This role installs OpenJDK 1.7.0 if not present. 20 | 21 | Example Playbook 22 | ---------------- 23 | 24 | - hosts: servers 25 | roles: 26 | - { role: include.elasticsearch } 27 | 28 | License 29 | ------- 30 | 31 | BSD 32 | 33 | Author Information 34 | ------------------ 35 | 36 | Francisco Cabrita - -------------------------------------------------------------------------------- /tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # SETUP YUM REPO 3 | - name: ELASTICSEARCH | add elasticsearch repo key 4 | rpm_key: key="http://packages.elasticsearch.org/GPG-KEY-elasticsearch" state=present 5 | tags: elasticsearch 6 | 7 | - name: ELASTICSEARCH | enable repository 8 | template: src=elasticsearch_repo.j2 dest=/etc/yum.repos.d/elasticsearch.repo 9 | tags: elasticsearch 10 | 11 | 12 | # JAVA 13 | - name: ELASTICSEARCH | install OpenJDK 14 | yum: name="java-{{ openjdk_version }}-openjdk" state=present 15 | tags: 16 | - openjdk 17 | - elasticsearch 18 | 19 | 20 | # ELASTICSEARCH 21 | - name: ELASTICSEARCH | install 22 | yum: name={{ item }} state=latest 23 | with_items: 24 | - elasticsearch 25 | notify: 26 | - restart elasticsearch 27 | tags: elasticsearch 28 | 29 | - name: ELASTICSEARCH | mkdir config folder 30 | file: path={{ elasticsearch_conf_dir }} state=directory owner=root group=root mode=0755 # was 0644: a directory without the execute bit cannot be traversed, so the config templated below would be unreachable 31 | tags: elasticsearch 32 | 33 | - name: ELASTICSEARCH | configuration 34 | template: 
src=elasticsearch.yml.j2 dest={{ elasticsearch_conf_dir }}/{{ elasticsearch_conf_file }} owner=root group=root mode=0644 35 | notify: 36 | - restart elasticsearch 37 | tags: elasticsearch 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Francisco Cabrita 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the {organization} nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | -------------------------------------------------------------------------------- /meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | author: Francisco Cabrita 4 | description: This role intends to provide simple elasticsearch setup. 5 | # company: your company (optional) 6 | # Some suggested licenses: 7 | # - BSD (default) 8 | # - MIT 9 | # - GPLv2 10 | # - GPLv3 11 | # - Apache 12 | # - CC-BY 13 | license: BSD 14 | min_ansible_version: 1.2 15 | # 16 | # Below are all platforms currently available. Just uncomment 17 | # the ones that apply to your role. If you don't see your 18 | # platform on this list, let us know and we'll get it added! 19 | # 20 | platforms: 21 | - name: EL 22 | versions: 23 | # - all 24 | # - 5 25 | - 6 26 | - 7 27 | #- name: GenericUNIX 28 | # versions: 29 | # - all 30 | # - any 31 | #- name: Fedora 32 | # versions: 33 | # - all 34 | # - 16 35 | # - 17 36 | # - 18 37 | # - 19 38 | # - 20 39 | #- name: opensuse 40 | # versions: 41 | # - all 42 | # - 12.1 43 | # - 12.2 44 | # - 12.3 45 | # - 13.1 46 | # - 13.2 47 | #- name: Amazon 48 | # versions: 49 | # - all 50 | # - 2013.03 51 | # - 2013.09 52 | #- name: GenericBSD 53 | # versions: 54 | # - all 55 | # - any 56 | #- name: FreeBSD 57 | # versions: 58 | # - all 59 | # - 8.0 60 | # - 8.1 61 | # - 8.2 62 | # - 8.3 63 | # - 8.4 64 | # - 9.0 65 | # - 9.1 66 | # - 9.1 67 | # - 9.2 68 | #- name: Ubuntu 69 | # versions: 70 | # - all 71 | # - lucid 72 | # - maverick 73 | # - natty 74 | # - oneiric 75 | # - precise 76 | # - quantal 77 | # - raring 78 | # - saucy 79 | # - trusty 80 | #- name: SLES 81 | # versions: 82 | # - all 83 | # - 10SP3 84 | # - 10SP4 85 | # - 11 86 | # - 11SP1 87 | # - 11SP2 88 | # - 11SP3 89 | #- name: GenericLinux 90 | # versions: 91 | # - all 92 | # - any 93 | #- name: Debian 94 | # versions: 95 | # - all 96 | # - etch 97 | # - lenny 98 | # - squeeze 99 | # - wheezy 100 | # 101 | # Below are all 
categories currently available. Just as with 102 | # the platforms above, uncomment those that apply to your role. 103 | # 104 | categories: 105 | #- cloud 106 | #- cloud:ec2 107 | #- cloud:gce 108 | #- cloud:rax 109 | #- clustering 110 | - database 111 | #- database:nosql 112 | #- database:sql 113 | #- development 114 | - monitoring 115 | #- networking 116 | #- packaging 117 | - system 118 | - web 119 | dependencies: [] 120 | # List your role dependencies here, one per line. Only 121 | # dependencies available via galaxy should be listed here. 122 | # Be sure to remove the '[]' above if you add dependencies 123 | # to this list. 124 | 125 | -------------------------------------------------------------------------------- /templates/elasticsearch.yml.j2: -------------------------------------------------------------------------------- 1 | # {{ ansible_managed }} 2 | 3 | ##################### Elasticsearch Configuration Example ##################### 4 | 5 | # This file contains an overview of various configuration settings, 6 | # targeted at operations staff. Application developers should 7 | # consult the guide at . 8 | # 9 | # The installation procedure is covered at 10 | # . 11 | # 12 | # Elasticsearch comes with reasonable defaults for most settings, 13 | # so you can try it out without bothering with configuration. 14 | # 15 | # Most of the time, these defaults are just fine for running a production 16 | # cluster. If you're fine-tuning your cluster, or wondering about the 17 | # effect of certain configuration option, please _do ask_ on the 18 | # mailing list or IRC channel [http://elasticsearch.org/community]. 19 | 20 | # Any element in the configuration can be replaced with environment variables 21 | # by placing them in ${...} notation. 
For example: 22 | # 23 | # node.rack: ${RACK_ENV_VAR} 24 | 25 | # For information on supported formats and syntax for the config file, see 26 | # 27 | 28 | script.disable_dynamic: true # See http://bouk.co/blog/elasticsearch-rce/ # http://www.cve.mitre.org/cgi-bin/cvename.cgi?name=2014-3120 29 | 30 | 31 | ################################### Cluster ################################### 32 | 33 | # Cluster name identifies your cluster for auto-discovery. If you're running 34 | # multiple clusters on the same network, make sure you're using unique names. 35 | # 36 | # cluster.name: elasticsearch 37 | 38 | 39 | #################################### Node ##################################### 40 | 41 | # Node names are generated dynamically on startup, so you're relieved 42 | # from configuring them manually. You can tie this node to a specific name: 43 | # 44 | # node.name: "Franz Kafka" 45 | 46 | # Every node can be configured to allow or deny being eligible as the master, 47 | # and to allow or deny to store the data. 48 | # 49 | # Allow this node to be eligible as a master node (enabled by default): 50 | # 51 | # node.master: true 52 | # 53 | # Allow this node to store data (enabled by default): 54 | # 55 | # node.data: true 56 | 57 | # You can exploit these settings to design advanced cluster topologies. 58 | # 59 | # 1. You want this node to never become a master node, only to hold data. 60 | # This will be the "workhorse" of your cluster. 61 | # 62 | # node.master: false 63 | # node.data: true 64 | # 65 | # 2. You want this node to only serve as a master: to not store any data and 66 | # to have free resources. This will be the "coordinator" of your cluster. 67 | # 68 | # node.master: true 69 | # node.data: false 70 | # 71 | # 3. You want this node to be neither master nor data node, but 72 | # to act as a "search load balancer" (fetching data from nodes, 73 | # aggregating results, etc.) 
74 | # 75 | # node.master: false 76 | # node.data: false 77 | 78 | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the 79 | # Node Info API [http://localhost:9200/_nodes] or GUI tools 80 | # such as , 81 | # , 82 | # and 83 | # to inspect the cluster state. 84 | 85 | # A node can have generic attributes associated with it, which can later be used 86 | # for customized shard allocation filtering, or allocation awareness. An attribute 87 | # is a simple key value pair, similar to node.key: value, here is an example: 88 | # 89 | # node.rack: rack314 90 | 91 | # By default, multiple nodes are allowed to start from the same installation location 92 | # to disable it, set the following: 93 | # node.max_local_storage_nodes: 1 94 | 95 | 96 | #################################### Index #################################### 97 | 98 | # You can set a number of options (such as shard/replica options, mapping 99 | # or analyzer definitions, translog settings, ...) for indices globally, 100 | # in this file. 101 | # 102 | # Note, that it makes more sense to configure index settings specifically for 103 | # a certain index, either when creating it or by using the index templates API. 104 | # 105 | # See and 106 | # 107 | # for more information. 108 | 109 | # Set the number of shards (splits) of an index (5 by default): 110 | # 111 | # index.number_of_shards: 5 112 | 113 | # Set the number of replicas (additional copies) of an index (1 by default): 114 | # 115 | # index.number_of_replicas: 1 116 | 117 | # Note, that for development on a local machine, with small indices, it usually 118 | # makes sense to "disable" the distributed features: 119 | # 120 | # index.number_of_shards: 1 121 | # index.number_of_replicas: 0 122 | 123 | # These settings directly affect the performance of index and search operations 124 | # in your cluster. Assuming you have enough machines to hold shards and 125 | # replicas, the rule of thumb is: 126 | # 127 | # 1. 
Having more *shards* enhances the _indexing_ performance and allows to 128 | # _distribute_ a big index across machines. 129 | # 2. Having more *replicas* enhances the _search_ performance and improves the 130 | # cluster _availability_. 131 | # 132 | # The "number_of_shards" is a one-time setting for an index. 133 | # 134 | # The "number_of_replicas" can be increased or decreased anytime, 135 | # by using the Index Update Settings API. 136 | # 137 | # Elasticsearch takes care about load balancing, relocating, gathering the 138 | # results from nodes, etc. Experiment with different settings to fine-tune 139 | # your setup. 140 | 141 | # Use the Index Status API () to inspect 142 | # the index status. 143 | 144 | 145 | #################################### Paths #################################### 146 | 147 | # Path to directory containing configuration (this file and logging.yml): 148 | # 149 | # path.conf: /path/to/conf 150 | 151 | # Path to directory where to store index data allocated for this node. 152 | # 153 | # path.data: /path/to/data 154 | # 155 | # Can optionally include more than one location, causing data to be striped across 156 | # the locations (a la RAID 0) on a file level, favouring locations with most free 157 | # space on creation. For example: 158 | # 159 | # path.data: /path/to/data1,/path/to/data2 160 | 161 | # Path to temporary files: 162 | # 163 | # path.work: /path/to/work 164 | 165 | # Path to log files: 166 | # 167 | # path.logs: /path/to/logs 168 | 169 | # Path to where plugins are installed: 170 | # 171 | # path.plugins: /path/to/plugins 172 | 173 | 174 | #################################### Plugin ################################### 175 | 176 | # If a plugin listed here is not installed for current node, the node will not start. 
177 | # 178 | # plugin.mandatory: mapper-attachments,lang-groovy 179 | 180 | 181 | ################################### Memory #################################### 182 | 183 | # Elasticsearch performs poorly when JVM starts swapping: you should ensure that 184 | # it _never_ swaps. 185 | # 186 | # Set this property to true to lock the memory: 187 | # 188 | # bootstrap.mlockall: true 189 | 190 | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set 191 | # to the same value, and that the machine has enough memory to allocate 192 | # for Elasticsearch, leaving enough memory for the operating system itself. 193 | # 194 | # You should also make sure that the Elasticsearch process is allowed to lock 195 | # the memory, eg. by using `ulimit -l unlimited`. 196 | 197 | 198 | ############################## Network And HTTP ############################### 199 | 200 | # Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens 201 | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node 202 | # communication. (the range means that if the port is busy, it will automatically 203 | # try the next port). 204 | 205 | # Set the bind address specifically (IPv4 or IPv6): 206 | # 207 | # network.bind_host: 192.168.0.1 208 | 209 | # Set the address other nodes will use to communicate with this node. If not 210 | # set, it is automatically derived. It must point to an actual IP address. 
211 | # 212 | # network.publish_host: 192.168.0.1 213 | 214 | # Set both 'bind_host' and 'publish_host': 215 | # 216 | # network.host: 192.168.0.1 217 | 218 | # Set a custom port for the node to node communication (9300 by default): 219 | # 220 | # transport.tcp.port: 9300 221 | 222 | # Enable compression for all communication between nodes (disabled by default): 223 | # 224 | # transport.tcp.compress: true 225 | 226 | # Set a custom port to listen for HTTP traffic: 227 | # 228 | # http.port: 9200 229 | 230 | # Set a custom allowed content length: 231 | # 232 | # http.max_content_length: 100mb 233 | 234 | # Disable HTTP completely: 235 | # 236 | # http.enabled: false 237 | 238 | 239 | ################################### Gateway ################################### 240 | 241 | # The gateway allows for persisting the cluster state between full cluster 242 | # restarts. Every change to the state (such as adding an index) will be stored 243 | # in the gateway, and when the cluster starts up for the first time, 244 | # it will read its state from the gateway. 245 | 246 | # There are several types of gateway implementations. For more information, see 247 | # . 248 | 249 | # The default gateway type is the "local" gateway (recommended): 250 | # 251 | # gateway.type: local 252 | 253 | # Settings below control how and when to start the initial recovery process on 254 | # a full cluster restart (to reuse as much local data as possible when using shared 255 | # gateway). 256 | 257 | # Allow recovery process after N nodes in a cluster are up: 258 | # 259 | # gateway.recover_after_nodes: 1 260 | 261 | # Set the timeout to initiate the recovery process, once the N nodes 262 | # from previous setting are up (accepts time value): 263 | # 264 | # gateway.recover_after_time: 5m 265 | 266 | # Set how many nodes are expected in this cluster. 
Once these N nodes 267 | # are up (and recover_after_nodes is met), begin recovery process immediately 268 | # (without waiting for recover_after_time to expire): 269 | # 270 | # gateway.expected_nodes: 2 271 | 272 | 273 | ############################# Recovery Throttling ############################# 274 | 275 | # These settings allow to control the process of shards allocation between 276 | # nodes during initial recovery, replica allocation, rebalancing, 277 | # or when adding and removing nodes. 278 | 279 | # Set the number of concurrent recoveries happening on a node: 280 | # 281 | # 1. During the initial recovery 282 | # 283 | # cluster.routing.allocation.node_initial_primaries_recoveries: 4 284 | # 285 | # 2. During adding/removing nodes, rebalancing, etc 286 | # 287 | # cluster.routing.allocation.node_concurrent_recoveries: 2 288 | 289 | # Set to throttle throughput when recovering (eg. 100mb, by default 20mb): 290 | # 291 | # indices.recovery.max_bytes_per_sec: 20mb 292 | 293 | # Set to limit the number of open concurrent streams when 294 | # recovering a shard from a peer: 295 | # 296 | # indices.recovery.concurrent_streams: 5 297 | 298 | 299 | ################################## Discovery ################################## 300 | 301 | # Discovery infrastructure ensures nodes can be found within a cluster 302 | # and master node is elected. Multicast discovery is the default. 303 | 304 | # Set to ensure a node sees N other master eligible nodes to be considered 305 | # operational within the cluster. Its recommended to set it to a higher value 306 | # than 1 when running more than 2 nodes in the cluster. 307 | # 308 | # discovery.zen.minimum_master_nodes: 1 309 | 310 | # Set the time to wait for ping responses from other nodes when discovering. 
311 | # Set this option to a higher value on a slow or congested network 312 | # to minimize discovery failures: 313 | # 314 | # discovery.zen.ping.timeout: 3s 315 | 316 | # For more information, see 317 | # 318 | 319 | # Unicast discovery allows to explicitly control which nodes will be used 320 | # to discover the cluster. It can be used when multicast is not present, 321 | # or to restrict the cluster communication-wise. 322 | # 323 | # 1. Disable multicast discovery (enabled by default): 324 | # 325 | # discovery.zen.ping.multicast.enabled: false 326 | # 327 | # 2. Configure an initial list of master nodes in the cluster 328 | # to perform discovery when new nodes (master or data) are started: 329 | # 330 | # discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] 331 | 332 | # EC2 discovery allows to use AWS EC2 API in order to perform discovery. 333 | # 334 | # You have to install the cloud-aws plugin for enabling the EC2 discovery. 335 | # 336 | # For more information, see 337 | # 338 | # 339 | # See 340 | # for a step-by-step tutorial. 341 | 342 | # GCE discovery allows to use Google Compute Engine API in order to perform discovery. 343 | # 344 | # You have to install the cloud-gce plugin for enabling the GCE discovery. 345 | # 346 | # For more information, see . 347 | 348 | # Azure discovery allows to use Azure API in order to perform discovery. 349 | # 350 | # You have to install the cloud-azure plugin for enabling the Azure discovery. 351 | # 352 | # For more information, see . 353 | 354 | ################################## Slow Log ################################## 355 | 356 | # Shard level query and fetch threshold logging. 
357 | 358 | #index.search.slowlog.threshold.query.warn: 10s 359 | #index.search.slowlog.threshold.query.info: 5s 360 | #index.search.slowlog.threshold.query.debug: 2s 361 | #index.search.slowlog.threshold.query.trace: 500ms 362 | 363 | #index.search.slowlog.threshold.fetch.warn: 1s 364 | #index.search.slowlog.threshold.fetch.info: 800ms 365 | #index.search.slowlog.threshold.fetch.debug: 500ms 366 | #index.search.slowlog.threshold.fetch.trace: 200ms 367 | 368 | #index.indexing.slowlog.threshold.index.warn: 10s 369 | #index.indexing.slowlog.threshold.index.info: 5s 370 | #index.indexing.slowlog.threshold.index.debug: 2s 371 | #index.indexing.slowlog.threshold.index.trace: 500ms 372 | 373 | ################################## GC Logging ################################ 374 | 375 | #monitor.jvm.gc.young.warn: 1000ms 376 | #monitor.jvm.gc.young.info: 700ms 377 | #monitor.jvm.gc.young.debug: 400ms 378 | 379 | #monitor.jvm.gc.old.warn: 10s 380 | #monitor.jvm.gc.old.info: 5s 381 | #monitor.jvm.gc.old.debug: 2s 382 | --------------------------------------------------------------------------------