├── .gitignore ├── LICENSE ├── README.md ├── defaults └── main.yml ├── files └── content_mapping.json ├── handlers └── main.yml ├── meta └── main.yml ├── tasks └── main.yml ├── templates └── elasticsearch.yml.j2 └── vars └── main.yml /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | ._* 3 | .ideas 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Servers For Hackers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Elasticsearch 2 | ========= 3 | 4 | Installs and configures Elasticsearch 1.4 for Servers for Hackers 5 | 6 | Requirements 7 | ------------ 8 | 9 | Targets Debian/Ubuntu hosts (packages are installed with `apt`). The role talks to Elasticsearch over HTTP using Ansible's `uri` module, which requires `pip` and `httplib2` on the target host; both are installed by the tasks in `main.yml`. 10 | 11 | Role Variables 12 | -------------- 13 | These are defined in `vars/main.yml`: 14 | * **cluster**: sfhsearch 15 | * **node**: sfhnode 16 | * **num_replicas**: 1 17 | * **num_shards**: 5 18 | * **index_name**: serversforhackers 19 | 20 | > This role currently assumes a single-node cluster.
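A minimal playbook applying this role might look like the following sketch. The host group `search` and the role name `elasticsearch` are assumptions (adjust them to your inventory and roles path), and because the variables above live in `vars/main.yml`, overriding them is most reliable via role parameters or `--extra-vars`:

```yaml
---
# Example playbook (sketch) — host group and role name are assumed, not part of this repo.
- hosts: search
  sudo: yes                      # Ansible 1.x syntax; use `become: yes` on 2.x+
  roles:
    - role: elasticsearch        # hypothetical name/path for this role
      cluster: sfhsearch
      node: sfhnode
      num_shards: 5
      num_replicas: 1
      index_name: serversforhackers
```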
21 | 22 | License 23 | ------- 24 | 25 | MIT -------------------------------------------------------------------------------- /defaults/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # defaults file for elasticsearch 3 | -------------------------------------------------------------------------------- /files/content_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "mappings" : { 3 | "content" : { 4 | "properties" : { 5 | "content" : { 6 | "type" : "string" 7 | }, 8 | "excerpt" : { 9 | "type" : "string" 10 | }, 11 | "published" : { 12 | "type" : "date", 13 | "index" : "not_analyzed" 14 | }, 15 | "rundown" : { 16 | "type" : "string" 17 | }, 18 | "series" : { 19 | "type" : "object", 20 | "properties" : { 21 | "name" : {"type" : "string", "index" : "not_analyzed"}, 22 | "slug" : {"type" : "string", "index" : "not_analyzed"} 23 | } 24 | }, 25 | "tags" : { 26 | "type" : "object", 27 | "properties" : { 28 | "tag" : {"type" : "string"}, 29 | "slug" : {"type" : "string", "index" : "not_analyzed"} 30 | }, 31 | "boost": 1.6 32 | }, 33 | "keywords" : { 34 | "type" : "string", 35 | "boost": 1.3 36 | }, 37 | "title" : { 38 | "type" : "string", 39 | "boost": 2.0 40 | }, 41 | "slug" : { 42 | "type" : "string", 43 | "index" : "not_analyzed" 44 | }, 45 | "content_type" : { 46 | "type" : "string", 47 | "index" : "not_analyzed" 48 | } 49 | } 50 | } 51 | } 52 | } -------------------------------------------------------------------------------- /handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Start Elasticsearch 3 | service: name=elasticsearch state=started 4 | 5 | - name: Restart Elasticsearch 6 | service: name=elasticsearch state=restarted -------------------------------------------------------------------------------- /meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | author: Chris Fidao 4 | description: Elasticsearch for Serversforhackers.com 5 | company: Fideloper LLC 6 | # Some suggested licenses: 7 | # - BSD (default) 8 | # - MIT 9 | # - GPLv2 10 | # - GPLv3 11 | # - Apache 12 | # - CC-BY 13 | license: MIT 14 | min_ansible_version: 1.2 15 | # 16 | # Below are all platforms currently available. Just uncomment 17 | # the ones that apply to your role. If you don't see your 18 | # platform on this list, let us know and we'll get it added!
19 | # 20 | #platforms: 21 | #- name: EL 22 | # versions: 23 | # - all 24 | # - 5 25 | # - 6 26 | # - 7 27 | #- name: GenericUNIX 28 | # versions: 29 | # - all 30 | # - any 31 | #- name: Fedora 32 | # versions: 33 | # - all 34 | # - 16 35 | # - 17 36 | # - 18 37 | # - 19 38 | # - 20 39 | #- name: SmartOS 40 | # versions: 41 | # - all 42 | # - any 43 | #- name: opensuse 44 | # versions: 45 | # - all 46 | # - 12.1 47 | # - 12.2 48 | # - 12.3 49 | # - 13.1 50 | # - 13.2 51 | #- name: Amazon 52 | # versions: 53 | # - all 54 | # - 2013.03 55 | # - 2013.09 56 | #- name: GenericBSD 57 | # versions: 58 | # - all 59 | # - any 60 | #- name: FreeBSD 61 | # versions: 62 | # - all 63 | # - 8.0 64 | # - 8.1 65 | # - 8.2 66 | # - 8.3 67 | # - 8.4 68 | # - 9.0 69 | # - 9.1 70 | # - 9.1 71 | # - 9.2 72 | #- name: Ubuntu 73 | # versions: 74 | # - all 75 | # - lucid 76 | # - maverick 77 | # - natty 78 | # - oneiric 79 | # - precise 80 | # - quantal 81 | # - raring 82 | # - saucy 83 | # - trusty 84 | #- name: SLES 85 | # versions: 86 | # - all 87 | # - 10SP3 88 | # - 10SP4 89 | # - 11 90 | # - 11SP1 91 | # - 11SP2 92 | # - 11SP3 93 | #- name: GenericLinux 94 | # versions: 95 | # - all 96 | # - any 97 | #- name: Debian 98 | # versions: 99 | # - all 100 | # - etch 101 | # - lenny 102 | # - squeeze 103 | # - wheezy 104 | # 105 | # Below are all categories currently available. Just as with 106 | # the platforms above, uncomment those that apply to your role. 107 | # 108 | #categories: 109 | #- cloud 110 | #- cloud:ec2 111 | #- cloud:gce 112 | #- cloud:rax 113 | #- clustering 114 | #- database 115 | #- database:nosql 116 | #- database:sql 117 | #- development 118 | #- monitoring 119 | #- networking 120 | #- packaging 121 | #- system 122 | #- web 123 | dependencies: [] 124 | # List your role dependencies here, one per line. Only 125 | # dependencies available via galaxy should be listed here. 126 | # Be sure to remove the '[]' above if you add dependencies 127 | # to this list. 
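If this role ever needs another Galaxy role, the `dependencies` comment above is where it would be declared; a hypothetical sketch (the role name below is invented purely for illustration):

```yaml
# meta/main.yml (sketch) — replace the empty [] list with entries like this:
dependencies:
  - role: someuser.openjdk   # invented Galaxy role name, for illustration only
```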
128 | 129 | -------------------------------------------------------------------------------- /tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Install PIP 3 | apt: pkg=python-pip state=installed update_cache=true 4 | 5 | - name: Install Python HTTPLib 6 | pip: name=httplib2 state=present 7 | 8 | - name: Install Java (OpenJDK) 9 | apt: pkg=openjdk-7-jdk state=installed update_cache=yes 10 | 11 | - name: Add Elasticsearch Key 12 | apt_key: url='http://packages.elasticsearch.org/GPG-KEY-elasticsearch' state=present 13 | 14 | - name: Add Elasticsearch Repository 15 | apt_repository: repo='deb http://packages.elasticsearch.org/elasticsearch/1.4/debian stable main' state=present 16 | 17 | - name: Install Elasticsearch 18 | apt: pkg=elasticsearch state=installed update_cache=yes 19 | 20 | # Run sudo update-rc.d elasticsearch defaults 95 10 21 | - name: Enable Elasticsearch (Start on boot) 22 | service: name=elasticsearch enabled=yes 23 | notify: 24 | - Start Elasticsearch 25 | 26 | - name: Configure Elasticsearch 27 | template: src=elasticsearch.yml.j2 dest=/etc/elasticsearch/elasticsearch.yml 28 | tags: 29 | - configs 30 | notify: 31 | - Restart Elasticsearch 32 | 33 | - name: Check If Index Exists 34 | uri: url='http://localhost:9200/{{ index_name }}' 35 | method=HEAD 36 | status_code=200,404 37 | register: index_status 38 | 39 | # - name: Create SFH Index 40 | # uri: url='http://localhost:9200/{{ index_name }}' 41 | # method=PUT 42 | # status_code=200 43 | # when: index_status.status == 404 44 | 45 | - name: Create SFH Index with Mapping 46 | uri: url='http://localhost:9200/{{ index_name }}' 47 | method=POST 48 | status_code=200 49 | body='{{ lookup('file','content_mapping.json') }}' 50 | HEADER_Content-Type="application/json" 51 | when: index_status.status == 404 -------------------------------------------------------------------------------- /templates/elasticsearch.yml.j2: -------------------------------------------------------------------------------- 1 | ##################### Elasticsearch Configuration Example ##################### 2 | 3 | # This file contains an overview of various configuration settings, 4 | # targeted at operations staff. Application developers should 5 | # consult the guide at . 6 | # 7 | # The installation procedure is covered at 8 | # . 9 | # 10 | # Elasticsearch comes with reasonable defaults for most settings, 11 | # so you can try it out without bothering with configuration. 12 | # 13 | # Most of the time, these defaults are just fine for running a production 14 | # cluster. If you're fine-tuning your cluster, or wondering about the 15 | # effect of certain configuration option, please _do ask_ on the 16 | # mailing list or IRC channel [http://elasticsearch.org/community]. 17 | 18 | # Any element in the configuration can be replaced with environment variables 19 | # by placing them in ${...} notation. For example: 20 | # 21 | #node.rack: ${RACK_ENV_VAR} 22 | 23 | # For information on supported formats and syntax for the config file, see 24 | # 25 | 26 | 27 | ################################### Cluster ################################### 28 | 29 | # Cluster name identifies your cluster for auto-discovery. If you're running 30 | # multiple clusters on the same network, make sure you're using unique names. 
31 | # 32 | cluster.name: {{ cluster }} 33 | 34 | 35 | #################################### Node ##################################### 36 | 37 | # Node names are generated dynamically on startup, so you're relieved 38 | # from configuring them manually. You can tie this node to a specific name: 39 | # 40 | node.name: "{{ node }}" 41 | 42 | # Every node can be configured to allow or deny being eligible as the master, 43 | # and to allow or deny to store the data. 44 | # 45 | # Allow this node to be eligible as a master node (enabled by default): 46 | # 47 | #node.master: true 48 | # 49 | # Allow this node to store data (enabled by default): 50 | # 51 | #node.data: true 52 | 53 | # You can exploit these settings to design advanced cluster topologies. 54 | # 55 | # 1. You want this node to never become a master node, only to hold data. 56 | # This will be the "workhorse" of your cluster. 57 | # 58 | #node.master: false 59 | #node.data: true 60 | # 61 | # 2. You want this node to only serve as a master: to not store any data and 62 | # to have free resources. This will be the "coordinator" of your cluster. 63 | # 64 | #node.master: true 65 | #node.data: false 66 | # 67 | # 3. You want this node to be neither master nor data node, but 68 | # to act as a "search load balancer" (fetching data from nodes, 69 | # aggregating results, etc.) 70 | # 71 | #node.master: false 72 | #node.data: false 73 | 74 | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the 75 | # Node Info API [http://localhost:9200/_nodes] or GUI tools 76 | # such as , 77 | # , 78 | # and 79 | # to inspect the cluster state. 80 | 81 | # A node can have generic attributes associated with it, which can later be used 82 | # for customized shard allocation filtering, or allocation awareness. An attribute 83 | # is a simple key value pair, similar to node.key: value, here is an example: 84 | # 85 | #node.rack: rack314 86 | 87 | # By default, multiple nodes are allowed to start from the same installation location 88 | # to disable it, set the following: 89 | #node.max_local_storage_nodes: 1 90 | 91 | 92 | #################################### Index #################################### 93 | 94 | # You can set a number of options (such as shard/replica options, mapping 95 | # or analyzer definitions, translog settings, ...) for indices globally, 96 | # in this file. 97 | # 98 | # Note, that it makes more sense to configure index settings specifically for 99 | # a certain index, either when creating it or by using the index templates API. 100 | # 101 | # See and 102 | # 103 | # for more information. 104 | 105 | # Set the number of shards (splits) of an index (5 by default): 106 | # 107 | index.number_of_shards: {{ num_shards }} 108 | 109 | # Set the number of replicas (additional copies) of an index (1 by default): 110 | # 111 | index.number_of_replicas: {{ num_replicas }} 112 | 113 | # Note, that for development on a local machine, with small indices, it usually 114 | # makes sense to "disable" the distributed features: 115 | # 116 | #index.number_of_shards: 1 117 | #index.number_of_replicas: 0 118 | 119 | # These settings directly affect the performance of index and search operations 120 | # in your cluster. Assuming you have enough machines to hold shards and 121 | # replicas, the rule of thumb is: 122 | # 123 | # 1. Having more *shards* enhances the _indexing_ performance and allows to 124 | # _distribute_ a big index across machines. 125 | # 2. 
Having more *replicas* enhances the _search_ performance and improves the 126 | # cluster _availability_. 127 | # 128 | # The "number_of_shards" is a one-time setting for an index. 129 | # 130 | # The "number_of_replicas" can be increased or decreased anytime, 131 | # by using the Index Update Settings API. 132 | # 133 | # Elasticsearch takes care about load balancing, relocating, gathering the 134 | # results from nodes, etc. Experiment with different settings to fine-tune 135 | # your setup. 136 | 137 | # Use the Index Status API () to inspect 138 | # the index status. 139 | 140 | 141 | #################################### Paths #################################### 142 | 143 | # Path to directory containing configuration (this file and logging.yml): 144 | # 145 | #path.conf: /path/to/conf 146 | 147 | # Path to directory where to store index data allocated for this node. 148 | # 149 | #path.data: /path/to/data 150 | # 151 | # Can optionally include more than one location, causing data to be striped across 152 | # the locations (a la RAID 0) on a file level, favouring locations with most free 153 | # space on creation. For example: 154 | # 155 | #path.data: /path/to/data1,/path/to/data2 156 | 157 | # Path to temporary files: 158 | # 159 | #path.work: /path/to/work 160 | 161 | # Path to log files: 162 | # 163 | #path.logs: /path/to/logs 164 | 165 | # Path to where plugins are installed: 166 | # 167 | #path.plugins: /path/to/plugins 168 | 169 | 170 | #################################### Plugin ################################### 171 | 172 | # If a plugin listed here is not installed for current node, the node will not start. 173 | # 174 | #plugin.mandatory: mapper-attachments,lang-groovy 175 | 176 | 177 | ################################### Memory #################################### 178 | 179 | # Elasticsearch performs poorly when JVM starts swapping: you should ensure that 180 | # it _never_ swaps. 181 | # 182 | # Set this property to true to lock the memory: 183 | # 184 | #bootstrap.mlockall: true 185 | 186 | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set 187 | # to the same value, and that the machine has enough memory to allocate 188 | # for Elasticsearch, leaving enough memory for the operating system itself. 189 | # 190 | # You should also make sure that the Elasticsearch process is allowed to lock 191 | # the memory, eg. by using `ulimit -l unlimited`. 192 | 193 | 194 | ############################## Network And HTTP ############################### 195 | 196 | # Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens 197 | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node 198 | # communication. (the range means that if the port is busy, it will automatically 199 | # try the next port). 200 | 201 | # Set the bind address specifically (IPv4 or IPv6): 202 | # 203 | #network.bind_host: 192.168.0.1 204 | 205 | # Set the address other nodes will use to communicate with this node. If not 206 | # set, it is automatically derived. It must point to an actual IP address. 
207 | # 208 | #network.publish_host: 192.168.0.1 209 | 210 | # Set both 'bind_host' and 'publish_host': 211 | # 212 | #network.host: 192.168.0.1 213 | 214 | # Set a custom port for the node to node communication (9300 by default): 215 | # 216 | #transport.tcp.port: 9300 217 | 218 | # Enable compression for all communication between nodes (disabled by default): 219 | # 220 | #transport.tcp.compress: true 221 | 222 | # Set a custom port to listen for HTTP traffic: 223 | # 224 | #http.port: 9200 225 | 226 | # Set a custom allowed content length: 227 | # 228 | #http.max_content_length: 100mb 229 | 230 | # Disable HTTP completely: 231 | # 232 | #http.enabled: false 233 | 234 | 235 | ################################### Gateway ################################### 236 | 237 | # The gateway allows for persisting the cluster state between full cluster 238 | # restarts. Every change to the state (such as adding an index) will be stored 239 | # in the gateway, and when the cluster starts up for the first time, 240 | # it will read its state from the gateway. 241 | 242 | # There are several types of gateway implementations. For more information, see 243 | # . 244 | 245 | # The default gateway type is the "local" gateway (recommended): 246 | # 247 | #gateway.type: local 248 | 249 | # Settings below control how and when to start the initial recovery process on 250 | # a full cluster restart (to reuse as much local data as possible when using shared 251 | # gateway). 252 | 253 | # Allow recovery process after N nodes in a cluster are up: 254 | # 255 | #gateway.recover_after_nodes: 1 256 | 257 | # Set the timeout to initiate the recovery process, once the N nodes 258 | # from previous setting are up (accepts time value): 259 | # 260 | #gateway.recover_after_time: 5m 261 | 262 | # Set how many nodes are expected in this cluster. Once these N nodes 263 | # are up (and recover_after_nodes is met), begin recovery process immediately 264 | # (without waiting for recover_after_time to expire): 265 | # 266 | #gateway.expected_nodes: 2 267 | 268 | 269 | ############################# Recovery Throttling ############################# 270 | 271 | # These settings allow to control the process of shards allocation between 272 | # nodes during initial recovery, replica allocation, rebalancing, 273 | # or when adding and removing nodes. 274 | 275 | # Set the number of concurrent recoveries happening on a node: 276 | # 277 | # 1. During the initial recovery 278 | # 279 | #cluster.routing.allocation.node_initial_primaries_recoveries: 4 280 | # 281 | # 2. During adding/removing nodes, rebalancing, etc 282 | # 283 | #cluster.routing.allocation.node_concurrent_recoveries: 2 284 | 285 | # Set to throttle throughput when recovering (eg. 100mb, by default 20mb): 286 | # 287 | #indices.recovery.max_bytes_per_sec: 20mb 288 | 289 | # Set to limit the number of open concurrent streams when 290 | # recovering a shard from a peer: 291 | # 292 | #indices.recovery.concurrent_streams: 5 293 | 294 | 295 | ################################## Discovery ################################## 296 | 297 | # Discovery infrastructure ensures nodes can be found within a cluster 298 | # and master node is elected. Multicast discovery is the default. 299 | 300 | # Set to ensure a node sees N other master eligible nodes to be considered 301 | # operational within the cluster. This should be set to a quorum/majority of 302 | # the master-eligible nodes in the cluster. 
303 | # 304 | #discovery.zen.minimum_master_nodes: 1 305 | 306 | # Set the time to wait for ping responses from other nodes when discovering. 307 | # Set this option to a higher value on a slow or congested network 308 | # to minimize discovery failures: 309 | # 310 | #discovery.zen.ping.timeout: 3s 311 | 312 | # For more information, see 313 | # 314 | 315 | # Unicast discovery allows to explicitly control which nodes will be used 316 | # to discover the cluster. It can be used when multicast is not present, 317 | # or to restrict the cluster communication-wise. 318 | # 319 | # 1. Disable multicast discovery (enabled by default): 320 | # 321 | #discovery.zen.ping.multicast.enabled: false 322 | # 323 | # 2. Configure an initial list of master nodes in the cluster 324 | # to perform discovery when new nodes (master or data) are started: 325 | # 326 | #discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] 327 | 328 | # EC2 discovery allows to use AWS EC2 API in order to perform discovery. 329 | # 330 | # You have to install the cloud-aws plugin for enabling the EC2 discovery. 331 | # 332 | # For more information, see 333 | # 334 | # 335 | # See 336 | # for a step-by-step tutorial. 337 | 338 | # GCE discovery allows to use Google Compute Engine API in order to perform discovery. 339 | # 340 | # You have to install the cloud-gce plugin for enabling the GCE discovery. 341 | # 342 | # For more information, see . 343 | 344 | # Azure discovery allows to use Azure API in order to perform discovery. 345 | # 346 | # You have to install the cloud-azure plugin for enabling the Azure discovery. 347 | # 348 | # For more information, see . 349 | 350 | ################################## Slow Log ################################## 351 | 352 | # Shard level query and fetch threshold logging. 353 | 354 | #index.search.slowlog.threshold.query.warn: 10s 355 | #index.search.slowlog.threshold.query.info: 5s 356 | #index.search.slowlog.threshold.query.debug: 2s 357 | #index.search.slowlog.threshold.query.trace: 500ms 358 | 359 | #index.search.slowlog.threshold.fetch.warn: 1s 360 | #index.search.slowlog.threshold.fetch.info: 800ms 361 | #index.search.slowlog.threshold.fetch.debug: 500ms 362 | #index.search.slowlog.threshold.fetch.trace: 200ms 363 | 364 | #index.indexing.slowlog.threshold.index.warn: 10s 365 | #index.indexing.slowlog.threshold.index.info: 5s 366 | #index.indexing.slowlog.threshold.index.debug: 2s 367 | #index.indexing.slowlog.threshold.index.trace: 500ms 368 | 369 | ################################## GC Logging ################################ 370 | 371 | #monitor.jvm.gc.young.warn: 1000ms 372 | #monitor.jvm.gc.young.info: 700ms 373 | #monitor.jvm.gc.young.debug: 400ms 374 | 375 | #monitor.jvm.gc.old.warn: 10s 376 | #monitor.jvm.gc.old.info: 5s 377 | #monitor.jvm.gc.old.debug: 2s 378 | 379 | ################################## Security ################################ 380 | 381 | # Uncomment if you want to enable JSONP as a valid return transport on the 382 | # http server. With this enabled, it may pose a security risk, so disabling 383 | # it unless you need it is recommended (it is disabled by default). 
384 | # 385 | #http.jsonp.enable: true 386 | -------------------------------------------------------------------------------- /vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | cluster: sfhsearch 3 | node: sfhnode 4 | num_replicas: 1 5 | num_shards: 5 6 | index_name: serversforhackers --------------------------------------------------------------------------------
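Tying the template and variables together: with the values in `vars/main.yml` above, the templated lines of `/etc/elasticsearch/elasticsearch.yml` render roughly as follows (a sketch showing only the variable-driven settings, not the full file):

```yaml
# Rendered excerpt of templates/elasticsearch.yml.j2 using the defaults in vars/main.yml
cluster.name: sfhsearch
node.name: "sfhnode"
index.number_of_shards: 5
index.number_of_replicas: 1
```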