├── README.md
├── Vagrantfile
├── playbook.yml
└── roles
    └── elasticsearch
        ├── files
        │   └── elasticsearch.yml
        ├── handlers
        │   └── main.yml
        └── tasks
            └── main.yml

/README.md:
--------------------------------------------------------------------------------
# Elasticsearch on Vagrant
precise64 (Ubuntu 12.04 LTS)

This is a Vagrant machine provisioned with Ansible.
It is made for development rather than deployment; a couple of very useful
plugins have been included to aid you in your development.

## Instructions

* Install [Vagrant](http://www.vagrantup.com/)
* Install [Ansible](http://docs.ansible.com/intro_installation.html)
* Clone this repo
* Run `vagrant up` from inside the repository directory

You should now have an `elasticsearch` server up and running and reachable
from your host machine at

    http://localhost:9200

## Included Plugins

### Inquisitor
[polyfractal/elasticsearch-inquisitor](https://github.com/polyfractal/elasticsearch-inquisitor)

[http://localhost:9200/_plugin/inquisitor/](http://localhost:9200/_plugin/inquisitor/)

### Paramedic
[karmi/elasticsearch-paramedic](https://github.com/karmi/elasticsearch-paramedic)

[http://localhost:9200/_plugin/paramedic/index.html](http://localhost:9200/_plugin/paramedic/index.html)

### Bigdesk
[lukas-vlcek/bigdesk](https://github.com/lukas-vlcek/bigdesk)

[http://localhost:9200/_plugin/bigdesk/](http://localhost:9200/_plugin/bigdesk/)

### Head
[mobz/elasticsearch-head](https://github.com/mobz/elasticsearch-head)

[http://localhost:9200/_plugin/head/](http://localhost:9200/_plugin/head/)

---

You can install your own plugins with the standard Elasticsearch plugin executable:

    $ sudo /usr/share/elasticsearch/bin/plugin -install elasticsearch/elasticsearch-analysis-icu/2.1.0

## License

MIT
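---

To change Elasticsearch settings for this box, edit
`roles/elasticsearch/files/elasticsearch.yml` and run `vagrant provision`:
the role copies the file into place and restarts Elasticsearch via a handler.
As a sketch, a single-node development setup might uncomment the settings
below (the cluster name is just an illustrative value; the shard/replica
values are the dev-mode ones suggested in the config file itself):

```yaml
# roles/elasticsearch/files/elasticsearch.yml
cluster.name: vagrant-dev
index.number_of_shards: 1
index.number_of_replicas: 0
```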
---

    $ vagrant up
    Bringing machine 'default' up with 'virtualbox' provider...
    [default] Importing base box 'precise64'...
    [default] Matching MAC address for NAT networking...
    [default] Setting the name of the VM...
    [default] Clearing any previously set forwarded ports...
    [default] Clearing any previously set network interfaces...
    [default] Preparing network interfaces based on configuration...
    [default] Forwarding ports...
    [default] -- 22 => 2222 (adapter 1)
    [default] -- 9200 => 9200 (adapter 1)
    [default] Booting VM...
    [default] Waiting for machine to boot. This may take a few minutes...
    [default] Machine booted and ready!
    [default] Setting hostname...
    [default] Mounting shared folders...
    [default] -- /vagrant
    [default] Running provisioner: ansible...

    PLAY [all] ********************************************************************

    GATHERING FACTS ***************************************************************
    ok: [default]

    TASK: [elasticsearch | Ensure python-pycurl is installed] *********************
    changed: [default]

    TASK: [elasticsearch | Ensure Elasticsearch apt signing key is installed] *****
    changed: [default]

    TASK: [elasticsearch | Ensure Elasticsearch 1.1.x repository is installed] ***
    changed: [default]

    TASK: [elasticsearch | Ensure elasticsearch is installed] *********************
    changed: [default] => (item=openjdk-7-jre-headless,elasticsearch)

    TASK: [elasticsearch | Ensure elasticsearch config is in place] ***************
    changed: [default]

    TASK: [elasticsearch | Ensure head (Plugin) is installed] *********************
    changed: [default]

    TASK: [elasticsearch | Ensure Paramedic (Plugin) is installed] ****************
    changed: [default]

    TASK: [elasticsearch | Ensure Bigdesk (Plugin) is installed] ******************
    changed: [default]

    TASK: [elasticsearch | Ensure Inquisitor (Plugin) is installed] ***************
    changed: [default]

    NOTIFIED: [elasticsearch | restart elasticsearch] *****************************
    changed: [default]

    PLAY RECAP ********************************************************************
    default                    : ok=11   changed=10   unreachable=0    failed=0


---

--------------------------------------------------------------------------------
/Vagrantfile:
--------------------------------------------------------------------------------
# -*- mode: ruby -*-
# vi: set ft=ruby :


VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

  config.vm.box = "precise64"
  config.vm.box_url = "http://files.vagrantup.com/precise64.box"
  config.vm.hostname = "elasticsearch"
  config.vm.network :forwarded_port, guest: 9200, host: 9200
  config.ssh.forward_agent = true

  config.vm.provision :ansible do |ansible|
    ansible.playbook = "playbook.yml"
  end

end
--------------------------------------------------------------------------------
/playbook.yml:
--------------------------------------------------------------------------------
---
- hosts: all
  user: vagrant
  sudo: yes

  roles:
    - { role: elasticsearch }
--------------------------------------------------------------------------------
/roles/elasticsearch/files/elasticsearch.yml:
--------------------------------------------------------------------------------
##################### ElasticSearch Configuration Example #####################

# This file contains an overview of various configuration settings,
# targeted at operations staff. Application developers should
# consult the guide at .
#
# The installation procedure is covered at
# .
#
# ElasticSearch comes with reasonable defaults for most settings,
# so you can try it out without bothering with configuration.
#
# Most of the time, these defaults are just fine for running a production
# cluster. If you're fine-tuning your cluster, or wondering about the
# effect of a certain configuration option, please _do ask_ on the
# mailing list or IRC channel [http://elasticsearch.org/community].

# Any element in the configuration can be replaced with environment variables
# by placing them in ${...} notation. For example:
#
# node.rack: ${RACK_ENV_VAR}

# For information on supported formats and syntax for the config file, see
#


################################### Cluster ###################################

# Cluster name identifies your cluster for auto-discovery. If you're running
# multiple clusters on the same network, make sure you're using unique names.
#
# cluster.name: elasticsearch


#################################### Node #####################################

# Node names are generated dynamically on startup, so you're relieved
# from configuring them manually. You can tie this node to a specific name:
#
# node.name: "Franz Kafka"

# Every node can be configured to allow or deny being eligible as the master,
# and to allow or deny to store the data.
#
# Allow this node to be eligible as a master node (enabled by default):
#
# node.master: true
#
# Allow this node to store data (enabled by default):
#
# node.data: true

# You can exploit these settings to design advanced cluster topologies.
#
# 1. You want this node to never become a master node, only to hold data.
#    This will be the "workhorse" of your cluster.
#
# node.master: false
# node.data: true
#
# 2. You want this node to only serve as a master: to not store any data and
#    to have free resources. This will be the "coordinator" of your cluster.
#
# node.master: true
# node.data: false
#
# 3. You want this node to be neither master nor data node, but
#    to act as a "search load balancer" (fetching data from nodes,
#    aggregating results, etc.)
#
# node.master: false
# node.data: false

# Use the Cluster Health API [http://localhost:9200/_cluster/health], the
# Node Info API [http://localhost:9200/_cluster/nodes] or GUI tools
# such as and
# to inspect the cluster state.

# A node can have generic attributes associated with it, which can later be used
# for customized shard allocation filtering, or allocation awareness. An attribute
# is a simple key value pair, similar to node.key: value. Here is an example:
#
# node.rack: rack314

# By default, multiple nodes are allowed to start from the same installation location.
# To disable it, set the following:
# node.max_local_storage_nodes: 1


#################################### Index ####################################

# You can set a number of options (such as shard/replica options, mapping
# or analyzer definitions, translog settings, ...) for indices globally,
# in this file.
#
# Note, that it makes more sense to configure index settings specifically for
# a certain index, either when creating it or by using the index templates API.
#
# See and
#
# for more information.

# Set the number of shards (splits) of an index (5 by default):
#
# index.number_of_shards: 5

# Set the number of replicas (additional copies) of an index (1 by default):
#
# index.number_of_replicas: 1

# Note, that for development on a local machine, with small indices, it usually
# makes sense to "disable" the distributed features:
#
# index.number_of_shards: 1
# index.number_of_replicas: 0

# These settings directly affect the performance of index and search operations
# in your cluster. Assuming you have enough machines to hold shards and
# replicas, the rule of thumb is:
#
# 1. Having more *shards* enhances the _indexing_ performance and allows to
#    _distribute_ a big index across machines.
# 2. Having more *replicas* enhances the _search_ performance and improves the
#    cluster _availability_.
#
# The "number_of_shards" is a one-time setting for an index.
#
# The "number_of_replicas" can be increased or decreased anytime,
# by using the Index Update Settings API.
#
# ElasticSearch takes care of load balancing, relocating, gathering the
# results from nodes, etc. Experiment with different settings to fine-tune
# your setup.

# Use the Index Status API () to inspect
# the index status.


#################################### Paths ####################################

# Path to directory containing configuration (this file and logging.yml):
#
# path.conf: /path/to/conf

# Path to directory where to store index data allocated for this node.
#
# path.data: /path/to/data
#
# Can optionally include more than one location, causing data to be striped across
# the locations (a la RAID 0) on a file level, favouring locations with most free
# space on creation. For example:
#
# path.data: /path/to/data1,/path/to/data2

# Path to temporary files:
#
# path.work: /path/to/work

# Path to log files:
#
# path.logs: /path/to/logs

# Path to where plugins are installed:
#
# path.plugins: /path/to/plugins


#################################### Plugin ###################################

# If a plugin listed here is not installed for current node, the node will not start.
#
# plugin.mandatory: mapper-attachments,lang-groovy


################################### Memory ####################################

# ElasticSearch performs poorly when JVM starts swapping: you should ensure that
# it _never_ swaps.
#
# Set this property to true to lock the memory:
#
# bootstrap.mlockall: true

# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set
# to the same value, and that the machine has enough memory to allocate
# for ElasticSearch, leaving enough memory for the operating system itself.
#
# You should also make sure that the ElasticSearch process is allowed to lock
# the memory, eg. by using `ulimit -l unlimited`.


############################## Network And HTTP ###############################

# ElasticSearch, by default, binds itself to the 0.0.0.0 address, and listens
# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node
# communication. (the range means that if the port is busy, it will automatically
# try the next port).

# Set the bind address specifically (IPv4 or IPv6):
#
# network.bind_host: 192.168.0.1

# Set the address other nodes will use to communicate with this node. If not
# set, it is automatically derived. It must point to an actual IP address.
#
# network.publish_host: 192.168.0.1

# Set both 'bind_host' and 'publish_host':
# network.host: 127.0.0.1

# Set a custom port for the node to node communication (9300 by default):
#
# transport.tcp.port: 9300

# Enable compression for all communication between nodes (disabled by default):
#
# transport.tcp.compress: true

# Set a custom port to listen for HTTP traffic:
#
# http.port: 9200

# Set a custom allowed content length:
#
# http.max_content_length: 100mb

# Disable HTTP completely:
#
# http.enabled: false


################################### Gateway ###################################

# The gateway allows for persisting the cluster state between full cluster
# restarts. Every change to the state (such as adding an index) will be stored
# in the gateway, and when the cluster starts up for the first time,
# it will read its state from the gateway.

# There are several types of gateway implementations. For more information, see
# .

# The default gateway type is the "local" gateway (recommended):
#
# gateway.type: local

# Settings below control how and when to start the initial recovery process on
# a full cluster restart (to reuse as much local data as possible when using shared
# gateway).

# Allow recovery process after N nodes in a cluster are up:
#
# gateway.recover_after_nodes: 1

# Set the timeout to initiate the recovery process, once the N nodes
# from previous setting are up (accepts time value):
#
# gateway.recover_after_time: 5m

# Set how many nodes are expected in this cluster. Once these N nodes
# are up (and recover_after_nodes is met), begin recovery process immediately
# (without waiting for recover_after_time to expire):
#
# gateway.expected_nodes: 2


############################# Recovery Throttling #############################

# These settings allow to control the process of shards allocation between
# nodes during initial recovery, replica allocation, rebalancing,
# or when adding and removing nodes.

# Set the number of concurrent recoveries happening on a node:
#
# 1. During the initial recovery
#
# cluster.routing.allocation.node_initial_primaries_recoveries: 4
#
# 2. During adding/removing nodes, rebalancing, etc
#
# cluster.routing.allocation.node_concurrent_recoveries: 2

# Set to throttle throughput when recovering (eg. 100mb, by default 20mb):
#
# indices.recovery.max_bytes_per_sec: 20mb

# Set to limit the number of open concurrent streams when
# recovering a shard from a peer:
#
# indices.recovery.concurrent_streams: 5


################################## Discovery ##################################

# Discovery infrastructure ensures nodes can be found within a cluster
# and master node is elected. Multicast discovery is the default.

# Set to ensure a node sees N other master eligible nodes to be considered
# operational within the cluster. It's recommended to set it to a higher value
# than 1 when running more than 2 nodes in the cluster.
#
# discovery.zen.minimum_master_nodes: 1

# Set the time to wait for ping responses from other nodes when discovering.
# Set this option to a higher value on a slow or congested network
# to minimize discovery failures:
#
# discovery.zen.ping.timeout: 3s

# For more information, see
#

# Unicast discovery allows to explicitly control which nodes will be used
# to discover the cluster. It can be used when multicast is not present,
# or to restrict the cluster communication-wise.
#
# 1. Disable multicast discovery (enabled by default):
#
# discovery.zen.ping.multicast.enabled: false
#
# 2. Configure an initial list of master nodes in the cluster
#    to perform discovery when new nodes (master or data) are started:
#
# discovery.zen.ping.unicast.hosts: ["host1", "host2:port"]

# EC2 discovery allows to use AWS EC2 API in order to perform discovery.
#
# You have to install the cloud-aws plugin for enabling the EC2 discovery.
#
# For more information, see
#
#
# See
# for a step-by-step tutorial.


################################## Slow Log ##################################

# Shard level query and fetch threshold logging.

#index.search.slowlog.threshold.query.warn: 10s
#index.search.slowlog.threshold.query.info: 5s
#index.search.slowlog.threshold.query.debug: 2s
#index.search.slowlog.threshold.query.trace: 500ms

#index.search.slowlog.threshold.fetch.warn: 1s
#index.search.slowlog.threshold.fetch.info: 800ms
#index.search.slowlog.threshold.fetch.debug: 500ms
#index.search.slowlog.threshold.fetch.trace: 200ms

#index.indexing.slowlog.threshold.index.warn: 10s
#index.indexing.slowlog.threshold.index.info: 5s
#index.indexing.slowlog.threshold.index.debug: 2s
#index.indexing.slowlog.threshold.index.trace: 500ms

################################## GC Logging ################################

#monitor.jvm.gc.ParNew.warn: 1000ms
#monitor.jvm.gc.ParNew.info: 700ms
#monitor.jvm.gc.ParNew.debug: 400ms

#monitor.jvm.gc.ConcurrentMarkSweep.warn: 10s
#monitor.jvm.gc.ConcurrentMarkSweep.info: 5s
#monitor.jvm.gc.ConcurrentMarkSweep.debug: 2s
--------------------------------------------------------------------------------
/roles/elasticsearch/handlers/main.yml:
--------------------------------------------------------------------------------
---
- name: restart elasticsearch
  service: name=elasticsearch state=restarted
--------------------------------------------------------------------------------
/roles/elasticsearch/tasks/main.yml:
--------------------------------------------------------------------------------
---

- name: Ensure python-pycurl is installed
  apt: pkg=python-pycurl state=installed

- name: Ensure Elasticsearch apt signing key is installed
  apt_key: url=http://packages.elasticsearch.org/GPG-KEY-elasticsearch state=present

- name: Ensure Elasticsearch 1.4 repository is installed
  apt_repository: repo="deb http://packages.elasticsearch.org/elasticsearch/1.4/debian stable main"
                  state=present update_cache=yes

- name: Ensure elasticsearch is installed
  apt: pkg={{item}} state=latest
  with_items:
    - openjdk-7-jre-headless
    - elasticsearch

- name: Ensure elasticsearch config is in place
  copy: src=elasticsearch.yml dest=/etc/elasticsearch/elasticsearch.yml
  notify:
    - restart elasticsearch

- name: Ensure elasticsearch service is enabled
  service: name=elasticsearch enabled=yes

- name: Ensure head (Plugin) is installed
  command: /usr/share/elasticsearch/bin/plugin -install mobz/elasticsearch-head creates=/usr/share/elasticsearch/plugins/head/_site/.gitignore

- name: Ensure Paramedic (Plugin) is installed
  command: /usr/share/elasticsearch/bin/plugin -install karmi/elasticsearch-paramedic creates=/usr/share/elasticsearch/plugins/paramedic/_site/.gitignore

- name: Ensure Bigdesk (Plugin) is installed
  command: /usr/share/elasticsearch/bin/plugin -install lukas-vlcek/bigdesk creates=/usr/share/elasticsearch/plugins/bigdesk/_site/.gitignore

- name: Ensure Inquisitor (Plugin) is installed
  command: /usr/share/elasticsearch/bin/plugin -install polyfractal/elasticsearch-inquisitor creates=/usr/share/elasticsearch/plugins/inquisitor/.gitignore
--------------------------------------------------------------------------------
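
The plugin tasks in `roles/elasticsearch/tasks/main.yml` all follow one pattern: run the plugin executable via the `command` module, with `creates=` pointing at a file the installation drops so the task is idempotent on re-provisioning. As a sketch, the ICU analysis plugin mentioned in the README could be added to the role in the same style (the `creates=` path here is an assumption about where that plugin unpacks, so verify it on the box before relying on it):

```yaml
# roles/elasticsearch/tasks/main.yml — hypothetical extra task, same pattern as the tasks above
- name: Ensure ICU analysis (Plugin) is installed
  command: /usr/share/elasticsearch/bin/plugin -install elasticsearch/elasticsearch-analysis-icu/2.1.0
           creates=/usr/share/elasticsearch/plugins/analysis-icu
  notify:
    - restart elasticsearch
```

The `notify` handler restart matters here: a newly installed plugin is only picked up by Elasticsearch after the service restarts, which the role's existing `restart elasticsearch` handler takes care of.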