├── elasticsearch_runner
│   ├── __init__.py
│   ├── test
│   │   ├── __init__.py
│   │   ├── test_configuration.py
│   │   └── test_elasticsearch_runner.py
│   ├── configuration.py
│   ├── resources
│   │   ├── embedded_logging.yml
│   │   └── embedded_elasticsearch.yml
│   └── runner.py
├── .gitignore
├── requirements.txt
├── .travis.yml
├── setup.py
└── readme.md

/elasticsearch_runner/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/elasticsearch_runner/test/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | temp/
2 | *.pyc
3 | .idea
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | PyYAML
2 | elasticsearch
3 | requests
4 | psutil
5 | lxml
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 |   - "2.7"
4 |   - "3.3"
5 |   - "3.4"
6 |   - "3.5"
7 | install: "pip install -r requirements.txt"
8 | script: nosetests
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup(
4 |     name='elasticsearch-runner',
5 |     version='0.1',
6 |     packages=['elasticsearch_runner.resources', 'elasticsearch_runner', 'elasticsearch_runner.test'],
7 |     url='https://bitbucket.org/comperio/comperio-text-analytics',
8 |     license='For internal use only.',
9 |     author='Andre Lynum',
10 |     author_email='andre.lynum@comperiosearch.com',
11 |     description='Lightweight runner for transient Elasticsearch instances, e.g. for testing.',
12 |     install_requires=['PyYAML', 'elasticsearch', 'requests', 'psutil', 'lxml'],
13 |     # package_data keys must be package names; the bare 'resources' key matched no package,
14 |     # so the bundled config files were left out of built distributions. runner.py needs both files.
15 |     package_data={'elasticsearch_runner.resources': ['embedded_elasticsearch.yml', 'embedded_logging.yml']}
16 | )
--------------------------------------------------------------------------------
/elasticsearch_runner/test/test_configuration.py:
--------------------------------------------------------------------------------
1 | try:
2 |     from StringIO import StringIO
3 | except ImportError:
4 |     from io import StringIO
5 | from unittest import TestCase
6 | 
7 | import yaml
8 | 
9 | from elasticsearch_runner.configuration import generate_config, serialize_config
10 | 
11 | __author__ = 'alynum'
12 | 
13 | 
14 | class TestConfiguration(TestCase):
15 |     def test_generate_config(self):
16 |         self.assertEqual({
17 |             'marvel': {'agent': {'enabled': 'false'}},
18 |             'index': {
19 |                 'number_of_shards': 1,
20 |                 'number_of_replicas': 0,
21 |             },
22 |             'http': {
23 |                 'cors': {
24 |                     'enabled': True
25 |                 }
26 |             },
27 |             'cluster': {'name': 'ba'}},
28 |             generate_config(cluster_name='ba'))
29 | 
30 |     def test_serialize_config(self):
31 |         s = StringIO()
32 |         c = generate_config(cluster_name='ba')
33 |         serialize_config(s, c)
34 |         s.seek(0)
35 | 
36 |         # safe_load is sufficient for the plain dicts produced by serialize_config
37 |         self.assertEqual(c, yaml.safe_load(s))
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | ## Elasticsearch test runner [![Build Status](https://travis-ci.org/comperiosearch/python-elasticsearch-runner.svg)](https://travis-ci.org/comperiosearch/python-elasticsearch-runner)
2 | 
3 | python-elasticsearch-runner provides a standalone Python runner for Elasticsearch. It is intended
4 | for transient and lightweight usage such as small integration tests.
5 | 
6 | The runner takes about 10 seconds to start, so it should be part of at least module-level setup/teardown
7 | in order to keep test run time down.
8 | 
9 | The following code sets up the runner instance at module level with nosetests if placed in `__init__.py`:
10 | 
11 | ```python
12 | from elasticsearch_runner.runner import ElasticsearchRunner
13 | 
14 | es_runner = ElasticsearchRunner()
15 | 
16 | def setup():
17 |     es_runner.install()
18 |     es_runner.run()
19 |     es_runner.wait_for_green()
20 | 
21 | def teardown():
22 |     if es_runner and es_runner.is_running():
23 |         es_runner.stop()
24 | ```
25 | 
26 | The runner instance can then be queried for the port number when connecting:
27 | 
28 | ```python
29 | from elasticsearch import Elasticsearch
30 | 
31 | es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port])
32 | ```
33 | 
34 | ### Some details
35 | Runs with Python 2.7, 3.3, 3.4 and 3.5.
36 | By default, Elasticsearch version 2.1.0 is used, and everything is installed into the HOME/.elasticsearch_runner (most systems) or APPDATA/elasticsearch_runner (Windows) folder.
37 | 
38 | The runner accepts parameters for the Elasticsearch version and the install path.
39 | The install path is where the Elasticsearch software package and data storage will be kept.
40 | 
41 | ```python
42 | es_runner = ElasticsearchRunner(version='1.0.0', install_path='/var/test/')
43 | ```
44 | 
45 | The install path can also be provided through the environment variable `elasticsearch-runner-install-path`; if set, it overrides the `install_path` parameter.
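46 | 
47 | This is handy for pointing every runner in a CI job at a shared, cached install location without touching
48 | test code. A minimal sketch (the variable name matches the one read in `ElasticsearchRunner.__init__`;
49 | the paths are just illustrations):
50 | 
51 | ```python
52 | import os
53 | 
54 | from elasticsearch_runner.runner import ElasticsearchRunner
55 | 
56 | # must be set before the runner is constructed; it is read in __init__
57 | os.environ['elasticsearch-runner-install-path'] = '/var/cache/es-runner'
58 | 
59 | es_runner = ElasticsearchRunner(install_path='/tmp/ignored')  # the environment variable wins
60 | ```
61 | 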
--------------------------------------------------------------------------------
/elasticsearch_runner/configuration.py:
--------------------------------------------------------------------------------
1 | import os
2 | from random import randint
3 | 
4 | import yaml
5 | 
6 | 
7 | def generate_config(cluster_name=None, log_path=None, data_path=None):
8 |     """
9 |     Generates basic Elasticsearch configuration for setting up the runner.
10 | 
11 |     :param cluster_name: Set as cluster.name option.
12 |     :type cluster_name: str|unicode
13 |     :param log_path: Set as path.log option.
14 |     :type log_path: str|unicode
15 |     :param data_path: Set as path.data option.
16 |     :type data_path: str|unicode
17 |     :rtype : dict
18 |     :return: Elasticsearch configuration as dict.
19 |     """
20 |     config = {
21 |         'marvel': {'agent': {'enabled': 'false'}},
22 |         'index': {
23 |             'number_of_shards': 1,
24 |             'number_of_replicas': 0,
25 |         },
26 |         'http': {
27 |             'cors': {
28 |                 'enabled': True
29 |             }
30 |         }
31 |     }
32 | 
33 |     if not cluster_name:
34 |         cluster_name = generate_cluster_name()
35 | 
36 |     config['cluster'] = {'name': cluster_name}
37 | 
38 |     if log_path or data_path:
39 |         path = {}
40 | 
41 |         if log_path:
42 |             path['log'] = log_path
43 | 
44 |         if data_path:
45 |             path['data'] = data_path
46 | 
47 |         config['path'] = path
48 | 
49 |     return config
50 | 
51 | 
52 | def generate_cluster_name(prefix='elasticsearch_runner'):
53 |     """
54 |     Generates a cluster name with a prefix and a random number.
55 | 
56 |     :param prefix: Cluster name prefix.
57 |     :rtype : str|unicode
58 |     :return: cluster name string
59 | 
60 |     TODO make this collision safe
61 |     """
62 |     # zero-pad the random number so the generated name never contains spaces
63 |     cluster_name = '%s_%07d' % (prefix, randint(1, 9999999))
64 | 
65 |     return cluster_name
66 | 
67 | 
68 | def serialize_config(stream, config):
69 |     """
70 |     Serialize Elasticsearch configuration dict to a YAML formatted file.
71 | 
72 |     :param stream: Stream to write the YAML configuration to.
73 |     :param config: Elasticsearch configuration as dict.
74 |     :type config: dict
75 |     :rtype : dict
76 |     :return: The passed configuration dict.
77 |     """
78 |     yaml.dump(config, stream=stream)
79 | 
80 |     return config
81 | 
82 | 
83 | def package_path():
84 |     """
85 |     Returns the path to the root of the package directory.
86 | 
87 |     :rtype : str|unicode
88 |     :return: The root project path as a string.
89 |     """
90 |     self_path = os.path.dirname(os.path.abspath(__file__))
91 | 
92 |     return os.path.abspath(os.path.join(self_path, '..'))
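93 | 
94 | 
95 | if __name__ == '__main__':
96 |     # Minimal demo of the helpers above (not part of the runner's API): build a
97 |     # config for a hypothetical cluster and print the YAML that would be written
98 |     # to elasticsearch.yml.
99 |     import sys
100 | 
101 |     demo_config = generate_config(cluster_name='demo', data_path='/tmp/es-data')
102 |     serialize_config(sys.stdout, demo_config)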
--------------------------------------------------------------------------------
/elasticsearch_runner/resources/embedded_logging.yml:
--------------------------------------------------------------------------------
1 | # you can override this by setting a system property, for example -Des.logger.level=DEBUG
2 | es.logger.level: INFO
3 | rootLogger: ${es.logger.level}, console, file
4 | logger:
5 |   # log action execution errors for easier debugging
6 |   action: DEBUG
7 | 
8 |   # deprecation logging, turn to DEBUG to see them
9 |   deprecation: INFO, deprecation_log_file
10 | 
11 |   # reduce the logging for aws, too much is logged under the default INFO
12 |   com.amazonaws: WARN
13 |   # aws will try to do some sketchy JMX stuff, but it's not needed.
14 |   com.amazonaws.jmx.SdkMBeanRegistrySupport: ERROR
15 |   com.amazonaws.metrics.AwsSdkMetrics: ERROR
16 | 
17 |   org.apache.http: INFO
18 | 
19 |   # gateway
20 |   #gateway: DEBUG
21 |   #index.gateway: DEBUG
22 | 
23 |   # peer shard recovery
24 |   #indices.recovery: DEBUG
25 | 
26 |   # discovery
27 |   #discovery: TRACE
28 | 
29 |   index.search.slowlog: TRACE, index_search_slow_log_file
30 |   index.indexing.slowlog: TRACE, index_indexing_slow_log_file
31 | 
32 | additivity:
33 |   index.search.slowlog: false
34 |   index.indexing.slowlog: false
35 |   deprecation: false
36 | 
37 | appender:
38 |   console:
39 |     type: console
40 |     layout:
41 |       type: consolePattern
42 |       conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
43 | 
44 |   file:
45 |     type: dailyRollingFile
46 |     file: ${path.logs}/${cluster.name}.log
47 |     datePattern: "'.'yyyy-MM-dd"
48 |     layout:
49 |       type: pattern
50 |       conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %.10000m%n"
51 | 
52 |   # Use the following log4j-extras RollingFileAppender to enable gzip compression of log files.
53 |   # For more information see https://logging.apache.org/log4j/extras/apidocs/org/apache/log4j/rolling/RollingFileAppender.html
54 |   #file:
55 |     #type: extrasRollingFile
56 |     #file: ${path.logs}/${cluster.name}.log
57 |     #rollingPolicy: timeBased
58 |     #rollingPolicy.FileNamePattern: ${path.logs}/${cluster.name}.log.%d{yyyy-MM-dd}.gz
59 |     #layout:
60 |       #type: pattern
61 |       #conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
62 | 
63 |   deprecation_log_file:
64 |     type: dailyRollingFile
65 |     file: ${path.logs}/${cluster.name}_deprecation.log
66 |     datePattern: "'.'yyyy-MM-dd"
67 |     layout:
68 |       type: pattern
69 |       conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
70 | 
71 |   index_search_slow_log_file:
72 |     type: dailyRollingFile
73 |     file: ${path.logs}/${cluster.name}_index_search_slowlog.log
74 |     datePattern: "'.'yyyy-MM-dd"
75 |     layout:
76 |       type: pattern
77 |       conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
78 | 
79 |   index_indexing_slow_log_file:
80 |     type: dailyRollingFile
81 |     file: ${path.logs}/${cluster.name}_index_indexing_slowlog.log
82 |     datePattern: "'.'yyyy-MM-dd"
83 |     layout:
84 |       type: pattern
85 |       conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
86 | 
--------------------------------------------------------------------------------
/elasticsearch_runner/test/test_elasticsearch_runner.py:
--------------------------------------------------------------------------------
1 | import os
2 | import io
3 | from unittest import TestCase
4 | import json
5 | import requests
6 | from elasticsearch_runner.runner import ElasticsearchRunner, process_exists, parse_es_log_header
7 | 
8 | 
9 | class TestElasticsearchRunner(TestCase):
10 |     def __init__(self, methodName='runTest'):
11 |         super(TestElasticsearchRunner, self).__init__(methodName)
12 |         self.runner = None
13 |         self.runner2 = None
14 | 
15 |     def tearDown(self):
16 |         super(TestElasticsearchRunner, self).tearDown()
17 | 
18 |         if self.runner and self.runner.is_running():
19 |             self.runner.stop()
20 | 
21 |         if self.runner2 and self.runner2.is_running():
22 |             self.runner2.stop()
23 | 
24 |     def test_run(self):
25 |         self.runner = ElasticsearchRunner()
26 |         self.runner.install()
27 |         self.runner.run()
28 |         self.runner.wait_for_green()
29 | 
30 |         self.assertTrue(self.runner.is_running())
31 | 
32 |         health_resp = requests.get('http://localhost:%d/_cluster/health' % self.runner.es_state.port)
33 |         self.assertEqual(200, health_resp.status_code)
34 |         health_data = json.loads(health_resp.text)
35 |         self.assertEqual(health_data['status'], 'green')
36 | 
37 |         server_pid = self.runner.es_state.server_pid
38 | 
39 |         self.runner.stop()
40 | 
41 |         self.assertFalse(process_exists(server_pid))
42 |         self.assertFalse(self.runner.is_running())
43 |         self.assertIsNone(self.runner.es_state)
44 | 
45 |     def test_run_multiple(self):
46 |         self.runner = ElasticsearchRunner()
47 |         self.runner.install()
48 |         self.runner.run()
49 |         self.runner.wait_for_green()
50 | 
51 |         self.assertTrue(self.runner.is_running())
52 | 
53 |         self.runner2 = ElasticsearchRunner()
54 |         self.runner2.install()
55 |         self.runner2.run()
56 |         self.runner2.wait_for_green()
57 | 
58 |         self.assertTrue(self.runner2.is_running())
59 | 
60 |         health_resp = requests.get('http://localhost:%d/_cluster/health' % self.runner.es_state.port)
61 |         self.assertEqual(200, health_resp.status_code)
62 |         health_data = json.loads(health_resp.text)
63 |         self.assertEqual(health_data['status'], 'green')
64 | 
65 |         health_resp = requests.get('http://localhost:%d/_cluster/health' % self.runner2.es_state.port)
66 |         self.assertEqual(200,
health_resp.status_code) 67 | health_data = json.loads(health_resp.text) 68 | self.assertEqual(health_data['status'], 'green') 69 | 70 | server_pid = self.runner.es_state.server_pid 71 | 72 | self.runner.stop() 73 | 74 | self.assertFalse(process_exists(server_pid)) 75 | self.assertFalse(self.runner.is_running()) 76 | self.assertIsNone(self.runner.es_state) 77 | 78 | server_pid = self.runner2.es_state.server_pid 79 | 80 | self.runner2.stop() 81 | 82 | self.assertFalse(process_exists(server_pid)) 83 | self.assertFalse(self.runner2.is_running()) 84 | self.assertIsNone(self.runner2.es_state) 85 | 86 | def test_es_wrapper_call(self): 87 | # NB! beware that if the environment variable 'elasticsearch-runner-install-path' is set this test will fail 88 | runner = ElasticsearchRunner(install_path='fakepath') 89 | self.assertEqual(runner._es_wrapper_call('nt'), 90 | [os.path.sep.join(['fakepath', runner.version_folder, 'bin', 'elasticsearch.bat'])]) 91 | self.assertEqual(runner._es_wrapper_call('posix'), 92 | ['/bin/sh', os.path.sep.join(['fakepath', runner.version_folder, 'bin', 'elasticsearch'])]) 93 | 94 | def test_run_version2(self): 95 | es_version = '2.1.0' 96 | self.runner = ElasticsearchRunner(version=es_version) 97 | self.runner.install() 98 | self.runner.run() 99 | self.runner.wait_for_green() 100 | 101 | self.assertTrue(self.runner.is_running()) 102 | 103 | health_resp = requests.get('http://localhost:%d/_cluster/health' % self.runner.es_state.port) 104 | self.assertEqual(200, health_resp.status_code) 105 | health_data = json.loads(health_resp.text) 106 | self.assertEqual(health_data['status'], 'green') 107 | status = requests.get('http://localhost:%d' % self.runner.es_state.port) 108 | status_data = json.loads(status.text) 109 | self.assertEqual(status_data['version']['number'], es_version) 110 | server_pid = self.runner.es_state.server_pid 111 | 112 | self.runner.stop() 113 | 114 | self.assertFalse(process_exists(server_pid)) 115 | self.assertFalse(self.runner.is_running()) 116 | self.assertIsNone(self.runner.es_state) 117 | 118 | def test_run_version15(self): 119 | es_version = '1.5.2' 120 | self.runner = ElasticsearchRunner(version=es_version) 121 | self.runner.install() 122 | self.runner.run() 123 | self.runner.wait_for_green() 124 | 125 | self.assertTrue(self.runner.is_running()) 126 | 127 | health_resp = requests.get('http://localhost:%d/_cluster/health' % self.runner.es_state.port) 128 | self.assertEqual(200, health_resp.status_code) 129 | health_data = json.loads(health_resp.text) 130 | self.assertEqual(health_data['status'], 'green') 131 | status = requests.get('http://localhost:%d' % self.runner.es_state.port) 132 | status_data = json.loads(status.text) 133 | self.assertEqual(status_data['version']['number'], es_version) 134 | server_pid = self.runner.es_state.server_pid 135 | 136 | self.runner.stop() 137 | 138 | self.assertFalse(process_exists(server_pid)) 139 | self.assertFalse(self.runner.is_running()) 140 | self.assertIsNone(self.runner.es_state) 141 | 142 | def test_parse_log_header_esv2_format(self): 143 | testStream = io.StringIO() 144 | testStream.write( 145 | u"[2015-10-08 11:21:02,427][INFO ][node ] [Hero] version[2.0.0-rc1], pid[208], build[4757962/2015-10-01T10:06:08Z]\n") 146 | testStream.write( 147 | u"[2015-10-08 11:21:09,025][INFO ][http ] [Hero] publish_address {127.0.0.1:9200}, bound_addresses {127.0.0.1:9200}, {[::1]:9200}\n") 148 | testStream.write(u"[2015-10-08 11:04:15,784][INFO ][node ] [Hero] started\n") 149 | testStream.seek(0) 150 | server_pid, es_port 
= parse_es_log_header(testStream) 151 | self.assertEqual(server_pid, 208) 152 | self.assertEqual(es_port, 9200) 153 | 154 | def test_parse_log_header_esv1_format(self): 155 | testStream = io.StringIO() 156 | testStream.write( 157 | u"[2015-10-08 11:04:09,252][INFO ][node ] [Astronomer] version[1.7.2], pid[8248], build[e43676b/2015-09-14T09:49:53Z]\n") 158 | testStream.write( 159 | u"[2015-10-08 11:04:15,784][INFO ][http ] [Astronomer] bound_address {inet[/0:0:0:0:0:0:0:0:9200]}, publish_address {inet[/10.0.80.134:9200]}\n") 160 | testStream.write(u"[2015-10-08 11:04:15,784][INFO ][node ] [Astronomer] started\n") 161 | testStream.seek(0) 162 | server_pid, es_port = parse_es_log_header(testStream) 163 | self.assertEqual(server_pid, 8248) 164 | self.assertEqual(es_port, 9200) 165 | -------------------------------------------------------------------------------- /elasticsearch_runner/resources/embedded_elasticsearch.yml: -------------------------------------------------------------------------------- 1 | ##################### Elasticsearch Configuration Example ##################### 2 | 3 | # This file contains an overview of various configuration settings, 4 | # targeted at operations staff. Application developers should 5 | # consult the guide at . 6 | # 7 | # The installation procedure is covered at 8 | # . 9 | # 10 | # Elasticsearch comes with reasonable defaults for most settings, 11 | # so you can try it out without bothering with configuration. 12 | # 13 | # Most of the time, these defaults are just fine for running a production 14 | # cluster. If you're fine-tuning your cluster, or wondering about the 15 | # effect of certain configuration option, please _do ask_ on the 16 | # mailing list or IRC channel [http://elasticsearch.org/community]. 17 | 18 | # Any element in the configuration can be replaced with environment variables 19 | # by placing them in ${...} notation. For example: 20 | # 21 | #node.rack: ${RACK_ENV_VAR} 22 | 23 | # For information on supported formats and syntax for the config file, see 24 | # 25 | 26 | 27 | ################################### Cluster ################################### 28 | 29 | # Cluster name identifies your cluster for auto-discovery. If you're running 30 | # multiple clusters on the same network, make sure you're using unique names. 31 | # 32 | # cluster.name: no-cluster 33 | 34 | 35 | #################################### Node ##################################### 36 | 37 | # Node names are generated dynamically on startup, so you're relieved 38 | # from configuring them manually. You can tie this node to a specific name: 39 | # 40 | node.name: "Embedded Elasticsearch for testing" 41 | 42 | # Every node can be configured to allow or deny being eligible as the master, 43 | # and to allow or deny to store the data. 44 | # 45 | # Allow this node to be eligible as a master node (enabled by default): 46 | # 47 | #node.master: true 48 | # 49 | # Allow this node to store data (enabled by default): 50 | # 51 | #node.data: true 52 | 53 | # You can exploit these settings to design advanced cluster topologies. 54 | # 55 | # 1. You want this node to never become a master node, only to hold data. 56 | # This will be the "workhorse" of your cluster. 57 | # 58 | #node.master: false 59 | #node.data: true 60 | # 61 | # 2. You want this node to only serve as a master: to not store any data and 62 | # to have free resources. This will be the "coordinator" of your cluster. 63 | # 64 | #node.master: true 65 | #node.data: false 66 | # 67 | # 3. 
You want this node to be neither master nor data node, but 68 | # to act as a "search load balancer" (fetching data from nodes, 69 | # aggregating results, etc.) 70 | # 71 | #node.master: false 72 | #node.data: false 73 | 74 | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the 75 | # Node Info API [http://localhost:9200/_nodes] or GUI tools 76 | # such as , 77 | # , 78 | # and 79 | # to inspect the cluster state. 80 | 81 | # A node can have generic attributes associated with it, which can later be used 82 | # for customized shard allocation filtering, or allocation awareness. An attribute 83 | # is a simple key value pair, similar to node.key: value, here is an example: 84 | # 85 | #node.rack: rack314 86 | 87 | # By default, multiple nodes are allowed to start from the same installation location 88 | # to disable it, set the following: 89 | #node.max_local_storage_nodes: 1 90 | 91 | 92 | #################################### Index #################################### 93 | 94 | # You can set a number of options (such as shard/replica options, mapping 95 | # or analyzer definitions, translog settings, ...) for indices globally, 96 | # in this file. 97 | # 98 | # Note, that it makes more sense to configure index settings specifically for 99 | # a certain index, either when creating it or by using the index templates API. 100 | # 101 | # See and 102 | # 103 | # for more information. 104 | 105 | # Set the number of shards (splits) of an index (5 by default): 106 | # 107 | #index.number_of_shards: 5 108 | 109 | # Set the number of replicas (additional copies) of an index (1 by default): 110 | # 111 | #index.number_of_replicas: 1 112 | 113 | # Note, that for development on a local machine, with small indices, it usually 114 | # makes sense to "disable" the distributed features: 115 | # 116 | index.number_of_shards: 1 117 | index.number_of_replicas: 0 118 | 119 | # These settings directly affect the performance of index and search operations 120 | # in your cluster. Assuming you have enough machines to hold shards and 121 | # replicas, the rule of thumb is: 122 | # 123 | # 1. Having more *shards* enhances the _indexing_ performance and allows to 124 | # _distribute_ a big index across machines. 125 | # 2. Having more *replicas* enhances the _search_ performance and improves the 126 | # cluster _availability_. 127 | # 128 | # The "number_of_shards" is a one-time setting for an index. 129 | # 130 | # The "number_of_replicas" can be increased or decreased anytime, 131 | # by using the Index Update Settings API. 132 | # 133 | # Elasticsearch takes care about load balancing, relocating, gathering the 134 | # results from nodes, etc. Experiment with different settings to fine-tune 135 | # your setup. 136 | 137 | # Use the Index Status API () to inspect 138 | # the index status. 139 | 140 | 141 | #################################### Paths #################################### 142 | 143 | # Path to directory containing configuration (this file and logging.yml): 144 | # 145 | #path.conf: /path/to/conf 146 | 147 | # Path to directory where to store index data allocated for this node. 148 | # 149 | #path.data: /path/to/data 150 | # 151 | # Can optionally include more than one location, causing data to be striped across 152 | # the locations (a la RAID 0) on a file level, favouring locations with most free 153 | # space on creation. 
For example: 154 | # 155 | #path.data: /path/to/data1,/path/to/data2 156 | 157 | # Path to temporary files: 158 | # 159 | #path.work: /path/to/work 160 | 161 | # Path to log files: 162 | # 163 | #path.logs: /path/to/logs 164 | 165 | # Path to where plugins are installed: 166 | # 167 | #path.plugins: /path/to/plugins 168 | 169 | 170 | #################################### Plugin ################################### 171 | 172 | # If a plugin listed here is not installed for current node, the node will not start. 173 | # 174 | #plugin.mandatory: mapper-attachments,lang-groovy 175 | 176 | 177 | ################################### Memory #################################### 178 | 179 | # Elasticsearch performs poorly when JVM starts swapping: you should ensure that 180 | # it _never_ swaps. 181 | # 182 | # Set this property to true to lock the memory: 183 | # 184 | #bootstrap.mlockall: true 185 | 186 | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set 187 | # to the same value, and that the machine has enough memory to allocate 188 | # for Elasticsearch, leaving enough memory for the operating system itself. 189 | # 190 | # You should also make sure that the Elasticsearch process is allowed to lock 191 | # the memory, eg. by using `ulimit -l unlimited`. 192 | 193 | 194 | ############################## Network And HTTP ############################### 195 | 196 | # Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens 197 | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node 198 | # communication. (the range means that if the port is busy, it will automatically 199 | # try the next port). 200 | 201 | # Set the bind address specifically (IPv4 or IPv6): 202 | # 203 | #network.bind_host: 192.168.0.1 204 | 205 | # Set the address other nodes will use to communicate with this node. If not 206 | # set, it is automatically derived. It must point to an actual IP address. 207 | # 208 | #network.publish_host: 192.168.0.1 209 | 210 | # Set both 'bind_host' and 'publish_host': 211 | # 212 | #network.host: 192.168.0.1 213 | 214 | # Set a custom port for the node to node communication (9300 by default): 215 | # 216 | #transport.tcp.port: 9300 217 | 218 | # Enable compression for all communication between nodes (disabled by default): 219 | # 220 | #transport.tcp.compress: true 221 | 222 | # Set a custom port to listen for HTTP traffic: 223 | # 224 | #http.port: 9200 225 | 226 | # Set a custom allowed content length: 227 | # 228 | #http.max_content_length: 100mb 229 | 230 | # Disable HTTP completely: 231 | # 232 | #http.enabled: false 233 | 234 | 235 | ################################### Gateway ################################### 236 | 237 | # The gateway allows for persisting the cluster state between full cluster 238 | # restarts. Every change to the state (such as adding an index) will be stored 239 | # in the gateway, and when the cluster starts up for the first time, 240 | # it will read its state from the gateway. 241 | 242 | # There are several types of gateway implementations. For more information, see 243 | # . 244 | 245 | # The default gateway type is the "local" gateway (recommended): 246 | # 247 | #gateway.type: local 248 | 249 | # Settings below control how and when to start the initial recovery process on 250 | # a full cluster restart (to reuse as much local data as possible when using shared 251 | # gateway). 
252 | 253 | # Allow recovery process after N nodes in a cluster are up: 254 | # 255 | #gateway.recover_after_nodes: 1 256 | 257 | # Set the timeout to initiate the recovery process, once the N nodes 258 | # from previous setting are up (accepts time value): 259 | # 260 | #gateway.recover_after_time: 5m 261 | 262 | # Set how many nodes are expected in this cluster. Once these N nodes 263 | # are up (and recover_after_nodes is met), begin recovery process immediately 264 | # (without waiting for recover_after_time to expire): 265 | # 266 | #gateway.expected_nodes: 2 267 | 268 | 269 | ############################# Recovery Throttling ############################# 270 | 271 | # These settings allow to control the process of shards allocation between 272 | # nodes during initial recovery, replica allocation, rebalancing, 273 | # or when adding and removing nodes. 274 | 275 | # Set the number of concurrent recoveries happening on a node: 276 | # 277 | # 1. During the initial recovery 278 | # 279 | #cluster.routing.allocation.node_initial_primaries_recoveries: 4 280 | # 281 | # 2. During adding/removing nodes, rebalancing, etc 282 | # 283 | #cluster.routing.allocation.node_concurrent_recoveries: 2 284 | 285 | # Set to throttle throughput when recovering (eg. 100mb, by default 20mb): 286 | # 287 | #indices.recovery.max_bytes_per_sec: 20mb 288 | 289 | # Set to limit the number of open concurrent streams when 290 | # recovering a shard from a peer: 291 | # 292 | #indices.recovery.concurrent_streams: 5 293 | 294 | 295 | ################################## Discovery ################################## 296 | 297 | # Discovery infrastructure ensures nodes can be found within a cluster 298 | # and master node is elected. Multicast discovery is the default. 299 | 300 | # Set to ensure a node sees N other master eligible nodes to be considered 301 | # operational within the cluster. This should be set to a quorum/majority of 302 | # the master-eligible nodes in the cluster. 303 | # 304 | #discovery.zen.minimum_master_nodes: 1 305 | 306 | # Set the time to wait for ping responses from other nodes when discovering. 307 | # Set this option to a higher value on a slow or congested network 308 | # to minimize discovery failures: 309 | # 310 | #discovery.zen.ping.timeout: 3s 311 | 312 | # For more information, see 313 | # 314 | 315 | # Unicast discovery allows to explicitly control which nodes will be used 316 | # to discover the cluster. It can be used when multicast is not present, 317 | # or to restrict the cluster communication-wise. 318 | # 319 | # 1. Disable multicast discovery (enabled by default): 320 | # 321 | #discovery.zen.ping.multicast.enabled: false 322 | # 323 | # 2. Configure an initial list of master nodes in the cluster 324 | # to perform discovery when new nodes (master or data) are started: 325 | # 326 | #discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] 327 | 328 | # EC2 discovery allows to use AWS EC2 API in order to perform discovery. 329 | # 330 | # You have to install the cloud-aws plugin for enabling the EC2 discovery. 331 | # 332 | # For more information, see 333 | # 334 | # 335 | # See 336 | # for a step-by-step tutorial. 337 | 338 | # GCE discovery allows to use Google Compute Engine API in order to perform discovery. 339 | # 340 | # You have to install the cloud-gce plugin for enabling the GCE discovery. 341 | # 342 | # For more information, see . 343 | 344 | # Azure discovery allows to use Azure API in order to perform discovery. 
345 | #
346 | # You have to install the cloud-azure plugin for enabling the Azure discovery.
347 | #
348 | # For more information, see .
349 | 
350 | ################################## Slow Log ##################################
351 | 
352 | # Shard level query and fetch threshold logging.
353 | 
354 | #index.search.slowlog.threshold.query.warn: 10s
355 | #index.search.slowlog.threshold.query.info: 5s
356 | #index.search.slowlog.threshold.query.debug: 2s
357 | #index.search.slowlog.threshold.query.trace: 500ms
358 | 
359 | #index.search.slowlog.threshold.fetch.warn: 1s
360 | #index.search.slowlog.threshold.fetch.info: 800ms
361 | #index.search.slowlog.threshold.fetch.debug: 500ms
362 | #index.search.slowlog.threshold.fetch.trace: 200ms
363 | 
364 | #index.indexing.slowlog.threshold.index.warn: 10s
365 | #index.indexing.slowlog.threshold.index.info: 5s
366 | #index.indexing.slowlog.threshold.index.debug: 2s
367 | #index.indexing.slowlog.threshold.index.trace: 500ms
368 | 
369 | ################################## GC Logging ################################
370 | 
371 | #monitor.jvm.gc.young.warn: 1000ms
372 | #monitor.jvm.gc.young.info: 700ms
373 | #monitor.jvm.gc.young.debug: 400ms
374 | 
375 | #monitor.jvm.gc.old.warn: 10s
376 | #monitor.jvm.gc.old.info: 5s
377 | #monitor.jvm.gc.old.debug: 2s
378 | 
379 | ################################## Security ################################
380 | 
381 | # Uncomment if you want to enable JSONP as a valid return transport on the
382 | # http server. With this enabled, it may pose a security risk, so disabling
383 | # it unless you need it is recommended (it is disabled by default).
384 | #
385 | #http.jsonp.enable: true
--------------------------------------------------------------------------------
/elasticsearch_runner/runner.py:
--------------------------------------------------------------------------------
1 | """
2 | Support for starting, stopping and managing an Elasticsearch instance from within a Python process.
3 | 
4 | Intended for testing and other lightweight purposes with transient data.
5 | 
6 | TODO Faster Elasticsearch startup.
7 | """
8 | from collections import namedtuple
9 | import json
10 | import logging
11 | import os
12 | import re
13 | from shutil import copyfile, rmtree
14 | from tempfile import mkdtemp
15 | from time import sleep, time
16 | from zipfile import ZipFile
17 | from subprocess import Popen
18 | import errno
19 | import sys
20 | 
21 | PY3 = sys.version_info > (3,)
22 | if PY3:
23 |     import urllib.parse
24 | else:
25 |     from urlparse import urlparse
26 | 
27 | from psutil import Process, NoSuchProcess
28 | import requests
29 | 
30 | from elasticsearch_runner.configuration import serialize_config, generate_config, generate_cluster_name, package_path
31 | 
32 | ES_DEFAULT_VERSION = '2.1.0'
33 | 
34 | ES_URLS = {'1.7.2': 'https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.2.zip',
35 |            '2.0.0': 'https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/zip/elasticsearch/2.0.0/elasticsearch-2.0.0.zip'}
36 | 
37 | ES_DEFAULT_URL_LOCATION = 'https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch'
38 | ES2_DEFAULT_URL_LOCATION = 'https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/zip/elasticsearch/'
39 | 
40 | 
41 | def fn_from_url(url):
42 |     """
43 |     Extract the final part of a URL in order to get the filename of a downloaded URL.
44 | 
45 |     :param url: url string
46 |     :type url : str|unicode
47 |     :rtype : str|unicode
48 |     :return: url filename part
49 |     """
50 |     if PY3:
51 |         parse = urllib.parse.urlparse(url)
52 |     else:
53 |         parse = urlparse(url)
54 | 
55 |     return os.path.basename(parse.path)
56 | 
57 | 
58 | def download_file(url, dest_path):
59 |     """
60 |     Download the file pointed to by the URL to the specified path.
61 |     If the file is already present at the path it will not be downloaded and the path to this file
62 |     is returned.
63 | 
64 |     :param url: url string pointing to the file
65 |     :type url : str|unicode
66 |     :param dest_path: path to the location where the file will be stored locally
67 |     :type dest_path : str|unicode
68 |     :rtype : str|unicode
69 |     :return: path to the downloaded file
70 |     """
71 |     if not os.path.exists(dest_path):
72 |         os.makedirs(dest_path)
73 | 
74 |     fn = fn_from_url(url)
75 |     full_fn = os.path.join(dest_path, fn)
76 | 
77 |     if os.path.exists(full_fn):
78 |         logging.info('Archive %s already exists in %s, skipping download ...' % (fn, dest_path))
79 |     else:
80 |         r = requests.get(url, stream=True)
81 |         with open(full_fn, 'wb') as f:
82 |             for chunk in r.iter_content(chunk_size=1024):
83 |                 if chunk:  # filter out keep-alive new chunks
84 |                     f.write(chunk)
85 |                     f.flush()
86 | 
87 |     return full_fn
88 | 
89 | 
90 | def check_java():
91 |     """
92 |     Simple check for Java availability on the local system.
93 | 
94 |     :rtype : bool
95 |     :return: True if Java is available on the command line
96 |     """
97 |     return os.system('java -version') == 0
98 | 
99 | 
100 | def process_exists(pid):
101 |     """
102 |     Check if there is a process with this PID.
103 | 
104 |     :param pid: Process ID
105 |     :type pid: int
106 |     :rtype : bool
107 |     :return: True if the process exists, False otherwise
108 |     """
109 |     if os.name == 'nt':
110 |         # TODO something more solid on windows?
111 |         try:
112 |             return Process(pid).status() == 'running'
113 |         except NoSuchProcess:
114 |             return False
115 |     else:
116 |         try:
117 |             os.kill(pid, 0)
118 |         except OSError:
119 |             return False
120 | 
121 |         return True
122 | 
123 | 
124 | def parse_es_log_header(log_file, limit=200):
125 |     """
126 |     Look at the Elasticsearch log for startup messages containing system information. The log is read until the
127 |     started message is detected or the number of lines read exceeds the limit.
128 |     The log file must be open for reading and at the desired position, i.e. the end, to read incoming log lines.
129 | 
130 |     :param log_file: file instance, open for reading, for the log file at the correct position
131 |     :type log_file: FileIO
132 |     :param limit: max lines to read before returning
133 |     :type limit: int
134 |     :rtype : (int|None, int|None)
135 |     :return: A tuple with the Elasticsearch instance PID and REST endpoint port number, i.e. (pid, port)
136 |     """
137 |     line = log_file.readline()
138 |     server_pid = None
139 |     es_port = None
140 |     count = 0
141 | 
142 |     while count < limit:
143 |         count += 1
144 |         line = line.strip()
145 | 
146 |         if line == '':
147 |             # nothing new in the log yet, wait a little before polling again
148 |             sleep(.1)
149 | 
150 |         m = re.search(r'pid\[(\d+)\]', line)
151 |         if m:
152 |             server_pid = int(m.group(1))
153 | 
154 |         m = re.search(r'\[http.*publish_address.*:(\d+)[\]}|}]', line)
155 |         if m:
156 |             es_port = int(m.group(1))
157 | 
158 |         if re.search('started', line):
159 |             return server_pid, es_port
160 | 
161 |         line = log_file.readline()
162 | 
163 |     logging.warning('Read more than %d lines while parsing Elasticsearch log header. Giving up ...' % limit)
164 | 
165 |     return server_pid, es_port
166 | 
167 | 
168 | # tuple holding information about the current Elasticsearch process
169 | ElasticsearchState = namedtuple('ElasticsearchState', 'server_pid wrapper_pid port config_fn')
170 | 
171 | 
172 | class ElasticsearchRunner:
173 |     """
174 |     Runs a basic single node Elasticsearch instance for testing or other lightweight purposes.
175 |     """
176 | 
177 |     def __init__(self, install_path=None, transient=False, version=None):
178 |         """
179 |         :param version: Elasticsearch version to run. Defaults to 2.1.0.
180 |         :type version: str|unicode
181 |         :param install_path: The path where the Elasticsearch software package and data storage will be kept.
182 |             If no install path is set, it defaults to APPDATA/elasticsearch_runner (Windows) or
183 |             HOME/.elasticsearch_runner (other systems). The install path can also be provided through the
184 |             environment variable 'elasticsearch-runner-install-path'; if set, it overrides this parameter.
185 |         :type install_path: str|unicode
186 |         :param transient: Not implemented.
187 |         :type transient: bool
188 |         """
189 |         if os.getenv('elasticsearch-runner-install-path'):
190 |             install_path = os.getenv('elasticsearch-runner-install-path')
191 | 
192 |         if install_path:
193 |             self.install_path = install_path
194 |         else:
195 |             if os.name == 'nt':
196 |                 self.install_path = os.path.join(os.getenv("APPDATA"), 'elasticsearch_runner', 'embedded-es')
197 |             else:
198 |                 self.install_path = os.path.join(os.getenv("HOME"), '.elasticsearch_runner', 'embedded-es')
199 | 
200 |         if version:
201 |             self.version = version
202 |         else:
203 |             self.version = ES_DEFAULT_VERSION
204 | 
205 |         self.version_folder = "elasticsearch-%s" % self.version
206 |         self.transient = transient
207 |         self.es_state = None
208 |         self.es_config = None
209 | 
210 |         if not check_java():
211 |             logging.error('Java not installed. Elasticsearch won\'t be able to run ...')
212 | 
213 |     def install(self):
214 |         """
215 |         Download and install the Elasticsearch software in the install path. If already downloaded or
216 |         installed, those steps are skipped.
217 | 
218 |         :rtype : ElasticsearchRunner
219 |         :return: The instance called on.
220 |         """
221 |         if self.version in ES_URLS:
222 |             download_url = ES_URLS[self.version]
223 |         else:
224 |             if self.version.startswith('1'):
225 |                 download_url = "%s-%s.zip" % (ES_DEFAULT_URL_LOCATION, self.version)
226 |             else:
227 |                 download_url = "%s%s/elasticsearch-%s.zip" % (ES2_DEFAULT_URL_LOCATION, self.version, self.version)
228 | 
229 |         es_archive_fn = download_file(download_url, self.install_path)
230 | 
231 |         if not os.path.exists(os.path.join(self.install_path, self.version_folder)):
232 |             with ZipFile(es_archive_fn, "r") as z:
233 |                 z.extractall(self.install_path)
234 | 
235 |         # insert basic config file
236 |         copyfile(os.path.join(package_path(), 'elasticsearch_runner', 'resources', 'embedded_elasticsearch.yml'),
237 |                  os.path.join(self.install_path, self.version_folder, 'config', 'elasticsearch.yml'))
238 | 
239 |         return self
240 | 
241 |     def run(self):
242 |         """
243 |         Start the Elasticsearch server. The REST port and PID of the running instance are stored in the
244 |         es_state field.
245 | 
246 |         :rtype : ElasticsearchRunner
247 |         :return: The instance called on.
248 |         """
249 |         if self.is_running():
250 |             logging.warning('Elasticsearch already running ...')
251 |         else:
252 |             # generate and insert Elasticsearch configuration file with transient data and log paths
253 |             cluster_name = generate_cluster_name()
254 |             cluster_path = mkdtemp(prefix='%s-%s-' % (self.version, cluster_name), dir=self.install_path)
255 |             es_data_dir = os.path.join(cluster_path, "data")
256 |             es_config_dir = os.path.join(cluster_path, "config")
257 |             es_log_dir = os.path.join(cluster_path, "log")
258 |             self.es_config = generate_config(cluster_name=cluster_name, data_path=es_data_dir, log_path=es_log_dir)
259 |             config_fn = os.path.join(es_config_dir, 'elasticsearch.yml')
260 | 
261 |             try:
262 |                 os.makedirs(es_log_dir)
263 |                 os.makedirs(es_data_dir)
264 |                 os.makedirs(es_config_dir)
265 |             except OSError as exception:
266 |                 if exception.errno != errno.EEXIST:
267 |                     raise
268 | 
269 |             with open(config_fn, 'w') as f:
270 |                 serialize_config(f, self.es_config)
271 | 
272 |             copyfile(os.path.join(package_path(), 'elasticsearch_runner', 'resources', 'embedded_logging.yml'),
273 |                      os.path.join(es_config_dir, 'logging.yml'))
274 | 
275 |             es_log_fn = os.path.join(es_log_dir, '%s.log' % cluster_name)
276 |             # create the log file if it doesn't exist yet. We need to open it and seek to the end before
277 |             # sniffing out the configuration info from the log.
278 |             open(es_log_fn, 'a').close()
279 | 
280 |             runcall = self._es_wrapper_call(os.name) + ['-Des.path.conf=%s' % es_config_dir,
281 |                                                         '-Des.path.logs=%s' % es_log_dir]
282 |             wrapper_proc = Popen(runcall)
283 | 
284 |             es_log_f = open(es_log_fn, 'r')
285 |             es_log_f.seek(0, 2)
286 | 
287 |             # watch the log for the startup header with PID and port
288 |             server_pid, es_port = parse_es_log_header(es_log_f)
289 | 
290 |             if not server_pid:
291 |                 logging.error('Server PID not detected ... runcall was %s' % runcall)
292 | 
293 |             if not es_port:
294 |                 logging.error('Server http port not detected ...')
295 | 
296 |             self.es_state = ElasticsearchState(wrapper_pid=wrapper_proc.pid,
297 |                                                server_pid=server_pid,
298 |                                                port=es_port,
299 |                                                config_fn=config_fn)
300 | 
301 |         return self
302 | 
303 |     def _es_wrapper_call(self, os_name):
304 |         """
305 |         :param os_name: OS identifier as returned by os.name
306 |         :type os_name: str|unicode
307 |         :rtype : list[str|unicode]
308 |         :return: Command list for launching the Elasticsearch wrapper script.
309 |         """
310 |         if os_name == 'nt':
311 |             es_bin = [os.path.join(self.install_path, self.version_folder, 'bin', 'elasticsearch.bat')]
312 |         else:
313 |             es_bin = ['/bin/sh', os.path.join(self.install_path, self.version_folder, 'bin', 'elasticsearch')]
314 | 
315 |         return es_bin
316 | 
317 |     def stop(self):
318 |         """
319 |         Stop the Elasticsearch server.
320 | 
321 |         :rtype : ElasticsearchRunner
322 |         :return: The instance called on.
323 |         """
324 |         if self.is_running():
325 |             server_proc = Process(self.es_state.server_pid)
326 |             server_proc.terminate()
327 |             server_proc.wait()
328 | 
329 |             if process_exists(self.es_state.server_pid):
330 |                 logging.warning('Failed to stop Elasticsearch server process PID %d ...' % self.es_state.server_pid)
331 | 
332 |             # delete transient directories
333 |             if 'path' in self.es_config:
334 |                 if 'log' in self.es_config['path']:
335 |                     log_path = self.es_config['path']['log']
336 |                     logging.info('Removing transient log path %s ...' % log_path)
337 |                     rmtree(log_path)
338 | 
339 |                 if 'data' in self.es_config['path']:
340 |                     data_path = self.es_config['path']['data']
341 |                     logging.info('Removing transient data path %s ...' % data_path)
342 |                     rmtree(data_path)
343 | 
344 |             # delete temporary config file
345 |             if os.path.exists(self.es_state.config_fn):
346 |                 logging.info('Removing transient configuration file %s ...' % self.es_state.config_fn)
347 |                 os.remove(self.es_state.config_fn)
348 | 
349 |             self.es_state = None
350 |             self.es_config = None
351 |         else:
352 |             logging.warning('Elasticsearch is not running ...')
353 | 
354 |         return self
355 | 
356 |     def is_running(self):
357 |         """
358 |         Checks that the instance has a registered server process and that the process exists.
359 | 
360 |         :rtype : bool
361 |         :return: True if the server is running, False if not.
362 |         """
363 |         state = self.es_state
364 | 
365 |         return state and process_exists(state.server_pid)
366 | 
367 |     def wait_for_green(self, timeout=1.):
368 |         """
369 |         Check if the cluster status is green and wait for it to become green if it's not.
370 |         Run after starting the runner to ensure that the Elasticsearch instance is ready.
371 | 
372 |         :param timeout: The time to wait for a green cluster response in seconds.
373 |         :type timeout: int|long|float
374 |         :rtype : ElasticsearchRunner
375 |         :return: The instance called on.
376 |         """
377 |         if not self.es_state:
378 |             logging.warning('Elasticsearch runner is not started ...')
379 |             return self
380 | 
381 |         if self.es_state.port is None:
382 |             logging.warning('Elasticsearch runner not properly started ...')
383 |             return self
384 | 
385 |         # use wall-clock time for the deadline; time.clock() measures CPU time on
386 |         # Unix and would make the timeout unreliable there
387 |         end_time = time() + timeout
388 |         health_resp = requests.get('http://localhost:%d/_cluster/health' % self.es_state.port)
389 |         health_data = json.loads(health_resp.text)
390 | 
391 |         while health_data['status'] != 'green':
392 |             if time() > end_time:
393 |                 logging.error('Elasticsearch cluster failed to turn green in %f seconds, current status is %s ...' %
394 |                               (timeout, health_data['status']))
395 | 
396 |                 return self
397 | 
398 |             sleep(.1)  # brief pause between health polls
399 |             health_resp = requests.get('http://localhost:%d/_cluster/health' % self.es_state.port)
400 |             health_data = json.loads(health_resp.text)
401 | 
402 |         return self
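403 | 
404 | 
405 | if __name__ == '__main__':
406 |     # Minimal smoke test mirroring the calls exercised in the test suite: install,
407 |     # start, wait for a green cluster, query the health endpoint and shut down.
408 |     # Assumes a local Java installation and network access for the first download.
409 |     runner = ElasticsearchRunner()
410 |     runner.install()
411 |     runner.run()
412 |     runner.wait_for_green(timeout=60)
413 | 
414 |     health = requests.get('http://localhost:%d/_cluster/health' % runner.es_state.port)
415 |     print(health.text)
416 | 
417 |     runner.stop()
--------------------------------------------------------------------------------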