├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.rst
├── requirements.txt
├── setup.cfg
├── setup.py
├── test-requirements.txt
└── tornado_elasticsearch.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.egg-info
3 | .coverage
4 | .idea
5 | .DS_Store
6 | build
7 | dist
8 | tests/cover
9 | cover
10 | atlassian-ide-plugin.xml
11 | docs/_build
12 | MANIFEST
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013-2017 Gavin M. Roy
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without modification,
5 | are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 | * Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 | * Neither the name of the tornado_elasticsearch library nor the names of its
13 | contributors may be used to endorse or promote products derived from this
14 | software without specific prior written permission.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.rst
3 | include requirements.txt
4 | include test-requirements.txt
5 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | tornado_elasticsearch
2 | =====================
3 | Extends the official Elasticsearch Python API adding Tornado AsyncHTTPClient
4 | support.
5 |
6 | |Version| |License|
7 |
8 | Installation
9 | ------------
10 | tornado_elasticsearch is available on the Python Package Index and can be
11 | installed using pip or easy_install:
12 |
13 | .. code-block:: sh
14 |
15 | pip install tornado_elasticsearch
16 |
17 | Example Request Handlers
18 | ------------------------
19 | .. code-block:: python
20 |
21 |     import datetime
22 |     import uuid
23 |     from tornado import gen, web
24 |     from tornado_elasticsearch import AsyncElasticsearch
25 |
26 | class Example(web.RequestHandler):
27 |
28 | def initialize(self):
29 | self.es = AsyncElasticsearch()
30 |
31 | @web.asynchronous
32 | @gen.engine
33 | def delete(self, *args, **kwargs):
34 | result = yield self.es.delete(index='test-index', doc_type='tweet',
35 | id=self.get_argument('id'))
36 | self.finish(result)
37 |
38 | @web.asynchronous
39 | @gen.engine
40 | def get(self, *args, **kwargs):
41 | if self.get_argument('id', None):
42 | result = yield self.es.get(index='test-index', doc_type='tweet',
43 | id=self.get_argument('id'))
44 | else:
45 | result = yield self.es.search(index='test-index')
46 | self.finish(result)
47 |
48 | @web.asynchronous
49 | @gen.engine
50 | def post(self, *args, **kwargs):
51 | doc = {
52 | 'author': self.get_current_user() or 'Unknown',
53 | 'text': self.get_argument('text'),
54 | 'timestamp': datetime.datetime.now()
55 | }
56 | result = yield self.es.index(index='test-index',
57 | doc_type='tweet',
58 | body=doc,
59 | id=str(uuid.uuid4()))
60 | self.finish(result)
61 |
62 |
63 | class Info(web.RequestHandler):
64 |
65 | @web.asynchronous
66 | @gen.engine
67 | def get(self, *args, **kwargs):
68 | es = AsyncElasticsearch()
69 | info = yield es.info()
70 | self.finish(info)
71 |
72 |
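The handlers above use the older ``@web.asynchronous`` / ``@gen.engine``
style, which newer Tornado releases deprecate. As a minimal sketch (the
index name and query are illustrative), the same handler can also be
written with ``@gen.coroutine``:

.. code-block:: python

    from tornado import gen
    from tornado import web
    from tornado_elasticsearch import AsyncElasticsearch

    class Search(web.RequestHandler):

        def initialize(self):
            self.es = AsyncElasticsearch()

        @gen.coroutine
        def get(self, *args, **kwargs):
            result = yield self.es.search(
                index='test-index',
                body={'query': {'match_all': {}}})
            self.finish(result)
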
73 | Version History
74 | ---------------
75 | - 0.5.0:
76 |   - Bugfixes:
77 |     - HTTP auth
78 |   - Add timeout support
79 |   - Allow scroll to use POST, since the scroll_id can be too long
80 |   - Fix yield issue
81 |   - Add max_clients to the AsyncElasticsearch constructor
82 |   - Add get_alias
83 |   - Add get_mapping
84 |   - Add cluster health
85 | - 0.4.0: Bugfix: Python3 decoding issues
86 | - 0.3.0: Bugfix: Add body to log_request_fail call (#1)
87 | - 0.2.0: Bugfix: force method to POST if GET and body passed
88 | - 0.1.0: Initial version
89 |
90 | .. |Version| image:: https://img.shields.io/pypi/v/tornado_elasticsearch.svg?
91 | :target: http://badge.fury.io/py/tornado_elasticsearch
92 |
93 | .. |License| image:: https://img.shields.io/pypi/l/tornado_elasticsearch.svg?
94 | :target: https://tornado_elasticsearch.readthedocs.org
95 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tornado>4
2 | elasticsearch>=2.3
3 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [nosetests]
2 | with-coverage=1
3 | cover-package=tornado_elasticsearch
4 | cover-branches=1
5 | cover-erase=1
6 |
7 | [wheel]
8 | universal=1
9 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
3 | setuptools.setup(
4 | name='tornado_elasticsearch',
5 | version='0.5.0',
6 | description=('Extends the official Elasticsearch Python API adding '
7 | 'Tornado AsyncHTTPClient support'),
8 | long_description=open('README.rst').read(),
9 | author='Gavin M. Roy',
10 | author_email='gavinmroy@gmail.com',
11 | url='https://github.com/gmr/tornado_elasticsearch',
12 | py_modules=['tornado_elasticsearch'],
13 | install_requires=['elasticsearch', 'tornado'],
14 | license='BSD',
15 | classifiers=[
16 | 'Development Status :: 3 - Alpha',
17 | 'Intended Audience :: Developers',
18 | 'License :: OSI Approved :: BSD License',
19 | 'Operating System :: OS Independent',
20 | 'Programming Language :: Python :: 2',
21 | 'Programming Language :: Python :: 2.6',
22 | 'Programming Language :: Python :: 2.7',
23 | 'Programming Language :: Python :: 3',
24 | 'Programming Language :: Python :: 3.2',
25 | 'Programming Language :: Python :: 3.3',
26 | 'Programming Language :: Python :: Implementation :: CPython',
27 | 'Programming Language :: Python :: Implementation :: PyPy',
28 | 'Topic :: Communications',
29 | 'Topic :: Internet',
30 | 'Topic :: Software Development :: Libraries'
31 | ],
32 | zip_safe=True)
33 |
--------------------------------------------------------------------------------
/test-requirements.txt:
--------------------------------------------------------------------------------
1 | coverage
2 | codecov
3 | nose
4 | mock
5 | yapf
6 | pep8
7 | -r requirements.txt
8 |
--------------------------------------------------------------------------------
/tornado_elasticsearch.py:
--------------------------------------------------------------------------------
1 | """tornado_elasticsearch extends the official elasticsearch library adding
2 | asynchronous support for the Tornado stack.
3 |
4 | See http://elasticsearch-py.readthedocs.org/en/latest/ for information
5 | on how to use the API; the example below shows how to use it with Tornado::
6 |
7 | from tornado import gen
8 | from tornado import web
9 | from tornado_elasticsearch import AsyncElasticsearch
10 |
11 |
12 | class Info(web.RequestHandler):
13 |
14 | @web.asynchronous
15 | @gen.engine
16 | def get(self, *args, **kwargs):
17 | es = AsyncElasticsearch()
18 | info = yield es.info()
19 | self.finish(info)
20 |
21 | """
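# A minimal application wiring sketch for the Info handler shown in the
# docstring above (standard Tornado APIs; the port number is illustrative):
#
#     from tornado import ioloop, web
#
#     application = web.Application([(r'/', Info)])
#     application.listen(8888)
#     ioloop.IOLoop.instance().start()
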
22 | from elasticsearch.connection.base import Connection
23 | from elasticsearch import exceptions
24 | from elasticsearch.client import Elasticsearch
25 | from elasticsearch.transport import Transport, TransportError
26 | from elasticsearch.client.utils import query_params, _make_path
27 |
28 | from tornado import concurrent
29 | from tornado import gen
30 | from tornado import httpclient
31 | import logging
32 | import time
33 | try:
34 | from urllib import urlencode
35 | except ImportError:
36 | from urllib.parse import urlencode
37 | from tornado import version
38 |
39 | __version__ = '0.5.0'
40 |
41 | LOGGER = logging.getLogger(__name__)
42 |
43 |
44 | class AsyncHttpConnection(Connection):
45 |     """Add Tornado asynchronous support to Elasticsearch.
46 |
47 | :param str host: The host for the connection
48 | :param int port: The port for the connection
49 | :param str|tuple http_auth: optional http auth information as either a
50 | colon delimited string ``("username:password")`` or
51 | tuple ``(username, password)``
52 | :param int request_timeout: optional default timeout in seconds
53 | :arg use_ssl: use ssl for the connection if ``True``
54 |
55 | """
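    # Construction sketch (host and credentials are illustrative). The
    # connection is normally created by the transport, and ``http_auth``
    # may be given either as a tuple or as a colon-delimited string:
    #
    #     AsyncHttpConnection('127.0.0.1', 9200, http_auth=('user', 'secret'))
    #     AsyncHttpConnection('127.0.0.1', 9200, http_auth='user:secret',
    #                         use_ssl=True, request_timeout=5.0)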
56 | _auth_user = None
57 | _auth_password = None
58 | _user_agent = 'tornado_elasticsearch %s/Tornado %s' % (__version__, version)
59 | ssl_transport_schema = 'https'
60 |
61 | def __init__(self, host='localhost', port=9200, http_auth=None,
62 | use_ssl=False, request_timeout=None, max_clients=10, **kwargs):
63 | super(AsyncHttpConnection, self).__init__(host=host, port=port,
64 | **kwargs)
65 | self._assign_auth_values(http_auth)
66 | self.base_url = '%s://%s:%s%s' % (self.ssl_transport_schema if use_ssl
67 | else self.transport_schema,
68 | host, port, self.url_prefix)
69 | httpclient.AsyncHTTPClient.configure(None, max_clients=max_clients)
70 | self._client = httpclient.AsyncHTTPClient()
71 | self._headers = {'Content-Type': 'application/json; charset=UTF-8'}
72 | self._start_time = None
73 | self.request_timeout = request_timeout
74 |
75 | @concurrent.return_future
76 | def perform_request(self, method, url, params=None, body=None,
77 | timeout=None, ignore=(), callback=None):
78 | request_uri = self._request_uri(url, params)
79 | LOGGER.debug('%s, %r, %r', url, body, params)
80 | kwargs = self._request_kwargs(method, body, timeout)
81 | self._start_time = time.time()
82 |
83 | def on_response(response):
84 | duration = time.time() - self._start_time
85 | raw_data = response.body.decode('utf-8') \
86 | if response.body is not None else None
87 | LOGGER.info('Response from %s: %s', url, response.code)
88 | if not (200 <= response.code < 300) and \
89 | response.code not in ignore:
90 | LOGGER.debug('Error: %r', raw_data)
91 | self.log_request_fail(method, request_uri, url, body, duration,
92 | response.code)
93 | error = exceptions.HTTP_EXCEPTIONS.get(response.code,
94 | TransportError)
95 | raise error(response.code, raw_data)
96 | self.log_request_success(method, request_uri, url, body,
97 | response.code, raw_data, duration)
98 | callback((response.code, response.headers, raw_data))
99 |
100 | LOGGER.debug('Fetching [%s] %s', kwargs['method'], request_uri)
101 | LOGGER.debug('kwargs: %r', kwargs)
102 | self._client.fetch(httpclient.HTTPRequest(request_uri, **kwargs),
103 | callback=on_response)
104 |
105 | def _assign_auth_values(self, http_auth):
106 | """Take the http_auth value and split it into the attributes that
107 | carry the http auth username and password
108 |
109 | :param str|tuple http_auth: The http auth value
110 |
111 | """
112 | if not http_auth:
113 | pass
114 | elif isinstance(http_auth, (tuple, list)):
115 | self._auth_user, self._auth_password = http_auth
116 | elif isinstance(http_auth, str):
117 | self._auth_user, self._auth_password = http_auth.split(':')
118 | else:
119 | raise ValueError('HTTP Auth Credentials should be str or '
120 | 'tuple, not %s' % type(http_auth))
121 |
122 | def _request_kwargs(self, method, body, timeout):
123 | if body and method == 'GET':
124 | method = 'POST'
125 | kwargs = {'method': method, 'user_agent': self._user_agent,
126 | 'headers': self._headers}
127 | if self.request_timeout is not None:
128 | kwargs['request_timeout'] = self.request_timeout
129 | if self._auth_user and self._auth_password:
130 | kwargs['auth_username'] = self._auth_user
131 | kwargs['auth_password'] = self._auth_password
132 | if body:
133 | kwargs['body'] = body
134 | if timeout:
135 | kwargs['request_timeout'] = timeout
136 |
137 | kwargs['allow_nonstandard_methods'] = True
138 | return kwargs
139 |
140 | def _request_uri(self, url, params):
141 | uri = self.url_prefix + url
142 | if params:
143 | uri = '%s?%s' % (uri, urlencode(params or {}))
144 | return '%s%s' % (self.base_url, uri)
145 |
146 |
147 | class AsyncTransport(Transport):
148 |
149 | @gen.coroutine
150 | def perform_request(self, method, url, params=None, body=None):
151 | """Perform the actual request. Retrieve a connection from the
152 |         connection pool, pass all the information to its perform_request
153 | method and return the data.
154 |
155 | If an exception was raised, mark the connection as failed and retry (up
156 | to `max_retries` times).
157 |
158 | If the operation was successful and the connection used was previously
159 |         marked as dead, mark it as live, resetting its failure count.
160 |
161 | :param method: HTTP method to use
162 | :param url: absolute url (without host) to target
163 | :param params: dictionary of query parameters, will be handed over to
164 |             the underlying :class:`~tornado_elasticsearch.AsyncHttpConnection`
165 | class for serialization
166 | :param body: body of the request, will be serialized using serializer
167 | and passed to the connection
168 |
169 | """
170 | if body is not None:
171 | body = self.serializer.dumps(body)
172 |
173 | # some clients or environments don't support sending GET with body
174 | if method in ('HEAD', 'GET') and self.send_get_body_as != 'GET':
175 | # send it as post instead
176 | if self.send_get_body_as == 'POST':
177 | method = 'POST'
178 |
179 | # or as source parameter
180 | elif self.send_get_body_as == 'source':
181 | if params is None:
182 | params = {}
183 | params['source'] = body
184 | body = None
185 |
186 | if body is not None:
187 | try:
188 | body = body.encode('utf-8')
189 | except (UnicodeDecodeError, AttributeError):
190 | # bytes/str - no need to re-encode
191 | pass
192 |
193 | ignore = ()
194 | if params and 'ignore' in params:
195 | ignore = params.pop('ignore')
196 | if isinstance(ignore, int):
197 | ignore = (ignore, )
198 |
199 | for attempt in range(self.max_retries + 1):
200 | connection = self.get_connection()
201 | try:
202 | result = yield connection.perform_request(method, url,
203 | params, body,
204 | ignore=ignore)
205 | (status, headers, data) = result
206 | except TransportError as e:
207 | retry = False
208 | if isinstance(e, exceptions.ConnectionTimeout):
209 | retry = self.retry_on_timeout
210 | elif isinstance(e, exceptions.ConnectionError):
211 | retry = True
212 | elif e.status_code in self.retry_on_status:
213 | retry = True
214 |
215 | if retry:
216 | # only mark as dead if we are retrying
217 | self.mark_dead(connection)
218 | # raise exception on last retry
219 | if attempt == self.max_retries:
220 | raise
221 | else:
222 | raise
223 |
224 | else:
225 | # connection didn't fail, confirm it's live status
226 | self.connection_pool.mark_live(connection)
227 | response = self.deserializer.loads(data,
228 | headers.get('content-type')
229 | ) if data else None
230 | raise gen.Return((status, response))
231 |
232 |
233 | class AsyncElasticsearch(Elasticsearch):
234 |     """Extends the official elasticsearch.Elasticsearch object, making the
235 |     client methods coroutines.
236 |
237 | """
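    # Construction sketch (host values are illustrative); connection-level
    # options such as http_auth, request_timeout and max_clients are passed
    # through to AsyncHttpConnection via the transport:
    #
    #     es = AsyncElasticsearch()
    #     es = AsyncElasticsearch(hosts=[{'host': 'es01', 'port': 9200}],
    #                             http_auth=('user', 'secret'),
    #                             max_clients=50)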
238 | def __init__(self, hosts=None, **kwargs):
239 | """Create a new AsyncElasticsearch instance
240 |
241 | """
242 | kwargs['connection_class'] = AsyncHttpConnection
243 | kwargs['transport_class'] = AsyncTransport
244 | super(AsyncElasticsearch, self).__init__(hosts, **kwargs)
245 |
246 | @gen.coroutine
247 | @query_params()
248 | def ping(self, params=None):
249 | """ Returns True if the cluster is up, False otherwise. """
250 | try:
251 |             yield self.transport.perform_request('HEAD', '/', params=params)
252 | except TransportError:
253 | raise gen.Return(False)
254 | raise gen.Return(True)
255 |
256 | @gen.coroutine
257 | @query_params()
258 | def info(self, params=None):
259 | """Get the basic info from the current cluster.
260 |
261 | :rtype: dict
262 |
263 | """
264 | _, data = yield self.transport.perform_request('GET', '/',
265 | params=params)
266 | raise gen.Return(data)
267 |
268 | @gen.coroutine
269 | def health(self, params=None):
270 |         """Coroutine. Queries the cluster Health API.
271 |
272 |         Returns a 2-tuple where the first element is the HTTP status code and
273 |         the second is a dictionary with the response data.
274 |
275 | :param params: dictionary of query parameters, will be handed over to
276 |             the underlying :class:`~tornado_elasticsearch.AsyncHttpConnection`
277 | class for serialization
278 |
279 | """
280 | status, data = yield self.transport.perform_request(
281 | "GET", "/_cluster/health", params=params)
282 | raise gen.Return((status, data))
283 |
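    # Usage sketch for health() inside a coroutine, where ``es`` is an
    # AsyncElasticsearch instance (the warning is illustrative); the cluster
    # Health API reports a 'green', 'yellow' or 'red' status:
    #
    #     status, body = yield es.health()
    #     if body['status'] != 'green':
    #         LOGGER.warning('Cluster status is %s', body['status'])
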
284 | @gen.coroutine
285 | @query_params('consistency', 'id', 'parent', 'percolate', 'refresh',
286 | 'replication', 'routing', 'timeout', 'timestamp', 'ttl',
287 | 'version', 'version_type')
288 | def create(self, index, doc_type, body, id=None, params=None):
289 | """
290 | Adds a typed JSON document in a specific index, making it searchable.
291 | Behind the scenes this method calls index(..., op_type='create')
292 |
293 |
294 | :arg index: The name of the index
295 | :arg doc_type: The type of the document
296 | :arg id: Document ID
297 | :arg body: The document
298 | :arg consistency: Explicit write consistency setting for the operation
299 | :arg id: Specific document ID (when the POST method is used)
300 | :arg parent: ID of the parent document
301 | :arg percolate: Percolator queries to execute while indexing the doc
302 | :arg refresh: Refresh the index after performing the operation
303 | :arg replication: Specific replication type (default: sync)
304 | :arg routing: Specific routing value
305 | :arg timeout: Explicit operation timeout
306 | :arg timestamp: Explicit timestamp for the document
307 | :arg ttl: Expiration time for the document
308 | :arg version: Explicit version number for concurrency control
309 | :arg version_type: Specific version type
310 | """
311 | result = yield self.index(index, doc_type, body, id=id, params=params,
312 | op_type='create')
313 | raise gen.Return(result)
314 |
315 | @gen.coroutine
316 | @query_params('consistency', 'op_type', 'parent', 'percolate', 'refresh',
317 | 'replication', 'routing', 'timeout', 'timestamp', 'ttl',
318 | 'version', 'version_type')
319 | def index(self, index, doc_type, body, id=None, params=None):
320 | """
321 | Adds or updates a typed JSON document in a specific index, making it
322 |         searchable.
323 |
324 | :arg index: The name of the index
325 | :arg doc_type: The type of the document
326 | :arg body: The document
327 | :arg id: Document ID
328 | :arg consistency: Explicit write consistency setting for the operation
329 | :arg op_type: Explicit operation type (default: index)
330 | :arg parent: ID of the parent document
331 | :arg percolate: Percolator queries to execute while indexing the doc
332 | :arg refresh: Refresh the index after performing the operation
333 | :arg replication: Specific replication type (default: sync)
334 | :arg routing: Specific routing value
335 | :arg timeout: Explicit operation timeout
336 | :arg timestamp: Explicit timestamp for the document
337 | :arg ttl: Expiration time for the document
338 | :arg version: Explicit version number for concurrency control
339 | :arg version_type: Specific version type
340 |
341 | """
342 | _, data = yield self.transport.perform_request(
343 | 'PUT' if id else 'POST', _make_path(index, doc_type, id),
344 | params=params, body=body)
345 | raise gen.Return(data)
346 |
347 | @gen.coroutine
348 | @query_params('parent', 'preference', 'realtime', 'refresh', 'routing')
349 | def exists(self, index, id, doc_type='_all', params=None):
350 | """
351 | Returns a boolean indicating whether or not given document exists in
352 |         Elasticsearch.
353 |
354 | :arg index: The name of the index
355 | :arg id: The document ID
356 | :arg doc_type: The type of the document (uses `_all` by default to
357 | fetch the first document matching the ID across all types)
358 | :arg parent: The ID of the parent document
359 | :arg preference: Specify the node or shard the operation should be
360 | performed on (default: random)
361 | :arg realtime: Specify whether to perform the operation in realtime or
362 | search mode
363 | :arg refresh: Refresh the shard containing the document before
364 | performing the operation
365 | :arg routing: Specific routing value
366 | """
367 | try:
368 |             yield self.transport.perform_request(
369 |                 'HEAD', _make_path(index, doc_type, id), params=params)
370 |         except exceptions.NotFoundError:
371 |             raise gen.Return(False)
372 | raise gen.Return(True)
373 |
374 | @gen.coroutine
375 | @query_params('_source', '_source_exclude', '_source_include', 'fields',
376 | 'parent', 'preference', 'realtime', 'refresh', 'routing')
377 | def get(self, index, id, doc_type='_all', params=None):
378 | """
379 | Get a typed JSON document from the index based on its id.
380 |
381 |
382 | :arg index: The name of the index
383 | :arg id: The document ID
384 | :arg doc_type: The type of the document (uses `_all` by default to
385 | fetch the first document matching the ID across all types)
386 | :arg _source: True or false to return the _source field or not, or a
387 | list of fields to return
388 | :arg _source_exclude: A list of fields to exclude from the returned
389 | _source field
390 | :arg _source_include: A list of fields to extract and return from the
391 | _source field
392 | :arg fields: A comma-separated list of fields to return in the response
393 | :arg parent: The ID of the parent document
394 | :arg preference: Specify the node or shard the operation should be
395 | performed on (default: random)
396 | :arg realtime: Specify whether to perform the operation in realtime or
397 | search mode
398 | :arg refresh: Refresh the shard containing the document before
399 | performing the operation
400 | :arg routing: Specific routing value
401 | """
402 | _, data = yield self.transport.perform_request(
403 | 'GET', _make_path(index, doc_type, id), params=params)
404 | raise gen.Return(data)
405 |
406 | @gen.coroutine
407 | @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable',
408 | 'local')
409 | def get_alias(self, index=None, name=None, params=None):
410 | """
411 | Retrieve a specified alias.
412 |
413 | :arg index: A comma-separated list of index names to filter aliases
414 | :arg name: A comma-separated list of alias names to return
415 | :arg allow_no_indices: Whether to ignore if a wildcard indices
416 | expression resolves into no concrete indices. (This includes `_all`
417 | string or when no indices have been specified)
418 | :arg expand_wildcards: Whether to expand wildcard expression to
419 |             concrete indices that are open, closed or both (default: 'all';
420 |             valid choices are: 'open', 'closed', 'none', 'all')
421 | :arg ignore_unavailable: Whether specified concrete indices should be
422 | ignored when unavailable (missing or closed)
423 | :arg local: Return local information, do not retrieve the state from
424 | master node (default: false)
425 | """
426 | _, result = yield self.transport.perform_request(
427 | 'GET', _make_path(index, '_alias', name), params=params)
428 | raise gen.Return(result)
429 |
430 | @gen.coroutine
431 | @query_params('_source_exclude', '_source_include', 'parent', 'preference',
432 | 'realtime', 'refresh', 'routing')
433 | def get_source(self, index, id, doc_type='_all', params=None):
434 | """
435 |         Get the source of a document by its index, type and id.
436 |
437 |
438 | :arg index: The name of the index
439 | :arg doc_type: The type of the document (uses `_all` by default to
440 | fetch the first document matching the ID across all types)
441 | :arg id: The document ID
442 |         :arg _source_exclude: A list of fields to exclude from the returned
443 |             _source field
444 |         :arg _source_include: A list of fields to extract and return from the
445 | _source field
446 | :arg parent: The ID of the parent document
447 | :arg preference: Specify the node or shard the operation should be
448 | performed on (default: random)
449 | :arg realtime: Specify whether to perform the operation in realtime or
450 | search mode
451 | :arg refresh: Refresh the shard containing the document before
452 | performing the operation
453 | :arg routing: Specific routing value
454 | """
455 | _, data = yield self.transport.perform_request(
456 | 'GET', _make_path(index, doc_type, id, '_source'), params=params)
457 | raise gen.Return(data)
458 |
459 | @gen.coroutine
460 | @query_params('_source', '_source_exclude', '_source_include', 'fields',
461 | 'parent', 'preference', 'realtime', 'refresh', 'routing')
462 | def mget(self, body, index=None, doc_type=None, params=None):
463 | """
464 | Get multiple documents based on an index, type (optional) and ids.
465 |
466 |
467 | :arg body: Document identifiers; can be either `docs` (containing full
468 |             document information) or `ids` (when index and type are provided
469 |             in the URL).
470 | :arg index: The name of the index
471 | :arg doc_type: The type of the document
472 | :arg _source: True or false to return the _source field or not, or a
473 | list of fields to return
474 | :arg _source_exclude: A list of fields to exclude from the returned
475 | _source field
476 | :arg _source_include: A list of fields to extract and return from the
477 | _source field
478 | :arg fields: A comma-separated list of fields to return in the response
479 | :arg parent: The ID of the parent document
480 | :arg preference: Specify the node or shard the operation should be
481 | performed on (default: random)
482 | :arg realtime: Specify whether to perform the operation in realtime or
483 | search mode
484 | :arg refresh: Refresh the shard containing the document before
485 | performing the operation
486 | :arg routing: Specific routing value
487 | """
488 | _, data = yield self.transport.perform_request(
489 | 'GET', _make_path(index, doc_type, '_mget'),
490 | params=params, body=body)
491 | raise gen.Return(data)
492 |
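    # Usage sketch for mget() inside a coroutine (ids, index and type are
    # illustrative); the body carries either full ``docs`` entries or bare
    # ``ids`` when index and doc_type are supplied:
    #
    #     result = yield es.mget({'ids': ['1', '2']},
    #                            index='test-index', doc_type='tweet')
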
493 | @gen.coroutine
494 | @query_params('consistency', 'fields', 'lang', 'parent', 'percolate',
495 | 'refresh', 'replication', 'retry_on_conflict', 'routing',
496 | 'script', 'timeout', 'timestamp', 'ttl', 'version',
497 | 'version_type')
498 | def update(self, index, doc_type, id, body=None, params=None):
499 | """
500 | Update a document based on a script or partial data provided.
501 |
502 |
503 | :arg index: The name of the index
504 | :arg doc_type: The type of the document
505 | :arg id: Document ID
506 | :arg body: The request definition using either `script` or partial `doc`
507 | :arg consistency: Explicit write consistency setting for the operation
508 | :arg fields: A comma-separated list of fields to return in the response
509 | :arg lang: The script language (default: mvel)
510 | :arg parent: ID of the parent document
511 | :arg percolate: Perform percolation during the operation; use specific
512 | registered query name, attribute, or wildcard
513 | :arg refresh: Refresh the index after performing the operation
514 | :arg replication: Specific replication type (default: sync)
515 | :arg retry_on_conflict: Specify how many times should the operation be
516 | retried when a conflict occurs (default: 0)
517 | :arg routing: Specific routing value
518 | :arg script: The URL-encoded script definition (instead of using
519 | request body)
520 | :arg timeout: Explicit operation timeout
521 | :arg timestamp: Explicit timestamp for the document
522 | :arg ttl: Expiration time for the document
523 | :arg version: Explicit version number for concurrency control
524 |         :arg version_type: Specific version type
525 | """
526 | _, data = yield self.transport.perform_request('POST',
527 | _make_path(index,
528 | doc_type, id,
529 | '_update'),
530 | params=params, body=body)
531 | raise gen.Return(data)
532 |
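    # Usage sketch for update() inside a coroutine (values are illustrative);
    # the body carries either a partial ``doc`` or a ``script``:
    #
    #     result = yield es.update(index='test-index', doc_type='tweet',
    #                              id='1', body={'doc': {'text': 'updated'}})
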
533 | @gen.coroutine
534 | @query_params('_source', '_source_exclude', '_source_include',
535 | 'analyze_wildcard', 'analyzer', 'default_operator', 'df',
536 | 'explain', 'fields', 'ignore_indices', 'indices_boost',
537 | 'lenient', 'lowercase_expanded_terms', 'from_', 'preference',
538 | 'q', 'routing', 'scroll', 'search_type', 'size', 'sort',
539 | 'source', 'stats', 'suggest_field', 'suggest_mode',
540 | 'suggest_size', 'suggest_text', 'timeout', 'version')
541 | def search(self, index=None, doc_type=None, body=None, params=None):
542 | """
543 | Execute a search query and get back search hits that match the query.
544 |
545 |
546 | :arg index: A comma-separated list of index names to search; use `_all`
547 | or empty string to perform the operation on all indices
548 | :arg doc_type: A comma-separated list of document types to search;
549 | leave empty to perform the operation on all types
550 | :arg body: The search definition using the Query DSL
551 | :arg _source: True or false to return the _source field or not, or a
552 | list of fields to return
553 | :arg _source_exclude: A list of fields to exclude from the returned
554 | _source field
555 | :arg _source_include: A list of fields to extract and return from the
556 | _source field
557 | :arg analyze_wildcard: Specify whether wildcard and prefix queries
558 | should be analyzed (default: false)
559 | :arg analyzer: The analyzer to use for the query string
560 | :arg default_operator: The default operator for query string query (AND
561 | or OR) (default: OR)
562 | :arg df: The field to use as default where no field prefix is given in
563 | the query string
564 | :arg explain: Specify whether to return detailed information about
565 | score computation as part of a hit
566 | :arg fields: A comma-separated list of fields to return as part of a hit
567 | :arg ignore_indices: When performed on multiple indices, allows to
568 | ignore `missing` ones (default: none)
569 | :arg indices_boost: Comma-separated list of index boosts
570 | :arg lenient: Specify whether format-based query failures (such as
571 | providing text to a numeric field) should be ignored
572 | :arg lowercase_expanded_terms: Specify whether query terms should be
573 | lowercased
574 | :arg from_: Starting offset (default: 0)
575 | :arg preference: Specify the node or shard the operation should be
576 | performed on (default: random)
577 | :arg q: Query in the Lucene query string syntax
578 | :arg routing: A comma-separated list of specific routing values
579 | :arg scroll: Specify how long a consistent view of the index should be
580 | maintained for scrolled search
581 | :arg search_type: Search operation type
582 | :arg size: Number of hits to return (default: 10)
583 |         :arg sort: A comma-separated list of field:direction pairs
584 | :arg source: The URL-encoded request definition using the Query DSL
585 | (instead of using request body)
586 | :arg stats: Specific 'tag' of the request for logging and statistical
587 | purposes
588 | :arg suggest_field: Specify which field to use for suggestions
589 | :arg suggest_mode: Specify suggest mode (default: missing)
590 | :arg suggest_size: How many suggestions to return in response
591 | :arg suggest_text: The source text for which the suggestions should be
592 | returned
593 | :arg timeout: Explicit operation timeout
594 | :arg version: Specify whether to return document version as part of a
595 | hit
596 | """
597 | # from is a reserved word so it cannot be used, use from_ instead
598 | if 'from_' in params:
599 | params['from'] = params.pop('from_')
600 |
601 | if doc_type and not index:
602 | index = '_all'
603 | _, data = yield self.transport.perform_request('GET',
604 | _make_path(index,
605 | doc_type,
606 | '_search'),
607 | params=params,
608 | body=body)
609 | raise gen.Return(data)
610 |
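    # Usage sketch for search() inside a coroutine (index and query are
    # illustrative):
    #
    #     result = yield es.search(
    #         index='test-index',
    #         body={'query': {'match': {'text': 'hello'}}},
    #         size=20)
    #     for hit in result['hits']['hits']:
    #         print(hit['_source'])
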
611 | @gen.coroutine
612 | @query_params('_source', '_source_exclude', '_source_include',
613 | 'analyze_wildcard', 'analyzer', 'default_operator',
614 | 'df', 'fields', 'lenient', 'lowercase_expanded_terms',
615 | 'parent', 'preference', 'q', 'routing', 'source')
616 | def explain(self, index, doc_type, id, body=None, params=None):
617 | """
618 | The explain api computes a score explanation for a query and a specific
619 | document. This can give useful feedback whether a document matches or
620 | didn't match a specific query.
621 |
622 |
623 | :arg index: The name of the index
624 | :arg doc_type: The type of the document
625 | :arg id: The document ID
626 | :arg body: The query definition using the Query DSL
627 | :arg _source: True or false to return the _source field or not, or a
628 | list of fields to return
629 | :arg _source_exclude: A list of fields to exclude from the returned
630 | _source field
631 | :arg _source_include: A list of fields to extract and return from the
632 | _source field
633 | :arg analyze_wildcard: Specify whether wildcards and prefix queries in
634 | the query string query should be analyzed (default: false)
635 | :arg analyzer: The analyzer for the query string query
636 | :arg default_operator: The default operator for query string query (AND
637 | or OR), (default: OR)
638 | :arg df: The default field for query string query (default: _all)
639 | :arg fields: A comma-separated list of fields to return in the response
640 | :arg lenient: Specify whether format-based query failures (such as
641 | providing text to a numeric field) should be ignored
642 | :arg lowercase_expanded_terms: Specify whether query terms should be
643 | lowercased
644 | :arg parent: The ID of the parent document
645 | :arg preference: Specify the node or shard the operation should be
646 | performed on (default: random)
647 | :arg q: Query in the Lucene query string syntax
648 | :arg routing: Specific routing value
649 | :arg source: The URL-encoded query definition (instead of using the
650 | request body)
651 | """
652 | _, data = yield self.transport.perform_request('GET',
653 | _make_path(index,
654 | doc_type, id,
655 | '_explain'),
656 | params=params, body=body)
657 | raise gen.Return(data)
658 |
659 | @gen.coroutine
660 | @query_params()
661 | def scroll(self, scroll_id, scroll, params=None):
662 | """
663 | Scroll a search request created by specifying the scroll parameter.
664 |
665 |
666 | :arg scroll_id: The scroll ID
667 | :arg scroll: Specify how long a consistent view of the index should be
668 | maintained for scrolled search
669 | """
670 | body = {
671 | "scroll": scroll,
672 | "scroll_id": scroll_id
673 | }
674 |
675 | if params:
676 | if "scroll" in params.keys():
677 | params.pop("scroll")
678 | if "scroll_id" in params.keys():
679 | params.pop("scroll_id")
680 |
681 | _, data = yield self.transport.perform_request('POST',
682 | _make_path('_search',
683 | 'scroll'),
684 | body=body,
685 | params=params)
686 | raise gen.Return(data)
687 |
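    # Scrolling sketch inside a coroutine (index name and sizes are
    # illustrative); scroll() posts the scroll_id in the request body, so
    # very long IDs do not overflow the query string:
    #
    #     page = yield es.search(index='test-index', scroll='1m', size=100)
    #     while page['hits']['hits']:
    #         page = yield es.scroll(page['_scroll_id'], '1m')
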
688 | @gen.coroutine
689 | @query_params()
690 | def clear_scroll(self, scroll_id, params=None):
691 | """
692 | Clear the scroll request created by specifying the scroll parameter to
693 | search.
694 |
695 |
696 | :arg scroll_id: The scroll ID or a list of scroll IDs
697 | """
698 | if not isinstance(scroll_id, list):
699 | scroll_id = [scroll_id]
700 |
701 | body = {
702 | "scroll_id": scroll_id
703 | }
704 |
705 | if params and "scroll_id" in params.keys():
706 | params.pop("scroll_id")
707 |
708 | _, data = yield self.transport.perform_request('DELETE',
709 | _make_path('_search',
710 | 'scroll'),
711 | body=body,
712 | params=params)
713 | raise gen.Return(data)
714 |
715 | @gen.coroutine
716 | @query_params('consistency', 'parent', 'refresh', 'replication', 'routing',
717 | 'timeout', 'version', 'version_type')
718 | def delete(self, index, doc_type, id, params=None):
719 | """
720 | Delete a typed JSON document from a specific index based on its id.
721 |
722 |
723 | :arg index: The name of the index
724 | :arg doc_type: The type of the document
725 | :arg id: The document ID
726 | :arg consistency: Specific write consistency setting for the operation
727 | :arg parent: ID of parent document
728 | :arg refresh: Refresh the index after performing the operation
729 | :arg replication: Specific replication type (default: sync)
730 | :arg routing: Specific routing value
731 | :arg timeout: Explicit operation timeout
732 | :arg version: Explicit version number for concurrency control
733 | :arg version_type: Specific version type
734 | """
735 | _, data = yield self.transport.perform_request('DELETE',
736 | _make_path(index,
737 | doc_type, id),
738 | params=params)
739 | raise gen.Return(data)
740 |
741 | @gen.coroutine
742 | @query_params('ignore_indices', 'min_score', 'preference', 'routing',
743 | 'source')
744 | def count(self, index=None, doc_type=None, body=None, params=None):
745 | """
746 | Execute a query and get the number of matches for that query.
747 |
748 |
749 | :arg index: A comma-separated list of indices to restrict the results
750 | :arg doc_type: A comma-separated list of types to restrict the results
751 | :arg body: A query to restrict the results (optional)
752 | :arg ignore_indices: When performed on multiple indices, allows to
753 | ignore `missing` ones (default: none)
754 | :arg min_score: Include only documents with a specific `_score` value
755 | in the result
756 | :arg preference: Specify the node or shard the operation should be
757 | performed on (default: random)
758 | :arg routing: Specific routing value
759 | :arg source: The URL-encoded query definition (instead of using the
760 | request body)
761 | """
762 | _, data = yield self.transport.perform_request('POST',
763 | _make_path(index,
764 | doc_type,
765 | '_count'),
766 | params=params, body=body)
767 | raise gen.Return(data)
768 |
769 | @gen.coroutine
770 | @query_params('consistency', 'refresh', 'replication')
771 | def bulk(self, body, index=None, doc_type=None, params=None):
772 | """
773 | Perform many index/delete operations in a single API call.
774 |
775 |
776 | See the :func:`~elasticsearch.helpers.bulk_index` for a more friendly
777 | API.
778 |
779 | :arg body: The operation definition and data (action-data pairs)
780 | :arg index: Default index for items which don't provide one
781 | :arg doc_type: Default document type for items which don't provide one
782 | :arg consistency: Explicit write consistency setting for the operation
783 | :arg refresh: Refresh the index after performing the operation
784 |         :arg replication: Explicitly set the replication type (default: sync)
785 | """
786 | _, data = yield self.transport.perform_request('POST',
787 | _make_path(index,
788 | doc_type,
789 | '_bulk'),
790 | params=params,
791 | body=self._bulk_body(body))
792 | raise gen.Return(data)
793 |
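    # Usage sketch for bulk() inside a coroutine (index, type and documents
    # are illustrative); the body alternates action/metadata entries with
    # source documents, as the Elasticsearch bulk API expects:
    #
    #     actions = [
    #         {'index': {'_index': 'test-index', '_type': 'tweet', '_id': '1'}},
    #         {'text': 'hello'},
    #         {'index': {'_index': 'test-index', '_type': 'tweet', '_id': '2'}},
    #         {'text': 'world'},
    #     ]
    #     result = yield es.bulk(actions)
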
794 | @gen.coroutine
795 | @query_params('search_type')
796 | def msearch(self, body, index=None, doc_type=None, params=None):
797 | """
798 | Execute several search requests within the same API.
799 |
800 |
801 | :arg body: The request definitions (metadata-search request definition
802 | pairs), separated by newlines
803 | :arg index: A comma-separated list of index names to use as default
804 | :arg doc_type: A comma-separated list of document types to use as default
805 | :arg search_type: Search operation type
806 | """
807 | _, data = yield self.transport.perform_request('GET',
808 | _make_path(index,
809 | doc_type,
810 | '_msearch'),
811 | params=params,
812 | body=self._bulk_body(body))
813 | raise gen.Return(data)
814 |
815 | @gen.coroutine
816 | @query_params('consistency', 'ignore_indices', 'replication', 'routing',
817 | 'source', 'timeout', 'q')
818 | def delete_by_query(self, index, doc_type=None, body=None, params=None):
819 | """
820 | Delete documents from one or more indices and one or more types based
821 | on a query.
822 |
823 |
824 | :arg index: A comma-separated list of indices to restrict the operation
825 | :arg doc_type: A comma-separated list of types to restrict the operation
826 | :arg body: A query to restrict the operation
827 | :arg consistency: Specific write consistency setting for the operation
828 | :arg ignore_indices: When performed on multiple indices, allows to
829 | ignore `missing` ones (default: none)
830 | :arg replication: Specific replication type (default: sync)
831 | :arg routing: Specific routing value
832 | :arg source: The URL-encoded query definition (instead of using the
833 | request body)
834 | :arg q: Query in the Lucene query string syntax
835 | :arg timeout: Explicit operation timeout
836 | """
837 | _, data = yield self.transport.perform_request('DELETE',
838 | _make_path(index,
839 | doc_type,
840 | '_query'),
841 | params=params, body=body)
842 | raise gen.Return(data)
843 |
844 | @gen.coroutine
845 | @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable',
846 | 'local')
847 | def get_mapping(self, index=None, doc_type=None, params=None):
848 | """
849 | Retrieve mapping definition of index or index/type.
850 |
851 | :arg index: A comma-separated list of index names
852 | :arg doc_type: A comma-separated list of document types
853 | :arg allow_no_indices: Whether to ignore if a wildcard indices
854 | expression resolves into no concrete indices. (This includes `_all`
855 | string or when no indices have been specified)
856 | :arg expand_wildcards: Whether to expand wildcard expression to concrete
857 |             indices that are open, closed or both (default: 'open'; valid
858 |             choices are: 'open', 'closed', 'none', 'all')
859 | :arg ignore_unavailable: Whether specified concrete indices should be
860 | ignored when unavailable (missing or closed)
861 | :arg local: Return local information, do not retrieve the state from
862 | master node (default: false)
863 | """
864 | _, data = yield self.transport.perform_request('GET',
865 | _make_path(index,
866 | '_mapping',
867 | doc_type),
868 | params=params)
869 | raise gen.Return(data)
870 |
871 | @gen.coroutine
872 | @query_params('ignore_indices', 'preference', 'routing', 'source')
873 | def suggest(self, index=None, body=None, params=None):
874 | """
875 | The suggest feature suggests similar looking terms based on a provided
876 | text by using a suggester.
877 |
878 |
879 | :arg index: A comma-separated list of index names to restrict the
880 | operation; use `_all` or empty string to perform the operation on
881 | all indices
882 | :arg body: The request definition
883 | :arg ignore_indices: When performed on multiple indices, allows to
884 | ignore `missing` ones (default: none)
885 | :arg preference: Specify the node or shard the operation should be
886 | performed on (default: random)
887 | :arg routing: Specific routing value
888 | :arg source: The URL-encoded request definition (instead of using
889 | request body)
890 | """
891 | _, data = yield self.transport.perform_request('POST',
892 | _make_path(index,
893 | '_suggest'),
894 | params=params, body=body)
895 | raise gen.Return(data)
896 |
897 | @gen.coroutine
898 | @query_params('prefer_local')
899 | def percolate(self, index, doc_type, body, params=None):
900 | """
901 |         Send a percolate request which includes a doc, and get back the queries
902 | that match on that doc out of the set of registered queries.
903 |
904 |
905 | :arg index: The name of the index with a registered percolator query
906 | :arg doc_type: The document type
907 | :arg body: The document (`doc`) to percolate against registered queries;
908 | optionally also a `query` to limit the percolation to specific
909 | registered queries
910 | :arg prefer_local: With `true`, specify that a local shard should be
911 | used if available, with `false`, use a random shard (default: true)
912 | """
913 | _, data = yield self.transport.perform_request('GET',
914 | _make_path(index,
915 | doc_type,
916 | '_percolate'),
917 | params=params, body=body)
918 | raise gen.Return(data)
919 |
920 | @gen.coroutine
921 | @query_params('boost_terms', 'max_doc_freq', 'max_query_terms',
922 | 'max_word_len', 'min_doc_freq', 'min_term_freq',
923 | 'min_word_len', 'mlt_fields', 'percent_terms_to_match',
924 | 'routing', 'search_from', 'search_indices',
925 | 'search_query_hint', 'search_scroll', 'search_size',
926 | 'search_source', 'search_type', 'search_types', 'stop_words')
927 | def mlt(self, index, doc_type, id, body=None, params=None):
928 | """
929 | Get documents that are "like" a specified document.
930 |
931 |
932 | :arg index: The name of the index
933 | :arg doc_type: The type of the document (use `_all` to fetch the first
934 | document matching the ID across all types)
935 | :arg id: The document ID
936 | :arg body: A specific search request definition
937 | :arg boost_terms: The boost factor
938 | :arg max_doc_freq: The word occurrence frequency as count: words with
939 | higher occurrence in the corpus will be ignored
940 | :arg max_query_terms: The maximum query terms to be included in the
941 | generated query
942 |         :arg max_word_len: The maximum length of the word: longer words will
943 | be ignored
944 | :arg min_doc_freq: The word occurrence frequency as count: words with
945 | lower occurrence in the corpus will be ignored
946 | :arg min_term_freq: The term frequency as percent: terms with lower
947 | occurrence in the source document will be ignored
948 | :arg min_word_len: The minimum length of the word: shorter words will
949 | be ignored
950 | :arg mlt_fields: Specific fields to perform the query against
951 | :arg percent_terms_to_match: How many terms have to match in order to
952 | consider the document a match (default: 0.3)
953 | :arg routing: Specific routing value
954 | :arg search_from: The offset from which to return results
955 | :arg search_indices: A comma-separated list of indices to perform the
956 | query against (default: the index containing the document)
957 | :arg search_query_hint: The search query hint
958 | :arg search_scroll: A scroll search request definition
959 | :arg search_size: The number of documents to return (default: 10)
960 | :arg search_source: A specific search request definition (instead of
961 | using the request body)
962 | :arg search_type: Specific search type (eg. `dfs_then_fetch`, `count`,
963 | etc)
964 | :arg search_types: A comma-separated list of types to perform the query
965 | against (default: the same type as the document)
966 | :arg stop_words: A list of stop words to be ignored
967 | """
968 | _, data = yield self.transport.perform_request(
969 | 'GET', _make_path(index, doc_type, id, '_mlt'),
970 | params=params, body=body)
971 | raise gen.Return(data)
972 |
--------------------------------------------------------------------------------