├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.rst ├── requirements.txt ├── setup.cfg ├── setup.py ├── test-requirements.txt └── tornado_elasticsearch.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | .coverage 4 | .idea 5 | .DS_Store 6 | build 7 | dist 8 | tests/cover 9 | cover 10 | atlassian-ide-plugin.xml 11 | docs/_build 12 | MANIFEST 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2017 Gavin M. Roy 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of the tornado_elasticsearch library nor the names of its 13 | contributors may be used to endorse or promote products derived from this 14 | software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 24 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 25 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.rst 3 | requirements.txt 4 | test-requirements.txt 5 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | tornado_elasticsearch 2 | ===================== 3 | Extends the official Elasticsearch Python API adding Tornado AsyncHTTPClient 4 | support. 5 | 6 | |Version| |LICENSE| 7 | 8 | Installation 9 | ------------ 10 | tornado_elasticsearch is available on the Python Package Index and can be 11 | installed using pip or easy_install: 12 | 13 | .. code-block:: sh 14 | 15 | pip install tornado_elasticsearch 16 | 17 | Example Request Handlers 18 | ------------------------ 19 | .. 
code-block:: python
20 | 
21 |     import datetime
22 |     import uuid
23 |     from tornado import gen, web
24 |     from tornado_elasticsearch import AsyncElasticsearch
25 | 
26 |     class Example(web.RequestHandler):
27 | 
28 |         def initialize(self):
29 |             self.es = AsyncElasticsearch()
30 | 
31 |         @web.asynchronous
32 |         @gen.engine
33 |         def delete(self, *args, **kwargs):
34 |             result = yield self.es.delete(index='test-index', doc_type='tweet',
35 |                                           id=self.get_argument('id'))
36 |             self.finish(result)
37 | 
38 |         @web.asynchronous
39 |         @gen.engine
40 |         def get(self, *args, **kwargs):
41 |             if self.get_argument('id', None):
42 |                 result = yield self.es.get(index='test-index', doc_type='tweet',
43 |                                            id=self.get_argument('id'))
44 |             else:
45 |                 result = yield self.es.search(index='test-index')
46 |             self.finish(result)
47 | 
48 |         @web.asynchronous
49 |         @gen.engine
50 |         def post(self, *args, **kwargs):
51 |             doc = {
52 |                 'author': self.get_current_user() or 'Unknown',
53 |                 'text': self.get_argument('text'),
54 |                 'timestamp': datetime.datetime.now()
55 |             }
56 |             result = yield self.es.index(index='test-index',
57 |                                          doc_type='tweet',
58 |                                          body=doc,
59 |                                          id=str(uuid.uuid4()))
60 |             self.finish(result)
61 | 
62 | 
63 |     class Info(web.RequestHandler):
64 | 
65 |         @web.asynchronous
66 |         @gen.engine
67 |         def get(self, *args, **kwargs):
68 |             es = AsyncElasticsearch()
69 |             info = yield es.info()
70 |             self.finish(info)
71 | 
72 | 
73 | Version History
74 | ---------------
75 | - 0.5.0:
76 |   - Bugfixes:
77 |     - HTTP Auth
78 |     - Add timeout support
79 |     - Allow scroll to use POST, since scroll_id can be too long
80 |     - Fix yield issue
81 |   - Add max_clients to AsyncElasticsearch constructor
82 |   - Added get_alias
83 |   - Added get_mapping
84 |   - Add cluster health
85 | - 0.4.0: Bugfix: Python3 decoding issues
86 | - 0.3.0: Bugfix: Add body to log_request_fail call (#1)
87 | - 0.2.0: Bugfix: force method to POST if GET and body passed
88 | - 0.1.0: Initial version
89 | 
90 | .. |Version| image:: https://img.shields.io/pypi/v/tornado_elasticsearch.svg?
91 |    :target: http://badge.fury.io/py/tornado_elasticsearch
92 | 
93 | .. |LICENSE| image:: https://img.shields.io/pypi/l/tornado_elasticsearch.svg?
94 |    :target: https://tornado_elasticsearch.readthedocs.org
95 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tornado>4
2 | elasticsearch>=2.3
3 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [nosetests]
2 | with-coverage=1
3 | cover-package=tornado_elasticsearch
4 | cover-branches=1
5 | cover-erase=1
6 | 
7 | [wheel]
8 | universal=1
9 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | setuptools.setup(
4 |     name='tornado_elasticsearch',
5 |     version='0.5.0',
6 |     description=('Extends the official Elasticsearch Python API adding '
7 |                  'Tornado AsyncHTTPClient support'),
8 |     long_description=open('README.rst').read(),
9 |     author='Gavin M. 
Roy', 10 | author_email='gavinmroy@gmail.com', 11 | url='https://github.com/gmr/tornado_elasticsearch', 12 | py_modules=['tornado_elasticsearch'], 13 | install_requires=['elasticsearch', 'tornado'], 14 | license='BSD', 15 | classifiers=[ 16 | 'Development Status :: 3 - Alpha', 17 | 'Intended Audience :: Developers', 18 | 'License :: OSI Approved :: BSD License', 19 | 'Operating System :: OS Independent', 20 | 'Programming Language :: Python :: 2', 21 | 'Programming Language :: Python :: 2.6', 22 | 'Programming Language :: Python :: 2.7', 23 | 'Programming Language :: Python :: 3', 24 | 'Programming Language :: Python :: 3.2', 25 | 'Programming Language :: Python :: 3.3', 26 | 'Programming Language :: Python :: Implementation :: CPython', 27 | 'Programming Language :: Python :: Implementation :: PyPy', 28 | 'Topic :: Communications', 29 | 'Topic :: Internet', 30 | 'Topic :: Software Development :: Libraries' 31 | ], 32 | zip_safe=True) 33 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | coverage 2 | codecov 3 | nose 4 | mock 5 | yapf 6 | pep8 7 | -r requirements.txt 8 | -------------------------------------------------------------------------------- /tornado_elasticsearch.py: -------------------------------------------------------------------------------- 1 | """tornado_elasticsearch extends the official elasticsearch library adding 2 | asynchronous support for the Tornado stack. 3 | 4 | See http://elasticsearch-py.readthedocs.org/en/latest/ for information 5 | on how to use the API beyond the introduction for how to use with Tornado:: 6 | 7 | from tornado import gen 8 | from tornado import web 9 | from tornado_elasticsearch import AsyncElasticsearch 10 | 11 | 12 | class Info(web.RequestHandler): 13 | 14 | @web.asynchronous 15 | @gen.engine 16 | def get(self, *args, **kwargs): 17 | es = AsyncElasticsearch() 18 | info = yield es.info() 19 | self.finish(info) 20 | 21 | """ 22 | from elasticsearch.connection.base import Connection 23 | from elasticsearch import exceptions 24 | from elasticsearch.client import Elasticsearch 25 | from elasticsearch.transport import Transport, TransportError 26 | from elasticsearch.client.utils import query_params, _make_path 27 | 28 | from tornado import concurrent 29 | from tornado import gen 30 | from tornado import httpclient 31 | import logging 32 | import time 33 | try: 34 | from urllib import urlencode 35 | except ImportError: 36 | from urllib.parse import urlencode 37 | from tornado import version 38 | 39 | __version__ = '0.5.0' 40 | 41 | LOGGER = logging.getLogger(__name__) 42 | 43 | 44 | class AsyncHttpConnection(Connection): 45 | """Add Tornado Asynchronous support to ElasticSearch. 
46 | 47 | :param str host: The host for the connection 48 | :param int port: The port for the connection 49 | :param str|tuple http_auth: optional http auth information as either a 50 | colon delimited string ``("username:password")`` or 51 | tuple ``(username, password)`` 52 | :param int request_timeout: optional default timeout in seconds 53 | :arg use_ssl: use ssl for the connection if ``True`` 54 | 55 | """ 56 | _auth_user = None 57 | _auth_password = None 58 | _user_agent = 'tornado_elasticsearch %s/Tornado %s' % (__version__, version) 59 | ssl_transport_schema = 'https' 60 | 61 | def __init__(self, host='localhost', port=9200, http_auth=None, 62 | use_ssl=False, request_timeout=None, max_clients=10, **kwargs): 63 | super(AsyncHttpConnection, self).__init__(host=host, port=port, 64 | **kwargs) 65 | self._assign_auth_values(http_auth) 66 | self.base_url = '%s://%s:%s%s' % (self.ssl_transport_schema if use_ssl 67 | else self.transport_schema, 68 | host, port, self.url_prefix) 69 | httpclient.AsyncHTTPClient.configure(None, max_clients=max_clients) 70 | self._client = httpclient.AsyncHTTPClient() 71 | self._headers = {'Content-Type': 'application/json; charset=UTF-8'} 72 | self._start_time = None 73 | self.request_timeout = request_timeout 74 | 75 | @concurrent.return_future 76 | def perform_request(self, method, url, params=None, body=None, 77 | timeout=None, ignore=(), callback=None): 78 | request_uri = self._request_uri(url, params) 79 | LOGGER.debug('%s, %r, %r', url, body, params) 80 | kwargs = self._request_kwargs(method, body, timeout) 81 | self._start_time = time.time() 82 | 83 | def on_response(response): 84 | duration = time.time() - self._start_time 85 | raw_data = response.body.decode('utf-8') \ 86 | if response.body is not None else None 87 | LOGGER.info('Response from %s: %s', url, response.code) 88 | if not (200 <= response.code < 300) and \ 89 | response.code not in ignore: 90 | LOGGER.debug('Error: %r', raw_data) 91 | self.log_request_fail(method, request_uri, url, body, duration, 92 | response.code) 93 | error = exceptions.HTTP_EXCEPTIONS.get(response.code, 94 | TransportError) 95 | raise error(response.code, raw_data) 96 | self.log_request_success(method, request_uri, url, body, 97 | response.code, raw_data, duration) 98 | callback((response.code, response.headers, raw_data)) 99 | 100 | LOGGER.debug('Fetching [%s] %s', kwargs['method'], request_uri) 101 | LOGGER.debug('kwargs: %r', kwargs) 102 | self._client.fetch(httpclient.HTTPRequest(request_uri, **kwargs), 103 | callback=on_response) 104 | 105 | def _assign_auth_values(self, http_auth): 106 | """Take the http_auth value and split it into the attributes that 107 | carry the http auth username and password 108 | 109 | :param str|tuple http_auth: The http auth value 110 | 111 | """ 112 | if not http_auth: 113 | pass 114 | elif isinstance(http_auth, (tuple, list)): 115 | self._auth_user, self._auth_password = http_auth 116 | elif isinstance(http_auth, str): 117 | self._auth_user, self._auth_password = http_auth.split(':') 118 | else: 119 | raise ValueError('HTTP Auth Credentials should be str or ' 120 | 'tuple, not %s' % type(http_auth)) 121 | 122 | def _request_kwargs(self, method, body, timeout): 123 | if body and method == 'GET': 124 | method = 'POST' 125 | kwargs = {'method': method, 'user_agent': self._user_agent, 126 | 'headers': self._headers} 127 | if self.request_timeout is not None: 128 | kwargs['request_timeout'] = self.request_timeout 129 | if self._auth_user and self._auth_password: 130 | 
kwargs['auth_username'] = self._auth_user 131 | kwargs['auth_password'] = self._auth_password 132 | if body: 133 | kwargs['body'] = body 134 | if timeout: 135 | kwargs['request_timeout'] = timeout 136 | 137 | kwargs['allow_nonstandard_methods'] = True 138 | return kwargs 139 | 140 | def _request_uri(self, url, params): 141 | uri = self.url_prefix + url 142 | if params: 143 | uri = '%s?%s' % (uri, urlencode(params or {})) 144 | return '%s%s' % (self.base_url, uri) 145 | 146 | 147 | class AsyncTransport(Transport): 148 | 149 | @gen.coroutine 150 | def perform_request(self, method, url, params=None, body=None): 151 | """Perform the actual request. Retrieve a connection from the 152 | connection pool, pass all the information to it's perform_request 153 | method and return the data. 154 | 155 | If an exception was raised, mark the connection as failed and retry (up 156 | to `max_retries` times). 157 | 158 | If the operation was successful and the connection used was previously 159 | marked as dead, mark it as live, resetting it's failure count. 160 | 161 | :param method: HTTP method to use 162 | :param url: absolute url (without host) to target 163 | :param params: dictionary of query parameters, will be handed over to 164 | the underlying :class:`~torando_elasticsearch.AsyncHTTPConnection` 165 | class for serialization 166 | :param body: body of the request, will be serialized using serializer 167 | and passed to the connection 168 | 169 | """ 170 | if body is not None: 171 | body = self.serializer.dumps(body) 172 | 173 | # some clients or environments don't support sending GET with body 174 | if method in ('HEAD', 'GET') and self.send_get_body_as != 'GET': 175 | # send it as post instead 176 | if self.send_get_body_as == 'POST': 177 | method = 'POST' 178 | 179 | # or as source parameter 180 | elif self.send_get_body_as == 'source': 181 | if params is None: 182 | params = {} 183 | params['source'] = body 184 | body = None 185 | 186 | if body is not None: 187 | try: 188 | body = body.encode('utf-8') 189 | except (UnicodeDecodeError, AttributeError): 190 | # bytes/str - no need to re-encode 191 | pass 192 | 193 | ignore = () 194 | if params and 'ignore' in params: 195 | ignore = params.pop('ignore') 196 | if isinstance(ignore, int): 197 | ignore = (ignore, ) 198 | 199 | for attempt in range(self.max_retries + 1): 200 | connection = self.get_connection() 201 | try: 202 | result = yield connection.perform_request(method, url, 203 | params, body, 204 | ignore=ignore) 205 | (status, headers, data) = result 206 | except TransportError as e: 207 | retry = False 208 | if isinstance(e, exceptions.ConnectionTimeout): 209 | retry = self.retry_on_timeout 210 | elif isinstance(e, exceptions.ConnectionError): 211 | retry = True 212 | elif e.status_code in self.retry_on_status: 213 | retry = True 214 | 215 | if retry: 216 | # only mark as dead if we are retrying 217 | self.mark_dead(connection) 218 | # raise exception on last retry 219 | if attempt == self.max_retries: 220 | raise 221 | else: 222 | raise 223 | 224 | else: 225 | # connection didn't fail, confirm it's live status 226 | self.connection_pool.mark_live(connection) 227 | response = self.deserializer.loads(data, 228 | headers.get('content-type') 229 | ) if data else None 230 | raise gen.Return((status, response)) 231 | 232 | 233 | class AsyncElasticsearch(Elasticsearch): 234 | """Extends the official elasticsearch.Elasticsearch object to make the 235 | client invoked methods coroutines. 
236 | 
237 |     """
238 |     def __init__(self, hosts=None, **kwargs):
239 |         """Create a new AsyncElasticsearch instance
240 | 
241 |         """
242 |         kwargs['connection_class'] = AsyncHttpConnection
243 |         kwargs['transport_class'] = AsyncTransport
244 |         super(AsyncElasticsearch, self).__init__(hosts, **kwargs)
245 | 
246 |     @gen.coroutine
247 |     @query_params()
248 |     def ping(self, params=None):
249 |         """ Returns True if the cluster is up, False otherwise. """
250 |         try:
251 |             yield self.transport.perform_request('HEAD', '/', params=params)
252 |         except TransportError:
253 |             raise gen.Return(False)
254 |         raise gen.Return(True)
255 | 
256 |     @gen.coroutine
257 |     @query_params()
258 |     def info(self, params=None):
259 |         """Get the basic info from the current cluster.
260 | 
261 |         :rtype: dict
262 | 
263 |         """
264 |         _, data = yield self.transport.perform_request('GET', '/',
265 |                                                        params=params)
266 |         raise gen.Return(data)
267 | 
268 |     @gen.coroutine
269 |     def health(self, params=None):
270 |         """Coroutine. Queries cluster Health API.
271 | 
272 |         Returns a 2-tuple, where first element is request status, and second
273 |         element is a dictionary with response data.
274 | 
275 |         :param params: dictionary of query parameters, will be handed over to
276 |             the underlying :class:`~tornado_elasticsearch.AsyncHttpConnection`
277 |             class for serialization
278 | 
279 |         """
280 |         status, data = yield self.transport.perform_request(
281 |             "GET", "/_cluster/health", params=params)
282 |         raise gen.Return((status, data))
283 | 
284 |     @gen.coroutine
285 |     @query_params('consistency', 'id', 'parent', 'percolate', 'refresh',
286 |                   'replication', 'routing', 'timeout', 'timestamp', 'ttl',
287 |                   'version', 'version_type')
288 |     def create(self, index, doc_type, body, id=None, params=None):
289 |         """
290 |         Adds a typed JSON document in a specific index, making it searchable.
291 |         Behind the scenes this method calls index(..., op_type='create')
292 |         ``_
293 | 
294 |         :arg index: The name of the index
295 |         :arg doc_type: The type of the document
296 |         :arg id: Document ID
297 |         :arg body: The document
298 |         :arg consistency: Explicit write consistency setting for the operation
299 |         :arg id: Specific document ID (when the POST method is used)
300 |         :arg parent: ID of the parent document
301 |         :arg percolate: Percolator queries to execute while indexing the doc
302 |         :arg refresh: Refresh the index after performing the operation
303 |         :arg replication: Specific replication type (default: sync)
304 |         :arg routing: Specific routing value
305 |         :arg timeout: Explicit operation timeout
306 |         :arg timestamp: Explicit timestamp for the document
307 |         :arg ttl: Expiration time for the document
308 |         :arg version: Explicit version number for concurrency control
309 |         :arg version_type: Specific version type
310 |         """
311 |         result = yield self.index(index, doc_type, body, id=id, params=params,
312 |                                   op_type='create')
313 |         raise gen.Return(result)
314 | 
315 |     @gen.coroutine
316 |     @query_params('consistency', 'op_type', 'parent', 'percolate', 'refresh',
317 |                   'replication', 'routing', 'timeout', 'timestamp', 'ttl',
318 |                   'version', 'version_type')
319 |     def index(self, index, doc_type, body, id=None, params=None):
320 |         """
321 |         Adds or updates a typed JSON document in a specific index, making it
322 |         searchable. ``_
323 | 
324 |         :arg index: The name of the index
325 |         :arg doc_type: The type of the document
326 |         :arg body: The document
327 |         :arg id: Document ID
328 |         :arg consistency: Explicit write consistency setting for the operation
329 |         :arg op_type: Explicit operation type (default: index)
330 |         :arg parent: ID of the parent document
331 |         :arg percolate: Percolator queries to execute while indexing the doc
332 |         :arg refresh: Refresh the index after performing the operation
333 |         :arg replication: Specific replication type (default: sync)
334 |         :arg routing: Specific routing value
335 |         :arg timeout: Explicit operation timeout
336 |         :arg timestamp: Explicit timestamp for the document
337 |         :arg ttl: Expiration time for the document
338 |         :arg version: Explicit version number for concurrency control
339 |         :arg version_type: Specific version type
340 | 
341 |         """
342 |         _, data = yield self.transport.perform_request(
343 |             'PUT' if id else 'POST', _make_path(index, doc_type, id),
344 |             params=params, body=body)
345 |         raise gen.Return(data)
346 | 
347 |     @gen.coroutine
348 |     @query_params('parent', 'preference', 'realtime', 'refresh', 'routing')
349 |     def exists(self, index, id, doc_type='_all', params=None):
350 |         """
351 |         Returns a boolean indicating whether or not given document exists in
352 |         Elasticsearch. ``_
353 | 
354 |         :arg index: The name of the index
355 |         :arg id: The document ID
356 |         :arg doc_type: The type of the document (uses `_all` by default to
357 |             fetch the first document matching the ID across all types)
358 |         :arg parent: The ID of the parent document
359 |         :arg preference: Specify the node or shard the operation should be
360 |             performed on (default: random)
361 |         :arg realtime: Specify whether to perform the operation in realtime or
362 |             search mode
363 |         :arg refresh: Refresh the shard containing the document before
364 |             performing the operation
365 |         :arg routing: Specific routing value
366 |         """
367 |         try:
368 |             yield self.transport.perform_request(
369 |                 'HEAD', _make_path(index, doc_type, id), params=params)
370 |         except exceptions.NotFoundError:
371 |             raise gen.Return(False)
372 |         raise gen.Return(True)
373 | 
374 |     @gen.coroutine
375 |     @query_params('_source', '_source_exclude', '_source_include', 'fields',
376 |                   'parent', 'preference', 'realtime', 'refresh', 'routing')
377 |     def get(self, index, id, doc_type='_all', params=None):
378 |         """
379 |         Get a typed JSON document from the index based on its id.
380 | ``_ 381 | 382 | :arg index: The name of the index 383 | :arg id: The document ID 384 | :arg doc_type: The type of the document (uses `_all` by default to 385 | fetch the first document matching the ID across all types) 386 | :arg _source: True or false to return the _source field or not, or a 387 | list of fields to return 388 | :arg _source_exclude: A list of fields to exclude from the returned 389 | _source field 390 | :arg _source_include: A list of fields to extract and return from the 391 | _source field 392 | :arg fields: A comma-separated list of fields to return in the response 393 | :arg parent: The ID of the parent document 394 | :arg preference: Specify the node or shard the operation should be 395 | performed on (default: random) 396 | :arg realtime: Specify whether to perform the operation in realtime or 397 | search mode 398 | :arg refresh: Refresh the shard containing the document before 399 | performing the operation 400 | :arg routing: Specific routing value 401 | """ 402 | _, data = yield self.transport.perform_request( 403 | 'GET', _make_path(index, doc_type, id), params=params) 404 | raise gen.Return(data) 405 | 406 | @gen.coroutine 407 | @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', 408 | 'local') 409 | def get_alias(self, index=None, name=None, params=None): 410 | """ 411 | Retrieve a specified alias. 412 | ``_ 413 | :arg index: A comma-separated list of index names to filter aliases 414 | :arg name: A comma-separated list of alias names to return 415 | :arg allow_no_indices: Whether to ignore if a wildcard indices 416 | expression resolves into no concrete indices. (This includes `_all` 417 | string or when no indices have been specified) 418 | :arg expand_wildcards: Whether to expand wildcard expression to 419 | concrete indices that are open, closed or both., default 'all', 420 | valid choices are: 'open', 'closed', 'none', 'all' 421 | :arg ignore_unavailable: Whether specified concrete indices should be 422 | ignored when unavailable (missing or closed) 423 | :arg local: Return local information, do not retrieve the state from 424 | master node (default: false) 425 | """ 426 | _, result = yield self.transport.perform_request( 427 | 'GET', _make_path(index, '_alias', name), params=params) 428 | raise gen.Return(result) 429 | 430 | @gen.coroutine 431 | @query_params('_source_exclude', '_source_include', 'parent', 'preference', 432 | 'realtime', 'refresh', 'routing') 433 | def get_source(self, index, id, doc_type='_all', params=None): 434 | """ 435 | Get the source of a document by it's index, type and id. 
436 | ``_ 437 | 438 | :arg index: The name of the index 439 | :arg doc_type: The type of the document (uses `_all` by default to 440 | fetch the first document matching the ID across all types) 441 | :arg id: The document ID 442 | :arg exclude: A list of fields to exclude from the returned 443 | _source field 444 | :arg include: A list of fields to extract and return from the 445 | _source field 446 | :arg parent: The ID of the parent document 447 | :arg preference: Specify the node or shard the operation should be 448 | performed on (default: random) 449 | :arg realtime: Specify whether to perform the operation in realtime or 450 | search mode 451 | :arg refresh: Refresh the shard containing the document before 452 | performing the operation 453 | :arg routing: Specific routing value 454 | """ 455 | _, data = yield self.transport.perform_request( 456 | 'GET', _make_path(index, doc_type, id, '_source'), params=params) 457 | raise gen.Return(data) 458 | 459 | @gen.coroutine 460 | @query_params('_source', '_source_exclude', '_source_include', 'fields', 461 | 'parent', 'preference', 'realtime', 'refresh', 'routing') 462 | def mget(self, body, index=None, doc_type=None, params=None): 463 | """ 464 | Get multiple documents based on an index, type (optional) and ids. 465 | ``_ 466 | 467 | :arg body: Document identifiers; can be either `docs` (containing full 468 | document information) or `ids` (when index and type is provided 469 | in the URL. 470 | :arg index: The name of the index 471 | :arg doc_type: The type of the document 472 | :arg _source: True or false to return the _source field or not, or a 473 | list of fields to return 474 | :arg _source_exclude: A list of fields to exclude from the returned 475 | _source field 476 | :arg _source_include: A list of fields to extract and return from the 477 | _source field 478 | :arg fields: A comma-separated list of fields to return in the response 479 | :arg parent: The ID of the parent document 480 | :arg preference: Specify the node or shard the operation should be 481 | performed on (default: random) 482 | :arg realtime: Specify whether to perform the operation in realtime or 483 | search mode 484 | :arg refresh: Refresh the shard containing the document before 485 | performing the operation 486 | :arg routing: Specific routing value 487 | """ 488 | _, data = yield self.transport.perform_request( 489 | 'GET', _make_path(index, doc_type, '_mget'), 490 | params=params, body=body) 491 | raise gen.Return(data) 492 | 493 | @gen.coroutine 494 | @query_params('consistency', 'fields', 'lang', 'parent', 'percolate', 495 | 'refresh', 'replication', 'retry_on_conflict', 'routing', 496 | 'script', 'timeout', 'timestamp', 'ttl', 'version', 497 | 'version_type') 498 | def update(self, index, doc_type, id, body=None, params=None): 499 | """ 500 | Update a document based on a script or partial data provided. 
501 | ``_ 502 | 503 | :arg index: The name of the index 504 | :arg doc_type: The type of the document 505 | :arg id: Document ID 506 | :arg body: The request definition using either `script` or partial `doc` 507 | :arg consistency: Explicit write consistency setting for the operation 508 | :arg fields: A comma-separated list of fields to return in the response 509 | :arg lang: The script language (default: mvel) 510 | :arg parent: ID of the parent document 511 | :arg percolate: Perform percolation during the operation; use specific 512 | registered query name, attribute, or wildcard 513 | :arg refresh: Refresh the index after performing the operation 514 | :arg replication: Specific replication type (default: sync) 515 | :arg retry_on_conflict: Specify how many times should the operation be 516 | retried when a conflict occurs (default: 0) 517 | :arg routing: Specific routing value 518 | :arg script: The URL-encoded script definition (instead of using 519 | request body) 520 | :arg timeout: Explicit operation timeout 521 | :arg timestamp: Explicit timestamp for the document 522 | :arg ttl: Expiration time for the document 523 | :arg version: Explicit version number for concurrency control 524 | :arg version_type: Explicit version number for concurrency control 525 | """ 526 | _, data = yield self.transport.perform_request('POST', 527 | _make_path(index, 528 | doc_type, id, 529 | '_update'), 530 | params=params, body=body) 531 | raise gen.Return(data) 532 | 533 | @gen.coroutine 534 | @query_params('_source', '_source_exclude', '_source_include', 535 | 'analyze_wildcard', 'analyzer', 'default_operator', 'df', 536 | 'explain', 'fields', 'ignore_indices', 'indices_boost', 537 | 'lenient', 'lowercase_expanded_terms', 'from_', 'preference', 538 | 'q', 'routing', 'scroll', 'search_type', 'size', 'sort', 539 | 'source', 'stats', 'suggest_field', 'suggest_mode', 540 | 'suggest_size', 'suggest_text', 'timeout', 'version') 541 | def search(self, index=None, doc_type=None, body=None, params=None): 542 | """ 543 | Execute a search query and get back search hits that match the query. 
544 | ``_ 545 | 546 | :arg index: A comma-separated list of index names to search; use `_all` 547 | or empty string to perform the operation on all indices 548 | :arg doc_type: A comma-separated list of document types to search; 549 | leave empty to perform the operation on all types 550 | :arg body: The search definition using the Query DSL 551 | :arg _source: True or false to return the _source field or not, or a 552 | list of fields to return 553 | :arg _source_exclude: A list of fields to exclude from the returned 554 | _source field 555 | :arg _source_include: A list of fields to extract and return from the 556 | _source field 557 | :arg analyze_wildcard: Specify whether wildcard and prefix queries 558 | should be analyzed (default: false) 559 | :arg analyzer: The analyzer to use for the query string 560 | :arg default_operator: The default operator for query string query (AND 561 | or OR) (default: OR) 562 | :arg df: The field to use as default where no field prefix is given in 563 | the query string 564 | :arg explain: Specify whether to return detailed information about 565 | score computation as part of a hit 566 | :arg fields: A comma-separated list of fields to return as part of a hit 567 | :arg ignore_indices: When performed on multiple indices, allows to 568 | ignore `missing` ones (default: none) 569 | :arg indices_boost: Comma-separated list of index boosts 570 | :arg lenient: Specify whether format-based query failures (such as 571 | providing text to a numeric field) should be ignored 572 | :arg lowercase_expanded_terms: Specify whether query terms should be 573 | lowercased 574 | :arg from_: Starting offset (default: 0) 575 | :arg preference: Specify the node or shard the operation should be 576 | performed on (default: random) 577 | :arg q: Query in the Lucene query string syntax 578 | :arg routing: A comma-separated list of specific routing values 579 | :arg scroll: Specify how long a consistent view of the index should be 580 | maintained for scrolled search 581 | :arg search_type: Search operation type 582 | :arg size: Number of hits to return (default: 10) 583 | :arg sort: A comma-separated list of : pairs 584 | :arg source: The URL-encoded request definition using the Query DSL 585 | (instead of using request body) 586 | :arg stats: Specific 'tag' of the request for logging and statistical 587 | purposes 588 | :arg suggest_field: Specify which field to use for suggestions 589 | :arg suggest_mode: Specify suggest mode (default: missing) 590 | :arg suggest_size: How many suggestions to return in response 591 | :arg suggest_text: The source text for which the suggestions should be 592 | returned 593 | :arg timeout: Explicit operation timeout 594 | :arg version: Specify whether to return document version as part of a 595 | hit 596 | """ 597 | # from is a reserved word so it cannot be used, use from_ instead 598 | if 'from_' in params: 599 | params['from'] = params.pop('from_') 600 | 601 | if doc_type and not index: 602 | index = '_all' 603 | _, data = yield self.transport.perform_request('GET', 604 | _make_path(index, 605 | doc_type, 606 | '_search'), 607 | params=params, 608 | body=body) 609 | raise gen.Return(data) 610 | 611 | @gen.coroutine 612 | @query_params('_source', '_source_exclude', '_source_include', 613 | 'analyze_wildcard', 'analyzer', 'default_operator', 614 | 'df', 'fields', 'lenient', 'lowercase_expanded_terms', 615 | 'parent', 'preference', 'q', 'routing', 'source') 616 | def explain(self, index, doc_type, id, body=None, params=None): 617 | """ 618 | 
The explain api computes a score explanation for a query and a specific 619 | document. This can give useful feedback whether a document matches or 620 | didn't match a specific query. 621 | ``_ 622 | 623 | :arg index: The name of the index 624 | :arg doc_type: The type of the document 625 | :arg id: The document ID 626 | :arg body: The query definition using the Query DSL 627 | :arg _source: True or false to return the _source field or not, or a 628 | list of fields to return 629 | :arg _source_exclude: A list of fields to exclude from the returned 630 | _source field 631 | :arg _source_include: A list of fields to extract and return from the 632 | _source field 633 | :arg analyze_wildcard: Specify whether wildcards and prefix queries in 634 | the query string query should be analyzed (default: false) 635 | :arg analyzer: The analyzer for the query string query 636 | :arg default_operator: The default operator for query string query (AND 637 | or OR), (default: OR) 638 | :arg df: The default field for query string query (default: _all) 639 | :arg fields: A comma-separated list of fields to return in the response 640 | :arg lenient: Specify whether format-based query failures (such as 641 | providing text to a numeric field) should be ignored 642 | :arg lowercase_expanded_terms: Specify whether query terms should be 643 | lowercased 644 | :arg parent: The ID of the parent document 645 | :arg preference: Specify the node or shard the operation should be 646 | performed on (default: random) 647 | :arg q: Query in the Lucene query string syntax 648 | :arg routing: Specific routing value 649 | :arg source: The URL-encoded query definition (instead of using the 650 | request body) 651 | """ 652 | _, data = yield self.transport.perform_request('GET', 653 | _make_path(index, 654 | doc_type, id, 655 | '_explain'), 656 | params=params, body=body) 657 | raise gen.Return(data) 658 | 659 | @gen.coroutine 660 | @query_params() 661 | def scroll(self, scroll_id, scroll, params=None): 662 | """ 663 | Scroll a search request created by specifying the scroll parameter. 664 | ``_ 665 | 666 | :arg scroll_id: The scroll ID 667 | :arg scroll: Specify how long a consistent view of the index should be 668 | maintained for scrolled search 669 | """ 670 | body = { 671 | "scroll": scroll, 672 | "scroll_id": scroll_id 673 | } 674 | 675 | if params: 676 | if "scroll" in params.keys(): 677 | params.pop("scroll") 678 | if "scroll_id" in params.keys(): 679 | params.pop("scroll_id") 680 | 681 | _, data = yield self.transport.perform_request('POST', 682 | _make_path('_search', 683 | 'scroll'), 684 | body=body, 685 | params=params) 686 | raise gen.Return(data) 687 | 688 | @gen.coroutine 689 | @query_params() 690 | def clear_scroll(self, scroll_id, params=None): 691 | """ 692 | Clear the scroll request created by specifying the scroll parameter to 693 | search. 
694 | ``_ 695 | 696 | :arg scroll_id: The scroll ID or a list of scroll IDs 697 | """ 698 | if not isinstance(scroll_id, list): 699 | scroll_id = [scroll_id] 700 | 701 | body = { 702 | "scroll_id": scroll_id 703 | } 704 | 705 | if params and "scroll_id" in params.keys(): 706 | params.pop("scroll_id") 707 | 708 | _, data = yield self.transport.perform_request('DELETE', 709 | _make_path('_search', 710 | 'scroll'), 711 | body=body, 712 | params=params) 713 | raise gen.Return(data) 714 | 715 | @gen.coroutine 716 | @query_params('consistency', 'parent', 'refresh', 'replication', 'routing', 717 | 'timeout', 'version', 'version_type') 718 | def delete(self, index, doc_type, id, params=None): 719 | """ 720 | Delete a typed JSON document from a specific index based on its id. 721 | ``_ 722 | 723 | :arg index: The name of the index 724 | :arg doc_type: The type of the document 725 | :arg id: The document ID 726 | :arg consistency: Specific write consistency setting for the operation 727 | :arg parent: ID of parent document 728 | :arg refresh: Refresh the index after performing the operation 729 | :arg replication: Specific replication type (default: sync) 730 | :arg routing: Specific routing value 731 | :arg timeout: Explicit operation timeout 732 | :arg version: Explicit version number for concurrency control 733 | :arg version_type: Specific version type 734 | """ 735 | _, data = yield self.transport.perform_request('DELETE', 736 | _make_path(index, 737 | doc_type, id), 738 | params=params) 739 | raise gen.Return(data) 740 | 741 | @gen.coroutine 742 | @query_params('ignore_indices', 'min_score', 'preference', 'routing', 743 | 'source') 744 | def count(self, index=None, doc_type=None, body=None, params=None): 745 | """ 746 | Execute a query and get the number of matches for that query. 747 | ``_ 748 | 749 | :arg index: A comma-separated list of indices to restrict the results 750 | :arg doc_type: A comma-separated list of types to restrict the results 751 | :arg body: A query to restrict the results (optional) 752 | :arg ignore_indices: When performed on multiple indices, allows to 753 | ignore `missing` ones (default: none) 754 | :arg min_score: Include only documents with a specific `_score` value 755 | in the result 756 | :arg preference: Specify the node or shard the operation should be 757 | performed on (default: random) 758 | :arg routing: Specific routing value 759 | :arg source: The URL-encoded query definition (instead of using the 760 | request body) 761 | """ 762 | _, data = yield self.transport.perform_request('POST', 763 | _make_path(index, 764 | doc_type, 765 | '_count'), 766 | params=params, body=body) 767 | raise gen.Return(data) 768 | 769 | @gen.coroutine 770 | @query_params('consistency', 'refresh', 'replication') 771 | def bulk(self, body, index=None, doc_type=None, params=None): 772 | """ 773 | Perform many index/delete operations in a single API call. 774 | ``_ 775 | 776 | See the :func:`~elasticsearch.helpers.bulk_index` for a more friendly 777 | API. 
778 | 779 | :arg body: The operation definition and data (action-data pairs) 780 | :arg index: Default index for items which don't provide one 781 | :arg doc_type: Default document type for items which don't provide one 782 | :arg consistency: Explicit write consistency setting for the operation 783 | :arg refresh: Refresh the index after performing the operation 784 | :arg replication: Explicitly set the replication type (efault: sync) 785 | """ 786 | _, data = yield self.transport.perform_request('POST', 787 | _make_path(index, 788 | doc_type, 789 | '_bulk'), 790 | params=params, 791 | body=self._bulk_body(body)) 792 | raise gen.Return(data) 793 | 794 | @gen.coroutine 795 | @query_params('search_type') 796 | def msearch(self, body, index=None, doc_type=None, params=None): 797 | """ 798 | Execute several search requests within the same API. 799 | ``_ 800 | 801 | :arg body: The request definitions (metadata-search request definition 802 | pairs), separated by newlines 803 | :arg index: A comma-separated list of index names to use as default 804 | :arg doc_type: A comma-separated list of document types to use as default 805 | :arg search_type: Search operation type 806 | """ 807 | _, data = yield self.transport.perform_request('GET', 808 | _make_path(index, 809 | doc_type, 810 | '_msearch'), 811 | params=params, 812 | body=self._bulk_body(body)) 813 | raise gen.Return(data) 814 | 815 | @gen.coroutine 816 | @query_params('consistency', 'ignore_indices', 'replication', 'routing', 817 | 'source', 'timeout', 'q') 818 | def delete_by_query(self, index, doc_type=None, body=None, params=None): 819 | """ 820 | Delete documents from one or more indices and one or more types based 821 | on a query. 822 | ``_ 823 | 824 | :arg index: A comma-separated list of indices to restrict the operation 825 | :arg doc_type: A comma-separated list of types to restrict the operation 826 | :arg body: A query to restrict the operation 827 | :arg consistency: Specific write consistency setting for the operation 828 | :arg ignore_indices: When performed on multiple indices, allows to 829 | ignore `missing` ones (default: none) 830 | :arg replication: Specific replication type (default: sync) 831 | :arg routing: Specific routing value 832 | :arg source: The URL-encoded query definition (instead of using the 833 | request body) 834 | :arg q: Query in the Lucene query string syntax 835 | :arg timeout: Explicit operation timeout 836 | """ 837 | _, data = yield self.transport.perform_request('DELETE', 838 | _make_path(index, 839 | doc_type, 840 | '_query'), 841 | params=params, body=body) 842 | raise gen.Return(data) 843 | 844 | @gen.coroutine 845 | @query_params('allow_no_indices', 'expand_wildcards', 'ignore_unavailable', 846 | 'local') 847 | def get_mapping(self, index=None, doc_type=None, params=None): 848 | """ 849 | Retrieve mapping definition of index or index/type. 850 | ``_ 851 | :arg index: A comma-separated list of index names 852 | :arg doc_type: A comma-separated list of document types 853 | :arg allow_no_indices: Whether to ignore if a wildcard indices 854 | expression resolves into no concrete indices. 
(This includes `_all` 855 | string or when no indices have been specified) 856 | :arg expand_wildcards: Whether to expand wildcard expression to concrete 857 | indices that are open, closed or both., default 'open', valid 858 | choices are: 'open', 'closed', 'none', 'all' 859 | :arg ignore_unavailable: Whether specified concrete indices should be 860 | ignored when unavailable (missing or closed) 861 | :arg local: Return local information, do not retrieve the state from 862 | master node (default: false) 863 | """ 864 | _, data = yield self.transport.perform_request('GET', 865 | _make_path(index, 866 | '_mapping', 867 | doc_type), 868 | params=params) 869 | raise gen.Return(data) 870 | 871 | @gen.coroutine 872 | @query_params('ignore_indices', 'preference', 'routing', 'source') 873 | def suggest(self, index=None, body=None, params=None): 874 | """ 875 | The suggest feature suggests similar looking terms based on a provided 876 | text by using a suggester. 877 | ``_ 878 | 879 | :arg index: A comma-separated list of index names to restrict the 880 | operation; use `_all` or empty string to perform the operation on 881 | all indices 882 | :arg body: The request definition 883 | :arg ignore_indices: When performed on multiple indices, allows to 884 | ignore `missing` ones (default: none) 885 | :arg preference: Specify the node or shard the operation should be 886 | performed on (default: random) 887 | :arg routing: Specific routing value 888 | :arg source: The URL-encoded request definition (instead of using 889 | request body) 890 | """ 891 | _, data = yield self.transport.perform_request('POST', 892 | _make_path(index, 893 | '_suggest'), 894 | params=params, body=body) 895 | raise gen.Return(data) 896 | 897 | @gen.coroutine 898 | @query_params('prefer_local') 899 | def percolate(self, index, doc_type, body, params=None): 900 | """ 901 | Send a percolate request which include a doc, and get back the queries 902 | that match on that doc out of the set of registered queries. 903 | ``_ 904 | 905 | :arg index: The name of the index with a registered percolator query 906 | :arg doc_type: The document type 907 | :arg body: The document (`doc`) to percolate against registered queries; 908 | optionally also a `query` to limit the percolation to specific 909 | registered queries 910 | :arg prefer_local: With `true`, specify that a local shard should be 911 | used if available, with `false`, use a random shard (default: true) 912 | """ 913 | _, data = yield self.transport.perform_request('GET', 914 | _make_path(index, 915 | doc_type, 916 | '_percolate'), 917 | params=params, body=body) 918 | raise gen.Return(data) 919 | 920 | @gen.coroutine 921 | @query_params('boost_terms', 'max_doc_freq', 'max_query_terms', 922 | 'max_word_len', 'min_doc_freq', 'min_term_freq', 923 | 'min_word_len', 'mlt_fields', 'percent_terms_to_match', 924 | 'routing', 'search_from', 'search_indices', 925 | 'search_query_hint', 'search_scroll', 'search_size', 926 | 'search_source', 'search_type', 'search_types', 'stop_words') 927 | def mlt(self, index, doc_type, id, body=None, params=None): 928 | """ 929 | Get documents that are "like" a specified document. 
930 |         ``_
931 | 
932 |         :arg index: The name of the index
933 |         :arg doc_type: The type of the document (use `_all` to fetch the first
934 |             document matching the ID across all types)
935 |         :arg id: The document ID
936 |         :arg body: A specific search request definition
937 |         :arg boost_terms: The boost factor
938 |         :arg max_doc_freq: The word occurrence frequency as count: words with
939 |             higher occurrence in the corpus will be ignored
940 |         :arg max_query_terms: The maximum query terms to be included in the
941 |             generated query
942 |         :arg max_word_len: The maximum length of the word: longer words will
943 |             be ignored
944 |         :arg min_doc_freq: The word occurrence frequency as count: words with
945 |             lower occurrence in the corpus will be ignored
946 |         :arg min_term_freq: The term frequency as percent: terms with lower
947 |             occurrence in the source document will be ignored
948 |         :arg min_word_len: The minimum length of the word: shorter words will
949 |             be ignored
950 |         :arg mlt_fields: Specific fields to perform the query against
951 |         :arg percent_terms_to_match: How many terms have to match in order to
952 |             consider the document a match (default: 0.3)
953 |         :arg routing: Specific routing value
954 |         :arg search_from: The offset from which to return results
955 |         :arg search_indices: A comma-separated list of indices to perform the
956 |             query against (default: the index containing the document)
957 |         :arg search_query_hint: The search query hint
958 |         :arg search_scroll: A scroll search request definition
959 |         :arg search_size: The number of documents to return (default: 10)
960 |         :arg search_source: A specific search request definition (instead of
961 |             using the request body)
962 |         :arg search_type: Specific search type (eg. `dfs_then_fetch`, `count`,
963 |             etc)
964 |         :arg search_types: A comma-separated list of types to perform the query
965 |             against (default: the same type as the document)
966 |         :arg stop_words: A list of stop words to be ignored
967 |         """
968 |         _, data = yield self.transport.perform_request(
969 |             'GET', _make_path(index, doc_type, id, '_mlt'),
970 |             params=params, body=body)
971 |         raise gen.Return(data)
972 | 
--------------------------------------------------------------------------------
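The README and the module docstring show AsyncElasticsearch from inside Tornado
request handlers. For completeness, a minimal standalone sketch of the same API
follows; the node address, credentials, index name, and document values are
placeholders, and it assumes a reachable Elasticsearch node plus Tornado and
elasticsearch-py versions supported by this library. The ``http_auth``,
``request_timeout``, and ``max_clients`` keyword arguments are the ones
forwarded through to ``AsyncHttpConnection`` above.

.. code-block:: python

    import datetime
    import uuid

    from tornado import gen, ioloop

    from tornado_elasticsearch import AsyncElasticsearch


    @gen.coroutine
    def demo():
        # Keyword arguments are passed through to AsyncHttpConnection:
        # http_auth accepts ('user', 'pass') or 'user:pass', request_timeout
        # is in seconds, max_clients caps concurrent AsyncHTTPClient requests.
        es = AsyncElasticsearch(['localhost:9200'],
                                http_auth=('elastic', 'changeme'),
                                request_timeout=10,
                                max_clients=25)

        info = yield es.info()
        print(info)

        # Index a document, then search for it.
        doc = {'author': 'example', 'text': 'hello world',
               'timestamp': datetime.datetime.now().isoformat()}
        yield es.index(index='test-index', doc_type='tweet',
                       body=doc, id=str(uuid.uuid4()))

        results = yield es.search(index='test-index', q='hello')
        print(results['hits'])


    if __name__ == '__main__':
        ioloop.IOLoop.current().run_sync(demo)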