├── .gitignore ├── LICENSE.txt ├── README.md ├── elasticsearch_parse │   ├── __init__.py │   ├── aggs.py │   ├── exceptions.py │   ├── faceted_search.py │   ├── filter.py │   ├── index.py │   ├── query.py │   ├── search.py │   ├── serializer.py │   └── utils.py ├── setup.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | *~ 3 | *.py[co] 4 | .coverage 5 | *.egg-info 6 | dist 7 | build 8 | *.egg 9 | coverage.xml 10 | junit.xml 11 | test_elasticsearch_dsl/htmlcov 12 | docs/_build 13 | .cache 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 rui fengyun 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # elasticsearch_parse 2 | 3 | 4 | First, a note on how this project came about. Learning Elasticsearch is somewhat painful in itself: we can write SQL and MongoDB statements fluently, yet the Elasticsearch DSL syntax keeps leaving us stumped, endlessly digging back through our notes... 5 | Simply put, Elasticsearch Parse makes it easier to get started with ES. Its main feature is statement mapping, a little like an ORM... 6 | 7 | This project is derived from `elasticsearch-dsl-py`. The original plan was to fork it and submit a pull request once the work was done, but since this syntax-mapping feature is of little interest upstream, I simply cut 70% of the code, added some seasoning of my own, and kept just the DSL statement mapping! 8 | 9 | To install the module: 10 | ``` 11 | pip install elasticsearch_parse 12 | ``` 13 | 14 | Now let's try out the wrapped ES syntax interpreter. 15 | 16 | First, the same operation in raw DSL syntax. It looks cumbersome at a glance, and writing it by hand is even worse.
17 | ``` 18 | from elasticsearch import Elasticsearch 19 | client = Elasticsearch() 20 | 21 | response = client.search( 22 | index="my-index", 23 | body={ 24 | "query": { 25 | "filtered": { 26 | "query": { 27 | "bool": { 28 | "must": [{"match": {"title": "python"}}], 29 | "must_not": [{"match": {"description": "beta"}}] 30 | } 31 | }, 32 | "filter": {"term": {"category": "search"}} 33 | } 34 | }, 35 | "aggs": { 36 | "per_tag": { 37 | "terms": {"field": "tags"}, 38 | "aggs": { 39 | "max_lines": {"max": {"field": "lines"}} 40 | } 41 | } 42 | } 43 | } 44 | ) 45 | 46 | ``` 47 | 48 | Here is the same request with Elasticsearch_parse; it could hardly be simpler: 49 | 50 | ``` 51 | from elasticsearch_parse import Search, Q 52 | 53 | s = Search(index="my-index") \ 54 | .filter("term", blog="xiaorui.cc") \ 55 | .query("match", author="ruifengyun") \ 56 | .query(~Q("match", face="good")) 57 | 58 | s.aggs.bucket('per_tag', 'terms', field='tags') 59 | 60 | response = s.execute() 61 | ``` 62 | The result we get is: 63 | ``` 64 | { 65 | "query": { 66 | "filtered": { 67 | "filter": { 68 | "term": { 69 | "blog": "xiaorui.cc" 70 | } 71 | }, 72 | "query": { 73 | "bool": { 74 | "must_not": [ 75 | { 76 | "match": { 77 | "face": "good" 78 | } 79 | } 80 | ], 81 | "must": [ 82 | { 83 | "match": { 84 | "author": "ruifengyun" 85 | } 86 | } 87 | ] 88 | } 89 | } 90 | } 91 | }, 92 | "aggs": { 93 | "per_tag": { 94 | "terms": { 95 | "field": "tags" 96 | } 97 | } 98 | } 99 | } 100 | ``` 101 | 102 | Let's walk through the syntax piece by piece. 103 | ``` 104 | s = Search() 105 | ``` 106 | 107 | A match query for documents whose field f equals 55: 108 | ``` 109 | s.query('match', f=55) 110 | ``` 111 | 112 | A time range: 113 | ``` 114 | s.query('range', **{'@timestamp': {'lt': 'now'}}) 115 | ``` 116 | 117 | Controlling size from the outside: 118 | ``` 119 | s = s.query('match', f=42) 120 | s[3].to_dict()  # {'query': {'match': {'f': 42}}, 'from': 3, 'size': 1} 121 | ``` 122 | 123 | ``` 124 | assert s.to_dict(size=10) == {"query": {"match": {'f': 42}}, "size": 10} 125 | ``` 126 | 127 | Embedding size inside the body: 128 | ``` 129 | s = Search.from_dict({"size": 5}) 130 | assert { 131 | "query": {"match_all": {}}, 132 | "size": 5 133 | } == s.to_dict() 134 | ``` 135 | 136 | Using aggs for aggregations: 137 | ``` 138 | s = s.query('match', f=42) 139 | assert {"query": {"match": {'f': 42}}} == s.to_dict() 140 | assert {"query": {"match": {'f': 42}}, "size": 10} == s.to_dict(size=10) 141 | s.aggs.bucket('per_tag', 'terms', field='f').metric('max_score', 'max', field='score') 142 | d = { 143 | 'aggs': { 144 | 'per_tag': { 145 | 'terms': {'field': 'f'}, 146 | 'aggs': {'max_score': {'max': {'field': 'score'}}} 147 | } 148 | } } 149 | ``` 150 | A few more aggs usages: 151 | 152 | Take the maximum of a field: 153 | ``` 154 | s.aggs.metric('max_score', 'max', field='score') 155 | ``` 156 | 157 | Group and aggregate by a condition, nesting a metric inside the bucket (`A` can be imported from elasticsearch_parse): 158 | ``` 159 | A('terms', field='tags', aggs={'max_score': {'max': {'field': 'score'}}}) 160 | ``` 161 | 162 | Bucket aggregation; Elasticsearch sorts the buckets by count for you: 163 | ``` 164 | s.aggs.bucket('per_tag', 'terms', field='tags') 165 | ``` 166 | 167 | 168 | ...
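To wrap up, here is a slightly larger sketch that combines the pieces above into one request. The index and field names are invented for illustration, and `execute()` in this fork simply returns the assembled DSL dict rather than hitting a cluster:

```
from elasticsearch_parse import Search

s = Search(index="logstash-2015.06.01") \
    .filter("term", status="active") \
    .query("match", message="timeout") \
    .query("range", **{"@timestamp": {"gte": "now-1d", "lt": "now"}})

# sort descending on time and keep the first 20 hits
s = s.sort("-@timestamp")[0:20]

# bucket per host, with the max latency inside each bucket
s.aggs.bucket("per_host", "terms", field="host") \
    .metric("max_latency", "max", field="latency")

body = s.execute()  # a plain dict, ready to hand to elasticsearch-py
```

Note that the query/filter/sort calls each return a copy of the search, while `s.aggs` modifies the search in place.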
169 | 170 | 171 | -------------------------------------------------------------------------------- /elasticsearch_parse/__init__.py: -------------------------------------------------------------------------------- 1 | from .query import Q 2 | from .filter import F 3 | from .aggs import A 4 | from .search import Search 5 | from .index import Index 6 | from .faceted_search import * 7 | 8 | VERSION = (0, 0, 9) 9 | __version__ = VERSION 10 | __versionstr__ = '.'.join(map(str, VERSION)) 11 | -------------------------------------------------------------------------------- /elasticsearch_parse/aggs.py: -------------------------------------------------------------------------------- 1 | from .utils import DslBase, _make_dsl_class 2 | 3 | __all__ = [ 4 | 'A', 'Agg', 'Filter', 'Bucket', 'Children', 'DateHistogram', 'Filters', 5 | 'GeoDistance', 'GeohashGrid', 'Global', 'Histogram', 'Iprange', 'Missing', 6 | 'Nested', 'Range', 'ReverseNested', 'SignificantTerms', 'Terms', 'Avg', 7 | 'Cardinality', 'ExtendedStats', 'GeoBounds', 'Max', 'Min', 'Percentiles', 8 | 'PercentileRanks', 'ScriptedMetric', 'Stats', 'Sum', 'TopHits', 'ValueCount' 9 | ] 10 | 11 | 12 | def A(name_or_agg, filter=None, **params): 13 | if filter is not None: 14 | if name_or_agg != 'filter': 15 | raise ValueError("Aggregation %r doesn't accept positional argument 'filter'." % name_or_agg) 16 | params['filter'] = filter 17 | 18 | # {"terms": {"field": "tags"}, "aggs": {...}} 19 | if isinstance(name_or_agg, dict): 20 | if params: 21 | raise ValueError('A() cannot accept parameters when passing in a dict.') 22 | # copy to avoid modifying in-place 23 | agg = name_or_agg.copy() 24 | # pop out nested aggs 25 | aggs = agg.pop('aggs', None) 26 | # should be {"terms": {"field": "tags"}} 27 | if len(agg) != 1: 28 | raise ValueError('A() can only accept dict with an aggregation ({"terms": {...}}). ' 29 | 'Instead it got (%r)' % name_or_agg) 30 | agg_type, params = agg.popitem() 31 | if aggs: 32 | params = params.copy() 33 | params['aggs'] = aggs 34 | return Agg.get_dsl_class(agg_type)(**params) 35 | 36 | # Terms(...) - just return the agg as-is 37 | elif isinstance(name_or_agg, Agg): 38 | if params: 39 | raise ValueError('A() cannot accept parameters when passing in an Agg object.') 40 | return name_or_agg 41 | 42 | # "terms", field="tags" 43 | return Agg.get_dsl_class(name_or_agg)(**params) 44 | 45 | class Agg(DslBase): 46 | _type_name = 'agg' 47 | _type_shortcut = staticmethod(A) 48 | name = None 49 | 50 | class AggBase(object): 51 | _param_defs = { 52 | 'aggs': {'type': 'agg', 'hash': True}, 53 | } 54 | def __getitem__(self, agg_name): 55 | agg = self._params.setdefault('aggs', {})[agg_name] # propagate KeyError 56 | 57 | # make sure we're not mutating a shared state - whenever accessing a 58 | # bucket, return a shallow copy of it to be safe 59 | if isinstance(agg, Bucket): 60 | agg = A(agg.name, **agg._params) 61 | # be sure to store the copy so any modifications to it will affect us 62 | self._params['aggs'][agg_name] = agg 63 | 64 | return agg 65 | 66 | def __setitem__(self, agg_name, agg): 67 | self.aggs[agg_name] = A(agg) 68 | 69 | def _agg(self, bucket, name, agg_type, *args, **params): 70 | agg = self[name] = A(agg_type, *args, **params) 71 | 72 | # For chaining - when creating new buckets return them...
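# e.g. s.aggs.bucket('per_tag', 'terms', field='tags') hands back the new
# Terms bucket so nested aggs can chain off it, while .metric(...) returns
# the enclosing bucket (self._base) so calls keep chaining.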
73 | if bucket: 74 | return agg 75 | # otherwise return self._base so we can keep chaining 76 | else: 77 | return self._base 78 | 79 | def metric(self, name, agg_type, *args, **params): 80 | return self._agg(False, name, agg_type, *args, **params) 81 | 82 | def bucket(self, name, agg_type, *args, **params): 83 | return self._agg(True, name, agg_type, *args, **params) 84 | 85 | 86 | class Bucket(AggBase, Agg): 87 | def __init__(self, **params): 88 | super(Bucket, self).__init__(**params) 89 | # remember self for chaining 90 | self._base = self 91 | 92 | def to_dict(self): 93 | d = super(AggBase, self).to_dict() 94 | if 'aggs' in d[self.name]: 95 | d['aggs'] = d[self.name].pop('aggs') 96 | return d 97 | 98 | class Filter(Bucket): 99 | name = 'filter' 100 | _param_defs = { 101 | 'filter': {'type': 'filter'}, 102 | 'aggs': {'type': 'agg', 'hash': True}, 103 | } 104 | 105 | def __init__(self, filter=None, **params): 106 | if filter is not None: 107 | params['filter'] = filter 108 | super(Filter, self).__init__(**params) 109 | 110 | def to_dict(self): 111 | d = super(Filter, self).to_dict() 112 | d[self.name].update(d[self.name].pop('filter', {})) 113 | return d 114 | 115 | AGGS = ( 116 | (Bucket, 'children', None), 117 | (Bucket, 'date_histogram', None), 118 | (Bucket, 'date_range', None), 119 | (Bucket, 'filters', {'filters': {'type': 'filter', 'hash': True}}), 120 | (Bucket, 'geo_distance', None), 121 | (Bucket, 'geohash_grid', None), 122 | (Bucket, 'global', None), 123 | (Bucket, 'histogram', None), 124 | (Bucket, 'iprange', None), 125 | (Bucket, 'missing', None), 126 | (Bucket, 'nested', None), 127 | (Bucket, 'range', None), 128 | (Bucket, 'reverse_nested', None), 129 | (Bucket, 'significant_terms', None), 130 | (Bucket, 'terms', None), 131 | 132 | (Agg, 'avg', None), 133 | (Agg, 'cardinality', None), 134 | (Agg, 'extended_stats', None), 135 | (Agg, 'geo_bounds', None), 136 | (Agg, 'max', None), 137 | (Agg, 'min', None), 138 | (Agg, 'percentiles', None), 139 | (Agg, 'percentile_ranks', None), 140 | (Agg, 'scripted_metric', None), 141 | (Agg, 'stats', None), 142 | (Agg, 'sum', None), 143 | (Agg, 'top_hits', None), 144 | (Agg, 'value_count', None), 145 | ) 146 | 147 | # generate the aggregation classes dynamically 148 | for base, fname, params_def in AGGS: 149 | # don't override the params def from AggBase 150 | if params_def: 151 | params_def.update(AggBase._param_defs) 152 | fclass = _make_dsl_class(base, fname, params_def) 153 | globals()[fclass.__name__] = fclass 154 | -------------------------------------------------------------------------------- /elasticsearch_parse/exceptions.py: -------------------------------------------------------------------------------- 1 | class ElasticsearchDslException(Exception): 2 | pass 3 | 4 | 5 | class UnknownDslObject(ElasticsearchDslException): 6 | pass 7 | 8 | 9 | class ValidationException(ValueError, ElasticsearchDslException): 10 | pass 11 | 12 | 13 | class IllegalOperation(ElasticsearchDslException): 14 | pass 15 | -------------------------------------------------------------------------------- /elasticsearch_parse/faceted_search.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta, datetime 2 | from six import iteritems, itervalues 3 | from functools import partial 4 | 5 | from .search import Search 6 | from .filter import F 7 | from .aggs import A 8 | from .utils import AttrDict 9 | from .result import Response 10 | 11 | __all__ = ['FacetedSearch', 'HistogramFacet', 'TermsFacet',
'DateHistogramFacet', 'RangeFacet'] 12 | 13 | class Facet(object): 14 | """ 15 | A facet on faceted search. Wraps an aggregation and provides functionality 16 | to create a filter for selected values and return a list of facet values 17 | from the result of the aggregation. 18 | """ 19 | agg_type = None 20 | 21 | def __init__(self, **kwargs): 22 | self.filter_values = () 23 | self._params = kwargs 24 | 25 | def get_aggregation(self): 26 | """ 27 | Return the aggregation object. 28 | """ 29 | return A(self.agg_type, **self._params) 30 | 31 | def add_filter(self, filter_values): 32 | """ 33 | Construct a filter and remember the values for use in get_values. 34 | """ 35 | self.filter_values = filter_values 36 | 37 | if not filter_values: 38 | return 39 | 40 | f = self.get_value_filter(filter_values[0]) 41 | for v in filter_values[1:]: 42 | f |= self.get_value_filter(v) 43 | return f 44 | 45 | def get_value_filter(self, filter_value): 46 | """ 47 | Construct a filter for an individual value. 48 | """ 49 | pass 50 | 51 | def is_filtered(self, key): 52 | """ 53 | Is a filter active on the given key? 54 | """ 55 | return key in self.filter_values 56 | 57 | def get_value(self, bucket): 58 | """ 59 | Return a value representing a bucket. By default, its key. 60 | """ 61 | return bucket['key'] 62 | 63 | def get_values(self, data): 64 | """ 65 | Turn the raw bucket data into a list of tuples containing the key, 66 | number of documents and a flag indicating whether this value has been 67 | selected or not. 68 | """ 69 | out = [] 70 | for bucket in data: 71 | key = self.get_value(bucket) 72 | out.append(( 73 | key, 74 | bucket['doc_count'], 75 | self.is_filtered(key) 76 | )) 77 | return out 78 | 79 | 80 | class TermsFacet(Facet): 81 | agg_type = 'terms' 82 | 83 | def add_filter(self, filter_values): 84 | """ Create a terms filter instead of a bool filter containing term filters.
""" 85 | self.filter_values = filter_values 86 | if filter_values: 87 | return F('terms', **{self._params['field']: filter_values}) 88 | 89 | 90 | class RangeFacet(Facet): 91 | agg_type = 'range' 92 | 93 | def _range_to_dict(self, range): 94 | key, range = range 95 | out = {'key': key} 96 | if range[0] is not None: 97 | out['from'] = range[0] 98 | if range[1] is not None: 99 | out['to'] = range[1] 100 | return out 101 | 102 | def __init__(self, ranges, **kwargs): 103 | super(RangeFacet, self).__init__(**kwargs) 104 | self._params['ranges'] = list(map(self._range_to_dict, ranges)) 105 | self._params['keyed'] = False 106 | self._ranges = dict(ranges) 107 | 108 | def get_value_filter(self, filter_value): 109 | f, t = self._ranges[filter_value] 110 | limits = {} 111 | if f is not None: 112 | limits['from'] = f 113 | if t is not None: 114 | limits['to'] = t 115 | 116 | return F('range', **{ 117 | self._params['field']: limits 118 | }) 119 | 120 | class HistogramFacet(Facet): 121 | agg_type = 'histogram' 122 | 123 | def get_value_filter(self, filter_value): 124 | return F('range', **{ 125 | self._params['field']: { 126 | 'gte': filter_value, 127 | 'lt': filter_value + self._params['interval'] 128 | } 129 | }) 130 | 131 | 132 | class DateHistogramFacet(Facet): 133 | agg_type = 'date_histogram' 134 | 135 | DATE_INTERVALS = { 136 | 'month': lambda d: (d+timedelta(days=32)).replace(day=1), 137 | 'week': lambda d: d+timedelta(days=7), 138 | 'day': lambda d: d+timedelta(days=1), 139 | 'hour': lambda d: d+timedelta(hours=1), 140 | } 141 | 142 | def get_value(self, bucket): 143 | return datetime.utcfromtimestamp(int(bucket['key']) / 1000) 144 | 145 | def get_value_filter(self, filter_value): 146 | return F('range', **{ 147 | self._params['field']: { 148 | 'gte': filter_value, 149 | 'lt': self.DATE_INTERVALS[self._params['interval']](filter_value) 150 | } 151 | }) 152 | 153 | 154 | class FacetedResponse(Response): 155 | def __init__(self, search, *args, **kwargs): 156 | super(FacetedResponse, self).__init__(*args, **kwargs) 157 | super(AttrDict, self).__setattr__('_search', search) 158 | 159 | @property 160 | def query_string(self): 161 | return self._search._query 162 | 163 | @property 164 | def facets(self): 165 | if not hasattr(self, '_facets'): 166 | super(AttrDict, self).__setattr__('_facets', AttrDict({})) 167 | for name, facet in iteritems(self._search.facets): 168 | self._facets[name] = facet.get_values(self.aggregations['_filter_' + name][name]['buckets']) 169 | return self._facets 170 | 171 | 172 | class FacetedSearch(object): 173 | index = '_all' 174 | doc_types = ['_all'] 175 | fields = ('*', ) 176 | facets = {} 177 | 178 | def __init__(self, query=None, filters={}): 179 | self._query = query 180 | self._filters = {} 181 | for name, value in iteritems(filters): 182 | self.add_filter(name, value) 183 | 184 | def add_filter(self, name, filter_values): 185 | """ 186 | Add a filter for a facet. 187 | """ 188 | # normalize the value into a list 189 | if not isinstance(filter_values, (tuple, list)): 190 | if filter_values in (None, ''): 191 | return 192 | filter_values = [filter_values, ] 193 | 194 | # get the filter from the facet 195 | f = self.facets[name].add_filter(filter_values) 196 | if f is None: 197 | return 198 | 199 | self._filters[name] = f 200 | 201 | def search(self): 202 | """ 203 | Construct the Search object. 204 | """ 205 | return Search(doc_type=self.doc_types, index=self.index) 206 | 207 | def query(self, search, query): 208 | """ 209 | Add query part to ``search``. 
210 | 211 | Override this if you wish to customize the query used. 212 | """ 213 | if query: 214 | return search.query('multi_match', fields=self.fields, query=query) 215 | return search 216 | 217 | def aggregate(self, search): 218 | """ 219 | Add aggregations representing the facets selected, including potential 220 | filters. 221 | """ 222 | for f, facet in iteritems(self.facets): 223 | agg = facet.get_aggregation() 224 | agg_filter = F('match_all') 225 | for field, filter in iteritems(self._filters): 226 | if f == field: 227 | continue 228 | agg_filter &= filter 229 | search.aggs.bucket( 230 | '_filter_' + f, 231 | 'filter', 232 | filter=agg_filter 233 | ).bucket(f, agg) 234 | 235 | def filter(self, search): 236 | """ 237 | Add a ``post_filter`` to the search request narrowing the results based 238 | on the facet filters. 239 | """ 240 | post_filter = F('match_all') 241 | for f in itervalues(self._filters): 242 | post_filter &= f 243 | return search.post_filter(post_filter) 244 | 245 | def highlight(self, search): 246 | """ 247 | Add highlighting for all the fields. 248 | """ 249 | return search.highlight(*self.fields) 250 | 251 | def build_search(self): 252 | """ 253 | Construct the ``Search`` object. 254 | """ 255 | s = self.search() 256 | s = self.query(s, self._query) 257 | s = self.filter(s) 258 | s = self.highlight(s) 259 | self.aggregate(s) 260 | return s 261 | 262 | def execute(self): 263 | if not hasattr(self, '_response'): 264 | s = self.build_search() 265 | self._response = s.execute(response_class=partial(FacetedResponse, self)) 266 | 267 | return self._response 268 | 269 | -------------------------------------------------------------------------------- /elasticsearch_parse/filter.py: -------------------------------------------------------------------------------- 1 | from .utils import DslBase, BoolMixin, _make_dsl_class 2 | 3 | __all__ = [ 4 | 'F', 'And', 'AndOrFilter', 'Bool', 'EMPTY_FILTER', 'Exists', 'Filter', 5 | 'Fquery', 'GeoBoundingBox', 'GeoDistance', 'GeoDistanceRange', 6 | 'GeoPolygon', 'GeoShape', 'GeohashCell', 'HasChild', 'HasParent', 'Ids', 7 | 'Indices', 'Limit', 'MatchAll', 'Missing', 'Nested', 'Not', 'Or', 'Prefix', 8 | 'Query', 'Range', 'Regexp', 'Script', 'Term', 'Terms', 'Type' 9 | ] 10 | 11 | 12 | def F(name_or_filter='match_all', filters=None, **params): 13 | # 'and/or', [F(), F()] 14 | if filters is not None: 15 | # someone passed a positional argument to F outside of and/or/query 16 | if name_or_filter in ('and', 'or'): 17 | params['filters'] = filters 18 | elif name_or_filter == 'query': 19 | params['query'] = filters 20 | else: 21 | raise ValueError("Filter %r doesn't accept a positional argument." % name_or_filter) 22 | 23 | # {"term": {...}} 24 | if isinstance(name_or_filter, dict): 25 | if params: 26 | raise ValueError('F() cannot accept parameters when passing in a dict.') 27 | if len(name_or_filter) != 1: 28 | raise ValueError('F() can only accept dict with a single filter ({"bool": {...}}). ' 29 | 'Instead it got (%r)' % name_or_filter) 30 | name, params = name_or_filter.copy().popitem() 31 | if isinstance(params, dict): 32 | return Filter.get_dsl_class(name)(**params) 33 | else: 34 | # an and/or filter can take a plain list 35 | return Filter.get_dsl_class(name)(params) 36 | 37 | # Term(...)
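# an existing Filter instance is passed straight through (extra parameters are rejected)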
38 | if isinstance(name_or_filter, Filter): 39 | if params: 40 | raise ValueError('F() cannot accept parameters when passing in a Filter object.') 41 | return name_or_filter 42 | 43 | # s.filter = ~F(s.filter) 44 | if hasattr(name_or_filter, '_proxied'): 45 | return name_or_filter._proxied 46 | 47 | # 'term', tag='python', ... 48 | return Filter.get_dsl_class(name_or_filter)(**params) 49 | 50 | class Filter(DslBase): 51 | _type_name = 'filter' 52 | _type_shortcut = staticmethod(F) 53 | name = None 54 | 55 | class MatchAll(Filter): 56 | name = 'match_all' 57 | def __add__(self, other): 58 | return other._clone() 59 | __and__ = __rand__ = __radd__ = __add__ 60 | 61 | def __or__(self, other): 62 | return self 63 | __ror__ = __or__ 64 | EMPTY_FILTER = MatchAll() 65 | 66 | class Bool(BoolMixin, Filter): 67 | name = 'bool' 68 | _param_defs = { 69 | 'must': {'type': 'filter', 'multi': True}, 70 | 'should': {'type': 'filter', 'multi': True}, 71 | 'must_not': {'type': 'filter', 'multi': True}, 72 | } 73 | 74 | def __and__(self, other): 75 | f = self._clone() 76 | if isinstance(other, self.__class__): 77 | f.must += other.must 78 | f.must_not += other.must_not 79 | f.should = [] 80 | if self.should and other.should: 81 | selfshould, othershould = self.should[:], other.should[:] 82 | # required subfilter, move to must 83 | for s in (selfshould, othershould): 84 | if len(s) == 1: 85 | f.must.append(s.pop()) 86 | 87 | # we have leftover lists, nothing to do but add to must as bool(should) 88 | if selfshould and othershould: 89 | f.must.extend(( 90 | Bool(should=selfshould), 91 | Bool(should=othershould), 92 | )) 93 | # at most one should list is left, keep as should 94 | else: 95 | f.should = selfshould + othershould 96 | 97 | # only one of the filters has should clauses, keep them as-is 98 | else: 99 | f.should = self.should + other.should 100 | else: 101 | f.must.append(other) 102 | return f 103 | __rand__ = __and__ 104 | 105 | # register this as Bool for Filter 106 | Filter._bool = Bool 107 | 108 | class Not(Filter): 109 | name = 'not' 110 | _param_defs = {'filter': {'type': 'filter'}} 111 | 112 | def __init__(self, filter=None, **kwargs): 113 | if filter is None: 114 | filter, kwargs = kwargs, {} 115 | super(Not, self).__init__(filter=filter, **kwargs) 116 | 117 | class AndOrFilter(object): 118 | _param_defs = {'filters': {'type': 'filter', 'multi': True}} 119 | 120 | def __init__(self, filters=None, **kwargs): 121 | if filters is not None: 122 | kwargs['filters'] = filters 123 | super(AndOrFilter, self).__init__(**kwargs) 124 | 125 | # compound filters 126 | class And(AndOrFilter, Filter): 127 | name = 'and' 128 | 129 | class Or(AndOrFilter, Filter): 130 | name = 'or' 131 | 132 | class Query(Filter): 133 | name = 'query' 134 | _param_defs = {'query': {'type': 'query'}} 135 | 136 | def __init__(self, query=None, **kwargs): 137 | if query is not None: 138 | kwargs['query'] = query 139 | super(Query, self).__init__(**kwargs) 140 | 141 | def to_dict(self): 142 | d = super(Query, self).to_dict() 143 | d[self.name].update(d[self.name].pop('query', {})) 144 | return d 145 | 146 | 147 | FILTERS = ( 148 | # relationships 149 | ('nested', {'filter': {'type': 'filter'}}), 150 | ('has_child', {'filter': {'type': 'filter'}}), 151 | ('has_parent', {'filter': {'type': 'filter'}}), 152 | 153 | ('fquery', {'query': {'type': 'query'}}), 154 | 155 | # core filters 156 | ('exists', None), 157 | ('geo_bounding_box', None), 158 | ('geo_distance', None), 159 | ('geo_distance_range', None), 160 | ('geo_polygon', None), 161 |
('geo_shape', None), 162 | ('geohash_cell', None), 163 | ('ids', None), 164 | ('indices', None), 165 | ('limit', None), 166 | ('missing', None), 167 | ('prefix', None), 168 | ('range', None), 169 | ('regexp', None), 170 | ('script', None), 171 | ('term', None), 172 | ('terms', None), 173 | ('type', None), 174 | ) 175 | 176 | # generate the filter classes dynamically 177 | for fname, params_def in FILTERS: 178 | fclass = _make_dsl_class(Filter, fname, params_def) 179 | globals()[fclass.__name__] = fclass 180 | 181 | -------------------------------------------------------------------------------- /elasticsearch_parse/index.py: -------------------------------------------------------------------------------- 1 | from .search import Search 2 | 3 | class Index(object): 4 | def __init__(self, name, using='default'): 5 | self._name = name 6 | self._doc_types = {} 7 | self._mappings = {} 8 | self._using = using 9 | self._settings = {} 10 | self._aliases = {} 11 | 12 | def clone(self, name, using=None): 13 | i = Index(name, using=using or self._using) 14 | for attr in ('_doc_types', '_mappings', '_settings', '_aliases'): 15 | setattr(i, attr, getattr(self, attr).copy()) 16 | return i 17 | 18 | def _get_connection(self): 19 | return connections.get_connection(self._using) 20 | connection = property(_get_connection) 21 | 22 | def doc_type(self, doc_type): 23 | name = doc_type._doc_type.name 24 | self._doc_types[name] = doc_type 25 | self._mappings[name] = doc_type._doc_type.mapping 26 | 27 | if not doc_type._doc_type.index: 28 | doc_type._doc_type.index = self._name 29 | return doc_type # returned so it can also be used as a decorator 30 | 31 | def settings(self, **kwargs): 32 | self._settings.update(kwargs) 33 | return self 34 | 35 | def aliases(self, **kwargs): 36 | self._aliases.update(kwargs) 37 | return self 38 | 39 | def search(self): 40 | return Search( 41 | using=self._using, 42 | index=self._name, 43 | doc_type=[self._doc_types.get(k, k) for k in self._mappings] 44 | ) 45 | 46 | def _get_mappings(self): 47 | analysis, mappings = {}, {} 48 | for mapping in self._mappings.values(): 49 | mappings.update(mapping.to_dict()) 50 | a = mapping._collect_analysis() 51 | # merge the definition 52 | # TODO: conflict detection/resolution 53 | for key in a: 54 | analysis.setdefault(key, {}).update(a[key]) 55 | 56 | return mappings, analysis 57 | 58 | def to_dict(self): 59 | out = {} 60 | if self._settings: 61 | out['settings'] = self._settings 62 | if self._aliases: 63 | out['aliases'] = self._aliases 64 | mappings, analysis = self._get_mappings() 65 | if mappings: 66 | out['mappings'] = mappings 67 | if analysis: 68 | out.setdefault('settings', {})['analysis'] = analysis 69 | return out 70 | 71 | def create(self, **kwargs): 72 | self.connection.indices.create(index=self._name, body=self.to_dict(), **kwargs) 73 | 74 | def delete(self, **kwargs): 75 | self.connection.indices.delete(index=self._name, **kwargs) 76 | -------------------------------------------------------------------------------- /elasticsearch_parse/query.py: -------------------------------------------------------------------------------- 1 | from .utils import DslBase, BoolMixin, _make_dsl_class 2 | from .function import SF, ScoreFunction 3 | 4 | __all__ = [ 5 | 'Q', 'Bool', 'Boosting', 'Common', 'ConstantScore', 'DisMax', 'Filtered', 6 | 'FunctionScore', 'Fuzzy', 'FuzzyLikeThis', 'FuzzyLikeThisField', 7 | 'GeoShape', 'HasChild', 'HasParent', 'Ids', 'Indices', 'Match', 'MatchAll', 8 | 'MatchPhrase', 'MatchPhrasePrefix', 'MoreLikeThis', 'MoreLikeThisField', 9 |
'MultiMatch', 'Nested', 'Prefix', 'Query', 'QueryString', 'Range', 10 | 'Regexp', 'SF', 'ScoreFunction', 'SimpleQueryString', 'SpanFirst', 11 | 'SpanMulti', 'SpanNear', 'SpanNot', 'SpanOr', 'SpanTerm', 'Template', 12 | 'Term', 'Terms', 'TopChildren', 'Wildcard' 13 | ] 14 | 15 | 16 | def Q(name_or_query='match_all', **params): 17 | # {"match": {"title": "python"}} 18 | if isinstance(name_or_query, dict): 19 | if params: 20 | raise ValueError('Q() cannot accept parameters when passing in a dict.') 21 | if len(name_or_query) != 1: 22 | raise ValueError('Q() can only accept dict with a single query ({"match": {...}}). ' 23 | 'Instead it got (%r)' % name_or_query) 24 | name, params = name_or_query.copy().popitem() 25 | return Query.get_dsl_class(name)(**params) 26 | 27 | # MatchAll() 28 | if isinstance(name_or_query, Query): 29 | if params: 30 | raise ValueError('Q() cannot accept parameters when passing in a Query object.') 31 | return name_or_query 32 | 33 | # s.query = Q('filtered', query=s.query) 34 | if hasattr(name_or_query, '_proxied'): 35 | return name_or_query._proxied 36 | 37 | # "match", title="python" 38 | return Query.get_dsl_class(name_or_query)(**params) 39 | 40 | class Query(DslBase): 41 | _type_name = 'query' 42 | _type_shortcut = staticmethod(Q) 43 | name = None 44 | 45 | class MatchAll(Query): 46 | name = 'match_all' 47 | def __add__(self, other): 48 | return other._clone() 49 | __and__ = __rand__ = __radd__ = __add__ 50 | 51 | def __or__(self, other): 52 | return self 53 | __ror__ = __or__ 54 | EMPTY_QUERY = MatchAll() 55 | 56 | class Bool(BoolMixin, Query): 57 | name = 'bool' 58 | _param_defs = { 59 | 'must': {'type': 'query', 'multi': True}, 60 | 'should': {'type': 'query', 'multi': True}, 61 | 'must_not': {'type': 'query', 'multi': True}, 62 | 'filter': {'type': 'query', 'multi': True}, 63 | } 64 | 65 | def __and__(self, other): 66 | q = self._clone() 67 | if isinstance(other, self.__class__): 68 | q.must += other.must 69 | q.must_not += other.must_not 70 | q.should = [] 71 | for qx in (self, other): 72 | min_should_match = getattr(qx, 'minimum_should_match', 0 if any((qx.must, qx.must_not)) else 1) 73 | # all subqueries are required 74 | if len(qx.should) <= min_should_match: 75 | q.must.extend(qx.should) 76 | # not all of them are required, use it and remember min_should_match 77 | elif not q.should: 78 | q.minimum_should_match = min_should_match 79 | q.should = qx.should 80 | # not all are required, add a should list to the must with proper min_should_match 81 | else: 82 | q.must.append(Bool(should=qx.should, minimum_should_match=min_should_match)) 83 | else: 84 | q.must.append(other) 85 | return q 86 | __rand__ = __and__ 87 | # register this as Bool for Query 88 | Query._bool = Bool 89 | 90 | class FunctionScore(Query): 91 | name = 'function_score' 92 | _param_defs = { 93 | 'query': {'type': 'query'}, 94 | 'filter': {'type': 'filter'}, 95 | 'functions': {'type': 'score_function', 'multi': True}, 96 | } 97 | 98 | def __init__(self, **kwargs): 99 | if 'functions' not in kwargs: 100 | # collect any score functions passed in by name into the functions list 101 | fns = kwargs['functions'] = [] 102 | for name in ScoreFunction._classes: 103 | if name in kwargs: 104 | fns.append({name: kwargs.pop(name)}) 105 | 106 | super(FunctionScore, self).__init__(**kwargs) 107 | 108 | QUERIES = ( 109 | # compound queries 110 | ('boosting', {'positive': {'type': 'query'}, 'negative': {'type': 'query'}}), 111 | ('constant_score', {'query': {'type': 'query'}, 'filter': {'type': 'filter'}}), 112 | ('dis_max', {'queries': {'type': 'query', 'multi':
True}}), 113 | ('filtered', {'query': {'type': 'query'}, 'filter': {'type': 'filter'}}), 114 | ('indices', {'query': {'type': 'query'}, 'no_match_query': {'type': 'query'}}), 115 | 116 | # relationship queries 117 | ('nested', {'query': {'type': 'query'}}), 118 | ('has_child', {'query': {'type': 'query'}}), 119 | ('has_parent', {'query': {'type': 'query'}}), 120 | ('top_children', {'query': {'type': 'query'}}), 121 | 122 | # compound span queries 123 | ('span_first', {'match': {'type': 'query'}}), 124 | ('span_multi', {'match': {'type': 'query'}}), 125 | ('span_near', {'clauses': {'type': 'query', 'multi': True}}), 126 | ('span_not', {'exclude': {'type': 'query'}, 'include': {'type': 'query'}}), 127 | ('span_or', {'clauses': {'type': 'query', 'multi': True}}), 128 | 129 | # core queries 130 | ('common', None), 131 | ('fuzzy', None), 132 | ('fuzzy_like_this', None), 133 | ('fuzzy_like_this_field', None), 134 | ('geo_shape', None), 135 | ('ids', None), 136 | ('match', None), 137 | ('match_phrase', None), 138 | ('match_phrase_prefix', None), 139 | ('more_like_this', None), 140 | ('more_like_this_field', None), 141 | ('multi_match', None), 142 | ('prefix', None), 143 | ('query_string', None), 144 | ('range', None), 145 | ('regexp', None), 146 | ('simple_query_string', None), 147 | ('span_term', None), 148 | ('template', None), 149 | ('term', None), 150 | ('terms', None), 151 | ('wildcard', None), 152 | ) 153 | 154 | # generate the query classes dynamically 155 | for qname, params_def in QUERIES: 156 | qclass = _make_dsl_class(Query, qname, params_def) 157 | globals()[qclass.__name__] = qclass 158 | 159 | -------------------------------------------------------------------------------- /elasticsearch_parse/search.py: -------------------------------------------------------------------------------- 1 | from six import iteritems, string_types 2 | 3 | from elasticsearch.helpers import scan 4 | 5 | from .query import Q, EMPTY_QUERY, Filtered 6 | from .filter import F, EMPTY_FILTER 7 | from .aggs import A, AggBase 8 | from .utils import DslBase 9 | 10 | class BaseProxy(object): 11 | """ 12 | Simple proxy around DSL objects (queries and filters) that can be called 13 | (to add query/filter) and also allows attribute access which is proxied to 14 | the wrapped query/filter. 15 | """ 16 | def __init__(self, search, attr_name): 17 | self._search = search 18 | self._proxied = self._empty 19 | self._attr_name = attr_name 20 | 21 | def __nonzero__(self): 22 | return self._proxied != self._empty 23 | __bool__ = __nonzero__ 24 | 25 | def __call__(self, *args, **kwargs): 26 | s = self._search._clone() 27 | getattr(s, self._attr_name)._proxied += self._shortcut(*args, **kwargs) 28 | 29 | # always return search to be chainable 30 | return s 31 | 32 | def __getattr__(self, attr_name): 33 | return getattr(self._proxied, attr_name) 34 | 35 | def __setattr__(self, attr_name, value): 36 | if not attr_name.startswith('_'): 37 | self._proxied = self._shortcut(self._proxied.to_dict()) 38 | setattr(self._proxied, attr_name, value) 39 | super(BaseProxy, self).__setattr__(attr_name, value) 40 | 41 | 42 | class ProxyDescriptor(object): 43 | """ 44 | Simple descriptor to enable setting of queries and filters as: 45 | 46 | s = Search() 47 | s.query = Q(...)
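s.filter = F('term', published=True)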
48 | 49 | """ 50 | def __init__(self, name): 51 | self._attr_name = '_%s_proxy' % name 52 | 53 | def __get__(self, instance, owner): 54 | return getattr(instance, self._attr_name) 55 | 56 | def __set__(self, instance, value): 57 | proxy = getattr(instance, self._attr_name) 58 | proxy._proxied = proxy._shortcut(value) 59 | 60 | 61 | class ProxyQuery(BaseProxy): 62 | _empty = EMPTY_QUERY 63 | _shortcut = staticmethod(Q) 64 | 65 | 66 | class ProxyFilter(BaseProxy): 67 | _empty = EMPTY_FILTER 68 | _shortcut = staticmethod(F) 69 | 70 | 71 | class AggsProxy(AggBase, DslBase): 72 | name = 'aggs' 73 | def __init__(self, search): 74 | self._base = self._search = search 75 | self._params = {'aggs': {}} 76 | 77 | def to_dict(self): 78 | return super(AggsProxy, self).to_dict().get('aggs', {}) 79 | 80 | 81 | class Search(object): 82 | query = ProxyDescriptor('query') 83 | filter = ProxyDescriptor('filter') 84 | post_filter = ProxyDescriptor('post_filter') 85 | 86 | def __init__(self, using='default', index=None, doc_type=None, extra=None): 87 | """ 88 | Search request to elasticsearch. 89 | 90 | :arg using: `Elasticsearch` instance to use 91 | :arg index: limit the search to index 92 | :arg doc_type: only query this type. 93 | 94 | All the parameters supplied (or omitted) at creation time can later be 95 | overridden by methods (`using`, `index` and `doc_type` respectively). 96 | """ 97 | self._using = using 98 | 99 | self._index = None 100 | if isinstance(index, (tuple, list)): 101 | self._index = list(index) 102 | elif index: 103 | self._index = [index] 104 | 105 | self._doc_type = [] 106 | self._doc_type_map = {} 107 | if isinstance(doc_type, (tuple, list)): 108 | for dt in doc_type: 109 | self._add_doc_type(dt) 110 | elif isinstance(doc_type, dict): 111 | self._doc_type.extend(doc_type.keys()) 112 | self._doc_type_map.update(doc_type) 113 | elif doc_type: 114 | self._add_doc_type(doc_type) 115 | 116 | self.aggs = AggsProxy(self) 117 | self._sort = [] 118 | self._extra = extra or {} 119 | self._params = {} 120 | self._fields = None 121 | self._partial_fields = {} 122 | self._highlight = {} 123 | self._highlight_opts = {} 124 | self._suggest = {} 125 | self._script_fields = {} 126 | 127 | self._query_proxy = ProxyQuery(self, 'query') 128 | self._filter_proxy = ProxyFilter(self, 'filter') 129 | self._post_filter_proxy = ProxyFilter(self, 'post_filter') 130 | 131 | def __getitem__(self, n): 132 | """ 133 | Support slicing the `Search` instance for pagination. 134 | 135 | Slicing equates to the from/size parameters. E.g.:: 136 | 137 | s = Search().query(...)[0:25] 138 | 139 | is equivalent to:: 140 | 141 | s = Search().query(...).extra(from_=0, size=25) 142 | 143 | """ 144 | s = self._clone() 145 | 146 | if isinstance(n, slice): 147 | # If negative slicing, abort. 148 | if n.start and n.start < 0 or n.stop and n.stop < 0: 149 | raise ValueError("Search does not support negative slicing.") 150 | # Elasticsearch won't get all results so we default to size: 10 if 151 | # stop not given. 152 | s._extra['from'] = n.start or 0 153 | s._extra['size'] = n.stop - (n.start or 0) if n.stop is not None else 10 154 | return s 155 | else: # This is an index lookup, equivalent to slicing by [n:n+1]. 156 | # If negative index, abort.
157 | if n < 0: 158 | raise ValueError("Search does not support negative indexing.") 159 | s._extra['from'] = n 160 | s._extra['size'] = 1 161 | return s 162 | 163 | @classmethod 164 | def from_dict(cls, d): 165 | """ 166 | Construct a `Search` instance from a raw dict containing the search 167 | body. Useful when migrating from raw dictionaries. 168 | 169 | Example:: 170 | 171 | s = Search.from_dict({ 172 | "query": { 173 | "bool": { 174 | "must": [...] 175 | } 176 | }, 177 | "aggs": {...} 178 | }) 179 | s = s.filter('term', published=True) 180 | """ 181 | s = cls() 182 | s.update_from_dict(d) 183 | return s 184 | 185 | def _clone(self): 186 | """ 187 | Return a clone of the current search request. Performs a shallow copy 188 | of all the underlying objects. Used internally by most state modifying 189 | APIs. 190 | """ 191 | s = self.__class__(using=self._using, index=self._index, 192 | doc_type=self._doc_type) 193 | s._doc_type_map = self._doc_type_map.copy() 194 | s._sort = self._sort[:] 195 | s._fields = self._fields[:] if self._fields is not None else None 196 | s._partial_fields = self._partial_fields.copy() 197 | s._extra = self._extra.copy() 198 | s._highlight = self._highlight.copy() 199 | s._highlight_opts = self._highlight_opts.copy() 200 | s._suggest = self._suggest.copy() 201 | s._script_fields = self._script_fields.copy() 202 | for x in ('query', 'filter', 'post_filter'): 203 | getattr(s, x)._proxied = getattr(self, x)._proxied 204 | 205 | # copy top-level bucket definitions 206 | if self.aggs._params.get('aggs'): 207 | s.aggs._params = {'aggs': self.aggs._params['aggs'].copy()} 208 | s._params = self._params.copy() 209 | return s 210 | 211 | def update_from_dict(self, d): 212 | """ 213 | Apply options from a serialized body to the current instance. Modifies 214 | the object in-place. Used mostly by ``from_dict``. 215 | """ 216 | d = d.copy() 217 | if 'query' in d: 218 | self.query._proxied = Q(d.pop('query')) 219 | if 'post_filter' in d: 220 | self.post_filter._proxied = F(d.pop('post_filter')) 221 | 222 | if isinstance(self.query._proxied, Filtered): 223 | self.filter._proxied = self.query._proxied.filter 224 | self.query._proxied = self.query._proxied.query 225 | 226 | aggs = d.pop('aggs', d.pop('aggregations', {})) 227 | if aggs: 228 | self.aggs._params = { 229 | 'aggs': dict( 230 | (name, A(value)) for (name, value) in iteritems(aggs)) 231 | } 232 | if 'sort' in d: 233 | self._sort = d.pop('sort') 234 | if 'fields' in d: 235 | self._fields = d.pop('fields') 236 | if 'partial_fields' in d: 237 | self._partial_fields = d.pop('partial_fields') 238 | if 'highlight' in d: 239 | high = d.pop('highlight').copy() 240 | self._highlight = high.pop('fields') 241 | self._highlight_opts = high 242 | if 'suggest' in d: 243 | self._suggest = d.pop('suggest') 244 | if 'text' in self._suggest: 245 | text = self._suggest.pop('text') 246 | for s in self._suggest.values(): 247 | s.setdefault('text', text) 248 | if 'script_fields' in d: 249 | self._script_fields = d.pop('script_fields') 250 | self._extra = d 251 | 252 | def script_fields(self, **kwargs): 253 | """ 254 | Define script fields to be calculated on hits. See 255 | https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html 256 | for more details. 
257 | 258 | Example:: 259 | 260 | s = Search() 261 | s = s.script_fields(times_two="doc['field'].value * 2") 262 | s = s.script_fields( 263 | times_three={ 264 | 'script': "doc['field'].value * n", 265 | 'params': {'n': 3} 266 | } 267 | ) 268 | 269 | """ 270 | s = self._clone() 271 | for name in kwargs: 272 | if isinstance(kwargs[name], string_types): 273 | kwargs[name] = {'script': kwargs[name]} 274 | s._script_fields.update(kwargs) 275 | return s 276 | 277 | def params(self, **kwargs): 278 | """ 279 | Specify query params to be used when executing the search. All the 280 | keyword arguments will override the current values. See 281 | http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.search 282 | for all available parameters. 283 | 284 | Example:: 285 | 286 | s = Search() 287 | s = s.params(routing='user-1', preference='local') 288 | """ 289 | s = self._clone() 290 | s._params.update(kwargs) 291 | return s 292 | 293 | def extra(self, **kwargs): 294 | """ 295 | Add extra keys to the request body. Mostly here for backwards 296 | compatibility. 297 | """ 298 | s = self._clone() 299 | if 'from_' in kwargs: 300 | kwargs['from'] = kwargs.pop('from_') 301 | s._extra.update(kwargs) 302 | return s 303 | 304 | def fields(self, fields=None): 305 | """ 306 | Selectively load specific stored fields for each document. 307 | 308 | :arg fields: list of fields to return for each document 309 | 310 | If ``fields`` is None, the entire document will be returned for 311 | each hit. If fields is the empty list, no fields will be 312 | returned for each hit, just the metadata. 313 | """ 314 | s = self._clone() 315 | s._fields = fields 316 | return s 317 | 318 | def partial_fields(self, **partial): 319 | """ 320 | Control which part of the fields to extract from the `_source` document 321 | 322 | :arg partial: dict specifying which fields to extract from the source 323 | 324 | An example usage would be: 325 | 326 | s = Search().partial_fields(authors_data={ 327 | 'include': ['authors.*'], 328 | 'exclude': ['authors.name'] 329 | }) 330 | 331 | which will include all fields from the `authors` nested property except for 332 | each author's `name`. 333 | 334 | If ``partial`` is not provided, the whole `_source` will be fetched. Calling this multiple 335 | times will override the previous values with the new ones. 336 | """ 337 | s = self._clone() 338 | s._partial_fields = partial 339 | return s 340 | 341 | def sort(self, *keys): 342 | """ 343 | Add sorting information to the search request. If called without 344 | arguments it will remove all sort requirements. Otherwise it will 345 | replace them. Acceptable arguments are:: 346 | 347 | 'some.field' 348 | '-some.other.field' 349 | {'different.field': {'any': 'dict'}} 350 | 351 | so for example:: 352 | 353 | s = Search().sort( 354 | 'category', 355 | '-title', 356 | {"price" : {"order" : "asc", "mode" : "avg"}} 357 | ) 358 | 359 | will sort by ``category``, ``title`` (in descending order) and 360 | ``price`` in ascending order using the ``avg`` mode. 361 | 362 | The API returns a copy of the Search object and can thus be chained. 363 | """ 364 | s = self._clone() 365 | s._sort = [] 366 | for k in keys: 367 | if isinstance(k, string_types) and k.startswith('-'): 368 | k = {k[1:]: {"order": "desc"}} 369 | s._sort.append(k) 370 | return s 371 | 372 | def highlight_options(self, **kwargs): 373 | """ 374 | Update the global highlighting options used for this request.
For 375 | example:: 376 | 377 | s = Search() 378 | s = s.highlight_options(order='score') 379 | """ 380 | s = self._clone() 381 | s._highlight_opts.update(kwargs) 382 | return s 383 | 384 | def highlight(self, *fields, **kwargs): 385 | """ 386 | Request highlighting of some fields. All keyword arguments passed in will be 387 | used as parameters. Example:: 388 | 389 | Search().highlight('title', 'body', fragment_size=50) 390 | 391 | will produce the equivalent of:: 392 | 393 | { 394 | "highlight": { 395 | "fields": { 396 | "body": {"fragment_size": 50}, 397 | "title": {"fragment_size": 50} 398 | } 399 | } 400 | } 401 | 402 | """ 403 | s = self._clone() 404 | for f in fields: 405 | s._highlight[f] = kwargs 406 | return s 407 | 408 | def suggest(self, name, text, **kwargs): 409 | """ 410 | Add a suggestions request to the search. 411 | 412 | :arg name: name of the suggestion 413 | :arg text: text to suggest on 414 | 415 | All keyword arguments will be added to the suggestions body. For example:: 416 | 417 | s = Search() 418 | s = s.suggest('suggestion-1', 'Elasticserach', term={'field': 'body'}) 419 | """ 420 | s = self._clone() 421 | s._suggest[name] = {'text': text} 422 | s._suggest[name].update(kwargs) 423 | return s 424 | 425 | def index(self, *index): 426 | """ 427 | Set the index for the search. If called without arguments it will remove all index information. 428 | 429 | Example: 430 | 431 | s = Search() 432 | s = s.index('twitter-2015.01.01', 'twitter-2015.01.02') 433 | """ 434 | # .index() resets 435 | s = self._clone() 436 | if not index: 437 | s._index = None 438 | else: 439 | s._index = (self._index or []) + list(index) 440 | return s 441 | 442 | def _add_doc_type(self, doc_type): 443 | if hasattr(doc_type, '_doc_type'): 444 | self._doc_type_map[doc_type._doc_type.name] = doc_type.from_es 445 | doc_type = doc_type._doc_type.name 446 | self._doc_type.append(doc_type) 447 | 448 | def doc_type(self, *doc_type, **kwargs): 449 | """ 450 | Set the type to search through. You can supply a single value or 451 | multiple. Values can be strings or subclasses of ``DocType``. 452 | 453 | You can also pass in any keyword arguments, mapping a doc_type to a 454 | callback that should be used instead of the Result class. 455 | 456 | If no doc_type is supplied any information stored on the instance will 457 | be erased. 458 | 459 | Example: 460 | 461 | s = Search().doc_type('product', 'store', User, custom=my_callback) 462 | """ 463 | # .doc_type() resets 464 | s = self._clone() 465 | if not doc_type and not kwargs: 466 | s._doc_type = [] 467 | s._doc_type_map = {} 468 | else: 469 | for dt in doc_type: 470 | s._add_doc_type(dt) 471 | s._doc_type.extend(kwargs.keys()) 472 | s._doc_type_map.update(kwargs) 473 | return s 474 | 475 | def to_dict(self, count=False, **kwargs): 476 | """ 477 | Serialize the search into the dictionary that will be sent over as the 478 | request's body. 479 | 480 | :arg count: a flag to specify we are interested in a body for count - 481 | no aggregations, no pagination bounds etc. 482 | 483 | All additional keyword arguments will be included into the dictionary.
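Example (a small sketch; ``f`` is an arbitrary field name)::

    s = Search().query('match', f=42)
    s.to_dict(size=10)   # {'query': {'match': {'f': 42}}, 'size': 10}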
484 | """ 485 | if self.filter: 486 | d = { 487 | "query": { 488 | "filtered": { 489 | "query": self.query.to_dict(), 490 | "filter": self.filter.to_dict() 491 | } 492 | } 493 | } 494 | else: 495 | d = {"query": self.query.to_dict()} 496 | 497 | if self.post_filter: 498 | d['post_filter'] = self.post_filter.to_dict() 499 | 500 | # count request doesn't care for sorting and other things 501 | if not count: 502 | if self.aggs.aggs: 503 | d.update(self.aggs.to_dict()) 504 | 505 | if self._sort: 506 | d['sort'] = self._sort 507 | 508 | d.update(self._extra) 509 | 510 | if self._fields is not None: 511 | d['fields'] = self._fields 512 | 513 | if self._partial_fields: 514 | d['partial_fields'] = self._partial_fields 515 | 516 | if self._highlight: 517 | d['highlight'] = {'fields': self._highlight} 518 | d['highlight'].update(self._highlight_opts) 519 | 520 | if self._suggest: 521 | d['suggest'] = self._suggest 522 | 523 | if self._script_fields: 524 | d['script_fields'] = self._script_fields 525 | 526 | d.update(kwargs) 527 | return d 528 | 529 | def using(self, client): 530 | """ 531 | Associate the search request with an elasticsearch client. A fresh copy 532 | will be returned with the current instance remaining unchanged. 533 | 534 | :arg client: an instance of ``elasticsearch.Elasticsearch`` to use or 535 | an alias to look up in ``elasticsearch_dsl.connections`` 536 | 537 | """ 538 | s = self._clone() 539 | s._using = client 540 | return s 541 | 542 | def count(self): 543 | """ 544 | Build the body for a count request. In this fork the serialized 545 | body is returned instead of the actual number of hits. 546 | """ 547 | # execution was stripped from this fork, so just build the body 548 | 549 | d = self.to_dict(count=True) 550 | return d 551 | 552 | def execute(self): 553 | """ 554 | Serialize the search and return the request body. In this fork the 555 | DSL dict is returned instead of being executed against a cluster; 556 | the original execution code is kept below for reference. 557 | 558 | """ 559 | return self.to_dict() 560 | #return response_class( 561 | # es.search( 562 | # index=self._index, 563 | # doc_type=self._doc_type, 564 | # body=self.to_dict(), 565 | # **self._params 566 | # ), 567 | # callbacks=self._doc_type_map 568 | #) 569 | 570 | def scan(self): 571 | """ 572 | Turn the search into a scan search and return a generator that will 573 | iterate over all the documents matching the query.
574 | 575 | Use ``params`` method to specify any additional arguments you wish to 576 | pass to the underlying ``scan`` helper from ``elasticsearch-py`` - 577 | http://elasticsearch-py.readthedocs.org/en/master/helpers.html#elasticsearch.helpers.scan 578 | 579 | """ 580 | es = connections.get_connection(self._using) 581 | 582 | for hit in scan( 583 | es, 584 | query=self.to_dict(), 585 | index=self._index, 586 | doc_type=self._doc_type, 587 | **self._params 588 | ): 589 | yield self._doc_type_map.get(hit['_type'], Result)(hit) 590 | 591 | -------------------------------------------------------------------------------- /elasticsearch_parse/serializer.py: -------------------------------------------------------------------------------- 1 | from elasticsearch.serializer import JSONSerializer 2 | 3 | from .utils import AttrDict, AttrList 4 | 5 | class AttrJSONSerializer(JSONSerializer): 6 | def default(self, data): 7 | if isinstance(data, AttrDict): 8 | return data._d_ 9 | if isinstance(data, AttrList): 10 | return data._l_ 11 | return super(AttrJSONSerializer, self).default(data) 12 | 13 | serializer = AttrJSONSerializer() 14 | -------------------------------------------------------------------------------- /elasticsearch_parse/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from six import iteritems, add_metaclass 4 | from six.moves import map 5 | 6 | from .exceptions import UnknownDslObject, ValidationException 7 | 8 | SKIP_VALUES = ('', None) 9 | 10 | def _wrap(val, obj_wrapper=None): 11 | if isinstance(val, dict): 12 | return AttrDict(val) if obj_wrapper is None else obj_wrapper(val) 13 | if isinstance(val, list): 14 | return AttrList(val) 15 | return val 16 | 17 | def _make_dsl_class(base, name, params_def=None, suffix=''): 18 | """ 19 | Generate a DSL class based on the name of the DSL object and its parameters 20 | """ 21 | attrs = {'name': name} 22 | if params_def: 23 | attrs['_param_defs'] = params_def 24 | cls_name = str(''.join(s.title() for s in name.split('_')) + suffix) 25 | return type(cls_name, (base, ), attrs) 26 | 27 | class AttrList(object): 28 | def __init__(self, l, obj_wrapper=None): 29 | # make iterables into lists 30 | if not isinstance(l, list): 31 | l = list(l) 32 | self._l_ = l 33 | self._obj_wrapper = obj_wrapper 34 | 35 | def __repr__(self): 36 | return repr(self._l_) 37 | 38 | def __eq__(self, other): 39 | if isinstance(other, AttrList): 40 | return other._l_ == self._l_ 41 | # make sure we still compare equal to a plain list with the same data 42 | return other == self._l_ 43 | 44 | def __getitem__(self, k): 45 | l = self._l_[k] 46 | if isinstance(k, slice): 47 | return AttrList(l) 48 | return _wrap(l, self._obj_wrapper) 49 | 50 | def __setitem__(self, k, value): 51 | self._l_[k] = value 52 | 53 | def __iter__(self): 54 | return map(lambda i: _wrap(i, self._obj_wrapper), self._l_) 55 | 56 | def __len__(self): 57 | return len(self._l_) 58 | 59 | def __nonzero__(self): 60 | return bool(self._l_) 61 | __bool__ = __nonzero__ 62 | 63 | def __getattr__(self, name): 64 | return getattr(self._l_, name) 65 | 66 | 67 | class AttrDict(object): 68 | """ 69 | Helper class to provide attribute like access (read and write) to 70 | dictionaries. Used to provide a convenient way to access both results and 71 | nested dsl dicts.
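For example ``AttrDict({'hits': {'total': 5}}).hits.total`` instead of ``d['hits']['total']``.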
72 | """ 73 | def __init__(self, d): 74 | # assign the inner dict manually to prevent __setattr__ from firing 75 | super(AttrDict, self).__setattr__('_d_', d) 76 | 77 | def __contains__(self, key): 78 | return key in self._d_ 79 | 80 | def __nonzero__(self): 81 | return bool(self._d_) 82 | __bool__ = __nonzero__ 83 | 84 | def __dir__(self): 85 | # introspection for auto-complete in IPython etc 86 | return list(self._d_.keys()) 87 | 88 | def __eq__(self, other): 89 | if isinstance(other, AttrDict): 90 | return other._d_ == self._d_ 91 | # make sure we still equal to a dict with the same data 92 | return other == self._d_ 93 | 94 | def __repr__(self): 95 | r = repr(self._d_) 96 | if len(r) > 60: 97 | r = r[:60] + '...}' 98 | return r 99 | 100 | def __getattr__(self, attr_name): 101 | try: 102 | return _wrap(self._d_[attr_name]) 103 | except KeyError: 104 | raise AttributeError( 105 | '%r object has no attribute %r' % (self.__class__.__name__, attr_name)) 106 | 107 | def __delattr__(self, attr_name): 108 | try: 109 | del self._d_[attr_name] 110 | except KeyError: 111 | raise AttributeError( 112 | '%r object has no attribute %r' % (self.__class__.__name__, attr_name)) 113 | 114 | def __getitem__(self, key): 115 | return _wrap(self._d_[key]) 116 | 117 | def __setitem__(self, key, value): 118 | self._d_[key] = value 119 | 120 | def __delitem__(self, key): 121 | del self._d_[key] 122 | 123 | def __setattr__(self, name, value): 124 | if name in self._d_ or not hasattr(self.__class__, name): 125 | self._d_[name] = value 126 | else: 127 | # there is an attribute on the class (could be property, ..) - don't add it as field 128 | super(AttrDict, self).__setattr__(name, value) 129 | 130 | def __iter__(self): 131 | return iter(self._d_) 132 | 133 | def to_dict(self): 134 | return self._d_ 135 | 136 | 137 | class DslMeta(type): 138 | """ 139 | Base Metaclass for DslBase subclasses that builds a registry of all classes 140 | for a given DslBase subclass (== all the query types for the Query subclass 141 | of DslBase). 142 | 143 | It then uses the information from that registry (as well as `name` and 144 | `shortcut` attributes from the base class) to construct any subclass based 145 | on its name. 146 | 147 | For typical use see `QueryMeta` and `Query` in `elasticsearch_dsl.query`. 148 | """ 149 | _types = {} 150 | def __init__(cls, name, bases, attrs): 151 | super(DslMeta, cls).__init__(name, bases, attrs) 152 | # skip for DslBase 153 | if not hasattr(cls, '_type_shortcut'): 154 | return 155 | if cls.name is None: 156 | # abstract base class, register its shortcut 157 | cls._types[cls._type_name] = cls._type_shortcut 158 | # and create a registry for subclasses 159 | if not hasattr(cls, '_classes'): 160 | cls._classes = {} 161 | elif cls.name not in cls._classes: 162 | # normal class, register it 163 | cls._classes[cls.name] = cls 164 | 165 | @classmethod 166 | def get_dsl_type(cls, name): 167 | try: 168 | return cls._types[name] 169 | except KeyError: 170 | raise UnknownDslObject('DSL type %s does not exist.' % name) 171 | 172 | 173 | @add_metaclass(DslMeta) 174 | class DslBase(object): 175 | """ 176 | Base class for all DSL objects - queries, filters, aggregations etc. Wraps 177 | a dictionary representing the object's json.
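For example ``Q('match', title='python')`` wraps ``{'match': {'title': 'python'}}``, and ``to_dict()`` produces that dict again.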
178 | 
179 |     Provides several features:
180 |         - attribute access to the wrapped dictionary (.field instead of ['field'])
181 |         - _clone method returning a deep copy of self
182 |         - to_dict method to serialize into dict (to be sent via elasticsearch-py)
183 |         - basic logical operators (&, | and ~) using a Bool(Filter|Query) TODO:
184 |           move into a class specific to Query/Filter
185 |         - respects the definition of the class and (de)serializes its
186 |           attributes based on the `_param_defs` definition (for example turning
187 |           all values in the `must` attribute into Query objects)
188 |     """
189 |     _param_defs = {}
190 | 
191 |     @classmethod
192 |     def get_dsl_class(cls, name):
193 |         try:
194 |             return cls._classes[name]
195 |         except KeyError:
196 |             raise UnknownDslObject('DSL class `%s` does not exist in %s.' % (name, cls._type_name))
197 | 
198 |     def __init__(self, **params):
199 |         self._params = {}
200 |         for pname, pvalue in iteritems(params):
201 |             if '__' in pname:
202 |                 pname = pname.replace('__', '.')
203 |             self._setattr(pname, pvalue)
204 | 
205 |     def _repr_params(self):
206 |         """ Produce a repr of all our parameters to be used in __repr__. """
207 |         return ', '.join(
208 |             '%s=%r' % (n.replace('.', '__'), v)
209 |             for (n, v) in sorted(iteritems(self._params))
210 |             # make sure we don't include empty typed params
211 |             if 'type' not in self._param_defs.get(n, {}) or v
212 |         )
213 | 
214 |     def __repr__(self):
215 |         return '%s(%s)' % (
216 |             self.__class__.__name__,
217 |             self._repr_params()
218 |         )
219 | 
220 |     def __eq__(self, other):
221 |         return isinstance(other, self.__class__) and other.to_dict() == self.to_dict()
222 | 
223 |     def __ne__(self, other):
224 |         return not self == other
225 | 
226 |     def __setattr__(self, name, value):
227 |         if name.startswith('_'):
228 |             return super(DslBase, self).__setattr__(name, value)
229 |         return self._setattr(name, value)
230 | 
231 |     def _setattr(self, name, value):
232 |         # if this attribute has a special type assigned to it...
233 |         if name in self._param_defs:
234 |             pinfo = self._param_defs[name]
235 | 
236 |             if 'type' in pinfo:
237 |                 # get the shortcut used to construct this type (query.Q, aggs.A, etc)
238 |                 shortcut = self.__class__.get_dsl_type(pinfo['type'])
239 |                 if pinfo.get('multi'):
240 |                     value = list(map(shortcut, value))
241 | 
242 |                 # dict(name -> DslBase), make sure we pick up all the objs
243 |                 elif pinfo.get('hash'):
244 |                     value = dict((k, shortcut(v)) for (k, v) in iteritems(value))
245 | 
246 |                 # single value object, just convert
247 |                 else:
248 |                     value = shortcut(value)
249 |         self._params[name] = value
250 | 
251 |     def __getattr__(self, name):
252 |         if name.startswith('_'):
253 |             raise AttributeError(
254 |                 '%r object has no attribute %r' % (self.__class__.__name__, name))
255 | 
256 |         value = None
257 |         try:
258 |             value = self._params[name]
259 |         except KeyError:
260 |             # compound types should never raise AttributeError; return an
261 |             # empty container instead
262 |             if name in self._param_defs:
263 |                 pinfo = self._param_defs[name]
264 |                 if pinfo.get('multi'):
265 |                     value = self._params.setdefault(name, [])
266 |                 elif pinfo.get('hash'):
267 |                     value = self._params.setdefault(name, {})
268 |         if value is None:
269 |             raise AttributeError(
270 |                 '%r object has no attribute %r' % (self.__class__.__name__, name))
271 | 
272 |         # wrap nested dicts in AttrDict for convenient access
273 |         if isinstance(value, dict):
274 |             return AttrDict(value)
275 |         return value
276 | 
277 |     def to_dict(self):
278 |         """
279 |         Serialize the DSL object to a plain dict
280 |         """
281 |         d = {}
282 |         for pname, value in iteritems(self._params):
283 |             pinfo = self._param_defs.get(pname)
284 | 
285 |             # typed param
286 |             if pinfo and 'type' in pinfo:
287 |                 # don't serialize empty lists and dicts for typed fields
288 |                 if value in ({}, []):
289 |                     continue
290 | 
291 |                 # multi-values are serialized as list of dicts
292 |                 if pinfo.get('multi'):
293 |                     value = list(map(lambda x: x.to_dict(), value))
294 | 
295 |                 # squash all the hash values into one dict
296 |                 elif pinfo.get('hash'):
297 |                     value = dict((k, v.to_dict()) for k, v in iteritems(value))
298 | 
299 |                 # serialize single values
300 |                 else:
301 |                     value = value.to_dict()
302 | 
303 |             # serialize anything with a to_dict method
304 |             elif hasattr(value, 'to_dict'):
305 |                 value = value.to_dict()
306 | 
307 |             d[pname] = value
308 |         return {self.name: d}
309 | 
310 |     def _clone(self):
311 |         return self._type_shortcut(self.to_dict())
312 | 
313 |     def __add__(self, other):
314 |         # make sure we give queries that know how to combine themselves
315 |         # preference
316 |         if hasattr(other, '__radd__'):
317 |             return other.__radd__(self)
318 |         return self._bool(must=[self, other])
319 | 
320 |     def __invert__(self):
321 |         return self._bool(must_not=[self])
322 | 
323 |     def __or__(self, other):
324 |         # make sure we give queries that know how to combine themselves
325 |         # preference
326 |         if hasattr(other, '__ror__'):
327 |             return other.__ror__(self)
328 |         return self._bool(should=[self, other])
329 | 
330 |     def __and__(self, other):
331 |         # make sure we give queries that know how to combine themselves
332 |         # preference
333 |         if hasattr(other, '__rand__'):
334 |             return other.__rand__(self)
335 |         return self._bool(must=[self, other])
336 | 
337 | 
338 | class BoolMixin(object):
339 |     """
340 |     Mixin containing all the operator overrides for Bool queries and filters.
341 | 
342 |     The exception is ``&``, where the handling of ``should`` clauses differs.
343 |     """
344 |     def __add__(self, other):
345 |         q = self._clone()
346 |         if isinstance(other, self.__class__):
347 |             q.must += other.must
348 |             q.should += other.should
349 |             q.must_not += other.must_not
350 |         else:
351 |             q.must.append(other)
352 |         return q
353 |     __radd__ = __add__
354 | 
355 |     def __or__(self, other):
356 |         if not (self.must or self.must_not):
357 |             # TODO: if only 1 in must or should, append the query instead of other
358 |             q = self._clone()
359 |             q.should.append(other)
360 |             return q
361 | 
362 |         elif isinstance(other, self.__class__) and not (other.must or other.must_not):
363 |             # TODO: if only 1 in must or should, append the query instead of self
364 |             q = other._clone()
365 |             q.should.append(self)
366 |             return q
367 | 
368 |         return self.__class__(should=[self, other])
369 |     __ror__ = __or__
370 | 
371 |     def __invert__(self):
372 |         # special case for single negated query
373 |         if not (self.must or self.should) and len(self.must_not) == 1:
374 |             return self.must_not[0]._clone()
375 | 
376 |         # bool without should, just flip must and must_not
377 |         elif not self.should:
378 |             q = self._clone()
379 |             q.must, q.must_not = q.must_not, q.must
380 |             return q
381 | 
382 |         # TODO: should -> must_not.append(self.__class__(should=self.should)) ??
383 |         # queries with should just invert normally
384 |         return super(BoolMixin, self).__invert__()
385 | 
386 | 
387 | class ObjectBase(AttrDict):
388 |     def __init__(self, **kwargs):
389 |         m = self._doc_type.mapping
390 |         for k in m:
391 |             if k in kwargs and m[k]._coerce:
392 |                 kwargs[k] = m[k].to_python(kwargs[k])
393 |         super(ObjectBase, self).__init__(kwargs)
394 | 
395 |     def __getattr__(self, name):
396 |         try:
397 |             return super(ObjectBase, self).__getattr__(name)
398 |         except AttributeError:
399 |             if name in self._doc_type.mapping:
400 |                 f = self._doc_type.mapping[name]
401 |                 if hasattr(f, 'empty'):
402 |                     value = f.empty()
403 |                     if value not in SKIP_VALUES:
404 |                         setattr(self, name, value)
405 |                         value = getattr(self, name)
406 |                     return value
407 |             raise
408 | 
409 |     def __setattr__(self, name, value):
410 |         if name in self._doc_type.mapping:
411 |             value = self._doc_type.mapping[name].to_python(value)
412 |         super(ObjectBase, self).__setattr__(name, value)
413 | 
414 |     def to_dict(self):
415 |         out = {}
416 |         for k, v in iteritems(self._d_):
417 |             if isinstance(v, (AttrList, list, tuple)):
418 |                 v = [i.to_dict() if hasattr(i, 'to_dict') else i for i in v]
419 |             else:
420 |                 v = v.to_dict() if hasattr(v, 'to_dict') else v
421 | 
422 |             # don't serialize empty values
423 |             # careful not to include numeric zeros
424 |             if v in ([], {}, None):
425 |                 continue
426 | 
427 |             out[k] = v
428 |         return out
429 | 
430 |     def clean_fields(self):
431 |         errors = {}
432 |         for name in self._doc_type.mapping:
433 |             field = self._doc_type.mapping[name]
434 |             data = self._d_.get(name, None)
435 |             try:
436 |                 # save the cleaned value
437 |                 self._d_[name] = field.clean(data)
438 |             except ValidationException as e:
439 |                 errors.setdefault(name, []).append(e)
440 | 
441 |         if errors:
442 |             raise ValidationException(errors)
443 | 
444 |     def clean(self):
445 |         pass
446 | 
447 |     def full_clean(self):
448 |         self.clean_fields()
449 |         self.clean()
450 | 
451 | def merge(data, new_data):
452 |     if not (isinstance(data, (AttrDict, dict))
453 |             and isinstance(new_data, (AttrDict, dict))):
454 |         raise ValueError('You can only merge two dicts! Got %r and %r instead.' % (data, new_data))
455 | 
456 |     for key, value in iteritems(new_data):
457 |         if key in data and isinstance(data[key], (AttrDict, dict)):
458 |             merge(data[key], value)
459 |         else:
460 |             data[key] = value
461 | 
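462 | 
463 | if __name__ == '__main__':
464 |     # Tiny illustrative check (not part of the original module): merge()
465 |     # recurses into nested dicts and mutates its first argument in place.
466 |     d = {'a': {'b': 1}}
467 |     merge(d, {'a': {'c': 2}, 'x': 3})
468 |     assert d == {'a': {'b': 1, 'c': 2}, 'x': 3}
469 | 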
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | #from distutils.core import setup, Command
4 | from setuptools import setup, find_packages
5 | import os
6 | import os.path
7 | 
8 | def read(fname):
9 |     return open(os.path.join(os.path.dirname(__file__), fname)).read()
10 | 
11 | setup(
12 |     name='elasticsearch_parse',
13 |     version='2.0',
14 |     description='elasticsearch parse',
15 |     long_description=open('README.md').read(),
16 |     keywords=["elasticsearch_parse", "fengyun"],
17 |     url='http://xiaorui.cc',
18 |     author='ruifengyun',
19 |     author_email='rfyiamcool@163.com',
20 |     install_requires=['elasticsearch-dsl'],
21 |     packages=['elasticsearch_parse'],
22 |     license="MIT",
23 |     classifiers=[
24 |         'Development Status :: 2 - Pre-Alpha',
25 |         'Intended Audience :: Developers',
26 |         'License :: OSI Approved :: MIT License',
27 |         'Programming Language :: Python :: 2.6',
28 |         'Programming Language :: Python :: 2.7',
29 |         'Programming Language :: Python :: 3.0',
30 |         'Topic :: Software Development :: Libraries :: Python Modules',
31 |     ]
32 | )
33 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | from elasticsearch_parse import Search, Q
2 | 
3 | 
4 | s = Search(index="my-index") \
5 |     .filter("term", blog="xiaorui.cc") \
6 |     .query("match", author="ruifengyun") \
7 |     .query(~Q("match", face="good"))
8 | 
9 | s.aggs.bucket('per_tag', 'terms', field='tags')
10 | 
11 | print(s.execute())
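12 | 
13 | # Extra illustrative usage (not in the original test): per the README,
14 | # buckets chain into metrics, and to_dict() renders the query body
15 | # without needing a live cluster. Field names here are just examples.
16 | s.aggs.bucket('per_author', 'terms', field='author').metric('max_lines', 'max', field='lines')
17 | print(s.to_dict())
18 | 
--------------------------------------------------------------------------------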