├── .gitignore ├── LICENSE.txt ├── README.md ├── elasticsearch_parse │   ├── __init__.py │   ├── aggs.py │   ├── exceptions.py │   ├── faceted_search.py │   ├── filter.py │   ├── index.py │   ├── query.py │   ├── search.py │   ├── serializer.py │   └── utils.py ├── setup.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | *~ 3 | *.py[co] 4 | .coverage 5 | *.egg-info 6 | dist 7 | build 8 | *.egg 9 | coverage.xml 10 | junit.xml 11 | test_elasticsearch_dsl/htmlcov 12 | docs/_build 13 | .cache 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 rui fengyun 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # elasticsearch_parse 2 | 3 | 4 | First, a note on how this project came about. Learning Elasticsearch is somewhat painful in itself: we can write SQL and MongoDB statements fluently, yet the Elasticsearch DSL syntax keeps leaving us stumped, endlessly digging back through our notes... 5 | Simply put, Elasticsearch Parse makes it easier to get started with ES. Its main feature is statement mapping, a little like an ORM... 6 | 7 | This project is derived from `elasticsearch-dsl-py`. The original plan was to fork it and submit a pull request once the work was done, but since this syntax-mapping feature is of little interest upstream, I simply cut 70% of the code, added some seasoning of my own, and kept just the DSL statement mapping! 8 | 9 | To install the module: 10 | ``` 11 | pip install elasticsearch_parse 12 | ``` 13 | 14 | Now let's try out the wrapped ES syntax interpreter. 15 | 16 | First, the same operation in raw DSL syntax. It looks cumbersome at a glance, and writing it by hand is even worse.
17 | ``` 18 | from elasticsearch import Elasticsearch 19 | client = Elasticsearch() 20 | 21 | response = client.search( 22 | index="my-index", 23 | body={ 24 | "query": { 25 | "filtered": { 26 | "query": { 27 | "bool": { 28 | "must": [{"match": {"title": "python"}}], 29 | "must_not": [{"match": {"description": "beta"}}] 30 | } 31 | }, 32 | "filter": {"term": {"category": "search"}} 33 | } 34 | }, 35 | "aggs": { 36 | "per_tag": { 37 | "terms": {"field": "tags"}, 38 | "aggs": { 39 | "max_lines": {"max": {"field": "lines"}} 40 | } 41 | } 42 | } 43 | } 44 | ) 45 | 46 | ``` 47 | 48 | Here is the same request with Elasticsearch_parse; it could hardly be simpler: 49 | 50 | ``` 51 | from elasticsearch_parse import Search, Q 52 | 53 | s = Search(index="my-index") \ 54 | .filter("term", blog="xiaorui.cc") \ 55 | .query("match", author="ruifengyun") \ 56 | .query(~Q("match", face="good")) 57 | 58 | s.aggs.bucket('per_tag', 'terms', field='tags') 59 | 60 | response = s.execute() 61 | ``` 62 | The result we get is: 63 | ``` 64 | { 65 | "query": { 66 | "filtered": { 67 | "filter": { 68 | "term": { 69 | "blog": "xiaorui.cc" 70 | } 71 | }, 72 | "query": { 73 | "bool": { 74 | "must_not": [ 75 | { 76 | "match": { 77 | "face": "good" 78 | } 79 | } 80 | ], 81 | "must": [ 82 | { 83 | "match": { 84 | "author": "ruifengyun" 85 | } 86 | } 87 | ] 88 | } 89 | } 90 | } 91 | }, 92 | "aggs": { 93 | "per_tag": { 94 | "terms": { 95 | "field": "tags" 96 | } 97 | } 98 | } 99 | } 100 | ``` 101 | 102 | Let's walk through the syntax piece by piece. 103 | ``` 104 | s = Search() 105 | ``` 106 | 107 | A match query for documents whose field f equals 55: 108 | ``` 109 | s.query('match', f=55) 110 | ``` 111 | 112 | A time range: 113 | ``` 114 | s.query('range', **{'@timestamp': {'lt': 'now'}}) 115 | ``` 116 | 117 | Controlling size from the outside: 118 | ``` 119 | s = s.query('match', f=42) 120 | s[3].to_dict()  # {'query': {'match': {'f': 42}}, 'from': 3, 'size': 1} 121 | ``` 122 | 123 | ``` 124 | assert s.to_dict(size=10) == {"query": {"match": {'f': 42}}, "size": 10} 125 | ``` 126 | 127 | Embedding size inside the body: 128 | ``` 129 | s = Search.from_dict({"size": 5}) 130 | assert { 131 | "query": {"match_all": {}}, 132 | "size": 5 133 | } == s.to_dict() 134 | ``` 135 | 136 | Using aggs for aggregations: 137 | ``` 138 | s = s.query('match', f=42) 139 | assert {"query": {"match": {'f': 42}}} == s.to_dict() 140 | assert {"query": {"match": {'f': 42}}, "size": 10} == s.to_dict(size=10) 141 | s.aggs.bucket('per_tag', 'terms', field='f').metric('max_score', 'max', field='score') 142 | d = { 143 | 'aggs': { 144 | 'per_tag': { 145 | 'terms': {'field': 'f'}, 146 | 'aggs': {'max_score': {'max': {'field': 'score'}}} 147 | } 148 | } } 149 | ``` 150 | A few more aggs usages: 151 | 152 | Take the maximum of a field: 153 | ``` 154 | s.aggs.metric('max_score', 'max', field='score') 155 | ``` 156 | 157 | Group and aggregate by a condition, nesting a metric inside the bucket (`A` can be imported from elasticsearch_parse): 158 | ``` 159 | A('terms', field='tags', aggs={'max_score': {'max': {'field': 'score'}}}) 160 | ``` 161 | 162 | Bucket aggregation; Elasticsearch sorts the buckets by count for you: 163 | ``` 164 | s.aggs.bucket('per_tag', 'terms', field='tags') 165 | ``` 166 | 167 | 168 | ...
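To wrap up, here is a slightly larger sketch that combines the pieces above into one request. The index and field names are invented for illustration, and `execute()` in this fork simply returns the assembled DSL dict rather than hitting a cluster:

```
from elasticsearch_parse import Search

s = Search(index="logstash-2015.06.01") \
    .filter("term", status="active") \
    .query("match", message="timeout") \
    .query("range", **{"@timestamp": {"gte": "now-1d", "lt": "now"}})

# sort descending on time and keep the first 20 hits
s = s.sort("-@timestamp")[0:20]

# bucket per host, with the max latency inside each bucket
s.aggs.bucket("per_host", "terms", field="host") \
    .metric("max_latency", "max", field="latency")

body = s.execute()  # a plain dict, ready to hand to elasticsearch-py
```

Note that the query/filter/sort calls each return a copy of the search, while `s.aggs` modifies the search in place.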
169 | 170 | 171 | -------------------------------------------------------------------------------- /elasticsearch_parse/__init__.py: -------------------------------------------------------------------------------- 1 | from .query import Q 2 | from .filter import F 3 | from .aggs import A 4 | from .search import Search 5 | from .index import Index 6 | from .faceted_search import * 7 | 8 | VERSION = (0, 0, 9) 9 | __version__ = VERSION 10 | __versionstr__ = '.'.join(map(str, VERSION)) 11 | -------------------------------------------------------------------------------- /elasticsearch_parse/aggs.py: -------------------------------------------------------------------------------- 1 | from .utils import DslBase, _make_dsl_class 2 | 3 | __all__ = [ 4 | 'A', 'Agg', 'Filter', 'Bucket', 'Children', 'DateHistogram', 'Filters', 5 | 'GeoDistance', 'GeohashGrid', 'Global', 'Histogram', 'Iprange', 'Missing', 6 | 'Nested', 'Range', 'ReverseNested', 'SignificantTerms', 'Terms', 'Avg', 7 | 'Cardinality', 'ExtendedStats', 'GeoBounds', 'Max', 'Min', 'Percentiles', 8 | 'PercentileRanks', 'ScriptedMetric', 'Stats', 'Sum', 'TopHits', 'ValueCount' 9 | ] 10 | 11 | 12 | def A(name_or_agg, filter=None, **params): 13 | if filter is not None: 14 | if name_or_agg != 'filter': 15 | raise ValueError("Aggregation %r doesn't accept positional argument 'filter'." % name_or_agg) 16 | params['filter'] = filter 17 | 18 | # {"terms": {"field": "tags"}, "aggs": {...}} 19 | if isinstance(name_or_agg, dict): 20 | if params: 21 | raise ValueError('A() cannot accept parameters when passing in a dict.') 22 | # copy to avoid modifying in-place 23 | agg = name_or_agg.copy() 24 | # pop out nested aggs 25 | aggs = agg.pop('aggs', None) 26 | # should be {"terms": {"field": "tags"}} 27 | if len(agg) != 1: 28 | raise ValueError('A() can only accept dict with an aggregation ({"terms": {...}}). ' 29 | 'Instead it got (%r)' % name_or_agg) 30 | agg_type, params = agg.popitem() 31 | if aggs: 32 | params = params.copy() 33 | params['aggs'] = aggs 34 | return Agg.get_dsl_class(agg_type)(**params) 35 | 36 | # Terms(...) - just return the agg as-is 37 | elif isinstance(name_or_agg, Agg): 38 | if params: 39 | raise ValueError('A() cannot accept parameters when passing in an Agg object.') 40 | return name_or_agg 41 | 42 | # "terms", field="tags" 43 | return Agg.get_dsl_class(name_or_agg)(**params) 44 | 45 | class Agg(DslBase): 46 | _type_name = 'agg' 47 | _type_shortcut = staticmethod(A) 48 | name = None 49 | 50 | class AggBase(object): 51 | _param_defs = { 52 | 'aggs': {'type': 'agg', 'hash': True}, 53 | } 54 | def __getitem__(self, agg_name): 55 | agg = self._params.setdefault('aggs', {})[agg_name] # propagate KeyError 56 | 57 | # make sure we're not mutating a shared state - whenever accessing a 58 | # bucket, return a shallow copy of it to be safe 59 | if isinstance(agg, Bucket): 60 | agg = A(agg.name, **agg._params) 61 | # be sure to store the copy so any modifications to it will affect us 62 | self._params['aggs'][agg_name] = agg 63 | 64 | return agg 65 | 66 | def __setitem__(self, agg_name, agg): 67 | self.aggs[agg_name] = A(agg) 68 | 69 | def _agg(self, bucket, name, agg_type, *args, **params): 70 | agg = self[name] = A(agg_type, *args, **params) 71 | 72 | # For chaining - when creating new buckets return them...
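# e.g. s.aggs.bucket('per_tag', 'terms', field='tags') hands back the new
# Terms bucket so nested aggs can chain off it, while .metric(...) returns
# the enclosing bucket (self._base) so calls keep chaining.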
73 | if bucket: 74 | return agg 75 | # otherwise return self._base so we can keep chaining 76 | else: 77 | return self._base 78 | 79 | def metric(self, name, agg_type, *args, **params): 80 | return self._agg(False, name, agg_type, *args, **params) 81 | 82 | def bucket(self, name, agg_type, *args, **params): 83 | return self._agg(True, name, agg_type, *args, **params) 84 | 85 | 86 | class Bucket(AggBase, Agg): 87 | def __init__(self, **params): 88 | super(Bucket, self).__init__(**params) 89 | # remember self for chaining 90 | self._base = self 91 | 92 | def to_dict(self): 93 | d = super(AggBase, self).to_dict() 94 | if 'aggs' in d[self.name]: 95 | d['aggs'] = d[self.name].pop('aggs') 96 | return d 97 | 98 | class Filter(Bucket): 99 | name = 'filter' 100 | _param_defs = { 101 | 'filter': {'type': 'filter'}, 102 | 'aggs': {'type': 'agg', 'hash': True}, 103 | } 104 | 105 | def __init__(self, filter=None, **params): 106 | if filter is not None: 107 | params['filter'] = filter 108 | super(Filter, self).__init__(**params) 109 | 110 | def to_dict(self): 111 | d = super(Filter, self).to_dict() 112 | d[self.name].update(d[self.name].pop('filter', {})) 113 | return d 114 | 115 | AGGS = ( 116 | (Bucket, 'children', None), 117 | (Bucket, 'date_histogram', None), 118 | (Bucket, 'date_range', None), 119 | (Bucket, 'filters', {'filters': {'type': 'filter', 'hash': True}}), 120 | (Bucket, 'geo_distance', None), 121 | (Bucket, 'geohash_grid', None), 122 | (Bucket, 'global', None), 123 | (Bucket, 'histogram', None), 124 | (Bucket, 'iprange', None), 125 | (Bucket, 'missing', None), 126 | (Bucket, 'nested', None), 127 | (Bucket, 'range', None), 128 | (Bucket, 'reverse_nested', None), 129 | (Bucket, 'significant_terms', None), 130 | (Bucket, 'terms', None), 131 | 132 | (Agg, 'avg', None), 133 | (Agg, 'cardinality', None), 134 | (Agg, 'extended_stats', None), 135 | (Agg, 'geo_bounds', None), 136 | (Agg, 'max', None), 137 | (Agg, 'min', None), 138 | (Agg, 'percentiles', None), 139 | (Agg, 'percentile_ranks', None), 140 | (Agg, 'scripted_metric', None), 141 | (Agg, 'stats', None), 142 | (Agg, 'sum', None), 143 | (Agg, 'top_hits', None), 144 | (Agg, 'value_count', None), 145 | ) 146 | 147 | # generate the aggregation classes dynamically 148 | for base, fname, params_def in AGGS: 149 | # don't override the params def from AggBase 150 | if params_def: 151 | params_def.update(AggBase._param_defs) 152 | fclass = _make_dsl_class(base, fname, params_def) 153 | globals()[fclass.__name__] = fclass 154 | -------------------------------------------------------------------------------- /elasticsearch_parse/exceptions.py: -------------------------------------------------------------------------------- 1 | class ElasticsearchDslException(Exception): 2 | pass 3 | 4 | 5 | class UnknownDslObject(ElasticsearchDslException): 6 | pass 7 | 8 | 9 | class ValidationException(ValueError, ElasticsearchDslException): 10 | pass 11 | 12 | 13 | class IllegalOperation(ElasticsearchDslException): 14 | pass 15 | -------------------------------------------------------------------------------- /elasticsearch_parse/faceted_search.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta, datetime 2 | from six import iteritems, itervalues 3 | from functools import partial 4 | 5 | from .search import Search 6 | from .filter import F 7 | from .aggs import A 8 | from .utils import AttrDict 9 | from .result import Response 10 | 11 | __all__ = ['FacetedSearch', 'HistogramFacet', 'TermsFacet',
'DateHistogramFacet', 'RangeFacet'] 12 | 13 | class Facet(object): 14 | """ 15 | A facet on faceted search. Wraps an aggregation and provides functionality 16 | to create a filter for selected values and return a list of facet values 17 | from the result of the aggregation. 18 | """ 19 | agg_type = None 20 | 21 | def __init__(self, **kwargs): 22 | self.filter_values = () 23 | self._params = kwargs 24 | 25 | def get_aggregation(self): 26 | """ 27 | Return the aggregation object. 28 | """ 29 | return A(self.agg_type, **self._params) 30 | 31 | def add_filter(self, filter_values): 32 | """ 33 | Construct a filter and remember the values for use in get_values. 34 | """ 35 | self.filter_values = filter_values 36 | 37 | if not filter_values: 38 | return 39 | 40 | f = self.get_value_filter(filter_values[0]) 41 | for v in filter_values[1:]: 42 | f |= self.get_value_filter(v) 43 | return f 44 | 45 | def get_value_filter(self, filter_value): 46 | """ 47 | Construct a filter for an individual value. 48 | """ 49 | pass 50 | 51 | def is_filtered(self, key): 52 | """ 53 | Is a filter active on the given key? 54 | """ 55 | return key in self.filter_values 56 | 57 | def get_value(self, bucket): 58 | """ 59 | Return a value representing a bucket. By default, its key. 60 | """ 61 | return bucket['key'] 62 | 63 | def get_values(self, data): 64 | """ 65 | Turn the raw bucket data into a list of tuples containing the key, 66 | number of documents and a flag indicating whether this value has been 67 | selected or not. 68 | """ 69 | out = [] 70 | for bucket in data: 71 | key = self.get_value(bucket) 72 | out.append(( 73 | key, 74 | bucket['doc_count'], 75 | self.is_filtered(key) 76 | )) 77 | return out 78 | 79 | 80 | class TermsFacet(Facet): 81 | agg_type = 'terms' 82 | 83 | def add_filter(self, filter_values): 84 | """ Create a terms filter instead of a bool filter containing term filters.
""" 85 | self.filter_values = filter_values 86 | if filter_values: 87 | return F('terms', **{self._params['field']: filter_values}) 88 | 89 | 90 | class RangeFacet(Facet): 91 | agg_type = 'range' 92 | 93 | def _range_to_dict(self, range): 94 | key, range = range 95 | out = {'key': key} 96 | if range[0] is not None: 97 | out['from'] = range[0] 98 | if range[1] is not None: 99 | out['to'] = range[1] 100 | return out 101 | 102 | def __init__(self, ranges, **kwargs): 103 | super(RangeFacet, self).__init__(**kwargs) 104 | self._params['ranges'] = list(map(self._range_to_dict, ranges)) 105 | self._params['keyed'] = False 106 | self._ranges = dict(ranges) 107 | 108 | def get_value_filter(self, filter_value): 109 | f, t = self._ranges[filter_value] 110 | limits = {} 111 | if f is not None: 112 | limits['from'] = f 113 | if t is not None: 114 | limits['to'] = t 115 | 116 | return F('range', **{ 117 | self._params['field']: limits 118 | }) 119 | 120 | class HistogramFacet(Facet): 121 | agg_type = 'histogram' 122 | 123 | def get_value_filter(self, filter_value): 124 | return F('range', **{ 125 | self._params['field']: { 126 | 'gte': filter_value, 127 | 'lt': filter_value + self._params['interval'] 128 | } 129 | }) 130 | 131 | 132 | class DateHistogramFacet(Facet): 133 | agg_type = 'date_histogram' 134 | 135 | DATE_INTERVALS = { 136 | 'month': lambda d: (d+timedelta(days=32)).replace(day=1), 137 | 'week': lambda d: d+timedelta(days=7), 138 | 'day': lambda d: d+timedelta(days=1), 139 | 'hour': lambda d: d+timedelta(hours=1), 140 | } 141 | 142 | def get_value(self, bucket): 143 | return datetime.utcfromtimestamp(int(bucket['key']) / 1000) 144 | 145 | def get_value_filter(self, filter_value): 146 | return F('range', **{ 147 | self._params['field']: { 148 | 'gte': filter_value, 149 | 'lt': self.DATE_INTERVALS[self._params['interval']](filter_value) 150 | } 151 | }) 152 | 153 | 154 | class FacetedResponse(Response): 155 | def __init__(self, search, *args, **kwargs): 156 | super(FacetedResponse, self).__init__(*args, **kwargs) 157 | super(AttrDict, self).__setattr__('_search', search) 158 | 159 | @property 160 | def query_string(self): 161 | return self._search._query 162 | 163 | @property 164 | def facets(self): 165 | if not hasattr(self, '_facets'): 166 | super(AttrDict, self).__setattr__('_facets', AttrDict({})) 167 | for name, facet in iteritems(self._search.facets): 168 | self._facets[name] = facet.get_values(self.aggregations['_filter_' + name][name]['buckets']) 169 | return self._facets 170 | 171 | 172 | class FacetedSearch(object): 173 | index = '_all' 174 | doc_types = ['_all'] 175 | fields = ('*', ) 176 | facets = {} 177 | 178 | def __init__(self, query=None, filters={}): 179 | self._query = query 180 | self._filters = {} 181 | for name, value in iteritems(filters): 182 | self.add_filter(name, value) 183 | 184 | def add_filter(self, name, filter_values): 185 | """ 186 | Add a filter for a facet. 187 | """ 188 | # normalize the value into a list 189 | if not isinstance(filter_values, (tuple, list)): 190 | if filter_values in (None, ''): 191 | return 192 | filter_values = [filter_values, ] 193 | 194 | # get the filter from the facet 195 | f = self.facets[name].add_filter(filter_values) 196 | if f is None: 197 | return 198 | 199 | self._filters[name] = f 200 | 201 | def search(self): 202 | """ 203 | Construct the Search object. 204 | """ 205 | return Search(doc_type=self.doc_types, index=self.index) 206 | 207 | def query(self, search, query): 208 | """ 209 | Add query part to ``search``. 
210 | 211 | Override this if you wish to customize the query used. 212 | """ 213 | if query: 214 | return search.query('multi_match', fields=self.fields, query=query) 215 | return search 216 | 217 | def aggregate(self, search): 218 | """ 219 | Add aggregations representing the facets selected, including potential 220 | filters. 221 | """ 222 | for f, facet in iteritems(self.facets): 223 | agg = facet.get_aggregation() 224 | agg_filter = F('match_all') 225 | for field, filter in iteritems(self._filters): 226 | if f == field: 227 | continue 228 | agg_filter &= filter 229 | search.aggs.bucket( 230 | '_filter_' + f, 231 | 'filter', 232 | filter=agg_filter 233 | ).bucket(f, agg) 234 | 235 | def filter(self, search): 236 | """ 237 | Add a ``post_filter`` to the search request narrowing the results based 238 | on the facet filters. 239 | """ 240 | post_filter = F('match_all') 241 | for f in itervalues(self._filters): 242 | post_filter &= f 243 | return search.post_filter(post_filter) 244 | 245 | def highlight(self, search): 246 | """ 247 | Add highlighting for all the fields. 248 | """ 249 | return search.highlight(*self.fields) 250 | 251 | def build_search(self): 252 | """ 253 | Construct the ``Search`` object. 254 | """ 255 | s = self.search() 256 | s = self.query(s, self._query) 257 | s = self.filter(s) 258 | s = self.highlight(s) 259 | self.aggregate(s) 260 | return s 261 | 262 | def execute(self): 263 | if not hasattr(self, '_response'): 264 | s = self.build_search() 265 | self._response = s.execute(response_class=partial(FacetedResponse, self)) 266 | 267 | return self._response 268 | 269 | -------------------------------------------------------------------------------- /elasticsearch_parse/filter.py: -------------------------------------------------------------------------------- 1 | from .utils import DslBase, BoolMixin, _make_dsl_class 2 | 3 | __all__ = [ 4 | 'F', 'And', 'AndOrFilter', 'Bool', 'EMPTY_FILTER', 'Exists', 'Filter', 5 | 'Fquery', 'GeoBoundingBox', 'GeoDistance', 'GeoDistanceRange', 6 | 'GeoPolygon', 'GeoShape', 'GeohashCell', 'HasChild', 'HasParent', 'Ids', 7 | 'Indices', 'Limit', 'MatchAll', 'Missing', 'Nested', 'Not', 'Or', 'Prefix', 8 | 'Query', 'Range', 'Regexp', 'Script', 'Term', 'Terms', 'Type' 9 | ] 10 | 11 | 12 | def F(name_or_filter='match_all', filters=None, **params): 13 | # 'and/or', [F(), F()] 14 | if filters is not None: 15 | # someone passed a positional argument to F outside of and/or/query 16 | if name_or_filter in ('and', 'or'): 17 | params['filters'] = filters 18 | elif name_or_filter == 'query': 19 | params['query'] = filters 20 | else: 21 | raise ValueError("Filter %r doesn't accept a positional argument." % name_or_filter) 22 | 23 | # {"term": {...}} 24 | if isinstance(name_or_filter, dict): 25 | if params: 26 | raise ValueError('F() cannot accept parameters when passing in a dict.') 27 | if len(name_or_filter) != 1: 28 | raise ValueError('F() can only accept dict with a single filter ({"bool": {...}}). ' 29 | 'Instead it got (%r)' % name_or_filter) 30 | name, params = name_or_filter.copy().popitem() 31 | if isinstance(params, dict): 32 | return Filter.get_dsl_class(name)(**params) 33 | else: 34 | # an and/or filter can take a plain list 35 | return Filter.get_dsl_class(name)(params) 36 | 37 | # Term(...)
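# an existing Filter instance is passed straight through (extra parameters are rejected)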
38 | if isinstance(name_or_filter, Filter): 39 | if params: 40 | raise ValueError('F() cannot accept parameters when passing in a Filter object.') 41 | return name_or_filter 42 | 43 | # s.filter = ~F(s.filter) 44 | if hasattr(name_or_filter, '_proxied'): 45 | return name_or_filter._proxied 46 | 47 | # 'term', tag='python', ... 48 | return Filter.get_dsl_class(name_or_filter)(**params) 49 | 50 | class Filter(DslBase): 51 | _type_name = 'filter' 52 | _type_shortcut = staticmethod(F) 53 | name = None 54 | 55 | class MatchAll(Filter): 56 | name = 'match_all' 57 | def __add__(self, other): 58 | return other._clone() 59 | __and__ = __rand__ = __radd__ = __add__ 60 | 61 | def __or__(self, other): 62 | return self 63 | __ror__ = __or__ 64 | EMPTY_FILTER = MatchAll() 65 | 66 | class Bool(BoolMixin, Filter): 67 | name = 'bool' 68 | _param_defs = { 69 | 'must': {'type': 'filter', 'multi': True}, 70 | 'should': {'type': 'filter', 'multi': True}, 71 | 'must_not': {'type': 'filter', 'multi': True}, 72 | } 73 | 74 | def __and__(self, other): 75 | f = self._clone() 76 | if isinstance(other, self.__class__): 77 | f.must += other.must 78 | f.must_not += other.must_not 79 | f.should = [] 80 | if self.should and other.should: 81 | selfshould, othershould = self.should[:], other.should[:] 82 | # required subfilter, move to must 83 | for s in (selfshould, othershould): 84 | if len(s) == 1: 85 | f.must.append(s.pop()) 86 | 87 | # we have leftover lists, nothing to do but add to must as bool(should) 88 | if selfshould and othershould: 89 | f.must.extend(( 90 | Bool(should=selfshould), 91 | Bool(should=othershould), 92 | )) 93 | # at most one should list is left, keep as should 94 | else: 95 | f.should = selfshould + othershould 96 | 97 | # only one of the filters has should clauses, keep them as-is 98 | else: 99 | f.should = self.should + other.should 100 | else: 101 | f.must.append(other) 102 | return f 103 | __rand__ = __and__ 104 | 105 | # register this as Bool for Filter 106 | Filter._bool = Bool 107 | 108 | class Not(Filter): 109 | name = 'not' 110 | _param_defs = {'filter': {'type': 'filter'}} 111 | 112 | def __init__(self, filter=None, **kwargs): 113 | if filter is None: 114 | filter, kwargs = kwargs, {} 115 | super(Not, self).__init__(filter=filter, **kwargs) 116 | 117 | class AndOrFilter(object): 118 | _param_defs = {'filters': {'type': 'filter', 'multi': True}} 119 | 120 | def __init__(self, filters=None, **kwargs): 121 | if filters is not None: 122 | kwargs['filters'] = filters 123 | super(AndOrFilter, self).__init__(**kwargs) 124 | 125 | # compound filters 126 | class And(AndOrFilter, Filter): 127 | name = 'and' 128 | 129 | class Or(AndOrFilter, Filter): 130 | name = 'or' 131 | 132 | class Query(Filter): 133 | name = 'query' 134 | _param_defs = {'query': {'type': 'query'}} 135 | 136 | def __init__(self, query=None, **kwargs): 137 | if query is not None: 138 | kwargs['query'] = query 139 | super(Query, self).__init__(**kwargs) 140 | 141 | def to_dict(self): 142 | d = super(Query, self).to_dict() 143 | d[self.name].update(d[self.name].pop('query', {})) 144 | return d 145 | 146 | 147 | FILTERS = ( 148 | # relationships 149 | ('nested', {'filter': {'type': 'filter'}}), 150 | ('has_child', {'filter': {'type': 'filter'}}), 151 | ('has_parent', {'filter': {'type': 'filter'}}), 152 | 153 | ('fquery', {'query': {'type': 'query'}}), 154 | 155 | # core filters 156 | ('exists', None), 157 | ('geo_bounding_box', None), 158 | ('geo_distance', None), 159 | ('geo_distance_range', None), 160 | ('geo_polygon', None), 161 |
('geo_shape', None), 162 | ('geohash_cell', None), 163 | ('ids', None), 164 | ('indices', None), 165 | ('limit', None), 166 | ('missing', None), 167 | ('prefix', None), 168 | ('range', None), 169 | ('regexp', None), 170 | ('script', None), 171 | ('term', None), 172 | ('terms', None), 173 | ('type', None), 174 | ) 175 | 176 | # generate the filter classes dynamically 177 | for fname, params_def in FILTERS: 178 | fclass = _make_dsl_class(Filter, fname, params_def) 179 | globals()[fclass.__name__] = fclass 180 | 181 | -------------------------------------------------------------------------------- /elasticsearch_parse/index.py: -------------------------------------------------------------------------------- 1 | from .search import Search 2 | 3 | class Index(object): 4 | def __init__(self, name, using='default'): 5 | self._name = name 6 | self._doc_types = {} 7 | self._mappings = {} 8 | self._using = using 9 | self._settings = {} 10 | self._aliases = {} 11 | 12 | def clone(self, name, using=None): 13 | i = Index(name, using=using or self._using) 14 | for attr in ('_doc_types', '_mappings', '_settings', '_aliases'): 15 | setattr(i, attr, getattr(self, attr).copy()) 16 | return i 17 | 18 | def _get_connection(self): 19 | return connections.get_connection(self._using) 20 | connection = property(_get_connection) 21 | 22 | def doc_type(self, doc_type): 23 | name = doc_type._doc_type.name 24 | self._doc_types[name] = doc_type 25 | self._mappings[name] = doc_type._doc_type.mapping 26 | 27 | if not doc_type._doc_type.index: 28 | doc_type._doc_type.index = self._name 29 | return doc_type # returned so it can also be used as a decorator 30 | 31 | def settings(self, **kwargs): 32 | self._settings.update(kwargs) 33 | return self 34 | 35 | def aliases(self, **kwargs): 36 | self._aliases.update(kwargs) 37 | return self 38 | 39 | def search(self): 40 | return Search( 41 | using=self._using, 42 | index=self._name, 43 | doc_type=[self._doc_types.get(k, k) for k in self._mappings] 44 | ) 45 | 46 | def _get_mappings(self): 47 | analysis, mappings = {}, {} 48 | for mapping in self._mappings.values(): 49 | mappings.update(mapping.to_dict()) 50 | a = mapping._collect_analysis() 51 | # merge the definition 52 | # TODO: conflict detection/resolution 53 | for key in a: 54 | analysis.setdefault(key, {}).update(a[key]) 55 | 56 | return mappings, analysis 57 | 58 | def to_dict(self): 59 | out = {} 60 | if self._settings: 61 | out['settings'] = self._settings 62 | if self._aliases: 63 | out['aliases'] = self._aliases 64 | mappings, analysis = self._get_mappings() 65 | if mappings: 66 | out['mappings'] = mappings 67 | if analysis: 68 | out.setdefault('settings', {})['analysis'] = analysis 69 | return out 70 | 71 | def create(self, **kwargs): 72 | self.connection.indices.create(index=self._name, body=self.to_dict(), **kwargs) 73 | 74 | def delete(self, **kwargs): 75 | self.connection.indices.delete(index=self._name, **kwargs) 76 | -------------------------------------------------------------------------------- /elasticsearch_parse/query.py: -------------------------------------------------------------------------------- 1 | from .utils import DslBase, BoolMixin, _make_dsl_class 2 | from .function import SF, ScoreFunction 3 | 4 | __all__ = [ 5 | 'Q', 'Bool', 'Boosting', 'Common', 'ConstantScore', 'DisMax', 'Filtered', 6 | 'FunctionScore', 'Fuzzy', 'FuzzyLikeThis', 'FuzzyLikeThisField', 7 | 'GeoShape', 'HasChild', 'HasParent', 'Ids', 'Indices', 'Match', 'MatchAll', 8 | 'MatchPhrase', 'MatchPhrasePrefix', 'MoreLikeThis', 'MoreLikeThisField', 9 |
'MultiMatch', 'Nested', 'Prefix', 'Query', 'QueryString', 'Range', 10 | 'Regexp', 'SF', 'ScoreFunction', 'SimpleQueryString', 'SpanFirst', 11 | 'SpanMulti', 'SpanNear', 'SpanNot', 'SpanOr', 'SpanTerm', 'Template', 12 | 'Term', 'Terms', 'TopChildren', 'Wildcard' 13 | ] 14 | 15 | 16 | def Q(name_or_query='match_all', **params): 17 | # {"match": {"title": "python"}} 18 | if isinstance(name_or_query, dict): 19 | if params: 20 | raise ValueError('Q() cannot accept parameters when passing in a dict.') 21 | if len(name_or_query) != 1: 22 | raise ValueError('Q() can only accept dict with a single query ({"match": {...}}). ' 23 | 'Instead it got (%r)' % name_or_query) 24 | name, params = name_or_query.copy().popitem() 25 | return Query.get_dsl_class(name)(**params) 26 | 27 | # MatchAll() 28 | if isinstance(name_or_query, Query): 29 | if params: 30 | raise ValueError('Q() cannot accept parameters when passing in a Query object.') 31 | return name_or_query 32 | 33 | # s.query = Q('filtered', query=s.query) 34 | if hasattr(name_or_query, '_proxied'): 35 | return name_or_query._proxied 36 | 37 | # "match", title="python" 38 | return Query.get_dsl_class(name_or_query)(**params) 39 | 40 | class Query(DslBase): 41 | _type_name = 'query' 42 | _type_shortcut = staticmethod(Q) 43 | name = None 44 | 45 | class MatchAll(Query): 46 | name = 'match_all' 47 | def __add__(self, other): 48 | return other._clone() 49 | __and__ = __rand__ = __radd__ = __add__ 50 | 51 | def __or__(self, other): 52 | return self 53 | __ror__ = __or__ 54 | EMPTY_QUERY = MatchAll() 55 | 56 | class Bool(BoolMixin, Query): 57 | name = 'bool' 58 | _param_defs = { 59 | 'must': {'type': 'query', 'multi': True}, 60 | 'should': {'type': 'query', 'multi': True}, 61 | 'must_not': {'type': 'query', 'multi': True}, 62 | 'filter': {'type': 'query', 'multi': True}, 63 | } 64 | 65 | def __and__(self, other): 66 | q = self._clone() 67 | if isinstance(other, self.__class__): 68 | q.must += other.must 69 | q.must_not += other.must_not 70 | q.should = [] 71 | for qx in (self, other): 72 | min_should_match = getattr(qx, 'minimum_should_match', 0 if any((qx.must, qx.must_not)) else 1) 73 | # all subqueries are required 74 | if len(qx.should) <= min_should_match: 75 | q.must.extend(qx.should) 76 | # not all of them are required, use it and remember min_should_match 77 | elif not q.should: 78 | q.minimum_should_match = min_should_match 79 | q.should = qx.should 80 | # not all are required, add a should list to the must with proper min_should_match 81 | else: 82 | q.must.append(Bool(should=qx.should, minimum_should_match=min_should_match)) 83 | else: 84 | q.must.append(other) 85 | return q 86 | __rand__ = __and__ 87 | # register this as Bool for Query 88 | Query._bool = Bool 89 | 90 | class FunctionScore(Query): 91 | name = 'function_score' 92 | _param_defs = { 93 | 'query': {'type': 'query'}, 94 | 'filter': {'type': 'filter'}, 95 | 'functions': {'type': 'score_function', 'multi': True}, 96 | } 97 | 98 | def __init__(self, **kwargs): 99 | if 'functions' not in kwargs: 100 | # collect any score functions passed in by name into the functions list 101 | fns = kwargs['functions'] = [] 102 | for name in ScoreFunction._classes: 103 | if name in kwargs: 104 | fns.append({name: kwargs.pop(name)}) 105 | 106 | super(FunctionScore, self).__init__(**kwargs) 107 | 108 | QUERIES = ( 109 | # compound queries 110 | ('boosting', {'positive': {'type': 'query'}, 'negative': {'type': 'query'}}), 111 | ('constant_score', {'query': {'type': 'query'}, 'filter': {'type': 'filter'}}), 112 | ('dis_max', {'queries': {'type': 'query', 'multi':
True}}), 113 | ('filtered', {'query': {'type': 'query'}, 'filter': {'type': 'filter'}}), 114 | ('indices', {'query': {'type': 'query'}, 'no_match_query': {'type': 'query'}}), 115 | 116 | # relationship queries 117 | ('nested', {'query': {'type': 'query'}}), 118 | ('has_child', {'query': {'type': 'query'}}), 119 | ('has_parent', {'query': {'type': 'query'}}), 120 | ('top_children', {'query': {'type': 'query'}}), 121 | 122 | # compound span queries 123 | ('span_first', {'match': {'type': 'query'}}), 124 | ('span_multi', {'match': {'type': 'query'}}), 125 | ('span_near', {'clauses': {'type': 'query', 'multi': True}}), 126 | ('span_not', {'exclude': {'type': 'query'}, 'include': {'type': 'query'}}), 127 | ('span_or', {'clauses': {'type': 'query', 'multi': True}}), 128 | 129 | # core queries 130 | ('common', None), 131 | ('fuzzy', None), 132 | ('fuzzy_like_this', None), 133 | ('fuzzy_like_this_field', None), 134 | ('geo_shape', None), 135 | ('ids', None), 136 | ('match', None), 137 | ('match_phrase', None), 138 | ('match_phrase_prefix', None), 139 | ('more_like_this', None), 140 | ('more_like_this_field', None), 141 | ('multi_match', None), 142 | ('prefix', None), 143 | ('query_string', None), 144 | ('range', None), 145 | ('regexp', None), 146 | ('simple_query_string', None), 147 | ('span_term', None), 148 | ('template', None), 149 | ('term', None), 150 | ('terms', None), 151 | ('wildcard', None), 152 | ) 153 | 154 | # generate the query classes dynamically 155 | for qname, params_def in QUERIES: 156 | qclass = _make_dsl_class(Query, qname, params_def) 157 | globals()[qclass.__name__] = qclass 158 | 159 | -------------------------------------------------------------------------------- /elasticsearch_parse/search.py: -------------------------------------------------------------------------------- 1 | from six import iteritems, string_types 2 | 3 | from elasticsearch.helpers import scan 4 | 5 | from .query import Q, EMPTY_QUERY, Filtered 6 | from .filter import F, EMPTY_FILTER 7 | from .aggs import A, AggBase 8 | from .utils import DslBase 9 | 10 | class BaseProxy(object): 11 | """ 12 | Simple proxy around DSL objects (queries and filters) that can be called 13 | (to add query/filter) and also allows attribute access which is proxied to 14 | the wrapped query/filter. 15 | """ 16 | def __init__(self, search, attr_name): 17 | self._search = search 18 | self._proxied = self._empty 19 | self._attr_name = attr_name 20 | 21 | def __nonzero__(self): 22 | return self._proxied != self._empty 23 | __bool__ = __nonzero__ 24 | 25 | def __call__(self, *args, **kwargs): 26 | s = self._search._clone() 27 | getattr(s, self._attr_name)._proxied += self._shortcut(*args, **kwargs) 28 | 29 | # always return search to be chainable 30 | return s 31 | 32 | def __getattr__(self, attr_name): 33 | return getattr(self._proxied, attr_name) 34 | 35 | def __setattr__(self, attr_name, value): 36 | if not attr_name.startswith('_'): 37 | self._proxied = self._shortcut(self._proxied.to_dict()) 38 | setattr(self._proxied, attr_name, value) 39 | super(BaseProxy, self).__setattr__(attr_name, value) 40 | 41 | 42 | class ProxyDescriptor(object): 43 | """ 44 | Simple descriptor to enable setting of queries and filters as: 45 | 46 | s = Search() 47 | s.query = Q(...)
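s.filter = F('term', published=True)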
48 | 49 | """ 50 | def __init__(self, name): 51 | self._attr_name = '_%s_proxy' % name 52 | 53 | def __get__(self, instance, owner): 54 | return getattr(instance, self._attr_name) 55 | 56 | def __set__(self, instance, value): 57 | proxy = getattr(instance, self._attr_name) 58 | proxy._proxied = proxy._shortcut(value) 59 | 60 | 61 | class ProxyQuery(BaseProxy): 62 | _empty = EMPTY_QUERY 63 | _shortcut = staticmethod(Q) 64 | 65 | 66 | class ProxyFilter(BaseProxy): 67 | _empty = EMPTY_FILTER 68 | _shortcut = staticmethod(F) 69 | 70 | 71 | class AggsProxy(AggBase, DslBase): 72 | name = 'aggs' 73 | def __init__(self, search): 74 | self._base = self._search = search 75 | self._params = {'aggs': {}} 76 | 77 | def to_dict(self): 78 | return super(AggsProxy, self).to_dict().get('aggs', {}) 79 | 80 | 81 | class Search(object): 82 | query = ProxyDescriptor('query') 83 | filter = ProxyDescriptor('filter') 84 | post_filter = ProxyDescriptor('post_filter') 85 | 86 | def __init__(self, using='default', index=None, doc_type=None, extra=None): 87 | """ 88 | Search request to elasticsearch. 89 | 90 | :arg using: `Elasticsearch` instance to use 91 | :arg index: limit the search to index 92 | :arg doc_type: only query this type. 93 | 94 | All the parameters supplied (or omitted) at creation time can later be 95 | overridden by methods (`using`, `index` and `doc_type` respectively). 96 | """ 97 | self._using = using 98 | 99 | self._index = None 100 | if isinstance(index, (tuple, list)): 101 | self._index = list(index) 102 | elif index: 103 | self._index = [index] 104 | 105 | self._doc_type = [] 106 | self._doc_type_map = {} 107 | if isinstance(doc_type, (tuple, list)): 108 | for dt in doc_type: 109 | self._add_doc_type(dt) 110 | elif isinstance(doc_type, dict): 111 | self._doc_type.extend(doc_type.keys()) 112 | self._doc_type_map.update(doc_type) 113 | elif doc_type: 114 | self._add_doc_type(doc_type) 115 | 116 | self.aggs = AggsProxy(self) 117 | self._sort = [] 118 | self._extra = extra or {} 119 | self._params = {} 120 | self._fields = None 121 | self._partial_fields = {} 122 | self._highlight = {} 123 | self._highlight_opts = {} 124 | self._suggest = {} 125 | self._script_fields = {} 126 | 127 | self._query_proxy = ProxyQuery(self, 'query') 128 | self._filter_proxy = ProxyFilter(self, 'filter') 129 | self._post_filter_proxy = ProxyFilter(self, 'post_filter') 130 | 131 | def __getitem__(self, n): 132 | """ 133 | Support slicing the `Search` instance for pagination. 134 | 135 | Slicing equates to the from/size parameters. E.g.:: 136 | 137 | s = Search().query(...)[0:25] 138 | 139 | is equivalent to:: 140 | 141 | s = Search().query(...).extra(from_=0, size=25) 142 | 143 | """ 144 | s = self._clone() 145 | 146 | if isinstance(n, slice): 147 | # If negative slicing, abort. 148 | if n.start and n.start < 0 or n.stop and n.stop < 0: 149 | raise ValueError("Search does not support negative slicing.") 150 | # Elasticsearch won't get all results so we default to size: 10 if 151 | # stop not given. 152 | s._extra['from'] = n.start or 0 153 | s._extra['size'] = n.stop - (n.start or 0) if n.stop is not None else 10 154 | return s 155 | else: # This is an index lookup, equivalent to slicing by [n:n+1]. 156 | # If negative index, abort.
157 | if n < 0: 158 | raise ValueError("Search does not support negative indexing.") 159 | s._extra['from'] = n 160 | s._extra['size'] = 1 161 | return s 162 | 163 | @classmethod 164 | def from_dict(cls, d): 165 | """ 166 | Construct a `Search` instance from a raw dict containing the search 167 | body. Useful when migrating from raw dictionaries. 168 | 169 | Example:: 170 | 171 | s = Search.from_dict({ 172 | "query": { 173 | "bool": { 174 | "must": [...] 175 | } 176 | }, 177 | "aggs": {...} 178 | }) 179 | s = s.filter('term', published=True) 180 | """ 181 | s = cls() 182 | s.update_from_dict(d) 183 | return s 184 | 185 | def _clone(self): 186 | """ 187 | Return a clone of the current search request. Performs a shallow copy 188 | of all the underlying objects. Used internally by most state modifying 189 | APIs. 190 | """ 191 | s = self.__class__(using=self._using, index=self._index, 192 | doc_type=self._doc_type) 193 | s._doc_type_map = self._doc_type_map.copy() 194 | s._sort = self._sort[:] 195 | s._fields = self._fields[:] if self._fields is not None else None 196 | s._partial_fields = self._partial_fields.copy() 197 | s._extra = self._extra.copy() 198 | s._highlight = self._highlight.copy() 199 | s._highlight_opts = self._highlight_opts.copy() 200 | s._suggest = self._suggest.copy() 201 | s._script_fields = self._script_fields.copy() 202 | for x in ('query', 'filter', 'post_filter'): 203 | getattr(s, x)._proxied = getattr(self, x)._proxied 204 | 205 | # copy top-level bucket definitions 206 | if self.aggs._params.get('aggs'): 207 | s.aggs._params = {'aggs': self.aggs._params['aggs'].copy()} 208 | s._params = self._params.copy() 209 | return s 210 | 211 | def update_from_dict(self, d): 212 | """ 213 | Apply options from a serialized body to the current instance. Modifies 214 | the object in-place. Used mostly by ``from_dict``. 215 | """ 216 | d = d.copy() 217 | if 'query' in d: 218 | self.query._proxied = Q(d.pop('query')) 219 | if 'post_filter' in d: 220 | self.post_filter._proxied = F(d.pop('post_filter')) 221 | 222 | if isinstance(self.query._proxied, Filtered): 223 | self.filter._proxied = self.query._proxied.filter 224 | self.query._proxied = self.query._proxied.query 225 | 226 | aggs = d.pop('aggs', d.pop('aggregations', {})) 227 | if aggs: 228 | self.aggs._params = { 229 | 'aggs': dict( 230 | (name, A(value)) for (name, value) in iteritems(aggs)) 231 | } 232 | if 'sort' in d: 233 | self._sort = d.pop('sort') 234 | if 'fields' in d: 235 | self._fields = d.pop('fields') 236 | if 'partial_fields' in d: 237 | self._partial_fields = d.pop('partial_fields') 238 | if 'highlight' in d: 239 | high = d.pop('highlight').copy() 240 | self._highlight = high.pop('fields') 241 | self._highlight_opts = high 242 | if 'suggest' in d: 243 | self._suggest = d.pop('suggest') 244 | if 'text' in self._suggest: 245 | text = self._suggest.pop('text') 246 | for s in self._suggest.values(): 247 | s.setdefault('text', text) 248 | if 'script_fields' in d: 249 | self._script_fields = d.pop('script_fields') 250 | self._extra = d 251 | 252 | def script_fields(self, **kwargs): 253 | """ 254 | Define script fields to be calculated on hits. See 255 | https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html 256 | for more details. 
257 | 258 | Example:: 259 | 260 | s = Search() 261 | s = s.script_fields(times_two="doc['field'].value * 2") 262 | s = s.script_fields( 263 | times_three={ 264 | 'script': "doc['field'].value * n", 265 | 'params': {'n': 3} 266 | } 267 | ) 268 | 269 | """ 270 | s = self._clone() 271 | for name in kwargs: 272 | if isinstance(kwargs[name], string_types): 273 | kwargs[name] = {'script': kwargs[name]} 274 | s._script_fields.update(kwargs) 275 | return s 276 | 277 | def params(self, **kwargs): 278 | """ 279 | Specify query params to be used when executing the search. All the 280 | keyword arguments will override the current values. See 281 | http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.search 282 | for all available parameters. 283 | 284 | Example:: 285 | 286 | s = Search() 287 | s = s.params(routing='user-1', preference='local') 288 | """ 289 | s = self._clone() 290 | s._params.update(kwargs) 291 | return s 292 | 293 | def extra(self, **kwargs): 294 | """ 295 | Add extra keys to the request body. Mostly here for backwards 296 | compatibility. 297 | """ 298 | s = self._clone() 299 | if 'from_' in kwargs: 300 | kwargs['from'] = kwargs.pop('from_') 301 | s._extra.update(kwargs) 302 | return s 303 | 304 | def fields(self, fields=None): 305 | """ 306 | Selectively load specific stored fields for each document. 307 | 308 | :arg fields: list of fields to return for each document 309 | 310 | If ``fields`` is None, the entire document will be returned for 311 | each hit. If fields is the empty list, no fields will be 312 | returned for each hit, just the metadata. 313 | """ 314 | s = self._clone() 315 | s._fields = fields 316 | return s 317 | 318 | def partial_fields(self, **partial): 319 | """ 320 | Control which part of the fields to extract from the `_source` document 321 | 322 | :arg partial: dict specifying which fields to extract from the source 323 | 324 | An example usage would be: 325 | 326 | s = Search().partial_fields(authors_data={ 327 | 'include': ['authors.*'], 328 | 'exclude': ['authors.name'] 329 | }) 330 | 331 | which will include all fields from the `authors` nested property except for 332 | each author's `name`. 333 | 334 | If ``partial`` is not provided, the whole `_source` will be fetched. Calling this multiple 335 | times will override the previous values with the new ones. 336 | """ 337 | s = self._clone() 338 | s._partial_fields = partial 339 | return s 340 | 341 | def sort(self, *keys): 342 | """ 343 | Add sorting information to the search request. If called without 344 | arguments it will remove all sort requirements. Otherwise it will 345 | replace them. Acceptable arguments are:: 346 | 347 | 'some.field' 348 | '-some.other.field' 349 | {'different.field': {'any': 'dict'}} 350 | 351 | so for example:: 352 | 353 | s = Search().sort( 354 | 'category', 355 | '-title', 356 | {"price" : {"order" : "asc", "mode" : "avg"}} 357 | ) 358 | 359 | will sort by ``category``, ``title`` (in descending order) and 360 | ``price`` in ascending order using the ``avg`` mode. 361 | 362 | The API returns a copy of the Search object and can thus be chained. 363 | """ 364 | s = self._clone() 365 | s._sort = [] 366 | for k in keys: 367 | if isinstance(k, string_types) and k.startswith('-'): 368 | k = {k[1:]: {"order": "desc"}} 369 | s._sort.append(k) 370 | return s 371 | 372 | def highlight_options(self, **kwargs): 373 | """ 374 | Update the global highlighting options used for this request.
For 375 | example:: 376 | 377 | s = Search() 378 | s = s.highlight_options(order='score') 379 | """ 380 | s = self._clone() 381 | s._highlight_opts.update(kwargs) 382 | return s 383 | 384 | def highlight(self, *fields, **kwargs): 385 | """ 386 | Request highlighting of some fields. All keyword arguments passed in will be 387 | used as parameters. Example:: 388 | 389 | Search().highlight('title', 'body', fragment_size=50) 390 | 391 | will produce the equivalent of:: 392 | 393 | { 394 | "highlight": { 395 | "fields": { 396 | "body": {"fragment_size": 50}, 397 | "title": {"fragment_size": 50} 398 | } 399 | } 400 | } 401 | 402 | """ 403 | s = self._clone() 404 | for f in fields: 405 | s._highlight[f] = kwargs 406 | return s 407 | 408 | def suggest(self, name, text, **kwargs): 409 | """ 410 | Add a suggestions request to the search. 411 | 412 | :arg name: name of the suggestion 413 | :arg text: text to suggest on 414 | 415 | All keyword arguments will be added to the suggestions body. For example:: 416 | 417 | s = Search() 418 | s = s.suggest('suggestion-1', 'Elasticserach', term={'field': 'body'}) 419 | """ 420 | s = self._clone() 421 | s._suggest[name] = {'text': text} 422 | s._suggest[name].update(kwargs) 423 | return s 424 | 425 | def index(self, *index): 426 | """ 427 | Set the index for the search. If called without arguments it will remove all index information. 428 | 429 | Example: 430 | 431 | s = Search() 432 | s = s.index('twitter-2015.01.01', 'twitter-2015.01.02') 433 | """ 434 | # .index() resets 435 | s = self._clone() 436 | if not index: 437 | s._index = None 438 | else: 439 | s._index = (self._index or []) + list(index) 440 | return s 441 | 442 | def _add_doc_type(self, doc_type): 443 | if hasattr(doc_type, '_doc_type'): 444 | self._doc_type_map[doc_type._doc_type.name] = doc_type.from_es 445 | doc_type = doc_type._doc_type.name 446 | self._doc_type.append(doc_type) 447 | 448 | def doc_type(self, *doc_type, **kwargs): 449 | """ 450 | Set the type to search through. You can supply a single value or 451 | multiple. Values can be strings or subclasses of ``DocType``. 452 | 453 | You can also pass in any keyword arguments, mapping a doc_type to a 454 | callback that should be used instead of the Result class. 455 | 456 | If no doc_type is supplied any information stored on the instance will 457 | be erased. 458 | 459 | Example: 460 | 461 | s = Search().doc_type('product', 'store', User, custom=my_callback) 462 | """ 463 | # .doc_type() resets 464 | s = self._clone() 465 | if not doc_type and not kwargs: 466 | s._doc_type = [] 467 | s._doc_type_map = {} 468 | else: 469 | for dt in doc_type: 470 | s._add_doc_type(dt) 471 | s._doc_type.extend(kwargs.keys()) 472 | s._doc_type_map.update(kwargs) 473 | return s 474 | 475 | def to_dict(self, count=False, **kwargs): 476 | """ 477 | Serialize the search into the dictionary that will be sent over as the 478 | request's body. 479 | 480 | :arg count: a flag to specify we are interested in a body for count - 481 | no aggregations, no pagination bounds etc. 482 | 483 | All additional keyword arguments will be included into the dictionary.
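Example (a small sketch; ``f`` is an arbitrary field name)::

    s = Search().query('match', f=42)
    s.to_dict(size=10)   # {'query': {'match': {'f': 42}}, 'size': 10}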
484 | """ 485 | if self.filter: 486 | d = { 487 | "query": { 488 | "filtered": { 489 | "query": self.query.to_dict(), 490 | "filter": self.filter.to_dict() 491 | } 492 | } 493 | } 494 | else: 495 | d = {"query": self.query.to_dict()} 496 | 497 | if self.post_filter: 498 | d['post_filter'] = self.post_filter.to_dict() 499 | 500 | # count request doesn't care for sorting and other things 501 | if not count: 502 | if self.aggs.aggs: 503 | d.update(self.aggs.to_dict()) 504 | 505 | if self._sort: 506 | d['sort'] = self._sort 507 | 508 | d.update(self._extra) 509 | 510 | if self._fields is not None: 511 | d['fields'] = self._fields 512 | 513 | if self._partial_fields: 514 | d['partial_fields'] = self._partial_fields 515 | 516 | if self._highlight: 517 | d['highlight'] = {'fields': self._highlight} 518 | d['highlight'].update(self._highlight_opts) 519 | 520 | if self._suggest: 521 | d['suggest'] = self._suggest 522 | 523 | if self._script_fields: 524 | d['script_fields'] = self._script_fields 525 | 526 | d.update(kwargs) 527 | return d 528 | 529 | def using(self, client): 530 | """ 531 | Associate the search request with an elasticsearch client. A fresh copy 532 | will be returned with the current instance remaining unchanged. 533 | 534 | :arg client: an instance of ``elasticsearch.Elasticsearch`` to use or 535 | an alias to look up in ``elasticsearch_dsl.connections`` 536 | 537 | """ 538 | s = self._clone() 539 | s._using = client 540 | return s 541 | 542 | def count(self): 543 | """ 544 | Build the body for a count request. In this fork the serialized 545 | body is returned instead of the actual number of hits. 546 | """ 547 | # execution was stripped from this fork, so just build the body 548 | 549 | d = self.to_dict(count=True) 550 | return d 551 | 552 | def execute(self): 553 | """ 554 | Serialize the search and return the request body. In this fork the 555 | DSL dict is returned instead of being executed against a cluster; 556 | the original execution code is kept below for reference. 557 | 558 | """ 559 | return self.to_dict() 560 | #return response_class( 561 | # es.search( 562 | # index=self._index, 563 | # doc_type=self._doc_type, 564 | # body=self.to_dict(), 565 | # **self._params 566 | # ), 567 | # callbacks=self._doc_type_map 568 | #) 569 | 570 | def scan(self): 571 | """ 572 | Turn the search into a scan search and return a generator that will 573 | iterate over all the documents matching the query.
574 | 575 | Use ``params`` method to specify any additional arguments you wish to 576 | pass to the underlying ``scan`` helper from ``elasticsearch-py`` - 577 | http://elasticsearch-py.readthedocs.org/en/master/helpers.html#elasticsearch.helpers.scan 578 | 579 | """ 580 | es = connections.get_connection(self._using) 581 | 582 | for hit in scan( 583 | es, 584 | query=self.to_dict(), 585 | index=self._index, 586 | doc_type=self._doc_type, 587 | **self._params 588 | ): 589 | yield self._doc_type_map.get(hit['_type'], Result)(hit) 590 | 591 | -------------------------------------------------------------------------------- /elasticsearch_parse/serializer.py: -------------------------------------------------------------------------------- 1 | from elasticsearch.serializer import JSONSerializer 2 | 3 | from .utils import AttrDict, AttrList 4 | 5 | class AttrJSONSerializer(JSONSerializer): 6 | def default(self, data): 7 | if isinstance(data, AttrDict): 8 | return data._d_ 9 | if isinstance(data, AttrList): 10 | return data._l_ 11 | return super(AttrJSONSerializer, self).default(data) 12 | 13 | serializer = AttrJSONSerializer() 14 | -------------------------------------------------------------------------------- /elasticsearch_parse/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from six import iteritems, add_metaclass 4 | from six.moves import map 5 | 6 | from .exceptions import UnknownDslObject, ValidationException 7 | 8 | SKIP_VALUES = ('', None) 9 | 10 | def _wrap(val, obj_wrapper=None): 11 | if isinstance(val, dict): 12 | return AttrDict(val) if obj_wrapper is None else obj_wrapper(val) 13 | if isinstance(val, list): 14 | return AttrList(val) 15 | return val 16 | 17 | def _make_dsl_class(base, name, params_def=None, suffix=''): 18 | """ 19 | Generate a DSL class based on the name of the DSL object and its parameters 20 | """ 21 | attrs = {'name': name} 22 | if params_def: 23 | attrs['_param_defs'] = params_def 24 | cls_name = str(''.join(s.title() for s in name.split('_')) + suffix) 25 | return type(cls_name, (base, ), attrs) 26 | 27 | class AttrList(object): 28 | def __init__(self, l, obj_wrapper=None): 29 | # make iterables into lists 30 | if not isinstance(l, list): 31 | l = list(l) 32 | self._l_ = l 33 | self._obj_wrapper = obj_wrapper 34 | 35 | def __repr__(self): 36 | return repr(self._l_) 37 | 38 | def __eq__(self, other): 39 | if isinstance(other, AttrList): 40 | return other._l_ == self._l_ 41 | # make sure we still compare equal to a plain list with the same data 42 | return other == self._l_ 43 | 44 | def __getitem__(self, k): 45 | l = self._l_[k] 46 | if isinstance(k, slice): 47 | return AttrList(l) 48 | return _wrap(l, self._obj_wrapper) 49 | 50 | def __setitem__(self, k, value): 51 | self._l_[k] = value 52 | 53 | def __iter__(self): 54 | return map(lambda i: _wrap(i, self._obj_wrapper), self._l_) 55 | 56 | def __len__(self): 57 | return len(self._l_) 58 | 59 | def __nonzero__(self): 60 | return bool(self._l_) 61 | __bool__ = __nonzero__ 62 | 63 | def __getattr__(self, name): 64 | return getattr(self._l_, name) 65 | 66 | 67 | class AttrDict(object): 68 | """ 69 | Helper class to provide attribute like access (read and write) to 70 | dictionaries. Used to provide a convenient way to access both results and 71 | nested dsl dicts.
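For example ``AttrDict({'hits': {'total': 5}}).hits.total`` instead of ``d['hits']['total']``.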
72 | """ 73 | def __init__(self, d): 74 | # assign the inner dict manually to prevent __setattr__ from firing 75 | super(AttrDict, self).__setattr__('_d_', d) 76 | 77 | def __contains__(self, key): 78 | return key in self._d_ 79 | 80 | def __nonzero__(self): 81 | return bool(self._d_) 82 | __bool__ = __nonzero__ 83 | 84 | def __dir__(self): 85 | # introspection for auto-complete in IPython etc 86 | return list(self._d_.keys()) 87 | 88 | def __eq__(self, other): 89 | if isinstance(other, AttrDict): 90 | return other._d_ == self._d_ 91 | # make sure we still equal to a dict with the same data 92 | return other == self._d_ 93 | 94 | def __repr__(self): 95 | r = repr(self._d_) 96 | if len(r) > 60: 97 | r = r[:60] + '...}' 98 | return r 99 | 100 | def __getattr__(self, attr_name): 101 | try: 102 | return _wrap(self._d_[attr_name]) 103 | except KeyError: 104 | raise AttributeError( 105 | '%r object has no attribute %r' % (self.__class__.__name__, attr_name)) 106 | 107 | def __delattr__(self, attr_name): 108 | try: 109 | del self._d_[attr_name] 110 | except KeyError: 111 | raise AttributeError( 112 | '%r object has no attribute %r' % (self.__class__.__name__, attr_name)) 113 | 114 | def __getitem__(self, key): 115 | return _wrap(self._d_[key]) 116 | 117 | def __setitem__(self, key, value): 118 | self._d_[key] = value 119 | 120 | def __delitem__(self, key): 121 | del self._d_[key] 122 | 123 | def __setattr__(self, name, value): 124 | if name in self._d_ or not hasattr(self.__class__, name): 125 | self._d_[name] = value 126 | else: 127 | # there is an attribute on the class (could be property, ..) - don't add it as field 128 | super(AttrDict, self).__setattr__(name, value) 129 | 130 | def __iter__(self): 131 | return iter(self._d_) 132 | 133 | def to_dict(self): 134 | return self._d_ 135 | 136 | 137 | class DslMeta(type): 138 | """ 139 | Base Metaclass for DslBase subclasses that builds a registry of all classes 140 | for a given DslBase subclass (== all the query types for the Query subclass 141 | of DslBase). 142 | 143 | It then uses the information from that registry (as well as `name` and 144 | `shortcut` attributes from the base class) to construct any subclass based 145 | on its name. 146 | 147 | For typical use see `QueryMeta` and `Query` in `elasticsearch_dsl.query`. 148 | """ 149 | _types = {} 150 | def __init__(cls, name, bases, attrs): 151 | super(DslMeta, cls).__init__(name, bases, attrs) 152 | # skip for DslBase 153 | if not hasattr(cls, '_type_shortcut'): 154 | return 155 | if cls.name is None: 156 | # abstract base class, register its shortcut 157 | cls._types[cls._type_name] = cls._type_shortcut 158 | # and create a registry for subclasses 159 | if not hasattr(cls, '_classes'): 160 | cls._classes = {} 161 | elif cls.name not in cls._classes: 162 | # normal class, register it 163 | cls._classes[cls.name] = cls 164 | 165 | @classmethod 166 | def get_dsl_type(cls, name): 167 | try: 168 | return cls._types[name] 169 | except KeyError: 170 | raise UnknownDslObject('DSL type %s does not exist.' % name) 171 | 172 | 173 | @add_metaclass(DslMeta) 174 | class DslBase(object): 175 | """ 176 | Base class for all DSL objects - queries, filters, aggregations etc. Wraps 177 | a dictionary representing the object's json.
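For example ``Q('match', title='python')`` wraps ``{'match': {'title': 'python'}}``, and ``to_dict()`` produces that dict again.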
178 | 
179 |     Provides several features:
180 |         - attribute access to the wrapped dictionary (.field instead of ['field'])
181 |         - _clone method returning a deep copy of self
182 |         - to_dict method to serialize into dict (to be sent via elasticsearch-py)
183 |         - basic logical operators (&, | and ~) using a Bool(Filter|Query) TODO:
184 |           move into a class specific to Query/Filter
185 |         - respects the definition of the class and (de)serializes its
186 |           attributes based on the `_param_defs` definition (for example turning
187 |           all values in the `must` attribute into Query objects)
188 |     """
189 |     _param_defs = {}
190 | 
191 |     @classmethod
192 |     def get_dsl_class(cls, name):
193 |         try:
194 |             return cls._classes[name]
195 |         except KeyError:
196 |             raise UnknownDslObject('DSL class `%s` does not exist in %s.' % (name, cls._type_name))
197 | 
198 |     def __init__(self, **params):
199 |         self._params = {}
200 |         for pname, pvalue in iteritems(params):
201 |             if '__' in pname:
202 |                 pname = pname.replace('__', '.')
203 |             self._setattr(pname, pvalue)
204 | 
205 |     def _repr_params(self):
206 |         """ Produce a repr of all our parameters to be used in __repr__. """
207 |         return ', '.join(
208 |             '%s=%r' % (n.replace('.', '__'), v)
209 |             for (n, v) in sorted(iteritems(self._params))
210 |             # make sure we don't include empty typed params
211 |             if 'type' not in self._param_defs.get(n, {}) or v
212 |         )
213 | 
214 |     def __repr__(self):
215 |         return '%s(%s)' % (
216 |             self.__class__.__name__,
217 |             self._repr_params()
218 |         )
219 | 
220 |     def __eq__(self, other):
221 |         return isinstance(other, self.__class__) and other.to_dict() == self.to_dict()
222 | 
223 |     def __ne__(self, other):
224 |         return not self == other
225 | 
226 |     def __setattr__(self, name, value):
227 |         if name.startswith('_'):
228 |             return super(DslBase, self).__setattr__(name, value)
229 |         return self._setattr(name, value)
230 | 
231 |     def _setattr(self, name, value):
232 |         # if this attribute has a special type assigned to it...
233 |         if name in self._param_defs:
234 |             pinfo = self._param_defs[name]
235 | 
236 |             if 'type' in pinfo:
237 |                 # get the shortcut used to construct this type (query.Q, aggs.A, etc)
238 |                 shortcut = self.__class__.get_dsl_type(pinfo['type'])
239 |                 if pinfo.get('multi'):
240 |                     value = list(map(shortcut, value))
241 | 
242 |                 # dict(name -> DslBase), make sure we pick up all the objs
243 |                 elif pinfo.get('hash'):
244 |                     value = dict((k, shortcut(v)) for (k, v) in iteritems(value))
245 | 
246 |                 # single value object, just convert
247 |                 else:
248 |                     value = shortcut(value)
249 |         self._params[name] = value
250 | 
251 |     def __getattr__(self, name):
252 |         if name.startswith('_'):
253 |             raise AttributeError(
254 |                 '%r object has no attribute %r' % (self.__class__.__name__, name))
255 | 
256 |         value = None
257 |         try:
258 |             value = self._params[name]
259 |         except KeyError:
260 |             # compound types should never raise AttributeError; return an
261 |             # empty container instead
262 |             if name in self._param_defs:
263 |                 pinfo = self._param_defs[name]
264 |                 if pinfo.get('multi'):
265 |                     value = self._params.setdefault(name, [])
266 |                 elif pinfo.get('hash'):
267 |                     value = self._params.setdefault(name, {})
268 |         if value is None:
269 |             raise AttributeError(
270 |                 '%r object has no attribute %r' % (self.__class__.__name__, name))
271 | 
272 |         # wrap nested dicts in AttrDict for convenient access
273 |         if isinstance(value, dict):
274 |             return AttrDict(value)
275 |         return value
276 | 
277 |     def to_dict(self):
278 |         """
279 |         Serialize the DSL object to a plain dict
280 |         """
281 |         d = {}
282 |         for pname, value in iteritems(self._params):
283 |             pinfo = self._param_defs.get(pname)
284 | 
285 |             # typed param
286 |             if pinfo and 'type' in pinfo:
287 |                 # don't serialize empty lists and dicts for typed fields
288 |                 if value in ({}, []):
289 |                     continue
290 | 
291 |                 # multi-values are serialized as list of dicts
292 |                 if pinfo.get('multi'):
293 |                     value = list(map(lambda x: x.to_dict(), value))
294 | 
295 |                 # squash all the hash values into one dict
296 |                 elif pinfo.get('hash'):
297 |                     value = dict((k, v.to_dict()) for k, v in iteritems(value))
298 | 
299 |                 # serialize single values
300 |                 else:
301 |                     value = value.to_dict()
302 | 
303 |             # serialize anything with a to_dict method
304 |             elif hasattr(value, 'to_dict'):
305 |                 value = value.to_dict()
306 | 
307 |             d[pname] = value
308 |         return {self.name: d}
309 | 
310 |     def _clone(self):
311 |         return self._type_shortcut(self.to_dict())
312 | 
313 |     def __add__(self, other):
314 |         # make sure we give queries that know how to combine themselves
315 |         # preference
316 |         if hasattr(other, '__radd__'):
317 |             return other.__radd__(self)
318 |         return self._bool(must=[self, other])
319 | 
320 |     def __invert__(self):
321 |         return self._bool(must_not=[self])
322 | 
323 |     def __or__(self, other):
324 |         # make sure we give queries that know how to combine themselves
325 |         # preference
326 |         if hasattr(other, '__ror__'):
327 |             return other.__ror__(self)
328 |         return self._bool(should=[self, other])
329 | 
330 |     def __and__(self, other):
331 |         # make sure we give queries that know how to combine themselves
332 |         # preference
333 |         if hasattr(other, '__rand__'):
334 |             return other.__rand__(self)
335 |         return self._bool(must=[self, other])
336 | 
337 | 
338 | class BoolMixin(object):
339 |     """
340 |     Mixin containing all the operator overrides for Bool queries and filters.
341 | 
342 |     The exception is ``&``, where the handling of ``should`` clauses differs.
343 |     """
344 |     def __add__(self, other):
345 |         q = self._clone()
346 |         if isinstance(other, self.__class__):
347 |             q.must += other.must
348 |             q.should += other.should
349 |             q.must_not += other.must_not
350 |         else:
351 |             q.must.append(other)
352 |         return q
353 |     __radd__ = __add__
354 | 
355 |     def __or__(self, other):
356 |         if not (self.must or self.must_not):
357 |             # TODO: if only 1 in must or should, append the query instead of other
358 |             q = self._clone()
359 |             q.should.append(other)
360 |             return q
361 | 
362 |         elif isinstance(other, self.__class__) and not (other.must or other.must_not):
363 |             # TODO: if only 1 in must or should, append the query instead of self
364 |             q = other._clone()
365 |             q.should.append(self)
366 |             return q
367 | 
368 |         return self.__class__(should=[self, other])
369 |     __ror__ = __or__
370 | 
371 |     def __invert__(self):
372 |         # special case for single negated query
373 |         if not (self.must or self.should) and len(self.must_not) == 1:
374 |             return self.must_not[0]._clone()
375 | 
376 |         # bool without should, just flip must and must_not
377 |         elif not self.should:
378 |             q = self._clone()
379 |             q.must, q.must_not = q.must_not, q.must
380 |             return q
381 | 
382 |         # TODO: should -> must_not.append(self.__class__(should=self.should)) ??
383 |         # queries with should just invert normally
384 |         return super(BoolMixin, self).__invert__()
385 | 
386 | 
387 | class ObjectBase(AttrDict):
388 |     def __init__(self, **kwargs):
389 |         m = self._doc_type.mapping
390 |         for k in m:
391 |             if k in kwargs and m[k]._coerce:
392 |                 kwargs[k] = m[k].to_python(kwargs[k])
393 |         super(ObjectBase, self).__init__(kwargs)
394 | 
395 |     def __getattr__(self, name):
396 |         try:
397 |             return super(ObjectBase, self).__getattr__(name)
398 |         except AttributeError:
399 |             if name in self._doc_type.mapping:
400 |                 f = self._doc_type.mapping[name]
401 |                 if hasattr(f, 'empty'):
402 |                     value = f.empty()
403 |                     if value not in SKIP_VALUES:
404 |                         setattr(self, name, value)
405 |                         value = getattr(self, name)
406 |                     return value
407 |             raise
408 | 
409 |     def __setattr__(self, name, value):
410 |         if name in self._doc_type.mapping:
411 |             value = self._doc_type.mapping[name].to_python(value)
412 |         super(ObjectBase, self).__setattr__(name, value)
413 | 
414 |     def to_dict(self):
415 |         out = {}
416 |         for k, v in iteritems(self._d_):
417 |             if isinstance(v, (AttrList, list, tuple)):
418 |                 v = [i.to_dict() if hasattr(i, 'to_dict') else i for i in v]
419 |             else:
420 |                 v = v.to_dict() if hasattr(v, 'to_dict') else v
421 | 
422 |             # don't serialize empty values
423 |             # careful not to include numeric zeros
424 |             if v in ([], {}, None):
425 |                 continue
426 | 
427 |             out[k] = v
428 |         return out
429 | 
430 |     def clean_fields(self):
431 |         errors = {}
432 |         for name in self._doc_type.mapping:
433 |             field = self._doc_type.mapping[name]
434 |             data = self._d_.get(name, None)
435 |             try:
436 |                 # save the cleaned value
437 |                 self._d_[name] = field.clean(data)
438 |             except ValidationException as e:
439 |                 errors.setdefault(name, []).append(e)
440 | 
441 |         if errors:
442 |             raise ValidationException(errors)
443 | 
444 |     def clean(self):
445 |         pass
446 | 
447 |     def full_clean(self):
448 |         self.clean_fields()
449 |         self.clean()
450 | 
451 | def merge(data, new_data):
452 |     if not (isinstance(data, (AttrDict, dict))
453 |             and isinstance(new_data, (AttrDict, dict))):
454 |         raise ValueError('You can only merge two dicts! Got %r and %r instead.' % (data, new_data))
455 | 
456 |     for key, value in iteritems(new_data):
457 |         if key in data and isinstance(data[key], (AttrDict, dict)):
458 |             merge(data[key], value)
459 |         else:
460 |             data[key] = value
461 | 
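462 | 
463 | if __name__ == '__main__':
464 |     # Tiny illustrative check (not part of the original module): merge()
465 |     # recurses into nested dicts and mutates its first argument in place.
466 |     d = {'a': {'b': 1}}
467 |     merge(d, {'a': {'c': 2}, 'x': 3})
468 |     assert d == {'a': {'b': 1, 'c': 2}, 'x': 3}
469 | 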
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | #from distutils.core import setup, Command
4 | from setuptools import setup, find_packages
5 | import os
6 | import os.path
7 | 
8 | def read(fname):
9 |     return open(os.path.join(os.path.dirname(__file__), fname)).read()
10 | 
11 | setup(
12 |     name='elasticsearch_parse',
13 |     version='2.0',
14 |     description='elasticsearch parse',
15 |     long_description=open('README.md').read(),
16 |     keywords=["elasticsearch_parse", "fengyun"],
17 |     url='http://xiaorui.cc',
18 |     author='ruifengyun',
19 |     author_email='rfyiamcool@163.com',
20 |     install_requires=['elasticsearch-dsl'],
21 |     packages=['elasticsearch_parse'],
22 |     license="MIT",
23 |     classifiers=[
24 |         'Development Status :: 2 - Pre-Alpha',
25 |         'Intended Audience :: Developers',
26 |         'License :: OSI Approved :: MIT License',
27 |         'Programming Language :: Python :: 2.6',
28 |         'Programming Language :: Python :: 2.7',
29 |         'Programming Language :: Python :: 3.0',
30 |         'Topic :: Software Development :: Libraries :: Python Modules',
31 |     ]
32 | )
33 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | from elasticsearch_parse import Search, Q
2 | 
3 | 
4 | s = Search(index="my-index") \
5 |     .filter("term", blog="xiaorui.cc") \
6 |     .query("match", author="ruifengyun") \
7 |     .query(~Q("match", face="good"))
8 | 
9 | s.aggs.bucket('per_tag', 'terms', field='tags')
10 | 
11 | print(s.execute())
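12 | 
13 | # Extra illustrative usage (not in the original test): per the README,
14 | # buckets chain into metrics, and to_dict() renders the query body
15 | # without needing a live cluster. Field names here are just examples.
16 | s.aggs.bucket('per_author', 'terms', field='author').metric('max_lines', 'max', field='lines')
17 | print(s.to_dict())
18 | 
--------------------------------------------------------------------------------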