├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.rst ├── djangosphinx ├── __init__.py ├── admin.py ├── apis │ ├── __init__.py │ ├── api263 │ │ ├── __init__.py │ │ └── templates │ │ │ ├── index-multiple.conf │ │ │ ├── index.conf │ │ │ ├── source-multiple.conf │ │ │ └── source.conf │ ├── api275 │ │ └── __init__.py │ ├── api278 │ │ └── __init__.py │ └── current.py ├── config.py ├── constants.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ └── generate_sphinx_config.py ├── manager.py ├── models.py ├── templates │ ├── index-multiple.conf │ ├── index.conf │ ├── source-multiple.conf │ ├── source.conf │ └── sphinx.conf └── utils │ ├── __init__.py │ └── config.py ├── docs ├── Makefile ├── conf.py ├── index.rst └── make.bat ├── setup.py └── sphinxtest ├── __init__.py ├── manage.py ├── settings.py ├── tests ├── __init__.py ├── models.py └── views.py └── urls.py /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | *.pyc 4 | django_sphinx.egg-info/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 David Cramer and individual contributors. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | 3. Neither the name of the django-sphinx nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include setup.py README.rst MANIFEST.in 2 | recursive-include djangosphinx/templates *.conf 3 | global-exclude *~ -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | **This project is no longer maintained** 2 | 3 | This is a layer that functions much like the Django ORM does except it works on top of the Sphinx (http://www.sphinxsearch.com) full-text search engine. 
4 | 
5 | Please Note: You will need to create your own Sphinx indexes and install Sphinx on your server in order to use this app.
6 | 
7 | *There will no longer be release packages available. Please use Git to check out the latest trunk version, as it should always be stable and current.*
8 | 
9 | Installation
10 | ------------
11 | 
12 | To install the latest stable version::
13 | 
14 | 	sudo easy_install django-sphinx
15 | 
16 | To install the latest development version (updated quite often)::
17 | 
18 | 	git clone git://github.com/dcramer/django-sphinx.git
19 | 	cd django-sphinx
20 | 	sudo python setup.py install
21 | 
22 | *Note:* You will need to install the `sphinxapi.py` package on your Python path, or use one of the included versions. To use an included version, you must specify the following in your `settings.py` file::
23 | 
24 | 	# Sphinx 0.9.9
25 | 	SPHINX_API_VERSION = 0x116
26 | 
27 | 	# Sphinx 0.9.8
28 | 	SPHINX_API_VERSION = 0x113
29 | 
30 | 	# Sphinx 0.9.7
31 | 	SPHINX_API_VERSION = 0x107
32 | 
33 | Usage
34 | -----
35 | 
36 | The following is some example usage::
37 | 
38 | 	from djangosphinx.models import SphinxSearch
39 | 
40 | 	class MyModel(models.Model):
41 | 		search = SphinxSearch() # optional: defaults to db_table
42 | 		# If your index name does not match MyModel._meta.db_table:
43 | 		# Note: You can only generate automatic configurations from the ./manage.py script
44 | 		# if your index name matches the db_table.
45 | 		search = SphinxSearch('index_name')
46 | 
47 | 		# Or maybe we want to be more specific
48 | 		searchdelta = SphinxSearch(
49 | 			index='index_name delta_name',
50 | 			weights={
51 | 				'name': 100,
52 | 				'description': 10,
53 | 				'tags': 80,
54 | 			},
55 | 			mode='SPH_MATCH_ALL',
56 | 			rankmode='SPH_RANK_NONE',
57 | 		)
58 | 
59 | 	queryset = MyModel.search.query('query')
60 | 	results1 = queryset.order_by('@weight', '@id', 'my_attribute')
61 | 	results2 = queryset.filter(my_attribute=5)
62 | 	results3 = queryset.filter(my_other_attribute=[5, 3, 4])
63 | 	results4 = queryset.exclude(my_attribute=5)[0:10]
64 | 	results5 = queryset.count()
65 | 
66 | 	# As of 2.0 you can access an attribute on each result to get the weight and similar arguments
67 | 	for result in results1:
68 | 		print result, result._sphinx
69 | 	# You can also access a similar set of metadata on the queryset itself (once it has been sliced or executed in any way)
70 | 	print results1._sphinx
71 | 
72 | Some additional methods:
73 | 
74 | * count()
75 | * extra() (passed to the queryset)
76 | * all() (does nothing)
77 | * select_related() (passed to the queryset)
78 | * group_by(field, field, field)
79 | * set_options(index='', weights={} or [], mode='SPH_MATCH_*', rankmode='SPH_RANK_*')
80 | 
81 | The django-sphinx layer also supports basic querying over multiple indexes. To use this, you first need to understand the rules of a UNION: your indexes must contain exactly the same fields. Those fields must also include a `content_type` column which holds the content type id associated with each table (model).
82 | 
83 | You can then do something like this::
84 | 
85 | 	from djangosphinx.models import SphinxSearch
86 | 
87 | 	SphinxSearch('index1 index2 index3').query('hello')
88 | 
89 | This will return a list of all matches, ordered by weight, from all indexes. It performs one SQL query per index with matches in it, as Django's ORM does not support SQL UNION.
90 | 
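Because every match carries that `content_type` value, each result in a multi-index search can be mapped back to the model it came from. A minimal sketch of consuming such a result set (the index names are the hypothetical ones from above)::

	from djangosphinx.models import SphinxSearch

	search = SphinxSearch('index1 index2 index3')
	for obj in search.query('hello'):
		# each result is an instance of whichever model its
		# content_type points at
		print obj.__class__.__name__, obj.pk, obj._sphinx
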
91 | Config Generation
92 | -----------------
93 | 
94 | django-sphinx now includes a tool to create a sample configuration for your models. It will generate both a source and an index configuration for a model class. You will still need to manually tweak the output and insert it into your configuration, but it should aid in the initial setup.
95 | 
96 | To use it::
97 | 
98 | 	from djangosphinx.utils import *
99 | 
100 | 	from myproject.myapp.models import MyModel
101 | 
102 | 	output = generate_config_for_model(MyModel)
103 | 
104 | 	print output
105 | 
106 | If you have multiple models which you wish to use in UNION searching::
107 | 
108 | 	model_classes = (ModelOne, ModelTwoWhichResemblesModelOne)
109 | 
110 | 	output = generate_config_for_models(model_classes)
111 | 
112 | You can also output configuration from the command line::
113 | 
114 | 	./manage.py generate_sphinx_config
115 | 
116 | This will loop through all models and attempt to find any with a SphinxSearch instance that is using the default index name (db_table).
117 | 
118 | Using the Config Generator
119 | --------------------------
120 | 
121 | *New in 2.2*
122 | 
123 | django-sphinx now includes a simple Python script to generate a config using your default template renderer. By default this means that if `coffin` is included in your INSTALLED_APPS it is used; otherwise the Django template engine is used.
124 | 
125 | Two settings directly relate to the config generation::
126 | 
127 | 	# The base path for sphinx files. Subdirectories will include data, log, and run.
128 | 	SPHINX_ROOT = '/var/sphinx-search/'
129 | 
130 | 	# Optional, defaults to 'conf/sphinx.html'. This should be a configuration template.
131 | 	# See the included templates/sphinx.conf for an example.
132 | 	SPHINX_CONFIG_TEMPLATE = 'conf/sphinx.html'
133 | 
134 | Once done, your config can be passed to any sphinx command like so::
135 | 
136 | 	# Index your stuff
137 | 	DJANGO_SETTINGS_MODULE=myproject.settings indexer --config /path/to/djangosphinx/config.py --all --rotate
138 | 
139 | 	# Start the daemon
140 | 	DJANGO_SETTINGS_MODULE=myproject.settings searchd --config /path/to/djangosphinx/config.py
141 | 
142 | 	# Query the daemon
143 | 	DJANGO_SETTINGS_MODULE=myproject.settings search --config /path/to/djangosphinx/config.py my query
144 | 
145 | 	# Kill the daemon
146 | 	kill -9 $(cat /var/sphinx-search/run/searchd.pid)
147 | 
148 | For now, we recommend you set up some basic bash aliases or scripts to deal with this. This is just the first step in embedded config generation, so stay tuned!
149 | 
150 | * Note: Make sure your PYTHONPATH is set up properly!
151 | 
152 | Using Sphinx in Admin
153 | ---------------------
154 | 
155 | django-sphinx includes its own ModelAdmin class to allow you to use it with Django's built-in admin app.
156 | 
157 | To use it, see the following example::
158 | 
159 | 	from djangosphinx.admin import SphinxModelAdmin
160 | 
161 | 	class MyAdmin(SphinxModelAdmin):
162 | 		index = 'my_index_name' # defaults to Model._meta.db_table
163 | 		weights = {'field': 100}
164 | 
165 | Limitations? You know it.
166 | 
167 | - Only shows your max sphinx results (defaults to 1000)
168 | - Filters currently don't work.
169 | - This is a huge hack, so it may or may not continue working when Django updates.
170 | 
171 | Frequently Asked Questions
172 | --------------------------
173 | 
174 | *How do I run multiple copies of Sphinx using django-sphinx?*
175 | 
176 | The easiest way is to use a different SPHINX_PORT setting in each copy's settings.py. If you are using the config generation described above, just modify the port and start up another daemon.
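A minimal sketch of the idea (the second settings module and port number are illustrative)::

	# settings.py -- first Sphinx instance
	SPHINX_SERVER = 'localhost'
	SPHINX_PORT = 3312

	# settings_other.py -- a second copy of the project,
	# pointing at its own searchd instance
	SPHINX_SERVER = 'localhost'
	SPHINX_PORT = 3313
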
177 | 
178 | Resources
179 | ---------
180 | 
181 | * http://groups.google.com/group/django-sphinx
182 | * http://www.davidcramer.net/code/65/setting-up-django-with-sphinx.html
183 | 
--------------------------------------------------------------------------------
/djangosphinx/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Sphinx Search Engine ORM for Django models
3 | http://www.sphinxsearch.com/
4 | Developed and maintained by David Cramer
5 | 
6 | To add a search manager to your model:
7 | 
8 |     search = SphinxSearch([index=<string>, weight=[<int>,], mode=<string>])
9 | 
10 | 
11 | To query the engine and retrieve objects:
12 | 
13 |     MyModel.search.query('my string')
14 | 
15 | 
16 | To use multiple index support, you need to define a "content_type" field in your SQL
17 | clause. Each index also needs to have the exact same fields. The rules are almost identical
18 | to those of an SQL UNION query.
19 | 
20 |     SELECT id, name, 1 as content_type FROM model_myapp
21 |     SELECT id, name, 2 as content_type FROM model_myotherapp
22 |     search_results = SphinxSearch()
23 |     search_results.on_index('model_myapp model_myotherapp')
24 |     search_results.query('hello')
25 | 
26 | 
27 | default settings.py values:
28 | 
29 |     SPHINX_SERVER = 'localhost'
30 |     SPHINX_PORT = 3312
31 | 
32 | """
33 | import warnings
34 | import os.path
35 | 
36 | __version__ = (2, 2, 4)
37 | 
38 | def _get_git_revision(path):
39 |     revision_file = os.path.join(path, 'refs', 'heads', 'master')
40 |     if not os.path.exists(revision_file):
41 |         return None
42 |     fh = open(revision_file, 'r')
43 |     try:
44 |         return fh.read()
45 |     finally:
46 |         fh.close()
47 | 
48 | def get_revision():
49 |     """
50 |     :returns: Revision number of this branch/checkout, if available. None if
51 |     no revision number can be determined.
52 |     """
53 |     package_dir = os.path.dirname(__file__)
54 |     checkout_dir = os.path.normpath(os.path.join(package_dir, '..'))
55 |     path = os.path.join(checkout_dir, '.git')
56 |     if os.path.exists(path):
57 |         return _get_git_revision(path)
58 |     return None
59 | 
60 | __build__ = get_revision()
61 | 
62 | def lazy_object(location):
63 |     def inner(*args, **kwargs):
64 |         parts = location.rsplit('.', 1)
65 |         warnings.warn('`djangosphinx.%s` is deprecated. Please use `%s` instead.'
% (parts[1], location), DeprecationWarning)
66 |         imp = __import__(parts[0], globals(), locals(), [parts[1]], -1)
67 |         func = getattr(imp, parts[1])
68 |         if callable(func):
69 |             return func(*args, **kwargs)
70 |         return func
71 |     return inner
72 | 
73 | SphinxSearch = lazy_object('djangosphinx.models.SphinxSearch')
74 | SphinxQuerySet = lazy_object('djangosphinx.models.SphinxQuerySet')
75 | generate_config_for_model = lazy_object('djangosphinx.utils.generate_config_for_model')
76 | generate_config_for_models = lazy_object('djangosphinx.utils.generate_config_for_models')
--------------------------------------------------------------------------------
/djangosphinx/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib.admin.views.main import *
2 | from django.contrib.admin import ModelAdmin
3 | from djangosphinx.models import SphinxQuerySet
4 | 
5 | class SphinxModelAdmin(ModelAdmin):
6 |     index = None
7 |     weights = None
8 |     # This is a hack
9 |     search_fields = ['pk']
10 |     actions = None
11 | 
12 |     def queryset(self, request):
13 |         return SphinxQuerySet(
14 |             model=self.model,
15 |             index=self.index,
16 |         )
17 | 
18 |     def get_changelist(self, request, **kwargs):
19 |         return SphinxChangeList
20 | 
21 | class SphinxChangeList(ChangeList):
22 |     def get_query_set(self):
23 |         qs = self.root_query_set
24 |         lookup_params = self.params.copy() # a dictionary of the query string
25 |         for i in (ALL_VAR, ORDER_VAR, ORDER_TYPE_VAR, SEARCH_VAR, IS_POPUP_VAR):
26 |             if i in lookup_params:
27 |                 del lookup_params[i]
28 |         for key, value in lookup_params.items():
29 |             if not isinstance(key, str):
30 |                 # 'key' will be used as a keyword argument later, so Python
31 |                 # requires it to be a string.
32 |                 del lookup_params[key]
33 |                 lookup_params[smart_str(key)] = value
34 | 
35 |             # if key ends with __in, split parameter into separate values
36 |             if key.endswith('__in'):
37 |                 lookup_params[key] = value.split(',')
38 | 
39 |         # Apply lookup parameters from the query string.
40 |         try:
41 |             qs = qs.filter(**lookup_params)
42 |         # Naked except! Because we don't have any other way of validating "params".
43 |         # They might be invalid if the keyword arguments are incorrect, or if the
44 |         # values are not in the correct type, so we might get FieldError, ValueError,
45 |         # ValidationError, or ? from a custom field that raises yet something else
46 |         # when handed impossible data.
47 |         except:
48 |             raise IncorrectLookupParameters
49 | 
50 |         # Use select_related() if one of the list_display options is a field
51 |         # with a relationship and the provided queryset doesn't already have
52 |         # select_related defined.
53 |         if not qs._select_related:
54 |             if self.list_select_related:
55 |                 qs = qs.select_related()
56 |             else:
57 |                 for field_name in self.list_display:
58 |                     try:
59 |                         f = self.lookup_opts.get_field(field_name)
60 |                     except models.FieldDoesNotExist:
61 |                         pass
62 |                     else:
63 |                         if isinstance(f.rel, models.ManyToOneRel):
64 |                             qs = qs.select_related()
65 |                             break
66 | 
67 |         # Set ordering.
68 |         if self.order_field:
69 |             qs = qs.order_by('%s%s' % ((self.order_type == 'desc' and '-' or ''), self.order_field))
70 | 
71 |         if self.query:
72 |             qs = qs.query(self.query)
73 | 
74 |         if not (lookup_params or self.query):
75 |             # We don't show bare result sets in Sphinx
76 |             return qs.none()
77 | 
78 |         return qs
79 | 
80 |     def get_results(self, request):
81 |         paginator = Paginator(self.query_set, self.list_per_page)
82 |         # Get the number of objects, with admin filters applied.
83 | result_count = paginator.count 84 | 85 | multi_page = result_count > self.list_per_page 86 | 87 | # Get the list of objects to display on this page. 88 | try: 89 | result_list = paginator.page(self.page_num+1).object_list 90 | except InvalidPage: 91 | result_list = () 92 | 93 | self.full_result_count = result_count 94 | self.result_count = result_count 95 | self.result_list = result_list 96 | self.can_show_all = False 97 | self.multi_page = multi_page 98 | self.paginator = paginator -------------------------------------------------------------------------------- /djangosphinx/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcramer/django-sphinx/0071d1cae5390d0ec8c669786ca3c7275abb6410/djangosphinx/apis/__init__.py -------------------------------------------------------------------------------- /djangosphinx/apis/api263/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # $Id: sphinxapi.py,v 1.7 2007/04/01 21:38:13 shodan Exp $ 3 | # 4 | # Python version of Sphinx searchd client (Python API) 5 | # 6 | # Copyright (c) 2006-2007, Andrew Aksyonoff 7 | # Copyright (c) 2006, Mike Osadnik 8 | # All rights reserved 9 | # 10 | # This program is free software; you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License. You should have 12 | # received a copy of the GPL license along with this program; if you 13 | # did not, you can find it at http://www.gnu.org/ 14 | # 15 | 16 | import select 17 | import socket 18 | from struct import * 19 | 20 | 21 | # known searchd commands 22 | SEARCHD_COMMAND_SEARCH = 0 23 | SEARCHD_COMMAND_EXCERPT = 1 24 | 25 | # current client-side command implementation versions 26 | VER_COMMAND_SEARCH = 0x107 27 | VER_COMMAND_EXCERPT = 0x100 28 | 29 | # known searchd status codes 30 | SEARCHD_OK = 0 31 | SEARCHD_ERROR = 1 32 | SEARCHD_RETRY = 2 33 | SEARCHD_WARNING = 3 34 | 35 | # known match modes 36 | SPH_MATCH_ALL = 0 37 | SPH_MATCH_ANY = 1 38 | SPH_MATCH_PHRASE = 2 39 | SPH_MATCH_BOOLEAN = 3 40 | SPH_MATCH_EXTENDED = 4 41 | 42 | # known sort modes 43 | SPH_SORT_RELEVANCE = 0 44 | SPH_SORT_ATTR_DESC = 1 45 | SPH_SORT_ATTR_ASC = 2 46 | SPH_SORT_TIME_SEGMENTS = 3 47 | SPH_SORT_EXTENDED = 4 48 | 49 | # known attribute types 50 | SPH_ATTR_INTEGER = 1 51 | SPH_ATTR_TIMESTAMP = 2 52 | 53 | # known grouping functions 54 | SPH_GROUPBY_DAY = 0 55 | SPH_GROUPBY_WEEK = 1 56 | SPH_GROUPBY_MONTH = 2 57 | SPH_GROUPBY_YEAR = 3 58 | SPH_GROUPBY_ATTR = 4 59 | 60 | class SphinxClient: 61 | _host = 'localhost' # searchd host (default is "localhost") 62 | _port = 3312 # searchd port (default is 3312) 63 | _offset = 0 # how much records to seek from result-set start (default is 0) 64 | _limit = 20 # how much records to return from result-set starting at offset (default is 20) 65 | _mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL) 66 | _weights = [] # per-field weights (default is 1 for all fields) 67 | _sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE) 68 | _sortby = '' # attribute to sort by (defualt is "") 69 | _min_id = 0 # min ID to match (default is 0) 70 | _max_id = 0xFFFFFFFF # max ID to match (default is UINT_MAX) 71 | _filters = [] # search filters 72 | _groupby = '' # group-by attribute name 73 | _groupfunc = SPH_GROUPBY_DAY # group-by function (to pre-process group-by attribute value with) 74 | _groupsort = '@group desc' # group-by sorting clause (to sort 
groups in result set with)
75 | 	_maxmatches	= 1000			# max matches to retrieve
76 | 	_error		= ''			# last error message
77 | 	_warning	= ''			# last warning message
78 | 
79 | 
80 | 	def __init__ (self):
81 | 		"""
82 | 		create a new client object and fill defaults
83 | 		"""
84 | 		pass
85 | 
86 | 
87 | 	def GetLastError (self):
88 | 		"""
89 | 		get last error message (string)
90 | 		"""
91 | 		return self._error
92 | 
93 | 
94 | 	def GetLastWarning (self):
95 | 		"""
96 | 		get last warning message (string)
97 | 		"""
98 | 		return self._warning
99 | 
100 | 
101 | 	def SetServer (self, host, port):
102 | 		"""
103 | 		set searchd server
104 | 		"""
105 | 		assert(isinstance(host, str))
106 | 		assert(isinstance(port, int))
107 | 
108 | 		self._host = host
109 | 		self._port = port
110 | 
111 | 
112 | 	def _Connect (self):
113 | 		"""
114 | 		connect to searchd server
115 | 		"""
116 | 		try:
117 | 			sock = socket.socket ( socket.AF_INET, socket.SOCK_STREAM )
118 | 			sock.connect ( ( self._host, self._port ) )
119 | 		except socket.error, msg:
120 | 			if sock:
121 | 				sock.close()
122 | 			self._error = 'connection to %s:%s failed (%s)' % ( self._host, self._port, msg )
123 | 			return 0
124 | 
125 | 		v = unpack('>L', sock.recv(4))
126 | 		if v<1:
127 | 			sock.close()
128 | 			self._error = 'expected searchd protocol version, got %s' % v
129 | 			return 0
130 | 
131 | 		# all ok, send my version
132 | 		sock.send(pack('>L', 1))
133 | 		return sock
134 | 
135 | 
136 | 	def _GetResponse (self, sock, client_ver):
137 | 		"""
138 | 		get and check response packet from searchd server
139 | 		"""
140 | 		(status, ver, length) = unpack('>2HL', sock.recv(8))
141 | 		response = ''
142 | 		left = length
143 | 		while left>0:
144 | 			chunk = sock.recv(left)
145 | 			if chunk:
146 | 				response += chunk
147 | 				left -= len(chunk)
148 | 			else:
149 | 				break
150 | 
151 | 		sock.close()
152 | 
153 | 		# check response
154 | 		read = len(response)
155 | 		if not response or read!=length:
156 | 			if length:
157 | 				self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \
158 | 					% (status, ver, length, read)
159 | 			else:
160 | 				self._error = 'received zero-sized searchd response'
161 | 			return None
162 | 
163 | 		# check status
164 | 		if status==SEARCHD_WARNING:
165 | 			wend = 4 + unpack ( '>L', response[0:4] )[0]
166 | 			self._warning = response[4:wend]
167 | 			return response[wend:]
168 | 
169 | 		if status==SEARCHD_ERROR:
170 | 			self._error = 'searchd error: '+response[4:]
171 | 			return None
172 | 
173 | 		if status==SEARCHD_RETRY:
174 | 			self._error = 'temporary searchd error: '+response[4:]
175 | 			return None
176 | 
177 | 		if status!=SEARCHD_OK:
178 | 			self._error = 'unknown status code %d' % status
179 | 			return None
180 | 
181 | 		# check version
182 | 		if ver<client_ver:
183 | 			self._warning = 'searchd command v.%d.%d older than client\'s v.%d.%d, some options might not work' \
184 | 				% (ver>>8, ver&0xff, client_ver>>8, client_ver&0xff)
185 | 
186 | 		return response
187 | 
188 | 
189 | 	def SetLimits (self, offset, limit, maxmatches=0):
190 | 		"""
191 | 		set match offset, count, and max number to retrieve
192 | 		"""
193 | 		assert(isinstance(offset, int) and offset>=0)
194 | 		assert(isinstance(limit, int) and limit>0)
195 | 		assert(maxmatches>=0)
196 | 		self._offset = offset
197 | 		self._limit = limit
198 | 		if maxmatches>0:
199 | 			self._maxmatches = maxmatches
200 | 
201 | 
202 | 	def SetMatchMode (self, mode):
203 | 		"""
204 | 		set match mode
205 | 		"""
206 | 		assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED])
207 | 		self._mode = mode
208 | 
209 | 
210 | 	def SetSortMode ( self, mode, clause='' ):
211 | 		"""
212 | 		set sort mode
213 | 		"""
214 | 		assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS,
SPH_SORT_EXTENDED] ) 215 | assert ( isinstance ( clause, str ) ) 216 | self._sort = mode 217 | self._sortby = clause 218 | 219 | 220 | def SetWeights (self, weights): 221 | """ 222 | set per-field weights 223 | """ 224 | assert(isinstance(weights, list)) 225 | for w in weights: 226 | assert(isinstance(w, int)) 227 | self._weights = weights 228 | 229 | 230 | def SetIDRange (self, minid, maxid): 231 | """ 232 | set IDs range to match 233 | only match those records where document ID 234 | is beetwen minid and maxid (including minid and maxid) 235 | """ 236 | assert(isinstance(minid, int)) 237 | assert(isinstance(maxid, int)) 238 | assert(minid<=maxid) 239 | self._min_id = minid 240 | self._max_id = maxid 241 | 242 | 243 | def SetFilter ( self, attribute, values, exclude=0 ): 244 | """ 245 | set values filter 246 | only match those records where $attribute column values 247 | are in specified set 248 | """ 249 | assert(isinstance(attribute, str)) 250 | assert(isinstance(values, list)) 251 | assert(values) 252 | 253 | values = map(int, values) 254 | 255 | self._filters.append ( { 'attr':attribute, 'exclude':exclude, 'values':values } ) 256 | 257 | 258 | def SetFilterRange (self, attribute, min_, max_, exclude=0 ): 259 | """ 260 | set range filter 261 | only match those records where $attribute column value 262 | is beetwen $min and $max (including $min and $max) 263 | """ 264 | assert(isinstance(attribute, str)) 265 | assert(isinstance(min_, int)) 266 | assert(isinstance(max_, int)) 267 | assert(min_<=max_) 268 | 269 | self._filters.append ( { 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } ) 270 | 271 | 272 | def SetGroupBy ( self, attribute, func, groupsort='@group desc' ): 273 | """ 274 | set grouping attribute and function 275 | 276 | in grouping mode, all matches are assigned to different groups 277 | based on grouping function value. 278 | 279 | each group keeps track of the total match count, and the best match 280 | (in this group) according to current sorting function. 281 | 282 | the final result set contains one best match per group, with 283 | grouping function value and matches count attached. 284 | 285 | groups in result set could be sorted by any sorting clause, 286 | including both document attributes and the following special 287 | internal Sphinx attributes: 288 | 289 | - @id - match document ID; 290 | - @weight, @rank, @relevance - match weight; 291 | - @group - groupby function value; 292 | - @count - amount of matches in group. 293 | 294 | the default mode is to sort by groupby value in descending order, 295 | ie. by "@group desc". 296 | 297 | "total_found" would contain total amount of matching groups over 298 | the whole index. 299 | 300 | WARNING: grouping is done in fixed memory and thus its results 301 | are only approximate; so there might be more groups reported 302 | in total_found than actually present. @count might also 303 | be underestimated. 304 | 305 | for example, if sorting by relevance and grouping by "published" 306 | attribute with SPH_GROUPBY_DAY function, then the result set will 307 | contain one most relevant match per each day when there were any 308 | matches published, with day number and per-day match count attached, 309 | and sorted by day number in descending order (ie. recent days first). 
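		a short usage sketch for the example above (the "published"
		attribute name is illustrative):

			cl = SphinxClient()
			cl.SetGroupBy ( 'published', SPH_GROUPBY_DAY )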
310 | """ 311 | assert(isinstance(attribute, str)) 312 | assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR] ) 313 | assert(isinstance(groupsort, str)) 314 | 315 | self._groupby = attribute 316 | self._groupfunc = func 317 | self._groupsort = groupsort 318 | 319 | 320 | def Query (self, query, index='*'): 321 | """ 322 | connect to searchd server and run given search query 323 | 324 | "query" is query string 325 | "index" is index name to query, default is "*" which means to query all indexes 326 | 327 | returns false on failure 328 | returns hash which has the following keys on success: 329 | "matches" 330 | an array of found matches represented as ( "id", "weight", "attrs" ) hashes 331 | "total" 332 | total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h) 333 | "total_found" 334 | total amount of matching documents in index 335 | "time" 336 | search time 337 | "words" 338 | an array of ( "word", "docs", "hits" ) hashes which contains 339 | docs and hits count for stemmed (!) query words 340 | """ 341 | sock = self._Connect() 342 | if not sock: 343 | return {} 344 | 345 | # build request 346 | req = [pack('>4L', self._offset, self._limit, self._mode, self._sort)] 347 | 348 | req.append(pack('>L', len(self._sortby))) 349 | req.append(self._sortby) 350 | 351 | req.append(pack('>L', len(query))) 352 | req.append(query) 353 | 354 | req.append(pack('>L', len(self._weights))) 355 | for w in self._weights: 356 | req.append(pack('>L', w)) 357 | 358 | req.append(pack('>L', len(index))) 359 | req.append(index) 360 | req.append(pack('>L', self._min_id)) 361 | req.append(pack('>L', self._max_id)) 362 | 363 | # filters 364 | req.append ( pack ( '>L', len(self._filters) ) ) 365 | for f in self._filters: 366 | req.append ( pack ( '>L', len(f['attr']) ) ) 367 | req.append ( f['attr'] ) 368 | if ( 'values' in f ): 369 | req.append ( pack ( '>L', len(f['values']) ) ) 370 | for v in f['values']: 371 | req.append ( pack ( '>L', v ) ) 372 | else: 373 | req.append ( pack ( '>3L', 0, f['min'], f['max'] ) ) 374 | req.append ( pack ( '>L', f['exclude'] ) ) 375 | 376 | # group-by, max-matches, group-sort 377 | req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) ) 378 | req.append ( self._groupby ) 379 | req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) ) 380 | req.append ( self._groupsort ) 381 | 382 | # send query, get response 383 | req = ''.join(req) 384 | 385 | length = len(req) 386 | req = pack('>2HL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length)+req 387 | sock.send(req) 388 | response = self._GetResponse(sock, VER_COMMAND_SEARCH) 389 | if not response: 390 | return {} 391 | 392 | # parse response 393 | result = {} 394 | max_ = len(response) 395 | 396 | # read schema 397 | p = 0 398 | fields = [] 399 | attrs = [] 400 | 401 | nfields = unpack('>L', response[p:p+4])[0] 402 | p += 4 403 | while nfields>0 and pL', response[p:p+4])[0] 406 | p += 4 407 | fields.append(response[p:p+length]) 408 | p += length 409 | 410 | result['fields'] = fields 411 | 412 | nattrs = unpack('>L', response[p:p+4])[0] 413 | p += 4 414 | while nattrs>0 and pL', response[p:p+4])[0] 417 | p += 4 418 | attr = response[p:p+length] 419 | p += length 420 | type_ = unpack('>L', response[p:p+4])[0] 421 | p += 4 422 | attrs.append([attr,type_]) 423 | 424 | result['attrs'] = attrs 425 | 426 | # read match count 427 | count = unpack('>L', response[p:p+4])[0] 428 | p += 4 429 | 430 | # read matches 431 | result['matches'] = [] 432 | while 
count>0 and p2L', response[p:p+8]) 435 | p += 8 436 | 437 | match = { 'id':doc, 'weight':weight, 'attrs':{} } 438 | for i in range(len(attrs)): 439 | match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0] 440 | p += 4 441 | 442 | result['matches'].append ( match ) 443 | 444 | result['total'], result['total_found'], result['time'], words = \ 445 | unpack('>4L', response[p:p+16]) 446 | 447 | result['time'] = '%.3f' % (result['time']/1000.0) 448 | p += 16 449 | 450 | result['words'] = [] 451 | while words>0: 452 | words -= 1 453 | length = unpack('>L', response[p:p+4])[0] 454 | p += 4 455 | word = response[p:p+length] 456 | p += length 457 | docs, hits = unpack('>2L', response[p:p+8]) 458 | p += 8 459 | 460 | result['words'].append({'word':word, 'docs':docs, 'hits':hits}) 461 | 462 | sock.close() 463 | 464 | return result 465 | 466 | 467 | def BuildExcerpts (self, docs, index, words, opts=None): 468 | """ 469 | connect to searchd server and generate exceprts from given documents 470 | 471 | "docs" is an array of strings which represent the documents' contents 472 | "index" is a string specifiying the index which settings will be used 473 | for stemming, lexing and case folding 474 | "words" is a string which contains the words to highlight 475 | "opts" is a hash which contains additional optional highlighting parameters: 476 | "before_match" 477 | a string to insert before a set of matching words, default is "" 478 | "after_match" 479 | a string to insert after a set of matching words, default is "" 480 | "chunk_separator" 481 | a string to insert between excerpts chunks, default is " ... " 482 | "limit" 483 | max excerpt size in symbols (codepoints), default is 256 484 | "around" 485 | how much words to highlight around each match, default is 5 486 | 487 | returns false on failure 488 | returns an array of string excerpts on success 489 | """ 490 | if not opts: 491 | opts = {} 492 | 493 | assert(isinstance(docs, list)) 494 | assert(isinstance(index, str)) 495 | assert(isinstance(words, str)) 496 | assert(isinstance(opts, dict)) 497 | 498 | sock = self._Connect() 499 | 500 | if not sock: 501 | return [] 502 | 503 | # fixup options 504 | opts.setdefault('before_match', '') 505 | opts.setdefault('after_match', '') 506 | opts.setdefault('chunk_separator', ' ... 
') 507 | opts.setdefault('limit', 256) 508 | opts.setdefault('around', 5) 509 | 510 | # build request 511 | # v.1.0 req 512 | 513 | # mode=0, flags=1 (remove spaces) 514 | req = [pack('>2L', 0, 1)] 515 | 516 | # req index 517 | req.append(pack('>L', len(index))) 518 | req.append(index) 519 | 520 | # req words 521 | req.append(pack('>L', len(words))) 522 | req.append(words) 523 | 524 | # options 525 | req.append(pack('>L', len(opts['before_match']))) 526 | req.append(opts['before_match']) 527 | 528 | req.append(pack('>L', len(opts['after_match']))) 529 | req.append(opts['after_match']) 530 | 531 | req.append(pack('>L', len(opts['chunk_separator']))) 532 | req.append(opts['chunk_separator']) 533 | 534 | req.append(pack('>L', int(opts['limit']))) 535 | req.append(pack('>L', int(opts['around']))) 536 | 537 | # documents 538 | req.append(pack('>L', len(docs))) 539 | for doc in docs: 540 | assert(isinstance(doc, str)) 541 | req.append(pack('>L', len(doc))) 542 | req.append(doc) 543 | 544 | req = ''.join(req) 545 | 546 | # send query, get response 547 | length = len(req) 548 | 549 | # add header 550 | req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req 551 | wrote = sock.send(req) 552 | 553 | response = self._GetResponse(sock, VER_COMMAND_EXCERPT ) 554 | if not response: 555 | return [] 556 | 557 | # parse response 558 | pos = 0 559 | res = [] 560 | rlen = len(response) 561 | 562 | for i in range(len(docs)): 563 | length = unpack('>L', response[pos:pos+4])[0] 564 | pos += 4 565 | 566 | if pos+length > rlen: 567 | self._error = 'incomplete reply' 568 | return [] 569 | 570 | res.append(response[pos:pos+length]) 571 | pos += length 572 | 573 | return res 574 | 575 | # 576 | # $Id: sphinxapi.py,v 1.7 2007/04/01 21:38:13 shodan Exp $ 577 | # 578 | -------------------------------------------------------------------------------- /djangosphinx/apis/api263/templates/index-multiple.conf: -------------------------------------------------------------------------------- 1 | index {{ index_name }} 2 | { 3 | source = {{ source_name }} 4 | path = /var/data/{{ index_name }} 5 | docinfo = extern 6 | morphology = none 7 | stopwords = 8 | min_word_len = 2 9 | charset_type = utf-8 10 | min_prefix_len = 0 11 | min_infix_len = 0 12 | } -------------------------------------------------------------------------------- /djangosphinx/apis/api263/templates/index.conf: -------------------------------------------------------------------------------- 1 | index {{ index_name }} 2 | { 3 | source = {{ source_name }} 4 | path = /var/data/{{ index_name }} 5 | docinfo = extern 6 | morphology = none 7 | stopwords = 8 | min_word_len = 2 9 | charset_type = utf-8 10 | min_prefix_len = 0 11 | min_infix_len = 0 12 | } -------------------------------------------------------------------------------- /djangosphinx/apis/api263/templates/source-multiple.conf: -------------------------------------------------------------------------------- 1 | source {{ source_name }} 2 | { 3 | type = {{ database_engine }} 4 | html_strip = 0 5 | html_index_attrs = 6 | sql_host = {{ database_host }} 7 | sql_user = {{ database_user }} 8 | sql_pass = {{ database_password }} 9 | sql_db = {{ database_name }} 10 | sql_port = {{ database_port }} 11 | 12 | sql_query_pre = 13 | sql_query_post = 14 | sql_query = \ 15 | {% for table_name, content_type in tables %} 16 | SELECT {{ field_names|join:", " }}, {{ content_type.id }} as content_type \ 17 | FROM `{{ table_name }}`{% if not loop.last %} UNION \{% endif %} 18 | {% endfor %} 19 | {% if 
group_columns %} 20 | # ForeignKey's 21 | {% for field_name in group_columns %} sql_group_column = {{ field_name }} 22 | {% endfor %}{% endif %} 23 | {% if bool_columns %} 24 | # BooleanField's 25 | {% for field_name in bool_columns %} sql_group_column = {{ field_name }} 26 | {% endfor %}{% endif %} 27 | {% if date_columns %} 28 | # DateField's and DateTimeField's 29 | {% for field_name in date_columns %} sql_date_column = {{ field_name }} 30 | {% endfor %}{% endif %} 31 | } -------------------------------------------------------------------------------- /djangosphinx/apis/api263/templates/source.conf: -------------------------------------------------------------------------------- 1 | source {{ source_name }} 2 | { 3 | type = {{ database_engine }} 4 | strip_html = 0 5 | index_html_attrs = 6 | sql_host = {{ database_host }} 7 | sql_user = {{ database_user }} 8 | sql_pass = {{ database_password }} 9 | sql_db = {{ database_name }} 10 | sql_port = {{ database_port }} 11 | log = {{ log_file }} 12 | 13 | sql_query_pre = 14 | sql_query_post = 15 | sql_query = \ 16 | SELECT {{ field_names|join:", " }} \ 17 | FROM {{ table_name }} 18 | sql_query_info = SELECT * FROM `{{ table_name }}` WHERE `{{ primary_key }}` = $id 19 | {% if group_columns %} 20 | # ForeignKey's 21 | {% for field_name in group_columns %} sql_group_column = {{ field_name }} 22 | {% endfor %}{% endif %} 23 | {% if bool_columns %} 24 | # BooleanField's 25 | {% for field_name in bool_columns %} sql_group_column = {{ field_name }} 26 | {% endfor %}{% endif %} 27 | {% if date_columns %} 28 | # DateField's and DateTimeField's 29 | {% for field_name in date_columns %} sql_date_column = {{ field_name }} 30 | {% endfor %}{% endif %} 31 | } -------------------------------------------------------------------------------- /djangosphinx/apis/api275/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # $Id: sphinxapi.py 1216 2008-03-14 23:25:39Z shodan $ 3 | # 4 | # Python version of Sphinx searchd client (Python API) 5 | # 6 | # Copyright (c) 2006-2008, Andrew Aksyonoff 7 | # Copyright (c) 2006, Mike Osadnik 8 | # All rights reserved 9 | # 10 | # This program is free software; you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License. 
You should have 12 | # received a copy of the GPL license along with this program; if you 13 | # did not, you can find it at http://www.gnu.org/ 14 | # 15 | 16 | import sys 17 | import select 18 | import socket 19 | from struct import * 20 | 21 | 22 | # known searchd commands 23 | SEARCHD_COMMAND_SEARCH = 0 24 | SEARCHD_COMMAND_EXCERPT = 1 25 | SEARCHD_COMMAND_UPDATE = 2 26 | SEARCHD_COMMAND_KEYWORDS= 3 27 | 28 | # current client-side command implementation versions 29 | VER_COMMAND_SEARCH = 0x113 30 | VER_COMMAND_EXCERPT = 0x100 31 | VER_COMMAND_UPDATE = 0x101 32 | VER_COMMAND_KEYWORDS = 0x100 33 | 34 | # known searchd status codes 35 | SEARCHD_OK = 0 36 | SEARCHD_ERROR = 1 37 | SEARCHD_RETRY = 2 38 | SEARCHD_WARNING = 3 39 | 40 | # known match modes 41 | SPH_MATCH_ALL = 0 42 | SPH_MATCH_ANY = 1 43 | SPH_MATCH_PHRASE = 2 44 | SPH_MATCH_BOOLEAN = 3 45 | SPH_MATCH_EXTENDED = 4 46 | SPH_MATCH_FULLSCAN = 5 47 | SPH_MATCH_EXTENDED2 = 6 48 | 49 | # known ranking modes (extended2 mode only) 50 | SPH_RANK_PROXIMITY_BM25 = 0 # default mode, phrase proximity major factor and BM25 minor one 51 | SPH_RANK_BM25 = 1 # statistical mode, BM25 ranking only (faster but worse quality) 52 | SPH_RANK_NONE = 2 # no ranking, all matches get a weight of 1 53 | SPH_RANK_WORDCOUNT = 3 # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts 54 | 55 | # known sort modes 56 | SPH_SORT_RELEVANCE = 0 57 | SPH_SORT_ATTR_DESC = 1 58 | SPH_SORT_ATTR_ASC = 2 59 | SPH_SORT_TIME_SEGMENTS = 3 60 | SPH_SORT_EXTENDED = 4 61 | SPH_SORT_EXPR = 5 62 | 63 | # known filter types 64 | SPH_FILTER_VALUES = 0 65 | SPH_FILTER_RANGE = 1 66 | SPH_FILTER_FLOATRANGE = 2 67 | 68 | # known attribute types 69 | SPH_ATTR_NONE = 0 70 | SPH_ATTR_INTEGER = 1 71 | SPH_ATTR_TIMESTAMP = 2 72 | SPH_ATTR_ORDINAL = 3 73 | SPH_ATTR_BOOL = 4 74 | SPH_ATTR_FLOAT = 5 75 | SPH_ATTR_MULTI = 0X40000000L 76 | 77 | # known grouping functions 78 | SPH_GROUPBY_DAY = 0 79 | SPH_GROUPBY_WEEK = 1 80 | SPH_GROUPBY_MONTH = 2 81 | SPH_GROUPBY_YEAR = 3 82 | SPH_GROUPBY_ATTR = 4 83 | 84 | 85 | class SphinxClient: 86 | def __init__ (self): 87 | """ 88 | Create a new client object, and fill defaults. 
89 | """ 90 | self._host = 'localhost' # searchd host (default is "localhost") 91 | self._port = 3312 # searchd port (default is 3312) 92 | self._offset = 0 # how much records to seek from result-set start (default is 0) 93 | self._limit = 20 # how much records to return from result-set starting at offset (default is 20) 94 | self._mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL) 95 | self._weights = [] # per-field weights (default is 1 for all fields) 96 | self._sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE) 97 | self._sortby = '' # attribute to sort by (defualt is "") 98 | self._min_id = 0 # min ID to match (default is 0) 99 | self._max_id = 0xFFFFFFFF # max ID to match (default is UINT_MAX) 100 | self._filters = [] # search filters 101 | self._groupby = '' # group-by attribute name 102 | self._groupfunc = SPH_GROUPBY_DAY # group-by function (to pre-process group-by attribute value with) 103 | self._groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with) 104 | self._groupdistinct = '' # group-by count-distinct attribute 105 | self._maxmatches = 1000 # max matches to retrieve 106 | self._cutoff = 0 # cutoff to stop searching at 107 | self._retrycount = 0 # distributed retry count 108 | self._retrydelay = 0 # distributed retry delay 109 | self._anchor = {} # geographical anchor point 110 | self._indexweights = {} # per-index weights 111 | self._ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode 112 | self._maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit) 113 | self._fieldweights = {} # per-field-name weights 114 | self._error = '' # last error message 115 | self._warning = '' # last warning message 116 | self._reqs = [] # requests array for multi-query 117 | return 118 | 119 | 120 | def GetLastError (self): 121 | """ 122 | Get last error message (string). 123 | """ 124 | return self._error 125 | 126 | 127 | def GetLastWarning (self): 128 | """ 129 | Get last warning message (string). 130 | """ 131 | return self._warning 132 | 133 | 134 | def SetServer (self, host, port): 135 | """ 136 | Set searchd server host and port. 137 | """ 138 | assert(isinstance(host, str)) 139 | assert(isinstance(port, int)) 140 | self._host = host 141 | self._port = port 142 | 143 | 144 | def _Connect (self): 145 | """ 146 | INTERNAL METHOD, DO NOT CALL. Connects to searchd server. 147 | """ 148 | try: 149 | sock = socket.socket ( socket.AF_INET, socket.SOCK_STREAM ) 150 | sock.connect ( ( self._host, self._port ) ) 151 | except socket.error, msg: 152 | if sock: 153 | sock.close() 154 | self._error = 'connection to %s:%s failed (%s)' % ( self._host, self._port, msg ) 155 | return 0 156 | 157 | v = unpack('>L', sock.recv(4)) 158 | if v<1: 159 | sock.close() 160 | self._error = 'expected searchd protocol version, got %s' % v 161 | return 0 162 | 163 | # all ok, send my version 164 | sock.send(pack('>L', 1)) 165 | return sock 166 | 167 | 168 | def _GetResponse (self, sock, client_ver): 169 | """ 170 | INTERNAL METHOD, DO NOT CALL. Gets and checks response packet from searchd server. 
171 | """ 172 | (status, ver, length) = unpack('>2HL', sock.recv(8)) 173 | response = '' 174 | left = length 175 | while left>0: 176 | chunk = sock.recv(left) 177 | if chunk: 178 | response += chunk 179 | left -= len(chunk) 180 | else: 181 | break 182 | 183 | sock.close() 184 | 185 | # check response 186 | read = len(response) 187 | if not response or read!=length: 188 | if length: 189 | self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \ 190 | % (status, ver, length, read) 191 | else: 192 | self._error = 'received zero-sized searchd response' 193 | return None 194 | 195 | # check status 196 | if status==SEARCHD_WARNING: 197 | wend = 4 + unpack ( '>L', response[0:4] )[0] 198 | self._warning = response[4:wend] 199 | return response[wend:] 200 | 201 | if status==SEARCHD_ERROR: 202 | self._error = 'searchd error: '+response[4:] 203 | return None 204 | 205 | if status==SEARCHD_RETRY: 206 | self._error = 'temporary searchd error: '+response[4:] 207 | return None 208 | 209 | if status!=SEARCHD_OK: 210 | self._error = 'unknown status code %d' % status 211 | return None 212 | 213 | # check version 214 | if ver>8, ver&0xff, client_ver>>8, client_ver&0xff) 217 | 218 | return response 219 | 220 | 221 | def SetLimits (self, offset, limit, maxmatches=0, cutoff=0): 222 | """ 223 | Set offset and count into result set, and optionally set max-matches and cutoff limits. 224 | """ 225 | assert(isinstance(offset, int) and offset>=0) 226 | assert(isinstance(limit, int) and limit>0) 227 | assert(maxmatches>=0) 228 | self._offset = offset 229 | self._limit = limit 230 | if maxmatches>0: 231 | self._maxmatches = maxmatches 232 | if cutoff>=0: 233 | self._cutoff = cutoff 234 | 235 | 236 | def SetMaxQueryTime (self, maxquerytime): 237 | """ 238 | Set maximum query time, in milliseconds, per-index. 0 means 'do not limit'. 239 | """ 240 | assert(isinstance(maxquerytime,int) and maxquerytime>0) 241 | self._maxquerytime = maxquerytime 242 | 243 | 244 | def SetMatchMode (self, mode): 245 | """ 246 | Set matching mode. 247 | """ 248 | assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED, SPH_MATCH_FULLSCAN, SPH_MATCH_EXTENDED2]) 249 | self._mode = mode 250 | 251 | 252 | def SetRankingMode (self, ranker): 253 | """ 254 | Set ranking mode. 255 | """ 256 | assert(ranker in [SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE, SPH_RANK_WORDCOUNT]) 257 | self._ranker = ranker 258 | 259 | 260 | def SetSortMode ( self, mode, clause='' ): 261 | """ 262 | Set sorting mode. 263 | """ 264 | assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR] ) 265 | assert ( isinstance ( clause, str ) ) 266 | self._sort = mode 267 | self._sortby = clause 268 | 269 | 270 | def SetWeights (self, weights): 271 | """ 272 | Set per-field weights. 273 | WARNING, DEPRECATED; do not use it! use SetFieldWeights() instead 274 | """ 275 | assert(isinstance(weights, list)) 276 | for w in weights: 277 | assert(isinstance(w, int)) 278 | self._weights = weights 279 | 280 | 281 | def SetFieldWeights (self, weights): 282 | """ 283 | Bind per-field weights by name; expects (name,field_weight) dictionary as argument. 
284 | """ 285 | assert(isinstance(weights,dict)) 286 | for key,val in weights.items(): 287 | assert(isinstance(key,str)) 288 | assert(isinstance(val,int)) 289 | self._fieldweights = weights 290 | 291 | 292 | def SetIndexWeights (self, weights): 293 | """ 294 | Bind per-index weights by name; expects (name,index_weight) dictionary as argument. 295 | """ 296 | assert(isinstance(weights,dict)) 297 | for key,val in weights.items(): 298 | assert(isinstance(key,str)) 299 | assert(isinstance(val,int)) 300 | self._indexweights = weights 301 | 302 | 303 | def SetIDRange (self, minid, maxid): 304 | """ 305 | Set IDs range to match. 306 | Only match records if document ID is beetwen $min and $max (inclusive). 307 | """ 308 | assert(isinstance(minid, int)) 309 | assert(isinstance(maxid, int)) 310 | assert(minid<=maxid) 311 | self._min_id = minid 312 | self._max_id = maxid 313 | 314 | 315 | def SetFilter ( self, attribute, values, exclude=0 ): 316 | """ 317 | Set values set filter. 318 | Only match records where 'attribute' value is in given 'values' set. 319 | """ 320 | assert(isinstance(attribute, str)) 321 | assert(isinstance(values, list)) 322 | assert(values) 323 | 324 | for value in values: 325 | assert(isinstance(value, (int, long))) 326 | 327 | self._filters.append ( { 'type':SPH_FILTER_VALUES, 'attr':attribute, 'exclude':exclude, 'values':values } ) 328 | 329 | 330 | def SetFilterRange (self, attribute, min_, max_, exclude=0 ): 331 | """ 332 | Set range filter. 333 | Only match records if 'attribute' value is beetwen 'min_' and 'max_' (inclusive). 334 | """ 335 | assert(isinstance(attribute, str)) 336 | assert(isinstance(min_, int)) 337 | assert(isinstance(max_, int)) 338 | assert(min_<=max_) 339 | 340 | self._filters.append ( { 'type':SPH_FILTER_RANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } ) 341 | 342 | 343 | def SetFilterFloatRange (self, attribute, min_, max_, exclude=0 ): 344 | assert(isinstance(attribute,str)) 345 | assert(isinstance(min_,float)) 346 | assert(isinstance(max_,float)) 347 | assert(min_ <= max_) 348 | self._filters.append ( {'type':SPH_FILTER_FLOATRANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_} ) 349 | 350 | 351 | def SetGeoAnchor (self, attrlat, attrlong, latitude, longitude): 352 | assert(isinstance(attrlat,str)) 353 | assert(isinstance(attrlong,str)) 354 | assert(isinstance(latitude,float)) 355 | assert(isinstance(longitude,float)) 356 | self._anchor['attrlat'] = attrlat 357 | self._anchor['attrlong'] = attrlong 358 | self._anchor['lat'] = latitude 359 | self._anchor['long'] = longitude 360 | 361 | 362 | def SetGroupBy ( self, attribute, func, groupsort='@group desc' ): 363 | """ 364 | Set grouping attribute and function. 365 | """ 366 | assert(isinstance(attribute, str)) 367 | assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR] ) 368 | assert(isinstance(groupsort, str)) 369 | 370 | self._groupby = attribute 371 | self._groupfunc = func 372 | self._groupsort = groupsort 373 | 374 | 375 | def SetGroupDistinct (self, attribute): 376 | assert(isinstance(attribute,str)) 377 | self._groupdistinct = attribute 378 | 379 | 380 | def SetRetries (self, count, delay=0): 381 | assert(isinstance(count,int) and count>=0) 382 | assert(isinstance(delay,int) and delay>=0) 383 | self._retrycount = count 384 | self._retrydelay = delay 385 | 386 | 387 | def ResetFilters (self): 388 | """ 389 | Clear all filters (for multi-queries). 
390 | """ 391 | self._filters = [] 392 | self._anchor = {} 393 | 394 | 395 | def ResetGroupBy (self): 396 | """ 397 | Clear groupby settings (for multi-queries). 398 | """ 399 | self._groupby = '' 400 | self._groupfunc = SPH_GROUPBY_DAY 401 | self._groupsort = '@group desc' 402 | self._groupdistinct = '' 403 | 404 | 405 | def Query (self, query, index='*', comment=''): 406 | """ 407 | Connect to searchd server and run given search query. 408 | Returns None on failure; result set hash on success (see documentation for details). 409 | """ 410 | assert(len(self._reqs)==0) 411 | self.AddQuery(query,index,comment) 412 | results = self.RunQueries() 413 | 414 | if not results or len(results)==0: 415 | return None 416 | self._error = results[0]['error'] 417 | self._warning = results[0]['warning'] 418 | if results[0]['status'] == SEARCHD_ERROR: 419 | return None 420 | return results[0] 421 | 422 | 423 | def AddQuery (self, query, index='*', comment=''): 424 | """ 425 | Add query to batch. 426 | """ 427 | # build request 428 | req = [pack('>5L', self._offset, self._limit, self._mode, self._ranker, self._sort)] 429 | req.append(pack('>L', len(self._sortby))) 430 | req.append(self._sortby) 431 | 432 | if isinstance(query,unicode): 433 | query = query.encode('utf-8') 434 | assert(isinstance(query,str)) 435 | 436 | req.append(pack('>L', len(query))) 437 | req.append(query) 438 | 439 | req.append(pack('>L', len(self._weights))) 440 | for w in self._weights: 441 | req.append(pack('>L', w)) 442 | req.append(pack('>L', len(index))) 443 | req.append(index) 444 | req.append(pack('>L',0)) # id64 range marker FIXME! IMPLEMENT! 445 | req.append(pack('>L', self._min_id)) 446 | req.append(pack('>L', self._max_id)) 447 | 448 | # filters 449 | req.append ( pack ( '>L', len(self._filters) ) ) 450 | for f in self._filters: 451 | req.append ( pack ( '>L', len(f['attr'])) + f['attr']) 452 | filtertype = f['type'] 453 | req.append ( pack ( '>L', filtertype)) 454 | if filtertype == SPH_FILTER_VALUES: 455 | req.append ( pack ('>L', len(f['values']))) 456 | for val in f['values']: 457 | req.append ( pack ('>L', val)) 458 | elif filtertype == SPH_FILTER_RANGE: 459 | req.append ( pack ('>2L', f['min'], f['max'])) 460 | elif filtertype == SPH_FILTER_FLOATRANGE: 461 | req.append ( pack ('>2f', f['min'], f['max'])) 462 | req.append ( pack ( '>L', f['exclude'] ) ) 463 | 464 | # group-by, max-matches, group-sort 465 | req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) ) 466 | req.append ( self._groupby ) 467 | req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) ) 468 | req.append ( self._groupsort ) 469 | req.append ( pack ( '>LLL', self._cutoff, self._retrycount, self._retrydelay)) 470 | req.append ( pack ( '>L', len(self._groupdistinct))) 471 | req.append ( self._groupdistinct) 472 | 473 | # anchor point 474 | if len(self._anchor) == 0: 475 | req.append ( pack ('>L', 0)) 476 | else: 477 | attrlat, attrlong = self._anchor['attrlat'], self._anchor['attrlong'] 478 | latitude, longitude = self._anchor['lat'], self._anchor['long'] 479 | req.append ( pack ('>L', 1)) 480 | req.append ( pack ('>L', len(attrlat)) + attrlat) 481 | req.append ( pack ('>L', len(attrlong)) + attrlong) 482 | req.append ( pack ('>f', latitude) + pack ('>f', longitude)) 483 | 484 | # per-index weights 485 | req.append ( pack ('>L',len(self._indexweights))) 486 | for indx,weight in self._indexweights.items(): 487 | req.append ( pack ('>L',len(indx)) + indx + pack ('>L',weight)) 488 | 489 | # max query time 490 | req.append ( pack 
('>L', self._maxquerytime) )
491 | 
492 | 		# per-field weights
493 | 		req.append ( pack ('>L',len(self._fieldweights) ) )
494 | 		for field,weight in self._fieldweights.items():
495 | 			req.append ( pack ('>L',len(field)) + field + pack ('>L',weight) )
496 | 
497 | 		# comment
498 | 		req.append ( pack('>L',len(comment)) + comment )
499 | 
500 | 		# send query, get response
501 | 		req = ''.join(req)
502 | 
503 | 		self._reqs.append(req)
504 | 		return
505 | 
506 | 
507 | 	def RunQueries (self):
508 | 		"""
509 | 		Run queries batch.
510 | 		Returns None on network IO failure; or an array of result set hashes on success.
511 | 		"""
512 | 		if len(self._reqs)==0:
513 | 			self._error = 'no queries defined, issue AddQuery() first'
514 | 			return None
515 | 
516 | 		sock = self._Connect()
517 | 		if not sock:
518 | 			return None
519 | 
520 | 		req = ''.join(self._reqs)
521 | 		length = len(req)+4
522 | 		req = pack('>HHLL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length, len(self._reqs))+req
523 | 		sock.send(req)
524 | 
525 | 		response = self._GetResponse(sock, VER_COMMAND_SEARCH)
526 | 		if not response:
527 | 			return None
528 | 
529 | 		nreqs = len(self._reqs)
530 | 
531 | 		# parse response
532 | 		max_ = len(response)
533 | 		p = 0
534 | 
535 | 		results = []
536 | 		for i in range(0,nreqs,1):
537 | 			result = {}
538 | 			result['error'] = ''
539 | 			result['warning'] = ''
540 | 			status = unpack('>L', response[p:p+4])[0]
541 | 			p += 4
542 | 			result['status'] = status
543 | 			if status != SEARCHD_OK:
544 | 				length = unpack('>L', response[p:p+4])[0]
545 | 				p += 4
546 | 				message = response[p:p+length]
547 | 				p += length
548 | 
549 | 				if status == SEARCHD_WARNING:
550 | 					result['warning'] = message
551 | 				else:
552 | 					result['error'] = message
553 | 					continue
554 | 
555 | 			# read schema
556 | 			fields = []
557 | 			attrs = []
558 | 
559 | 			nfields = unpack('>L', response[p:p+4])[0]
560 | 			p += 4
561 | 			while nfields>0 and p<max_:
562 | 				nfields -= 1
563 | 				length = unpack('>L', response[p:p+4])[0]
564 | 				p += 4
565 | 				fields.append(response[p:p+length])
566 | 				p += length
567 | 
568 | 			result['fields'] = fields
569 | 
570 | 			nattrs = unpack('>L', response[p:p+4])[0]
571 | 			p += 4
572 | 			while nattrs>0 and p<max_:
573 | 				nattrs -= 1
574 | 				length = unpack('>L', response[p:p+4])[0]
575 | 				p += 4
576 | 				attr = response[p:p+length]
577 | 				p += length
578 | 				type_ = unpack('>L', response[p:p+4])[0]
579 | 				p += 4
580 | 				attrs.append([attr,type_])
581 | 
582 | 			result['attrs'] = attrs
583 | 
584 | 			# read match count
585 | 			count = unpack('>L', response[p:p+4])[0]
586 | 			p += 4
587 | 			id64 = unpack('>L', response[p:p+4])[0]
588 | 			p += 4
589 | 
590 | 			# read matches
591 | 			result['matches'] = []
592 | 			while count>0 and p<max_:
593 | 				count -= 1
594 | 				if id64:
595 | 					doc, dochi, weight = unpack('>3L', response[p:p+12])
596 | 					doc += (dochi<<32)
597 | 					p += 12
598 | 				else:
599 | 					doc, weight = unpack('>2L', response[p:p+8])
600 | 					p += 8
601 | 
602 | 				match = { 'id':doc, 'weight':weight, 'attrs':{} }
603 | 				for i in range(len(attrs)):
604 | 					if attrs[i][1] == SPH_ATTR_FLOAT:
605 | 						match['attrs'][attrs[i][0]] = unpack('>f', response[p:p+4])[0]
606 | 					elif attrs[i][1] == (SPH_ATTR_MULTI | SPH_ATTR_INTEGER):
607 | 						match['attrs'][attrs[i][0]] = []
608 | 						nvals = unpack('>L', response[p:p+4])[0]
609 | 						p += 4
610 | 						for n in range(0,nvals,1):
611 | 							match['attrs'][attrs[i][0]].append(unpack('>L', response[p:p+4])[0])
612 | 							p += 4
613 | 						p -= 4
614 | 					else:
615 | 						match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0]
616 | 					p += 4
617 | 
618 | 				result['matches'].append ( match )
619 | 
620 | 			result['total'], result['total_found'], result['time'], words = unpack('>4L', response[p:p+16])
621 | 
622 | 			result['time'] = '%.3f' % (result['time']/1000.0)
623 | 			p += 16
624 | 
625 | 			result['words'] = []
626 | 			while words>0:
627 | 				words -= 1
627 | words -= 1
628 | length = unpack('>L', response[p:p+4])[0]
629 | p += 4
630 | word = response[p:p+length]
631 | p += length
632 | docs, hits = unpack('>2L', response[p:p+8])
633 | p += 8
634 |
635 | result['words'].append({'word':word, 'docs':docs, 'hits':hits})
636 |
637 | results.append(result)
638 |
639 | self._reqs = []
640 | sock.close()
641 | return results
642 |
643 |
644 | def BuildExcerpts (self, docs, index, words, opts=None):
645 | """
646 | Connect to searchd server and generate excerpts from given documents.
647 | """
648 | if not opts:
649 | opts = {}
650 | if isinstance(words,unicode):
651 | words = words.encode('utf-8')
652 |
653 | assert(isinstance(docs, list))
654 | assert(isinstance(index, str))
655 | assert(isinstance(words, str))
656 | assert(isinstance(opts, dict))
657 |
658 | sock = self._Connect()
659 |
660 | if not sock:
661 | return None
662 |
663 | # fixup options
664 | opts.setdefault('before_match', '')
665 | opts.setdefault('after_match', '')
666 | opts.setdefault('chunk_separator', ' ... ')
667 | opts.setdefault('limit', 256)
668 | opts.setdefault('around', 5)
669 |
670 | # build request
671 | # v.1.0 req
672 |
673 | # mode=0, flags=1 (remove spaces)
674 | req = [pack('>2L', 0, 1)]
675 |
676 | # req index
677 | req.append(pack('>L', len(index)))
678 | req.append(index)
679 |
680 | # req words
681 | req.append(pack('>L', len(words)))
682 | req.append(words)
683 |
684 | # options
685 | req.append(pack('>L', len(opts['before_match'])))
686 | req.append(opts['before_match'])
687 |
688 | req.append(pack('>L', len(opts['after_match'])))
689 | req.append(opts['after_match'])
690 |
691 | req.append(pack('>L', len(opts['chunk_separator'])))
692 | req.append(opts['chunk_separator'])
693 |
694 | req.append(pack('>L', int(opts['limit'])))
695 | req.append(pack('>L', int(opts['around'])))
696 |
697 | # documents
698 | req.append(pack('>L', len(docs)))
699 | for doc in docs:
700 | if isinstance(doc,unicode):
701 | doc = doc.encode('utf-8')
702 | assert(isinstance(doc, str))
703 | req.append(pack('>L', len(doc)))
704 | req.append(doc)
705 |
706 | req = ''.join(req)
707 |
708 | # send query, get response
709 | length = len(req)
710 |
711 | # add header
712 | req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req
713 | wrote = sock.send(req)
714 |
715 | response = self._GetResponse(sock, VER_COMMAND_EXCERPT )
716 | if not response:
717 | return []
718 |
719 | # parse response
720 | pos = 0
721 | res = []
722 | rlen = len(response)
723 |
724 | for i in range(len(docs)):
725 | length = unpack('>L', response[pos:pos+4])[0]
726 | pos += 4
727 |
728 | if pos+length > rlen:
729 | self._error = 'incomplete reply'
730 | return []
731 |
732 | res.append(response[pos:pos+length])
733 | pos += length
734 |
735 | return res
736 |
737 |
738 | def UpdateAttributes ( self, index, attrs, values ):
739 | """
740 | Update given attribute values on given documents in given indexes.
741 | Returns the number of updated documents (0 or more) on success, or -1 on failure.
742 |
743 | 'attrs' must be a list of strings.
744 | 'values' must be a dict with int key (document ID) and list of int values (new attribute values).
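Note: document IDs are packed as signed 64-bit values ('>q' in the
request builder below), so IDs above 2**31 round-trip safely here.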
745 |
746 | Example:
747 | res = cl.UpdateAttributes ( 'test1', [ 'group_id', 'date_added' ], { 2:[123,1000000000], 4:[456,1234567890] } )
748 | """
749 | assert ( isinstance ( index, str ) )
750 | assert ( isinstance ( attrs, list ) )
751 | assert ( isinstance ( values, dict ) )
752 | for attr in attrs:
753 | assert ( isinstance ( attr, str ) )
754 | for docid, entry in values.items():
755 | assert ( isinstance ( docid, int ) )
756 | assert ( isinstance ( entry, list ) )
757 | assert ( len(attrs)==len(entry) )
758 | for val in entry:
759 | assert ( isinstance ( val, int ) )
760 |
761 | # build request
762 | req = [ pack('>L',len(index)), index ]
763 |
764 | req.append ( pack('>L',len(attrs)) )
765 | for attr in attrs:
766 | req.append ( pack('>L',len(attr)) + attr )
767 |
768 | req.append ( pack('>L',len(values)) )
769 | for docid, entry in values.items():
770 | req.append ( pack('>q',docid) )
771 | for val in entry:
772 | req.append ( pack('>L',val) )
773 |
774 | # connect, send query, get response
775 | sock = self._Connect()
776 | if not sock:
777 | return None
778 |
779 | req = ''.join(req)
780 | length = len(req)
781 | req = pack ( '>2HL', SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, length ) + req
782 | wrote = sock.send ( req )
783 |
784 | response = self._GetResponse ( sock, VER_COMMAND_UPDATE )
785 | if not response:
786 | return -1
787 |
788 | # parse response
789 | updated = unpack ( '>L', response[0:4] )[0]
790 | return updated
791 |
792 |
793 | def BuildKeywords ( self, query, index, hits ):
794 | """
795 | Connect to searchd server, and generate keywords list for a given query.
796 | Returns None on failure, or a list of keywords on success.
797 | """
798 | assert ( isinstance ( query, str ) )
799 | assert ( isinstance ( index, str ) )
800 | assert ( isinstance ( hits, int ) )
801 |
802 | # build request
803 | req = [ pack ( '>L', len(query) ) + query ]
804 | req.append ( pack ( '>L', len(index) ) + index )
805 | req.append ( pack ( '>L', hits ) )
806 |
807 | # connect, send query, get response
808 | sock = self._Connect()
809 | if not sock:
810 | return None
811 |
812 | req = ''.join(req)
813 | length = len(req)
814 | req = pack ( '>2HL', SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, length ) + req
815 | wrote = sock.send ( req )
816 |
817 | response = self._GetResponse ( sock, VER_COMMAND_KEYWORDS )
818 | if not response:
819 | return None
820 |
821 | # parse response
822 | res = []
823 |
824 | nwords = unpack ( '>L', response[0:4] )[0]
825 | p = 4
826 | max_ = len(response)
827 |
828 | while nwords>0 and p<max_:
829 | nwords -= 1
830 |
831 | length = unpack ( '>L', response[p:p+4] )[0]
832 | p += 4
833 | tokenized = response[p:p+length]
834 | p += length
835 |
836 | length = unpack ( '>L', response[p:p+4] )[0]
837 | p += 4
838 | normalized = response[p:p+length]
839 | p += length
840 |
841 | entry = { 'tokenized':tokenized, 'normalized':normalized }
842 | if hits:
843 | entry['docs'], entry['hits'] = unpack ( '>2L', response[p:p+8] )
844 | p += 8
845 |
846 | res.append ( entry )
847 |
848 | if nwords>0 or p>max_:
849 | self._error = 'incomplete reply'
850 | return None
851 |
852 | return res
853 | #
854 | # $Id: sphinxapi.py 1216 2008-03-14 23:25:39Z shodan $
855 | #
--------------------------------------------------------------------------------
/djangosphinx/apis/api278/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # $Id: sphinxapi.py 2055 2009-11-06 23:09:58Z shodan $
3 | #
4 | # Python version of Sphinx searchd client (Python API)
5 | #
6 | # Copyright (c)
2006-2008, Andrew Aksyonoff
7 | # Copyright (c) 2006, Mike Osadnik
8 | # All rights reserved
9 | #
10 | # This program is free software; you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License. You should have
12 | # received a copy of the GPL license along with this program; if you
13 | # did not, you can find it at http://www.gnu.org/
14 | #
15 |
16 | import sys
17 | import select
18 | import socket
19 | import re
20 | from struct import *
21 |
22 |
23 | # known searchd commands
24 | SEARCHD_COMMAND_SEARCH = 0
25 | SEARCHD_COMMAND_EXCERPT = 1
26 | SEARCHD_COMMAND_UPDATE = 2
27 | SEARCHD_COMMAND_KEYWORDS= 3
28 | SEARCHD_COMMAND_PERSIST = 4
29 |
30 | # current client-side command implementation versions
31 | VER_COMMAND_SEARCH = 0x116
32 | VER_COMMAND_EXCERPT = 0x100
33 | VER_COMMAND_UPDATE = 0x101
34 | VER_COMMAND_KEYWORDS = 0x100
35 |
36 | # known searchd status codes
37 | SEARCHD_OK = 0
38 | SEARCHD_ERROR = 1
39 | SEARCHD_RETRY = 2
40 | SEARCHD_WARNING = 3
41 |
42 | # known match modes
43 | SPH_MATCH_ALL = 0
44 | SPH_MATCH_ANY = 1
45 | SPH_MATCH_PHRASE = 2
46 | SPH_MATCH_BOOLEAN = 3
47 | SPH_MATCH_EXTENDED = 4
48 | SPH_MATCH_FULLSCAN = 5
49 | SPH_MATCH_EXTENDED2 = 6
50 |
51 | # known ranking modes (extended2 mode only)
52 | SPH_RANK_PROXIMITY_BM25 = 0 # default mode, phrase proximity major factor and BM25 minor one
53 | SPH_RANK_BM25 = 1 # statistical mode, BM25 ranking only (faster but worse quality)
54 | SPH_RANK_NONE = 2 # no ranking, all matches get a weight of 1
55 | SPH_RANK_WORDCOUNT = 3 # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
56 |
57 | # known sort modes
58 | SPH_SORT_RELEVANCE = 0
59 | SPH_SORT_ATTR_DESC = 1
60 | SPH_SORT_ATTR_ASC = 2
61 | SPH_SORT_TIME_SEGMENTS = 3
62 | SPH_SORT_EXTENDED = 4
63 | SPH_SORT_EXPR = 5
64 |
65 | # known filter types
66 | SPH_FILTER_VALUES = 0
67 | SPH_FILTER_RANGE = 1
68 | SPH_FILTER_FLOATRANGE = 2
69 |
70 | # known attribute types
71 | SPH_ATTR_NONE = 0
72 | SPH_ATTR_INTEGER = 1
73 | SPH_ATTR_TIMESTAMP = 2
74 | SPH_ATTR_ORDINAL = 3
75 | SPH_ATTR_BOOL = 4
76 | SPH_ATTR_FLOAT = 5
77 | SPH_ATTR_BIGINT = 6
78 | SPH_ATTR_MULTI = 0X40000000L
79 |
80 | SPH_ATTR_TYPES = (SPH_ATTR_NONE,
81 | SPH_ATTR_INTEGER,
82 | SPH_ATTR_TIMESTAMP,
83 | SPH_ATTR_ORDINAL,
84 | SPH_ATTR_BOOL,
85 | SPH_ATTR_FLOAT,
86 | SPH_ATTR_BIGINT,
87 | SPH_ATTR_MULTI)
88 |
89 | # known grouping functions
90 | SPH_GROUPBY_DAY = 0
91 | SPH_GROUPBY_WEEK = 1
92 | SPH_GROUPBY_MONTH = 2
93 | SPH_GROUPBY_YEAR = 3
94 | SPH_GROUPBY_ATTR = 4
95 | SPH_GROUPBY_ATTRPAIR = 5
96 |
97 |
98 | class SphinxClient:
99 | def __init__ (self):
100 | """
101 | Create a new client object, and fill defaults.
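A minimal usage sketch (assuming a searchd reachable at the default
localhost:9312; 'test1' is a placeholder index name):

    cl = SphinxClient()
    cl.SetServer('localhost', 9312)
    res = cl.Query('hello world', 'test1')
    if res is None:
        print cl.GetLastError()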
102 | """ 103 | self._host = 'localhost' # searchd host (default is "localhost") 104 | self._port = 9312 # searchd port (default is 9312) 105 | self._path = None # searchd unix-domain socket path 106 | self._socket = None 107 | self._offset = 0 # how much records to seek from result-set start (default is 0) 108 | self._limit = 20 # how much records to return from result-set starting at offset (default is 20) 109 | self._mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL) 110 | self._weights = [] # per-field weights (default is 1 for all fields) 111 | self._sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE) 112 | self._sortby = '' # attribute to sort by (defualt is "") 113 | self._min_id = 0 # min ID to match (default is 0) 114 | self._max_id = 0 # max ID to match (default is UINT_MAX) 115 | self._filters = [] # search filters 116 | self._groupby = '' # group-by attribute name 117 | self._groupfunc = SPH_GROUPBY_DAY # group-by function (to pre-process group-by attribute value with) 118 | self._groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with) 119 | self._groupdistinct = '' # group-by count-distinct attribute 120 | self._maxmatches = 1000 # max matches to retrieve 121 | self._cutoff = 0 # cutoff to stop searching at 122 | self._retrycount = 0 # distributed retry count 123 | self._retrydelay = 0 # distributed retry delay 124 | self._anchor = {} # geographical anchor point 125 | self._indexweights = {} # per-index weights 126 | self._ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode 127 | self._maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit) 128 | self._fieldweights = {} # per-field-name weights 129 | self._overrides = {} # per-query attribute values overrides 130 | self._select = '*' # select-list (attributes or expressions, with optional aliases) 131 | 132 | self._error = '' # last error message 133 | self._warning = '' # last warning message 134 | self._reqs = [] # requests array for multi-query 135 | 136 | def __del__ (self): 137 | if self._socket: 138 | self._socket.close() 139 | 140 | 141 | def GetLastError (self): 142 | """ 143 | Get last error message (string). 144 | """ 145 | return self._error 146 | 147 | 148 | def GetLastWarning (self): 149 | """ 150 | Get last warning message (string). 151 | """ 152 | return self._warning 153 | 154 | 155 | def SetServer (self, host, port = None): 156 | """ 157 | Set searchd server host and port. 158 | """ 159 | assert(isinstance(host, str)) 160 | if host.startswith('/'): 161 | self._path = host 162 | return 163 | elif host.startswith('unix://'): 164 | self._path = host[7:] 165 | return 166 | assert(isinstance(port, int)) 167 | self._host = host 168 | self._port = port 169 | self._path = None 170 | 171 | 172 | def _Connect (self): 173 | """ 174 | INTERNAL METHOD, DO NOT CALL. Connects to searchd server. 175 | """ 176 | if self._socket: 177 | # we have a socket, but is it still alive? 
178 | sr, sw, _ = select.select ( [self._socket], [self._socket], [], 0 )
179 |
180 | # this is how alive socket should look
181 | if len(sr)==0 and len(sw)==1:
182 | return self._socket
183 |
184 | # oops, looks like it was closed, lets reopen
185 | self._socket.close()
186 | self._socket = None
187 |
188 | try:
189 | if self._path:
190 | af = socket.AF_UNIX
191 | addr = self._path
192 | desc = self._path
193 | else:
194 | af = socket.AF_INET
195 | addr = ( self._host, self._port )
196 | desc = '%s;%s' % addr
197 | sock = socket.socket ( af, socket.SOCK_STREAM )
198 | sock.connect ( addr )
199 | except socket.error, msg:
200 | if sock:
201 | sock.close()
202 | self._error = 'connection to %s failed (%s)' % ( desc, msg )
203 | return
204 |
205 | v = unpack('>L', sock.recv(4))
206 | if v<1:
207 | sock.close()
208 | self._error = 'expected searchd protocol version, got %s' % v
209 | return
210 |
211 | # all ok, send my version
212 | sock.send(pack('>L', 1))
213 | return sock
214 |
215 |
216 | def _GetResponse (self, sock, client_ver):
217 | """
218 | INTERNAL METHOD, DO NOT CALL. Gets and checks response packet from searchd server.
219 | """
220 | (status, ver, length) = unpack('>2HL', sock.recv(8))
221 | response = ''
222 | left = length
223 | while left>0:
224 | chunk = sock.recv(left)
225 | if chunk:
226 | response += chunk
227 | left -= len(chunk)
228 | else:
229 | break
230 |
231 | if not self._socket:
232 | sock.close()
233 |
234 | # check response
235 | read = len(response)
236 | if not response or read!=length:
237 | if length:
238 | self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \
239 | % (status, ver, length, read)
240 | else:
241 | self._error = 'received zero-sized searchd response'
242 | return None
243 |
244 | # check status
245 | if status==SEARCHD_WARNING:
246 | wend = 4 + unpack ( '>L', response[0:4] )[0]
247 | self._warning = response[4:wend]
248 | return response[wend:]
249 |
250 | if status==SEARCHD_ERROR:
251 | self._error = 'searchd error: '+response[4:]
252 | return None
253 |
254 | if status==SEARCHD_RETRY:
255 | self._error = 'temporary searchd error: '+response[4:]
256 | return None
257 |
258 | if status!=SEARCHD_OK:
259 | self._error = 'unknown status code %d' % status
260 | return None
261 |
262 | # check version
263 | if ver<client_ver:
264 | self._warning = 'searchd command v.%d.%d older than client\'s v.%d.%d, some options might not work' \
265 | % (ver>>8, ver&0xff, client_ver>>8, client_ver&0xff)
266 |
267 | return response
268 |
269 |
270 | def SetLimits (self, offset, limit, maxmatches=0, cutoff=0):
271 | """
272 | Set offset and count into result set, and optionally set max-matches and cutoff limits.
273 | """
274 | assert ( type(offset) in [int,long] and 0<=offset<16777216 )
275 | assert ( type(limit) in [int,long] and 0<limit<16777216 )
276 | assert(maxmatches>=0)
277 | self._offset = offset
278 | self._limit = limit
279 | if maxmatches>0:
280 | self._maxmatches = maxmatches
281 | if cutoff>=0:
282 | self._cutoff = cutoff
283 |
284 |
285 | def SetMaxQueryTime (self, maxquerytime):
286 | """
287 | Set maximum query time, in milliseconds, per-index. 0 means 'do not limit'.
288 | """
289 | assert(isinstance(maxquerytime,int) and maxquerytime>0)
290 | self._maxquerytime = maxquerytime
291 |
292 |
293 | def SetMatchMode (self, mode):
294 | """
295 | Set matching mode.
296 | """
297 | assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED, SPH_MATCH_FULLSCAN, SPH_MATCH_EXTENDED2])
298 | self._mode = mode
299 |
300 |
301 | def SetRankingMode (self, ranker):
302 | """
303 | Set ranking mode.
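Rankers only take effect under SPH_MATCH_EXTENDED2 matching (see the
"known ranking modes" notes above); e.g. SetRankingMode(SPH_RANK_BM25)
trades ranking quality for speed, and SPH_RANK_NONE gives every match
a weight of 1.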
304 | """ 305 | assert(ranker in [SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE, SPH_RANK_WORDCOUNT]) 306 | self._ranker = ranker 307 | 308 | 309 | def SetSortMode ( self, mode, clause='' ): 310 | """ 311 | Set sorting mode. 312 | """ 313 | assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR] ) 314 | assert ( isinstance ( clause, str ) ) 315 | self._sort = mode 316 | self._sortby = clause 317 | 318 | 319 | def SetWeights (self, weights): 320 | """ 321 | Set per-field weights. 322 | WARNING, DEPRECATED; do not use it! use SetFieldWeights() instead 323 | """ 324 | assert(isinstance(weights, list)) 325 | for w in weights: 326 | assert(isinstance(w, int)) 327 | self._weights = weights 328 | 329 | 330 | def SetFieldWeights (self, weights): 331 | """ 332 | Bind per-field weights by name; expects (name,field_weight) dictionary as argument. 333 | """ 334 | assert(isinstance(weights,dict)) 335 | for key,val in weights.items(): 336 | assert(isinstance(key,str)) 337 | assert(isinstance(val,int)) 338 | self._fieldweights = weights 339 | 340 | 341 | def SetIndexWeights (self, weights): 342 | """ 343 | Bind per-index weights by name; expects (name,index_weight) dictionary as argument. 344 | """ 345 | assert(isinstance(weights,dict)) 346 | for key,val in weights.items(): 347 | assert(isinstance(key,str)) 348 | assert(isinstance(val,int)) 349 | self._indexweights = weights 350 | 351 | 352 | def SetIDRange (self, minid, maxid): 353 | """ 354 | Set IDs range to match. 355 | Only match records if document ID is beetwen $min and $max (inclusive). 356 | """ 357 | assert(isinstance(minid, (int, long))) 358 | assert(isinstance(maxid, (int, long))) 359 | assert(minid<=maxid) 360 | self._min_id = minid 361 | self._max_id = maxid 362 | 363 | 364 | def SetFilter ( self, attribute, values, exclude=0 ): 365 | """ 366 | Set values set filter. 367 | Only match records where 'attribute' value is in given 'values' set. 368 | """ 369 | assert(isinstance(attribute, str)) 370 | assert iter(values) 371 | 372 | for value in values: 373 | assert(isinstance(value, (int, long))) 374 | 375 | self._filters.append ( { 'type':SPH_FILTER_VALUES, 'attr':attribute, 'exclude':exclude, 'values':values } ) 376 | 377 | 378 | def SetFilterRange (self, attribute, min_, max_, exclude=0 ): 379 | """ 380 | Set range filter. 381 | Only match records if 'attribute' value is beetwen 'min_' and 'max_' (inclusive). 
382 | """ 383 | assert(isinstance(attribute, str)) 384 | assert(isinstance(min_, int)) 385 | assert(isinstance(max_, int)) 386 | assert(min_<=max_) 387 | 388 | self._filters.append ( { 'type':SPH_FILTER_RANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } ) 389 | 390 | 391 | def SetFilterFloatRange (self, attribute, min_, max_, exclude=0 ): 392 | assert(isinstance(attribute,str)) 393 | assert(isinstance(min_,float)) 394 | assert(isinstance(max_,float)) 395 | assert(min_ <= max_) 396 | self._filters.append ( {'type':SPH_FILTER_FLOATRANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_} ) 397 | 398 | 399 | def SetGeoAnchor (self, attrlat, attrlong, latitude, longitude): 400 | assert(isinstance(attrlat,str)) 401 | assert(isinstance(attrlong,str)) 402 | assert(isinstance(latitude,float)) 403 | assert(isinstance(longitude,float)) 404 | self._anchor['attrlat'] = attrlat 405 | self._anchor['attrlong'] = attrlong 406 | self._anchor['lat'] = latitude 407 | self._anchor['long'] = longitude 408 | 409 | 410 | def SetGroupBy ( self, attribute, func, groupsort='@group desc' ): 411 | """ 412 | Set grouping attribute and function. 413 | """ 414 | assert(isinstance(attribute, str)) 415 | assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR, SPH_GROUPBY_ATTRPAIR] ) 416 | assert(isinstance(groupsort, str)) 417 | 418 | self._groupby = attribute 419 | self._groupfunc = func 420 | self._groupsort = groupsort 421 | 422 | 423 | def SetGroupDistinct (self, attribute): 424 | assert(isinstance(attribute,str)) 425 | self._groupdistinct = attribute 426 | 427 | 428 | def SetRetries (self, count, delay=0): 429 | assert(isinstance(count,int) and count>=0) 430 | assert(isinstance(delay,int) and delay>=0) 431 | self._retrycount = count 432 | self._retrydelay = delay 433 | 434 | 435 | def SetOverride (self, name, type, values): 436 | assert(isinstance(name, str)) 437 | assert(type in SPH_ATTR_TYPES) 438 | assert(isinstance(values, dict)) 439 | 440 | self._overrides[name] = {'name': name, 'type': type, 'values': values} 441 | 442 | def SetSelect (self, select): 443 | assert(isinstance(select, str)) 444 | self._select = select 445 | 446 | 447 | def ResetOverrides (self): 448 | self._overrides = {} 449 | 450 | 451 | def ResetFilters (self): 452 | """ 453 | Clear all filters (for multi-queries). 454 | """ 455 | self._filters = [] 456 | self._anchor = {} 457 | 458 | 459 | def ResetGroupBy (self): 460 | """ 461 | Clear groupby settings (for multi-queries). 462 | """ 463 | self._groupby = '' 464 | self._groupfunc = SPH_GROUPBY_DAY 465 | self._groupsort = '@group desc' 466 | self._groupdistinct = '' 467 | 468 | 469 | def Query (self, query, index='*', comment=''): 470 | """ 471 | Connect to searchd server and run given search query. 472 | Returns None on failure; result set hash on success (see documentation for details). 473 | """ 474 | assert(len(self._reqs)==0) 475 | self.AddQuery(query,index,comment) 476 | results = self.RunQueries() 477 | 478 | if not results or len(results)==0: 479 | return None 480 | self._error = results[0]['error'] 481 | self._warning = results[0]['warning'] 482 | if results[0]['status'] == SEARCHD_ERROR: 483 | return None 484 | return results[0] 485 | 486 | 487 | def AddQuery (self, query, index='*', comment=''): 488 | """ 489 | Add query to batch. 
490 | """ 491 | # build request 492 | req = [pack('>5L', self._offset, self._limit, self._mode, self._ranker, self._sort)] 493 | req.append(pack('>L', len(self._sortby))) 494 | req.append(self._sortby) 495 | 496 | if isinstance(query,unicode): 497 | query = query.encode('utf-8') 498 | assert(isinstance(query,str)) 499 | 500 | req.append(pack('>L', len(query))) 501 | req.append(query) 502 | 503 | req.append(pack('>L', len(self._weights))) 504 | for w in self._weights: 505 | req.append(pack('>L', w)) 506 | req.append(pack('>L', len(index))) 507 | req.append(index) 508 | req.append(pack('>L',1)) # id64 range marker 509 | req.append(pack('>Q', self._min_id)) 510 | req.append(pack('>Q', self._max_id)) 511 | 512 | # filters 513 | req.append ( pack ( '>L', len(self._filters) ) ) 514 | for f in self._filters: 515 | req.append ( pack ( '>L', len(f['attr'])) + f['attr']) 516 | filtertype = f['type'] 517 | req.append ( pack ( '>L', filtertype)) 518 | if filtertype == SPH_FILTER_VALUES: 519 | req.append ( pack ('>L', len(f['values']))) 520 | for val in f['values']: 521 | req.append ( pack ('>q', val)) 522 | elif filtertype == SPH_FILTER_RANGE: 523 | req.append ( pack ('>2q', f['min'], f['max'])) 524 | elif filtertype == SPH_FILTER_FLOATRANGE: 525 | req.append ( pack ('>2f', f['min'], f['max'])) 526 | req.append ( pack ( '>L', f['exclude'] ) ) 527 | 528 | # group-by, max-matches, group-sort 529 | req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) ) 530 | req.append ( self._groupby ) 531 | req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) ) 532 | req.append ( self._groupsort ) 533 | req.append ( pack ( '>LLL', self._cutoff, self._retrycount, self._retrydelay)) 534 | req.append ( pack ( '>L', len(self._groupdistinct))) 535 | req.append ( self._groupdistinct) 536 | 537 | # anchor point 538 | if len(self._anchor) == 0: 539 | req.append ( pack ('>L', 0)) 540 | else: 541 | attrlat, attrlong = self._anchor['attrlat'], self._anchor['attrlong'] 542 | latitude, longitude = self._anchor['lat'], self._anchor['long'] 543 | req.append ( pack ('>L', 1)) 544 | req.append ( pack ('>L', len(attrlat)) + attrlat) 545 | req.append ( pack ('>L', len(attrlong)) + attrlong) 546 | req.append ( pack ('>f', latitude) + pack ('>f', longitude)) 547 | 548 | # per-index weights 549 | req.append ( pack ('>L',len(self._indexweights))) 550 | for indx,weight in self._indexweights.items(): 551 | req.append ( pack ('>L',len(indx)) + indx + pack ('>L',weight)) 552 | 553 | # max query time 554 | req.append ( pack ('>L', self._maxquerytime) ) 555 | 556 | # per-field weights 557 | req.append ( pack ('>L',len(self._fieldweights) ) ) 558 | for field,weight in self._fieldweights.items(): 559 | req.append ( pack ('>L',len(field)) + field + pack ('>L',weight) ) 560 | 561 | # comment 562 | req.append ( pack('>L',len(comment)) + comment ) 563 | 564 | # attribute overrides 565 | req.append ( pack('>L', len(self._overrides)) ) 566 | for v in self._overrides.values(): 567 | req.extend ( ( pack('>L', len(v['name'])), v['name'] ) ) 568 | req.append ( pack('>LL', v['type'], len(v['values'])) ) 569 | for id, value in v['values'].iteritems(): 570 | req.append ( pack('>Q', id) ) 571 | if v['type'] == SPH_ATTR_FLOAT: 572 | req.append ( pack('>f', value) ) 573 | elif v['type'] == SPH_ATTR_BIGINT: 574 | req.append ( pack('>q', value) ) 575 | else: 576 | req.append ( pack('>l', value) ) 577 | 578 | # select-list 579 | req.append ( pack('>L', len(self._select)) ) 580 | req.append ( self._select ) 581 | 582 | # send query, get 
response
583 | req = ''.join(req)
584 |
585 | self._reqs.append(req)
586 | return
587 |
588 |
589 | def RunQueries (self):
590 | """
591 | Run queries batch.
592 | Returns None on network IO failure; or an array of result set hashes on success.
593 | """
594 | if len(self._reqs)==0:
595 | self._error = 'no queries defined, issue AddQuery() first'
596 | return None
597 |
598 | sock = self._Connect()
599 | if not sock:
600 | return None
601 |
602 | req = ''.join(self._reqs)
603 | length = len(req)+4
604 | req = pack('>HHLL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length, len(self._reqs))+req
605 | sock.send(req)
606 |
607 | response = self._GetResponse(sock, VER_COMMAND_SEARCH)
608 | if not response:
609 | return None
610 |
611 | nreqs = len(self._reqs)
612 |
613 | # parse response
614 | max_ = len(response)
615 | p = 0
616 |
617 | results = []
618 | for i in range(0,nreqs,1):
619 | result = {}
620 | results.append(result)
621 |
622 | result['error'] = ''
623 | result['warning'] = ''
624 | status = unpack('>L', response[p:p+4])[0]
625 | p += 4
626 | result['status'] = status
627 | if status != SEARCHD_OK:
628 | length = unpack('>L', response[p:p+4])[0]
629 | p += 4
630 | message = response[p:p+length]
631 | p += length
632 |
633 | if status == SEARCHD_WARNING:
634 | result['warning'] = message
635 | else:
636 | result['error'] = message
637 | continue
638 |
639 | # read schema
640 | fields = []
641 | attrs = []
642 |
643 | nfields = unpack('>L', response[p:p+4])[0]
644 | p += 4
645 | while nfields>0 and p<max_:
646 | nfields -= 1
647 | length = unpack('>L', response[p:p+4])[0]
648 | p += 4
649 | fields.append(response[p:p+length])
650 | p += length
651 |
652 | result['fields'] = fields
653 |
654 | nattrs = unpack('>L', response[p:p+4])[0]
655 | p += 4
656 | while nattrs>0 and p<max_:
657 | nattrs -= 1
658 | length = unpack('>L', response[p:p+4])[0]
659 | p += 4
660 | attr = response[p:p+length]
661 | p += length
662 | type_ = unpack('>L', response[p:p+4])[0]
663 | p += 4
664 | attrs.append([attr,type_])
665 |
666 | result['attrs'] = attrs
667 |
668 | # read match count
669 | count = unpack('>L', response[p:p+4])[0]
670 | p += 4
671 | id64 = unpack('>L', response[p:p+4])[0]
672 | p += 4
673 |
674 | # read matches
675 | result['matches'] = []
676 | while count>0 and p<max_:
677 | count -= 1
678 | if id64:
679 | doc, weight = unpack('>QL', response[p:p+12])
680 | p += 12
681 | else:
682 | doc, weight = unpack('>2L', response[p:p+8])
683 | p += 8
684 |
685 | match = { 'id':doc, 'weight':weight, 'attrs':{} }
686 | for i in range(len(attrs)):
687 | if attrs[i][1] == SPH_ATTR_FLOAT:
688 | match['attrs'][attrs[i][0]] = unpack('>f', response[p:p+4])[0]
689 | elif attrs[i][1] == SPH_ATTR_BIGINT:
690 | match['attrs'][attrs[i][0]] = unpack('>q', response[p:p+8])[0]
691 | p += 8
692 | elif attrs[i][1] == (SPH_ATTR_MULTI | SPH_ATTR_INTEGER):
693 | match['attrs'][attrs[i][0]] = []
694 | nvals = unpack('>L', response[p:p+4])[0]
695 | p += 4
696 | for n in range(0,nvals,1):
697 | match['attrs'][attrs[i][0]].append(unpack('>L', response[p:p+4])[0])
698 | p += 4
699 | p -= 4
700 | else:
701 | match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0]
702 | p += 4
703 |
704 | result['matches'].append ( match )
705 |
706 | result['total'], result['total_found'], result['time'], words = unpack('>4L', response[p:p+16])
707 |
708 | result['time'] = '%.3f' % (result['time']/1000.0)
709 | p += 16
710 |
711 | result['words'] = []
712 | while words>0:
713 | words -= 1
714 | length = unpack('>L', response[p:p+4])[0]
715 | p += 4
716 | word = response[p:p+length]
717 | p += length
718 | docs, hits = unpack('>2L', response[p:p+8])
719 | p += 8
720 |
721 | result['words'].append({'word':word, 'docs':docs, 'hits':hits})
722 |
723 | self._reqs = []
724 | return results
725 |
726 |
727 | def BuildExcerpts (self, docs, index, words, opts=None):
728 | """
729 | Connect to searchd server and generate excerpts from given documents.
730 | """
731 | if not opts:
732 | opts = {}
733 | if isinstance(words,unicode):
734 | words = words.encode('utf-8')
735 |
736 | assert(isinstance(docs, list))
737 | assert(isinstance(index, str))
738 | assert(isinstance(words, str))
739 | assert(isinstance(opts, dict))
740 |
741 | sock = self._Connect()
742 |
743 | if not sock:
744 | return None
745 |
746 | # fixup options
747 | opts.setdefault('before_match', '')
748 | opts.setdefault('after_match', '')
749 | opts.setdefault('chunk_separator', ' ... ')
750 | opts.setdefault('limit', 256)
751 | opts.setdefault('around', 5)
752 |
753 | # build request
754 | # v.1.0 req
755 |
756 | flags = 1 # (remove spaces)
757 | if opts.get('exact_phrase'): flags |= 2
758 | if opts.get('single_passage'): flags |= 4
759 | if opts.get('use_boundaries'): flags |= 8
760 | if opts.get('weight_order'): flags |= 16
761 |
762 | # mode=0, flags
763 | req = [pack('>2L', 0, flags)]
764 |
765 | # req index
766 | req.append(pack('>L', len(index)))
767 | req.append(index)
768 |
769 | # req words
770 | req.append(pack('>L', len(words)))
771 | req.append(words)
772 |
773 | # options
774 | req.append(pack('>L', len(opts['before_match'])))
775 | req.append(opts['before_match'])
776 |
777 | req.append(pack('>L', len(opts['after_match'])))
778 | req.append(opts['after_match'])
779 |
780 | req.append(pack('>L', len(opts['chunk_separator'])))
781 | req.append(opts['chunk_separator'])
782 |
783 | req.append(pack('>L', int(opts['limit'])))
784 | req.append(pack('>L', int(opts['around'])))
785 |
786 | # documents
787 | req.append(pack('>L', len(docs)))
788 | for doc in docs:
789 | if isinstance(doc,unicode):
790 | doc = doc.encode('utf-8')
791 | assert(isinstance(doc, str))
792 | req.append(pack('>L', len(doc)))
793 | req.append(doc)
794 |
795 | req = ''.join(req)
796 |
797 | # send query, get response
798 | length = len(req)
799 |
800 | # add header
801 | req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req
802 | wrote = sock.send(req)
803 |
804 | response = self._GetResponse(sock, VER_COMMAND_EXCERPT )
805 | if not response:
806 | return []
807 |
808 | # parse response
809 | pos = 0
810 | res = []
811 | rlen = len(response)
812 |
813 | for i in range(len(docs)):
814 | length = unpack('>L', response[pos:pos+4])[0]
815 | pos += 4
816 |
817 | if pos+length > rlen:
818 | self._error = 'incomplete reply'
819 | return []
820 |
821 | res.append(response[pos:pos+length])
822 | pos += length
823 |
824 | return res
825 |
826 |
827 | def UpdateAttributes ( self, index, attrs, values ):
828 | """
829 | Update given attribute values on given documents in given indexes.
830 | Returns the number of updated documents (0 or more) on success, or -1 on failure.
831 |
832 | 'attrs' must be a list of strings.
833 | 'values' must be a dict with int key (document ID) and list of int values (new attribute values).
834 |
835 | Example:
836 | res = cl.UpdateAttributes ( 'test1', [ 'group_id', 'date_added' ], { 2:[123,1000000000], 4:[456,1234567890] } )
837 | """
838 | assert ( isinstance ( index, str ) )
839 | assert ( isinstance ( attrs, list ) )
840 | assert ( isinstance ( values, dict ) )
841 | for attr in attrs:
842 | assert ( isinstance ( attr, str ) )
843 | for docid, entry in values.items():
844 | assert ( isinstance ( docid, int ) )
845 | assert ( isinstance ( entry, list ) )
846 | assert ( len(attrs)==len(entry) )
847 | for val in entry:
848 | assert ( isinstance ( val, int ) )
849 |
850 | # build request
851 | req = [ pack('>L',len(index)), index ]
852 |
853 | req.append ( pack('>L',len(attrs)) )
854 | for attr in attrs:
855 | req.append ( pack('>L',len(attr)) + attr )
856 |
857 | req.append ( pack('>L',len(values)) )
858 | for docid, entry in values.items():
859 | req.append ( pack('>Q',docid) )
860 | for val in entry:
861 | req.append ( pack('>L',val) )
862 |
863 | # connect, send query, get response
864 | sock = self._Connect()
865 | if not sock:
866 | return None
867 |
868 | req = ''.join(req)
869 | length = len(req)
870 | req = pack ( '>2HL', SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, length ) + req
871 | wrote = sock.send ( req )
872 |
873 | response = self._GetResponse ( sock, VER_COMMAND_UPDATE )
874 | if not response:
875 | return -1
876 |
877 | # parse response
878 | updated = unpack ( '>L', response[0:4] )[0]
879 | return updated
880 |
881 |
882 | def BuildKeywords ( self, query, index, hits ):
883 | """
884 | Connect to searchd server, and generate keywords list for a given query.
885 | Returns None on failure, or a list of keywords on success.
886 | """
887 | assert ( isinstance ( query, str ) )
888 | assert ( isinstance ( index, str ) )
889 | assert ( isinstance ( hits, int ) )
890 |
891 | # build request
892 | req = [ pack ( '>L', len(query) ) + query ]
893 | req.append ( pack ( '>L', len(index) ) + index )
894 | req.append ( pack ( '>L', hits ) )
895 |
896 | # connect, send query, get response
897 | sock = self._Connect()
898 | if not sock:
899 | return None
900 |
901 | req = ''.join(req)
902 | length = len(req)
903 | req = pack ( '>2HL', SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, length ) + req
904 | wrote = sock.send ( req )
905 |
906 | response = self._GetResponse ( sock, VER_COMMAND_KEYWORDS )
907 | if not response:
908 | return None
909 |
910 | # parse response
911 | res = []
912 |
913 | nwords = unpack ( '>L', response[0:4] )[0]
914 | p = 4
915 | max_ = len(response)
916 |
917 | while nwords>0 and p<max_:
918 | nwords -= 1
919 |
920 | length = unpack ( '>L', response[p:p+4] )[0]
921 | p += 4
922 | tokenized = response[p:p+length]
923 | p += length
924 |
925 | length = unpack ( '>L', response[p:p+4] )[0]
926 | p += 4
927 | normalized = response[p:p+length]
928 | p += length
929 |
930 | entry = { 'tokenized':tokenized, 'normalized':normalized }
931 | if hits:
932 | entry['docs'], entry['hits'] = unpack ( '>2L', response[p:p+8] )
933 | p += 8
934 |
935 | res.append ( entry )
936 |
937 | if nwords>0 or p>max_:
938 | self._error = 'incomplete reply'
939 | return None
940 |
941 | return res
942 |
943 | ### persistent connections
944 |
945 | def Open(self):
946 | if self._socket:
947 | self._error = 'already connected'
948 | return
949 |
950 | server = self._Connect()
951 | if not server:
952 | return
953 |
954 | # command, command version = 0, body length = 4, body = 1
955 | request = pack ( '>hhII', SEARCHD_COMMAND_PERSIST, 0, 4, 1 )
956 | server.send ( request )
957 |
958 | self._socket = server
959 |
960 | def Close(self):
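# Counterpart to Open() above: Open() pins one connection by sending
# SEARCHD_COMMAND_PERSIST, after which _Connect() keeps returning
# self._socket instead of reconnecting per request; Close() releases it.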
961 | if not self._socket: 962 | self._error = 'not connected' 963 | return 964 | self._socket.close() 965 | self._socket = None 966 | 967 | def EscapeString(self, string): 968 | return re.sub(r"([=\(\)|\-!@~\"&/\\\^\$\=])", r"\\\1", string) 969 | 970 | # 971 | # $Id: sphinxapi.py 2055 2009-11-06 23:09:58Z shodan $ 972 | # 973 | -------------------------------------------------------------------------------- /djangosphinx/apis/current.py: -------------------------------------------------------------------------------- 1 | from djangosphinx.constants import * 2 | 3 | try: 4 | from sphinxapi import * 5 | except ImportError, exc: 6 | name = 'djangosphinx.apis.api%d' % (SPHINX_API_VERSION,) 7 | sphinxapi = __import__(name) 8 | for name in name.split('.')[1:]: 9 | sphinxapi = getattr(sphinxapi, name) 10 | for attr in dir(sphinxapi): 11 | globals()[attr] = getattr(sphinxapi, attr) 12 | -------------------------------------------------------------------------------- /djangosphinx/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os, sys, os.path, warnings 4 | 5 | # Add the project to the python path 6 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) 7 | 8 | # Set our settings module 9 | if not os.environ.get('DJANGO_SETTINGS_MODULE'): 10 | raise ValueError('`DJANGO_SETTINGS_MODULE` was not set. Please use DJANGO_SETTINGS_MODULE=project.settings --config sphinx.py.') 11 | 12 | from django.conf import settings 13 | 14 | assert getattr(settings, 'SPHINX_ROOT', None) is not None, "You must specify `SPHINX_ROOT` in your settings." 15 | 16 | from django.template import RequestContext 17 | 18 | if 'coffin' in settings.INSTALLED_APPS: 19 | import jinja2 20 | from coffin import shortcuts 21 | else: 22 | from django import shortcuts 23 | 24 | def render_to_string(template, context, request=None): 25 | if request: 26 | context_instance = RequestContext(request) 27 | else: 28 | context_instance = None 29 | return shortcuts.render_to_string(template, context, context_instance) 30 | 31 | def relative_path(*args): 32 | return os.path.abspath(os.path.join(settings.SPHINX_ROOT, *args)) 33 | 34 | context = { 35 | 'SPHINX_HOST': getattr(settings, 'SPHINX_HOST', '127.0.0.1'), 36 | 'SPHINX_PORT': getattr(settings, 'SPHINX_PORT', '3312'), 37 | 'relative_path': relative_path, 38 | } 39 | if getattr(settings, 'DATABASES', None): 40 | context.update({ 41 | 'DATABASE_HOST': settings.DATABASES['default']['HOST'], 42 | 'DATABASE_PASSWORD': settings.DATABASES['default']['PASSWORD'], 43 | 'DATABASE_USER': settings.DATABASES['default']['USER'], 44 | 'DATABASE_PORT': settings.DATABASES['default']['PORT'], 45 | 'DATABASE_NAME': settings.DATABASES['default']['NAME'], 46 | }) 47 | else: 48 | context.update({ 49 | 'DATABASE_HOST': settings.DATABASE_HOST, 50 | 'DATABASE_PASSWORD': settings.DATABASE_PASSWORD, 51 | 'DATABASE_USER': settings.DATABASE_USER, 52 | 'DATABASE_PORT': settings.DATABASE_PORT, 53 | 'DATABASE_NAME': settings.DATABASE_NAME, 54 | }) 55 | 56 | print render_to_string(getattr(settings, 'SPHINX_CONFIG_TEMPLATE', 'conf/sphinx.conf'), context) 57 | -------------------------------------------------------------------------------- /djangosphinx/constants.py: -------------------------------------------------------------------------------- 1 | from django.conf import settings 2 | 3 | __all__ = ('SPHINX_API_VERSION',) 4 | 5 | # 0x113 = 1.19 6 | # 0x107 = 1.17 7 | SPHINX_API_VERSION = getattr(settings, 
'SPHINX_API_VERSION', 0x107) -------------------------------------------------------------------------------- /djangosphinx/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcramer/django-sphinx/0071d1cae5390d0ec8c669786ca3c7275abb6410/djangosphinx/management/__init__.py -------------------------------------------------------------------------------- /djangosphinx/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcramer/django-sphinx/0071d1cae5390d0ec8c669786ca3c7275abb6410/djangosphinx/management/commands/__init__.py -------------------------------------------------------------------------------- /djangosphinx/management/commands/generate_sphinx_config.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import AppCommand 2 | from django.db import models 3 | 4 | from djangosphinx.models import SphinxModelManager 5 | 6 | class Command(AppCommand): 7 | help = "Prints generic configuration for any models which use a standard SphinxSearch manager." 8 | 9 | output_transaction = True 10 | 11 | def handle_app(self, app, **options): 12 | from djangosphinx.utils.config import generate_config_for_model 13 | model_classes = [getattr(app, n) for n in dir(app) if hasattr(getattr(app, n), '_meta')] 14 | found = 0 15 | for model in model_classes: 16 | indexes = getattr(model, '__sphinx_indexes__', []) 17 | for index in indexes: 18 | found += 1 19 | print generate_config_for_model(model, index) 20 | if found == 0: 21 | print "Unable to find any models in application which use standard SphinxSearch configuration." 22 | #return u'\n'.join(sql_create(app, self.style)).encode('utf-8') 23 | -------------------------------------------------------------------------------- /djangosphinx/manager.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | import warnings 3 | 4 | warnings.warn('`djangosphinx.manager` is deprecated. 
Please use `djangosphinx.models` instead.', DeprecationWarning) 5 | -------------------------------------------------------------------------------- /djangosphinx/models.py: -------------------------------------------------------------------------------- 1 | import select 2 | import socket 3 | import time 4 | import struct 5 | import warnings 6 | import operator 7 | import apis.current as sphinxapi 8 | import logging 9 | import re 10 | try: 11 | import decimal 12 | except ImportError: 13 | from django.utils import _decimal as decimal # for Python 2.3 14 | 15 | from django.db.models.query import QuerySet, Q 16 | from django.conf import settings 17 | 18 | __all__ = ('SearchError', 'ConnectionError', 'SphinxSearch', 'SphinxRelation', 'SphinxQuerySet') 19 | 20 | from django.contrib.contenttypes.models import ContentType 21 | from datetime import datetime, date 22 | 23 | # server settings 24 | SPHINX_SERVER = getattr(settings, 'SPHINX_SERVER', 'localhost') 25 | SPHINX_PORT = int(getattr(settings, 'SPHINX_PORT', 3312)) 26 | 27 | # These require search API 275 (Sphinx 0.9.8) 28 | SPHINX_RETRIES = int(getattr(settings, 'SPHINX_RETRIES', 0)) 29 | SPHINX_RETRIES_DELAY = int(getattr(settings, 'SPHINX_RETRIES_DELAY', 5)) 30 | 31 | MAX_INT = int(2**31-1) 32 | 33 | EMPTY_RESULT_SET = dict( 34 | matches=[], 35 | total=0, 36 | total_found=0, 37 | words=[], 38 | attrs=[], 39 | ) 40 | 41 | UNDEFINED = object() 42 | 43 | class SearchError(Exception): pass 44 | class ConnectionError(Exception): pass 45 | 46 | class SphinxProxy(object): 47 | """ 48 | Acts exactly like a normal instance of an object except that 49 | it will handle any special sphinx attributes in a `_sphinx` class. 50 | 51 | If there is no `sphinx` attribute on the instance, it will also 52 | add a proxy wrapper to `_sphinx` under that name as well. 53 | """ 54 | __slots__ = ('__dict__', '__instance__', '_sphinx', 'sphinx') 55 | 56 | def __init__(self, instance, attributes): 57 | object.__setattr__(self, '__instance__', instance) 58 | object.__setattr__(self, '_sphinx', attributes) 59 | 60 | def _get_current_object(self): 61 | """ 62 | Return the current object. This is useful if you want the real object 63 | behind the proxy at a time for performance reasons or because you want 64 | to pass the object into a different context. 
65 | """ 66 | return self.__instance__ 67 | _current_object = property(_get_current_object) 68 | 69 | def __dict__(self): 70 | try: 71 | return self._current_object.__dict__ 72 | except RuntimeError: 73 | return AttributeError('__dict__') 74 | __dict__ = property(__dict__) 75 | 76 | def __repr__(self): 77 | try: 78 | obj = self._current_object 79 | except RuntimeError: 80 | return '<%s unbound>' % self.__class__.__name__ 81 | return repr(obj) 82 | 83 | def __nonzero__(self): 84 | try: 85 | return bool(self._current_object) 86 | except RuntimeError: 87 | return False 88 | 89 | def __unicode__(self): 90 | try: 91 | return unicode(self._current_object) 92 | except RuntimeError: 93 | return repr(self) 94 | 95 | def __dir__(self): 96 | try: 97 | return dir(self._current_object) 98 | except RuntimeError: 99 | return [] 100 | 101 | # def __getattribute__(self, name): 102 | # if not hasattr(self._current_object, 'sphinx') and name == 'sphinx': 103 | # name = '_sphinx' 104 | # if name == '_sphinx': 105 | # return object.__getattribute__(self, name) 106 | # print object.__getattribute__(self, '_current_object') 107 | # return getattr(object.__getattribute__(self, '_current_object'), name) 108 | 109 | def __getattr__(self, name, value=UNDEFINED): 110 | if not hasattr(self._current_object, 'sphinx') and name == 'sphinx': 111 | name = '_sphinx' 112 | if name == '_sphinx': 113 | return getattr(self, '_sphinx', value) 114 | if value == UNDEFINED: 115 | return getattr(self._current_object, name) 116 | return getattr(self._current_object, name, value) 117 | 118 | def __setattr__(self, name, value): 119 | if name == '_sphinx': 120 | return object.__setattr__(self, '_sphinx', value) 121 | elif name == 'sphinx': 122 | if not hasattr(self._current_object, 'sphinx'): 123 | return object.__setattr__(self, '_sphinx', value) 124 | return setattr(self._current_object, name, value) 125 | 126 | def __setitem__(self, key, value): 127 | self._current_object[key] = value 128 | 129 | def __delitem__(self, key): 130 | del self._current_object[key] 131 | 132 | def __setslice__(self, i, j, seq): 133 | self._current_object[i:j] = seq 134 | 135 | def __delslice__(self, i, j): 136 | del self._current_object[i:j] 137 | 138 | __delattr__ = lambda x, n: delattr(x._current_object, n) 139 | __str__ = lambda x: str(x._current_object) 140 | __unicode__ = lambda x: unicode(x._current_object) 141 | __lt__ = lambda x, o: x._current_object < o 142 | __le__ = lambda x, o: x._current_object <= o 143 | __eq__ = lambda x, o: x._current_object == o 144 | __ne__ = lambda x, o: x._current_object != o 145 | __gt__ = lambda x, o: x._current_object > o 146 | __ge__ = lambda x, o: x._current_object >= o 147 | __cmp__ = lambda x, o: cmp(x._current_object, o) 148 | __hash__ = lambda x: hash(x._current_object) 149 | # attributes are currently not callable 150 | # __call__ = lambda x, *a, **kw: x._current_object(*a, **kw) 151 | __len__ = lambda x: len(x._current_object) 152 | __getitem__ = lambda x, i: x._current_object[i] 153 | __iter__ = lambda x: iter(x._current_object) 154 | __contains__ = lambda x, i: i in x._current_object 155 | __getslice__ = lambda x, i, j: x._current_object[i:j] 156 | __add__ = lambda x, o: x._current_object + o 157 | __sub__ = lambda x, o: x._current_object - o 158 | __mul__ = lambda x, o: x._current_object * o 159 | __floordiv__ = lambda x, o: x._current_object // o 160 | __mod__ = lambda x, o: x._current_object % o 161 | __divmod__ = lambda x, o: x._current_object.__divmod__(o) 162 | __pow__ = lambda x, o: x._current_object 
** o 163 | __lshift__ = lambda x, o: x._current_object << o 164 | __rshift__ = lambda x, o: x._current_object >> o 165 | __and__ = lambda x, o: x._current_object & o 166 | __xor__ = lambda x, o: x._current_object ^ o 167 | __or__ = lambda x, o: x._current_object | o 168 | __div__ = lambda x, o: x._current_object.__div__(o) 169 | __truediv__ = lambda x, o: x._current_object.__truediv__(o) 170 | __neg__ = lambda x: -(x._current_object) 171 | __pos__ = lambda x: +(x._current_object) 172 | __abs__ = lambda x: abs(x._current_object) 173 | __invert__ = lambda x: ~(x._current_object) 174 | __complex__ = lambda x: complex(x._current_object) 175 | __int__ = lambda x: int(x._current_object) 176 | __long__ = lambda x: long(x._current_object) 177 | __float__ = lambda x: float(x._current_object) 178 | __oct__ = lambda x: oct(x._current_object) 179 | __hex__ = lambda x: hex(x._current_object) 180 | __index__ = lambda x: x._current_object.__index__() 181 | __coerce__ = lambda x, o: x.__coerce__(x, o) 182 | __enter__ = lambda x: x.__enter__() 183 | __exit__ = lambda x, *a, **kw: x.__exit__(*a, **kw) 184 | 185 | def to_sphinx(value): 186 | "Convert a value into a sphinx query value" 187 | if isinstance(value, date) or isinstance(value, datetime): 188 | return int(time.mktime(value.timetuple())) 189 | elif isinstance(value, decimal.Decimal) or isinstance(value, float): 190 | return float(value) 191 | return int(value) 192 | 193 | class SphinxQuerySet(object): 194 | available_kwargs = ('rankmode', 'mode', 'weights', 'maxmatches', 'passages', 'passages_opts') 195 | 196 | def __init__(self, model=None, using=None, **kwargs): 197 | self._select_related = False 198 | self._select_related_args = {} 199 | self._select_related_fields = [] 200 | self._filters = {} 201 | self._excludes = {} 202 | self._extra = {} 203 | self._query = '' 204 | self.__metadata = None 205 | self._offset = 0 206 | self._limit = 20 207 | 208 | self._groupby = None 209 | self._sort = None 210 | self._weights = [1, 100] 211 | 212 | self._passages = False 213 | self._passages_opts = {} 214 | self._maxmatches = 1000 215 | self._result_cache = None 216 | self._mode = sphinxapi.SPH_MATCH_ALL 217 | self._rankmode = getattr(sphinxapi, 'SPH_RANK_PROXIMITY_BM25', None) 218 | self.model = model 219 | self._anchor = {} 220 | self.__metadata = {} 221 | 222 | self.using = using 223 | 224 | options = self._format_options(**kwargs) 225 | for key, value in options.iteritems(): 226 | setattr(self, key, value) 227 | 228 | if model: 229 | self._index = kwargs.get('index', model._meta.db_table) 230 | else: 231 | self._index = kwargs.get('index') 232 | 233 | def __repr__(self): 234 | if self._result_cache is not None: 235 | return repr(self._get_data()) 236 | else: 237 | return '<%s instance>' % (self.__class__.__name__,) 238 | 239 | def __len__(self): 240 | return self.count() 241 | 242 | def __iter__(self): 243 | return iter(self._get_data()) 244 | 245 | def __getitem__(self, k): 246 | if not isinstance(k, (slice, int, long)): 247 | raise TypeError 248 | assert (not isinstance(k, slice) and (k >= 0)) \ 249 | or (isinstance(k, slice) and (k.start is None or k.start >= 0) and (k.stop is None or k.stop >= 0)), \ 250 | "Negative indexing is not supported." 
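# Slicing maps directly onto Sphinx paging: e.g. qs[20:40] sets
# _offset=20 and _limit=20 below, which _get_sphinx_results() later
# passes to client.SetLimits(), so only that window of matches is
# ever fetched from searchd.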
251 | if self._result_cache is not None:
252 | # Check to see if this is a portion of an already existing result cache
253 | if type(k) == slice:
254 | start = k.start
255 | stop = k.stop-k.start
256 | if start < self._offset or k.stop > self._limit:
257 | self._result_cache = None
258 | else:
259 | start = start-self._offset
260 | return self._get_data()[start:k.stop]
261 | else:
262 | if k not in range(self._offset, self._limit+self._offset):
263 | self._result_cache = None
264 | else:
265 | return self._get_data()[k-self._offset]
266 | if type(k) == slice:
267 | self._offset = k.start
268 | self._limit = k.stop-k.start
269 | return self._get_data()
270 | else:
271 | self._offset = k
272 | self._limit = 1
273 | return self._get_data()[0]
274 |
275 | def _format_options(self, **kwargs):
276 | kwargs['rankmode'] = getattr(sphinxapi, kwargs.get('rankmode', 'SPH_RANK_NONE'), None)
277 | kwargs['mode'] = getattr(sphinxapi, kwargs.get('mode', 'SPH_MATCH_ALL'), sphinxapi.SPH_MATCH_ALL)
278 |
279 | kwargs = dict([('_%s' % (key,), value) for key, value in kwargs.iteritems() if key in self.available_kwargs])
280 | return kwargs
281 |
282 | def get_query_set(self, model):
283 | qs = model._default_manager
284 | if self.using:
285 | qs = qs.db_manager(self.using)
286 | return qs.all()
287 |
288 | def set_options(self, **kwargs):
289 | kwargs = self._format_options(**kwargs)
290 | return self._clone(**kwargs)
291 |
292 | def query(self, string):
293 | return self._clone(_query=unicode(string).encode('utf-8'))
294 |
295 | def group_by(self, attribute, func, groupsort='@group desc'):
296 | return self._clone(_groupby=attribute, _groupfunc=func, _groupsort=groupsort)
297 |
298 | def rank_none(self):
299 | warnings.warn('`rank_none()` is deprecated. Use `set_options(rankmode=None)` instead.', DeprecationWarning)
300 | return self._clone(_rankmode=sphinxapi.SPH_RANK_NONE)
301 |
302 | def mode(self, mode):
303 | warnings.warn("`mode()` is deprecated. Use `set_options(mode='')` instead.", DeprecationWarning)
304 | return self._clone(_mode=mode)
305 |
306 | def weights(self, weights):
307 | warnings.warn('`weights()` is deprecated. Use `set_options(weights=[])` instead.', DeprecationWarning)
308 | return self._clone(_weights=weights)
309 |
310 | def on_index(self, index):
311 | warnings.warn('`on_index()` is deprecated. Use `set_options(on_index=foo)` instead.', DeprecationWarning)
312 | return self._clone(_index=index)
313 |
314 | # only works on attributes
315 | def filter(self, **kwargs):
316 | filters = self._filters.copy()
317 | for k,v in kwargs.iteritems():
318 | if hasattr(v, '__iter__'):
319 | v = list(v)
320 | elif not (isinstance(v, list) or isinstance(v, tuple)):
321 | v = [v,]
322 | filters.setdefault(k, []).extend(map(to_sphinx, v))
323 | return self._clone(_filters=filters)
324 |
325 | def geoanchor(self, lat_attr, lng_attr, lat, lng):
326 | assert sphinxapi.VER_COMMAND_SEARCH >= 0x113, "You must upgrade sphinxapi to version 0.9.8 to use Geo Anchoring."
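# The 4-tuple stored here is later expanded positionally into
# client.SetGeoAnchor(lat_attr, lng_attr, lat, lng) by
# _get_sphinx_results(); lat/lng must be floats, as that API asserts.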
327 | return self._clone(_anchor=(lat_attr, lng_attr, float(lat), float(lng))) 328 | 329 | # this actually does nothing, its just a passthru to 330 | # keep things looking/working generally the same 331 | def all(self): 332 | return self 333 | 334 | def none(self): 335 | c = EmptySphinxQuerySet() 336 | c.__dict__.update(self.__dict__.copy()) 337 | return c 338 | 339 | # only works on attributes 340 | def exclude(self, **kwargs): 341 | filters = self._excludes.copy() 342 | for k,v in kwargs.iteritems(): 343 | if hasattr(v, 'next'): 344 | v = list(v) 345 | elif not (isinstance(v, list) or isinstance(v, tuple)): 346 | v = [v,] 347 | filters.setdefault(k, []).extend(map(to_sphinx, v)) 348 | return self._clone(_excludes=filters) 349 | 350 | def escape(self, value): 351 | return re.sub(r"([=\(\)|\-!@~\"&/\\\^\$\=])", r"\\\1", value) 352 | 353 | # you cannot order by @weight (it always orders in descending) 354 | # keywords are @id, @weight, @rank, and @relevance 355 | def order_by(self, *args, **kwargs): 356 | mode = kwargs.pop('mode', sphinxapi.SPH_SORT_EXTENDED) 357 | if mode == sphinxapi.SPH_SORT_EXTENDED: 358 | sort_by = [] 359 | for arg in args: 360 | sort = 'ASC' 361 | if arg[0] == '-': 362 | arg = arg[1:] 363 | sort = 'DESC' 364 | if arg == 'id': 365 | arg = '@id' 366 | sort_by.append('%s %s' % (arg, sort)) 367 | else: 368 | sort_by = args 369 | if sort_by: 370 | return self._clone(_sort=(mode, ', '.join(sort_by))) 371 | return self 372 | 373 | # pass these thru on the queryset and let django handle it 374 | def select_related(self, *args, **kwargs): 375 | _args = self._select_related_fields[:] 376 | _args.extend(args) 377 | _kwargs = self._select_related_args.copy() 378 | _kwargs.update(kwargs) 379 | 380 | return self._clone( 381 | _select_related=True, 382 | _select_related_fields=_args, 383 | _select_related_args=_kwargs, 384 | ) 385 | 386 | def extra(self, **kwargs): 387 | extra = self._extra.copy() 388 | extra.update(kwargs) 389 | return self._clone(_extra=extra) 390 | 391 | def count(self): 392 | return min(self._sphinx.get('total_found', 0), self._maxmatches) 393 | 394 | def reset(self): 395 | return self.__class__(self.model, self._index) 396 | 397 | # Internal methods 398 | def _get_sphinx_client(self): 399 | client = sphinxapi.SphinxClient() 400 | client.SetServer(SPHINX_SERVER, SPHINX_PORT) 401 | return client 402 | 403 | def _clone(self, **kwargs): 404 | # Clones the queryset passing any changed args 405 | c = self.__class__() 406 | c.__dict__.update(self.__dict__.copy()) 407 | for k, v in kwargs.iteritems(): 408 | setattr(c, k, v) 409 | return c 410 | 411 | def _sphinx(self): 412 | if not self.__metadata: 413 | # We have to force execution if this is accessed beforehand 414 | self._get_data() 415 | return self.__metadata 416 | _sphinx = property(_sphinx) 417 | 418 | def _get_data(self): 419 | assert(self._index) 420 | # need to find a way to make this work yet 421 | if self._result_cache is None: 422 | self._result_cache = list(self._get_results()) 423 | return self._result_cache 424 | 425 | def _get_sphinx_results(self): 426 | assert(self._offset + self._limit <= self._maxmatches) 427 | 428 | client = self._get_sphinx_client() 429 | 430 | params = [] 431 | 432 | if self._sort: 433 | params.append('sort=%s' % (self._sort,)) 434 | client.SetSortMode(*self._sort) 435 | 436 | if isinstance(self._weights, dict): 437 | client.SetFieldWeights(self._weights) 438 | else: 439 | # assume its a list 440 | client.SetWeights(map(int, self._weights)) 441 | params.append('weights=%s' % 
442 | 
443 |         params.append('matchmode=%s' % (self._mode,))
444 |         client.SetMatchMode(self._mode)
445 | 
446 |         def _handle_filters(filter_list, exclude=False):
447 |             for name, values in filter_list.iteritems():
448 |                 parts = len(name.split('__'))
449 |                 if parts > 2:
450 |                     raise NotImplementedError, 'Related object and/or multiple field lookups not supported'
451 |                 elif parts == 2:
452 |                     # The float handling for __gt and __lt is kind of ugly..
453 |                     name, lookup = name.split('__', 1)
454 |                     is_float = isinstance(values[0], float)
455 |                     if lookup in ('gt', 'gte'):
456 |                         value = values[0]
457 |                         if lookup == 'gt':
458 |                             if is_float:
459 |                                 value += (1.0/MAX_INT)
460 |                             else:
461 |                                 value += 1
462 |                         _max = MAX_INT
463 |                         if is_float:
464 |                             _max = float(_max)
465 |                         args = (name, value, _max, exclude)
466 |                     elif lookup in ('lt', 'lte'):
467 |                         value = values[0]
468 |                         if lookup == 'lt':
469 |                             if is_float:
470 |                                 value -= (1.0/MAX_INT)
471 |                             else:
472 |                                 value -= 1
473 |                         _max = -MAX_INT
474 |                         if is_float:
475 |                             _max = float(_max)
476 |                         args = (name, _max, value, exclude)
477 |                     elif lookup == 'in':
478 |                         args = (name, values, exclude)
479 |                     elif lookup == 'range':
480 |                         args = (name, values[0], values[1], exclude)
481 |                     else:
482 |                         raise NotImplementedError, 'Related object and/or field lookup "%s" not supported' % lookup
483 |                     if is_float:
484 |                         client.SetFilterFloatRange(*args)
485 |                     elif lookup == 'in':
486 |                         client.SetFilter(name, values, exclude)
487 |                     elif not exclude and self.model and name == self.model._meta.pk.column:
488 |                         client.SetIDRange(*args[1:3])
489 |                     else:
490 |                         client.SetFilterRange(*args)
491 |                 else:
492 |                     client.SetFilter(name, values, exclude)
493 | 
494 |         # Include filters
495 |         if self._filters:
496 |             params.append('filters=%s' % (self._filters,))
497 |             _handle_filters(self._filters)
498 | 
499 |         # Exclude filters
500 |         if self._excludes:
501 |             params.append('excludes=%s' % (self._excludes,))
502 |             _handle_filters(self._excludes, True)
503 | 
504 |         if self._groupby:
505 |             params.append('groupby=%s' % (self._groupby,))
506 |             client.SetGroupBy(self._groupby, self._groupfunc, self._groupsort)
507 | 
508 |         if self._anchor:
509 |             params.append('geoanchor=%s' % (self._anchor,))
510 |             client.SetGeoAnchor(*self._anchor)
511 | 
512 |         if self._rankmode:
513 |             params.append('rankmode=%s' % (self._rankmode,))
514 |             client.SetRankingMode(self._rankmode)
515 | 
516 |         if self._limit <= 0:
517 |             # Fix for Sphinx throwing an assertion error when you pass it an empty limiter
518 |             return EMPTY_RESULT_SET
519 | 
520 |         if sphinxapi.VER_COMMAND_SEARCH >= 0x113:
521 |             client.SetRetries(SPHINX_RETRIES, SPHINX_RETRIES_DELAY)
522 | 
523 |         client.SetLimits(int(self._offset), int(self._limit), int(self._maxmatches))
524 | 
525 |         # To avoid modifying the Sphinx API, we solve unicode indexes here
526 |         if isinstance(self._index, unicode):
527 |             self._index = self._index.encode('utf-8')
528 | 
529 |         results = client.Query(self._query, self._index)
530 | 
531 |         # The Sphinx API doesn't raise exceptions
532 | 
533 |         if not results:
534 |             if client.GetLastError():
535 |                 raise SearchError, client.GetLastError()
536 |             elif client.GetLastWarning():
537 |                 raise SearchError, client.GetLastWarning()
538 |             else:
539 |                 results = EMPTY_RESULT_SET
540 |         elif not results['matches']:
541 |             results = EMPTY_RESULT_SET
542 | 
543 |         logging.debug('Found %s results for search query %s on %s with params: %s', results['total'], self._query, self._index, ', '.join(params))
544 | 
545 |         return results
546 | 
547 |     def get(self, **kwargs):
548 |         """Hack to support ModelAdmin"""
549 |         queryset = self.model._default_manager
550 |         if self._select_related:
551 |             queryset = queryset.select_related(*self._select_related_fields, **self._select_related_args)
552 |         if self._extra:
553 |             queryset = queryset.extra(**self._extra)
554 |         return queryset.get(**kwargs)
555 | 
556 |     def _get_results(self):
557 |         results = self._get_sphinx_results()
558 |         if not results:
559 |             results = EMPTY_RESULT_SET
560 |         self.__metadata = {
561 |             'total': results['total'],
562 |             'total_found': results['total_found'],
563 |             'words': results['words'],
564 |         }
565 |         if results['matches'] and self._passages:
566 |             # We need to do some initial work for passages
567 |             # XXX: The passages implementation has a potential gotcha if your id
568 |             # column is not actually your primary key
569 |             words = ' '.join([w['word'] for w in results['words']])
570 | 
571 |         if self.model:
572 |             if results['matches']:
573 |                 queryset = self.get_query_set(self.model)
574 |                 if self._select_related:
575 |                     queryset = queryset.select_related(*self._select_related_fields, **self._select_related_args)
576 |                 if self._extra:
577 |                     queryset = queryset.extra(**self._extra)
578 | 
579 |                 # django-sphinx supports the compositepks branch
580 |                 # as well as custom id columns in your sphinx configuration,
581 |                 # but all primary key columns still need to be present in the field list
582 |                 pks = getattr(self.model._meta, 'pks', [self.model._meta.pk])
583 |                 if pks[0].column in results['matches'][0]['attrs']:
584 | 
585 |                     # XXX: Sometimes attrs is empty and we cannot have custom primary key attributes
586 |                     for r in results['matches']:
587 |                         r['id'] = ', '.join([unicode(r['attrs'][p.column]) for p in pks])
588 | 
589 |                     # Join our Q objects to get a where clause which
590 |                     # matches all primary keys, even across multiple columns
591 |                     q = reduce(operator.or_, [reduce(operator.and_, [Q(**{p.name: r['attrs'][p.column]}) for p in pks]) for r in results['matches']])
592 |                     queryset = queryset.filter(q)
593 |                 else:
594 |                     for r in results['matches']:
595 |                         r['id'] = unicode(r['id'])
596 |                     queryset = queryset.filter(pk__in=[r['id'] for r in results['matches']])
597 |                 queryset = dict([(', '.join([unicode(getattr(o, p.attname)) for p in pks]), o) for o in queryset])
598 | 
599 |                 if self._passages:
600 |                     # TODO: clean this up
601 |                     for r in results['matches']:
602 |                         if r['id'] in queryset:
603 |                             r['passages'] = self._get_passages(queryset[r['id']], results['fields'], words)
604 | 
605 |                 results = [SphinxProxy(queryset[r['id']], r) for r in results['matches'] if r['id'] in queryset]
606 |             else:
607 |                 results = []
608 |         else:
609 |             # We did a query without a model; let's see if there's a content_type
610 |             results['attrs'] = dict(results['attrs'])
611 |             if 'content_type' in results['attrs']:
612 |                 # Now we have to do one query per content_type
613 |                 objcache = {}
614 |                 for r in results['matches']:
615 |                     ct = r['attrs']['content_type']
616 |                     r['id'] = unicode(r['id'])
617 |                     objcache.setdefault(ct, {})[r['id']] = None
618 |                 for ct in objcache:
619 |                     model_class = ContentType.objects.get(pk=ct).model_class()
620 |                     pks = getattr(model_class._meta, 'pks', [model_class._meta.pk])
621 | 
622 |                     if pks[0].column in results['matches'][0]['attrs']:
623 |                         for r in results['matches']:
624 |                             if r['attrs']['content_type'] == ct:
625 |                                 val = ', '.join([unicode(r['attrs'][p.column]) for p in pks])
626 |                                 objcache[ct][r['id']] = r['id'] = val
627 | 
628 |                         q = reduce(operator.or_, [reduce(operator.and_, [Q(**{p.name: r['attrs'][p.column]}) for p in pks]) for r in results['matches'] if r['attrs']['content_type'] == ct])
629 |                         queryset = self.get_query_set(model_class).filter(q)
630 |                     else:
631 |                         queryset = self.get_query_set(model_class).filter(pk__in=[r['id'] for r in results['matches'] if r['attrs']['content_type'] == ct])
632 | 
633 |                     for o in queryset:
634 |                         objcache[ct][', '.join([unicode(getattr(o, p.name)) for p in pks])] = o
635 | 
636 |                 if self._passages:
637 |                     for r in results['matches']:
638 |                         ct = r['attrs']['content_type']
639 |                         if r['id'] in objcache[ct]:
640 |                             r['passages'] = self._get_passages(objcache[ct][r['id']], results['fields'], words)
641 |                 results = [SphinxProxy(objcache[r['attrs']['content_type']][r['id']], r) for r in results['matches'] if r['id'] in objcache[r['attrs']['content_type']]]
642 |             else:
643 |                 results = results['matches']
644 |         self._result_cache = results
645 |         return results
646 | 
647 |     def _get_passages(self, instance, fields, words):
648 |         client = self._get_sphinx_client()
649 | 
650 |         docs = [getattr(instance, f) for f in fields]
651 |         if isinstance(self._passages_opts, dict):
652 |             opts = self._passages_opts
653 |         else:
654 |             opts = {}
655 |         if isinstance(self._index, unicode):
656 |             self._index = self._index.encode('utf-8')
657 |         passages_list = client.BuildExcerpts(docs, self._index, words, opts)
658 | 
659 |         # BuildExcerpts returns one excerpt per document, in field order
660 |         passages = {}
661 |         for c, f in enumerate(fields):
662 |             passages[f] = passages_list[c]
663 | 
664 |         return passages
665 | 
666 | class EmptySphinxQuerySet(SphinxQuerySet):
667 |     def _get_sphinx_results(self):
668 |         return None
669 | 
670 | class SphinxModelManager(object):
671 |     def __init__(self, model, **kwargs):
672 |         self.model = model
673 |         self._index = kwargs.pop('index', model._meta.db_table)
674 |         self._kwargs = kwargs
675 | 
676 |     def _get_query_set(self):
677 |         return SphinxQuerySet(self.model, index=self._index, **self._kwargs)
678 | 
679 |     def get_index(self):
680 |         return self._index
681 | 
682 |     def all(self):
683 |         return self._get_query_set()
684 | 
685 |     def none(self):
686 |         return self._get_query_set().none()
687 | 
688 |     def filter(self, **kwargs):
689 |         return self._get_query_set().filter(**kwargs)
690 | 
691 |     def query(self, *args, **kwargs):
692 |         return self._get_query_set().query(*args, **kwargs)
693 | 
694 |     def on_index(self, *args, **kwargs):
695 |         return self._get_query_set().on_index(*args, **kwargs)
696 | 
697 |     def geoanchor(self, *args, **kwargs):
698 |         return self._get_query_set().geoanchor(*args, **kwargs)
699 | 
700 | class SphinxInstanceManager(object):
701 |     """Collection of tools useful for objects which are in a Sphinx index."""
702 |     # TODO: deletion support
703 |     def __init__(self, instance, index):
704 |         self._instance = instance
705 |         self._index = index
706 | 
707 |     def update(self, **kwargs):
708 |         assert sphinxapi.VER_COMMAND_SEARCH >= 0x113, "You must upgrade sphinxapi to version 0.9.8 to use UpdateAttributes."
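        # Editorial sketch of the call below (names assumed, not part of this
        # module): given a model instance `doc` whose class declares
        # `search = SphinxSearch()`, calling
        #
        #     doc.search.update(group_id=5)
        #
        # issues UpdateAttributes(index, ['group_id'], {doc.pk: [5]}) against
        # searchd. Only numeric Sphinx attributes can be updated in place.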
709 |         client = sphinxapi.SphinxClient(); client.SetServer(SPHINX_SERVER, SPHINX_PORT)
710 |         client.UpdateAttributes(self._index, kwargs.keys(), {self._instance.pk: map(to_sphinx, kwargs.values())})
711 | class SphinxSearch(object):
712 |     def __init__(self, index=None, using=None, **kwargs):
713 |         self._kwargs = kwargs
714 |         self._sphinx = None
715 |         self._index = index
716 |         self.model = None
717 |         self.using = using
718 | 
719 |     def __call__(self, index, **kwargs):
720 |         warnings.warn('For non-model searches use a SphinxQuerySet instance.', DeprecationWarning)
721 |         return SphinxQuerySet(index=index, using=self.using, **kwargs)
722 | 
723 |     def __get__(self, instance, model, **kwargs):
724 |         if instance:
725 |             return SphinxInstanceManager(instance, self._index)
726 |         return self._sphinx
727 | 
728 |     def get_query_set(self):
729 |         """Override this method to change the QuerySet used for config generation."""
730 |         return self.model._default_manager.all()
731 | 
732 |     def contribute_to_class(self, model, name, **kwargs):
733 |         if self._index is None:
734 |             self._index = model._meta.db_table
735 |         self._sphinx = SphinxModelManager(model, index=self._index, **self._kwargs)
736 |         self.model = model
737 |         if getattr(model, '__sphinx_indexes__', None) is None:
738 |             setattr(model, '__sphinx_indexes__', [self._index])
739 |         else:
740 |             model.__sphinx_indexes__.append(self._index)
741 |         setattr(model, name, self._sphinx)
742 | 
743 | class SphinxRelationProxy(SphinxProxy):
744 |     def count(self):
745 |         return min(self._sphinx['attrs']['@count'], self._maxmatches)
746 | 
747 | class SphinxRelation(SphinxSearch):
748 |     """
749 |     Adds "related model" support to django-sphinx --
750 |     http://code.google.com/p/django-sphinx/
751 |     http://www.sphinxsearch.com/
752 | 
753 |     Example --
754 | 
755 |     class MySearch(SphinxSearch):
756 |         myrelatedobject = SphinxRelation(RelatedModel)
757 |         anotherone = SphinxRelation(AnotherModel)
758 |         ...
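    Each SphinxRelation attribute re-runs the current query grouped by the
    related attribute (SPH_GROUPBY_ATTR) and maps the @groupby values back
    onto instances of the related model, wrapped in SphinxRelationProxy.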
759 | 
760 |     class MyModel(models.Model):
761 |         search = MySearch('index')
762 | 
763 |     """
764 |     def __init__(self, model=None, attr=None, sort='@count desc', **kwargs):
765 |         if model:
766 |             self._related_model = model
767 |             self._related_attr = attr or model.__name__.lower()
768 |             self._related_sort = sort
769 |         super(SphinxRelation, self).__init__(**kwargs)
770 | 
771 |     def __get__(self, instance, instance_model, **kwargs):
772 |         self._mode = instance._mode
773 |         self._rankmode = instance._rankmode
774 |         self._index = instance._index
775 |         self._query = instance._query
776 |         self._filters = instance._filters
777 |         self._excludes = instance._excludes
778 |         self.model = self._related_model
779 |         self._groupby = self._related_attr
780 |         self._groupsort = self._related_sort
781 |         self._groupfunc = sphinxapi.SPH_GROUPBY_ATTR
782 |         return self
783 | 
784 |     def _get_results(self):
785 |         results = self._get_sphinx_results()
786 |         if not results or not results['matches']:
787 |             # No matches so let's create a dummy result set
788 |             results = EMPTY_RESULT_SET
789 |         elif self.model:
790 |             ids = []
791 |             for r in results['matches']:
792 |                 value = r['attrs']['@groupby']
793 |                 if isinstance(value, (int, long)):
794 |                     ids.append(value)
795 |                 else:
796 |                     ids.extend(value)
797 |             qs = self.get_query_set(self.model).filter(pk__in=set(ids))
798 |             if self._select_related:
799 |                 qs = qs.select_related(*self._select_related_fields,
800 |                                        **self._select_related_args)
801 |             if self._extra:
802 |                 qs = qs.extra(**self._extra)
803 |             queryset = dict([(o.id, o) for o in qs])
804 |             self.__metadata = {  # capture totals before `results` is replaced below
805 |                 'total': results['total'],
806 |                 'total_found': results['total_found'],
807 |                 'words': results['words'],
808 |             }
809 |             results = [ SphinxRelationProxy(queryset[k['attrs']['@groupby']], k) \
810 |                         for k in results['matches'] \
811 |                         if k['attrs']['@groupby'] in queryset ]
812 |         self._result_cache = results
813 |         return results
814 | 
815 |     def _sphinx(self):
816 |         if not self.__metadata:
817 |             # We have to force execution if this is accessed beforehand
818 |             self._get_data()
819 |         return self.__metadata
820 |     _sphinx = property(_sphinx)
821 | 
--------------------------------------------------------------------------------
/djangosphinx/templates/index-multiple.conf:
--------------------------------------------------------------------------------
 1 | index {{ index_name }}
 2 | {
 3 |     source = {{ source_name }}
 4 |     path = /var/data/{{ index_name }}
 5 |     docinfo = extern
 6 |     morphology = none
 7 |     stopwords = 
 8 |     min_word_len = 2
 9 |     charset_type = utf-8
10 |     min_prefix_len = 0
11 |     min_infix_len = 0
12 | }
--------------------------------------------------------------------------------
/djangosphinx/templates/index.conf:
--------------------------------------------------------------------------------
 1 | index {{ index_name }}
 2 | {
 3 |     source = {{ source_name }}
 4 |     path = /var/data/{{ index_name }}
 5 |     docinfo = extern
 6 |     morphology = none
 7 |     stopwords = 
 8 |     min_word_len = 2
 9 |     charset_type = utf-8
10 |     min_prefix_len = 0
11 |     min_infix_len = 0
12 | }
--------------------------------------------------------------------------------
/djangosphinx/templates/source-multiple.conf:
--------------------------------------------------------------------------------
 1 | source {{ source_name }}
 2 | {
 3 |     type = {{ database_engine }}
 4 |     html_strip = 0
 5 |     html_index_attrs = 
 6 |     sql_host = {{ database_host }}
 7 |     sql_user = {{ database_user }}
 8 |     sql_pass = {{ database_password }}
 9 |     sql_db = {{ database_name }}
10 |     sql_port = {{ database_port }}
11 | 
12 |     sql_query_pre = 
13 |     sql_query_post = 
14 |     sql_query = \
15 | {% for table_name, content_type in tables %}
16 |         SELECT {{ field_names|join:", " }}, {{ content_type.id }} as content_type \
17 |         FROM `{{ table_name }}`{% if not forloop.last %} UNION \{% endif %}
18 | {% endfor %}
19 | {% if group_columns %}
20 |     # ForeignKey's
21 | {% for field_name in group_columns %}    sql_attr_uint = {{ field_name }}
22 | {% endfor %}{% endif %}
23 | {% if bool_columns %}
24 |     # BooleanField's
25 | {% for field_name in bool_columns %}    sql_attr_bool = {{ field_name }}
26 | {% endfor %}{% endif %}
27 | {% if date_columns %}
28 |     # DateField's and DateTimeField's
29 | {% for field_name in date_columns %}    sql_attr_timestamp = {{ field_name }}
30 | {% endfor %}{% endif %}
31 | {% if gis_columns %}
32 |     # GIS Fields
33 | {% for field_name in gis_columns %}    sql_attr_float = {{ field_name }}_latitude
34 |     sql_attr_float = {{ field_name }}_longitude
35 | {% endfor %}{% endif %}
36 | }
--------------------------------------------------------------------------------
/djangosphinx/templates/source.conf:
--------------------------------------------------------------------------------
 1 | source {{ source_name }}
 2 | {
 3 |     type = {{ database_engine }}
 4 |     sql_host = {{ database_host }}
 5 |     sql_user = {{ database_user }}
 6 |     sql_pass = {{ database_password }}
 7 |     sql_db = {{ database_name }}
 8 |     sql_port = {{ database_port }}
 9 | 
10 |     sql_query_pre = 
11 |     sql_query_post = 
12 |     sql_query = \
13 |         SELECT {{ field_names|join:", " }}\
14 |         FROM {{ table_name }}
15 |     sql_query_info = SELECT * FROM `{{ table_name }}` WHERE `{{ primary_key }}` = $id
16 | {% if group_columns %}
17 |     # ForeignKey's
18 | {% for field_name in group_columns %}    sql_attr_uint = {{ field_name }}
19 | {% endfor %}{% endif %}
20 | {% if date_columns %}
21 |     # DateField's and DateTimeField's
22 | {% for field_name in date_columns %}    sql_attr_timestamp = {{ field_name }}
23 | {% endfor %}{% endif %}
24 | {% if bool_columns %}
25 |     # BooleanField's
26 | {% for field_name in bool_columns %}    sql_attr_bool = {{ field_name }}
27 | {% endfor %}{% endif %}
28 | {% if float_columns %}
29 |     # FloatField's and DecimalField's
30 | {% for field_name in float_columns %}    sql_attr_float = {{ field_name }}
31 | {% endfor %}{% endif %}
32 | {% if gis_columns %}
33 |     # GIS Fields
34 | {% for field_name in gis_columns %}    sql_attr_float = {{ field_name }}_latitude
35 |     sql_attr_float = {{ field_name }}_longitude
36 | {% endfor %}{% endif %}
37 | }
--------------------------------------------------------------------------------
/djangosphinx/templates/sphinx.conf:
--------------------------------------------------------------------------------
 1 | source base
 2 | {
 3 |     type = mysql
 4 |     sql_host = {{ DATABASE_HOST }}
 5 |     sql_user = {{ DATABASE_USER }}
 6 |     sql_pass = {{ DATABASE_PASSWORD }}
 7 |     sql_db = {{ DATABASE_NAME }}
 8 |     sql_port = {{ DATABASE_PORT }}
 9 | 
10 |     sql_query_pre = SET NAMES utf8
11 |     sql_query_post = 
12 | }
13 | 
14 | source cities : base
15 | {
16 |     sql_query = \
17 |         SELECT id, state_id, name, aliases FROM cities WHERE visible = 1
18 |     sql_query_info = SELECT * FROM `cities` WHERE id = $id
19 | 
20 |     sql_attr_uint = state_id
21 | }
22 | index cities
23 | {
24 |     docinfo = extern
25 |     morphology = stem_en
26 |     stopwords = 
27 |     min_word_len = 2
28 |     charset_type = sbcs
29 |     min_prefix_len = 0
30 |     min_infix_len = 0
31 |     enable_star = 1
32 |     source = cities
33 |     path = {{ relative_path('data', 'cities') }}
34 |     wordforms = {{ relative_path('data', 'wordforms.txt') }}
35 | }
36 | 
37 | indexer
38 | {
39 |     # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
40 |     # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
41 |     mem_limit = 256M
42 | 
43 |     # maximum IO calls per second (for I/O throttling)
44 |     # optional, default is 0 (unlimited)
45 |     #
46 |     # max_iops = 40
47 | 
48 |     # maximum IO call size, bytes (for I/O throttling)
49 |     # optional, default is 0 (unlimited)
50 |     #
51 |     max_iosize = 524288
52 | }
53 | 
54 | searchd
55 | {
56 |     listen = {{ SPHINX_HOST }}:{{ SPHINX_PORT }}
57 | 
58 |     # log file, searchd run info is logged here
59 |     # optional, default is 'searchd.log'
60 |     log = {{ relative_path('log', 'searchd.log') }}
61 | 
62 |     # query log file, all search queries are logged here
63 |     # optional, default is empty (do not log queries)
64 |     query_log = {{ relative_path('log', 'query.log') }}
65 | 
66 |     # client read timeout, seconds
67 |     # optional, default is 5
68 |     read_timeout = 5
69 | 
70 |     # maximum amount of children to fork (concurrent searches to run)
71 |     # optional, default is 0 (unlimited)
72 |     max_children = 30
73 | 
74 |     # PID file, searchd process ID file name
75 |     # mandatory
76 |     pid_file = {{ relative_path('run', 'searchd.pid') }}
77 | 
78 |     # max amount of matches the daemon ever keeps in RAM, per-index
79 |     # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
80 |     # default is 1000 (just like Google)
81 |     max_matches = 1000
82 | 
83 |     # seamless rotate, prevents rotate stalls if precaching huge datasets
84 |     # optional, default is 1
85 |     seamless_rotate = 1
86 | 
87 |     # whether to forcibly preopen all indexes on startup
88 |     # optional, default is 0 (do not preopen)
89 |     preopen_indexes = 0
90 | 
91 |     # whether to unlink .old index copies on successful rotation.
92 |     # optional, default is 1 (do unlink)
93 |     unlink_old = 1
94 | }
--------------------------------------------------------------------------------
/djangosphinx/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from config import *
--------------------------------------------------------------------------------
/djangosphinx/utils/config.py:
--------------------------------------------------------------------------------
 1 | from django.conf import settings
 2 | from django.template import Template, Context
 3 | 
 4 | from django.db import models
 5 | from django.contrib.contenttypes.models import ContentType
 6 | 
 7 | import os.path
 8 | 
 9 | import djangosphinx.apis.current as sphinxapi
10 | 
11 | __all__ = ('generate_config_for_model', 'generate_config_for_models')
12 | 
13 | def _get_database_engine():
14 |     if settings.DATABASE_ENGINE == 'mysql':
15 |         return settings.DATABASE_ENGINE
16 |     elif settings.DATABASE_ENGINE.startswith('postgresql'):
17 |         return 'pgsql'
18 |     raise ValueError, "Only MySQL and PostgreSQL engines are supported by Sphinx."
19 | 
20 | def _get_template(name):
21 |     paths = (
22 |         os.path.join(os.path.dirname(__file__), '../apis/api%s/templates/' % (sphinxapi.VER_COMMAND_SEARCH,)),
23 |         os.path.join(os.path.dirname(__file__), '../templates/'),
24 |     )
25 |     for path in paths:
26 |         try:
27 |             fp = open(path + name, 'r')
28 |         except IOError:
29 |             continue
30 |         try:
31 |             t = Template(fp.read())
32 |             return t
33 |         finally:
34 |             fp.close()
35 |     raise ValueError, "Template matching name does not exist: %s." % (name,)
36 | 
37 | def _is_sourcable_field(field):
38 |     # We can use float fields in 0.9.8
39 |     if sphinxapi.VER_COMMAND_SEARCH >= 0x113 and (isinstance(field, models.FloatField) or isinstance(field, models.DecimalField)):
40 |         return True
41 |     elif isinstance(field, models.ForeignKey):
42 |         return True
43 |     elif isinstance(field, models.IntegerField) and field.choices:
44 |         return True
45 |     elif not field.rel:
46 |         return True
47 |     return False
48 | 
49 | # No trailing slashes on paths
50 | DEFAULT_SPHINX_PARAMS = {
51 |     'database_engine': _get_database_engine(),
52 |     'database_host': settings.DATABASE_HOST,
53 |     'database_port': settings.DATABASE_PORT,
54 |     'database_name': settings.DATABASE_NAME,
55 |     'database_user': settings.DATABASE_USER,
56 |     'database_password': settings.DATABASE_PASSWORD,
57 |     'log_file': '/var/log/sphinx/searchd.log',
58 |     'data_path': '/var/data',
59 | }
60 | 
61 | def get_index_context(index):
62 |     params = DEFAULT_SPHINX_PARAMS.copy()  # copy so we never mutate the module-level defaults
63 |     params.update({
64 |         'index_name': index,
65 |         'source_name': index,
66 |     })
67 | 
68 |     return params
69 | 
70 | def get_source_context(tables, index, valid_fields):
71 |     params = DEFAULT_SPHINX_PARAMS.copy()
72 |     params.update({
73 |         'tables': tables,
74 |         'source_name': index,
75 |         'index_name': index,
76 |         'database_engine': _get_database_engine(),
77 |         'field_names': [f[1] for f in valid_fields],
78 |         'group_columns': [f[1] for f in valid_fields if f[2] or issubclass(f[0], models.BooleanField) or issubclass(f[0], models.IntegerField)],
79 |         'date_columns': [f[1] for f in valid_fields if issubclass(f[0], models.DateTimeField) or issubclass(f[0], models.DateField)],
80 |         'float_columns': [f[1] for f in valid_fields if issubclass(f[0], models.FloatField) or issubclass(f[0], models.DecimalField)],
81 |     })
82 |     try:
83 |         from django.contrib.gis.db.models import PointField
84 |         params.update({
85 |             'gis_columns': [f[1] for f in valid_fields if issubclass(f[0], PointField)],
86 |             'srid': getattr(settings, 'GIS_SRID', 4326), # reasonable lat/lng default
87 |         })
88 |         if params['database_engine'] == 'pgsql' and params['gis_columns']:
89 |             params['field_names'].extend(["radians(ST_X(ST_Transform(%(field_name)s, %(srid)s))) AS %(field_name)s_longitude, radians(ST_Y(ST_Transform(%(field_name)s, %(srid)s))) AS %(field_name)s_latitude" % {'field_name': f, 'srid': params['srid']} for f in params['gis_columns']])
90 |     except ImportError:
91 |         # GIS not supported
92 |         pass
93 |     return params
94 | 
95 | # Generate for single models
96 | 
97 | def generate_config_for_model(model_class, index=None, sphinx_params={}):
98 |     """
99 |     Generates a sample configuration including an index and source for
100 |     the given model which includes all attributes and date fields.
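
    A usage sketch (the Document model here is assumed, not part of this
    module)::

        from djangosphinx.utils.config import generate_config_for_model
        print generate_config_for_model(Document, index='documents')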
101 | """ 102 | return generate_source_for_model(model_class, index, sphinx_params) + "\n\n" + generate_index_for_model(model_class, index, sphinx_params) 103 | 104 | def generate_index_for_model(model_class, index=None, sphinx_params={}): 105 | """Generates a source configmration for a model.""" 106 | t = _get_template('index.conf') 107 | 108 | if index is None: 109 | index = model_class._meta.db_table 110 | 111 | params = get_index_context(index) 112 | params.update(sphinx_params) 113 | 114 | c = Context(params) 115 | 116 | return t.render(c) 117 | 118 | def generate_source_for_model(model_class, index=None, sphinx_params={}): 119 | """Generates a source configmration for a model.""" 120 | t = _get_template('source.conf') 121 | 122 | def _the_tuple(f): 123 | return (f.__class__, f.column, getattr(f.rel, 'to', None), f.choices) 124 | 125 | valid_fields = [_the_tuple(f) for f in model_class._meta.fields if _is_sourcable_field(f)] 126 | 127 | table = model_class._meta.db_table 128 | 129 | if index is None: 130 | index = table 131 | 132 | params = get_source_context([table], index, valid_fields) 133 | params.update({ 134 | 'table_name': table, 135 | 'primary_key': model_class._meta.pk.column, 136 | }) 137 | params.update(sphinx_params) 138 | 139 | c = Context(params) 140 | 141 | return t.render(c) 142 | 143 | # Generate for multiple models (search UNIONs) 144 | 145 | def generate_config_for_models(model_classes, index=None, sphinx_params={}): 146 | """ 147 | Generates a sample configuration including an index and source for 148 | the given model which includes all attributes and date fields. 149 | """ 150 | return generate_source_for_models(model_classes, index, sphinx_params) + "\n\n" + generate_index_for_models(model_classes, index, sphinx_params) 151 | 152 | def generate_index_for_models(model_classes, index=None, sphinx_params={}): 153 | """Generates a source configmration for a model.""" 154 | t = _get_template('index-multiple.conf') 155 | 156 | if index is None: 157 | index = '_'.join(m._meta.db_table for m in model_classes) 158 | 159 | params = get_index_context(index) 160 | params.update(sphinx_params) 161 | 162 | c = Context(params) 163 | 164 | return t.render(c) 165 | 166 | def generate_source_for_models(model_classes, index=None, sphinx_params={}): 167 | """Generates a source configmration for a model.""" 168 | t = _get_template('source-multiple.conf') 169 | 170 | # We need to loop through each model and find only the fields that exist *exactly* the 171 | # same across models. 
172 |     def _the_tuple(f):
173 |         return (f.__class__, f.column, getattr(f.rel, 'to', None), f.choices)
174 | 
175 |     valid_fields = [_the_tuple(f) for f in model_classes[0]._meta.fields if _is_sourcable_field(f)]
176 |     for model_class in model_classes[1:]:
177 |         valid_fields = [_the_tuple(f) for f in model_class._meta.fields if _the_tuple(f) in valid_fields]
178 | 
179 |     tables = []
180 |     for model_class in model_classes:
181 |         tables.append((model_class._meta.db_table, ContentType.objects.get_for_model(model_class)))
182 | 
183 |     if index is None:
184 |         index = '_'.join(m._meta.db_table for m in model_classes)
185 | 
186 |     params = get_source_context(tables, index, valid_fields)
187 |     params.update(sphinx_params)
188 | 
189 |     c = Context(params)
190 | 
191 |     return t.render(c)
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | PAPER         =
 8 | 
 9 | # Internal variables.
10 | PAPEROPT_a4     = -D latex_paper_size=a4
11 | PAPEROPT_letter = -D latex_paper_size=letter
12 | ALLSPHINXOPTS   = -d _build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
13 | 
14 | .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
15 | 
16 | help:
17 | 	@echo "Please use \`make <target>' where <target> is one of"
18 | 	@echo "  html      to make standalone HTML files"
19 | 	@echo "  dirhtml   to make HTML files named index.html in directories"
20 | 	@echo "  pickle    to make pickle files"
21 | 	@echo "  json      to make JSON files"
22 | 	@echo "  htmlhelp  to make HTML files and a HTML help project"
23 | 	@echo "  qthelp    to make HTML files and a qthelp project"
24 | 	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
25 | 	@echo "  changes   to make an overview of all changed/added/deprecated items"
26 | 	@echo "  linkcheck to check all external links for integrity"
27 | 	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
28 | 
29 | clean:
30 | 	-rm -rf _build/*
31 | 
32 | html:
33 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) _build/html
34 | 	@echo
35 | 	@echo "Build finished. The HTML pages are in _build/html."
36 | 
37 | dirhtml:
38 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) _build/dirhtml
39 | 	@echo
40 | 	@echo "Build finished. The HTML pages are in _build/dirhtml."
41 | 
42 | pickle:
43 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) _build/pickle
44 | 	@echo
45 | 	@echo "Build finished; now you can process the pickle files."
46 | 
47 | json:
48 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) _build/json
49 | 	@echo
50 | 	@echo "Build finished; now you can process the JSON files."
51 | 
52 | htmlhelp:
53 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) _build/htmlhelp
54 | 	@echo
55 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
56 | 	      ".hhp project file in _build/htmlhelp."
57 | 
58 | qthelp:
59 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) _build/qthelp
60 | 	@echo
61 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
62 | 	      ".qhcp project file in _build/qthelp, like this:"
63 | 	@echo "# qcollectiongenerator _build/qthelp/django-sphinx.qhcp"
64 | 	@echo "To view the help file:"
65 | 	@echo "# assistant -collectionFile _build/qthelp/django-sphinx.qhc"
66 | 
67 | latex:
68 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) _build/latex
69 | 	@echo
70 | 	@echo "Build finished; the LaTeX files are in _build/latex."
71 | @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ 72 | "run these through (pdf)latex." 73 | 74 | changes: 75 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) _build/changes 76 | @echo 77 | @echo "The overview file is in _build/changes." 78 | 79 | linkcheck: 80 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) _build/linkcheck 81 | @echo 82 | @echo "Link check complete; look for any errors in the above output " \ 83 | "or in _build/linkcheck/output.txt." 84 | 85 | doctest: 86 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) _build/doctest 87 | @echo "Testing of doctests in the sources finished, look at the " \ 88 | "results in _build/doctest/output.txt." 89 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # django-sphinx documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Sep 14 22:31:42 2009. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.append(os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # Add any Sphinx extension module names here, as strings. They can be extensions 24 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 25 | extensions = ['sphinx.ext.coverage'] 26 | 27 | # Add any paths that contain templates here, relative to this directory. 28 | templates_path = ['_templates'] 29 | 30 | # The suffix of source filenames. 31 | source_suffix = '.rst' 32 | 33 | # The encoding of source files. 34 | #source_encoding = 'utf-8' 35 | 36 | # The master toctree document. 37 | master_doc = 'index' 38 | 39 | # General information about the project. 40 | project = u'django-sphinx' 41 | copyright = u'2009, David Cramer' 42 | 43 | # The version info for the project you're documenting, acts as replacement for 44 | # |version| and |release|, also used in various other places throughout the 45 | # built documents. 46 | # 47 | # The short X.Y version. 48 | version = '2.1.1' 49 | # The full version, including alpha/beta/rc tags. 50 | release = '2.1.1' 51 | 52 | # The language for content autogenerated by Sphinx. Refer to documentation 53 | # for a list of supported languages. 54 | #language = None 55 | 56 | # There are two options for replacing |today|: either, you set today to some 57 | # non-false value, then it is used: 58 | #today = '' 59 | # Else, today_fmt is used as the format for a strftime call. 60 | #today_fmt = '%B %d, %Y' 61 | 62 | # List of documents that shouldn't be included in the build. 63 | #unused_docs = [] 64 | 65 | # List of directories, relative to source directory, that shouldn't be searched 66 | # for source files. 67 | exclude_trees = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. 
cross-reference text.
73 | #add_function_parentheses = True
74 | 
75 | # If true, the current module name will be prepended to all description
76 | # unit titles (such as .. function::).
77 | #add_module_names = True
78 | 
79 | # If true, sectionauthor and moduleauthor directives will be shown in the
80 | # output. They are ignored by default.
81 | #show_authors = False
82 | 
83 | # The name of the Pygments (syntax highlighting) style to use.
84 | pygments_style = 'sphinx'
85 | 
86 | # A list of ignored prefixes for module index sorting.
87 | #modindex_common_prefix = []
88 | 
89 | 
90 | # -- Options for HTML output ---------------------------------------------------
91 | 
92 | # The theme to use for HTML and HTML Help pages.  Major themes that come with
93 | # Sphinx are currently 'default' and 'sphinxdoc'.
94 | html_theme = 'default'
95 | 
96 | # Theme options are theme-specific and customize the look and feel of a theme
97 | # further.  For a list of options available for each theme, see the
98 | # documentation.
99 | #html_theme_options = {}
100 | 
101 | # Add any paths that contain custom themes here, relative to this directory.
102 | #html_theme_path = []
103 | 
104 | # The name for this set of Sphinx documents.  If None, it defaults to
105 | # "<project> v<release> documentation".
106 | #html_title = None
107 | 
108 | # A shorter title for the navigation bar.  Default is the same as html_title.
109 | #html_short_title = None
110 | 
111 | # The name of an image file (relative to this directory) to place at the top
112 | # of the sidebar.
113 | #html_logo = None
114 | 
115 | # The name of an image file (within the static path) to use as favicon of the
116 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
117 | # pixels large.
118 | #html_favicon = None
119 | 
120 | # Add any paths that contain custom static files (such as style sheets) here,
121 | # relative to this directory. They are copied after the builtin static files,
122 | # so a file named "default.css" will overwrite the builtin "default.css".
123 | html_static_path = ['_static']
124 | 
125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
126 | # using the given strftime format.
127 | #html_last_updated_fmt = '%b %d, %Y'
128 | 
129 | # If true, SmartyPants will be used to convert quotes and dashes to
130 | # typographically correct entities.
131 | #html_use_smartypants = True
132 | 
133 | # Custom sidebar templates, maps document names to template names.
134 | #html_sidebars = {}
135 | 
136 | # Additional templates that should be rendered to pages, maps page names to
137 | # template names.
138 | #html_additional_pages = {}
139 | 
140 | # If false, no module index is generated.
141 | #html_use_modindex = True
142 | 
143 | # If false, no index is generated.
144 | #html_use_index = True
145 | 
146 | # If true, the index is split into individual pages for each letter.
147 | #html_split_index = False
148 | 
149 | # If true, links to the reST sources are added to the pages.
150 | #html_show_sourcelink = True
151 | 
152 | # If true, an OpenSearch description file will be output, and all pages will
153 | # contain a <link> tag referring to it.  The value of this option must be the
154 | # base URL from which the finished HTML is served.
155 | #html_use_opensearch = ''
156 | 
157 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
158 | #html_file_suffix = ''
159 | 
160 | # Output file base name for HTML help builder.
161 | htmlhelp_basename = 'django-sphinxdoc'
162 | 
163 | 
164 | # -- Options for LaTeX output --------------------------------------------------
165 | 
166 | # The paper size ('letter' or 'a4').
167 | #latex_paper_size = 'letter'
168 | 
169 | # The font size ('10pt', '11pt' or '12pt').
170 | #latex_font_size = '10pt'
171 | 
172 | # Grouping the document tree into LaTeX files. List of tuples
173 | # (source start file, target name, title, author, documentclass [howto/manual]).
174 | latex_documents = [
175 |   ('index', 'django-sphinx.tex', u'django-sphinx Documentation',
176 |    u'David Cramer', 'manual'),
177 | ]
178 | 
179 | # The name of an image file (relative to this directory) to place at the top of
180 | # the title page.
181 | #latex_logo = None
182 | 
183 | # For "manual" documents, if this is true, then toplevel headings are parts,
184 | # not chapters.
185 | #latex_use_parts = False
186 | 
187 | # Additional stuff for the LaTeX preamble.
188 | #latex_preamble = ''
189 | 
190 | # Documents to append as an appendix to all manuals.
191 | #latex_appendices = []
192 | 
193 | # If false, no module index is generated.
194 | #latex_use_modindex = True
195 | 
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. django-sphinx documentation master file, created by
 2 |    sphinx-quickstart on Mon Sep 14 22:31:42 2009.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to django-sphinx's documentation!
 7 | =========================================
 8 | 
 9 | Contents:
10 | 
11 | .. toctree::
12 |    :maxdepth: 2
13 | 
14 | Indices and tables
15 | ==================
16 | 
17 | * :ref:`genindex`
18 | * :ref:`modindex`
19 | * :ref:`search`
20 | 
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | REM Command file for Sphinx documentation
 4 | 
 5 | set SPHINXBUILD=sphinx-build
 6 | set ALLSPHINXOPTS=-d _build/doctrees %SPHINXOPTS% .
 7 | if NOT "%PAPER%" == "" (
 8 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 9 | )
10 | 
11 | if "%1" == "" goto help
12 | 
13 | if "%1" == "help" (
14 | 	:help
15 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
16 | 	echo.  html      to make standalone HTML files
17 | 	echo.  dirhtml   to make HTML files named index.html in directories
18 | 	echo.  pickle    to make pickle files
19 | 	echo.  json      to make JSON files
20 | 	echo.  htmlhelp  to make HTML files and a HTML help project
21 | 	echo.  qthelp    to make HTML files and a qthelp project
22 | 	echo.  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter
23 | 	echo.  changes   to make an overview over all changed/added/deprecated items
24 | 	echo.  linkcheck to check all external links for integrity
25 | 	echo.  doctest   to run all doctests embedded in the documentation if enabled
26 | 	goto end
27 | )
28 | 
29 | if "%1" == "clean" (
30 | 	for /d %%i in (_build\*) do rmdir /q /s %%i
31 | 	del /q /s _build\*
32 | 	goto end
33 | )
34 | 
35 | if "%1" == "html" (
36 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% _build/html
37 | 	echo.
38 | 	echo.Build finished. The HTML pages are in _build/html.
39 | 	goto end
40 | )
41 | 
42 | if "%1" == "dirhtml" (
43 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% _build/dirhtml
44 | 	echo.
45 | 	echo.Build finished. The HTML pages are in _build/dirhtml.
46 | 	goto end
47 | )
48 | 
49 | if "%1" == "pickle" (
50 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% _build/pickle
51 | 	echo.
52 | 	echo.Build finished; now you can process the pickle files.
53 | 	goto end
54 | )
55 | 
56 | if "%1" == "json" (
57 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% _build/json
58 | 	echo.
59 | 	echo.Build finished; now you can process the JSON files.
60 | 	goto end
61 | )
62 | 
63 | if "%1" == "htmlhelp" (
64 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% _build/htmlhelp
65 | 	echo.
66 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
67 | .hhp project file in _build/htmlhelp.
68 | 	goto end
69 | )
70 | 
71 | if "%1" == "qthelp" (
72 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% _build/qthelp
73 | 	echo.
74 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
75 | .qhcp project file in _build/qthelp, like this:
76 | 	echo.^> qcollectiongenerator _build\qthelp\django-sphinx.qhcp
77 | 	echo.To view the help file:
78 | 	echo.^> assistant -collectionFile _build\qthelp\django-sphinx.qhc
79 | 	goto end
80 | )
81 | 
82 | if "%1" == "latex" (
83 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% _build/latex
84 | 	echo.
85 | 	echo.Build finished; the LaTeX files are in _build/latex.
86 | 	goto end
87 | )
88 | 
89 | if "%1" == "changes" (
90 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% _build/changes
91 | 	echo.
92 | 	echo.The overview file is in _build/changes.
93 | 	goto end
94 | )
95 | 
96 | if "%1" == "linkcheck" (
97 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% _build/linkcheck
98 | 	echo.
99 | 	echo.Link check complete; look for any errors in the above output ^
100 | or in _build/linkcheck/output.txt.
101 | 	goto end
102 | )
103 | 
104 | if "%1" == "doctest" (
105 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% _build/doctest
106 | 	echo.
107 | 	echo.Testing of doctests in the sources finished, look at the ^
108 | results in _build/doctest/output.txt.
109 | 	goto end
110 | )
111 | 
112 | :end
113 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from setuptools import setup, find_packages
 4 | 
 5 | import djangosphinx
 6 | 
 7 | setup(
 8 |     name='django-sphinx',
 9 |     version=".".join(map(str, djangosphinx.__version__)),
10 |     author='David Cramer',
11 |     author_email='dcramer@gmail.com',
12 |     url='http://github.com/dcramer/django-sphinx',
13 |     install_requires=['django'],
14 |     description='An integration layer bringing Django and Sphinx Search together.',
15 |     packages=find_packages(),
16 |     include_package_data=True,
17 |     classifiers=[
18 |         "Framework :: Django",
19 |         "Intended Audience :: Developers",
20 |         "Intended Audience :: System Administrators",
21 |         "Operating System :: OS Independent",
22 |         "Topic :: Software Development"
23 |     ],
24 | )
25 | 
--------------------------------------------------------------------------------
/sphinxtest/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dcramer/django-sphinx/0071d1cae5390d0ec8c669786ca3c7275abb6410/sphinxtest/__init__.py
--------------------------------------------------------------------------------
/sphinxtest/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from django.core.management import execute_manager
 3 | try:
 4 |     import settings # Assumed to be in the same directory.
5 | except ImportError: 6 | import sys 7 | sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__) 8 | sys.exit(1) 9 | 10 | if __name__ == "__main__": 11 | execute_manager(settings) 12 | -------------------------------------------------------------------------------- /sphinxtest/settings.py: -------------------------------------------------------------------------------- 1 | # Django settings for sphinxtest project. 2 | 3 | DEBUG = True 4 | TEMPLATE_DEBUG = DEBUG 5 | 6 | ADMINS = ( 7 | # ('Your Name', 'your_email@domain.com'), 8 | ) 9 | 10 | MANAGERS = ADMINS 11 | 12 | DATABASE_ENGINE = 'mysql' # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'. 13 | DATABASE_NAME = 'sphinxtest' # Or path to database file if using sqlite3. 14 | DATABASE_USER = 'sphinxtest' # Not used with sqlite3. 15 | DATABASE_PASSWORD = 'sphinxtest' # Not used with sqlite3. 16 | DATABASE_HOST = '' # Set to empty string for localhost. Not used with sqlite3. 17 | DATABASE_PORT = '' # Set to empty string for default. Not used with sqlite3. 18 | 19 | # Local time zone for this installation. Choices can be found here: 20 | # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name 21 | # although not all choices may be available on all operating systems. 22 | # If running in a Windows environment this must be set to the same as your 23 | # system time zone. 24 | TIME_ZONE = 'America/Chicago' 25 | 26 | # Language code for this installation. All choices can be found here: 27 | # http://www.i18nguy.com/unicode/language-identifiers.html 28 | LANGUAGE_CODE = 'en-us' 29 | 30 | SITE_ID = 1 31 | 32 | # If you set this to False, Django will make some optimizations so as not 33 | # to load the internationalization machinery. 34 | USE_I18N = True 35 | 36 | # Absolute path to the directory that holds media. 37 | # Example: "/home/media/media.lawrence.com/" 38 | MEDIA_ROOT = '' 39 | 40 | # URL that handles the media served from MEDIA_ROOT. Make sure to use a 41 | # trailing slash if there is a path component (optional in other cases). 42 | # Examples: "http://media.lawrence.com", "http://example.com/media/" 43 | MEDIA_URL = '' 44 | 45 | # URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a 46 | # trailing slash. 47 | # Examples: "http://foo.com/media/", "/media/". 48 | ADMIN_MEDIA_PREFIX = '/media/' 49 | 50 | # Make this unique, and don't share it with anybody. 51 | SECRET_KEY = 'e-9mk4k#jf-u5aj!*6nzt@2f*mm3wgk15k+*f%%1l6hc!t**ai' 52 | 53 | # List of callables that know how to import templates from various sources. 54 | TEMPLATE_LOADERS = ( 55 | 'django.template.loaders.filesystem.load_template_source', 56 | 'django.template.loaders.app_directories.load_template_source', 57 | # 'django.template.loaders.eggs.load_template_source', 58 | ) 59 | 60 | MIDDLEWARE_CLASSES = ( 61 | 'django.middleware.common.CommonMiddleware', 62 | ) 63 | 64 | ROOT_URLCONF = 'sphinxtest.urls' 65 | 66 | TEMPLATE_DIRS = ( 67 | # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". 68 | # Always use forward slashes, even on Windows. 69 | # Don't forget to use absolute paths, not relative paths. 
70 | ) 71 | 72 | INSTALLED_APPS = ( 73 | 'django.contrib.contenttypes', 74 | 'sphinxtest.tests', 75 | ) 76 | -------------------------------------------------------------------------------- /sphinxtest/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcramer/django-sphinx/0071d1cae5390d0ec8c669786ca3c7275abb6410/sphinxtest/tests/__init__.py -------------------------------------------------------------------------------- /sphinxtest/tests/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | from djangosphinx import SphinxSearch 4 | 5 | import datetime 6 | 7 | class Group(models.Model): 8 | name = models.CharField(max_length=32) 9 | 10 | class Document(models.Model): 11 | group = models.ForeignKey(Group) 12 | date_added = models.DateTimeField(default=datetime.datetime.now) 13 | title = models.CharField(max_length=32) 14 | content = models.TextField() 15 | 16 | search = SphinxSearch(index="test") 17 | 18 | class Meta: 19 | db_table = 'documents' -------------------------------------------------------------------------------- /sphinxtest/tests/views.py: -------------------------------------------------------------------------------- 1 | # Create your views here. 2 | -------------------------------------------------------------------------------- /sphinxtest/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls.defaults import * 2 | 3 | urlpatterns = patterns('', 4 | # Example: 5 | # (r'^sphinxtest/', include('sphinxtest.foo.urls')), 6 | 7 | # Uncomment this for admin: 8 | # (r'^admin/', include('django.contrib.admin.urls')), 9 | ) 10 | --------------------------------------------------------------------------------