├── setup.py └── sphinxapi.py /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import os 3 | 4 | version_info = (0, 0, 1) 5 | __version__ = ".".join(map(str, version_info)) 6 | 7 | setup(name='sphinxapi', 8 | version=__version__, 9 | description="SphinxAPI", 10 | long_description="", 11 | keywords='', 12 | author='Andrew Aksyonoff', 13 | author_email='', 14 | url='http://sphinxsearch.com/', 15 | license='GPL', 16 | packages=find_packages(), 17 | include_package_data=True, 18 | zip_safe=False, 19 | ) 20 | -------------------------------------------------------------------------------- /sphinxapi.py: -------------------------------------------------------------------------------- 1 | # 2 | # $Id: sphinxapi.py 1775 2009-04-06 22:15:58Z shodan $ 3 | # 4 | # Python version of Sphinx searchd client (Python API) 5 | # 6 | # Copyright (c) 2006-2008, Andrew Aksyonoff 7 | # Copyright (c) 2006, Mike Osadnik 8 | # All rights reserved 9 | # 10 | # This program is free software; you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License. You should have 12 | # received a copy of the GPL license along with this program; if you 13 | # did not, you can find it at http://www.gnu.org/ 14 | # 15 | 16 | import sys 17 | import select 18 | import socket 19 | import re 20 | from struct import * 21 | 22 | 23 | # known searchd commands 24 | SEARCHD_COMMAND_SEARCH = 0 25 | SEARCHD_COMMAND_EXCERPT = 1 26 | SEARCHD_COMMAND_UPDATE = 2 27 | SEARCHD_COMMAND_KEYWORDS= 3 28 | SEARCHD_COMMAND_PERSIST = 4 29 | 30 | # current client-side command implementation versions 31 | VER_COMMAND_SEARCH = 0x116 32 | VER_COMMAND_EXCERPT = 0x100 33 | VER_COMMAND_UPDATE = 0x101 34 | VER_COMMAND_KEYWORDS = 0x100 35 | 36 | # known searchd status codes 37 | SEARCHD_OK = 0 38 | SEARCHD_ERROR = 1 39 | SEARCHD_RETRY = 2 40 | SEARCHD_WARNING = 3 41 | 42 | # known match modes 43 | SPH_MATCH_ALL = 0 44 | SPH_MATCH_ANY = 1 45 | SPH_MATCH_PHRASE = 2 46 | SPH_MATCH_BOOLEAN = 3 47 | SPH_MATCH_EXTENDED = 4 48 | SPH_MATCH_FULLSCAN = 5 49 | SPH_MATCH_EXTENDED2 = 6 50 | 51 | # known ranking modes (extended2 mode only) 52 | SPH_RANK_PROXIMITY_BM25 = 0 # default mode, phrase proximity major factor and BM25 minor one 53 | SPH_RANK_BM25 = 1 # statistical mode, BM25 ranking only (faster but worse quality) 54 | SPH_RANK_NONE = 2 # no ranking, all matches get a weight of 1 55 | SPH_RANK_WORDCOUNT = 3 # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts 56 | 57 | # known sort modes 58 | SPH_SORT_RELEVANCE = 0 59 | SPH_SORT_ATTR_DESC = 1 60 | SPH_SORT_ATTR_ASC = 2 61 | SPH_SORT_TIME_SEGMENTS = 3 62 | SPH_SORT_EXTENDED = 4 63 | SPH_SORT_EXPR = 5 64 | 65 | # known filter types 66 | SPH_FILTER_VALUES = 0 67 | SPH_FILTER_RANGE = 1 68 | SPH_FILTER_FLOATRANGE = 2 69 | 70 | # known attribute types 71 | SPH_ATTR_NONE = 0 72 | SPH_ATTR_INTEGER = 1 73 | SPH_ATTR_TIMESTAMP = 2 74 | SPH_ATTR_ORDINAL = 3 75 | SPH_ATTR_BOOL = 4 76 | SPH_ATTR_FLOAT = 5 77 | SPH_ATTR_BIGINT = 6 78 | SPH_ATTR_MULTI = 0X40000000L 79 | 80 | SPH_ATTR_TYPES = (SPH_ATTR_NONE, 81 | SPH_ATTR_INTEGER, 82 | SPH_ATTR_TIMESTAMP, 83 | SPH_ATTR_ORDINAL, 84 | SPH_ATTR_BOOL, 85 | SPH_ATTR_FLOAT, 86 | SPH_ATTR_BIGINT, 87 | SPH_ATTR_MULTI) 88 | 89 | # known grouping functions 90 | SPH_GROUPBY_DAY = 0 91 | SPH_GROUPBY_WEEK = 1 92 | SPH_GROUPBY_MONTH = 2 93 | SPH_GROUPBY_YEAR = 3 94 | SPH_GROUPBY_ATTR = 4 95 | SPH_GROUPBY_ATTRPAIR = 5 96 | 97 | 98 | class SphinxClient: 99 | def __init__ (self): 100 | """ 101 | Create a new client object, and fill defaults. 102 | """ 103 | self._host = 'localhost' # searchd host (default is "localhost") 104 | self._port = 3312 # searchd port (default is 3312) 105 | self._path = None # searchd unix-domain socket path 106 | self._socket = None 107 | self._offset = 0 # how much records to seek from result-set start (default is 0) 108 | self._limit = 20 # how much records to return from result-set starting at offset (default is 20) 109 | self._mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL) 110 | self._weights = [] # per-field weights (default is 1 for all fields) 111 | self._sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE) 112 | self._sortby = '' # attribute to sort by (defualt is "") 113 | self._min_id = 0 # min ID to match (default is 0) 114 | self._max_id = 0 # max ID to match (default is UINT_MAX) 115 | self._filters = [] # search filters 116 | self._groupby = '' # group-by attribute name 117 | self._groupfunc = SPH_GROUPBY_DAY # group-by function (to pre-process group-by attribute value with) 118 | self._groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with) 119 | self._groupdistinct = '' # group-by count-distinct attribute 120 | self._maxmatches = 1000 # max matches to retrieve 121 | self._cutoff = 0 # cutoff to stop searching at 122 | self._retrycount = 0 # distributed retry count 123 | self._retrydelay = 0 # distributed retry delay 124 | self._anchor = {} # geographical anchor point 125 | self._indexweights = {} # per-index weights 126 | self._ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode 127 | self._maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit) 128 | self._fieldweights = {} # per-field-name weights 129 | self._overrides = {} # per-query attribute values overrides 130 | self._select = '*' # select-list (attributes or expressions, with optional aliases) 131 | 132 | self._error = '' # last error message 133 | self._warning = '' # last warning message 134 | self._reqs = [] # requests array for multi-query 135 | 136 | def __del__ (self): 137 | if self._socket: 138 | self._socket.close() 139 | 140 | 141 | def GetLastError (self): 142 | """ 143 | Get last error message (string). 144 | """ 145 | return self._error 146 | 147 | 148 | def GetLastWarning (self): 149 | """ 150 | Get last warning message (string). 151 | """ 152 | return self._warning 153 | 154 | 155 | def SetServer (self, host, port = None): 156 | """ 157 | Set searchd server host and port. 158 | """ 159 | assert(isinstance(host, str)) 160 | if host.startswith('/'): 161 | self._path = host 162 | return 163 | elif host.startswith('unix://'): 164 | self._path = host[7:] 165 | return 166 | assert(isinstance(port, int)) 167 | self._host = host 168 | self._port = port 169 | self._path = None 170 | 171 | 172 | def _Connect (self): 173 | """ 174 | INTERNAL METHOD, DO NOT CALL. Connects to searchd server. 175 | """ 176 | if self._socket: 177 | return self._socket 178 | try: 179 | if self._path: 180 | af = socket.AF_UNIX 181 | addr = self._path 182 | desc = self._path 183 | else: 184 | af = socket.AF_INET 185 | addr = ( self._host, self._port ) 186 | desc = '%s;%s' % addr 187 | sock = socket.socket ( af, socket.SOCK_STREAM ) 188 | sock.connect ( addr ) 189 | except socket.error, msg: 190 | if sock: 191 | sock.close() 192 | self._error = 'connection to %s failed (%s)' % ( desc, msg ) 193 | return 194 | 195 | v = unpack('>L', sock.recv(4)) 196 | if v<1: 197 | sock.close() 198 | self._error = 'expected searchd protocol version, got %s' % v 199 | return 200 | 201 | # all ok, send my version 202 | sock.send(pack('>L', 1)) 203 | return sock 204 | 205 | 206 | def _GetResponse (self, sock, client_ver): 207 | """ 208 | INTERNAL METHOD, DO NOT CALL. Gets and checks response packet from searchd server. 209 | """ 210 | (status, ver, length) = unpack('>2HL', sock.recv(8)) 211 | response = '' 212 | left = length 213 | while left>0: 214 | chunk = sock.recv(left) 215 | if chunk: 216 | response += chunk 217 | left -= len(chunk) 218 | else: 219 | break 220 | 221 | if not self._socket: 222 | sock.close() 223 | 224 | # check response 225 | read = len(response) 226 | if not response or read!=length: 227 | if length: 228 | self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \ 229 | % (status, ver, length, read) 230 | else: 231 | self._error = 'received zero-sized searchd response' 232 | return None 233 | 234 | # check status 235 | if status==SEARCHD_WARNING: 236 | wend = 4 + unpack ( '>L', response[0:4] )[0] 237 | self._warning = response[4:wend] 238 | return response[wend:] 239 | 240 | if status==SEARCHD_ERROR: 241 | self._error = 'searchd error: '+response[4:] 242 | return None 243 | 244 | if status==SEARCHD_RETRY: 245 | self._error = 'temporary searchd error: '+response[4:] 246 | return None 247 | 248 | if status!=SEARCHD_OK: 249 | self._error = 'unknown status code %d' % status 250 | return None 251 | 252 | # check version 253 | if ver>8, ver&0xff, client_ver>>8, client_ver&0xff) 256 | 257 | return response 258 | 259 | 260 | def SetLimits (self, offset, limit, maxmatches=0, cutoff=0): 261 | """ 262 | Set offset and count into result set, and optionally set max-matches and cutoff limits. 263 | """ 264 | assert ( type(offset) in [int,long] and 0<=offset<16777216 ) 265 | assert ( type(limit) in [int,long] and 0=0) 267 | self._offset = offset 268 | self._limit = limit 269 | if maxmatches>0: 270 | self._maxmatches = maxmatches 271 | if cutoff>=0: 272 | self._cutoff = cutoff 273 | 274 | 275 | def SetMaxQueryTime (self, maxquerytime): 276 | """ 277 | Set maximum query time, in milliseconds, per-index. 0 means 'do not limit'. 278 | """ 279 | assert(isinstance(maxquerytime,int) and maxquerytime>0) 280 | self._maxquerytime = maxquerytime 281 | 282 | 283 | def SetMatchMode (self, mode): 284 | """ 285 | Set matching mode. 286 | """ 287 | assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED, SPH_MATCH_FULLSCAN, SPH_MATCH_EXTENDED2]) 288 | self._mode = mode 289 | 290 | 291 | def SetRankingMode (self, ranker): 292 | """ 293 | Set ranking mode. 294 | """ 295 | assert(ranker in [SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE, SPH_RANK_WORDCOUNT]) 296 | self._ranker = ranker 297 | 298 | 299 | def SetSortMode ( self, mode, clause='' ): 300 | """ 301 | Set sorting mode. 302 | """ 303 | assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR] ) 304 | assert ( isinstance ( clause, str ) ) 305 | self._sort = mode 306 | self._sortby = clause 307 | 308 | 309 | def SetWeights (self, weights): 310 | """ 311 | Set per-field weights. 312 | WARNING, DEPRECATED; do not use it! use SetFieldWeights() instead 313 | """ 314 | assert(isinstance(weights, list)) 315 | for w in weights: 316 | assert(isinstance(w, int)) 317 | self._weights = weights 318 | 319 | 320 | def SetFieldWeights (self, weights): 321 | """ 322 | Bind per-field weights by name; expects (name,field_weight) dictionary as argument. 323 | """ 324 | assert(isinstance(weights,dict)) 325 | for key,val in weights.items(): 326 | assert(isinstance(key,str)) 327 | assert(isinstance(val,int)) 328 | self._fieldweights = weights 329 | 330 | 331 | def SetIndexWeights (self, weights): 332 | """ 333 | Bind per-index weights by name; expects (name,index_weight) dictionary as argument. 334 | """ 335 | assert(isinstance(weights,dict)) 336 | for key,val in weights.items(): 337 | assert(isinstance(key,str)) 338 | assert(isinstance(val,int)) 339 | self._indexweights = weights 340 | 341 | 342 | def SetIDRange (self, minid, maxid): 343 | """ 344 | Set IDs range to match. 345 | Only match records if document ID is beetwen $min and $max (inclusive). 346 | """ 347 | assert(isinstance(minid, (int, long))) 348 | assert(isinstance(maxid, (int, long))) 349 | assert(minid<=maxid) 350 | self._min_id = minid 351 | self._max_id = maxid 352 | 353 | 354 | def SetFilter ( self, attribute, values, exclude=0 ): 355 | """ 356 | Set values set filter. 357 | Only match records where 'attribute' value is in given 'values' set. 358 | """ 359 | assert(isinstance(attribute, str)) 360 | assert iter(values) 361 | 362 | for value in values: 363 | assert(isinstance(value, int)) 364 | 365 | self._filters.append ( { 'type':SPH_FILTER_VALUES, 'attr':attribute, 'exclude':exclude, 'values':values } ) 366 | 367 | 368 | def SetFilterRange (self, attribute, min_, max_, exclude=0 ): 369 | """ 370 | Set range filter. 371 | Only match records if 'attribute' value is beetwen 'min_' and 'max_' (inclusive). 372 | """ 373 | assert(isinstance(attribute, str)) 374 | assert(isinstance(min_, int)) 375 | assert(isinstance(max_, int)) 376 | assert(min_<=max_) 377 | 378 | self._filters.append ( { 'type':SPH_FILTER_RANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } ) 379 | 380 | 381 | def SetFilterFloatRange (self, attribute, min_, max_, exclude=0 ): 382 | assert(isinstance(attribute,str)) 383 | assert(isinstance(min_,float)) 384 | assert(isinstance(max_,float)) 385 | assert(min_ <= max_) 386 | self._filters.append ( {'type':SPH_FILTER_FLOATRANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_} ) 387 | 388 | 389 | def SetGeoAnchor (self, attrlat, attrlong, latitude, longitude): 390 | assert(isinstance(attrlat,str)) 391 | assert(isinstance(attrlong,str)) 392 | assert(isinstance(latitude,float)) 393 | assert(isinstance(longitude,float)) 394 | self._anchor['attrlat'] = attrlat 395 | self._anchor['attrlong'] = attrlong 396 | self._anchor['lat'] = latitude 397 | self._anchor['long'] = longitude 398 | 399 | 400 | def SetGroupBy ( self, attribute, func, groupsort='@group desc' ): 401 | """ 402 | Set grouping attribute and function. 403 | """ 404 | assert(isinstance(attribute, str)) 405 | assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR, SPH_GROUPBY_ATTRPAIR] ) 406 | assert(isinstance(groupsort, str)) 407 | 408 | self._groupby = attribute 409 | self._groupfunc = func 410 | self._groupsort = groupsort 411 | 412 | 413 | def SetGroupDistinct (self, attribute): 414 | assert(isinstance(attribute,str)) 415 | self._groupdistinct = attribute 416 | 417 | 418 | def SetRetries (self, count, delay=0): 419 | assert(isinstance(count,int) and count>=0) 420 | assert(isinstance(delay,int) and delay>=0) 421 | self._retrycount = count 422 | self._retrydelay = delay 423 | 424 | 425 | def SetOverride (self, name, type, values): 426 | assert(isinstance(name, str)) 427 | assert(type in SPH_ATTR_TYPES) 428 | assert(isinstance(values, dict)) 429 | 430 | self._overrides[name] = {'name': name, 'type': type, 'values': values} 431 | 432 | def SetSelect (self, select): 433 | assert(isinstance(select, str)) 434 | self._select = select 435 | 436 | 437 | def ResetOverrides (self): 438 | self._overrides = {} 439 | 440 | 441 | def ResetFilters (self): 442 | """ 443 | Clear all filters (for multi-queries). 444 | """ 445 | self._filters = [] 446 | self._anchor = {} 447 | 448 | 449 | def ResetGroupBy (self): 450 | """ 451 | Clear groupby settings (for multi-queries). 452 | """ 453 | self._groupby = '' 454 | self._groupfunc = SPH_GROUPBY_DAY 455 | self._groupsort = '@group desc' 456 | self._groupdistinct = '' 457 | 458 | 459 | def Query (self, query, index='*', comment=''): 460 | """ 461 | Connect to searchd server and run given search query. 462 | Returns None on failure; result set hash on success (see documentation for details). 463 | """ 464 | assert(len(self._reqs)==0) 465 | self.AddQuery(query,index,comment) 466 | results = self.RunQueries() 467 | 468 | if not results or len(results)==0: 469 | return None 470 | self._error = results[0]['error'] 471 | self._warning = results[0]['warning'] 472 | if results[0]['status'] == SEARCHD_ERROR: 473 | return None 474 | return results[0] 475 | 476 | 477 | def AddQuery (self, query, index='*', comment=''): 478 | """ 479 | Add query to batch. 480 | """ 481 | # build request 482 | req = [pack('>5L', self._offset, self._limit, self._mode, self._ranker, self._sort)] 483 | req.append(pack('>L', len(self._sortby))) 484 | req.append(self._sortby) 485 | 486 | if isinstance(query,unicode): 487 | query = query.encode('utf-8') 488 | assert(isinstance(query,str)) 489 | 490 | req.append(pack('>L', len(query))) 491 | req.append(query) 492 | 493 | req.append(pack('>L', len(self._weights))) 494 | for w in self._weights: 495 | req.append(pack('>L', w)) 496 | req.append(pack('>L', len(index))) 497 | req.append(index) 498 | req.append(pack('>L',1)) # id64 range marker 499 | req.append(pack('>Q', self._min_id)) 500 | req.append(pack('>Q', self._max_id)) 501 | 502 | # filters 503 | req.append ( pack ( '>L', len(self._filters) ) ) 504 | for f in self._filters: 505 | req.append ( pack ( '>L', len(f['attr'])) + f['attr']) 506 | filtertype = f['type'] 507 | req.append ( pack ( '>L', filtertype)) 508 | if filtertype == SPH_FILTER_VALUES: 509 | req.append ( pack ('>L', len(f['values']))) 510 | for val in f['values']: 511 | req.append ( pack ('>q', val)) 512 | elif filtertype == SPH_FILTER_RANGE: 513 | req.append ( pack ('>2q', f['min'], f['max'])) 514 | elif filtertype == SPH_FILTER_FLOATRANGE: 515 | req.append ( pack ('>2f', f['min'], f['max'])) 516 | req.append ( pack ( '>L', f['exclude'] ) ) 517 | 518 | # group-by, max-matches, group-sort 519 | req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) ) 520 | req.append ( self._groupby ) 521 | req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) ) 522 | req.append ( self._groupsort ) 523 | req.append ( pack ( '>LLL', self._cutoff, self._retrycount, self._retrydelay)) 524 | req.append ( pack ( '>L', len(self._groupdistinct))) 525 | req.append ( self._groupdistinct) 526 | 527 | # anchor point 528 | if len(self._anchor) == 0: 529 | req.append ( pack ('>L', 0)) 530 | else: 531 | attrlat, attrlong = self._anchor['attrlat'], self._anchor['attrlong'] 532 | latitude, longitude = self._anchor['lat'], self._anchor['long'] 533 | req.append ( pack ('>L', 1)) 534 | req.append ( pack ('>L', len(attrlat)) + attrlat) 535 | req.append ( pack ('>L', len(attrlong)) + attrlong) 536 | req.append ( pack ('>f', latitude) + pack ('>f', longitude)) 537 | 538 | # per-index weights 539 | req.append ( pack ('>L',len(self._indexweights))) 540 | for indx,weight in self._indexweights.items(): 541 | req.append ( pack ('>L',len(indx)) + indx + pack ('>L',weight)) 542 | 543 | # max query time 544 | req.append ( pack ('>L', self._maxquerytime) ) 545 | 546 | # per-field weights 547 | req.append ( pack ('>L',len(self._fieldweights) ) ) 548 | for field,weight in self._fieldweights.items(): 549 | req.append ( pack ('>L',len(field)) + field + pack ('>L',weight) ) 550 | 551 | # comment 552 | req.append ( pack('>L',len(comment)) + comment ) 553 | 554 | # attribute overrides 555 | req.append ( pack('>L', len(self._overrides)) ) 556 | for v in self._overrides.values(): 557 | req.extend ( ( pack('>L', len(v['name'])), v['name'] ) ) 558 | req.append ( pack('>LL', v['type'], len(v['values'])) ) 559 | for id, value in v['values'].iteritems(): 560 | req.append ( pack('>Q', id) ) 561 | if v['type'] == SPH_ATTR_FLOAT: 562 | req.append ( pack('>f', value) ) 563 | elif v['type'] == SPH_ATTR_BIGINT: 564 | req.append ( pack('>q', value) ) 565 | else: 566 | req.append ( pack('>l', value) ) 567 | 568 | # select-list 569 | req.append ( pack('>L', len(self._select)) ) 570 | req.append ( self._select ) 571 | 572 | # send query, get response 573 | req = ''.join(req) 574 | 575 | self._reqs.append(req) 576 | return 577 | 578 | 579 | def RunQueries (self): 580 | """ 581 | Run queries batch. 582 | Returns None on network IO failure; or an array of result set hashes on success. 583 | """ 584 | if len(self._reqs)==0: 585 | self._error = 'no queries defined, issue AddQuery() first' 586 | return None 587 | 588 | sock = self._Connect() 589 | if not sock: 590 | return None 591 | 592 | req = ''.join(self._reqs) 593 | length = len(req)+4 594 | req = pack('>HHLL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length, len(self._reqs))+req 595 | sock.send(req) 596 | 597 | response = self._GetResponse(sock, VER_COMMAND_SEARCH) 598 | if not response: 599 | return None 600 | 601 | nreqs = len(self._reqs) 602 | 603 | # parse response 604 | max_ = len(response) 605 | p = 0 606 | 607 | results = [] 608 | for i in range(0,nreqs,1): 609 | result = {} 610 | results.append(result) 611 | 612 | result['error'] = '' 613 | result['warning'] = '' 614 | status = unpack('>L', response[p:p+4])[0] 615 | p += 4 616 | result['status'] = status 617 | if status != SEARCHD_OK: 618 | length = unpack('>L', response[p:p+4])[0] 619 | p += 4 620 | message = response[p:p+length] 621 | p += length 622 | 623 | if status == SEARCHD_WARNING: 624 | result['warning'] = message 625 | else: 626 | result['error'] = message 627 | continue 628 | 629 | # read schema 630 | fields = [] 631 | attrs = [] 632 | 633 | nfields = unpack('>L', response[p:p+4])[0] 634 | p += 4 635 | while nfields>0 and pL', response[p:p+4])[0] 638 | p += 4 639 | fields.append(response[p:p+length]) 640 | p += length 641 | 642 | result['fields'] = fields 643 | 644 | nattrs = unpack('>L', response[p:p+4])[0] 645 | p += 4 646 | while nattrs>0 and pL', response[p:p+4])[0] 649 | p += 4 650 | attr = response[p:p+length] 651 | p += length 652 | type_ = unpack('>L', response[p:p+4])[0] 653 | p += 4 654 | attrs.append([attr,type_]) 655 | 656 | result['attrs'] = attrs 657 | 658 | # read match count 659 | count = unpack('>L', response[p:p+4])[0] 660 | p += 4 661 | id64 = unpack('>L', response[p:p+4])[0] 662 | p += 4 663 | 664 | # read matches 665 | result['matches'] = [] 666 | while count>0 and pQL', response[p:p+12]) 670 | p += 12 671 | else: 672 | doc, weight = unpack('>2L', response[p:p+8]) 673 | p += 8 674 | 675 | match = { 'id':doc, 'weight':weight, 'attrs':{} } 676 | for i in range(len(attrs)): 677 | if attrs[i][1] == SPH_ATTR_FLOAT: 678 | match['attrs'][attrs[i][0]] = unpack('>f', response[p:p+4])[0] 679 | elif attrs[i][1] == SPH_ATTR_BIGINT: 680 | match['attrs'][attrs[i][0]] = unpack('>q', response[p:p+8])[0] 681 | p += 4 682 | elif attrs[i][1] == (SPH_ATTR_MULTI | SPH_ATTR_INTEGER): 683 | match['attrs'][attrs[i][0]] = [] 684 | nvals = unpack('>L', response[p:p+4])[0] 685 | p += 4 686 | for n in range(0,nvals,1): 687 | match['attrs'][attrs[i][0]].append(unpack('>L', response[p:p+4])[0]) 688 | p += 4 689 | p -= 4 690 | else: 691 | match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0] 692 | p += 4 693 | 694 | result['matches'].append ( match ) 695 | 696 | result['total'], result['total_found'], result['time'], words = unpack('>4L', response[p:p+16]) 697 | 698 | result['time'] = '%.3f' % (result['time']/1000.0) 699 | p += 16 700 | 701 | result['words'] = [] 702 | while words>0: 703 | words -= 1 704 | length = unpack('>L', response[p:p+4])[0] 705 | p += 4 706 | word = response[p:p+length] 707 | p += length 708 | docs, hits = unpack('>2L', response[p:p+8]) 709 | p += 8 710 | 711 | result['words'].append({'word':word, 'docs':docs, 'hits':hits}) 712 | 713 | self._reqs = [] 714 | return results 715 | 716 | 717 | def BuildExcerpts (self, docs, index, words, opts=None): 718 | """ 719 | Connect to searchd server and generate exceprts from given documents. 720 | """ 721 | if not opts: 722 | opts = {} 723 | if isinstance(words,unicode): 724 | words = words.encode('utf-8') 725 | 726 | assert(isinstance(docs, list)) 727 | assert(isinstance(index, str)) 728 | assert(isinstance(words, str)) 729 | assert(isinstance(opts, dict)) 730 | 731 | sock = self._Connect() 732 | 733 | if not sock: 734 | return None 735 | 736 | # fixup options 737 | opts.setdefault('before_match', '') 738 | opts.setdefault('after_match', '') 739 | opts.setdefault('chunk_separator', ' ... ') 740 | opts.setdefault('limit', 256) 741 | opts.setdefault('around', 5) 742 | 743 | # build request 744 | # v.1.0 req 745 | 746 | flags = 1 # (remove spaces) 747 | if opts.get('exact_phrase'): flags |= 2 748 | if opts.get('single_passage'): flags |= 4 749 | if opts.get('use_boundaries'): flags |= 8 750 | if opts.get('weight_order'): flags |= 16 751 | 752 | # mode=0, flags 753 | req = [pack('>2L', 0, flags)] 754 | 755 | # req index 756 | req.append(pack('>L', len(index))) 757 | req.append(index) 758 | 759 | # req words 760 | req.append(pack('>L', len(words))) 761 | req.append(words) 762 | 763 | # options 764 | req.append(pack('>L', len(opts['before_match']))) 765 | req.append(opts['before_match']) 766 | 767 | req.append(pack('>L', len(opts['after_match']))) 768 | req.append(opts['after_match']) 769 | 770 | req.append(pack('>L', len(opts['chunk_separator']))) 771 | req.append(opts['chunk_separator']) 772 | 773 | req.append(pack('>L', int(opts['limit']))) 774 | req.append(pack('>L', int(opts['around']))) 775 | 776 | # documents 777 | req.append(pack('>L', len(docs))) 778 | for doc in docs: 779 | if isinstance(doc,unicode): 780 | doc = doc.encode('utf-8') 781 | assert(isinstance(doc, str)) 782 | req.append(pack('>L', len(doc))) 783 | req.append(doc) 784 | 785 | req = ''.join(req) 786 | 787 | # send query, get response 788 | length = len(req) 789 | 790 | # add header 791 | req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req 792 | wrote = sock.send(req) 793 | 794 | response = self._GetResponse(sock, VER_COMMAND_EXCERPT ) 795 | if not response: 796 | return [] 797 | 798 | # parse response 799 | pos = 0 800 | res = [] 801 | rlen = len(response) 802 | 803 | for i in range(len(docs)): 804 | length = unpack('>L', response[pos:pos+4])[0] 805 | pos += 4 806 | 807 | if pos+length > rlen: 808 | self._error = 'incomplete reply' 809 | return [] 810 | 811 | res.append(response[pos:pos+length]) 812 | pos += length 813 | 814 | return res 815 | 816 | 817 | def UpdateAttributes ( self, index, attrs, values ): 818 | """ 819 | Update given attribute values on given documents in given indexes. 820 | Returns amount of updated documents (0 or more) on success, or -1 on failure. 821 | 822 | 'attrs' must be a list of strings. 823 | 'values' must be a dict with int key (document ID) and list of int values (new attribute values). 824 | 825 | Example: 826 | res = cl.UpdateAttributes ( 'test1', [ 'group_id', 'date_added' ], { 2:[123,1000000000], 4:[456,1234567890] } ) 827 | """ 828 | assert ( isinstance ( index, str ) ) 829 | assert ( isinstance ( attrs, list ) ) 830 | assert ( isinstance ( values, dict ) ) 831 | for attr in attrs: 832 | assert ( isinstance ( attr, str ) ) 833 | for docid, entry in values.items(): 834 | assert ( isinstance ( docid, int ) ) 835 | assert ( isinstance ( entry, list ) ) 836 | assert ( len(attrs)==len(entry) ) 837 | for val in entry: 838 | assert ( isinstance ( val, int ) ) 839 | 840 | # build request 841 | req = [ pack('>L',len(index)), index ] 842 | 843 | req.append ( pack('>L',len(attrs)) ) 844 | for attr in attrs: 845 | req.append ( pack('>L',len(attr)) + attr ) 846 | 847 | req.append ( pack('>L',len(values)) ) 848 | for docid, entry in values.items(): 849 | req.append ( pack('>Q',docid) ) 850 | for val in entry: 851 | req.append ( pack('>L',val) ) 852 | 853 | # connect, send query, get response 854 | sock = self._Connect() 855 | if not sock: 856 | return None 857 | 858 | req = ''.join(req) 859 | length = len(req) 860 | req = pack ( '>2HL', SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, length ) + req 861 | wrote = sock.send ( req ) 862 | 863 | response = self._GetResponse ( sock, VER_COMMAND_UPDATE ) 864 | if not response: 865 | return -1 866 | 867 | # parse response 868 | updated = unpack ( '>L', response[0:4] )[0] 869 | return updated 870 | 871 | 872 | def BuildKeywords ( self, query, index, hits ): 873 | """ 874 | Connect to searchd server, and generate keywords list for a given query. 875 | Returns None on failure, or a list of keywords on success. 876 | """ 877 | assert ( isinstance ( query, str ) ) 878 | assert ( isinstance ( index, str ) ) 879 | assert ( isinstance ( hits, int ) ) 880 | 881 | # build request 882 | req = [ pack ( '>L', len(query) ) + query ] 883 | req.append ( pack ( '>L', len(index) ) + index ) 884 | req.append ( pack ( '>L', hits ) ) 885 | 886 | # connect, send query, get response 887 | sock = self._Connect() 888 | if not sock: 889 | return None 890 | 891 | req = ''.join(req) 892 | length = len(req) 893 | req = pack ( '>2HL', SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, length ) + req 894 | wrote = sock.send ( req ) 895 | 896 | response = self._GetResponse ( sock, VER_COMMAND_KEYWORDS ) 897 | if not response: 898 | return None 899 | 900 | # parse response 901 | res = [] 902 | 903 | nwords = unpack ( '>L', response[0:4] )[0] 904 | p = 4 905 | max_ = len(response) 906 | 907 | while nwords>0 and pL', response[p:p+4] )[0] 911 | p += 4 912 | tokenized = response[p:p+length] 913 | p += length 914 | 915 | length = unpack ( '>L', response[p:p+4] )[0] 916 | p += 4 917 | normalized = response[p:p+length] 918 | p += length 919 | 920 | entry = { 'tokenized':tokenized, 'normalized':normalized } 921 | if hits: 922 | entry['docs'], entry['hits'] = unpack ( '>2L', response[p:p+8] ) 923 | p += 8 924 | 925 | res.append ( entry ) 926 | 927 | if nwords>0 or p>max_: 928 | self._error = 'incomplete reply' 929 | return None 930 | 931 | return res 932 | 933 | ### persistent connections 934 | 935 | def Open(self): 936 | if self._socket: 937 | self._error = 'already connected' 938 | return 939 | 940 | server = self._Connect() 941 | if not server: 942 | return 943 | 944 | # command, command version = 0, body length = 4, body = 1 945 | request = pack ( '>hhII', SEARCHD_COMMAND_PERSIST, 0, 4, 1 ) 946 | server.send ( request ) 947 | 948 | self._socket = server 949 | 950 | def Close(self): 951 | if not self._socket: 952 | self._error = 'not connected' 953 | return 954 | self._socket.close() 955 | self._socket = None 956 | 957 | def EscapeString(self, string): 958 | return re.sub(r"([=\(\)|\-!@~\"&/\\\^\$\=])", r"\\\1", string) 959 | 960 | # 961 | # $Id: sphinxapi.py 1775 2009-04-06 22:15:58Z shodan $ 962 | # 963 | --------------------------------------------------------------------------------