├── README.md ├── composer.json └── sphinxapi.php /README.md: -------------------------------------------------------------------------------- 1 | # SphinxSearch PHP API 2 | 3 | [SphinxSearch](http://sphinxsearch.com/) is a powerful search engine to index MySQL and PostgreSQL 4 | databases. 5 | 6 | This repository is used to make SphinxSearch API PHP Client available through 7 | [composer](http://getcomposer.org/). 8 | 9 | # License 10 | 11 | This is licensed under the GNU General Public License 12 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "neutron/sphinxsearch-api", 3 | "description": "SphinxSearch PHP API", 4 | "require": { 5 | "php": ">=5" 6 | }, 7 | "license": "GPL-2.0", 8 | "type": "library", 9 | "keywords": ["Sphinx Search","search-engine","api"], 10 | "homepage": "http://sphinxsearch.com/", 11 | "authors": [ 12 | { 13 | "name": "Andrew Aksyonoff", 14 | "homepage": "http://sphinxsearch.com/" 15 | } 16 | ], 17 | "autoload": { 18 | "classmap": ["sphinxapi.php"] 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /sphinxapi.php: -------------------------------------------------------------------------------- 1 | =8 ) 132 | { 133 | $v = (int)$v; 134 | return pack ( "NN", $v>>32, $v&0xFFFFFFFF ); 135 | } 136 | 137 | // x32, int 138 | if ( is_int($v) ) 139 | return pack ( "NN", $v < 0 ? 
-1 : 0, $v ); 140 | 141 | // x32, bcmath 142 | if ( function_exists("bcmul") ) 143 | { 144 | if ( bccomp ( $v, 0 ) == -1 ) 145 | $v = bcadd ( "18446744073709551616", $v ); 146 | $h = bcdiv ( $v, "4294967296", 0 ); 147 | $l = bcmod ( $v, "4294967296" ); 148 | return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit 149 | } 150 | 151 | // x32, no-bcmath 152 | $p = max(0, strlen($v) - 13); 153 | $lo = abs((float)substr($v, $p)); 154 | $hi = abs((float)substr($v, 0, $p)); 155 | 156 | $m = $lo + $hi*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912 157 | $q = floor($m/4294967296.0); 158 | $l = $m - ($q*4294967296.0); 159 | $h = $hi*2328.0 + $q; // (10 ^ 13) / (1 << 32) = 2328 160 | 161 | if ( $v<0 ) 162 | { 163 | if ( $l==0 ) 164 | $h = 4294967296.0 - $h; 165 | else 166 | { 167 | $h = 4294967295.0 - $h; 168 | $l = 4294967296.0 - $l; 169 | } 170 | } 171 | return pack ( "NN", $h, $l ); 172 | } 173 | 174 | /// pack 64-bit unsigned 175 | function sphPackU64 ( $v ) 176 | { 177 | assert ( is_numeric($v) ); 178 | 179 | // x64 180 | if ( PHP_INT_SIZE>=8 ) 181 | { 182 | assert ( $v>=0 ); 183 | 184 | // x64, int 185 | if ( is_int($v) ) 186 | return pack ( "NN", $v>>32, $v&0xFFFFFFFF ); 187 | 188 | // x64, bcmath 189 | if ( function_exists("bcmul") ) 190 | { 191 | $h = bcdiv ( $v, 4294967296, 0 ); 192 | $l = bcmod ( $v, 4294967296 ); 193 | return pack ( "NN", $h, $l ); 194 | } 195 | 196 | // x64, no-bcmath 197 | $p = max ( 0, strlen($v) - 13 ); 198 | $lo = (int)substr ( $v, $p ); 199 | $hi = (int)substr ( $v, 0, $p ); 200 | 201 | $m = $lo + $hi*1316134912; 202 | $l = $m % 4294967296; 203 | $h = $hi*2328 + (int)($m/4294967296); 204 | 205 | return pack ( "NN", $h, $l ); 206 | } 207 | 208 | // x32, int 209 | if ( is_int($v) ) 210 | return pack ( "NN", 0, $v ); 211 | 212 | // x32, bcmath 213 | if ( function_exists("bcmul") ) 214 | { 215 | $h = bcdiv ( $v, "4294967296", 0 ); 216 | $l = bcmod ( $v, "4294967296" ); 217 | return pack ( 
"NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit 218 | } 219 | 220 | // x32, no-bcmath 221 | $p = max(0, strlen($v) - 13); 222 | $lo = (float)substr($v, $p); 223 | $hi = (float)substr($v, 0, $p); 224 | 225 | $m = $lo + $hi*1316134912.0; 226 | $q = floor($m / 4294967296.0); 227 | $l = $m - ($q * 4294967296.0); 228 | $h = $hi*2328.0 + $q; 229 | 230 | return pack ( "NN", $h, $l ); 231 | } 232 | 233 | // unpack 64-bit unsigned 234 | function sphUnpackU64 ( $v ) 235 | { 236 | list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) ); 237 | 238 | if ( PHP_INT_SIZE>=8 ) 239 | { 240 | if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again 241 | if ( $lo<0 ) $lo += (1<<32); 242 | 243 | // x64, int 244 | if ( $hi<=2147483647 ) 245 | return ($hi<<32) + $lo; 246 | 247 | // x64, bcmath 248 | if ( function_exists("bcmul") ) 249 | return bcadd ( $lo, bcmul ( $hi, "4294967296" ) ); 250 | 251 | // x64, no-bcmath 252 | $C = 100000; 253 | $h = ((int)($hi / $C) << 32) + (int)($lo / $C); 254 | $l = (($hi % $C) << 32) + ($lo % $C); 255 | if ( $l>$C ) 256 | { 257 | $h += (int)($l / $C); 258 | $l = $l % $C; 259 | } 260 | 261 | if ( $h==0 ) 262 | return $l; 263 | return sprintf ( "%d%05d", $h, $l ); 264 | } 265 | 266 | // x32, int 267 | if ( $hi==0 ) 268 | { 269 | if ( $lo>0 ) 270 | return $lo; 271 | return sprintf ( "%u", $lo ); 272 | } 273 | 274 | $hi = sprintf ( "%u", $hi ); 275 | $lo = sprintf ( "%u", $lo ); 276 | 277 | // x32, bcmath 278 | if ( function_exists("bcmul") ) 279 | return bcadd ( $lo, bcmul ( $hi, "4294967296" ) ); 280 | 281 | // x32, no-bcmath 282 | $hi = (float)$hi; 283 | $lo = (float)$lo; 284 | 285 | $q = floor($hi/10000000.0); 286 | $r = $hi - $q*10000000.0; 287 | $m = $lo + $r*4967296.0; 288 | $mq = floor($m/10000000.0); 289 | $l = $m - $mq*10000000.0; 290 | $h = $q*4294967296.0 + $r*429.0 + $mq; 291 | 292 | $h = sprintf ( "%.0f", $h ); 293 | $l = sprintf ( "%07.0f", $l ); 294 | if ( $h=="0" ) 
295 | return sprintf( "%.0f", (float)$l ); 296 | return $h . $l; 297 | } 298 | 299 | // unpack 64-bit signed 300 | function sphUnpackI64 ( $v ) 301 | { 302 | list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) ); 303 | 304 | // x64 305 | if ( PHP_INT_SIZE>=8 ) 306 | { 307 | if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again 308 | if ( $lo<0 ) $lo += (1<<32); 309 | 310 | return ($hi<<32) + $lo; 311 | } 312 | 313 | // x32, int 314 | if ( $hi==0 ) 315 | { 316 | if ( $lo>0 ) 317 | return $lo; 318 | return sprintf ( "%u", $lo ); 319 | } 320 | // x32, int 321 | elseif ( $hi==-1 ) 322 | { 323 | if ( $lo<0 ) 324 | return $lo; 325 | return sprintf ( "%.0f", $lo - 4294967296.0 ); 326 | } 327 | 328 | $neg = ""; 329 | $c = 0; 330 | if ( $hi<0 ) 331 | { 332 | $hi = ~$hi; 333 | $lo = ~$lo; 334 | $c = 1; 335 | $neg = "-"; 336 | } 337 | 338 | $hi = sprintf ( "%u", $hi ); 339 | $lo = sprintf ( "%u", $lo ); 340 | 341 | // x32, bcmath 342 | if ( function_exists("bcmul") ) 343 | return $neg . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $c ); 344 | 345 | // x32, no-bcmath 346 | $hi = (float)$hi; 347 | $lo = (float)$lo; 348 | 349 | $q = floor($hi/10000000.0); 350 | $r = $hi - $q*10000000.0; 351 | $m = $lo + $r*4967296.0; 352 | $mq = floor($m/10000000.0); 353 | $l = $m - $mq*10000000.0 + $c; 354 | $h = $q*4294967296.0 + $r*429.0 + $mq; 355 | if ( $l==10000000 ) 356 | { 357 | $l = 0; 358 | $h += 1; 359 | } 360 | 361 | $h = sprintf ( "%.0f", $h ); 362 | $l = sprintf ( "%07.0f", $l ); 363 | if ( $h=="0" ) 364 | return $neg . sprintf( "%.0f", (float)$l ); 365 | return $neg . $h . 
$l; 366 | } 367 | 368 | 369 | function sphFixUint ( $value ) 370 | { 371 | if ( PHP_INT_SIZE>=8 ) 372 | { 373 | // x64 route, workaround broken unpack() in 5.2.2+ 374 | if ( $value<0 ) $value += (1<<32); 375 | return $value; 376 | } 377 | else 378 | { 379 | // x32 route, workaround php signed/unsigned braindamage 380 | return sprintf ( "%u", $value ); 381 | } 382 | } 383 | 384 | 385 | /// sphinx searchd client class 386 | class SphinxClient 387 | { 388 | var $_host; ///< searchd host (default is "localhost") 389 | var $_port; ///< searchd port (default is 9312) 390 | var $_offset; ///< how many records to seek from result-set start (default is 0) 391 | var $_limit; ///< how many records to return from result-set starting at offset (default is 20) 392 | var $_mode; ///< query matching mode (default is SPH_MATCH_ALL) 393 | var $_weights; ///< per-field weights (default is 1 for all fields) 394 | var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE) 395 | var $_sortby; ///< attribute to sort by (default is "") 396 | var $_min_id; ///< min ID to match (default is 0, which means no limit) 397 | var $_max_id; ///< max ID to match (default is 0, which means no limit) 398 | var $_filters; ///< search filters 399 | var $_groupby; ///< group-by attribute name 400 | var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with) 401 | var $_groupsort; ///< group-by sorting clause (to sort groups in result set with) 402 | var $_groupdistinct;///< group-by count-distinct attribute 403 | var $_maxmatches; ///< max matches to retrieve 404 | var $_cutoff; ///< cutoff to stop searching at (default is 0) 405 | var $_retrycount; ///< distributed retries count 406 | var $_retrydelay; ///< distributed retries delay 407 | var $_anchor; ///< geographical anchor point 408 | var $_indexweights; ///< per-index weights 409 | var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25) 410 | var $_rankexpr; ///< ranking mode expression (for 
SPH_RANK_EXPR) 411 | var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit) 412 | var $_fieldweights; ///< per-field-name weights 413 | var $_overrides; ///< per-query attribute values overrides 414 | var $_select; ///< select-list (attributes or expressions, with optional aliases) 415 | 416 | var $_error; ///< last error message 417 | var $_warning; ///< last warning message 418 | var $_connerror; ///< connection error vs remote error flag 419 | 420 | var $_reqs; ///< requests array for multi-query 421 | var $_mbenc; ///< stored mbstring encoding 422 | var $_arrayresult; ///< whether $result["matches"] should be a hash or an array 423 | var $_timeout; ///< connect timeout 424 | 425 | ///////////////////////////////////////////////////////////////////////////// 426 | // common stuff 427 | ///////////////////////////////////////////////////////////////////////////// 428 | 429 | /// create a new client object and fill defaults 430 | function __construct () 431 | { 432 | // per-client-object settings 433 | $this->_host = "localhost"; 434 | $this->_port = 9312; 435 | $this->_path = false; 436 | $this->_socket = false; 437 | 438 | // per-query settings 439 | $this->_offset = 0; 440 | $this->_limit = 20; 441 | $this->_mode = SPH_MATCH_ALL; 442 | $this->_weights = array (); 443 | $this->_sort = SPH_SORT_RELEVANCE; 444 | $this->_sortby = ""; 445 | $this->_min_id = 0; 446 | $this->_max_id = 0; 447 | $this->_filters = array (); 448 | $this->_groupby = ""; 449 | $this->_groupfunc = SPH_GROUPBY_DAY; 450 | $this->_groupsort = "@group desc"; 451 | $this->_groupdistinct= ""; 452 | $this->_maxmatches = 1000; 453 | $this->_cutoff = 0; 454 | $this->_retrycount = 0; 455 | $this->_retrydelay = 0; 456 | $this->_anchor = array (); 457 | $this->_indexweights= array (); 458 | $this->_ranker = SPH_RANK_PROXIMITY_BM25; 459 | $this->_rankexpr = ""; 460 | $this->_maxquerytime= 0; 461 | $this->_fieldweights= array(); 462 | $this->_overrides = array(); 463 | 
$this->_select = "*"; 464 | 465 | $this->_error = ""; // per-reply fields (for single-query case) 466 | $this->_warning = ""; 467 | $this->_connerror = false; 468 | 469 | $this->_reqs = array (); // requests storage (for multi-query case) 470 | $this->_mbenc = ""; 471 | $this->_arrayresult = false; 472 | $this->_timeout = 0; 473 | } 474 | 475 | function __destruct() 476 | { 477 | if ( $this->_socket !== false ) 478 | fclose ( $this->_socket ); 479 | } 480 | 481 | /// get last error message (string) 482 | function GetLastError () 483 | { 484 | return $this->_error; 485 | } 486 | 487 | /// get last warning message (string) 488 | function GetLastWarning () 489 | { 490 | return $this->_warning; 491 | } 492 | 493 | /// get last error flag (to tell network connection errors from searchd errors or broken responses) 494 | function IsConnectError() 495 | { 496 | return $this->_connerror; 497 | } 498 | 499 | /// set searchd host name (string) and port (integer) 500 | function SetServer ( $host, $port = 0 ) 501 | { 502 | assert ( is_string($host) ); 503 | if ( $host[0] == '/') 504 | { 505 | $this->_path = 'unix://' . $host; 506 | return; 507 | } 508 | if ( substr ( $host, 0, 7 )=="unix://" ) 509 | { 510 | $this->_path = $host; 511 | return; 512 | } 513 | 514 | $this->_host = $host; 515 | $port = intval($port); 516 | assert ( 0<=$port && $port<65536 ); 517 | $this->_port = ( $port==0 ) ? 
9312 : $port; 518 | $this->_path = ''; 519 | } 520 | 521 | /// set server connection timeout (0 to remove) 522 | function SetConnectTimeout ( $timeout ) 523 | { 524 | assert ( is_numeric($timeout) ); 525 | $this->_timeout = $timeout; 526 | } 527 | 528 | 529 | function _Send ( $handle, $data, $length ) 530 | { 531 | if ( feof($handle) || fwrite ( $handle, $data, $length ) !== $length ) 532 | { 533 | $this->_error = 'connection unexpectedly closed (timed out?)'; 534 | $this->_connerror = true; 535 | return false; 536 | } 537 | return true; 538 | } 539 | 540 | ///////////////////////////////////////////////////////////////////////////// 541 | 542 | /// enter mbstring workaround mode 543 | function _MBPush () 544 | { 545 | $this->_mbenc = ""; 546 | if ( ini_get ( "mbstring.func_overload" ) & 2 ) 547 | { 548 | $this->_mbenc = mb_internal_encoding(); 549 | mb_internal_encoding ( "latin1" ); 550 | } 551 | } 552 | 553 | /// leave mbstring workaround mode 554 | function _MBPop () 555 | { 556 | if ( $this->_mbenc ) 557 | mb_internal_encoding ( $this->_mbenc ); 558 | } 559 | 560 | /// connect to searchd server 561 | function _Connect () 562 | { 563 | if ( $this->_socket!==false ) 564 | { 565 | // we are in persistent connection mode, so we have a socket 566 | // however, need to check whether it's still alive 567 | if ( !@feof ( $this->_socket ) ) 568 | return $this->_socket; 569 | 570 | // force reopen 571 | $this->_socket = false; 572 | } 573 | 574 | $errno = 0; 575 | $errstr = ""; 576 | $this->_connerror = false; 577 | 578 | if ( $this->_path ) 579 | { 580 | $host = $this->_path; 581 | $port = 0; 582 | } 583 | else 584 | { 585 | $host = $this->_host; 586 | $port = $this->_port; 587 | } 588 | 589 | if ( $this->_timeout<=0 ) 590 | $fp = @fsockopen ( $host, $port, $errno, $errstr ); 591 | else 592 | $fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout ); 593 | 594 | if ( !$fp ) 595 | { 596 | if ( $this->_path ) 597 | $location = $this->_path; 598 | else 599 | 
$location = "{$this->_host}:{$this->_port}"; 600 | 601 | $errstr = trim ( $errstr ); 602 | $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)"; 603 | $this->_connerror = true; 604 | return false; 605 | } 606 | 607 | // send my version 608 | // this is a subtle part. we must do it before (!) reading back from searchd. 609 | // because otherwise under some conditions (reported on FreeBSD for instance) 610 | // TCP stack could throttle write-write-read pattern because of Nagle. 611 | if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) ) 612 | { 613 | fclose ( $fp ); 614 | $this->_error = "failed to send client protocol version"; 615 | return false; 616 | } 617 | 618 | // check version 619 | list(,$v) = unpack ( "N*", fread ( $fp, 4 ) ); 620 | $v = (int)$v; 621 | if ( $v<1 ) 622 | { 623 | fclose ( $fp ); 624 | $this->_error = "expected searchd protocol version 1+, got version '$v'"; 625 | return false; 626 | } 627 | 628 | return $fp; 629 | } 630 | 631 | /// get and check response packet from searchd server 632 | function _GetResponse ( $fp, $client_ver ) 633 | { 634 | $response = ""; 635 | $len = 0; 636 | 637 | $header = fread ( $fp, 8 ); 638 | if ( strlen($header)==8 ) 639 | { 640 | list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) ); 641 | $left = $len; 642 | while ( $left>0 && !feof($fp) ) 643 | { 644 | $chunk = fread ( $fp, min ( 8192, $left ) ); 645 | if ( $chunk ) 646 | { 647 | $response .= $chunk; 648 | $left -= strlen($chunk); 649 | } 650 | } 651 | } 652 | if ( $this->_socket === false ) 653 | fclose ( $fp ); 654 | 655 | // check response 656 | $read = strlen ( $response ); 657 | if ( !$response || $read!=$len ) 658 | { 659 | $this->_error = $len 660 | ? 
"failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)" 661 | : "received zero-sized searchd response"; 662 | return false; 663 | } 664 | 665 | // check status 666 | if ( $status==SEARCHD_WARNING ) 667 | { 668 | list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) ); 669 | $this->_warning = substr ( $response, 4, $wlen ); 670 | return substr ( $response, 4+$wlen ); 671 | } 672 | if ( $status==SEARCHD_ERROR ) 673 | { 674 | $this->_error = "searchd error: " . substr ( $response, 4 ); 675 | return false; 676 | } 677 | if ( $status==SEARCHD_RETRY ) 678 | { 679 | $this->_error = "temporary searchd error: " . substr ( $response, 4 ); 680 | return false; 681 | } 682 | if ( $status!=SEARCHD_OK ) 683 | { 684 | $this->_error = "unknown status code '$status'"; 685 | return false; 686 | } 687 | 688 | // check version 689 | if ( $ver<$client_ver ) 690 | { 691 | $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work", 692 | $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff ); 693 | } 694 | 695 | return $response; 696 | } 697 | 698 | ///////////////////////////////////////////////////////////////////////////// 699 | // searching 700 | ///////////////////////////////////////////////////////////////////////////// 701 | 702 | /// set offset and count into result set, 703 | /// and optionally set max-matches and cutoff limits 704 | function SetLimits ( $offset, $limit, $max=0, $cutoff=0 ) 705 | { 706 | assert ( is_int($offset) ); 707 | assert ( is_int($limit) ); 708 | assert ( $offset>=0 ); 709 | assert ( $limit>0 ); 710 | assert ( $max>=0 ); 711 | $this->_offset = $offset; 712 | $this->_limit = $limit; 713 | if ( $max>0 ) 714 | $this->_maxmatches = $max; 715 | if ( $cutoff>0 ) 716 | $this->_cutoff = $cutoff; 717 | } 718 | 719 | /// set maximum query time, in milliseconds, per-index 720 | /// integer, 0 means "do not limit" 721 | function SetMaxQueryTime ( $max ) 722 | { 723 | assert ( 
is_int($max) ); 724 | assert ( $max>=0 ); 725 | $this->_maxquerytime = $max; 726 | } 727 | 728 | /// set matching mode 729 | function SetMatchMode ( $mode ) 730 | { 731 | assert ( $mode==SPH_MATCH_ALL 732 | || $mode==SPH_MATCH_ANY 733 | || $mode==SPH_MATCH_PHRASE 734 | || $mode==SPH_MATCH_BOOLEAN 735 | || $mode==SPH_MATCH_EXTENDED 736 | || $mode==SPH_MATCH_FULLSCAN 737 | || $mode==SPH_MATCH_EXTENDED2 ); 738 | $this->_mode = $mode; 739 | } 740 | 741 | /// set ranking mode 742 | function SetRankingMode ( $ranker, $rankexpr="" ) 743 | { 744 | assert ( $ranker===0 || $ranker>=1 && $ranker<SPH_RANK_TOTAL ); 745 | assert ( is_string($rankexpr) ); 746 | $this->_ranker = $ranker; 747 | $this->_rankexpr = $rankexpr; 748 | } 749 | 750 | /// set matches sorting mode 751 | function SetSortMode ( $mode, $sortby="" ) 752 | { 753 | assert ( 754 | $mode==SPH_SORT_RELEVANCE || 755 | $mode==SPH_SORT_ATTR_DESC || 756 | $mode==SPH_SORT_ATTR_ASC || 757 | $mode==SPH_SORT_TIME_SEGMENTS || 758 | $mode==SPH_SORT_EXTENDED || 759 | $mode==SPH_SORT_EXPR ); 760 | assert ( is_string($sortby) ); 761 | assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 ); 762 | 763 | $this->_sort = $mode; 764 | $this->_sortby = $sortby; 765 | } 766 | 767 | /// bind per-field weights by order 768 | /// DEPRECATED; use SetFieldWeights() instead 769 | function SetWeights ( $weights ) 770 | { 771 | assert ( is_array($weights) ); 772 | foreach ( $weights as $weight ) 773 | assert ( is_int($weight) ); 774 | 775 | $this->_weights = $weights; 776 | } 777 | 778 | /// bind per-field weights by name 779 | function SetFieldWeights ( $weights ) 780 | { 781 | assert ( is_array($weights) ); 782 | foreach ( $weights as $name=>$weight ) 783 | { 784 | assert ( is_string($name) ); 785 | assert ( is_int($weight) ); 786 | } 787 | $this->_fieldweights = $weights; 788 | } 789 | 790 | /// bind per-index weights by name 791 | function SetIndexWeights ( $weights ) 792 | { 793 | assert ( is_array($weights) ); 794 | foreach ( $weights as $index=>$weight ) 795 | { 796 | assert ( is_string($index) ); 797 | 
assert ( is_int($weight) ); 798 | } 799 | $this->_indexweights = $weights; 800 | } 801 | 802 | /// set IDs range to match 803 | /// only match records if document ID is between $min and $max (inclusive) 804 | function SetIDRange ( $min, $max ) 805 | { 806 | assert ( is_numeric($min) ); 807 | assert ( is_numeric($max) ); 808 | assert ( $min<=$max ); 809 | $this->_min_id = $min; 810 | $this->_max_id = $max; 811 | } 812 | 813 | /// set values set filter 814 | /// only match records where $attribute value is in given set 815 | function SetFilter ( $attribute, $values, $exclude=false ) 816 | { 817 | assert ( is_string($attribute) ); 818 | assert ( is_array($values) ); 819 | assert ( count($values) ); 820 | 821 | if ( is_array($values) && count($values) ) 822 | { 823 | foreach ( $values as $value ) 824 | assert ( is_numeric($value) ); 825 | 826 | $this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values ); 827 | } 828 | } 829 | 830 | /// set range filter 831 | /// only match records if $attribute value is between $min and $max (inclusive) 832 | function SetFilterRange ( $attribute, $min, $max, $exclude=false ) 833 | { 834 | assert ( is_string($attribute) ); 835 | assert ( is_numeric($min) ); 836 | assert ( is_numeric($max) ); 837 | assert ( $min<=$max ); 838 | 839 | $this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); 840 | } 841 | 842 | /// set float range filter 843 | /// only match records if $attribute value is between $min and $max (inclusive) 844 | function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false ) 845 | { 846 | assert ( is_string($attribute) ); 847 | assert ( is_float($min) ); 848 | assert ( is_float($max) ); 849 | assert ( $min<=$max ); 850 | 851 | $this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); 852 | } 853 | 854 | /// setup anchor point 
for geosphere distance calculations 855 | /// required to use @geodist in filters and sorting 856 | /// latitude and longitude must be in radians 857 | function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long ) 858 | { 859 | assert ( is_string($attrlat) ); 860 | assert ( is_string($attrlong) ); 861 | assert ( is_float($lat) ); 862 | assert ( is_float($long) ); 863 | 864 | $this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long ); 865 | } 866 | 867 | /// set grouping attribute and function 868 | function SetGroupBy ( $attribute, $func, $groupsort="@group desc" ) 869 | { 870 | assert ( is_string($attribute) ); 871 | assert ( is_string($groupsort) ); 872 | assert ( $func==SPH_GROUPBY_DAY 873 | || $func==SPH_GROUPBY_WEEK 874 | || $func==SPH_GROUPBY_MONTH 875 | || $func==SPH_GROUPBY_YEAR 876 | || $func==SPH_GROUPBY_ATTR 877 | || $func==SPH_GROUPBY_ATTRPAIR ); 878 | 879 | $this->_groupby = $attribute; 880 | $this->_groupfunc = $func; 881 | $this->_groupsort = $groupsort; 882 | } 883 | 884 | /// set count-distinct attribute for group-by queries 885 | function SetGroupDistinct ( $attribute ) 886 | { 887 | assert ( is_string($attribute) ); 888 | $this->_groupdistinct = $attribute; 889 | } 890 | 891 | /// set distributed retries count and delay 892 | function SetRetries ( $count, $delay=0 ) 893 | { 894 | assert ( is_int($count) && $count>=0 ); 895 | assert ( is_int($delay) && $delay>=0 ); 896 | $this->_retrycount = $count; 897 | $this->_retrydelay = $delay; 898 | } 899 | 900 | /// set result set format (hash or array; hash by default) 901 | /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs 902 | function SetArrayResult ( $arrayresult ) 903 | { 904 | assert ( is_bool($arrayresult) ); 905 | $this->_arrayresult = $arrayresult; 906 | } 907 | 908 | /// set attribute values override 909 | /// there can be only one override per attribute 910 | /// $values must be a hash that maps document IDs to attribute 
values 911 | function SetOverride ( $attrname, $attrtype, $values ) 912 | { 913 | assert ( is_string ( $attrname ) ); 914 | assert ( in_array ( $attrtype, array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT ) ) ); 915 | assert ( is_array ( $values ) ); 916 | 917 | $this->_overrides[$attrname] = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values ); 918 | } 919 | 920 | /// set select-list (attributes or expressions), SQL-like syntax 921 | function SetSelect ( $select ) 922 | { 923 | assert ( is_string ( $select ) ); 924 | $this->_select = $select; 925 | } 926 | 927 | ////////////////////////////////////////////////////////////////////////////// 928 | 929 | /// clear all filters (for multi-queries) 930 | function ResetFilters () 931 | { 932 | $this->_filters = array(); 933 | $this->_anchor = array(); 934 | } 935 | 936 | /// clear groupby settings (for multi-queries) 937 | function ResetGroupBy () 938 | { 939 | $this->_groupby = ""; 940 | $this->_groupfunc = SPH_GROUPBY_DAY; 941 | $this->_groupsort = "@group desc"; 942 | $this->_groupdistinct= ""; 943 | } 944 | 945 | /// clear all attribute value overrides (for multi-queries) 946 | function ResetOverrides () 947 | { 948 | $this->_overrides = array (); 949 | } 950 | 951 | ////////////////////////////////////////////////////////////////////////////// 952 | 953 | /// connect to searchd server, run given search query through given indexes, 954 | /// and return the search results 955 | function Query ( $query, $index="*", $comment="" ) 956 | { 957 | assert ( empty($this->_reqs) ); 958 | 959 | $this->AddQuery ( $query, $index, $comment ); 960 | $results = $this->RunQueries (); 961 | $this->_reqs = array (); // just in case it failed too early 962 | 963 | if ( !is_array($results) ) 964 | return false; // probably network error; error message should be already filled 965 | 966 | $this->_error = $results[0]["error"]; 967 | $this->_warning = $results[0]["warning"]; 968 | if ( 
$results[0]["status"]==SEARCHD_ERROR ) 969 | return false; 970 | else 971 | return $results[0]; 972 | } 973 | 974 | /// helper to pack floats in network byte order 975 | function _PackFloat ( $f ) 976 | { 977 | $t1 = pack ( "f", $f ); // machine order 978 | list(,$t2) = unpack ( "L*", $t1 ); // int in machine order 979 | return pack ( "N", $t2 ); 980 | } 981 | 982 | /// add query to multi-query batch 983 | /// returns index into results array from RunQueries() call 984 | function AddQuery ( $query, $index="*", $comment="" ) 985 | { 986 | // mbstring workaround 987 | $this->_MBPush (); 988 | 989 | // build request 990 | $req = pack ( "NNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker ); 991 | if ( $this->_ranker==SPH_RANK_EXPR ) 992 | $req .= pack ( "N", strlen($this->_rankexpr) ) . $this->_rankexpr; 993 | $req .= pack ( "N", $this->_sort ); // (deprecated) sort mode 994 | $req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby; 995 | $req .= pack ( "N", strlen($query) ) . $query; // query itself 996 | $req .= pack ( "N", count($this->_weights) ); // weights 997 | foreach ( $this->_weights as $weight ) 998 | $req .= pack ( "N", (int)$weight ); 999 | $req .= pack ( "N", strlen($index) ) . $index; // indexes 1000 | $req .= pack ( "N", 1 ); // id64 range marker 1001 | $req .= sphPackU64 ( $this->_min_id ) . sphPackU64 ( $this->_max_id ); // id64 range 1002 | 1003 | // filters 1004 | $req .= pack ( "N", count($this->_filters) ); 1005 | foreach ( $this->_filters as $filter ) 1006 | { 1007 | $req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"]; 1008 | $req .= pack ( "N", $filter["type"] ); 1009 | switch ( $filter["type"] ) 1010 | { 1011 | case SPH_FILTER_VALUES: 1012 | $req .= pack ( "N", count($filter["values"]) ); 1013 | foreach ( $filter["values"] as $value ) 1014 | $req .= sphPackI64 ( $value ); 1015 | break; 1016 | 1017 | case SPH_FILTER_RANGE: 1018 | $req .= sphPackI64 ( $filter["min"] ) . 
sphPackI64 ( $filter["max"] ); 1019 | break; 1020 | 1021 | case SPH_FILTER_FLOATRANGE: 1022 | $req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] ); 1023 | break; 1024 | 1025 | default: 1026 | assert ( 0 && "internal error: unhandled filter type" ); 1027 | } 1028 | $req .= pack ( "N", $filter["exclude"] ); 1029 | } 1030 | 1031 | // group-by clause, max-matches count, group-sort clause, cutoff count 1032 | $req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby; 1033 | $req .= pack ( "N", $this->_maxmatches ); 1034 | $req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort; 1035 | $req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay ); 1036 | $req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct; 1037 | 1038 | // anchor point 1039 | if ( empty($this->_anchor) ) 1040 | { 1041 | $req .= pack ( "N", 0 ); 1042 | } else 1043 | { 1044 | $a =& $this->_anchor; 1045 | $req .= pack ( "N", 1 ); 1046 | $req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"]; 1047 | $req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"]; 1048 | $req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] ); 1049 | } 1050 | 1051 | // per-index weights 1052 | $req .= pack ( "N", count($this->_indexweights) ); 1053 | foreach ( $this->_indexweights as $idx=>$weight ) 1054 | $req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight ); 1055 | 1056 | // max query time 1057 | $req .= pack ( "N", $this->_maxquerytime ); 1058 | 1059 | // per-field weights 1060 | $req .= pack ( "N", count($this->_fieldweights) ); 1061 | foreach ( $this->_fieldweights as $field=>$weight ) 1062 | $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight ); 1063 | 1064 | // comment 1065 | $req .= pack ( "N", strlen($comment) ) . 
$comment; 1066 | 1067 | // attribute overrides 1068 | $req .= pack ( "N", count($this->_overrides) ); 1069 | foreach ( $this->_overrides as $key => $entry ) 1070 | { 1071 | $req .= pack ( "N", strlen($entry["attr"]) ) . $entry["attr"]; 1072 | $req .= pack ( "NN", $entry["type"], count($entry["values"]) ); 1073 | foreach ( $entry["values"] as $id=>$val ) 1074 | { 1075 | assert ( is_numeric($id) ); 1076 | assert ( is_numeric($val) ); 1077 | 1078 | $req .= sphPackU64 ( $id ); 1079 | switch ( $entry["type"] ) 1080 | { 1081 | case SPH_ATTR_FLOAT: $req .= $this->_PackFloat ( $val ); break; 1082 | case SPH_ATTR_BIGINT: $req .= sphPackI64 ( $val ); break; 1083 | default: $req .= pack ( "N", $val ); break; 1084 | } 1085 | } 1086 | } 1087 | 1088 | // select-list 1089 | $req .= pack ( "N", strlen($this->_select) ) . $this->_select; 1090 | 1091 | // mbstring workaround 1092 | $this->_MBPop (); 1093 | 1094 | // store request to requests array 1095 | $this->_reqs[] = $req; 1096 | return count($this->_reqs)-1; 1097 | } 1098 | 1099 | /// connect to searchd, run queries batch, and return an array of result sets 1100 | function RunQueries () 1101 | { 1102 | if ( empty($this->_reqs) ) 1103 | { 1104 | $this->_error = "no queries defined, issue AddQuery() first"; 1105 | return false; 1106 | } 1107 | 1108 | // mbstring workaround 1109 | $this->_MBPush (); 1110 | 1111 | if (!( $fp = $this->_Connect() )) 1112 | { 1113 | $this->_MBPop (); 1114 | return false; 1115 | } 1116 | 1117 | // send query, get response 1118 | $nreqs = count($this->_reqs); 1119 | $req = join ( "", $this->_reqs ); 1120 | $len = 8+strlen($req); 1121 | $req = pack ( "nnNNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, 0, $nreqs ) . 
$req; // add header

        if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
             !( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ) )
        {
            $this->_MBPop ();
            return false;
        }

        // query sent ok; we can reset reqs now
        $this->_reqs = array ();

        // parse and return response
        return $this->_ParseSearchResponse ( $response, $nreqs );
    }

    /// parse and return search query (or queries) response
    ///
    /// $response is the raw reply body, $nreqs is the number of result sets to read.
    /// returns an array with one entry per parsed result set; each entry always has
    /// "error", "warning" and "status", and (unless the status is an error) "fields",
    /// "attrs", "total", "total_found" and "time". "matches" and "words" are only
    /// created when at least one match / word is present in the reply.
    ///
    /// every read loop is additionally bounded by the response length, so a truncated
    /// reply stops parsing early instead of looping on the advertised counts.
    /// calls _MBPop() before returning, so callers must not pop again on this path.
    function _ParseSearchResponse ( $response, $nreqs )
    {
        $p = 0; // current position
        $max = strlen($response); // max position for checks, to protect against broken responses

        $results = array ();
        for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
        {
            $results[] = array();
            $result =& $results[$ires]; // reference, so writes below land in $results

            $result["error"] = "";
            $result["warning"] = "";

            // extract status
            list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            $result["status"] = $status;
            if ( $status!=SEARCHD_OK )
            {
                // any non-OK status is followed by a length-prefixed message
                list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                $message = substr ( $response, $p, $len ); $p += $len;

                if ( $status==SEARCHD_WARNING )
                {
                    // a warning still carries a full result set after the message
                    $result["warning"] = $message;
                } else
                {
                    // an error carries nothing else for this result set
                    $result["error"] = $message;
                    continue;
                }
            }

            // read schema
            $fields = array ();
            $attrs = array ();

            list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            while ( $nfields-->0 && $p<$max )
            {
                list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                $fields[] = substr ( $response, $p, $len ); $p += $len;
            }
            $result["fields"] = $fields;

            list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            while ( $nattrs-->0 && $p<$max )
            {
                // each attribute is a length-prefixed name followed by its type code
                list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                $attr = substr ( $response, $p, $len ); $p += $len;
                list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                $attrs[$attr] = $type;
            }
            $result["attrs"] = $attrs;

            // read match count
            list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;

            // read matches
            $idx = -1;
            while ( $count-->0 && $p<$max )
            {
                // index into result array
                $idx++;

                // parse document id and weight
                if ( $id64 )
                {
                    // 8-byte document id followed by a 4-byte weight
                    $doc = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p += 8;
                    list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                }
                else
                {
                    // 4-byte document id followed by a 4-byte weight
                    list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
                        substr ( $response, $p, 8 ) ) );
                    $p += 8;
                    $doc = sphFixUint($doc);
                }
                $weight = sprintf ( "%u", $weight );

                // create match entry
                // (a list of id/weight records in array-result mode, otherwise keyed by document id)
                if ( $this->_arrayresult )
                    $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight );
                else
                    $result["matches"][$doc]["weight"] = $weight;

                // parse and create attributes
                $attrvals = array ();
                foreach ( $attrs as $attr=>$type )
                {
                    // handle 64bit ints
                    if ( $type==SPH_ATTR_BIGINT )
                    {
                        $attrvals[$attr] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p += 8;
                        continue;
                    }

                    // handle floats
                    if ( $type==SPH_ATTR_FLOAT )
                    {
                        // read the 4 bytes as an integer, then reinterpret them as a float
                        list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                        list(,$fval) = unpack ( "f*", pack ( "L", $uval ) );
                        $attrvals[$attr] = $fval;
                        continue;
                    }

                    // handle everything else as unsigned ints
                    list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                    if ( $type==SPH_ATTR_MULTI )
                    {
                        // $val is the number of 4-byte values that follow
                        $attrvals[$attr] = array ();
                        $nvalues = $val;
                        while ( $nvalues-->0 && $p<$max )
                        {
                            list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                            $attrvals[$attr][] = sphFixUint($val);
                        }
                    } else if ( $type==SPH_ATTR_MULTI64 )
                    {
                        // the counter is decremented by 2 for every 8-byte value consumed
                        $attrvals[$attr] = array ();
                        $nvalues = $val;
                        while ( $nvalues>0 && $p<$max )
                        {
                            $attrvals[$attr][] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p += 8;
                            $nvalues -= 2;
                        }
                    } else if ( $type==SPH_ATTR_STRING )
                    {
                        // $val is the byte length of the string that follows
                        $attrvals[$attr] = substr ( $response, $p, $val );
                        $p += $val;
                    } else
                    {
                        $attrvals[$attr] = sphFixUint($val);
                    }
                }

                if ( $this->_arrayresult )
                    $result["matches"][$idx]["attrs"] = $attrvals;
                else
                    $result["matches"][$doc]["attrs"] = $attrvals;
            }

            list ( $total, $total_found, $msecs, $words ) =
                array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
            $result["total"] = sprintf ( "%u", $total );
            $result["total_found"] = sprintf ( "%u", $total_found );
            $result["time"] = sprintf ( "%.3f", $msecs/1000 );
            $p += 16;

            // per-word statistics: length-prefixed word, then docs and hits counters
            while ( $words-->0 && $p<$max )
            {
                list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                $word = substr ( $response, $p, $len ); $p += $len;
                list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
                $result["words"][$word] = array (
                    "docs"=>sprintf ( "%u", $docs ),
                    "hits"=>sprintf ( "%u", $hits ) );
            }
        }

        $this->_MBPop ();
        return $results;
    }

    /////////////////////////////////////////////////////////////////////////////
    // excerpts generation
    /////////////////////////////////////////////////////////////////////////////

    /// connect to searchd server, and generate excerpts (snippets)
    /// of given documents for given query. returns false on failure,
    /// an array of snippets on success
    ///
    /// $docs is an array of document body strings, $index and $words are strings,
    /// $opts is an array of optional settings; any option that is not set gets the
    /// default assigned in the "fixup options" section below.
    /// the result has one snippet string per entry of $docs, in the same order.
    function BuildExcerpts ( $docs, $index, $words, $opts=array() )
    {
        assert ( is_array($docs) );
        assert ( is_string($index) );
        assert ( is_string($words) );
        assert ( is_array($opts) );

        $this->_MBPush ();

        if (!( $fp = $this->_Connect() ))
        {
            $this->_MBPop();
            return false;
        }

        /////////////////
        // fixup options
        /////////////////

        if ( !isset($opts["before_match"]) )            $opts["before_match"] = "<b>";
        if ( !isset($opts["after_match"]) )             $opts["after_match"] = "</b>";
        if ( !isset($opts["chunk_separator"]) )         $opts["chunk_separator"] = " ... ";
        if ( !isset($opts["limit"]) )                   $opts["limit"] = 256;
        if ( !isset($opts["limit_passages"]) )          $opts["limit_passages"] = 0;
        if ( !isset($opts["limit_words"]) )             $opts["limit_words"] = 0;
        if ( !isset($opts["around"]) )                  $opts["around"] = 5;
        if ( !isset($opts["exact_phrase"]) )            $opts["exact_phrase"] = false;
        if ( !isset($opts["single_passage"]) )          $opts["single_passage"] = false;
        if ( !isset($opts["use_boundaries"]) )          $opts["use_boundaries"] = false;
        if ( !isset($opts["weight_order"]) )            $opts["weight_order"] = false;
        if ( !isset($opts["query_mode"]) )              $opts["query_mode"] = false;
        if ( !isset($opts["force_all_words"]) )         $opts["force_all_words"] = false;
        if ( !isset($opts["start_passage_id"]) )        $opts["start_passage_id"] = 1;
        if ( !isset($opts["load_files"]) )              $opts["load_files"] = false;
        if ( !isset($opts["html_strip_mode"]) )         $opts["html_strip_mode"] = "index";
        if ( !isset($opts["allow_empty"]) )             $opts["allow_empty"] = false;
        if ( !isset($opts["passage_boundary"]) )        $opts["passage_boundary"] = "none";
        if ( !isset($opts["emit_zones"]) )              $opts["emit_zones"] = false;
        if ( !isset($opts["load_files_scattered"]) )    $opts["load_files_scattered"] = false;


        /////////////////
        // build request
        /////////////////

        // v.1.2 req
        $flags = 1; // remove spaces
        if ( $opts["exact_phrase"] )            $flags |= 2;
        if ( $opts["single_passage"] )          $flags |= 4;
        if ( $opts["use_boundaries"] )          $flags |= 8;
        if ( $opts["weight_order"] )            $flags |= 16;
        if ( $opts["query_mode"] )              $flags |= 32;
        if ( $opts["force_all_words"] )         $flags |= 64;
        if ( $opts["load_files"] )              $flags |= 128;
        if ( $opts["allow_empty"] )             $flags |= 256;
        if ( $opts["emit_zones"] )              $flags |= 512;
        if ( $opts["load_files_scattered"] )    $flags |= 1024;
        $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
        $req .= pack ( "N", strlen($index) ) . $index; // req index
        $req .= pack ( "N", strlen($words) ) . $words; // req words

        // options
        $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
        $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
        $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
        $req .= pack ( "NN", (int)$opts["limit"], (int)$opts["around"] );
        $req .= pack ( "NNN", (int)$opts["limit_passages"], (int)$opts["limit_words"], (int)$opts["start_passage_id"] ); // v.1.2
        $req .= pack ( "N", strlen($opts["html_strip_mode"]) ) . $opts["html_strip_mode"];
        $req .= pack ( "N", strlen($opts["passage_boundary"]) ) . $opts["passage_boundary"];

        // documents
        $req .= pack ( "N", count($docs) );
        foreach ( $docs as $doc )
        {
            assert ( is_string($doc) );
            $req .= pack ( "N", strlen($doc) ) . $doc;
        }

        ////////////////////////////
        // send query, get response
        ////////////////////////////

        $len = strlen($req);
        $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
        if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
             !( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ) )
        {
            $this->_MBPop ();
            return false;
        }

        //////////////////
        // parse response
        //////////////////

        $pos = 0;
        $res = array ();
        $rlen = strlen($response);
        // one length-prefixed snippet is expected per input document
        for ( $i=0; $i<count($docs); $i++ )
        {
            list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
            $pos += 4;

            // the advertised snippet must fit into what was actually received
            if ( $pos+$len > $rlen )
            {
                $this->_error = "incomplete reply";
                $this->_MBPop ();
                return false;
            }
            $res[] = $len ? substr ( $response, $pos, $len ) : "";
            $pos += $len;
        }

        $this->_MBPop ();
        return $res;
    }


    /////////////////////////////////////////////////////////////////////////////
    // keyword generation
    /////////////////////////////////////////////////////////////////////////////

    /// connect to searchd server, and generate keyword list for a given query
    /// returns false on failure,
    /// an array of words on success
    ///
    /// each returned entry has "tokenized" and "normalized" strings; when $hits is
    /// true, it also has "docs" and "hits" counters read from the reply.
    function BuildKeywords ( $query, $index, $hits )
    {
        assert ( is_string($query) );
        assert ( is_string($index) );
        assert ( is_bool($hits) );

        $this->_MBPush ();

        if (!( $fp = $this->_Connect() ))
        {
            $this->_MBPop();
            return false;
        }

        /////////////////
        // build request
        /////////////////

        // v.1.0 req
        $req  = pack ( "N", strlen($query) ) . $query; // req query
        $req .= pack ( "N", strlen($index) ) . $index; // req index
        $req .= pack ( "N", (int)$hits );

        ////////////////////////////
        // send query, get response
        ////////////////////////////

        $len = strlen($req);
        $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
        if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
             !( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ) )
        {
            $this->_MBPop ();
            return false;
        }

        //////////////////
        // parse response
        //////////////////

        $pos = 0;
        $res = array ();
        $rlen = strlen($response);
        list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
        $pos += 4;
        for ( $i=0; $i<$nwords; $i++ )
        {
            list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
            $tokenized = $len ? substr ( $response, $pos, $len ) : "";
            $pos += $len;

            list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
            $normalized = $len ? substr ( $response, $pos, $len ) : "";
            $pos += $len;

            $res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized );

            if ( $hits )
            {
                list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
                $pos += 8;
                $res [$i]["docs"] = $ndocs;
                $res [$i]["hits"] = $nhits;
            }

            // the entry just read ran past the end of the reply; discard everything
            if ( $pos > $rlen )
            {
                $this->_error = "incomplete reply";
                $this->_MBPop ();
                return false;
            }
        }

        $this->_MBPop ();
        return $res;
    }

    /// return a copy of $string with each of the characters listed in $from
    /// prefixed by a backslash (the backslash itself is doubled)
    function EscapeString ( $string )
    {
        $from = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=' );
        $to   = array ( '\\\\', '\(','\)','\|','\-','\!','\@','\~','\"', '\&', '\/', '\^', '\$', '\=' );

        return str_replace ( $from, $to, $string );
    }

    /////////////////////////////////////////////////////////////////////////////
    // attribute updates
    /////////////////////////////////////////////////////////////////////////////

    /// batch update given attributes in given rows in given indexes
    /// returns amount of updated documents (0 or more) on success, or -1 on failure
    ///
    /// $attrs is an array of attribute name strings; $values maps a numeric document
    /// id to an array with exactly one new value per entry of $attrs. values must be
    /// ints, or arrays of ints when $mva is true.
    function UpdateAttributes ( $index, $attrs, $values, $mva=false )
    {
        // verify everything
        assert ( is_string($index) );
        assert ( is_bool($mva) );

        assert ( is_array($attrs) );
        foreach ( $attrs as $attr )
            assert ( is_string($attr) );

        assert ( is_array($values) );
        foreach ( $values as $id=>$entry )
        {
            assert ( is_numeric($id) );
            assert ( is_array($entry) );
            assert ( count($entry)==count($attrs) );
            foreach ( $entry as $v )
            {
                if ( $mva )
                {
                    assert ( is_array($v) );
                    foreach ( $v as $vv )
                        assert ( is_int($vv) );
                } else
                    assert ( is_int($v) );
            }
        }

        // build request
        $this->_MBPush ();
        $req = pack ( "N", strlen($index) ) . $index;

        $req .= pack ( "N", count($attrs) );
        foreach ( $attrs as $attr )
        {
            // attribute name followed by its MVA flag
            $req .= pack ( "N", strlen($attr) ) . $attr;
            $req .= pack ( "N", $mva ? 1 : 0 );
        }

        $req .= pack ( "N", count($values) );
        foreach ( $values as $id=>$entry )
        {
            $req .= sphPackU64 ( $id );
            foreach ( $entry as $v )
            {
                // plain mode sends the value itself; MVA mode sends a count, then the values
                $req .= pack ( "N", $mva ? count($v) : $v );
                if ( $mva )
                    foreach ( $v as $vv )
                        $req .= pack ( "N", $vv );
            }
        }

        // connect, send query, get response
        if (!( $fp = $this->_Connect() ))
        {
            $this->_MBPop ();
            return -1;
        }

        $len = strlen($req);
        $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
        if ( !$this->_Send ( $fp, $req, $len+8 ) )
        {
            $this->_MBPop ();
            return -1;
        }

        if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
        {
            $this->_MBPop ();
            return -1;
        }

        // parse response
        list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
        $this->_MBPop ();
        return $updated;
    }

    /////////////////////////////////////////////////////////////////////////////
    // persistent connections
    /////////////////////////////////////////////////////////////////////////////

    /// connect, send the "persist" command, and keep the handle in $this->_socket
    /// returns true on success; false if a connection is already stored (error is
    /// set to 'already connected'), or if connecting or sending fails
    function Open()
    {
        if ( $this->_socket !== false )
        {
            $this->_error = 'already connected';
            return false;
        }
        if ( !$fp = $this->_Connect() )
            return false;

        // command, command version = 0, body length = 4, body = 1
        $req = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 );
        if ( !$this->_Send ( $fp, $req, 12 ) )
            return false;

        $this->_socket = $fp;
        return true;
    }

    /// close the handle stored by Open() and reset $this->_socket to false
    /// returns true on success; false (error set to 'not connected') if none is stored
    function Close()
    {
        if ( $this->_socket === false )
        {
            $this->_error = 'not connected';
            return false;
        }

        fclose ( $this->_socket );
        $this->_socket = false;

        return true;
    }

    //////////////////////////////////////////////////////////////////////////
    // status
    //////////////////////////////////////////////////////////////////////////

    /// connect to searchd server and request its status
    /// returns false on failure; on success, an array of rows, each row being a
    /// list of the column strings read from the reply
    function Status ()
    {
        $this->_MBPush ();
        if (!( $fp = $this->_Connect() ))
        {
            $this->_MBPop();
            return false;
        }

        $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1
        if ( !( $this->_Send ( $fp, $req, 12 ) ) ||
             !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) )
        {
            $this->_MBPop ();
            return false;
        }

        // reply starts with the row and column counts, followed by rows*cols
        // length-prefixed strings
        $p = 0;
        list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;

        $res = array();
        for ( $i=0; $i<$rows; $i++ )
            for ( $j=0; $j<$cols; $j++ )
        {
            list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
            $res[$i][] = substr ( $response, $p, $len ); $p += $len;
        }

        $this->_MBPop ();
        return $res;
    }

    //////////////////////////////////////////////////////////////////////////
    // flush
    //////////////////////////////////////////////////////////////////////////

    /// connect to searchd server and send the flush-attributes command
    /// returns the 4-byte tag from the reply on success; -1 if connecting, sending
    /// or receiving fails, or if the reply is not exactly 4 bytes long (in which
    /// case error is set to "unexpected response length")
    function FlushAttributes ()
    {
        $this->_MBPush ();
        if (!( $fp = $this->_Connect() ))
        {
            $this->_MBPop();
            return -1;
        }

        $req = pack ( "nnN", SEARCHD_COMMAND_FLUSHATTRS, VER_COMMAND_FLUSHATTRS, 0 ); // len=0
        if ( !( $this->_Send ( $fp, $req, 8 ) ) ||
             !( $response = $this->_GetResponse ( $fp, VER_COMMAND_FLUSHATTRS ) ) )
        {
            $this->_MBPop ();
            return -1;
        }

        $tag = -1;
        if ( strlen($response)==4 )
            list(,$tag) = unpack ( "N*", $response );
        else
            $this->_error = "unexpected response length";

        $this->_MBPop ();
        return $tag;
    }
}

//
// $Id: sphinxapi.php 3782 2013-04-06 18:22:58Z kevg $
//
--------------------------------------------------------------------------------