├── Admin-Manticore.english.php
├── Admin-Sphinx.english.php
├── DCO.txt
├── License.txt
├── SMF 2.0
└── SearchAPI-Sphinxql.php
├── SMF 2.1
├── SearchAPI-Manticore.php
└── SearchAPI-Sphinxql.php
├── package-info.xml
└── readme.md
/Admin-Manticore.english.php:
--------------------------------------------------------------------------------
1 | smf_delta_index, an index that only stores the recent changes and can be called frequently. smf_base_index, an index that stores the full database and should be called less frequently.
64 | Adding the following lines to /etc/crontab would let the index rebuild every day (at 3 am) and update the most recently changed messages each hour:';
--------------------------------------------------------------------------------
/Admin-Sphinx.english.php:
--------------------------------------------------------------------------------
1 | smf_delta_index, an index that only stores the recent changes and can be called frequently. smf_base_index, an index that stores the full database and should be called less frequently.
68 | Adding the following lines to /etc/crontab would let the index rebuild every day (at 3 am) and update the most recently changed messages each hour:';
--------------------------------------------------------------------------------
/DCO.txt:
--------------------------------------------------------------------------------
1 | Developer's Certificate of Origin 1.1
2 |
3 | By making a contribution to this project, I certify that:
4 |
5 | (a) The contribution was created in whole or in part by me and I
6 | have the right to submit it under the open source license
7 | indicated in the file; or
8 |
9 | (b) The contribution is based upon previous work that, to the best
10 | of my knowledge, is covered under an appropriate open source
11 | license and I have the right under that license to submit that
12 | work with modifications, whether created in whole or in part
13 | by me, under the same open source license (unless I am
14 | permitted to submit under a different license), as indicated
15 | in the file; or
16 |
17 | (c) The contribution was provided directly to me by some other
18 | person who certified (a), (b) or (c) and I have not modified
19 | it.
20 |
21 | (d) I understand and agree that this project and the contribution
22 | are public and that a record of the contribution (including all
23 | personal information I submit with it, including my sign-off) is
24 | maintained indefinitely and may be redistributed consistent with
25 | this project or the open source license(s) involved.
--------------------------------------------------------------------------------
/License.txt:
--------------------------------------------------------------------------------
1 | Copyright © 2023 Simple Machines. All rights reserved.
2 |
3 | Developed by: Simple Machines Forum Project
4 | Simple Machines
5 | https://www.simplemachines.org
6 |
7 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
8 | [x] Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers.
9 | [x] Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution.
10 | [x] Neither the names of Simple Machines Forum, Simple Machines, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission.
11 |
--------------------------------------------------------------------------------
/SMF 2.0/SearchAPI-Sphinxql.php:
--------------------------------------------------------------------------------
1 | supported_databases))
72 | {
73 | $this->is_supported = false;
74 | return;
75 | }
76 |
77 | // We sorta support mysqli at this point.
78 | if ($db_type == 'mysqli' || (function_exists('mysqli_connect') && !function_exists('mysql_connect')))
79 | $this->db_type = 'mysqli';
80 | }
81 |
/**
 * Check whether the search can be performed by this API.
 *
 * @access public
 * @param string $methodName The method we would like to use.
 * @param mixed $query_params The query parameters used for advanced or more defined support checking. Not consulted here.
 * @return bool true when the named method is handled by this API, false otherwise.
 */
public function supportsMethod($methodName, $query_params = null)
{
	switch ($methodName)
	{
		case 'searchSort':
		case 'prepareIndexes':
		case 'indexedWordQuery':
		case 'searchQuery':
			return true;

		// We don't support these yet.
		case 'topicsMoved':
		case 'topicsRemoved':
		case 'postRemoved':
		case 'postModified':
		case 'postCreated':
			return false;

		// All other methods, too bad dunno you.
		default:
			return false;
	}
}
116 |
/**
 * The Admin Search Settings calls this in order to define extra API settings.
 *
 * Appends the Sphinx related settings to $config_vars, fills a few $context
 * entries used by the settings page and, when saving, makes sure a SphinxQL
 * port is always submitted.
 *
 * @access public
 * @param array $config_vars All the configuration variables, we have to append or merge these.
 */
public static function searchSettings(&$config_vars)
{
	global $txt, $scripturl, $context, $modSettings;

	loadLanguage('Admin-Sphinx');

	// A request carrying "generateConfig" is handed to the configuration file generator.
	if (isset($_GET['generateConfig']))
		generateSphinxConfig();

	$local_config_vars = array(
		array('title', 'sphinx_server_config_tittle'),
		'' . $txt['sphinx_server_config_note'] . '',
		array('text', 'sphinx_data_path', 65, 'default_value' => '/var/sphinx/data', 'subtext' => $txt['sphinx_data_path_subtext']),
		array('text', 'sphinx_log_path', 65, 'default_value' => '/var/sphinx/log', 'subtext' => $txt['sphinx_log_path_subtext']),
		array('text', 'sphinx_conf_path', 65, 'default_value' => '/etc/sphinxsearch', 'subtext' => $txt['sphinx_conf_path_subtext']),
		array('text', 'sphinx_bin_path', 65, 'default_value' => '/usr/bin', 'subtext' => $txt['sphinx_bin_path_subtext']),
		array('text', 'sphinx_stopword_path', 65, 'default_value' => '', 'subtext' => $txt['sphinx_stopword_path_subtext']),
		// Registered once only; a second identical entry would show the same setting twice.
		array('int', 'sphinx_indexer_mem', 6, 'default_value' => '32', 'subtext' => $txt['sphinx_indexer_mem_subtext'], 'postinput' => $txt['sphinx_indexer_mem_postinput']),

		// SMF Configuration Settings.
		array('title', 'sphinx_smf_sphinx_tittle'),
		array('text', 'sphinx_searchd_server', 32, 'default_value' => 'localhost', 'subtext' => $txt['sphinx_searchd_server_subtext']),
		array('check', 'sphinx_searchd_bind', 0, 'subtext' => $txt['sphinx_searchd_bind_subtext']),
		// This is for the non legacy QL version, which we are not going support at this time.
		//array('int', 'sphinx_searchd_port', 6, 'default_value' => '9312', 'subtext' => $txt['sphinx_searchd_port_subtext']),
		array('int', 'sphinxql_searchd_port', 6, 'default_value' => '9306', 'subtext' => $txt['sphinxql_searchd_port_subtext']),
		array('int', 'sphinx_max_results', 6, 'default_value' => '1000', 'subtext' => $txt['sphinx_max_results_subtext']),

		// Just a hints section.
		array('title', 'sphinx_config_hints_title'),
		array('callback', 'SMFAction_Sphinx_Hints'),
	);

	// Merge them in.
	$config_vars = array_merge($config_vars, $local_config_vars);

	$context['post_url'] = $scripturl . '?action=admin;area=modsettings;save;sa=sphinx';
	$context['settings_title'] = $txt['sphinx_server_config_tittle'];
	$context['sphinx_version'] = self::sphinxversion();

	// Saving?
	if (isset($_GET['save']))
	{
		// Make sure this exists, but just push it with the other changes.
		if (!isset($modSettings['sphinx_indexed_msg_until']))
			$config_vars[] = array('int', 'sphinx_indexed_msg_until', 'default_value' => 1);

		// We still need a port.
		if (empty($_POST['sphinxql_searchd_port']))
			$_POST['sphinxql_searchd_port'] = 9306;
	}

	// This hacks in some defaults that are needed to generate a proper configuration file.
	foreach ($config_vars as $id => $cv)
		if (is_array($cv) && isset($cv[1], $cv['default_value']) && !isset($modSettings[$cv[1]]))
			$config_vars[$id]['value'] = $cv['default_value'];
}
181 |
/**
 * Comparison callback for usort() on the search words.
 *
 * Words are ranked by length, longest first. A word found in the global
 * $excludedWords list is penalised by 1000, which places it after the
 * included words while keeping the longest-first order among excluded words.
 *
 * @access public
 * @param string $a Word A
 * @param string $b Word B
 * @return int 1 when $a ranks below $b, -1 when it ranks above, 0 when equal
 */
public function searchSort($a, $b)
{
	global $excludedWords;

	$rank_a = strlen($a);
	if (in_array($a, $excludedWords))
		$rank_a -= 1000;

	$rank_b = strlen($b);
	if (in_array($b, $excludedWords))
		$rank_b -= 1000;

	if ($rank_a < $rank_b)
		return 1;

	if ($rank_a > $rank_b)
		return -1;

	return 0;
}
201 |
/**
 * Callback while preparing indexes for searching.
 *
 * The word is always recorded under $wordsSearch['indexed_words']; it is
 * wrapped in double quotes unless text2words() splits it into exactly one
 * sub-word. Excluded words are additionally appended to $wordsExclude.
 *
 * @access public
 * @param string $word A word to index
 * @param array $wordsSearch Search words
 * @param array $wordsExclude Words to exclude
 * @param bool $isExcluded Whether the specified word should be excluded
 */
public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded)
{
	$pieces = text2words($word, null, false);

	// Anything that is not a single sub-word is searched as a quoted phrase.
	if (count($pieces) === 1)
		$indexed = $word;
	else
		$indexed = '"' . $word . '"';

	$wordsSearch['indexed_words'][] = $indexed;

	if ($isExcluded)
		$wordsExclude[] = $indexed;
}
222 |
/**
 * Callback for actually performing the search query.
 *
 * Results are cached for 600 seconds under a key derived from the user's
 * board access query and the search parameters. On a cache miss a SphinxQL
 * query against smf_index is built and executed. The page of matches selected
 * by $_REQUEST['start'] is then copied into $context['topics'].
 *
 * @access public
 * @param array $search_params An array of parameters for the query
 * @param array $search_words The words that were searched for; each entry's 'subject_words' is used for highlighting
 * @param array $excluded_words Indexed words that should be excluded (not used by this implementation)
 * @param array $participants Filled with the topic ids of the current page, each mapped to false
 * @param array $search_results Filled with the words to highlight
 * @return int The total number of matches, 0 when there is nothing to search for
 */
public function searchQuery($search_params, $search_words, $excluded_words, &$participants, &$search_results)
{
	global $user_info, $context, $modSettings;

	$cache_key = 'search_results_' . md5($user_info['query_see_board'] . '_' . $context['params']);

	// Only request the results if they haven't been cached yet.
	$cached_results = cache_get_data($cache_key);
	if (!is_array($cached_results))
	{
		// Make sure we have a max results.
		if (!isset($modSettings['sphinx_max_results']))
			$modSettings['sphinx_max_results'] = '1000';
		$max_results = (int) $modSettings['sphinx_max_results'];

		// Construct the (binary mode) query before connecting, so an empty search never leaves a connection open.
		$where_match = $this->_constructQuery($search_params['search']);

		// Nothing to search, return zero results
		if (trim($where_match) == '')
			return 0;

		if (!empty($search_params['subject_only']))
			$where_match = '@subject ' . $where_match;

		// Compile different options for our query
		$query = 'SELECT * FROM smf_index WHERE MATCH(\'' . $where_match . '\')';

		// Set the limits based on the search parameters. Every value is forced to an integer before it reaches the query.
		$extra_where = array();
		if (!empty($search_params['min_msg_id']) || !empty($search_params['max_msg_id']))
		{
			$min_msg = empty($search_params['min_msg_id']) ? 0 : (int) $search_params['min_msg_id'];
			$max_msg = empty($search_params['max_msg_id']) ? (int) $modSettings['maxMsgID'] : (int) $search_params['max_msg_id'];
			$extra_where[] = 'id >= ' . $min_msg . ' AND id <=' . $max_msg;
		}
		if (!empty($search_params['topic']))
			$extra_where[] = 'id_topic = ' . (int) $search_params['topic'];
		if (!empty($search_params['brd']) && is_array($search_params['brd']))
			$extra_where[] = 'id_board IN (' . implode(',', array_map('intval', $search_params['brd'])) . ')';
		if (!empty($search_params['memberlist']) && is_array($search_params['memberlist']))
			$extra_where[] = 'id_member IN (' . implode(',', array_map('intval', $search_params['memberlist'])) . ')';

		if (!empty($extra_where))
			$query .= ' AND ' . implode(' AND ', $extra_where);

		// Put together a sort string; besides the main column sort (relevance, id_topic, or num_replies), add secondary sorting based on relevance value (if not the main sort method) and age
		// NOTE(review): 'sort' and 'sort_dir' are used as given; presumably validated by the caller - confirm.
		$sphinx_sort = ($search_params['sort'] === 'id_msg' ? 'id_topic' : $search_params['sort']) . ' ' . strtoupper($search_params['sort_dir']) . ($search_params['sort'] === 'relevance' ? '' : ', relevance desc') . ', poster_time DESC';

		// Grouping by topic id makes it return only one result per topic, so don't set that for in-topic searches
		if (empty($search_params['topic']))
			$query .= ' GROUP BY id_topic WITHIN GROUP ORDER BY ' . $sphinx_sort;
		$query .= ' ORDER BY ' . $sphinx_sort;

		$query .= ' LIMIT 0,' . $max_results;

		// Any limitations we need to add?
		if ($max_results > 0)
			$query .= ' OPTION max_matches=' . $max_results;

		// Create the connection and execute the search query.
		$mySphinx = $this->dbfunc_connect();
		$request = $this->dbfunc_query($query, $mySphinx);

		// Did the daemon answer the query?
		if ($request === false)
		{
			// Just log the error.
			if ($this->dbfunc_error($mySphinx))
				log_error($this->dbfunc_error($mySphinx));
			fatal_lang_error('error_no_search_daemon');
		}

		// Get the relevant information from the search results.
		$cached_results = array(
			'matches' => array(),
		);
		$num_rows = $this->dbfunc_num_rows($request);
		if ($num_rows != 0)
			while ($match = $this->dbfunc_fetch_assoc($request))
				$cached_results['matches'][$match['id']] = array(
					'id' => $match['id_topic'],
					'relevance' => round($match['relevance'] / 10000, 1) . '%',
					'num_matches' => empty($search_params['topic']) ? $num_rows : 0,
					'matches' => array(),
				);
		$this->dbfunc_free_result($request);
		$this->dbfunc_close($mySphinx);

		$cached_results['total'] = count($cached_results['matches']);

		// Store the search results in the cache.
		cache_put_data($cache_key, $cached_results, 600);
	}

	// A missing start offset means the first page.
	$start = isset($_REQUEST['start']) ? (int) $_REQUEST['start'] : 0;

	$participants = array();
	foreach (array_slice(array_keys($cached_results['matches']), $start, $modSettings['search_results_per_page']) as $msgID)
	{
		$context['topics'][$msgID] = $cached_results['matches'][$msgID];
		$participants[$cached_results['matches'][$msgID]['id']] = false;
	}

	// Sentences need to be broken up in words for proper highlighting.
	$search_results = array();
	foreach ($search_words as $words)
		$search_results = array_merge($search_results, $words['subject_words']);

	return $cached_results['total'];
}
337 |
/**
 * Constructs a binary mode query to pass back to sphinx.
 *
 * The input is split on spaces into words and double-quoted phrases. A
 * leading "-" excludes a token, "OR" or "|" joins the previous and the next
 * included token into one group, and "AND" or "&" is dropped because AND is
 * implied. Tokens that are empty after cleaning are ignored.
 *
 * @param string $string The user entered query to construct with
 * @return string A binary mode query, or an empty string when nothing searchable remains
 */
function _constructQuery($string)
{
	$keywords = array('include' => array(), 'exclude' => array());

	// Split our search string and return an empty string if no matches
	if (!preg_match_all('~ (-?)("[^"]+"|[^" ]+)~', ' ' . $string , $tokens, PREG_SET_ORDER))
		return '';

	// First we split our string into included and excluded words and phrases
	$or_part = false;
	foreach ($tokens as $token)
	{
		// Strip the quotes off of a phrase
		$phrase = $token[2][0] == '"';
		if ($phrase)
			$token[2] = substr($token[2], 1, -1);

		// Prepare this token
		$cleanWords = $this->_cleanString($token[2]);

		// Explode the cleanWords again incase the cleaning put more spaces into it
		$addWords = $phrase ? array('"' . $cleanWords . '"') : preg_split('~ ~u', $cleanWords, -1, PREG_SPLIT_NO_EMPTY);

		if ($token[1] == '-')
		{
			// An exclusion that cleaned down to nothing would only add an empty phrase to the query.
			if (trim($cleanWords) != '')
				$keywords['exclude'] = array_merge($keywords['exclude'], $addWords);
		}

		// OR'd keywords (we only do this if we have something to OR with)
		elseif (($token[2] == 'OR' || $token[2] == '|') && count($keywords['include']))
		{
			$last = array_pop($keywords['include']);
			if (!is_array($last))
				$last = array($last);
			$keywords['include'][] = $last;
			$or_part = true;
			continue;
		}

		// AND is implied in a Sphinx Search
		elseif ($token[2] == 'AND' || $token[2] == '&')
			continue;

		// If this part of the query ended up being blank, skip it
		elseif (trim($cleanWords) == '')
			continue;

		// Must be something they want to search for!
		else
		{
			// If this was part of an OR branch, add it to the proper section
			if ($or_part)
			{
				$group = count($keywords['include']) - 1;
				$keywords['include'][$group] = array_merge($keywords['include'][$group], $addWords);
			}
			else
				$keywords['include'] = array_merge($keywords['include'], $addWords);
		}

		// Start fresh on this...
		$or_part = false;
	}

	// Let's make sure they're not canceling each other out.
	// OR groups are arrays and cannot be compared as strings, so a group always counts as something left to search for.
	$has_remaining = false;
	foreach ($keywords['include'] as $keyword)
	{
		if (is_array($keyword) || !in_array($keyword, $keywords['exclude'], true))
		{
			$has_remaining = true;
			break;
		}
	}
	if (!$has_remaining)
		return '';

	// Now we compile our arrays into a valid search string
	$query_parts = array();
	foreach ($keywords['include'] as $keyword)
		$query_parts[] = is_array($keyword) ? '(' . implode(' | ', $keyword) . ')' : $keyword;

	foreach ($keywords['exclude'] as $keyword)
		$query_parts[] = '-' . $keyword;

	return implode(' ', $query_parts);
}
421 |
/**
 * Reduces a string to lowercase letters, numbers, underscores and spaces.
 *
 * @param string $string A string to clean
 * @return string A cleaned up string
 */
function _cleanString($string)
{
	global $smcFunc;

	// Entities are decoded first, then the text is lowercased through SMF's own helper.
	$text = $smcFunc['strtolower'](html_entity_decode($string, ENT_QUOTES, 'UTF-8'));

	// Number handling (phone numbers, SSN, dates, etc): a run of digits followed by
	// punctuation and another digit is removed together with that punctuation.
	$text = preg_replace('~([[:digit:]]+)\pP+(?=[[:digit:]])~u', '', $text);

	// Whatever is still not a letter, a number or an underscore becomes a single space.
	return preg_replace('~[^\pL\pN_]+~u', ' ', $text);
}
446 |
/**
 * Sphinx Database Support API: connect
 *
 * Opens a SphinxQL connection with mysqli or the legacy mysql extension,
 * depending on $this->db_type. When no connection can be made the Errors
 * language file is loaded and fatal_error() is called.
 *
 * @access private
 * @param string $host The sphinx search address, this will default to $modSettings['sphinx_searchd_server'] ('localhost' is replaced by 127.0.0.1).
 * @param string $port The port Sphinx runs on, this will default to $modSettings['sphinxql_searchd_port'], or 9306 when that is empty.
 * @return mixed The connection: a mysqli object or a mysql link resource.
 */
private function dbfunc_connect($host = '', $port = '')
{
	global $modSettings, $txt;

	// Fill out our host and port if needed.
	if (empty($host))
	{
		$host = $modSettings['sphinx_searchd_server'];
		if ($host == 'localhost')
			$host = '127.0.0.1';
	}

	if (empty($port))
	{
		if (empty($modSettings['sphinxql_searchd_port']))
			$port = 9306;
		else
			$port = (int) $modSettings['sphinxql_searchd_port'];
	}

	if ($this->db_type == 'mysqli')
	{
		$link = @mysqli_connect($host, '', '', '', $port);

		// Mysqli is never a resource, but an object.
		$connected = is_object($link) && !($link->connect_errno > 0);
	}
	else
	{
		// Errors are suppressed here because changing error_reporting did not silence them.
		$link = @mysql_connect($host . ':' . $port);

		$connected = is_resource($link);
	}

	if (!$connected)
	{
		loadLanguage('Errors');
		fatal_error($txt['error_no_search_daemon']);
	}

	return $link;
}
/**
 * Sphinx Database Support API: query
 *
 * @access private
 * @param string $query The query to run.
 * @param resource $mySphinx A SphinxQL connection resource.
 * @return mixed Whatever mysqli_query() or mysql_query() returns for the query.
 */
private function dbfunc_query($query, $mySphinx)
{
	if ($this->db_type != 'mysqli')
		return mysql_query($query, $mySphinx);

	// MySQLI Procedural Style has the resource first then the query.
	return mysqli_query($mySphinx, $query);
}
506 |
/**
 * Sphinx Database Support API: num_rows
 *
 * @access private
 * @param resource $mySphinx A SphinxQL request resource.
 * @return mixed Whatever mysqli_num_rows() or mysql_num_rows() returns for the request.
 */
private function dbfunc_num_rows($mySphinx)
{
	if ($this->db_type != 'mysqli')
		return mysql_num_rows($mySphinx);

	return mysqli_num_rows($mySphinx);
}
521 |
/**
 * Sphinx Database Support API: fetch_assoc
 *
 * @access private
 * @param resource $mySphinx A SphinxQL request resource.
 * @return mixed Whatever mysqli_fetch_assoc() or mysql_fetch_assoc() returns for the request.
 */
private function dbfunc_fetch_assoc($mySphinx)
{
	if ($this->db_type != 'mysqli')
		return mysql_fetch_assoc($mySphinx);

	return mysqli_fetch_assoc($mySphinx);
}
536 |
/**
 * Sphinx Database Support API: free_result
 *
 * @access private
 * @param resource $mySphinx A SphinxQL request resource.
 * @return mixed Whatever mysqli_free_result() or mysql_free_result() returns.
 */
private function dbfunc_free_result($mySphinx)
{
	if ($this->db_type != 'mysqli')
		return mysql_free_result($mySphinx);

	return mysqli_free_result($mySphinx);
}
551 |
/**
 * Sphinx Database Support API: close
 *
 * @access private
 * @param resource $mySphinx A SphinxQL connection resource.
 * @return mixed Whatever mysqli_close() or mysql_close() returns.
 */
private function dbfunc_close($mySphinx)
{
	if ($this->db_type != 'mysqli')
		return mysql_close($mySphinx);

	return mysqli_close($mySphinx);
}
566 |
/**
 * Sphinx Database Support API: error
 *
 * @access private
 * @param resource $mySphinx A SphinxQL connection resource.
 * @return mixed Whatever mysqli_error() or mysql_error() returns for the connection.
 */
private function dbfunc_error($mySphinx)
{
	if ($this->db_type != 'mysqli')
		return mysql_error($mySphinx);

	return mysqli_error($mySphinx);
}
581 |
/**
 * Sphinx Version
 *
 * Runs "indexer -v" from $modSettings['sphinx_bin_path'] (set to /usr/bin
 * when empty) and extracts the version number from its output.
 *
 * @access private
 * @return string|null The "major.minor" version of Sphinx, or null when the binary is missing, prints nothing or reports no recognisable version.
 */
private static function sphinxversion()
{
	global $modSettings;

	if (empty($modSettings['sphinx_bin_path']))
		$modSettings['sphinx_bin_path'] = '/usr/bin';

	// realpath() gives false for a path that does not exist.
	$binary = realpath($modSettings['sphinx_bin_path'] . '/indexer');
	if ($binary === false || !file_exists($binary))
		return;

	// The path comes from a setting, so quote it before it reaches the shell.
	$raw_version = shell_exec(escapeshellarg($binary) . ' -v');

	if (empty($raw_version))
		return;

	// No version? Test the match itself: empty() on the captures would reject a "0" part such as the minor of 3.0.
	if (!preg_match('~Sphinx (\d+)\.(\d+)~i', $raw_version, $m))
		return;

	return $m[1] . '.' . $m[2];
}
613 | }
614 |
/**
 * Callback to a template from our admin search settings page.
 * Prints the setup hints for Sphinx: the directories to create, an extra
 * database function for PostgreSQL, the indexer and searchd commands and
 * the suggested crontab lines.
 *
 * @access public
 */
function template_callback_SMFAction_Sphinx_Hints()
{
	global $db_type, $scripturl, $txt, $modSettings;

	// Until the data and log paths have been saved, only the "save first" hint is shown.
	$paths_saved = isset($modSettings['sphinx_data_path'], $modSettings['sphinx_log_path']);
	if (!$paths_saved)
	{
		echo '

', $txt['sphinx_config_hints_save'], '';

		return;
	}

	// Ensure these exist.
	foreach (array('sphinx_conf_path' => '/etc/sphinxsearch', 'sphinx_bin_path' => '/usr/bin') as $setting => $fallback)
		if (empty($modSettings[$setting]))
			$modSettings[$setting] = $fallback;

	// The pieces that are repeated throughout the hints.
	$data_path = $modSettings['sphinx_data_path'];
	$log_path = $modSettings['sphinx_log_path'];
	$conf_file = $modSettings['sphinx_conf_path'] . '/sphinx.conf';
	$indexer = $modSettings['sphinx_bin_path'] . '/indexer --config ' . $conf_file;
	$searchd = $modSettings['sphinx_bin_path'] . '/searchd --config ' . $conf_file;

	echo '

', $txt['search_weights'], '
[', $txt['sphinx_view_config'], ' | ', $txt['sphinx_download_config'], '] (', $txt['sphinx_config_hints_save'], ')
';

	// Step one: the directories Sphinx writes to.
	$message = '
' . sprintf($txt['sphinx_config_hints_desc'], $data_path) . '[pre]mkdir -p ' . $data_path . '
mkdir -p ' . $log_path . '
chmod a+w ' . $data_path . '
chmod a+w ' . $log_path . '[/pre]';

	// Add a extra step for postgresql.
	if ($db_type == 'postgresql')
	{
		$message .= '
[hr]
' . $txt['sphinx_config_hints_pgsql_func'] . '
[code]
CREATE FUNCTION update_settings(var TEXT, val INT) RETURNS VOID AS $$
BEGIN
LOOP
-- first try to update the key
UPDATE PREFIX_settings SET value = val WHERE variable = var;
IF found THEN
RETURN;
END IF;
-- not there so try to insert the key
BEGIN
INSERT INTO PREFIX_settings(variable,value) VALUES (var,val);
RETURN;
EXCEPTION WHEN unique_violation THEN
-- do nothing, loop again to try the UPDATE
END;
END LOOP;
END;
$$
LANGUAGE plpgsql;[/code]';
	}

	// The first full index run, starting the daemon, and the crontab lines.
	$message .= '
[hr]
' . $txt['sphinx_config_hints_index_start'] . '[pre]' . $indexer . ' --all
' . $searchd . '[/pre]
' . $txt['sphinx_config_hints_index_finish'] . '
[hr]
' . $txt['sphinx_config_hints_cron_start'] . '[pre]# search indexer
10 3 * * * ' . $indexer . ' --rotate smf_base_index
0 * * * * ' . $indexer . ' --rotate smf_delta_index[/pre]';

	// Print out our message.
	echo parse_bbc($message), '
';
}
697 |
698 | // This is the sphinx configuration file.
699 | /**
700 | * The Sphinx generated configuration file. We perform some checks and
701 | * calculation and then issue a download with the appropriate setup.
702 | *
703 | * @access public
704 | */
705 | function generateSphinxConfig()
706 | {
707 | global $context, $db_server, $db_name, $db_user, $db_passwd, $db_prefix;
708 | global $db_type, $db_character_set, $modSettings;
709 |
710 | $weight_factors = array(
711 | 'age',
712 | 'length',
713 | 'first_message',
714 | 'sticky',
715 | );
716 | $weight = array();
717 | $weight_total = 0;
718 | foreach ($weight_factors as $weight_factor)
719 | {
720 | $weight[$weight_factor] = empty($modSettings['search_weight_' . $weight_factor]) ? 0 : (int) $modSettings['search_weight_' . $weight_factor];
721 | $weight_total += $weight[$weight_factor];
722 | }
723 |
724 | if ($weight_total === 0)
725 | {
726 | $weight = array(
727 | 'age' => 25,
728 | 'length' => 25,
729 | 'first_message' => 25,
730 | 'sticky' => 25,
731 | );
732 | $weight_total = 100;
733 | }
734 |
735 | if ($db_type == 'postgresq')
736 | $supported_db_type = 'pgsql';
737 | else
738 | $supported_db_type = 'mysql';
739 |
740 | $host = $modSettings['sphinx_searchd_server'] == 'localhost' ? '127.0.0.1' : $modSettings['sphinx_searchd_server'];
741 |
742 | // Lets fall out of SMF templating and start the headers to serve a file.
743 | ob_end_clean();
744 | ob_start();
745 |
746 | // Send the attachment headers.
747 | header('Pragma: ');
748 | if (!$context['browser']['is_gecko'])
749 | header('Content-Transfer-Encoding: binary');
750 | header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 525600 * 60) . ' GMT');
751 | header('Last-Modified: ' . gmdate('D, d M Y H:i:s', time()) . ' GMT');
752 | header('Accept-Ranges: bytes');
753 | header('Connection: close');
754 | header('ETag: ' . sha1('sphinx.conf' + time()));
755 |
756 | if (isset($_GET['view']))
757 | header('Content-Type: text/plain');
758 | else
759 | {
760 | header('Content-Type: ' . ($context['browser']['is_ie'] || $context['browser']['is_opera'] ? 'application/octetstream' : 'application/octet-stream'));
761 | header('Content-Disposition: attachment; filename="sphinx.conf"');
762 | }
763 |
764 | header('Cache-Control: max-age=' . (525600 * 60) . ', private');
765 |
766 | // At this point, we are generating the configuration file.
767 | echo '#
768 | # Sphinx configuration file (sphinx.conf), configured for SMF 2.0
769 | #
770 | # By default the location of this file would probably be:
771 | # ' . $modSettings['sphinx_conf_path'] . '/sphinx.conf
772 |
773 | source smf_source
774 | {
775 | type = ', $supported_db_type, '
776 | sql_host = ', $db_server, '
777 | sql_user = ', $db_user, '
778 | sql_pass = ', $db_passwd, '
779 | sql_db = ', $db_name, '
780 | sql_port = 3306', empty($db_character_set) ? '' : '
781 | sql_query_pre = SET NAMES ' . $db_character_set;
782 |
783 | // Thanks to TheStupidOne for pgsql queries.
784 | if ($db_type == 'pgsql')
785 | echo '
786 | sql_query_pre = \
787 | SELECT update_settings(\'sphinx_indexed_msg_until\', (SELECT MAX(id_msg) FROM PREFIX_messages))';
788 | else
789 | echo '
790 | sql_query_pre = \
791 | REPLACE INTO ', $db_prefix, 'settings (variable, value) \
792 | SELECT \'sphinx_indexed_msg_until\', MAX(id_msg) \
793 | FROM ', $db_prefix, 'messages';
794 |
795 | echo '
796 | sql_query_range = \
797 | SELECT 1, value \
798 | FROM ', $db_prefix, 'settings \
799 | WHERE variable = \'sphinx_indexed_msg_until\'
800 | sql_range_step = 1000';
801 |
802 | // Thanks to TheStupidOne for pgsql queries.
803 | if ($db_type == 'pgsql')
804 | echo '
805 | sql_query = \
806 | SELECT \
807 | m.id_msg, m.id_topic, m.id_board, CASE WHEN m.id_member = 0 THEN 4294967295 ELSE m.id_member END AS id_member, m.poster_time, m.body, m.subject, \
808 | t.num_replies + 1 AS num_replies, CEILING(1000000 * ( \
809 | CASE WHEN m.id_msg < 0.7 * cast(s.value as INT) THEN 0 ELSE (m.id_msg - 0.7 * cast(s.value as INT)) / (0.3 * cast(s.value as INT)) END * ' . $weight['age'] . ' + \
810 | CASE WHEN t.num_replies < 200 THEN t.num_replies / 200 ELSE 1 END * ' . $weight['length'] . ' + \
811 | CASE WHEN m.id_msg = t.id_first_msg THEN 1 ELSE 0 END * ' . $weight['first_message'] . ' + \
812 | CASE WHEN t.is_sticky = 0 THEN 0 ELSE 1 END * ' . $weight['sticky'] . ' \
813 | ) / ' . $weight_total . ') AS relevance \
814 | FROM ', $db_prefix, 'messages AS m, ', $db_prefix, 'topics AS t, ', $db_prefix, 'settings AS s \
815 | WHERE t.id_topic = m.id_topic \
816 | AND s.variable = \'maxMsgID\' \
817 | AND m.id_msg BETWEEN $start AND $end';
818 | else
819 | echo '
820 | sql_query = \
821 | SELECT \
822 | m.id_msg, m.id_topic, m.id_board, IF(m.id_member = 0, 4294967295, m.id_member) AS id_member, m.poster_time, m.body, m.subject, \
823 | t.num_replies + 1 AS num_replies, CEILING(1000000 * ( \
824 | IF(m.id_msg < 0.7 * s.value, 0, (m.id_msg - 0.7 * s.value) / (0.3 * s.value)) * ' . $weight['age'] . ' + \
825 | IF(t.num_replies < 200, t.num_replies / 200, 1) * ' . $weight['length'] . ' + \
826 | IF(m.id_msg = t.id_first_msg, 1, 0) * ' . $weight['first_message'] . ' + \
827 | IF(t.is_sticky = 0, 0, 1) * ' . $weight['sticky'] . ' \
828 | ) / ' . $weight_total . ') AS relevance \
829 | FROM ', $db_prefix, 'messages AS m, ', $db_prefix, 'topics AS t, ', $db_prefix, 'settings AS s \
830 | WHERE t.id_topic = m.id_topic \
831 | AND s.variable = \'maxMsgID\' \
832 | AND m.id_msg BETWEEN $start AND $end';
833 |
834 | echo '
835 | sql_attr_uint = id_topic
836 | sql_attr_uint = id_board
837 | sql_attr_uint = id_member';
838 |
839 | // Sphinx 3.0 dropped sql_attr_timestamp, but sql_attr_uint should be compatible.
840 | if (version_compare($context['sphinx_version'], '3.0', '>'))
841 | echo '
842 | sql_attr_timestamp = poster_time
843 | sql_attr_timestamp = relevance
844 | sql_attr_timestamp = num_replies';
845 | else
846 | echo '
847 | sql_attr_uint = poster_time
848 | sql_attr_uint = relevance
849 | sql_attr_uint = num_replies';
850 |
851 | echo '
852 | }
853 |
854 | source smf_delta_source : smf_source
855 | {
856 | sql_query_pre = ', isset($db_character_set) ? 'SET NAMES ' . $db_character_set : '', '
857 | sql_query_range = \
858 | SELECT s1.value, s2.value \
859 | FROM ', $db_prefix, 'settings AS s1, ', $db_prefix, 'settings AS s2 \
860 | WHERE s1.variable = \'sphinx_indexed_msg_until\' \
861 | AND s2.variable = \'maxMsgID\'
862 | }
863 |
864 | index smf_base_index
865 | {
866 | html_strip = 1
867 | source = smf_source
868 | path = ', $modSettings['sphinx_data_path'], '/smf_sphinx_base.index', empty($modSettings['sphinx_stopword_path']) ? '' : '
869 | stopwords = ' . $modSettings['sphinx_stopword_path'], '
870 | min_word_len = 2
871 | charset_table = 0..9, A..Z->a..z, _, a..z
872 | }
873 |
874 | index smf_delta_index : smf_base_index
875 | {
876 | source = smf_delta_source
877 | path = ', $modSettings['sphinx_data_path'], '/smf_sphinx_delta.index
878 | }
879 |
880 | index smf_index
881 | {
882 | type = distributed
883 | local = smf_base_index
884 | local = smf_delta_index
885 | }
886 |
887 | indexer
888 | {
889 | mem_limit = ', (int) $modSettings['sphinx_indexer_mem'], 'M
890 | }
891 |
892 | searchd
893 | {';
894 |
895 | // This is for the non-legacy QL version, which we are not going to support at this time.
896 | // listen = ', (int) $modSettings['sphinx_searchd_port'], '
897 |
898 | echo '
899 | listen = ', !empty($modSettings['sphinx_searchd_bind']) ? $host : '0.0.0.0', ':', (empty($modSettings['sphinxql_searchd_port']) ? 9306 : (int) $modSettings['sphinxql_searchd_port']), ':mysql41
900 | log = ', $modSettings['sphinx_log_path'], '/searchd.log
901 | query_log = ', $modSettings['sphinx_log_path'], '/query.log
902 | read_timeout = 5
903 | max_children = 30
904 | pid_file = ', $modSettings['sphinx_data_path'], '/searchd.pid
905 | binlog_path = ', $modSettings['sphinx_data_path'], '
906 | }';
907 |
908 | die;
909 | }
910 |
--------------------------------------------------------------------------------
/SMF 2.1/SearchAPI-Manticore.php:
--------------------------------------------------------------------------------
1 | supported_databases))
58 | {
59 | $this->is_supported = false;
60 | return;
61 | }
62 | }
63 |
/**
 * Reports whether this API implements the requested search method.
 *
 * @access public
 * @param string $methodName The method the caller would like to use.
 * @param mixed $query_params The query parameters for finer-grained checks; not consulted here.
 * @return bool true for the methods listed below, false for everything else.
 */
public function supportsMethod($methodName, $query_params = null)
{
	// The only methods answered with true. The index maintenance hooks
	// (topicsMoved, topicsRemoved, postRemoved, postModified, postCreated)
	// and any unknown name are all reported as unsupported.
	$handled = array(
		'searchSort',
		'prepareIndexes',
		'indexedWordQuery',
		'searchQuery',
		'isValid',
	);

	// Loose comparison on purpose: it mirrors how a switch statement matches its cases.
	return in_array($methodName, $handled);
}
99 |
/**
 * Tells the caller whether this search API can be used.
 *
 * @access public
 * @return bool Always true; no checks are performed here.
 */
public function isValid()
{
	return true;
}
104 |
/**
 * The Admin Search Settings calls this in order to define extra API settings.
 *
 * Appends the Manticore settings to $config_vars, sets the form target and
 * title in $context, works out which Manticore version to report, and fills
 * in the default value of every setting that has not been saved yet.
 * When ?generateConfig is present, generateManticoreConfig() is called first.
 *
 * @access public
 * @param array $config_vars All the configuration variables, we have to append or merge these.
 */
public static function searchSettings(&$config_vars)
{
	global $txt, $scripturl, $context, $modSettings;

	loadLanguage('Admin-Manticore');

	if (isset($_GET['generateConfig']))
		generateManticoreConfig();

	$local_config_vars = array(
		array('title', 'manticore_server_config_tittle'),
		// NOTE(review): this literal is reproduced exactly as found; it looks as if surrounding markup may be missing - confirm.
		'
' . $txt['manticore_server_config_note'] . '',
		array('text', 'manticore_index_name', 65, 'default_value' => 'smf', 'subtext' => $txt['manticore_index_name_subtext']),
		array('text', 'manticore_data_path', 65, 'default_value' => '/var/lib/manticore/data', 'subtext' => $txt['manticore_data_path_subtext']),
		array('text', 'manticore_log_path', 65, 'default_value' => '/var/log/manticore', 'subtext' => $txt['manticore_log_path_subtext']),
		array('text', 'manticore_conf_path', 65, 'default_value' => '/etc/manticoresearch', 'subtext' => $txt['manticore_conf_path_subtext']),
		array('text', 'manticore_pid_path', 65, 'default_value' => '/var/run/manticore', 'subtext' => $txt['manticore_pid_path_subtext']),
		array('text', 'manticore_bin_path', 65, 'default_value' => '/usr/bin', 'subtext' => $txt['manticore_bin_path_subtext']),
		array('text', 'manticore_stopword_path', 65, 'default_value' => '', 'subtext' => $txt['manticore_stopword_path_subtext']),
		// Listed once only; a second identical row would render the same field twice.
		array('int', 'manticore_indexer_mem', 6, 'default_value' => '32', 'subtext' => $txt['manticore_indexer_mem_subtext'], 'postinput' => $txt['manticore_indexer_mem_postinput']),

		// SMF Configuration Settings.
		array('title', 'manticore_smf_manticore_tittle'),
		array('text', 'manticore_searchd_server', 32, 'default_value' => 'localhost', 'subtext' => $txt['manticore_searchd_server_subtext']),
		array('check', 'manticore_searchd_bind', 0, 'subtext' => $txt['manticore_searchd_bind_subtext']),
		array('int', 'manticore_searchd_port', 6, 'default_value' => '9306', 'subtext' => $txt['manticore_searchd_port_subtext']),
		array('int', 'manticore_version', 6, 'default_value' => '3.0', 'subtext' => $txt['manticore_version_subtext']),
		array('int', 'manticore_max_results', 6, 'default_value' => '1000', 'subtext' => $txt['manticore_max_results_subtext']),

		// Just a hints section.
		array('title', 'manticore_config_hints_title'),
		array('callback', 'SMFAction_Manticore_Hints'),
	);

	// Merge them in.
	$config_vars = array_merge($config_vars, $local_config_vars);

	$context['post_url'] = $scripturl . '?action=admin;area=modsettings;save;sa=manticore';
	$context['settings_title'] = $txt['manticore_server_config_tittle'];
	$context['manticore_version'] = self::manticoreversion();

	// Try to fall back: detected version first, then the saved setting, then a fixed default.
	if (empty($context['manticore_version']))
		$context['manticore_version'] = !empty($modSettings['manticore_version']) ? $modSettings['manticore_version'] : '4.2.0';
	// We detected a version but nothing was saved yet, so remember it for this request.
	elseif (empty($modSettings['manticore_version']))
		$modSettings['manticore_version'] = $context['manticore_version'];

	// Saving?
	if (isset($_GET['save']))
	{
		// Make sure this exists, but just push it with the other changes.
		if (!isset($modSettings['manticore_indexed_msg_until']))
			$config_vars[] = array('int', 'manticore_indexed_msg_until', 'default_value' => 1);

		// We still need a port.
		if (empty($_POST['manticore_searchd_port']))
			$_POST['manticore_searchd_port'] = 9306;
	}

	// This hacks in some defaults that are needed to generate a proper configuration file.
	foreach ($config_vars as $id => $cv)
		if (is_array($cv) && isset($cv[1], $cv['default_value']) && !isset($modSettings[$cv[1]]))
			$config_vars[$id]['value'] = $cv['default_value'];
}
178 |
/**
 * Comparison callback for usort, used to order the fulltext search words.
 * Longer words come before shorter ones, and every word found in the global
 * $excludedWords list is pushed behind all words that are not excluded.
 *
 * @access public
 * @param string $a Word A
 * @param string $b Word B
 * @return int 1 when $a ranks below $b, -1 when it ranks above, 0 when they tie
 */
public function searchSort($a, $b)
{
	global $excludedWords;

	// A word's rank is its length; excluded words carry a 1000 point penalty.
	$rank_a = strlen($a);
	if (in_array($a, $excludedWords))
		$rank_a -= 1000;

	$rank_b = strlen($b);
	if (in_array($b, $excludedWords))
		$rank_b -= 1000;

	// Highest rank first.
	return $rank_b <=> $rank_a;
}
198 |
/**
 * Callback used while the search words are being prepared.
 * Adds the word to the indexed words and, when flagged, to the exclusions.
 *
 * @access public
 * @param string $word A word to index
 * @param array $wordsSearch Search words; the term is appended to its 'indexed_words' entry
 * @param array $wordsExclude Words to exclude; the term is appended when $isExcluded is set
 * @param bool $isExcluded Whether the specified word should be excluded
 */
public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
{
	// Anything that does not break down into exactly one sub-word is searched as a quoted phrase.
	$parts = text2words($word, null, false);

	$term = $word;
	if (count($parts) !== 1)
		$term = '"' . $word . '"';

	$wordsSearch['indexed_words'][] = $term;

	if ($isExcluded)
		$wordsExclude[] = $term;
}
219 |
/**
 * Callback for actually performing the search query
 *
 * Results are fetched from the search daemon unless a cached copy exists,
 * and the cached copy is stored for 600 seconds.
 *
 * @access public
 * @param array $query_params An array of parameters for the query
 * @param array $searchWords The words that were searched for
 * @param array $excludedIndexWords Indexed words that should be excluded (not used here)
 * @param array $participants - Only used if we have enabled participation.
 * @param array $searchArray - Builds $context['key_words'] used for highlighting
 * @return int The total number of matches, 0 when there is nothing to search for
 *		- Both $participants and $searchArray are updated by reference
 *		- $context['topics'] is populated with a id_msg => array(
 *			'id' => id_topic
 *			'relevance' => round(relevance / 10000, 1) . '%',
 *			'num_matches' => No topic is specified (ie, not searching one topic only) ? $num_rows : 0,
 *			'matches' => array(),
 *		),
 */
public function searchQuery(array $query_params, array $searchWords, array $excludedIndexWords, array &$participants, array &$searchArray)
{
	global $user_info, $context, $modSettings;

	// Use one key for both reading and writing, otherwise the cache is filled but never hit.
	$cache_key = 'search_results_' . md5($user_info['query_see_board'] . '_' . $context['params']);

	// Only request the results if they haven't been cached yet.
	$cached_results = cache_get_data($cache_key);
	if (!is_array($cached_results))
	{
		// Construct the (binary mode) query before connecting, so no connection is left open when we bail out.
		$where_match = $this->_constructQuery($query_params['search']);

		// Nothing to search, return zero results
		if (trim($where_match) == '')
			return 0;

		if (!empty($query_params['subject_only']))
			$where_match = '@subject ' . $where_match;

		// Make sure we have a usable max results.
		if (empty($modSettings['manticore_max_results']) || (int) $modSettings['manticore_max_results'] <= 0)
			$modSettings['manticore_max_results'] = '1000';
		$max_results = (int) $modSettings['manticore_max_results'];

		// Compile different options for our query
		$query = 'SELECT * FROM ' . self::indexName() . '_index';
		$query .= ' WHERE MATCH(\'' . $where_match . '\')';

		// Set the limits based on the search parameters. Everything numeric is forced to an integer.
		$extra_where = array();
		if (!empty($query_params['min_msg_id']) || !empty($query_params['max_msg_id']))
		{
			$min_msg_id = empty($query_params['min_msg_id']) ? 0 : (int) $query_params['min_msg_id'];
			$max_msg_id = empty($query_params['max_msg_id']) ? (int) $modSettings['maxMsgID'] : (int) $query_params['max_msg_id'];
			$extra_where[] = 'id >= ' . $min_msg_id . ' AND id <= ' . $max_msg_id;
		}
		if (!empty($query_params['topic']))
			$extra_where[] = 'id_topic = ' . (int) $query_params['topic'];
		if (!empty($query_params['brd']) && is_array($query_params['brd']))
			$extra_where[] = 'id_board IN (' . implode(',', array_map('intval', $query_params['brd'])) . ')';
		if (!empty($query_params['memberlist']) && is_array($query_params['memberlist']))
			$extra_where[] = 'id_member IN (' . implode(',', array_map('intval', $query_params['memberlist'])) . ')';

		if (!empty($extra_where))
			$query .= ' AND ' . implode(' AND ', $extra_where);

		// Keep the sort column to identifier characters and the direction to ASC/DESC.
		$sort_column = preg_replace('~[^a-zA-Z0-9_]~', '', (string) $query_params['sort']);
		$sort_dir = strtoupper((string) $query_params['sort_dir']) === 'ASC' ? 'ASC' : 'DESC';

		// Put together a sort string; besides the main column sort (relevance, id_topic, or num_replies), add secondary sorting based on relevance value (if not the main sort method) and age
		$manticore_sort = ($sort_column === 'id_msg' ? 'id_topic' : $sort_column) . ' ' . $sort_dir . ($sort_column === 'relevance' ? '' : ', relevance desc') . ', poster_time DESC';

		// Grouping by topic id makes it return only one result per topic, so don't set that for in-topic searches
		if (empty($query_params['topic']))
			$query .= ' GROUP BY id_topic WITHIN GROUP ORDER BY ' . $manticore_sort;
		$query .= ' ORDER BY ' . $manticore_sort;

		$query .= ' LIMIT 0,' . $max_results;
		$query .= ' OPTION max_matches=' . $max_results;

		// Create an instance of the manticore client and execute the search query.
		$myManticore = $this->dbfunc_connect();
		$request = $this->dbfunc_query($query, $myManticore);

		// Did the daemon answer the query?
		if ($request === false)
		{
			// Grab the error before the connection goes away, then just log it.
			$error = $this->dbfunc_error($myManticore);
			$this->dbfunc_close($myManticore);

			if ($error)
				log_error($error);
			fatal_lang_error('error_no_search_daemon');
		}

		// Get the relevant information from the search results.
		$cached_results = array(
			'matches' => array(),
		);
		$num_rows = $this->dbfunc_num_rows($request);
		if ($num_rows != 0)
			while ($match = $this->dbfunc_fetch_assoc($request))
				$cached_results['matches'][$match['id']] = array(
					'id' => $match['id_topic'],
					'relevance' => round($match['relevance'] / 10000, 1) . '%',
					'num_matches' => empty($query_params['topic']) ? $num_rows : 0,
					'matches' => array(),
				);
		$this->dbfunc_free_result($request);
		$this->dbfunc_close($myManticore);

		$cached_results['total'] = count($cached_results['matches']);

		// Store the search results in the cache.
		cache_put_data($cache_key, $cached_results, 600);
	}

	// Hand back only the slice of matches that belongs on the requested page.
	$start = isset($_REQUEST['start']) ? (int) $_REQUEST['start'] : 0;
	$participants = array();
	foreach (array_slice(array_keys($cached_results['matches']), $start, $modSettings['search_results_per_page']) as $msgID)
	{
		$context['topics'][$msgID] = $cached_results['matches'][$msgID];
		$participants[$cached_results['matches'][$msgID]['id']] = false;
	}

	// Sentences need to be broken up in words for proper highlighting.
	$searchArray = array();
	foreach ($searchWords as $words)
		$searchArray = array_merge($searchArray, $words['subject_words']);

	// Work around SMF bug causing multiple pages to not work right.
	if (!isset($_SESSION['search_cache']['num_results']))
		$_SESSION['search_cache'] = [
			'num_results' => $cached_results['total']
		];

	return $cached_results['total'];
}
348 |
/**
 * Constructs a binary mode query to pass back to manticore
 *
 * The string is split into words and "quoted phrases". A leading - excludes
 * a term, OR / | joins the surrounding terms into a group, and AND / & are
 * dropped because AND is implied. Every term is passed through _cleanString().
 *
 * @param string $string The user entered query to construct with
 * @return string A binary mode query, or an empty string when nothing searchable is left
 */
private function _constructQuery($string)
{
	$keywords = array('include' => array(), 'exclude' => array());

	// Split our search string and return an empty string if no matches
	if (!preg_match_all('~ (-?)("[^"]+"|[^" ]+)~', ' ' . $string, $tokens, PREG_SET_ORDER))
		return '';

	// First we split our string into included and excluded words and phrases
	$or_part = false;
	foreach ($tokens as $token)
	{
		// Strip the quotes off of a phrase
		$phrase = $token[2][0] == '"';
		if ($phrase)
			$token[2] = substr($token[2], 1, -1);

		// Prepare this token; trimming keeps padding out of phrases and makes the blank checks simple.
		$cleanWords = trim((string) $this->_cleanString($token[2]));

		// Explode the cleanWords again incase the cleaning put more spaces into it.
		// A token that cleaned down to nothing contributes no words at all, not even an empty phrase.
		if ($cleanWords === '')
			$addWords = array();
		elseif ($phrase)
			$addWords = array('"' . $cleanWords . '"');
		else
			$addWords = preg_split('~ ~u', $cleanWords, -1, PREG_SPLIT_NO_EMPTY) ?: array();

		if ($token[1] == '-')
			$keywords['exclude'] = array_merge($keywords['exclude'], $addWords);

		// OR'd keywords (we only do this if we have something to OR with)
		elseif (($token[2] == 'OR' || $token[2] == '|') && count($keywords['include']))
		{
			// Turn the previous term into a group so the next term can join it.
			$last = array_pop($keywords['include']);
			if (!is_array($last))
				$last = array($last);
			$keywords['include'][] = $last;
			$or_part = true;
			continue;
		}

		// AND is implied in a Manticore Search
		elseif ($token[2] == 'AND' || $token[2] == '&')
			continue;

		// If this part of the query ended up being blank, skip it
		elseif ($cleanWords === '')
			continue;

		// Must be something they want to search for!
		else
		{
			// If this was part of an OR branch, add it to the proper section
			if ($or_part)
				$keywords['include'][count($keywords['include']) - 1] = array_merge($keywords['include'][count($keywords['include']) - 1], $addWords);
			else
				$keywords['include'] = array_merge($keywords['include'], $addWords);
		}

		// Start fresh on this...
		$or_part = false;
	}

	// Let's make sure they're not canceling each other out.
	// OR groups are nested arrays, so flatten them before comparing term by term.
	$included_terms = array();
	foreach ($keywords['include'] as $keyword)
		$included_terms = array_merge($included_terms, (array) $keyword);

	if (!count(array_diff($included_terms, $keywords['exclude'])))
		return '';

	// Now we compile our arrays into a valid search string
	$query_parts = array();
	foreach ($keywords['include'] as $keyword)
		$query_parts[] = is_array($keyword) ? '(' . implode(' | ', $keyword) . ')' : $keyword;

	foreach ($keywords['exclude'] as $keyword)
		$query_parts[] = '-' . $keyword;

	return implode(' ', $query_parts);
}
432 |
/**
 * Reduces a string to lowercase letters, numbers, underscores and spaces.
 *
 * @param string $string A string to clean
 * @return string A cleaned up string
 */
private function _cleanString($string)
{
	global $smcFunc;

	// Entities become real characters first, then everything is lowercased.
	$decoded = html_entity_decode($string, ENT_QUOTES, 'UTF-8');
	$lowered = $smcFunc['strtolower']($decoded);

	// Number handling (phone numbers, SSN, dates, etc): a run of digits together with
	// the punctuation after it is removed whenever another digit follows.
	// NOTE(review): this drops the leading digit groups rather than joining them - confirm that is intended.
	$numbers_fixed = preg_replace('~([[:digit:]]+)\pP+(?=[[:digit:]])~u', '', $lowered);

	// Whatever is left that is not a letter, a number or an underscore collapses into a single space.
	$cleaned = preg_replace('~[^\pL\pN_]+~u', ' ', $numbers_fixed);

	return $cleaned;
}
457 |
/**
 * Callback when a post is created
 * @see createPost()
 *
 * Currently a no-op: the method returns true straight away. The code after
 * the return is kept as an example of how the index could be updated.
 *
 * @access public
 * @param array $msgOptions An array of post data
 * @param array $topicOptions An array of topic data
 * @param array $posterOptions An array of info about the person who made this post
 * @return bool Always true
 */
public function postCreated(array &$msgOptions, array &$topicOptions, array &$posterOptions)
{
	return true;
	// !! Manticore Does support updating the search index from its Plain Text index.
	// !! Manticore for SMF does not support this at this time. Code is provided
	// !! here as examples/testing purposes.

	global $smcFunc, $modSettings;

	// Figure out our weights.
	$weight_factors = array(
		'age',
		'length',
		'first_message',
		'sticky',
	);
	$weight = array();
	$weight_total = 0;
	foreach ($weight_factors as $weight_factor)
	{
		$weight[$weight_factor] = empty($modSettings['search_weight_' . $weight_factor]) ? 0 : (int) $modSettings['search_weight_' . $weight_factor];
		$weight_total += $weight[$weight_factor];
	}

	// No weights configured at all? Fall back to an even split.
	if ($weight_total === 0)
	{
		$weight = array(
			'age' => 25,
			'length' => 25,
			'first_message' => 25,
			'sticky' => 25,
		);
		$weight_total = 100;
	}

	// The data was inserted at this point, lets get some data as the passed variables don't contain all we need.
	// The settings row is joined in because the age weighting reads s.value (the maxMsgID setting).
	$request = $smcFunc['db_query']('', '
		SELECT
			m.id_msg, m.id_topic, m.id_board, IF(m.id_member = 0, 4294967295, m.id_member) AS id_member, m.poster_time, m.body, m.subject,
			t.num_replies + 1 AS num_replies,
			CEILING(1000000 * (
				IF(m.id_msg < 0.7 * s.value, 0, (m.id_msg - 0.7 * s.value) / (0.3 * s.value)) * {int:weight_age} +
				IF(t.num_replies < 200, t.num_replies / 200, 1) * {int:weight_length} +
				IF(m.id_msg = t.id_first_msg, 1, 0) * {int:weight_first_msg} +
				IF(t.is_sticky = 0, 0, 1) * {int:weight_sticky}
			) / {int:weight_total}) AS relevance
		FROM {db_prefix}messages AS m
			INNER JOIN {db_prefix}topics AS t ON (t.id_topic = m.id_topic)
			INNER JOIN {db_prefix}settings AS s ON (s.variable = {string:max_msg_setting})
		WHERE m.id_msg = {int:newMessage}',
		array(
			'newMessage' => $msgOptions['id'],
			'weight_age' => $weight['age'],
			'weight_length' => $weight['length'],
			'weight_first_msg' => $weight['first_message'],
			'weight_sticky' => $weight['sticky'],
			'weight_total' => $weight_total,
			'max_msg_setting' => 'maxMsgID',
		)
	);
	$tempMessage = $smcFunc['db_fetch_assoc']($request);
	$smcFunc['db_free_result']($request);

	// Nothing came back, so there is nothing to index.
	if (empty($tempMessage))
		return true;

	// Create an instance of the manticore client.
	$myManticore = $this->dbfunc_connect();

	// Numbers are forced to integers; text is escaped and wrapped in single quotes.
	// NOTE(review): searchQuery() reads the document id back as 'id' - confirm the index accepts 'id_msg' here.
	$insertValues = array(
		'id_msg' => (int) $tempMessage['id_msg'],
		'id_topic' => (int) $tempMessage['id_topic'],
		'id_board' => (int) $tempMessage['id_board'],
		'id_member' => (int) $tempMessage['id_member'],
		'poster_time' => (int) $tempMessage['poster_time'],
		'body' => '\'' . mysqli_real_escape_string($myManticore, $tempMessage['body']) . '\'',
		'subject' => '\'' . mysqli_real_escape_string($myManticore, $tempMessage['subject']) . '\'',
		'num_replies' => (int) $tempMessage['num_replies'],
		'relevance' => (int) $tempMessage['relevance'],
	);

	// The insert query, use replace to make sure we don't get duplicates.
	$query = '
		REPLACE INTO ' . self::indexName() . '_index (' . implode(', ', array_keys($insertValues)) . ')
		VALUES (' . implode(', ', array_values($insertValues)) . ')';

	// Execute the query.
	$request = $this->dbfunc_query($query, $myManticore);

	// Just log the error and bail out quietly; we can let the reindex cron take care of fixing this.
	if ($request === false && $this->dbfunc_error($myManticore))
		log_error($this->dbfunc_error($myManticore));

	$this->dbfunc_close($myManticore);

	return true;
}
564 |
/**
 * Callback when a post is modified
 * @see modifyPost()
 *
 * Currently a no-op: the method returns true straight away. The code after
 * the return is kept as an example of how the index could be updated.
 *
 * @access public
 * @param array $msgOptions An array of post data
 * @param array $topicOptions An array of topic data
 * @param array $posterOptions An array of info about the person who made this post
 * @return bool Always true
 */
public function postModified(array &$msgOptions, array &$topicOptions, array &$posterOptions)
{
	return true;
	// !! Manticore Does support updating the search index from its Plain Text index.
	// !! Manticore for SMF does not support this at this time. Code is provided
	// !! here as examples/testing purposes.

	// Just call the postCreated as it does a replace, and pass its result on.
	return $this->postCreated($msgOptions, $topicOptions, $posterOptions);
}
585 |
/**
 * Callback when a post is removed, not recycled.
 *
 * Currently a no-op: the method returns true straight away. The code after
 * the return is kept as an example of how the index could be updated.
 *
 * @access public
 * @param int $id_msg The ID of the post that was removed
 * @return bool Always true
 */
public function postRemoved($id_msg)
{
	return true;
	// !! Manticore Does support updating the search index from its Plain Text index.
	// !! Manticore for SMF does not support this at this time. Code is provided
	// !! here as examples/testing purposes.

	// Create an instance of the manticore client.
	$myManticore = $this->dbfunc_connect();

	// SMF only calls this search API when we delete, not recycle. So this will always be a remove.
	// The id is forced to an integer before it goes into the query.
	$query = '
		DELETE FROM ' . self::indexName() . '_index
		WHERE id_msg = ' . (int) $id_msg;

	// Execute the query.
	$request = $this->dbfunc_query($query, $myManticore);

	// Just log the error and bail out quietly; we can let the reindex cron take care of fixing this.
	if ($request === false && $this->dbfunc_error($myManticore))
		log_error($this->dbfunc_error($myManticore));

	$this->dbfunc_close($myManticore);

	return true;
}
626 |
/**
 * Callback when a topic is removed
 *
 * Currently a no-op: the method returns true straight away. The code after
 * the return is kept as an example of how the index could be updated.
 *
 * @access public
 * @param array $topics The ID(s) of the removed topic(s)
 * @return bool Always true
 */
public function topicsRemoved(array $topics)
{
	return true;
	// !! Manticore Does support updating the search index from its Plain Text index.
	// !! Manticore for SMF does not support this at this time. Code is provided
	// !! here as examples/testing purposes.

	// Every id is forced to an integer; an empty list would produce an invalid IN ().
	$topic_ids = array_map('intval', $topics);
	if (empty($topic_ids))
		return true;

	// Create an instance of the manticore client.
	$myManticore = $this->dbfunc_connect();

	// SMF only calls this search API when we delete, not recycle. So this will always be a remove.
	$query = '
		DELETE FROM ' . self::indexName() . '_index
		WHERE id_topic IN (' . implode(', ', $topic_ids) . ')';

	// Execute the query.
	$request = $this->dbfunc_query($query, $myManticore);

	// Just log the error and bail out quietly; we can let the reindex cron take care of fixing this.
	if ($request === false && $this->dbfunc_error($myManticore))
		log_error($this->dbfunc_error($myManticore));

	$this->dbfunc_close($myManticore);

	return true;
}
667 |
/**
 * Callback when a topic is moved
 *
 * Currently a no-op: the method returns true straight away. The code after
 * the return is kept as an example of how the index could be updated.
 *
 * @access public
 * @param array $topics The ID(s) of the moved topic(s)
 * @param int $board_to The board that the topics were moved to
 * @return bool Always true
 */
public function topicsMoved(array $topics, $board_to)
{
	return true;
	// !! Manticore Does support updating the search index from its Plain Text index.
	// !! Manticore for SMF does not support this at this time. Code is provided
	// !! here as examples/testing purposes.

	// Every id is forced to an integer; an empty list would produce an invalid IN ().
	$topic_ids = array_map('intval', $topics);
	if (empty($topic_ids))
		return true;

	// Create an instance of the manticore client.
	$myManticore = $this->dbfunc_connect();

	// Point the indexed messages of the moved topics at their new board.
	$query = '
		UPDATE ' . self::indexName() . '_index
		SET id_board = ' . (int) $board_to . '
		WHERE id_topic IN (' . implode(', ', $topic_ids) . ')';

	// Execute the query.
	$request = $this->dbfunc_query($query, $myManticore);

	// Just log the error and bail out quietly; we can let the reindex cron take care of fixing this.
	if ($request === false && $this->dbfunc_error($myManticore))
		log_error($this->dbfunc_error($myManticore));

	$this->dbfunc_close($myManticore);

	return true;
}
710 |
/**
 * Manticore Database Support API: connect
 *
 * Opens a mysqli connection to the search daemon. When the connection
 * cannot be made, fatal_error() is called with the "no search daemon" text.
 *
 * @access private
 * @param string $host The manticore search address, this will default to $modSettings['manticore_searchd_server'].
 * @param string $port The port Manticore runs on, this will default to $modSettings['manticore_searchd_port'].
 * @return mysqli The open connection.
 */
private function dbfunc_connect(string $host = '', string $port = '')
{
	global $modSettings, $txt;

	// Fill out our host and port if needed.
	if (empty($host))
	{
		$host = empty($modSettings['manticore_searchd_server']) ? 'localhost' : $modSettings['manticore_searchd_server'];

		// The configured "localhost" is replaced by the loopback address.
		if ($host == 'localhost')
			$host = '127.0.0.1';
	}

	if (empty($port))
		$port = empty($modSettings['manticore_searchd_port']) ? 9306 : (int) $modSettings['manticore_searchd_port'];

	// mysqli may report a failed connection by throwing instead of returning false,
	// and the @ operator does not silence exceptions, so catch it here as well.
	try
	{
		$myManticore = @mysqli_connect($host, '', '', '', (int) $port);
	}
	catch (\mysqli_sql_exception $e)
	{
		$myManticore = false;
	}

	// Mysqli is never a resource, but an object.
	if (!is_object($myManticore) || $myManticore->connect_errno > 0)
	{
		loadLanguage('Errors');
		fatal_error($txt['error_no_search_daemon']);
	}

	return $myManticore;
}
/**
 * Manticore Database Support API: query
 *
 * @access private
 * @param string $query The query to run.
 * @param mysqli $myManticore A Manticore connection.
 * @return mysqli_result|bool The result of mysqli_query(), or false when the query failed.
 */
private function dbfunc_query(string $query, $myManticore)
{
	// Callers test the result against false, so turn a thrown query error back into false.
	// The error text stays available through dbfunc_error().
	try
	{
		return mysqli_query($myManticore, $query);
	}
	catch (\mysqli_sql_exception $e)
	{
		return false;
	}
}
752 |
/**
 * Manticore Database Support API: num_rows
 *
 * @access private
 * @param mysqli_result $myManticore The result of a Manticore query.
 * @return int|string Whatever mysqli_num_rows() reports for that result.
 */
private function dbfunc_num_rows($myManticore)
{
	$row_count = mysqli_num_rows($myManticore);

	return $row_count;
}
764 |
/**
 * Manticore Database Support API: fetch_assoc
 *
 * @access private
 * @param mysqli_result $myManticore The result of a Manticore query.
 * @return array|null|false Whatever mysqli_fetch_assoc() returns for that result.
 */
private function dbfunc_fetch_assoc($myManticore)
{
	$row = mysqli_fetch_assoc($myManticore);

	return $row;
}
776 |
/**
 * Manticore Database Support API: free_result
 *
 * @access private
 * @param mysqli_result $myManticore The result of a Manticore query.
 * @return void Passes on what mysqli_free_result() returns.
 */
private function dbfunc_free_result($myManticore)
{
	$freed = mysqli_free_result($myManticore);

	return $freed;
}
788 |
/**
 * Manticore Database Support API: close
 *
 * @access private
 * @param mysqli $myManticore A Manticore connection.
 * @return bool The result of mysqli_close().
 */
private function dbfunc_close($myManticore)
{
	$closed = mysqli_close($myManticore);

	return $closed;
}
800 |
/**
 * Manticore Database Support API: error
 *
 * @access private
 * @param mysqli $myManticore A Manticore connection.
 * @return string The error text mysqli_error() reports for that connection.
 */
private function dbfunc_error($myManticore)
{
	$message = mysqli_error($myManticore);

	return $message;
}
812 |
/**
 * Manticore Version
 *
 * Runs "indexer -v" from $modSettings['manticore_bin_path'] (defaulting to
 * /usr/bin) and pulls the version number out of its output.
 *
 * @access private
 * @return string|null The major.minor version of Manticore, or null when it cannot be determined.
 */
private static function manticoreversion()
{
	global $modSettings;

	if (empty($modSettings['manticore_bin_path']))
		$modSettings['manticore_bin_path'] = '/usr/bin';

	// Try to safely check for the indexer file, but do this in a way we can catch the error so PHP doesn't output it.
	try
	{
		set_error_handler(static function ($severity, $message, $file, $line) {
			throw new \ErrorException($message, 0, $severity, $file, $line);
		});

		// Look in the Manticore bin path, the same place the binary is run from below.
		$binary = realpath($modSettings['manticore_bin_path'] . '/indexer');

		if ($binary === false || !file_exists($binary))
			return null;
	}
	catch (\Throwable $e)
	{
		return null;
	}
	finally
	{
		restore_error_handler();
	}

	// Without shell_exec there is no way to ask the binary.
	if (!function_exists('shell_exec'))
		return null;

	// The path comes from a setting, so quote it for the shell.
	$raw_version = shell_exec(escapeshellarg($binary) . ' -v');

	if (empty($raw_version))
		return null;

	// No version? Test the match itself, so a minor version of "0" still counts.
	if (!preg_match('~Manticore (\d+)\.(\d+)~i', $raw_version, $m))
		return null;

	return $m[1] . '.' . $m[2];
}
855 |
/**
 * Index name
 *
 * @access private
 * @return string The configured name of the index, or 'smf' when none is set.
 */
private static function indexName()
{
	global $modSettings;

	if (empty($modSettings['manticore_index_name']))
		return 'smf';

	return $modSettings['manticore_index_name'];
}
867 | }
868 |
/**
 * Callback to a template from our admin search settings page.
 * This is used to generate hints and links to generate the Manticore
 * configuration file.
 *
 * Until the data and log paths have been saved only a "save first" notice is
 * printed.  Otherwise the shell commands for creating the directories, running
 * the indexer and daemon, and the crontab entries are assembled as BBC and
 * printed through parse_bbc().
 *
 * @access public
 * @return void
 */
function template_callback_SMFAction_Manticore_Hints()
{
	global $db_type, $scripturl, $txt, $modSettings;

	if (!isset($modSettings['manticore_data_path'], $modSettings['manticore_log_path']))
	{
		echo '

', $txt['manticore_config_hints_save'], '';

		return;
	}

	// Ensure these exist.  The index name must come from the Manticore setting so the
	// cron hints name the same indexes the generated configuration file defines.
	$index_name = !empty($modSettings['manticore_index_name']) ? $modSettings['manticore_index_name'] : 'smf';
	if (empty($modSettings['manticore_conf_path']))
		$modSettings['manticore_conf_path'] = '/etc/manticoresearch';
	if (empty($modSettings['manticore_bin_path']))
		$modSettings['manticore_bin_path'] = '/usr/bin';

	echo '

', $txt['search_weights'], '
[', $txt['manticore_view_config'], ' | ', $txt['manticore_download_config'], '] (', $txt['manticore_config_hints_save'], ')
';

	// Step one: create the data and log directories and make them writable.
	$message = '
' . sprintf($txt['manticore_config_hints_desc'], $modSettings['manticore_data_path']) . '[pre]mkdir -p ' . $modSettings['manticore_data_path'] . '
mkdir -p ' . $modSettings['manticore_log_path'] . '
chmod a+w ' . $modSettings['manticore_data_path'] . '
chmod a+w ' . $modSettings['manticore_log_path'] . '[/pre]';

	// Add a extra step for postgresql.
	if ($db_type == 'postgresql')
		$message .= '
[hr]
' . $txt['manticore_config_hints_pgsql_func'] . '
[code]
CREATE FUNCTION update_settings(var TEXT, val INT) RETURNS VOID AS $$
BEGIN
LOOP
-- first try to update the key
UPDATE PREFIX_settings SET value = val WHERE variable = var;
IF found THEN
RETURN;
END IF;
-- not there so try to insert the key
BEGIN
INSERT INTO PREFIX_settings(variable,value) VALUES (var,val);
RETURN;
EXCEPTION WHEN unique_violation THEN
-- do nothing, loop again to try the UPDATE
END;
END LOOP;
END;
$$
LANGUAGE plpgsql;[/code]';

	// Initial indexing, starting the daemon, and the crontab entries for the base and delta indexes.
	$message .= '
[hr]
' . $txt['manticore_config_hints_index_start'] . '[pre]sudo -u manticore ' . $modSettings['manticore_bin_path'] . '/indexer --config ' . $modSettings['manticore_conf_path'] . '/manticore.conf --all
sudo -u manticore ' . $modSettings['manticore_bin_path'] . '/searchd --config ' . $modSettings['manticore_conf_path'] . '/manticore.conf[/pre]
' . $txt['manticore_config_hints_index_finish'] . '
[hr]
' . $txt['manticore_config_hints_cron_start'] . '[pre]# search indexer
10 3 * * * ' . $modSettings['manticore_bin_path'] . '/indexer --config ' . $modSettings['manticore_conf_path'] . '/manticore.conf --rotate ' . $index_name . '_base_index
0 * * * * ' . $modSettings['manticore_bin_path'] . '/indexer --config ' . $modSettings['manticore_conf_path'] . '/manticore.conf --rotate ' . $index_name . '_delta_index[/pre]';

	// Print out our message.
	echo parse_bbc($message);

	echo '
';
}
950 |
951 | // This is the manticore configuration file.
/**
 * The Manticore generated configuration file. We perform some checks and
 * calculation and then issue a download with the appropriate setup.
 *
 * Reads the search weights and Manticore settings from $modSettings and the
 * database credentials from the forum settings, sends the file headers, echoes
 * the configuration and then ends the request.  When $_GET['view'] is set the
 * file is served as plain text instead of as an attachment.
 *
 * @access public
 * @return void Never returns to the caller; the script ends with die.
 */
function generateManticoreConfig()
{
	global $context, $db_server, $db_name, $db_user, $db_passwd, $db_prefix;
	global $db_type, $db_character_set, $db_port, $modSettings;

	$weight_factors = array(
		'age',
		'length',
		'first_message',
		'sticky',
	);
	$weight = array();
	$weight_total = 0;
	foreach ($weight_factors as $weight_factor)
	{
		$weight[$weight_factor] = empty($modSettings['search_weight_' . $weight_factor]) ? 0 : (int) $modSettings['search_weight_' . $weight_factor];
		$weight_total += $weight[$weight_factor];
	}

	// With no weights configured, fall back to an even split.
	if ($weight_total === 0)
	{
		$weight = array(
			'age' => 25,
			'length' => 25,
			'first_message' => 25,
			'sticky' => 25,
		);
		$weight_total = 100;
	}

	// The forum reports PostgreSQL as 'postgresql'; the indexer calls that source type 'pgsql'.
	// Every engine-specific branch below keys off this one flag so they cannot disagree.
	$is_pgsql = $db_type == 'postgresql';
	$supported_db_type = $is_pgsql ? 'pgsql' : 'mysql';

	// Use the configured database port when there is one, otherwise the engine's usual default.
	$sql_port = !empty($db_port) ? (int) $db_port : ($is_pgsql ? 5432 : 3306);

	$host = $modSettings['manticore_searchd_server'] == 'localhost' ? '127.0.0.1' : $modSettings['manticore_searchd_server'];
	$index_name = !empty($modSettings['manticore_index_name']) ? $modSettings['manticore_index_name'] : 'smf';

	// Lets fall out of SMF templating and start the headers to serve a file.
	ob_end_clean();
	ob_start();

	// Send the attachment headers.
	header('Pragma: ');
	if (!$context['browser']['is_gecko'])
		header('Content-Transfer-Encoding: binary');
	header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 525600 * 60) . ' GMT');
	header('Last-Modified: ' . gmdate('D, d M Y H:i:s', time()) . ' GMT');
	header('Accept-Ranges: bytes');
	header('Connection: close');
	header('ETag: ' . sha1('manticore.conf' . time()));

	if (isset($_GET['view']))
		header('Content-Type: text/plain');
	else
	{
		header('Content-Type: ' . ($context['browser']['is_ie'] || $context['browser']['is_opera'] ? 'application/octetstream' : 'application/octet-stream'));
		header('Content-Disposition: attachment; filename="manticore.conf"');
	}

	header('Cache-Control: max-age=' . (525600 * 60) . ', private');

	// At this point, we are generating the configuration file.
	echo '#
# Manticore configuration file (manticore.conf), configured for SMF 2.1
#
# By default the location of this file would probably be:
# ' . (empty($modSettings['manticore_conf_path']) ? '/etc/manticoresearch' : $modSettings['manticore_conf_path']) . '/manticore.conf

source ' . $index_name . '_source
{
type = ', $supported_db_type, '
sql_host = ', $db_server, '
sql_user = ', $db_user, '
sql_pass = ', $db_passwd, '
sql_db = ', $db_name, '
sql_port = ', $sql_port, empty($db_character_set) ? '' : '
sql_query_pre = SET NAMES ' . $db_character_set;

	// Thanks to TheStupidOne for pgsql queries.
	// Record the highest message id the base index is about to cover.
	if ($is_pgsql)
		echo '
sql_query_pre = \
SELECT update_settings(\'manticore_indexed_msg_until\', (SELECT MAX(id_msg) FROM ', $db_prefix, 'messages))';
	else
		echo '
sql_query_pre = \
REPLACE INTO ', $db_prefix, 'settings (variable, value) \
SELECT \'manticore_indexed_msg_until\', MAX(id_msg) \
FROM ', $db_prefix, 'messages';

	echo '
sql_query_range = \
SELECT 1, value \
FROM ', $db_prefix, 'settings \
WHERE variable = \'manticore_indexed_msg_until\'
sql_range_step = 1000';

	// Thanks to TheStupidOne for pgsql queries.
	// Same relevance formula for both engines; PostgreSQL needs CASE and explicit casts instead of IF().
	if ($is_pgsql)
		echo '
sql_query = \
SELECT \
m.id_msg, m.id_topic, m.id_board, CASE WHEN m.id_member = 0 THEN 4294967295 ELSE m.id_member END AS id_member, m.poster_time, m.body, m.subject, \
t.num_replies + 1 AS num_replies, CEILING(1000000 * ( \
CASE WHEN m.id_msg < 0.7 * cast(s.value as INT) THEN 0 ELSE (m.id_msg - 0.7 * cast(s.value as INT)) / (0.3 * cast(s.value as INT)) END * ' . $weight['age'] . ' + \
CASE WHEN t.num_replies < 200 THEN t.num_replies / 200 ELSE 1 END * ' . $weight['length'] . ' + \
CASE WHEN m.id_msg = t.id_first_msg THEN 1 ELSE 0 END * ' . $weight['first_message'] . ' + \
CASE WHEN t.is_sticky = 0 THEN 0 ELSE 1 END * ' . $weight['sticky'] . ' \
) / ' . $weight_total . ') AS relevance \
FROM ', $db_prefix, 'messages AS m, ', $db_prefix, 'topics AS t, ', $db_prefix, 'settings AS s \
WHERE t.id_topic = m.id_topic \
AND s.variable = \'maxMsgID\' \
AND m.id_msg BETWEEN $start AND $end';
	else
		echo '
sql_query = \
SELECT \
m.id_msg, m.id_topic, m.id_board, IF(m.id_member = 0, 4294967295, m.id_member) AS id_member, m.poster_time, m.body, m.subject, \
t.num_replies + 1 AS num_replies, CEILING(1000000 * ( \
IF(m.id_msg < 0.7 * s.value, 0, (m.id_msg - 0.7 * s.value) / (0.3 * s.value)) * ' . $weight['age'] . ' + \
IF(t.num_replies < 200, t.num_replies / 200, 1) * ' . $weight['length'] . ' + \
IF(m.id_msg = t.id_first_msg, 1, 0) * ' . $weight['first_message'] . ' + \
IF(t.is_sticky = 0, 0, 1) * ' . $weight['sticky'] . ' \
) / ' . $weight_total . ') AS relevance \
FROM ', $db_prefix, 'messages AS m, ', $db_prefix, 'topics AS t, ', $db_prefix, 'settings AS s \
WHERE t.id_topic = m.id_topic \
AND s.variable = \'maxMsgID\' \
AND m.id_msg BETWEEN $start AND $end';

	echo '
sql_attr_uint = id_topic
sql_attr_uint = id_board
sql_attr_uint = id_member
sql_attr_timestamp = poster_time
sql_attr_timestamp = relevance
sql_attr_timestamp = num_replies';

	// The delta source always emits its own sql_query_pre line, even when it is empty.
	// NOTE(review): presumably this is what stops the delta run from inheriting the base
	// source's pre-query that moves manticore_indexed_msg_until - confirm against the Manticore docs.
	echo '
}

source ' . $index_name . '_delta_source : ' . $index_name . '_source
{
sql_query_pre = ', !empty($db_character_set) ? 'SET NAMES ' . $db_character_set : '', '
sql_query_range = \
SELECT s1.value, s2.value \
FROM ', $db_prefix, 'settings AS s1, ', $db_prefix, 'settings AS s2 \
WHERE s1.variable = \'manticore_indexed_msg_until\' \
AND s2.variable = \'maxMsgID\'
}

index ' . $index_name . '_base_index
{
type = plain
html_strip = 1
source = ' . $index_name . '_source
path = ', $modSettings['manticore_data_path'], '/' . $index_name . '_manticore_base.index', empty($modSettings['manticore_stopword_path']) ? '' : '
stopwords = ' . $modSettings['manticore_stopword_path'], '
min_word_len = 2
charset_table = 0..9, A..Z->a..z, _, a..z
}

index ' . $index_name . '_delta_index : ' . $index_name . '_base_index
{
type = plain
source = ' . $index_name . '_delta_source
path = ', $modSettings['manticore_data_path'], '/' . $index_name . '_manticore_delta.index
}

index ' . $index_name . '_index
{
type = distributed
local = ' . $index_name . '_base_index
local = ' . $index_name . '_delta_index
}

indexer
{
mem_limit = ', (int) $modSettings['manticore_indexer_mem'], 'M
}

searchd
{
listen = ', !empty($modSettings['manticore_searchd_bind']) ? $host : '0.0.0.0', ':', (empty($modSettings['manticore_searchd_port']) ? 9306 : (int) $modSettings['manticore_searchd_port']), ':mysql41
log = ', $modSettings['manticore_log_path'], '/searchd.log
query_log = ', $modSettings['manticore_log_path'], '/query.log
network_timeout = 5
pid_file = ', $modSettings['manticore_data_path'], '/searchd.pid
binlog_path = ', $modSettings['manticore_data_path'], '
}';

	die;
}
1151 |
--------------------------------------------------------------------------------
/SMF 2.1/SearchAPI-Sphinxql.php:
--------------------------------------------------------------------------------
1 | supported_databases))
65 | {
66 | $this->is_supported = false;
67 | return;
68 | }
69 |
70 | // We sorta support mysqli at this point.
71 | if ($db_type == 'mysqli' || (function_exists('mysqli_connect') && !function_exists('mysql_connect')))
72 | $this->db_type = 'mysqli';
73 | }
74 |
/**
 * Check whether the search can be performed by this API.
 *
 * @access public
 * @param string $methodName The method we would like to use.
 * @param mixed $query_params The query parameters used for advanced or more defined support checking.
 * @return bool true or false whether this is supported.
 */
public function supportsMethod($methodName, $query_params = null)
{
	// The callbacks this API answers for.
	$supported = array(
		'searchSort',
		'prepareIndexes',
		'indexedWordQuery',
		'searchQuery',
		'isValid',
	);

	// Everything else is refused: topicsMoved, topicsRemoved, postRemoved, postModified
	// and postCreated are not supported yet, and any other method is unknown to us.
	return in_array($methodName, $supported);
}
110 |
/**
 * Whether this search API can be used.
 *
 * @access public
 * @return bool Always true.
 */
public function isValid()
{
	return true;
}
115 |
/**
 * The Admin Search Settings calls this in order to define extra API settings.
 *
 * Appends the Sphinx settings to $config_vars, sets the post URL, title and
 * Sphinx version in $context, and fills in default values for settings that
 * have not been saved yet.  When $_GET['generateConfig'] is set it hands over
 * to generateSphinxConfig() instead.
 *
 * @access public
 * @param array $config_vars All the configuration variables, we have to append or merge these.
 * @return void
 */
public static function searchSettings(&$config_vars)
{
	global $txt, $scripturl, $context, $settings, $sc, $modSettings;

	loadLanguage('Admin-Sphinx');

	if (isset($_GET['generateConfig']))
		generateSphinxConfig();

	$local_config_vars = array(
		array('title', 'sphinx_server_config_tittle'),
		'
' . $txt['sphinx_server_config_note'] . '',
		array('text', 'sphinx_index_name', 65, 'default_value' => 'smf', 'subtext' => $txt['sphinx_index_name_subtext']),
		array('text', 'sphinx_data_path', 65, 'default_value' => '/var/sphinx/data', 'subtext' => $txt['sphinx_data_path_subtext']),
		array('text', 'sphinx_log_path', 65, 'default_value' => '/var/sphinx/log', 'subtext' => $txt['sphinx_log_path_subtext']),
		array('text', 'sphinx_conf_path', 65, 'default_value' => '/etc/sphinxsearch', 'subtext' => $txt['sphinx_conf_path_subtext']),
		array('text', 'sphinx_bin_path', 65, 'default_value' => '/usr/bin', 'subtext' => $txt['sphinx_bin_path_subtext']),
		array('text', 'sphinx_stopword_path', 65, 'default_value' => '', 'subtext' => $txt['sphinx_stopword_path_subtext']),
		// Listed once only; a second identical row would render the same input twice.
		array('int', 'sphinx_indexer_mem', 6, 'default_value' => '32', 'subtext' => $txt['sphinx_indexer_mem_subtext'], 'postinput' => $txt['sphinx_indexer_mem_postinput']),

		// SMF Configuration Settings.
		array('title', 'sphinx_smf_sphinx_tittle'),
		array('text', 'sphinx_searchd_server', 32, 'default_value' => 'localhost', 'subtext' => $txt['sphinx_searchd_server_subtext']),
		array('check', 'sphinx_searchd_bind', 0, 'subtext' => $txt['sphinx_searchd_bind_subtext']),
		array('int', 'sphinxql_searchd_port', 6, 'default_value' => '9306', 'subtext' => $txt['sphinxql_searchd_port_subtext']),
		array('int', 'sphinx_version', 6, 'default_value' => '3.0', 'subtext' => $txt['sphinx_version_subtext']),
		array('int', 'sphinx_max_results', 6, 'default_value' => '1000', 'subtext' => $txt['sphinx_max_results_subtext']),

		// Just a hints section.
		array('title', 'sphinx_config_hints_title'),
		array('callback', 'SMFAction_Sphinx_Hints'),
	);

	// Merge them in.
	$config_vars = array_merge($config_vars, $local_config_vars);

	$context['post_url'] = $scripturl . '?action=admin;area=modsettings;save;sa=sphinx';
	$context['settings_title'] = $txt['sphinx_server_config_tittle'];
	$context['sphinx_version'] = self::sphinxversion();

	// Try to fall back: use the saved setting when detection failed, remember a detected
	// version when none is saved, and only assume 3.0 when neither is available.
	if (empty($context['sphinx_version']) && !empty($modSettings['sphinx_version']))
		$context['sphinx_version'] = $modSettings['sphinx_version'];
	elseif (!empty($context['sphinx_version']) && empty($modSettings['sphinx_version']))
		$modSettings['sphinx_version'] = $context['sphinx_version'];
	elseif (empty($context['sphinx_version']))
		$context['sphinx_version'] = '3.0';

	// Saving?
	if (isset($_GET['save']))
	{
		// Make sure this exists, but just push it with the other changes.
		if (!isset($modSettings['sphinx_indexed_msg_until']))
			$config_vars[] = array('int', 'sphinx_indexed_msg_until', 'default_value' => 1);

		// We still need a port.
		if (empty($_POST['sphinxql_searchd_port']))
			$_POST['sphinxql_searchd_port'] = 9306;
	}

	// This hacks in some defaults that are needed to generate a proper configuration file.
	foreach ($config_vars as $id => $cv)
		if (is_array($cv) && isset($cv[1], $cv['default_value']) && !isset($modSettings[$cv[1]]))
			$config_vars[$id]['value'] = $cv['default_value'];
}
188 |
/**
 * Callback function for usort used to sort the fulltext results.
 * Longer words sort before shorter ones, and every excluded word sorts
 * after the words that are not excluded.
 *
 * @access public
 * @param string $a Word A
 * @param string $b Word B
 * @return int An integer indicating how the words should be sorted
 */
public function searchSort($a, $b)
{
	global $excludedWords;

	// Score each word by its length; excluded words are pushed down by a large penalty.
	$score_a = strlen($a);
	if (in_array($a, $excludedWords))
		$score_a -= 1000;

	$score_b = strlen($b);
	if (in_array($b, $excludedWords))
		$score_b -= 1000;

	// Higher scores come first.
	return $score_b <=> $score_a;
}
208 |
/**
 * Callback while preparing indexes for searching
 *
 * Adds the word to the indexed search words, quoted as a phrase unless
 * text2words() splits it into exactly one subword.
 *
 * @access public
 * @param string $word A word to index
 * @param array $wordsSearch Search words
 * @param array $wordsExclude Words to exclude
 * @param bool $isExcluded Whether the specified word should be excluded
 */
public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
{
	// Exactly one subword is searched as-is; anything else is treated as a quoted phrase.
	$is_single_word = count(text2words($word, null, false)) === 1;

	if ($is_single_word)
		$fulltextWord = $word;
	else
		$fulltextWord = '"' . $word . '"';

	$wordsSearch['indexed_words'][] = $fulltextWord;

	if ($isExcluded)
		$wordsExclude[] = $fulltextWord;
}
229 |
/**
 * Callback for actually performing the search query
 *
 * Results are cached for 600 seconds under a key derived from the boards the
 * user can see and the search parameters.  On a cache miss a SphinxQL query is
 * built and executed, and one entry per returned row is stored.
 *
 * Both $participants and $searchArray are updated by reference, and
 * $context['topics'] is populated with id_msg => array(
 *   'id' => id_topic,
 *   'relevance' => round(relevance / 10000, 1) . '%',
 *   'num_matches' => number of rows returned when no single topic was requested, otherwise 0,
 *   'matches' => array(),
 * ) for the page selected by $_REQUEST['start'].
 *
 * @access public
 * @param array $query_params An array of parameters for the query
 * @param array $searchWords The words that were searched for
 * @param array $excludedIndexWords Indexed words that should be excluded
 * @param array $participants - Only used if we have enabled participation.
 * @param array $searchArray - Builds $context['key_words'] used for highlighting
 * @return int The total number of results; 0 when there is nothing to search for.
 */
public function searchQuery(array $query_params, array $searchWords, array $excludedIndexWords, array &$participants, array &$searchArray)
{
	global $user_info, $context, $modSettings;

	// Read and write the cache under one key; with differing keys the cache could never be hit.
	$cache_key = 'search_results_' . md5($user_info['query_see_board'] . '_' . $context['params']);

	// Only request the results if they haven't been cached yet.
	$cached_results = cache_get_data($cache_key);
	if (!is_array($cached_results))
	{
		// Make sure we have a max results.
		if (!isset($modSettings['sphinx_max_results']))
			$modSettings['sphinx_max_results'] = '1000';

		// Construct the (binary mode) query.
		$where_match = $this->_constructQuery($query_params['search']);

		// Nothing to search, return zero results.  This happens before connecting so no connection is left open.
		if (trim($where_match) == '')
			return 0;

		if (!empty($query_params['subject_only']))
			$where_match = '@subject ' . $where_match;

		// Compile different options for our query
		$query = 'SELECT * FROM ' . self::indexName() . '_index';
		$query .= ' WHERE MATCH(\'' . $where_match . '\')';

		// Set the limits based on the search parameters.
		$extra_where = array();
		if (!empty($query_params['min_msg_id']) || !empty($query_params['max_msg_id']))
		{
			// A missing lower bound has to become 0 rather than an empty string, which would break the query.
			$min_msg_id = empty($query_params['min_msg_id']) ? 0 : (int) $query_params['min_msg_id'];
			$max_msg_id = empty($query_params['max_msg_id']) ? (int) $modSettings['maxMsgID'] : (int) $query_params['max_msg_id'];
			$extra_where[] = 'id >= ' . $min_msg_id . ' AND id <=' . $max_msg_id;
		}
		if (!empty($query_params['topic']))
			$extra_where[] = 'id_topic = ' . (int) $query_params['topic'];
		// The id lists go straight into the query text, so force every entry to an integer.
		if (!empty($query_params['brd']) && is_array($query_params['brd']))
			$extra_where[] = 'id_board IN (' . implode(',', array_map('intval', $query_params['brd'])) . ')';
		if (!empty($query_params['memberlist']) && is_array($query_params['memberlist']))
			$extra_where[] = 'id_member IN (' . implode(',', array_map('intval', $query_params['memberlist'])) . ')';

		if (!empty($extra_where))
			$query .= ' AND ' . implode(' AND ', $extra_where);

		// Put together a sort string; besides the main column sort (relevance, id_topic, or num_replies), add secondary sorting based on relevance value (if not the main sort method) and age
		$sphinx_sort = ($query_params['sort'] === 'id_msg' ? 'id_topic' : $query_params['sort']) . ' ' . strtoupper($query_params['sort_dir']) . ($query_params['sort'] === 'relevance' ? '' : ', relevance desc') . ', poster_time DESC';
		// Grouping by topic id makes it return only one result per topic, so don't set that for in-topic searches
		if (empty($query_params['topic']))
			$query .= ' GROUP BY id_topic WITHIN GROUP ORDER BY ' . $sphinx_sort;
		$query .= ' ORDER BY ' . $sphinx_sort;

		$query .= ' LIMIT 0,' . (int) $modSettings['sphinx_max_results'];

		// Any limitations we need to add?
		if (!empty($modSettings['sphinx_max_results']) && (int) $modSettings['sphinx_max_results'] > 0)
			$query .= ' OPTION max_matches=' . (int) $modSettings['sphinx_max_results'];

		// Create an instance of the sphinx client.
		$mySphinx = $this->dbfunc_connect();

		// Execute the search query.
		$request = $this->dbfunc_query($query, $mySphinx);

		// Can a connection to the daemon be made?
		if ($request === false)
		{
			// Just log the error.
			if ($this->dbfunc_error($mySphinx))
				log_error($this->dbfunc_error($mySphinx));
			fatal_lang_error('error_no_search_daemon');
		}

		// Get the relevant information from the search results.
		$cached_results = array(
			'matches' => array(),
		);
		$num_rows = $this->dbfunc_num_rows($request);
		if ($num_rows != 0)
			while ($match = $this->dbfunc_fetch_assoc($request))
				$cached_results['matches'][$match['id']] = array(
					'id' => $match['id_topic'],
					'relevance' => round($match['relevance'] / 10000, 1) . '%',
					'num_matches' => empty($query_params['topic']) ? $num_rows : 0,
					'matches' => array(),
				);
		$this->dbfunc_free_result($request);
		$this->dbfunc_close($mySphinx);

		$cached_results['total'] = count($cached_results['matches']);

		// Store the search results in the cache.
		cache_put_data($cache_key, $cached_results, 600);
	}

	// Hand back only the slice of results that belongs on the requested page.
	$participants = array();
	foreach (array_slice(array_keys($cached_results['matches']), (int) $_REQUEST['start'], $modSettings['search_results_per_page']) as $msgID)
	{
		$context['topics'][$msgID] = $cached_results['matches'][$msgID];
		$participants[$cached_results['matches'][$msgID]['id']] = false;
	}

	// Sentences need to be broken up in words for proper highlighting.
	$searchArray = array();
	foreach ($searchWords as $orIndex => $words)
		$searchArray = array_merge($searchArray, $searchWords[$orIndex]['subject_words']);

	// Work around SMF bug causing multiple pages to not work right.
	if (!isset($_SESSION['search_cache']['num_results']))
		$_SESSION['search_cache'] = [
			'num_results' => $cached_results['total']
		];

	return $cached_results['total'];
}
358 |
/**
 * Constructs a binary mode query to pass back to sphinx
 *
 * The string is split into words and "quoted phrases".  A leading minus marks
 * a term as excluded, OR (or |) groups the previous term with the next one,
 * and AND (or &) is dropped because it is implied.
 *
 * @param string $string The user entered query to construct with
 * @return string A binary mode query, or an empty string when there is nothing to search for
 */
private function _constructQuery($string)
{
	$keywords = array('include' => array(), 'exclude' => array());

	// Split our search string and return an empty string if no matches
	if (!preg_match_all('~ (-?)("[^"]+"|[^" ]+)~', ' ' . $string , $tokens, PREG_SET_ORDER))
		return '';

	// First we split our string into included and excluded words and phrases
	$or_part = FALSE;
	foreach ($tokens as $token)
	{
		// Strip the quotes off of a phrase
		if ($token[2][0] == '"')
		{
			$token[2] = substr($token[2], 1, -1);
			$phrase = TRUE;
		}
		else
			$phrase = FALSE;

		// Prepare this token
		$cleanWords = $this->_cleanString($token[2]);

		// Explode the cleanWords again incase the cleaning put more spaces into it
		$addWords = $phrase ? array('"' . $cleanWords . '"') : preg_split('~ ~u', $cleanWords, -1, PREG_SPLIT_NO_EMPTY);

		if ($token[1] == '-')
			$keywords['exclude'] = array_merge($keywords['exclude'], $addWords);

		// OR'd keywords (we only do this if we have something to OR with)
		elseif (($token[2] == 'OR' || $token[2] == '|') && count($keywords['include']))
		{
			// Turn the previous entry into a group so the next term can be added to it.
			$last = array_pop($keywords['include']);
			if (!is_array($last))
				$last = array($last);
			$keywords['include'][] = $last;
			$or_part = TRUE;
			continue;
		}

		// AND is implied in a Sphinx Search
		elseif ($token[2] == 'AND' || $token[2] == '&')
			continue;

		// If this part of the query ended up being blank, skip it
		elseif (trim($cleanWords) == '')
			continue;

		// Must be something they want to search for!
		else
		{
			// If this was part of an OR branch, add it to the proper section
			if ($or_part)
				$keywords['include'][count($keywords['include']) - 1] = array_merge($keywords['include'][count($keywords['include']) - 1], $addWords);
			else
				$keywords['include'] = array_merge($keywords['include'], $addWords);
		}

		// Start fresh on this...
		$or_part = FALSE;
	}

	// OR groups are stored as nested arrays.  Flatten them first, because array_diff()
	// compares entries as strings and cannot compare the nested arrays themselves.
	$included_terms = array();
	foreach ($keywords['include'] as $keyword)
		$included_terms = array_merge($included_terms, (array) $keyword);

	// Let's make sure they're not canceling each other out
	if (!count(array_diff($included_terms, $keywords['exclude'])))
		return '';

	// Now we compile our arrays into a valid search string
	$query_parts = array();
	foreach ($keywords['include'] as $keyword)
		$query_parts[] = is_array($keyword) ? '(' . implode(' | ', $keyword) . ')' : $keyword;

	foreach ($keywords['exclude'] as $keyword)
		$query_parts[] = '-' . $keyword;

	return implode(' ', $query_parts);
}
442 |
/**
 * Cleans a string of everything but letters, numbers and underscores
 *
 * @param string $string A string to clean
 * @return string A cleaned up string
 */
private function _cleanString($string)
{
	global $smcFunc;

	// Turn entities back into characters, then lowercase the lot.
	$decoded = html_entity_decode($string, ENT_QUOTES, 'UTF-8');
	$lowered = $smcFunc['strtolower']($decoded);

	// Number handling (phone numbers, SSN, dates, etc): a run of digits plus the
	// punctuation after it is dropped whenever another digit follows.
	$numbers_fixed = preg_replace('~([[:digit:]]+)\pP+(?=[[:digit:]])~u', '', $lowered);

	// Finally, every run of characters that is not a letter, number or underscore becomes one space.
	return preg_replace('~[^\pL\pN_]+~u', ' ', $numbers_fixed);
}
467 |
/**
 * Callback when a post is created
 * @see createPost()
 *
 * Index updates through the QL interface are not enabled: the method returns
 * immediately, and the code after the first return is kept as an example only.
 *
 * @access public
 * @param array $msgOptions An array of post data
 * @param array $topicOptions An array of topic data
 * @param array $posterOptions An array of info about the person who made this post
 * @return bool Always true.
 */
public function postCreated(array &$msgOptions, array &$topicOptions, array &$posterOptions)
{
	return true;
	// !! SphinxQL Does support updating the search index from its QL interface.
	// !! Sphinx for SMF does not support this at this time.  Code is provided
	// !! here as examples/testing purposes.

	global $smcFunc, $modSettings;

	// Create an instance of the sphinx client.
	$mySphinx = $this->dbfunc_connect();

	// Figure out our weights.
	$weight_factors = array(
		'age',
		'length',
		'first_message',
		'sticky',
	);
	$weight = array();
	$weight_total = 0;
	foreach ($weight_factors as $weight_factor)
	{
		$weight[$weight_factor] = empty($modSettings['search_weight_' . $weight_factor]) ? 0 : (int) $modSettings['search_weight_' . $weight_factor];
		$weight_total += $weight[$weight_factor];
	}
	if ($weight_total === 0)
	{
		$weight = array(
			'age' => 25,
			'length' => 25,
			'first_message' => 25,
			'sticky' => 25,
		);
		$weight_total = 100;
	}

	// The data was inserted at this point, lets get some data as the passed variables don't contain all we need.
	// The settings row holding maxMsgID is joined in because the age weighting reads s.value.
	$request = $smcFunc['db_query']('', '
		SELECT
			m.id_msg, m.id_topic, m.id_board, IF(m.id_member = 0, 4294967295, m.id_member) AS id_member, m.poster_time, m.body, m.subject,
			t.num_replies + 1 AS num_replies,
			CEILING(1000000 * (
				IF(m.id_msg < 0.7 * s.value, 0, (m.id_msg - 0.7 * s.value) / (0.3 * s.value)) * {int:weight_age} +
				IF(t.num_replies < 200, t.num_replies / 200, 1) * {int:weight_length} +
				IF(m.id_msg = t.id_first_msg, 1, 0) * {int:weight_first_msg} +
				IF(t.is_sticky = 0, 0, 1) * {int:weight_sticky}
			) / {int:weight_total}) AS relevance
		FROM {db_prefix}messages AS m
			INNER JOIN {db_prefix}topics AS t ON (t.id_topic = m.id_topic)
			INNER JOIN {db_prefix}settings AS s ON (s.variable = {string:max_msg_setting})
		WHERE m.id_msg = {int:newMessage}',
		array(
			'newMessage' => $msgOptions['id'],
			'weight_age' => $weight['age'],
			'weight_length' => $weight['length'],
			'weight_first_msg' => $weight['first_message'],
			'weight_sticky' => $weight['sticky'],
			'weight_total' => $weight_total,
			'max_msg_setting' => 'maxMsgID',
		)
	);
	$tempMessage = $smcFunc['db_fetch_assoc']($request);
	$smcFunc['db_free_result']($request);

	// NOTE(review): body and subject are wrapped in quotes but not escaped; they must be
	// escaped for the QL connection before this example code is ever enabled.
	$insertValues = array(
		'id_msg' => $tempMessage['id_msg'],
		'id_topic' => $tempMessage['id_topic'],
		'id_board' => $tempMessage['id_board'],
		'id_member' => $tempMessage['id_member'],
		'poster_time' => $tempMessage['poster_time'],
		'body' => '"' . $tempMessage['body'] . '"',
		'subject' => '"' . $tempMessage['subject'] . '"',
		'num_replies' => $tempMessage['num_replies'],
		'relevance' => $tempMessage['relevance'],
	);

	// The insert query, use replace to make sure we don't get duplicates.
	$query = '
		REPLACE INTO ' . self::indexName() . '_index (' . implode(', ', array_keys($insertValues)) . ')
		VALUES (' . implode(', ', array_values($insertValues)) . ')';

	// Execute the search query.
	$request = $this->dbfunc_query($query, $mySphinx);

	// Can a connection to the daemon be made?
	if ($request === false)
	{
		// Just log the error.
		if ($this->dbfunc_error($mySphinx))
			log_error($this->dbfunc_error($mySphinx));

		// Silently bail out, We can let the reindex cron take care of fixing this.
		return true;
	}

	return true;
}
574 |
/**
 * Callback when a post is modified.
 * @see modifyPost()
 *
 * Live index updates are currently switched off: the method returns right
 * away and the statement after the first return is kept purely as a
 * reference implementation.
 *
 * @access public
 * @param array $msgOptions An array of post data
 * @param array $topicOptions An array of topic data
 * @param array $posterOptions An array of info about the person who made this post
 * @return bool Always true.
 */
public function postModified(array &$msgOptions, array &$topicOptions, array &$posterOptions)
{
	// !! SphinxQL is able to update the search index through its QL interface,
	// !! but Sphinx for SMF does not make use of that at this time. The code
	// !! below this return is intentionally unreachable and is provided for
	// !! examples/testing purposes only.
	return true;

	// postCreated() writes the row with REPLACE, so it doubles as the update path.
	return $this->postCreated($msgOptions, $topicOptions, $posterOptions);
}
595 |
/**
 * Callback when a post is removed, not recycled.
 *
 * Live index updates are currently switched off: the method returns right
 * away and the statements after the first return are kept purely as a
 * reference implementation.
 *
 * @access public
 * @param int $id_msg The ID of the post that was removed
 * @return bool Always true.
 */
public function postRemoved($id_msg)
{
	// !! SphinxQL is able to update the search index through its QL interface,
	// !! but Sphinx for SMF does not make use of that at this time. The code
	// !! below this return is intentionally unreachable and is provided for
	// !! examples/testing purposes only.
	return true;

	// Create an instance of the sphinx client.
	$mySphinx = $this->dbfunc_connect();

	// This callback is for real deletes (not recycling), so the row is always removed.
	// The ID is cast because it is concatenated straight into the statement.
	$query = '
		DELETE FROM ' . self::indexName() . '_index
		WHERE id_msg = ' . (int) $id_msg;

	// Execute the delete.
	$request = $this->dbfunc_query($query, $mySphinx);

	// A failure is only logged; the reindex cron can take care of fixing the index.
	if ($request === false)
	{
		$error = $this->dbfunc_error($mySphinx);
		if ($error)
			log_error($error);
	}

	return true;
}
636 |
/**
 * Callback when a topic is removed.
 *
 * Live index updates are currently switched off: the method returns right
 * away and the statements after the first return are kept purely as a
 * reference implementation.
 *
 * @access public
 * @param array $topics The ID(s) of the removed topic(s)
 * @return bool Always true.
 */
public function topicsRemoved(array $topics)
{
	// !! SphinxQL is able to update the search index through its QL interface,
	// !! but Sphinx for SMF does not make use of that at this time. The code
	// !! below this return is intentionally unreachable and is provided for
	// !! examples/testing purposes only.
	return true;

	// An empty list would produce "IN ()", which is not a valid statement.
	if (empty($topics))
		return true;

	// Create an instance of the sphinx client.
	$mySphinx = $this->dbfunc_connect();

	// This callback is for real deletes (not recycling), so the rows are always removed.
	// The IDs are cast because they are concatenated straight into the statement.
	$query = '
		DELETE FROM ' . self::indexName() . '_index
		WHERE id_topic IN (' . implode(', ', array_map('intval', $topics)) . ')';

	// Execute the delete.
	$request = $this->dbfunc_query($query, $mySphinx);

	// A failure is only logged; the reindex cron can take care of fixing the index.
	if ($request === false)
	{
		$error = $this->dbfunc_error($mySphinx);
		if ($error)
			log_error($error);
	}

	return true;
}
677 |
/**
 * Callback when a topic is moved.
 *
 * Live index updates are currently switched off: the method returns right
 * away and the statements after the first return are kept purely as a
 * reference implementation.
 *
 * @access public
 * @param array $topics The ID(s) of the moved topic(s)
 * @param int $board_to The board that the topics were moved to
 * @return bool Always true.
 */
public function topicsMoved(array $topics, $board_to)
{
	// !! SphinxQL is able to update the search index through its QL interface,
	// !! but Sphinx for SMF does not make use of that at this time. The code
	// !! below this return is intentionally unreachable and is provided for
	// !! examples/testing purposes only.
	return true;

	// An empty list would produce "IN ()", which is not a valid statement.
	if (empty($topics))
		return true;

	// Create an instance of the sphinx client.
	$mySphinx = $this->dbfunc_connect();

	// Point every message of the moved topics at the destination board.
	// All IDs are cast because they are concatenated straight into the statement.
	$query = '
		UPDATE ' . self::indexName() . '_index
		SET id_board = ' . (int) $board_to . '
		WHERE id_topic IN (' . implode(', ', array_map('intval', $topics)) . ')';

	// Execute the update.
	$request = $this->dbfunc_query($query, $mySphinx);

	// A failure is only logged; the reindex cron can take care of fixing the index.
	if ($request === false)
	{
		$error = $this->dbfunc_error($mySphinx);
		if ($error)
			log_error($error);
	}

	return true;
}
720 |
/**
 * Sphinx Database Support API: connect
 *
 * Opens a SphinxQL connection with the mysqli functions when $this->db_type
 * is 'mysqli', otherwise with the legacy mysql functions. If no connection
 * can be made, the Errors language file is loaded and fatal_error() is called
 * with the 'error_no_search_daemon' text.
 *
 * @access private
 * @param string $host The sphinx search address, this will default to $modSettings['sphinx_searchd_server'].
 * @param string $port The port Sphinx runs on, this will default to $modSettings['sphinxql_searchd_port'].
 * @return mysqli|resource The open SphinxQL connection.
 */
private function dbfunc_connect(string $host = '', string $port = '')
{
	global $modSettings, $txt;

	// Fill out our host and port if needed. 'localhost' is swapped for 127.0.0.1, as before.
	if (empty($host))
	{
		$server = empty($modSettings['sphinx_searchd_server']) ? 'localhost' : $modSettings['sphinx_searchd_server'];
		$host = $server == 'localhost' ? '127.0.0.1' : $server;
	}
	if (empty($port))
		$port = empty($modSettings['sphinxql_searchd_port']) ? 9306 : (int) $modSettings['sphinxql_searchd_port'];

	$mySphinx = false;

	if ($this->db_type == 'mysqli')
	{
		// With exception-mode error reporting (the mysqli default from PHP 8.1 on)
		// a failed connect throws instead of returning false, and @ does not
		// silence exceptions, so catch it to reach the friendly error below.
		try
		{
			$mySphinx = @mysqli_connect($host, '', '', '', (int) $port);
		}
		catch (\mysqli_sql_exception $e)
		{
			$mySphinx = false;
		}

		// Mysqli is never a resource, but an object.
		$connected = is_object($mySphinx) && !($mySphinx->connect_errno > 0);
	}
	else
	{
		// The legacy mysql extension is not available on every PHP build; only call it when it exists.
		// Suppress the warning, changing error_reporting did not silence it.
		if (function_exists('mysql_connect'))
			$mySphinx = @mysql_connect($host . ':' . $port);

		$connected = is_resource($mySphinx);
	}

	if (!$connected)
	{
		loadLanguage('Errors');
		fatal_error($txt['error_no_search_daemon']);
	}

	return $mySphinx;
}
/**
 * Sphinx Database Support API: query
 *
 * Runs a statement on the given SphinxQL connection using the function
 * family selected by $this->db_type.
 *
 * @access private
 * @param string $query The query to run.
 * @param resource $mySphinx A SphinxQL connection resource.
 * @return resource Whatever the underlying query function returns.
 */
private function dbfunc_query(string $query, $mySphinx)
{
	// Argument order differs: procedural mysqli takes the connection first, mysql takes it last.
	return $this->db_type == 'mysqli'
		? mysqli_query($mySphinx, $query)
		: mysql_query($query, $mySphinx);
}
780 |
/**
 * Sphinx Database Support API: num_rows
 *
 * Reports the row count of a query result through the function family
 * selected by $this->db_type.
 *
 * @access private
 * @param resource $mySphinx A SphinxQL request resource.
 * @return int|string
 */
private function dbfunc_num_rows($mySphinx)
{
	$use_mysqli = $this->db_type == 'mysqli';

	return $use_mysqli ? mysqli_num_rows($mySphinx) : mysql_num_rows($mySphinx);
}
795 |
/**
 * Sphinx Database Support API: fetch_assoc
 *
 * Fetches the next row of a query result as an associative array through the
 * function family selected by $this->db_type.
 *
 * @access private
 * @param resource $mySphinx A SphinxQL request resource.
 * @return array
 */
private function dbfunc_fetch_assoc($mySphinx)
{
	// Anything other than 'mysqli' goes through the legacy mysql function.
	if ($this->db_type != 'mysqli')
		return mysql_fetch_assoc($mySphinx);

	return mysqli_fetch_assoc($mySphinx);
}
810 |
/**
 * Sphinx Database Support API: free_result
 *
 * Releases a query result through the function family selected by
 * $this->db_type and hands back whatever that function returns.
 *
 * @access private
 * @param resource $mySphinx A SphinxQL request resource.
 * @return void
 */
private function dbfunc_free_result($mySphinx)
{
	// Anything other than 'mysqli' goes through the legacy mysql function.
	if ($this->db_type != 'mysqli')
		return mysql_free_result($mySphinx);

	return mysqli_free_result($mySphinx);
}
825 |
/**
 * Sphinx Database Support API: close
 *
 * Closes a SphinxQL connection through the function family selected by
 * $this->db_type.
 *
 * @access private
 * @param resource $mySphinx A SphinxQL connection resource.
 * @return bool
 */
private function dbfunc_close($mySphinx)
{
	$use_mysqli = $this->db_type == 'mysqli';

	return $use_mysqli ? mysqli_close($mySphinx) : mysql_close($mySphinx);
}
840 |
/**
 * Sphinx Database Support API: error
 *
 * Returns the last error message for a SphinxQL connection through the
 * function family selected by $this->db_type.
 *
 * @access private
 * @param resource $mySphinx A SphinxQL connection resource.
 * @return string
 */
private function dbfunc_error($mySphinx)
{
	return $this->db_type == 'mysqli'
		? mysqli_error($mySphinx)
		: mysql_error($mySphinx);
}
855 |
/**
 * Sphinx Version
 *
 * Runs "<sphinx_bin_path>/indexer -v" and extracts "major.minor" from a
 * "Sphinx X.Y" banner in its output. As a side effect,
 * $modSettings['sphinx_bin_path'] is set to '/usr/bin' when it was empty.
 *
 * @access private
 * @return string|null The major + minor version of Sphinx (e.g. "3.0"), or null when it cannot be determined.
 */
private static function sphinxversion()
{
	global $modSettings;

	if (empty($modSettings['sphinx_bin_path']))
		$modSettings['sphinx_bin_path'] = '/usr/bin';

	// Look for the indexer binary with warnings turned into exceptions, so PHP doesn't output them.
	set_error_handler(static function ($severity, $message, $file, $line) {
		throw new \ErrorException($message, 0, $severity, $file, $line);
	});

	try {
		$binary = realpath($modSettings['sphinx_bin_path'] . '/indexer');

		if ($binary === false || !file_exists($binary))
			return null;
	} catch (\Throwable $e) {
		return null;
	} finally {
		restore_error_handler();
	}

	// Hosts may disable the shell functions; treat that the same as "version unknown".
	if (!function_exists('shell_exec') || !function_exists('escapeshellarg'))
		return null;

	// Quote the path so spaces or shell metacharacters in the setting cannot alter the command.
	$raw_version = shell_exec(escapeshellarg($binary) . ' -v');

	if (empty($raw_version))
		return null;

	// Check the match result instead of empty($m[n]): empty('0') is true,
	// which used to reject valid versions such as "3.0".
	if (preg_match('~Sphinx (\d+)\.(\d+)~i', $raw_version, $m) !== 1)
		return null;

	return $m[1] . '.' . $m[2];
}
898 |
/**
 * Index name
 *
 * Reads the index name prefix from $modSettings['sphinx_index_name'] and
 * falls back to 'smf' when that setting is empty.
 *
 * @access private
 * @return string The name of the index.
 */
private static function indexName()
{
	global $modSettings;

	if (empty($modSettings['sphinx_index_name']))
		return 'smf';

	return $modSettings['sphinx_index_name'];
}
910 | }
911 |
/**
 * Template callback for the admin search settings page.
 *
 * Prints hints for setting up Sphinx: the directories to create, an extra
 * PostgreSQL helper function, the commands that build the index and start
 * searchd, and the crontab entries that keep the index fresh. Until the data
 * and log paths have been saved, only a "save first" hint is printed.
 *
 * Side effect: empty 'sphinx_conf_path' and 'sphinx_bin_path' settings are
 * filled with '/etc/sphinxsearch' and '/usr/bin' in $modSettings.
 *
 * NOTE(review): $scripturl is declared but never used and the echoed literals
 * contain no markup - it looks like HTML may have been lost from these
 * strings; confirm against the upstream file.
 *
 * @access public
 */
function template_callback_SMFAction_Sphinx_Hints()
{
	global $db_type, $scripturl, $txt, $modSettings;

	// Nothing useful can be shown before both paths are saved.
	if (!isset($modSettings['sphinx_data_path'], $modSettings['sphinx_log_path']))
	{
		echo "\n\n", $txt['sphinx_config_hints_save'];

		return;
	}

	// Ensure these exist.
	$index = empty($modSettings['sphinx_index_name']) ? 'smf' : $modSettings['sphinx_index_name'];
	if (empty($modSettings['sphinx_conf_path']))
		$modSettings['sphinx_conf_path'] = '/etc/sphinxsearch';
	if (empty($modSettings['sphinx_bin_path']))
		$modSettings['sphinx_bin_path'] = '/usr/bin';

	echo "\n\n", $txt['search_weights'], "\n", '[', $txt['sphinx_view_config'], ' | ', $txt['sphinx_download_config'], '] (', $txt['sphinx_config_hints_save'], ")\n";

	// Step one: create the data and log directories and make them writable.
	$data_path = $modSettings['sphinx_data_path'];
	$log_path = $modSettings['sphinx_log_path'];

	$hints = "\n" . sprintf($txt['sphinx_config_hints_desc'], $data_path)
		. '[pre]mkdir -p ' . $data_path . "\n"
		. 'mkdir -p ' . $log_path . "\n"
		. 'chmod a+w ' . $data_path . "\n"
		. 'chmod a+w ' . $log_path . '[/pre]';

	// Add a extra step for postgresql.
	if ($db_type == 'postgresql')
	{
		$pgsql_function = array(
			'CREATE FUNCTION update_settings(var TEXT, val INT) RETURNS VOID AS $$',
			'BEGIN',
			'LOOP',
			'-- first try to update the key',
			'UPDATE PREFIX_settings SET value = val WHERE variable = var;',
			'IF found THEN',
			'RETURN;',
			'END IF;',
			'-- not there so try to insert the key',
			'BEGIN',
			'INSERT INTO PREFIX_settings(variable,value) VALUES (var,val);',
			'RETURN;',
			'EXCEPTION WHEN unique_violation THEN',
			'-- do nothing, loop again to try the UPDATE',
			'END;',
			'END LOOP;',
			'END;',
			'$$',
			'LANGUAGE plpgsql;[/code]',
		);

		$hints .= "\n[hr]\n" . $txt['sphinx_config_hints_pgsql_func'] . "\n[code]\n" . implode("\n", $pgsql_function);
	}

	// The indexer and searchd invocations share the same config file argument.
	$conf_file = $modSettings['sphinx_conf_path'] . '/sphinx.conf';
	$indexer = $modSettings['sphinx_bin_path'] . '/indexer --config ' . $conf_file;
	$searchd = $modSettings['sphinx_bin_path'] . '/searchd --config ' . $conf_file;

	// Initial indexing and daemon start.
	$hints .= "\n[hr]\n" . $txt['sphinx_config_hints_index_start']
		. '[pre]' . $indexer . " --all\n"
		. $searchd . "[/pre]\n"
		. $txt['sphinx_config_hints_index_finish'];

	// Crontab lines: base index daily at 03:10, delta index at the top of each hour.
	$hints .= "\n[hr]\n" . $txt['sphinx_config_hints_cron_start']
		. "[pre]# search indexer\n"
		. '10 3 * * * ' . $indexer . ' --rotate ' . $index . "_base_index\n"
		. '0 * * * * ' . $indexer . ' --rotate ' . $index . '_delta_index[/pre]';

	// Print out our message.
	echo parse_bbc($hints);

	echo "\n";
}
993 |
994 | // This is the sphinx configuration file.
/**
 * The Sphinx generated configuration file. We perform some checks and
 * calculation and then issue a download with the appropriate setup.
 *
 * The output buffer is restarted, HTTP headers are sent and the sphinx.conf
 * text is echoed. With $_GET['view'] set the file is served as text/plain,
 * otherwise as an attachment named sphinx.conf. The function ends the request
 * with die and therefore never returns.
 *
 * The generated file contains the forum's database credentials.
 *
 * @access public
 */
function generateSphinxConfig()
{
	global $context, $db_server, $db_name, $db_user, $db_passwd, $db_prefix;
	global $db_type, $db_character_set, $db_port, $modSettings;

	// Figure out our weights; if none are configured every factor counts equally.
	$weight_factors = array(
		'age',
		'length',
		'first_message',
		'sticky',
	);
	$weight = array();
	$weight_total = 0;
	foreach ($weight_factors as $weight_factor)
	{
		$weight[$weight_factor] = empty($modSettings['search_weight_' . $weight_factor]) ? 0 : (int) $modSettings['search_weight_' . $weight_factor];
		$weight_total += $weight[$weight_factor];
	}

	if ($weight_total === 0)
	{
		$weight = array(
			'age' => 25,
			'length' => 25,
			'first_message' => 25,
			'sticky' => 25,
		);
		$weight_total = 100;
	}

	// The hints template in this file tests $db_type against 'postgresql'. This function used to
	// test for 'postgresq' and 'pgsql', which never agreed with that; accept both spellings in one place.
	$is_pgsql = in_array($db_type, array('postgresql', 'pgsql'), true);
	$supported_db_type = $is_pgsql ? 'pgsql' : 'mysql';

	// Use the forum's configured port when one is set, otherwise the default port of the database in use.
	$sql_port = !empty($db_port) ? (int) $db_port : ($is_pgsql ? 5432 : 3306);

	$searchd_server = empty($modSettings['sphinx_searchd_server']) ? 'localhost' : $modSettings['sphinx_searchd_server'];
	$host = $searchd_server == 'localhost' ? '127.0.0.1' : $searchd_server;
	$index_name = !empty($modSettings['sphinx_index_name']) ? $modSettings['sphinx_index_name'] : 'smf';

	// Lets fall out of SMF templating and start the headers to serve a file.
	ob_end_clean();
	ob_start();

	// Send the attachment headers.
	header('Pragma: ');
	if (!$context['browser']['is_gecko'])
		header('Content-Transfer-Encoding: binary');
	header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 525600 * 60) . ' GMT');
	header('Last-Modified: ' . gmdate('D, d M Y H:i:s', time()) . ' GMT');
	header('Accept-Ranges: bytes');
	header('Connection: close');
	header('ETag: ' . sha1('sphinx.conf' . time()));

	if (isset($_GET['view']))
		header('Content-Type: text/plain');
	else
	{
		header('Content-Type: ' . ($context['browser']['is_ie'] || $context['browser']['is_opera'] ? 'application/octetstream' : 'application/octet-stream'));
		header('Content-Disposition: attachment; filename="sphinx.conf"');
	}

	header('Cache-Control: max-age=' . (525600 * 60) . ', private');

	// At this point, we are generating the configuration file.
	echo '#
# Sphinx configuration file (sphinx.conf), configured for SMF 2.1
#
# By default the location of this file would probably be:
# ' . (empty($modSettings['sphinx_conf_path']) ? '/etc/sphinxsearch' : $modSettings['sphinx_conf_path']) . '/sphinx.conf

source ' . $index_name . '_source
{
type = ', $supported_db_type, '
sql_host = ', $db_server, '
sql_user = ', $db_user, '
sql_pass = ', $db_passwd, '
sql_db = ', $db_name, '
sql_port = ', $sql_port, empty($db_character_set) ? '' : '
sql_query_pre = SET NAMES ' . $db_character_set;

	// Remember how far the base index reaches. Thanks to TheStupidOne for pgsql queries.
	if ($is_pgsql)
		echo '
sql_query_pre = \
SELECT update_settings(\'sphinx_indexed_msg_until\', (SELECT MAX(id_msg) FROM ', $db_prefix, 'messages))';
	else
		echo '
sql_query_pre = \
REPLACE INTO ', $db_prefix, 'settings (variable, value) \
SELECT \'sphinx_indexed_msg_until\', MAX(id_msg) \
FROM ', $db_prefix, 'messages';

	echo '
sql_query_range = \
SELECT 1, value \
FROM ', $db_prefix, 'settings \
WHERE variable = \'sphinx_indexed_msg_until\'
sql_range_step = 1000';

	// Thanks to TheStupidOne for pgsql queries.
	if ($is_pgsql)
		echo '
sql_query = \
SELECT \
m.id_msg, m.id_topic, m.id_board, CASE WHEN m.id_member = 0 THEN 4294967295 ELSE m.id_member END AS id_member, m.poster_time, m.body, m.subject, \
t.num_replies + 1 AS num_replies, CEILING(1000000 * ( \
CASE WHEN m.id_msg < 0.7 * cast(s.value as INT) THEN 0 ELSE (m.id_msg - 0.7 * cast(s.value as INT)) / (0.3 * cast(s.value as INT)) END * ' . $weight['age'] . ' + \
CASE WHEN t.num_replies < 200 THEN t.num_replies / 200 ELSE 1 END * ' . $weight['length'] . ' + \
CASE WHEN m.id_msg = t.id_first_msg THEN 1 ELSE 0 END * ' . $weight['first_message'] . ' + \
CASE WHEN t.is_sticky = 0 THEN 0 ELSE 1 END * ' . $weight['sticky'] . ' \
) / ' . $weight_total . ') AS relevance \
FROM ', $db_prefix, 'messages AS m, ', $db_prefix, 'topics AS t, ', $db_prefix, 'settings AS s \
WHERE t.id_topic = m.id_topic \
AND s.variable = \'maxMsgID\' \
AND m.id_msg BETWEEN $start AND $end';
	else
		echo '
sql_query = \
SELECT \
m.id_msg, m.id_topic, m.id_board, IF(m.id_member = 0, 4294967295, m.id_member) AS id_member, m.poster_time, m.body, m.subject, \
t.num_replies + 1 AS num_replies, CEILING(1000000 * ( \
IF(m.id_msg < 0.7 * s.value, 0, (m.id_msg - 0.7 * s.value) / (0.3 * s.value)) * ' . $weight['age'] . ' + \
IF(t.num_replies < 200, t.num_replies / 200, 1) * ' . $weight['length'] . ' + \
IF(m.id_msg = t.id_first_msg, 1, 0) * ' . $weight['first_message'] . ' + \
IF(t.is_sticky = 0, 0, 1) * ' . $weight['sticky'] . ' \
) / ' . $weight_total . ') AS relevance \
FROM ', $db_prefix, 'messages AS m, ', $db_prefix, 'topics AS t, ', $db_prefix, 'settings AS s \
WHERE t.id_topic = m.id_topic \
AND s.variable = \'maxMsgID\' \
AND m.id_msg BETWEEN $start AND $end';

	echo '
sql_attr_uint = id_topic
sql_attr_uint = id_board
sql_attr_uint = id_member';

	// Sphinx 3.0 dropped sql_attr_timestamp, but sql_attr_uint should be compatible.
	// So the timestamp form is only written for a detected pre-3.0 version; the old
	// check did the opposite and emitted it for versions above 3.0.
	if (!empty($context['sphinx_version']) && version_compare($context['sphinx_version'], '3.0', '<'))
		echo '
sql_attr_timestamp = poster_time
sql_attr_timestamp = relevance
sql_attr_timestamp = num_replies';
	else
		echo '
sql_attr_uint = poster_time
sql_attr_uint = relevance
sql_attr_uint = num_replies';

	// The delta source always writes its own sql_query_pre line; only the SET NAMES
	// statement is left out when no character set is configured.
	echo '
}

source ' . $index_name . '_delta_source : ' . $index_name . '_source
{
sql_query_pre = ', empty($db_character_set) ? '' : 'SET NAMES ' . $db_character_set, '
sql_query_range = \
SELECT s1.value, s2.value \
FROM ', $db_prefix, 'settings AS s1, ', $db_prefix, 'settings AS s2 \
WHERE s1.variable = \'sphinx_indexed_msg_until\' \
AND s2.variable = \'maxMsgID\'
}

index ' . $index_name . '_base_index
{
html_strip = 1
source = ' . $index_name . '_source
path = ', $modSettings['sphinx_data_path'], '/' . $index_name . '_sphinx_base.index', empty($modSettings['sphinx_stopword_path']) ? '' : '
stopwords = ' . $modSettings['sphinx_stopword_path'], '
min_word_len = 2
charset_table = 0..9, A..Z->a..z, _, a..z
}

index ' . $index_name . '_delta_index : ' . $index_name . '_base_index
{
source = ' . $index_name . '_delta_source
path = ', $modSettings['sphinx_data_path'], '/' . $index_name . '_sphinx_delta.index
}

index ' . $index_name . '_index
{
type = distributed
local = ' . $index_name . '_base_index
local = ' . $index_name . '_delta_index
}

indexer
{
mem_limit = ', (int) $modSettings['sphinx_indexer_mem'], 'M
}

searchd
{
listen = ', !empty($modSettings['sphinx_searchd_bind']) ? $host : '0.0.0.0', ':', (empty($modSettings['sphinxql_searchd_port']) ? 9306 : (int) $modSettings['sphinxql_searchd_port']), ':mysql41
log = ', $modSettings['sphinx_log_path'], '/searchd.log
query_log = ', $modSettings['sphinx_log_path'], '/query.log
read_timeout = 5
max_children = 30
pid_file = ', $modSettings['sphinx_data_path'], '/searchd.pid
binlog_path = ', $modSettings['sphinx_data_path'], '
}';

	die;
}
1202 |
--------------------------------------------------------------------------------
/package-info.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | simplemachines:Sphinx-for-SMF
4 | Sphinx for SMF
5 | 1.3
6 | modification
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Sphinx for SMF
2 |
3 | Sphinx for SMF is a customization that allows SMF to make use of the [Sphinx Search](https://sphinxsearch.com/) engine or [Manticore](https://manticoresearch.com/). It is not intended for forums with fewer than 300,000 messages.
4 |
5 | SMF ships by default with the standard, full-text and custom index search options. Although most forums will have no problems using these options, larger SMF forums will start to suffer performance degradation. These performance issues mostly come from how databases such as MySQL and PostgreSQL store data, which is not designed for text-based matching. Sphinx solves this by importing all messages into its own database, which is designed to handle searching.
6 |
7 | At this time, this API relies on Sphinx updating its own index. Future support may include having SMF inform Sphinx when a message is updated.
8 |
9 | To use this, you need the following versions or higher:
10 | - SMF 2.0.x, 2.1.x or higher
11 | - Latest release for 2.0 and 2.1 branches is recommended.
12 | - Supports MySQL (via MySQLi functions)
13 | - Initial PostgreSQL support was added but has had limited testing.
14 | - Sphinx or Manticore
15 | - Sphinx - Using SphinxQL
16 | - Tested with 2.2.10
17 | - Tested with 3.4.1 (Only supported on SMF 2.1 or higher)
18 | - Manticore - Using SphinxQL with Plain Index
19 | - Tested with 4.2.0 (Only supported on SMF 2.1 or higher)
20 | - Any modern Linux distribution
21 | - PHP 7.0
22 | - PHP 7.4 or higher recommended
23 | - Follow SMF release versioning for specifics of minimum and recommended versions. This customization will support them.
--------------------------------------------------------------------------------