├── LICENSE ├── README.md └── src ├── .htaccess ├── common.php ├── favicon.ico ├── fetch.php ├── functions.php ├── includes ├── page.php └── simple_html_dom.php ├── index.php ├── search.php ├── static ├── css │ ├── bootstrap.css │ ├── common.css │ ├── icon.css │ ├── link.css │ └── moe.css ├── fonts │ ├── icomoon.eot │ ├── icomoon.svg │ ├── icomoon.ttf │ └── icomoon.woff ├── img │ ├── bg.jpg │ ├── bg_w.png │ ├── logo.png │ └── no-js.jpg └── js │ ├── app.js │ ├── instantclick.min.js │ ├── jquery.hotkeys.js │ ├── jquery.js │ └── respond.js └── templates ├── foot.html ├── head.html ├── main.html ├── no-script.html └── results.html /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Kokororin (https://return.moe) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Moe Search 2 | === 3 | 4 | ## 简介 5 | 这是一个用PHP编写的Google搜索服务,原理是拿着用户的关键词去Google服务器搜索,然后将返回的结果响应给用户。 6 | 7 | [测试地址](https://niconicono.science) 8 | 9 | -------------------------------------------------------------------------------- /src/.htaccess: -------------------------------------------------------------------------------- 1 | RewriteEngine on 2 | RewriteCond %{SERVER_PORT} !^443$ 3 | RewriteRule (.*) https://%{SERVER_NAME}/$1 [R] -------------------------------------------------------------------------------- /src/common.php: -------------------------------------------------------------------------------- 1 | each_disNums = intval($each_disNums); 30 | $this->nums = intval($nums); 31 | if (!$current_page) { 32 | $this->current_page = 1; 33 | } else { 34 | $this->current_page = intval($current_page); 35 | } 36 | $this->sub_pages = intval($sub_pages); 37 | $this->pageNums = ceil($nums / $each_disNums); 38 | $temp = $_SERVER['REQUEST_URI']; 39 | $temp = str_replace('&page=' . $this->current_page, '', $temp); 40 | $temp = str_replace('?page=' . $this->current_page, '', $temp); 41 | $this->subPage_link = $temp . 
"&page=";
}

/**
 * Kept for older PHP versions (disabled legacy constructor shim).
 */
/*function __construct($each_disNums, $nums, $current_page, $sub_pages, $subPage_linke) {
$this->Page($each_disNums, $nums, $current_page, $sub_pages, $subPage_link);
}
 */

/**
 * Destructor: releases the pager's state when the object is no longer used.
 *
 * The previous body called unset() on local variables that were never
 * defined in this scope, so it released nothing. The properties the rest
 * of the class actually uses are unset here instead.
 */
public function __destruct()
{
    unset($this->each_disNums);
    unset($this->nums);
    unset($this->current_page);
    unset($this->sub_pages);
    unset($this->pageNums);
    unset($this->page_array);
    unset($this->subPage_link);
}

/**
 * Initialises the page-number array with one slot per visible page link.
 *
 * @return array $this->page_array, filled with 0 .. sub_pages-1
 */
public function initArray()
{
    for ($i = 0; $i < $this->sub_pages; $i++) {
        $this->page_array[$i] = $i;
    }
    return $this->page_array;
}

/**
 * Builds the list of page numbers to display,
 * e.g. [1][2][3][4][5][6][7][8][9][10].
 *
 * @return array page numbers, in display order
 */
public function construct_num_Page()
{
    if ($this->pageNums < $this->sub_pages) {
        // Fewer pages than link slots: show every page.
        $current_array = array();
        for ($i = 0; $i < $this->pageNums; $i++) {
            $current_array[$i] = $i + 1;
        }
    } else {
        $current_array = $this->initArray();
        if ($this->current_page <= 3) {
            // Near the start: window is 1 .. sub_pages.
            for ($i = 0; $i < count($current_array); $i++) {
                $current_array[$i] = $i + 1;
            }
        } elseif ($this->current_page <= $this->pageNums && $this->current_page > $this->pageNums - $this->sub_pages + 1) {
            // Near the end: window is the last sub_pages pages.
            for ($i = 0; $i < count($current_array); $i++) {
                $current_array[$i] = ($this->pageNums) - ($this->sub_pages) + 1 + $i;
            }
        } else {
            // Middle: window starts two pages before the current one.
            for ($i = 0; $i < count($current_array); $i++) {
                $current_array[$i] = $this->current_page - 2 + $i;
            }
        }
    }

    return $current_array;
}

/**
 * Builds the classic pager markup:
 * Page 1/453 [First] [Prev] 1 2 3 4 5 6 7 8 9 10 [Next] [Last]
 */
public function show()
{
    $str = "";

    if ($this->current_page > 1) {
        $firstPageUrl = $this->subPage_link . "1";
        $prevPageUrl = $this->subPage_link .
($this->current_page - 1); 117 | $str .= '
  • <
  • '; 118 | } else { 119 | $str .= ''; 120 | } 121 | 122 | $a = $this->construct_num_Page(); 123 | for ($i = 0; $i < count($a); $i++) { 124 | $s = $a[$i]; 125 | if ($s == $this->current_page) { 126 | $str .= '
  • ' . $s . '
  • '; 127 | } else { 128 | $url = $this->subPage_link . $s; 129 | $str .= '
  • ' . $s . '
  • '; 130 | } 131 | } 132 | 133 | if ($this->current_page < $this->pageNums) { 134 | $lastPageUrl = $this->subPage_link . $this->pageNums; 135 | $nextPageUrl = $this->subPage_link . ($this->current_page + 1); 136 | $str .= '
  • >
  • '; 137 | } else { 138 | $str .= ''; 139 | } 140 | return $str; 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/includes/simple_html_dom.php: -------------------------------------------------------------------------------- 1 | size is the "real" number of bytes the dom was created from. 18 | * but for most purposes, it's a really good estimation. 19 | * Paperg - Added the forceTagsClosed to the dom constructor. Forcing tags closed is great for malformed html, but it CAN lead to parsing errors. 20 | * Allow the user to tell us how much they trust the html. 21 | * Paperg add the text and plaintext to the selectors for the find syntax. plaintext implies text in the innertext of a node. text implies that the tag is a text node. 22 | * This allows for us to find tags based on the text they contain. 23 | * Create find_ancestor_tag to see if a tag is - at any level - inside of another specific tag. 24 | * Paperg: added parse_charset so that we know about the character set of the source document. 25 | * NOTE: If the user's system has a routine called get_last_retrieve_url_contents_content_type availalbe, we will assume it's returning the content-type header from the 26 | * last transfer or curl_exec, and we will parse that and use it in preference to any other method of charset detection. 27 | * 28 | * Found infinite loop in the case of broken html in restore_noise. Rewrote to protect from that. 29 | * PaperG (John Schlick) Added get_display_size for "IMG" tags. 30 | * 31 | * Licensed under The MIT License 32 | * Redistributions of files must retain the above copyright notice. 33 | * 34 | * @author S.C. Chen 35 | * @author John Schlick 36 | * @author Rus Carroll 37 | * @version 1.5 ($Rev: 210 $) 38 | * @package PlaceLocalInclude 39 | * @subpackage simple_html_dom 40 | */ 41 | 42 | /** 43 | * All of the Defines for the classes below. 44 | * @author S.C. 
Chen
 */
define('HDOM_TYPE_ELEMENT', 1);
define('HDOM_TYPE_COMMENT', 2);
define('HDOM_TYPE_TEXT', 3);
define('HDOM_TYPE_ENDTAG', 4);
define('HDOM_TYPE_ROOT', 5);
define('HDOM_TYPE_UNKNOWN', 6);
define('HDOM_QUOTE_DOUBLE', 0);
define('HDOM_QUOTE_SINGLE', 1);
define('HDOM_QUOTE_NO', 3);
define('HDOM_INFO_BEGIN', 0);
define('HDOM_INFO_END', 1);
define('HDOM_INFO_QUOTE', 2);
define('HDOM_INFO_SPACE', 3);
define('HDOM_INFO_TEXT', 4);
define('HDOM_INFO_INNER', 5);
define('HDOM_INFO_OUTER', 6);
define('HDOM_INFO_ENDSPACE',7);
define('DEFAULT_TARGET_CHARSET', 'UTF-8');
define('DEFAULT_BR_TEXT', "\r\n");
define('DEFAULT_SPAN_TEXT', " ");
define('MAX_FILE_SIZE', 600000);

// helper functions
// -----------------------------------------------------------------------------

/**
 * Fetches $url with file_get_contents() and parses it into a DOM.
 *
 * $offset keeps its historical default of -1 ("no offset"). Since PHP 7.1 a
 * negative offset means "seek from the end of the stream", which fails on
 * non-seekable (remote) streams, so negative values are normalised to 0
 * before the read. $maxLen is accepted for signature compatibility but is
 * not passed on; oversized documents are rejected through MAX_FILE_SIZE.
 *
 * @return simple_html_dom|false false when nothing was read or the content
 *                               is larger than MAX_FILE_SIZE bytes
 */
function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
{
    // We DO force the tags to be terminated.
    $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);

    // Legacy callers (and the default) use -1 to mean "from the start".
    if ($offset < 0)
    {
        $offset = 0;
    }

    // For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
    $contents = file_get_contents($url, $use_include_path, $context, $offset);
    // Paperg - use our own mechanism for getting the contents as we want to control the timeout.
    //$contents = retrieve_url_contents($url);
    if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
    {
        // Same cleanup as str_get_html() so the unused DOM releases its references.
        $dom->clear();
        return false;
    }
    // The second parameter can force the selectors to all be lowercase.
    $dom->load($contents, $lowercase, $stripRN);
    return $dom;
}

/**
 * Parses an HTML string into a DOM.
 *
 * @return simple_html_dom|false false when $str is empty or larger than
 *                               MAX_FILE_SIZE bytes
 */
function str_get_html($str, $lowercase=true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
{
    $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
    if (empty($str) || strlen($str) > MAX_FILE_SIZE)
    {
        $dom->clear();
        return false;
    }
    $dom->load($str, $lowercase, $stripRN);
    return $dom;
}

/**
 * Echoes the tree below $node (debug helper).
 *
 * The arguments are now forwarded as declared; previously $node itself was
 * passed as $show_attr and $deep was dropped.
 */
function dump_html_tree($node, $show_attr=true, $deep=0)
{
    $node->dump($show_attr, $deep);
}


/**
 * simple html dom node
 * PaperG - added ability for "find" routine to lowercase the value of the selector.
 * PaperG - added $tag_start to track the start position of the tag in the total byte index
 *
 * @package PlaceLocalInclude
 */
class simple_html_dom_node
{
    public $nodetype = HDOM_TYPE_TEXT;
    public $tag = 'text';
    public $attr = array();
    public $children = array();
    public $nodes = array();
    public $parent = null;
    // The "info" array - see HDOM_INFO_... for what each element contains.
    public $_ = array();
    public $tag_start = 0;
    private $dom = null;

    // Registers the new node in the owning dom's flat node list.
    function __construct($dom)
    {
        $this->dom = $dom;
        $dom->nodes[] = $this;
    }

    function __destruct()
    {
        $this->clear();
    }

    function __toString()
    {
        return $this->outertext();
    }

    // clean up memory due to php5 circular references memory leak...
function clear()
{
    $this->dom = null;
    $this->nodes = null;
    $this->parent = null;
    $this->children = null;
}

// dump node's tree (echoes one line per node, indented by depth)
function dump($show_attr=true, $deep=0)
{
    $lead = str_repeat(' ', $deep);

    echo $lead.$this->tag;
    if ($show_attr && count($this->attr)>0)
    {
        echo '(';
        foreach ($this->attr as $k=>$v)
            echo "[$k]=>\"".$this->$k.'", ';
        echo ')';
    }
    echo "\n";

    if ($this->nodes)
    {
        foreach ($this->nodes as $c)
        {
            $c->dump($show_attr, $deep+1);
        }
    }
}


// Debugging function to dump a single dom node with a bunch of information about it.
// Echoes the description when $echo is true, otherwise returns it as a string.
function dump_node($echo=true)
{

    $string = $this->tag;
    if (count($this->attr)>0)
    {
        $string .= '(';
        foreach ($this->attr as $k=>$v)
        {
            $string .= "[$k]=>\"".$this->$k.'", ';
        }
        $string .= ')';
    }
    if (count($this->_)>0)
    {
        $string .= ' $_ (';
        foreach ($this->_ as $k=>$v)
        {
            if (is_array($v))
            {
                $string .= "[$k]=>(";
                foreach ($v as $k2=>$v2)
                {
                    $string .= "[$k2]=>\"".$v2.'", ';
                }
                $string .= ")";
            } else {
                $string .= "[$k]=>\"".$v.'", ';
            }
        }
        $string .= ")";
    }

    if (isset($this->text))
    {
        $string .= " text: (" . $this->text . ")";
    }

    $string .= " HDOM_INNER_INFO: '";
    // Read this node's own info array; the old code referenced an undefined
    // $node variable, so this branch could never be taken.
    if (isset($this->_[HDOM_INFO_INNER]))
    {
        $string .= $this->_[HDOM_INFO_INNER] . "'";
    }
    else
    {
        $string .= ' NULL ';
    }

    $string .= " children: " . count($this->children);
    $string .= " nodes: " . count($this->nodes);
    $string .= " tag_start: " . $this->tag_start;
    $string .= "\n";

    if ($echo)
    {
        echo $string;
        return;
    }
    else
    {
        return $string;
    }
}

// returns the parent of node
// If a node is passed in, it will reset the parent of the current node to that one.
function parent($parent=null)
{
    // I am SURE that this doesn't work properly.
    // It fails to unset the current node from it's current parents nodes or children list first.
    if ($parent !== null)
    {
        $this->parent = $parent;
        $this->parent->nodes[] = $this;
        $this->parent->children[] = $this;
    }

    return $this->parent;
}

// verify that node has children
function has_child()
{
    return !empty($this->children);
}

// returns children of node (all of them for $idx === -1, else the one at $idx or null)
function children($idx=-1)
{
    if ($idx===-1)
    {
        return $this->children;
    }
    if (isset($this->children[$idx]))
    {
        return $this->children[$idx];
    }
    return null;
}

// returns the first child of node
function first_child()
{
    if (count($this->children)>0)
    {
        return $this->children[0];
    }
    return null;
}

// returns the last child of node
function last_child()
{
    if (($count=count($this->children))>0)
    {
        return $this->children[$count-1];
    }
    return null;
}

// returns the next sibling of node
function next_sibling()
{
    if ($this->parent===null)
    {
        return null;
    }

    // Locate this node among the parent's children, then step forward.
    $idx = 0;
    $count = count($this->parent->children);
    while ($idx<$count && $this!==$this->parent->children[$idx])
    {
        ++$idx;
    }
    if (++$idx>=$count)
    {
        return null;
    }
    return $this->parent->children[$idx];
}

// returns the previous sibling of node
function prev_sibling()
{
    if ($this->parent===null) return null;
    $idx = 0;
    $count = count($this->parent->children);
    while ($idx<$count && $this!==$this->parent->children[$idx])
        ++$idx;
    if (--$idx<0) return null;
    return $this->parent->children[$idx];
}

// function to locate a specific ancestor tag in the path to the root.
// Returns the matching node (possibly this node itself) or null.
function find_ancestor_tag($tag)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    // Start by including ourselves in the comparison.
    $returnDom = $this;

    while (!is_null($returnDom))
    {
        if (is_object($debug_object)) { $debug_object->debug_log(2, "Current tag is: " . $returnDom->tag); }

        if ($returnDom->tag == $tag)
        {
            break;
        }
        $returnDom = $returnDom->parent;
    }
    return $returnDom;
}

// get dom node's inner html
function innertext()
{
    if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
    if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

    $ret = '';
    foreach ($this->nodes as $n)
        $ret .= $n->outertext();
    return $ret;
}

// get dom node's outer text (with tag)
function outertext()
{
    global $debug_object;
    if (is_object($debug_object))
    {
        $text = '';
        if ($this->tag == 'text')
        {
            if (!empty($this->text))
            {
                $text = " with text: " . $this->text;
            }
        }
        $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text);
    }

    if ($this->tag==='root') return $this->innertext();

    // trigger callback
    if ($this->dom && $this->dom->callback!==null)
    {
        call_user_func_array($this->dom->callback, array($this));
    }

    if (isset($this->_[HDOM_INFO_OUTER])) return $this->_[HDOM_INFO_OUTER];
    if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

    // render begin tag
    if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]])
    {
        $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
    } else {
        $ret = "";
    }

    // render inner text
    if (isset($this->_[HDOM_INFO_INNER]))
    {
        // If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added.
        if ($this->tag != "br")
        {
            $ret .= $this->_[HDOM_INFO_INNER];
        }
    } else {
        if ($this->nodes)
        {
            foreach ($this->nodes as $n)
            {
                $ret .= $this->convert_text($n->outertext());
            }
        }
    }

    // render end tag (the closing-tag literal had been stripped from this line)
    if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END]!=0)
        $ret .= '</'.$this->tag.'>';
    return $ret;
}

// get dom node's plain text
function text()
{
    if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
    switch ($this->nodetype)
    {
        case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
        case HDOM_TYPE_COMMENT: return '';
        case HDOM_TYPE_UNKNOWN: return '';
    }
    if (strcasecmp($this->tag, 'script')===0) return '';
    if (strcasecmp($this->tag, 'style')===0) return '';

    $ret = '';
    // In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed for some span tags, and some p tags) $this->nodes is set to NULL.
    // NOTE: This indicates that there is a problem where it's set to NULL without a clear happening.
    // WHY is this happening?
    if (!is_null($this->nodes))
    {
        foreach ($this->nodes as $n)
        {
            $ret .= $this->convert_text($n->text());
        }

        // If this node is a span... add a space at the end of it so multiple spans don't run into each other. This is plaintext after all.
        if ($this->tag == "span")
        {
            $ret .= $this->dom->default_span_text;
        }


    }
    return $ret;
}

// get the inner text with CDATA wrappers removed
// (both marker literals had been stripped from this method)
function xmltext()
{
    $ret = $this->innertext();
    $ret = str_ireplace('<![CDATA[', '', $ret);
    $ret = str_replace(']]>', '', $ret);
    return $ret;
}

// build node's text with tag
function makeup()
{
    // text, comment, unknown
    if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

    $ret = '<'.$this->tag;
    $i = -1;

    foreach ($this->attr as $key=>$val)
    {
        // $i indexes the per-attribute spacing/quote info, so it advances even for skipped attributes.
        ++$i;

        // skip removed attribute
        if ($val===null || $val===false)
            continue;

        $ret .= $this->_[HDOM_INFO_SPACE][$i][0];
        //no value attr: nowrap, checked selected...
        if ($val===true)
            $ret .= $key;
        else {
            switch ($this->_[HDOM_INFO_QUOTE][$i])
            {
                case HDOM_QUOTE_DOUBLE: $quote = '"'; break;
                case HDOM_QUOTE_SINGLE: $quote = '\''; break;
                default: $quote = '';
            }
            $ret .= $key.$this->_[HDOM_INFO_SPACE][$i][1].'='.$this->_[HDOM_INFO_SPACE][$i][2].$quote.$val.$quote;
        }
    }
    $ret = $this->dom->restore_noise($ret);
    return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>';
}

// find elements by css selector
//PaperG - added ability for find to lowercase the value of the selector.
// Returns every matching node as an array when $idx is null; otherwise the
// node at position $idx (negative values count from the end) or null.
function find($selector, $idx=null, $lowercase=false)
{
    $groups = $this->parse_selector($selector);
    if (count($groups) === 0) {
        return array();
    }

    // Node indexes matched by any of the comma-separated selector groups.
    $matched = array();

    foreach ($groups as $chain) {
        // The change on the below line was documented on the sourceforge code tracker id 2788009
        // (each group's own length is checked, not only the first group's).
        if (count($chain) === 0) {
            return array();
        }
        if (!isset($this->_[HDOM_INFO_BEGIN])) {
            return array();
        }

        // handle descendant selectors, no recursive!
        // $frontier holds the node indexes matched so far; each step narrows it.
        $frontier = array($this->_[HDOM_INFO_BEGIN] => 1);
        foreach ($chain as $step) {
            $next = array();
            foreach ($frontier as $key => $unused) {
                $start = ($key === -1) ? $this->dom->root : $this->dom->nodes[$key];
                //PaperG - Pass this optional parameter on to the seek function.
                $start->seek($step, $next, $lowercase);
            }
            $frontier = $next;
        }

        foreach ($frontier as $key => $unused) {
            if (!isset($matched[$key])) {
                $matched[$key] = 1;
            }
        }
    }

    // sort keys
    ksort($matched);

    $found = array();
    foreach (array_keys($matched) as $key) {
        $found[] = $this->dom->nodes[$key];
    }

    // return nth-element or array
    if ($idx === null) {
        return $found;
    }
    if ($idx < 0) {
        $idx = count($found) + $idx;
    }
    return isset($found[$idx]) ? $found[$idx] : null;
}

// seek for given conditions
// PaperG - added parameter to allow for case insensitive testing of the value of a selector.
// Matches are reported through $ret (by reference), keyed by node index.
protected function seek($selector, &$ret, $lowercase=false)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    list($tag, $key, $val, $exp, $no_key) = $selector;

    // xpath index
    if ($tag && $key && is_numeric($key))
    {
        $count = 0;
        foreach ($this->children as $c)
        {
            if ($tag==='*' || $tag===$c->tag) {
                if (++$count==$key) {
                    $ret[$c->_[HDOM_INFO_BEGIN]] = 1;
                    return;
                }
            }
        }
        return;
    }

    $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
    if ($end==0) {
        $parent = $this->parent;
        // Test for null BEFORE dereferencing, and only read the ancestor's end
        // index when one was actually found; the old code accessed
        // $parent->_[...] on null once the root had been passed.
        while ($parent!==null && !isset($parent->_[HDOM_INFO_END])) {
            $end -= 1;
            $parent = $parent->parent;
        }
        if ($parent!==null) {
            $end += $parent->_[HDOM_INFO_END];
        }
    }

    for ($i=$this->_[HDOM_INFO_BEGIN]+1; $i<$end; ++$i) {
        $node = $this->dom->nodes[$i];

        $pass = true;

        if ($tag==='*' && !$key) {
            if (in_array($node, $this->children, true))
                $ret[$i] = 1;
            continue;
        }

        // compare tag
        if ($tag && $tag!=$node->tag && $tag!=='*') {$pass=false;}
        // compare key
        if ($pass && $key) {
            if ($no_key) {
                if (isset($node->attr[$key])) $pass=false;
            } else {
                if (($key != "plaintext") && !isset($node->attr[$key])) $pass=false;
            }
        }
        // compare value
        if ($pass && $key && $val && $val!=='*') {
            // If they have told us that this is a "plaintext" search then we want the plaintext of the node - right?
            if ($key == "plaintext") {
                // $node->plaintext actually returns $node->text();
                $nodeKeyValue = $node->text();
            } else {
                // this is a normal search, we want the value of that attribute of the tag.
                $nodeKeyValue = $node->attr[$key];
            }
            if (is_object($debug_object)) {$debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);}

            //PaperG - If lowercase is set, do a case insensitive test of the value of the selector.
            if ($lowercase) {
                $check = $this->match($exp, strtolower($val), strtolower($nodeKeyValue));
            } else {
                $check = $this->match($exp, $val, $nodeKeyValue);
            }
            if (is_object($debug_object)) {$debug_object->debug_log(2, "after match: " . ($check ? "true" : "false"));}

            // handle multiple class
            if (!$check && strcasecmp($key, 'class')===0) {
                foreach (explode(' ',$node->attr[$key]) as $k) {
                    // Without this, there were cases where leading, trailing, or double spaces lead to our comparing blanks - bad form.
                    if (!empty($k)) {
                        if ($lowercase) {
                            $check = $this->match($exp, strtolower($val), strtolower($k));
                        } else {
                            $check = $this->match($exp, $val, $k);
                        }
                        if ($check) break;
                    }
                }
            }
            if (!$check) $pass = false;
        }
        if ($pass) $ret[$i] = 1;
        unset($node);
    }
    // It's passed by reference so this is actually what this function returns.
    if (is_object($debug_object)) {$debug_object->debug_log(1, "EXIT - ret: ", $ret);}
}

// Applies one attribute operator (=, !=, ^=, $=, *=) to $value; unknown operators never match.
protected function match($exp, $pattern, $value) {
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    switch ($exp) {
        case '=':
            return ($value===$pattern);
        case '!=':
            return ($value!==$pattern);
        case '^=':
            return preg_match("/^".preg_quote($pattern,'/')."/", $value);
        case '$=':
            return preg_match("/".preg_quote($pattern,'/')."$/", $value);
        case '*=':
            // A pattern that starts with '/' is used as a complete regular expression.
            if ($pattern[0]=='/') {
                return preg_match($pattern, $value);
            }
            return preg_match("/".$pattern."/i", $value);
    }
    return false;
}

// Splits a selector string into comma-separated groups, each a list of
// array($tag, $key, $val, $exp, $no_key) steps consumed by seek().
protected function parse_selector($selector_string) {
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    // pattern of CSS selectors, modified from mootools
    // Paperg: Add the colon to the attrbute, so that it properly finds like google does.
    // Note: if you try to look at this attribute, yo MUST use getAttribute since $dom->x:y will fail the php syntax check.
    // Notice the \[ starting the attbute? and the @? following? This implies that an attribute can begin with an @ sign that is not captured.
    // This implies that an html attribute specifier may start with an @ sign that is NOT captured by the expression.
    // farther study is required to determine of this should be documented or removed.
    // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
    // Hyphens inside the character classes are escaped: PCRE2 (PHP 7.3+) rejects
    // "[\w-:]" as an invalid range, which made every selector fail to compile.
    $pattern = "/([\w\-:\*]*)(?:\#([\w\-]+)|\.([\w\-]+))?(?:\[@?(!?[\w\-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
    preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER);
    if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);}

    $selectors = array();
    $result = array();
    //print_r($matches);

    foreach ($matches as $m) {
        $m[0] = trim($m[0]);
        if ($m[0]==='' || $m[0]==='/' || $m[0]==='//') continue;
        // for browser generated xpath
        if ($m[1]==='tbody') continue;

        list($tag, $key, $val, $exp, $no_key) = array($m[1], null, null, '=', false);
        if (!empty($m[2])) {$key='id'; $val=$m[2];}
        if (!empty($m[3])) {$key='class'; $val=$m[3];}
        if (!empty($m[4])) {$key=$m[4];}
        if (!empty($m[5])) {$exp=$m[5];}
        if (!empty($m[6])) {$val=$m[6];}

        // convert to lowercase
        if ($this->dom->lowercase) {$tag=strtolower($tag); $key=strtolower($key);}
        //elements that do NOT have the specified attribute
        if (isset($key[0]) && $key[0]==='!') {$key=substr($key, 1); $no_key=true;}

        $result[] = array($tag, $key, $val, $exp, $no_key);
        // A comma closes the current group and starts a new one.
        if (trim($m[7])===',') {
            $selectors[] = $result;
            $result = array();
        }
    }
    if (count($result)>0)
        $selectors[] = $result;
    return $selectors;
}

// Attribute read: set attributes are returned charset-converted; the virtual
// names below map to methods; any other name yields whether the key exists.
function __get($name)
{
    if (isset($this->attr[$name]))
    {
        return $this->convert_text($this->attr[$name]);
    }
    switch ($name)
    {
        case 'outertext': return $this->outertext();
        case 'innertext': return $this->innertext();
        case 'plaintext': return $this->text();
        case 'xmltext': return $this->xmltext();
        default: return array_key_exists($name, $this->attr);
    }
}

// Attribute write: 'outertext' and 'innertext' override the rendered text;
// any other name sets an attribute.
function __set($name, $value)
{
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    switch ($name)
    {
        case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
        case 'innertext':
            if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value;
            return $this->_[HDOM_INFO_INNER] = $value;
    }
    if (!isset($this->attr[$name]))
    {
        // New attribute: record the spacing and quote style makeup() will render it with.
        $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
        $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
    }
    $this->attr[$name] = $value;
}

function __isset($name)
{
    switch ($name)
    {
        case 'outertext': return true;
        case 'innertext': return true;
        case 'plaintext': return true;
    }
    //no value attr: nowrap, checked selected...
    return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]);
}

function __unset($name) {
    if (isset($this->attr[$name]))
        unset($this->attr[$name]);
}

// PaperG - Function to convert the text from one character set to another if the two sets are not the same.
function convert_text($text)
{
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    $converted_text = $text;

    $sourceCharset = "";
    $targetCharset = "";

    if ($this->dom)
    {
        $sourceCharset = strtoupper($this->dom->_charset);
        $targetCharset = strtoupper($this->dom->_target_charset);
    }
    if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);}

    if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0))
    {
        // Check if the reported encoding could have been incorrect and the text is actually already UTF-8
        if ((strcasecmp($targetCharset, 'UTF-8') == 0) && ($this->is_utf8($text)))
        {
            $converted_text = $text;
        }
        else
        {
            $converted_text = iconv($sourceCharset, $targetCharset, $text);
        }
    }

    // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
    if ($targetCharset == 'UTF-8')
    {
        if (substr($converted_text, 0, 3) == "\xef\xbb\xbf")
        {
            $converted_text = substr($converted_text, 3);
        }
        if (substr($converted_text, -3) == "\xef\xbb\xbf")
        {
            $converted_text = substr($converted_text, 0, -3);
        }
    }

    return $converted_text;
}

/**
 * Returns true if $string is valid UTF-8 and false otherwise.
 *
 * Checks lead bytes and the number and range of continuation bytes only.
 *
 * @param mixed $str String to be tested
 * @return boolean
 */
static function is_utf8($str)
{
    $c=0; $b=0;
    $bits=0;
    $len=strlen($str);
    for($i=0; $i<$len; $i++)
    {
        $c=ord($str[$i]);
        // Every byte with the high bit set needs checking. The previous test
        // was "> 128", which let the lone byte 0x80 through as if it were ASCII.
        if($c >= 128)
        {
            if(($c >= 254)) return false;
            elseif($c >= 252) $bits=6;
            elseif($c >= 248) $bits=5;
            elseif($c >= 240) $bits=4;
            elseif($c >= 224) $bits=3;
            elseif($c >= 192) $bits=2;
            else return false;
            if(($i+$bits) > $len) return false;
            while($bits > 1)
            {
                $i++;
                $b=ord($str[$i]);
                if($b < 128 || $b > 191) return false;
                $bits--;
            }
        }
    }
    return true;
}
/*
function is_utf8($string)
{
    //this is buggy
    return (utf8_encode(utf8_decode($string)) == $string);
}
*/

/**
 * Function to try a few tricks to determine the displayed size of an img on the page.
 * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
 *
 * @author John Schlick
 * @version April 19 2012
 * @return array an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out.
 */
function get_display_size()
{
    $width = -1;
    $height = -1;

    if ($this->tag !== 'img')
    {
        return false;
    }

    // See if there is a height or width attribute in the tag itself.
    if (isset($this->attr['width']))
    {
        $width = $this->attr['width'];
    }

    if (isset($this->attr['height']))
    {
        $height = $this->attr['height'];
    }

    // Now look for an inline style.
    if (isset($this->attr['style']))
    {
        // Thanks to user gnarf from stackoverflow for this regular expression.
        $attributes = array();
        preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER);
        foreach ($matches as $match) {
            $attributes[$match[1]] = $match[2];
        }

        // If there is a width in the style attributes:
        if (isset($attributes['width']) && $width == -1)
        {
            // check that the last two characters are px (pixels)
            if (strtolower(substr($attributes['width'], -2)) == 'px')
            {
                $proposed_width = substr($attributes['width'], 0, -2);
                // Now make sure that it's an integer and not something stupid.
                if (filter_var($proposed_width, FILTER_VALIDATE_INT))
                {
                    $width = $proposed_width;
                }
            }
        }

        // If there is a height in the style attributes:
        if (isset($attributes['height']) && $height == -1)
        {
            // check that the last two characters are px (pixels)
            if (strtolower(substr($attributes['height'], -2)) == 'px')
            {
                $proposed_height = substr($attributes['height'], 0, -2);
                // Now make sure that it's an integer and not something stupid.
                if (filter_var($proposed_height, FILTER_VALIDATE_INT))
                {
                    $height = $proposed_height;
                }
            }
        }

    }

    // Future enhancement:
    // Look in the tag to see if there is a class or id specified that has a height or width attribute to it.

    // Far future enhancement
    // Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
    // Note that in this case, the class or id will have the img subselector for it to apply to the image.

    // ridiculously far future development
    // If the class or id is specified in a SEPARATE css file thats not on the page, go get it and do what we were just doing for the ones on the page.

    $result = array('height' => $height,
                    'width' => $width);
    return $result;
}

// camel naming conventions
function getAllAttributes() {return $this->attr;}
function getAttribute($name) {return $this->__get($name);}
function setAttribute($name, $value) {$this->__set($name, $value);}
function hasAttribute($name) {return $this->__isset($name);}
function removeAttribute($name) {$this->__set($name, null);}
function getElementById($id) {return $this->find("#$id", 0);}
function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);}
function getElementByTagName($name) {return $this->find($name, 0);}
function getElementsByTagName($name, $idx=null) {return $this->find($name, $idx);}
function parentNode() {return $this->parent();}
function childNodes($idx=-1) {return $this->children($idx);}
function firstChild() {return $this->first_child();}
function lastChild() {return $this->last_child();}
function nextSibling() {return $this->next_sibling();}
function previousSibling() {return $this->prev_sibling();}
function hasChildNodes() {return $this->has_child();}
function nodeName() {return $this->tag;}
function appendChild($node) {$node->parent($this); return $node;}

}

/**
 * simple html dom parser
 * Paperg - in the find routine: allow us to specify that we want case insensitive testing of the value of the selector.
 * Paperg - change $size from protected to public so we can easily access it
 * Paperg - added ForceTagsClosed in the constructor which tells us whether we trust the html or not. Default is to NOT trust it.
984 | * 985 | * @package PlaceLocalInclude 986 | */ 987 | class simple_html_dom 988 | { 989 | public $root = null; 990 | public $nodes = array(); 991 | public $callback = null; 992 | public $lowercase = false; 993 | // Used to keep track of how large the text was when we started. 994 | public $original_size; 995 | public $size; 996 | protected $pos; 997 | protected $doc; 998 | protected $char; 999 | protected $cursor; 1000 | protected $parent; 1001 | protected $noise = array(); 1002 | protected $token_blank = " \t\r\n"; 1003 | protected $token_equal = ' =/>'; 1004 | protected $token_slash = " />\r\n\t"; 1005 | protected $token_attr = ' >'; 1006 | // Note that this is referenced by a child node, and so it needs to be public for that node to see this information. 1007 | public $_charset = ''; 1008 | public $_target_charset = ''; 1009 | protected $default_br_text = ""; 1010 | public $default_span_text = ""; 1011 | 1012 | // use isset instead of in_array, performance boost about 30%... 1013 | protected $self_closing_tags = array('img'=>1, 'br'=>1, 'input'=>1, 'meta'=>1, 'link'=>1, 'hr'=>1, 'base'=>1, 'embed'=>1, 'spacer'=>1); 1014 | protected $block_tags = array('root'=>1, 'body'=>1, 'form'=>1, 'div'=>1, 'span'=>1, 'table'=>1); 1015 | // Known sourceforge issue #2977341 1016 | // B tags that are not closed cause us to return everything to the end of the document. 
1017 | protected $optional_closing_tags = array( 1018 | 'tr'=>array('tr'=>1, 'td'=>1, 'th'=>1), 1019 | 'th'=>array('th'=>1), 1020 | 'td'=>array('td'=>1), 1021 | 'li'=>array('li'=>1), 1022 | 'dt'=>array('dt'=>1, 'dd'=>1), 1023 | 'dd'=>array('dd'=>1, 'dt'=>1), 1024 | 'dl'=>array('dd'=>1, 'dt'=>1), 1025 | 'p'=>array('p'=>1), 1026 | 'nobr'=>array('nobr'=>1), 1027 | 'b'=>array('b'=>1), 1028 | 'option'=>array('option'=>1), 1029 | ); 1030 | 1031 | function __construct($str=null, $lowercase=true, $forceTagsClosed=true, $target_charset=DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) 1032 | { 1033 | if ($str) 1034 | { 1035 | if (preg_match("/^http:\/\//i",$str) || is_file($str)) 1036 | { 1037 | $this->load_file($str); 1038 | } 1039 | else 1040 | { 1041 | $this->load($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); 1042 | } 1043 | } 1044 | // Forcing tags to be closed implies that we don't trust the html, but it can lead to parsing errors if we SHOULD trust the html. 1045 | if (!$forceTagsClosed) { 1046 | $this->optional_closing_array=array(); 1047 | } 1048 | $this->_target_charset = $target_charset; 1049 | } 1050 | 1051 | function __destruct() 1052 | { 1053 | $this->clear(); 1054 | } 1055 | 1056 | // load html from string 1057 | function load($str, $lowercase=true, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) 1058 | { 1059 | global $debug_object; 1060 | 1061 | // prepare 1062 | $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); 1063 | // strip out cdata 1064 | $this->remove_noise("''is", true); 1065 | // strip out comments 1066 | $this->remove_noise("''is"); 1067 | // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037 1068 | // Script tags removal now preceeds style tag removal. 
1069 | // strip out 16 | 17 | 18 | 19 | 22 | -------------------------------------------------------------------------------- /src/templates/main.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Moe Search - 萌搜索 5 | 6 | 9 | 10 | 11 | 12 |
    13 |
    14 | 17 |
    18 |
    19 | 20 |
    21 |
    22 | 37 |
    38 | 39 |
    40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /src/templates/no-script.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/templates/results.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | <?php echo $word?> - Moe Search - 萌搜索 5 | 6 | 7 | 8 | 9 |
    10 |
    11 | 14 | 28 |
    29 |
    30 |
    31 |
    32 |
    33 |
    34 |
    35 |
    36 |
    37 |
    38 | 41 |
    42 |
    43 |
    44 |
    45 |
    46 |
    47 | 48 | 49 |
    50 |

    哥哥,不要搜奇怪的东西辣!我已经报警了!!

    51 |
    52 | 53 | 54 | $value):?> 55 | 56 |
    57 |

    58 |

    59 |

    60 |

    61 |

    62 |
    63 | 64 | 65 | 66 |
    67 |
    68 |
      69 | 70 |
    71 |
    72 |
    73 |
    74 |
    75 |
    76 |
    77 |
    78 |
    79 |
    80 | 81 | 82 | 83 | 84 | 85 | --------------------------------------------------------------------------------