├── crawler ├── crawlStatus.txt ├── PHPCrawl │ ├── CHANGELOG.txt │ ├── test_interface │ │ ├── info.gif │ │ ├── style.css │ │ ├── phpcrawl_testinterface.conf.php │ │ └── setups │ │ │ └── Example_Setup.psf │ ├── documentation │ │ ├── testinterface.jpg │ │ ├── classreferences │ │ │ ├── expand.gif │ │ │ ├── collapse.gif │ │ │ ├── PHPCrawler │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ └── property_detail_tpl_property_class_version.htm │ │ │ ├── PHPCrawlerAbortReasons │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── constant_detail_tpl_constant_ABORTREASON_PASSEDTHROUGH.htm │ │ │ │ ├── constant_detail_tpl_constant_ABORTREASON_USERABORT.htm │ │ │ │ ├── constant_detail_tpl_constant_ABORTREASON_FILELIMIT_REACHED.htm │ │ │ │ └── constant_detail_tpl_constant_ABORTREASON_TRAFFICLIMIT_REACHED.htm │ │ │ ├── PHPCrawlerDocumentInfo │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── method_detail_tpl_method_toArray.htm │ │ │ │ ├── property_detail_tpl_property_benchmarks.htm │ │ │ │ ├── method_detail_tpl_method_setLinksFoundArray.htm │ │ │ │ ├── property_detail_tpl_property_port.htm │ │ │ │ ├── property_detail_tpl_property_file.htm │ │ │ │ ├── property_detail_tpl_property_path.htm │ │ │ │ ├── property_detail_tpl_property_source.htm │ │ │ │ ├── property_detail_tpl_property_query.htm │ │ │ │ ├── property_detail_tpl_property_host.htm │ │ │ │ ├── property_detail_tpl_property_header.htm │ │ │ │ ├── property_detail_tpl_property_protocol.htm │ │ │ │ ├── property_detail_tpl_property_url.htm │ │ │ │ ├── property_detail_tpl_property_referer_url.htm │ │ │ │ ├── property_detail_tpl_property_header_send.htm │ │ │ │ ├── property_detail_tpl_property_content_type.htm │ │ │ │ ├── property_detail_tpl_property_bytes_received.htm │ │ │ │ ├── property_detail_tpl_property_error_occured.htm │ │ │ │ ├── 
property_detail_tpl_property_header_bytes_received.htm │ │ │ │ ├── property_detail_tpl_property_http_status_code.htm │ │ │ │ └── property_detail_tpl_property_refering_link_raw.htm │ │ │ ├── PHPCrawlerHTTPProtocols │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── constant_detail_tpl_constant_HTTP_1_0.htm │ │ │ │ └── constant_detail_tpl_constant_HTTP_1_1.htm │ │ │ ├── PHPCrawlerProcessReport │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── method_detail_tpl_method_toArray.htm │ │ │ │ ├── property_detail_tpl_property_files_received.htm │ │ │ │ ├── property_detail_tpl_property_bytes_received.htm │ │ │ │ ├── property_detail_tpl_property_links_followed.htm │ │ │ │ ├── property_detail_tpl_property_data_throughput.htm │ │ │ │ ├── property_detail_tpl_property_file_limit_reached.htm │ │ │ │ ├── property_detail_tpl_property_process_runtime.htm │ │ │ │ └── property_detail_tpl_property_traffic_limit_reached.htm │ │ │ ├── PHPCrawlerRequestErrors │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── constant_detail_tpl_constant_ERROR_HOST_UNREACHABLE.htm │ │ │ │ ├── constant_detail_tpl_constant_ERROR_PROXY_UNREACHABLE.htm │ │ │ │ ├── constant_detail_tpl_constant_ERROR_SOCKET_TIMEOUT.htm │ │ │ │ ├── constant_detail_tpl_constant_ERROR_NO_HTTP_HEADER.htm │ │ │ │ ├── constant_detail_tpl_constant_ERROR_TMP_FILE_NOT_WRITEABLE.htm │ │ │ │ └── constant_detail_tpl_constant_ERROR_SSL_NOT_SUPPORTED.htm │ │ │ ├── PHPCrawlerURLDescriptor │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── method_detail_tpl_method_PHPCrawlerURLDescriptor.htm │ │ │ │ ├── property_detail_tpl_property_linktext.htm │ │ │ │ ├── property_detail_tpl_property_link_raw.htm │ │ │ │ ├── property_detail_tpl_property_url_rebuild.htm │ │ │ │ ├── property_detail_tpl_property_linkcode.htm │ 
│ │ │ ├── property_detail_tpl_property_refering_url.htm │ │ │ │ └── property_detail_tpl_property_is_redirect_url.htm │ │ │ ├── PHPCrawlerUrlCacheTypes │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── constant_detail_tpl_constant_URLCACHE_MEMORY.htm │ │ │ │ └── constant_detail_tpl_constant_URLCACHE_SQLITE.htm │ │ │ ├── PHPCrawlerResponseHeader │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── property_detail_tpl_property_content_type.htm │ │ │ │ ├── property_detail_tpl_property_http_status_code.htm │ │ │ │ ├── property_detail_tpl_property_header_raw.htm │ │ │ │ ├── property_detail_tpl_property_source_url.htm │ │ │ │ ├── property_detail_tpl_property_content_length.htm │ │ │ │ ├── property_detail_tpl_property_content_encoding.htm │ │ │ │ ├── property_detail_tpl_property_transfer_encoding.htm │ │ │ │ └── method_detail_tpl_method_PHPCrawlerResponseHeader.htm │ │ │ ├── PHPCrawlerCookieDescriptor │ │ │ │ ├── collapse.gif │ │ │ │ ├── expand.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── property_detail_tpl_property_name.htm │ │ │ │ ├── property_detail_tpl_property_value.htm │ │ │ │ ├── property_detail_tpl_property_path.htm │ │ │ │ ├── property_detail_tpl_property_domain.htm │ │ │ │ ├── property_detail_tpl_property_source_url.htm │ │ │ │ ├── property_detail_tpl_property_expires.htm │ │ │ │ ├── property_detail_tpl_property_expire_timestamp.htm │ │ │ │ ├── property_detail_tpl_property_source_domain.htm │ │ │ │ └── property_detail_tpl_property_cookie_send_time.htm │ │ │ ├── PHPCrawlerMultiProcessModes │ │ │ │ ├── expand.gif │ │ │ │ ├── collapse.gif │ │ │ │ ├── google_code.php │ │ │ │ ├── index.html │ │ │ │ ├── constant_detail_tpl_constant_MPMODE_NONE.htm │ │ │ │ ├── constant_detail_tpl_constant_MPMODE_PARENT_EXECUTES_USERCODE.htm │ │ │ │ └── constant_detail_tpl_constant_MPMODE_CHILDS_EXECUTES_USERCODE.htm │ │ │ ├── google_code.php │ │ │ 
├── index.html │ │ │ └── print_googlead_div.js │ │ ├── google_code.php │ │ └── index.html │ ├── libs │ │ ├── PHPCrawlerUtils.class.php │ │ ├── PHPCrawlerHTTPRequest.class.php │ │ ├── Enums │ │ │ ├── PHPCrawlerHTTPProtocols.class.php │ │ │ ├── PHPCrawlerUrlCacheTypes.class.php │ │ │ ├── PHPCrawlerMultiProcessModes.class.php │ │ │ ├── PHPCrawlerAbortReasons.class.php │ │ │ └── PHPCrawlerRequestErrors.class.php │ │ ├── CookieCache │ │ │ └── PHPCrawlerCookieCacheBase.class.php │ │ ├── PHPCrawlerUrlPartsDescriptor.class.php │ │ ├── PHPCrawlerURLDescriptor.class.php │ │ ├── PHPCrawlerDNSCache.class.php │ │ └── PHPCrawlerStatus.class.php │ └── classes │ │ └── phpcrawler.class.php ├── bgCrawl.php ├── admin │ ├── login.php │ ├── home.php │ └── index.php └── runCrawl.php ├── favicon.ico ├── .openshift └── cron │ └── minutely │ ├── ticktock │ ├── crawler.php │ └── jobs.deny ├── robots.txt ├── inc ├── error.php ├── config.php ├── track.php └── spellcheck.php ├── tables.sql ├── cdn └── css │ ├── index.css │ └── search.css ├── README.md ├── url.php ├── .htaccess ├── about ├── bot.php ├── stats.php └── index.php └── index.php /crawler/crawlStatus.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/favicon.ico -------------------------------------------------------------------------------- /.openshift/cron/minutely/ticktock: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | php ${OPENSHIFT_REPO_DIR}.openshift/cron/minutely/crawler.php 3 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/CHANGELOG.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/CHANGELOG.txt -------------------------------------------------------------------------------- /crawler/PHPCrawl/test_interface/info.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/test_interface/info.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/testinterface.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/testinterface.jpg -------------------------------------------------------------------------------- /crawler/PHPCrawl/libs/PHPCrawlerUtils.class.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/libs/PHPCrawlerUtils.class.php -------------------------------------------------------------------------------- /.openshift/cron/minutely/crawler.php: -------------------------------------------------------------------------------- 1 | 6 | -------------------------------------------------------------------------------- /crawler/bgCrawl.php: -------------------------------------------------------------------------------- 1 | 7 | -------------------------------------------------------------------------------- /robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /inc/ 3 | Disallow: /cdn/ 4 | Disallow: /crawler/ 5 | Disallow: /url.php 6 | Disallow: /search.php 7 | Allow: / 8 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/libs/PHPCrawlerHTTPRequest.class.php: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/libs/PHPCrawlerHTTPRequest.class.php -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/collapse.gif -------------------------------------------------------------------------------- /.openshift/cron/minutely/jobs.deny: -------------------------------------------------------------------------------- 1 | # 2 | # Any script or job files listed in here (one entry per line) will NOT be 3 | # executed (read as ignored by run-parts). 
4 | # 5 | crawler.php 6 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawler/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawler/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawler/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawler/collapse.gif -------------------------------------------------------------------------------- /inc/error.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |

404 Not Found

5 |

6 | The requested file was not found on this server. 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerAbortReasons/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerAbortReasons/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerHTTPProtocols/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerHTTPProtocols/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/expand.gif 
-------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerUrlCacheTypes/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerUrlCacheTypes/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerAbortReasons/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerAbortReasons/collapse.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/collapse.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerHTTPProtocols/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerHTTPProtocols/collapse.gif 
-------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/collapse.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/collapse.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/collapse.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerUrlCacheTypes/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerUrlCacheTypes/collapse.gif 
-------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/collapse.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerMultiProcessModes/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerMultiProcessModes/expand.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/collapse.gif -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerMultiProcessModes/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienwithin/search/master/crawler/PHPCrawl/documentation/classreferences/PHPCrawlerMultiProcessModes/collapse.gif 
-------------------------------------------------------------------------------- /crawler/admin/login.php: -------------------------------------------------------------------------------- 1 | 11 | -------------------------------------------------------------------------------- /tables.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `search` ( 2 | `id` int(11) NOT NULL AUTO_INCREMENT, 3 | `title` varchar(60) NOT NULL, 4 | `url` text NOT NULL, 5 | `description` varchar(160) NOT NULL, 6 | PRIMARY KEY (`id`), 7 | UNIQUE KEY `id` (`id`) 8 | ) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=0 ; -------------------------------------------------------------------------------- /crawler/PHPCrawl/classes/phpcrawler.class.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cdn/css/index.css: -------------------------------------------------------------------------------- 1 | .container .searchForm{ 2 | margin-top:20px; 3 | padding:5px; 4 | } 5 | .container .searchForm #query{ 6 | width: 400px; 7 | padding:4px 5px; 8 | font-size:15px; 9 | } 10 | .container .searchForm .shape-search{ 11 | width: 100px; 12 | height:15px; 13 | } 14 | .container .searchForm div, .container .searchForm p{ 15 | margin-top:10px; 16 | } 17 | -------------------------------------------------------------------------------- /inc/config.php: -------------------------------------------------------------------------------- 1 | 13 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/libs/Enums/PHPCrawlerHTTPProtocols.class.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | search 2 | ====== 3 | 4 | An Open Source Search Engine 5 | Completely written in PHP and uses the following PHP libraries : 6 | 7 | 1) PHPCrawler (customized) 8 | 9 | 2) SimpleHTMLDom 10 | 11 | Called by the name "Web Search" (WS) and it has its own crawler named Dingo! which is also written in PHP. Crawler runs every minute and indexes upto 100 pages each minute. 12 | That's 6000 pages every hour ! 13 | 14 | See Stats : http://search.subinsb.com/about/stats.php 15 | -------------------------------------------------------------------------------- /url.php: -------------------------------------------------------------------------------- 1 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 18 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/libs/Enums/PHPCrawlerUrlCacheTypes.class.php: -------------------------------------------------------------------------------- 1 | /dev/null &"); 12 | } 13 | } 14 | execInbg("php -q $dir/bgCrawl.php"); 15 | file_put_contents($s, 1); 16 | echo "Started Running"; 17 | }else{ 18 | echo "Currently Running"; 19 | } 20 | ?> 21 | -------------------------------------------------------------------------------- /.htaccess: -------------------------------------------------------------------------------- 1 | DirectoryIndex index.php /inc/error.php 2 | ErrorDocument 403 /inc/error.php 3 | ErrorDocument 404 /inc/error.php 4 | ErrorDocument 405 /inc/error.php 5 | ErrorDocument 408 /inc/error.php 6 | ErrorDocument 410 /inc/error.php 7 | ErrorDocument 411 /inc/error.php 8 | ErrorDocument 412 /inc/error.php 9 | ErrorDocument 413 /inc/error.php 10 | ErrorDocument 414 /inc/error.php 11 | ErrorDocument 415 /inc/error.php 12 | ErrorDocument 500 /inc/error.php 13 | ErrorDocument 501 /inc/error.php 14 | ErrorDocument 502 /inc/error.php 15 | ErrorDocument 503 /inc/error.php 16 | ErrorDocument 506 /inc/error.php 17 | 
-------------------------------------------------------------------------------- /about/bot.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |

Dingo!

10 |

Web Search's bot is called Dingo. It's used for crawling the web.
No, Dingo doesn't SPAM and is 100% safe.
If it visited your site, be happy
Your website was just indexed by Dingo.

11 |

Dingo runs every minute and indexes up to
100 sites each minute.

12 |

And it's randomly named.

13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /crawler/admin/home.php: -------------------------------------------------------------------------------- 1 | 2 |
3 |
4 |
5 |
6 |
7 | 8 |
9 | 17 | -------------------------------------------------------------------------------- /inc/track.php: -------------------------------------------------------------------------------- 1 | 4 | 11 | 16 | 19 | -------------------------------------------------------------------------------- /inc/spellcheck.php: -------------------------------------------------------------------------------- 1 | url."?client=t&sl=en&tl=en&hl=en&sc=2&ie=UTF-8&oe=UTF-8&uptl=en&oc=1&otf=1&ssel=3&tsel=0"; 10 | $url.="&q=$s"; 11 | return $url; 12 | } 13 | public function check($s){ 14 | $a=""; 15 | $c=file_get_contents($this->makeURL($s)); 16 | $c=substr_replace($c, "", 0, 41); 17 | preg_match('/u003e","(.*?)",\[1]/', $c, $m); 18 | if(isset($m[1])){ 19 | $a=$m[1]; 20 | $a=str_replace('",', '', $a); 21 | } 22 | return $a; 23 | } 24 | } 25 | ?> 26 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | \r\n"+ 12 | "\r\n"+ 13 | "\r\n"+ 16 | "\r\n
"); 17 | } -------------------------------------------------------------------------------- /crawler/PHPCrawl/test_interface/style.css: -------------------------------------------------------------------------------- 1 | td, input, select, textarea { 2 | font-family: verdana; 3 | font-size: 10px; 4 | } 5 | 6 | td.red { 7 | color: red; 8 | } 9 | 10 | td.head { 11 | background-color: #ffffff; 12 | font-size: 12px; 13 | font-weight: bold; 14 | } 15 | 16 | td.white { 17 | background-color: #ffffff; 18 | } 19 | 20 | table { 21 | background-color:#e1e1e1; 22 | } 23 | 24 | table.bordered { 25 | border-color:#000000; 26 | border-width:1px; 27 | border-style:solid; 28 | } 29 | 30 | a { 31 | font-size: 10px; 32 | font-weight: normal; 33 | } 34 | 35 | .warning { 36 | color: red; 37 | font-size: 12px; 38 | } 39 | 40 | div#comment_div { 41 | position:absolute; 42 | visibility:hidden; 43 | border-right:3px solid black; 44 | border-bottom:3px solid black; 45 | border-left:1px solid black; 46 | border-top:1px solid black; 47 | background-color:#e1e1e1; 48 | padding:3px; 49 | display: inline; 50 | } -------------------------------------------------------------------------------- /crawler/PHPCrawl/libs/Enums/PHPCrawlerAbortReasons.class.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 13 | 14 | 27 | 28 | 32 | 48 | 49 |
50 | 51 |
52 | 53 | HTTP protocol 1.0 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const HTTP_1_0 = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerHTTPProtocols/constant_detail_tpl_constant_HTTP_1_1.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerHTTPProtocols::HTTP_1_1 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerHTTPProtocols::HTTP_1_1 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | HTTP protocol 1.1 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const HTTP_1_1 = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/method_detail_tpl_method_setLinksFoundArray.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Method Details 8 | 9 | 10 | 24 | 25 | 26 | 27 | 28 | 29 |
30 | 31 |

32 | Method: 33 | PHPCrawlerDocumentInfo::setLinksFoundArray() 34 |

35 | 36 | 39 | 40 |
41 | 42 | 43 | 44 | 45 |
46 | 47 | Workaround-method, copies and converts the array $links_found_url_descriptors to $links_found. 48 |
49 | 50 |
51 | Signature: 52 |

53 | 54 | public setLinksFoundArray() 55 |

56 |
57 | 58 |
59 | Parameters: 60 |

61 | 62 | 63 | 64 |
No parameter-descriptions available.
65 |

66 |
67 | 68 |
69 | Returns: 70 |

71 | 72 | 73 | 74 |
No information
75 |

76 |
77 | 78 |
79 | Description: 80 |

81 | 82 | 83 | - 84 | 85 |

86 |
87 | 88 | 89 | 90 | 91 | 92 |
93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerMultiProcessModes/constant_detail_tpl_constant_MPMODE_NONE.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerMultiProcessModes::MPMODE_NONE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerMultiProcessModes::MPMODE_NONE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawler runs in a single process 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const MPMODE_NONE = 0 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/method_detail_tpl_method_PHPCrawlerURLDescriptor.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Method Details 8 | 9 | 10 | 24 | 25 | 26 | 27 | 28 | 29 |
30 | 31 |

32 | Method: 33 | PHPCrawlerURLDescriptor::PHPCrawlerURLDescriptor() 34 |

35 | 36 | 39 | 40 |
41 | 42 | 43 | 44 | 45 |
46 | 47 | 48 |
49 | 50 |
51 | Signature: 52 |

53 | 54 | public PHPCrawlerURLDescriptor($url_rebuild, $link_raw = null, $linkcode = null, $linktext = null, $refering_url = null) 55 |

56 |
57 | 58 |
59 | Parameters: 60 |

61 | 62 | 63 | 64 |
No parameter-descriptions available.
65 |

66 |
67 | 68 |
69 | Returns: 70 |

71 | 72 | 73 | 74 |
No information
75 |

76 |
77 | 78 |
79 | Description: 80 |

81 | 82 | 83 | - 84 | 85 |

86 |
87 | 88 | 89 | 90 | 91 | 92 |
93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerUrlCacheTypes/constant_detail_tpl_constant_URLCACHE_MEMORY.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerUrlCacheTypes::URLCACHE_MEMORY 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerUrlCacheTypes::URLCACHE_MEMORY 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | URLs get cached in local RAM. Best performance. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const URLCACHE_MEMORY = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/test_interface/setups/Example_Setup.psf: -------------------------------------------------------------------------------- 1 | a:3:{s:5:"setup";a:21:{s:6:"setURL";s:43:"http://www.php.net/manual/en/book.mysql.php";s:7:"setPort";s:0:"";s:13:"setFollowMode";s:1:"2";s:18:"setFollowRedirects";s:1:"1";s:17:"setCookieHandling";s:1:"1";s:27:"setAggressiveLinkExtraction";s:1:"1";s:13:"obeyRobotsTxt";s:1:"0";s:12:"setPageLimit";s:0:"";s:15:"setTrafficLimit";s:0:"";s:19:"setContentSizeLimit";s:0:"";s:20:"setConnectionTimeout";s:0:"";s:16:"setStreamTimeout";s:0:"";s:21:"addReceiveContentType";a:3:{i:0;s:11:"#text/html#";i:1;s:0:"";i:2;s:0:"";}s:14:"addFollowMatch";a:3:{i:0;s:27:"#manual/en/.*mysql[^a-z]# i";i:1;s:0:"";i:2;s:0:"";}s:17:"addNonFollowMatch";a:3:{i:0;s:34:"#(jpg|gif|png|pdf|jpeg|css|js)$# i";i:1;s:0:"";i:2;s:0:"";}s:15:"addLinkPriority";a:3:{i:0;a:2:{i:0;s:0:"";i:1;s:0:"";}i:1;a:2:{i:0;s:0:"";i:1;s:0:"";}i:2;a:2:{i:0;s:0:"";i:1;s:0:"";}}s:24:"addReceiveToTmpFileMatch";a:3:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";}s:21:"addLinkExtractionTags";a:9:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";i:3;s:0:"";i:4;s:0:"";i:5;s:0:"";i:6;s:0:"";i:7;s:0:"";i:8;s:0:"";}s:22:"addBasicAuthentication";a:3:{i:0;a:3:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";}i:1;a:3:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";}i:2;a:3:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";}}s:19:"setWorkingDirectory";s:0:"";s:18:"setUserAgentString";s:0:"";}s:6:"output";a:3:{s:13:"requested_url";s:1:"1";s:16:"http_status_code";s:1:"1";s:14:"bytes_received";s:1:"1";}s:4:"misc";a:2:{s:7:"comment";s:625:"The example-setup 'spiders' the documentation 2 | of the php-mysql-extension on php.net 3 | (http://php.net/manual/en/book.mysql.php) including all it's subsections and links. 4 | 5 | By defining some rules is it assured that all other links leading to other sites and sections on php.net get ignored. 
6 | 7 | Every URL within the mysql-documentation looks like "http://www.php.net/manual/en/function.mysql-affected-rows.php" or "http://www.php.net/manual/en/mysql.setup.php", they all contain "http://www.php.net/manual/en/" followed by "mysql" somewhere. 8 | So we add a corresponding follow-rule "#manual/en/.*mysql[^a-z]# i" to the crawler";s:11:"force_flush";s:1:"1";}} -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_HOST_UNREACHABLE.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_HOST_UNREACHABLE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_HOST_UNREACHABLE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Host not reachable 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_HOST_UNREACHABLE = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_PROXY_UNREACHABLE.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_PROXY_UNREACHABLE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_PROXY_UNREACHABLE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Proxy not reachable 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_PROXY_UNREACHABLE = 6 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_SOCKET_TIMEOUT.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_SOCKET_TIMEOUT 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_SOCKET_TIMEOUT 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Socket timed out while reading data. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_SOCKET_TIMEOUT = 5 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_NO_HTTP_HEADER.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_NO_HTTP_HEADER 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_NO_HTTP_HEADER 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Host didn't respond with a valid HTTP-header. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_NO_HTTP_HEADER = 3 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerUrlCacheTypes/constant_detail_tpl_constant_URLCACHE_SQLITE.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerUrlCacheTypes::URLCACHE_SQLITE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerUrlCacheTypes::URLCACHE_SQLITE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | URLs get cached in a SQLite-database-file. Recommended for spidering huge websites. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const URLCACHE_SQLITE = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_TMP_FILE_NOT_WRITEABLE.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_TMP_FILE_NOT_WRITEABLE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_TMP_FILE_NOT_WRITEABLE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Could not write or create TMP-file. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_TMP_FILE_NOT_WRITEABLE = 4 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerAbortReasons/constant_detail_tpl_constant_ABORTREASON_PASSEDTHROUGH.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerAbortReasons::ABORTREASON_PASSEDTHROUGH 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerAbortReasons::ABORTREASON_PASSEDTHROUGH 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawling-process aborted because everything is done/passedthrough. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ABORTREASON_PASSEDTHROUGH = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerAbortReasons/constant_detail_tpl_constant_ABORTREASON_USERABORT.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerAbortReasons::ABORTREASON_USERABORT 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerAbortReasons::ABORTREASON_USERABORT 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawling-process aborted because the handleDocumentInfo-method returned a negative value 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ABORTREASON_USERABORT = 4 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_SSL_NOT_SUPPORTED.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_SSL_NOT_SUPPORTED 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_SSL_NOT_SUPPORTED 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: SSL/HTTPS not supported (probably openssl-extension not installed) 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_SSL_NOT_SUPPORTED = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerAbortReasons/constant_detail_tpl_constant_ABORTREASON_FILELIMIT_REACHED.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerAbortReasons::ABORTREASON_FILELIMIT_REACHED 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerAbortReasons::ABORTREASON_FILELIMIT_REACHED 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawling-process aborted because the filelimit set by user was reached. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ABORTREASON_FILELIMIT_REACHED = 3 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerAbortReasons/constant_detail_tpl_constant_ABORTREASON_TRAFFICLIMIT_REACHED.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerAbortReasons::ABORTREASON_TRAFFICLIMIT_REACHED 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerAbortReasons::ABORTREASON_TRAFFICLIMIT_REACHED 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawling-process aborted because the traffic-limit set by user was reached. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ABORTREASON_TRAFFICLIMIT_REACHED = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerMultiProcessModes/constant_detail_tpl_constant_MPMODE_PARENT_EXECUTES_USERCODE.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerMultiProcessModes::MPMODE_PARENT_EXECUTES_USERCODE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerMultiProcessModes::MPMODE_PARENT_EXECUTES_USERCODE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawler runs in multiprocess-mode, usercode is executed by parent-process only. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const MPMODE_PARENT_EXECUTES_USERCODE = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerMultiProcessModes/constant_detail_tpl_constant_MPMODE_CHILDS_EXECUTES_USERCODE.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerMultiProcessModes::MPMODE_CHILDS_EXECUTES_USERCODE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerMultiProcessModes::MPMODE_CHILDS_EXECUTES_USERCODE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawler runs in multiprocess-mode, usercode is executed by child-processes directly. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const MPMODE_CHILDS_EXECUTES_USERCODE = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawler/property_detail_tpl_property_class_version.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawler::class_version 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawler::class_version 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $class_version = "0.82" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
No information
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_name.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::name 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::name 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Cookie-name 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $name 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_value.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::value 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::value 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Cookie-value 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $value 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_path.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::path 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::path 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Cookie-path 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $path = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_domain.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::domain 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::domain 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Cookie-domain 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $domain = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_port.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::port 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::port 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The port of the URL the request was sent to, e.g. 80 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $port 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_content_type.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::content_type 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::content_type 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The content-type 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $content_type 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_linktext.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::linktext 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::linktext 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The linktext or html-code the link to this URL was laid over. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $linktext = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
No information
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_file.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::file 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::file 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The name of the requested page or file, e.g. "page.html". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $file = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_path.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::path 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::path 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The path in the URL of the requested page or file, e.g. "/page/". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $path = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_source.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::source 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::source 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Same as "content", the content of the requested document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $source = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_http_status_code.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::http_status_code 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::http_status_code 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The HTTP-statuscode 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $http_status_code 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_source_url.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::source_url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::source_url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The URL the cookie was sent from 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $source_url = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_query.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::query 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::query 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The query-part of the URL of the requested page or file, e.g. "?x=y". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $query = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_header_raw.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::header_raw 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::header_raw 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The raw HTTP-header as it was sent by the server 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $header_raw 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_link_raw.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::link_raw 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::link_raw 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The raw link to this URL as it was found in the HTML-source, e.g. "../dunno/index.php" 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $link_raw = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
No information
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_expires.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::expires 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::expires 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Expire-string, e.g. "Sat, 08-Aug-2020 23:59:08 GMT" 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $expires = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_host.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::host 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::host 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The host-part of the URL of the requested page or file, e.g. "www.foo.com". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $host = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_source_url.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::source_url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::source_url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The URL of the website the header was received from. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $source_url 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_header.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::header 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::header 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete HTTP-header the webserver responded with for this page or file. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $header = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_protocol.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::protocol 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::protocol 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The protocol-part of the URL of the page or file, e.g. "http://" 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $protocol = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_content_length.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::content_length 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::content_length 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The content-length as stated in the header. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $content_length 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_url_rebuild.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::url_rebuild 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::url_rebuild 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete, fully qualified and normalized URL 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $url_rebuild = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_expire_timestamp.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::expire_timestamp 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::expire_timestamp 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Expire-date as unix-timestamp 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $expire_timestamp = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_source_domain.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::source_domain 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::source_domain 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The domain the cookie was sent from 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $source_domain = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_url.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete, fully qualified URL of the page or file, e.g. "http://www.foo.com/bar/page.html?x=y". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $url = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_files_received.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::files_received 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::files_received 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The total number of documents the crawler received. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $files_received = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_content_encoding.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::content_encoding 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::content_encoding 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The content-encoding as stated in the header. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $content_encoding 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_linkcode.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::linkcode 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::linkcode 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The html-codepart that contained the link to this URL, i.e. "<a href="../foo.html">LINKTEXT</a>" 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $linkcode = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
No information
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_bytes_received.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::bytes_received 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::bytes_received 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The total number of bytes the crawler received altogether. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $bytes_received = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_transfer_encoding.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::transfer_encoding 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::transfer_encoding 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The transfer-encoding as stated in the header. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $transfer_encoding 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_referer_url.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::referer_url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::referer_url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete URL of the page that contained the link to this document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $referer_url = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_links_followed.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::links_followed 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::links_followed 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The total number of links/URLs the crawler found and followed. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $links_followed = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_refering_url.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::refering_url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::refering_url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The URL of the page that contained the link to the URL described here. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $refering_url 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_header_send.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::header_send 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::header_send 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete HTTP-request-header the crawler sent to the server (debugging info). 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $header_send = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_data_throughput.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::data_throughput 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::data_throughput 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The total data-throughput of the crawler 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $data_throughput = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
float  The rate in bytes/second
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_file_limit_reached.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::file_limit_reached 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::file_limit_reached 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Will be TRUE if the page/file-limit was reached. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $file_limit_reached = false 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
bool 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_is_redirect_url.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::is_redirect_url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::is_redirect_url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Flag indicating whether this URL was target of an HTTP-redirect. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $is_redirect_url = false 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
bool 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_cookie_send_time.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::cookie_send_time 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::cookie_send_time 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The time the cookie was sent 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $cookie_send_time = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
float  time in secs and microseconds
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_content_type.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::content_type 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::content_type 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The content-type of the page or file, e.g. "text/html" or "image/gif". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $content_type = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string  The content-type
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_bytes_received.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::bytes_received 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::bytes_received 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The number of bytes the crawler received of the content of the document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $bytes_received = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int  Received bytes
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_process_runtime.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::process_runtime 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::process_runtime 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The total time the crawling-process was running in seconds. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $process_runtime = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
float  Process-runtime in seconds.
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_error_occured.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::error_occured 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::error_occured 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Indicates whether an error occurred while requesting/receiving the document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $error_occured = false 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
bool  TRUE if an error occurred.
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_header_bytes_received.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::header_bytes_received 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::header_bytes_received 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The number of bytes the crawler received of the header of the document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $header_bytes_received = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int  Received bytes
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_http_status_code.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::http_status_code 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::http_status_code 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The HTTP-statuscode the webserver responded with for the request, e.g. 200 (OK) or 404 (file not found). 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $http_status_code = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_refering_link_raw.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::refering_link_raw 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::refering_link_raw 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Contains the raw link as it was found in the content of the referring URL. (E.g. "../foo.html") 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $refering_link_raw = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerResponseHeader/method_detail_tpl_method_PHPCrawlerResponseHeader.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Method Details 8 | 9 | 10 | 24 | 25 | 26 | 27 | 28 | 29 |
30 | 31 |

32 | Method: 33 | PHPCrawlerResponseHeader::PHPCrawlerResponseHeader() 34 |

35 | 36 | 39 | 40 |
41 | 42 | 43 | 44 | 45 |
46 | 47 | Initiates a new PHPCrawlerResponseHeader. 48 |
49 | 50 |
51 | Signature: 52 |

53 | 54 | public PHPCrawlerResponseHeader($header_string, $source_url) 55 |

56 |
57 | 58 |
59 | Parameters: 60 |

61 | 62 | 63 | 64 |
$header_string string A complete response-header as it was sent by the server
$source_url string The URL of the website the header was received from.
65 |

66 |
67 | 68 |
69 | Returns: 70 |

71 | 72 | 73 | 74 |
No information
75 |

76 |
77 | 78 |
79 | Description: 80 |

81 | 82 | 83 | - 84 | 85 |

86 |
87 | 88 | 89 | 90 | 91 | 92 |
93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /crawler/PHPCrawl/documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_traffic_limit_reached.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::traffic_limit_reached 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::traffic_limit_reached 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Will be TRUE if the crawling-process stopped because the traffic-limit was reached. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $traffic_limit_reached = false 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
bool 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | --------------------------------------------------------------------------------