├── .gitignore ├── CHANGELOG.txt ├── LICENSE.txt ├── README.md ├── classes └── phpcrawler.class.php ├── composer.json ├── documentation ├── about.html ├── classreferences │ ├── PHPCrawler │ │ ├── collapse.gif │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── method_detail_tpl_method_PHPCrawler.htm │ │ ├── method_detail_tpl_method_addBasicAuthentication.htm │ │ ├── method_detail_tpl_method_addContentTypeReceiveRule.htm │ │ ├── method_detail_tpl_method_addFollowMatch.htm │ │ ├── method_detail_tpl_method_addLinkExtractionTags.htm │ │ ├── method_detail_tpl_method_addLinkPriority.htm │ │ ├── method_detail_tpl_method_addLinkSearchContentType.htm │ │ ├── method_detail_tpl_method_addNonFollowMatch.htm │ │ ├── method_detail_tpl_method_addPostData.htm │ │ ├── method_detail_tpl_method_addReceiveContentType.htm │ │ ├── method_detail_tpl_method_addReceiveToMemoryMatch.htm │ │ ├── method_detail_tpl_method_addReceiveToTmpFileMatch.htm │ │ ├── method_detail_tpl_method_addStreamToFileContentType.htm │ │ ├── method_detail_tpl_method_addURLFilterRule.htm │ │ ├── method_detail_tpl_method_addURLFollowRule.htm │ │ ├── method_detail_tpl_method_disableExtendedLinkInfo.htm │ │ ├── method_detail_tpl_method_enableAggressiveLinkSearch.htm │ │ ├── method_detail_tpl_method_enableCookieHandling.htm │ │ ├── method_detail_tpl_method_enableResumption.htm │ │ ├── method_detail_tpl_method_excludeLinkSearchDocumentSections.htm │ │ ├── method_detail_tpl_method_getCrawlerId.htm │ │ ├── method_detail_tpl_method_getProcessReport.htm │ │ ├── method_detail_tpl_method_getReport.htm │ │ ├── method_detail_tpl_method_go.htm │ │ ├── method_detail_tpl_method_goMultiProcessed.htm │ │ ├── method_detail_tpl_method_handleDocumentInfo.htm │ │ ├── method_detail_tpl_method_handleHeaderInfo.htm │ │ ├── method_detail_tpl_method_handlePageData.htm │ │ ├── method_detail_tpl_method_initChildProcess.htm │ │ ├── method_detail_tpl_method_obeyNoFollowTags.htm │ │ ├── method_detail_tpl_method_obeyRobotsTxt.htm 
│ │ ├── method_detail_tpl_method_requestGzipContent.htm │ │ ├── method_detail_tpl_method_resume.htm │ │ ├── method_detail_tpl_method_setAggressiveLinkExtraction.htm │ │ ├── method_detail_tpl_method_setConnectionTimeout.htm │ │ ├── method_detail_tpl_method_setContentSizeLimit.htm │ │ ├── method_detail_tpl_method_setCookieHandling.htm │ │ ├── method_detail_tpl_method_setCrawlingDepthLimit.htm │ │ ├── method_detail_tpl_method_setFollowMode.htm │ │ ├── method_detail_tpl_method_setFollowRedirects.htm │ │ ├── method_detail_tpl_method_setFollowRedirectsTillContent.htm │ │ ├── method_detail_tpl_method_setHTTPProtocolVersion.htm │ │ ├── method_detail_tpl_method_setLinkExtractionTags.htm │ │ ├── method_detail_tpl_method_setPageLimit.htm │ │ ├── method_detail_tpl_method_setPort.htm │ │ ├── method_detail_tpl_method_setProxy.htm │ │ ├── method_detail_tpl_method_setRequestDelay.htm │ │ ├── method_detail_tpl_method_setRequestLimit.htm │ │ ├── method_detail_tpl_method_setStreamTimeout.htm │ │ ├── method_detail_tpl_method_setTmpFile.htm │ │ ├── method_detail_tpl_method_setTrafficLimit.htm │ │ ├── method_detail_tpl_method_setURL.htm │ │ ├── method_detail_tpl_method_setUrlCacheType.htm │ │ ├── method_detail_tpl_method_setUserAgentString.htm │ │ ├── method_detail_tpl_method_setWorkingDirectory.htm │ │ ├── overview.html │ │ ├── property_detail_tpl_property_class_version.htm │ │ └── style.css │ ├── PHPCrawlerAbortReasons │ │ ├── collapse.gif │ │ ├── constant_detail_tpl_constant_ABORTREASON_FILELIMIT_REACHED.htm │ │ ├── constant_detail_tpl_constant_ABORTREASON_PASSEDTHROUGH.htm │ │ ├── constant_detail_tpl_constant_ABORTREASON_TRAFFICLIMIT_REACHED.htm │ │ ├── constant_detail_tpl_constant_ABORTREASON_USERABORT.htm │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── overview.html │ │ └── style.css │ ├── PHPCrawlerCookieDescriptor │ │ ├── collapse.gif │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── 
method_detail_tpl_method_PHPCrawlerCookieDescriptor.htm │ │ ├── method_detail_tpl_method_getFromHeaderLine.htm │ │ ├── overview.html │ │ ├── property_detail_tpl_property_cookie_send_time.htm │ │ ├── property_detail_tpl_property_domain.htm │ │ ├── property_detail_tpl_property_expire_timestamp.htm │ │ ├── property_detail_tpl_property_expires.htm │ │ ├── property_detail_tpl_property_name.htm │ │ ├── property_detail_tpl_property_path.htm │ │ ├── property_detail_tpl_property_source_domain.htm │ │ ├── property_detail_tpl_property_source_url.htm │ │ ├── property_detail_tpl_property_value.htm │ │ └── style.css │ ├── PHPCrawlerDocumentInfo │ │ ├── collapse.gif │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── method_detail_tpl_method_setLinksFoundArray.htm │ │ ├── method_detail_tpl_method_toArray.htm │ │ ├── overview.html │ │ ├── property_detail_tpl_property_benchmarks.htm │ │ ├── property_detail_tpl_property_bytes_received.htm │ │ ├── property_detail_tpl_property_content.htm │ │ ├── property_detail_tpl_property_content_tmp_file.htm │ │ ├── property_detail_tpl_property_content_type.htm │ │ ├── property_detail_tpl_property_cookies.htm │ │ ├── property_detail_tpl_property_data_transfer_rate.htm │ │ ├── property_detail_tpl_property_data_transfer_time.htm │ │ ├── property_detail_tpl_property_error_code.htm │ │ ├── property_detail_tpl_property_error_occured.htm │ │ ├── property_detail_tpl_property_error_string.htm │ │ ├── property_detail_tpl_property_file.htm │ │ ├── property_detail_tpl_property_header.htm │ │ ├── property_detail_tpl_property_header_bytes_received.htm │ │ ├── property_detail_tpl_property_header_send.htm │ │ ├── property_detail_tpl_property_host.htm │ │ ├── property_detail_tpl_property_http_status_code.htm │ │ ├── property_detail_tpl_property_links_found.htm │ │ ├── property_detail_tpl_property_links_found_url_descriptors.htm │ │ ├── property_detail_tpl_property_meta_attributes.htm │ │ ├── property_detail_tpl_property_path.htm │ │ ├── 
property_detail_tpl_property_port.htm │ │ ├── property_detail_tpl_property_protocol.htm │ │ ├── property_detail_tpl_property_query.htm │ │ ├── property_detail_tpl_property_received.htm │ │ ├── property_detail_tpl_property_received_completely.htm │ │ ├── property_detail_tpl_property_received_completly.htm │ │ ├── property_detail_tpl_property_received_to_file.htm │ │ ├── property_detail_tpl_property_received_to_memory.htm │ │ ├── property_detail_tpl_property_referer_url.htm │ │ ├── property_detail_tpl_property_refering_link_raw.htm │ │ ├── property_detail_tpl_property_refering_linkcode.htm │ │ ├── property_detail_tpl_property_refering_linktext.htm │ │ ├── property_detail_tpl_property_responseHeader.htm │ │ ├── property_detail_tpl_property_server_connect_time.htm │ │ ├── property_detail_tpl_property_server_response_time.htm │ │ ├── property_detail_tpl_property_source.htm │ │ ├── property_detail_tpl_property_traffic_limit_reached.htm │ │ ├── property_detail_tpl_property_unbuffered_bytes_read.htm │ │ ├── property_detail_tpl_property_url.htm │ │ ├── property_detail_tpl_property_url_link_depth.htm │ │ └── style.css │ ├── PHPCrawlerHTTPProtocols │ │ ├── collapse.gif │ │ ├── constant_detail_tpl_constant_HTTP_1_0.htm │ │ ├── constant_detail_tpl_constant_HTTP_1_1.htm │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── overview.html │ │ └── style.css │ ├── PHPCrawlerLinkSearchDocumentSections │ │ ├── collapse.gif │ │ ├── constant_detail_tpl_constant_ALL_SPECIAL_SECTIONS.htm │ │ ├── constant_detail_tpl_constant_HTML_COMMENT_SECTIONS.htm │ │ ├── constant_detail_tpl_constant_JS_TRIGGERING_SECTIONS.htm │ │ ├── constant_detail_tpl_constant_SCRIPT_SECTIONS.htm │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── overview.html │ │ └── style.css │ ├── PHPCrawlerMultiProcessModes │ │ ├── collapse.gif │ │ ├── constant_detail_tpl_constant_MPMODE_CHILDS_EXECUTES_USERCODE.htm │ │ ├── constant_detail_tpl_constant_MPMODE_NONE.htm │ │ ├── 
constant_detail_tpl_constant_MPMODE_PARENT_EXECUTES_USERCODE.htm │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── overview.html │ │ └── style.css │ ├── PHPCrawlerProcessReport │ │ ├── collapse.gif │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── method_detail_tpl_method_toArray.htm │ │ ├── overview.html │ │ ├── property_detail_tpl_property_abort_reason.htm │ │ ├── property_detail_tpl_property_avg_proc_data_transfer_rate.htm │ │ ├── property_detail_tpl_property_avg_server_connect_time.htm │ │ ├── property_detail_tpl_property_avg_server_response_time.htm │ │ ├── property_detail_tpl_property_bytes_received.htm │ │ ├── property_detail_tpl_property_data_throughput.htm │ │ ├── property_detail_tpl_property_file_limit_reached.htm │ │ ├── property_detail_tpl_property_files_received.htm │ │ ├── property_detail_tpl_property_links_followed.htm │ │ ├── property_detail_tpl_property_memory_peak_usage.htm │ │ ├── property_detail_tpl_property_process_runtime.htm │ │ ├── property_detail_tpl_property_traffic_limit_reached.htm │ │ ├── property_detail_tpl_property_user_abort.htm │ │ └── style.css │ ├── PHPCrawlerRequestErrors │ │ ├── collapse.gif │ │ ├── constant_detail_tpl_constant_ERROR_HOST_UNREACHABLE.htm │ │ ├── constant_detail_tpl_constant_ERROR_NO_HTTP_HEADER.htm │ │ ├── constant_detail_tpl_constant_ERROR_PROXY_UNREACHABLE.htm │ │ ├── constant_detail_tpl_constant_ERROR_SOCKET_TIMEOUT.htm │ │ ├── constant_detail_tpl_constant_ERROR_SSL_NOT_SUPPORTED.htm │ │ ├── constant_detail_tpl_constant_ERROR_TMP_FILE_NOT_WRITEABLE.htm │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── overview.html │ │ └── style.css │ ├── PHPCrawlerResponseHeader │ │ ├── collapse.gif │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── method_detail_tpl_method_PHPCrawlerResponseHeader.htm │ │ ├── overview.html │ │ ├── property_detail_tpl_property_content_encoding.htm │ │ ├── property_detail_tpl_property_content_length.htm │ │ ├── 
property_detail_tpl_property_content_type.htm │ │ ├── property_detail_tpl_property_cookies.htm │ │ ├── property_detail_tpl_property_header_raw.htm │ │ ├── property_detail_tpl_property_http_status_code.htm │ │ ├── property_detail_tpl_property_source_url.htm │ │ ├── property_detail_tpl_property_transfer_encoding.htm │ │ └── style.css │ ├── PHPCrawlerURLDescriptor │ │ ├── collapse.gif │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── method_detail_tpl_method_PHPCrawlerURLDescriptor.htm │ │ ├── overview.html │ │ ├── property_detail_tpl_property_is_redirect_url.htm │ │ ├── property_detail_tpl_property_link_raw.htm │ │ ├── property_detail_tpl_property_linkcode.htm │ │ ├── property_detail_tpl_property_linktext.htm │ │ ├── property_detail_tpl_property_refering_url.htm │ │ ├── property_detail_tpl_property_url_link_depth.htm │ │ ├── property_detail_tpl_property_url_rebuild.htm │ │ └── style.css │ ├── PHPCrawlerUrlCacheTypes │ │ ├── collapse.gif │ │ ├── constant_detail_tpl_constant_URLCACHE_MEMORY.htm │ │ ├── constant_detail_tpl_constant_URLCACHE_SQLITE.htm │ │ ├── expand.gif │ │ ├── google_code.php │ │ ├── index.html │ │ ├── overview.html │ │ └── style.css │ ├── collapse.gif │ ├── expand.gif │ ├── google_code.php │ ├── index.html │ ├── print_googlead_div.js │ ├── project_overview.htm │ └── style.css ├── example.html ├── example2.html ├── faq.html ├── google_code.php ├── index.html ├── menu.html ├── multiprocesses.html ├── multiprocessing_modes.html ├── quickstart.html ├── requirements.html ├── resume_aborted_processes.html ├── spidering_huge_websites.html ├── style.css ├── testinterface.html ├── testinterface.jpg └── versionhistory.html ├── example.php ├── libs ├── CookieCache │ ├── PHPCrawlerCookieCacheBase.class.php │ ├── PHPCrawlerMemoryCookieCache.class.php │ └── PHPCrawlerSQLiteCookieCache.class.php ├── Enums │ ├── PHPCrawlerAbortReasons.class.php │ ├── PHPCrawlerHTTPProtocols.class.php │ ├── PHPCrawlerLinkSearchDocumentSections.class.php │ ├── 
PHPCrawlerMultiProcessModes.class.php │ ├── PHPCrawlerRequestErrors.class.php │ └── PHPCrawlerUrlCacheTypes.class.php ├── PHPCrawler.class.php ├── PHPCrawlerBenchmark.class.php ├── PHPCrawlerCookieDescriptor.class.php ├── PHPCrawlerDNSCache.class.php ├── PHPCrawlerDocumentInfo.class.php ├── PHPCrawlerHTTPRequest.class.php ├── PHPCrawlerLinkFinder.class.php ├── PHPCrawlerProcessReport.class.php ├── PHPCrawlerResponseHeader.class.php ├── PHPCrawlerRobotsTxtParser.class.php ├── PHPCrawlerStatus.class.php ├── PHPCrawlerURLDescriptor.class.php ├── PHPCrawlerURLFilter.class.php ├── PHPCrawlerUrlPartsDescriptor.class.php ├── PHPCrawlerUserSendDataCache.class.php ├── ProcessCommunication │ ├── PHPCrawlerDocumentInfoQueue.class.php │ ├── PHPCrawlerProcessHandler.class.php │ └── PHPCrawlerStatusHandler.class.php ├── UrlCache │ ├── PHPCrawlerMemoryURLCache.class.php │ ├── PHPCrawlerSQLiteURLCache.class.php │ └── PHPCrawlerURLCacheBase.class.php └── Utils │ ├── PHPCrawlerEncodingUtils.class.php │ └── PHPCrawlerUtils.class.php ├── multiprocessing_example.php ├── resumable_example.php └── test_interface ├── index.php ├── info.gif ├── js.js ├── output.php ├── phpcrawl_testinterface.conf.php ├── phpcrawl_testinterface.func.php ├── phpcrawlsetup.class.php ├── setups └── Example_Setup.psf └── style.css /.gitignore: -------------------------------------------------------------------------------- 1 | libs/PHPCrawlerUtils.class.php 2 | vendor/ 3 | -------------------------------------------------------------------------------- /CHANGELOG.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/CHANGELOG.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # phpcrawl 2 | Copy of http://phpcrawl.cuab.de/ for using with composer 3 | 4 | 
[![Latest Stable Version](https://poser.pugx.org/mmerian/phpcrawl/v/stable)](https://packagist.org/packages/mmerian/phpcrawl) [![Total Downloads](https://poser.pugx.org/mmerian/phpcrawl/downloads)](https://packagist.org/packages/mmerian/phpcrawl) [![Latest Unstable Version](https://poser.pugx.org/mmerian/phpcrawl/v/unstable)](https://packagist.org/packages/mmerian/phpcrawl) [![License](https://poser.pugx.org/mmerian/phpcrawl/license)](https://packagist.org/packages/mmerian/phpcrawl) 5 | -------------------------------------------------------------------------------- /classes/phpcrawler.class.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mmerian/phpcrawl", 3 | "description": "PHPCrawl is a webcrawler/webspider-library written in PHP. It supports filters, limiters, cookie-handling, robots.txt-handling, multiprocessing and much more.", 4 | "license": "GPL-v2", 5 | "authors": [ 6 | { 7 | "name": "Uwe Hunfeld", 8 | "email": "phpcrawl@cuab.de" 9 | } 10 | ], 11 | "autoload": { 12 | "classmap": [ 13 | "libs/Utils/PHPCrawlerUtils.class.php", 14 | "libs" 15 | ] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawler/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawler/collapse.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawler/expand.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawler/expand.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawler/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 51 | 52 |
53 | 54 |
55 | 56 | 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $class_version = "0.83rc1" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
No information
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerAbortReasons/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerAbortReasons/collapse.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerAbortReasons/constant_detail_tpl_constant_ABORTREASON_FILELIMIT_REACHED.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerAbortReasons::ABORTREASON_FILELIMIT_REACHED 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerAbortReasons::ABORTREASON_FILELIMIT_REACHED 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawling-process aborted because the filelimit set by user was reached. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ABORTREASON_FILELIMIT_REACHED = 3 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerAbortReasons/constant_detail_tpl_constant_ABORTREASON_PASSEDTHROUGH.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerAbortReasons::ABORTREASON_PASSEDTHROUGH 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerAbortReasons::ABORTREASON_PASSEDTHROUGH 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawling-process aborted because everything is done/passedthrough. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ABORTREASON_PASSEDTHROUGH = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerAbortReasons/constant_detail_tpl_constant_ABORTREASON_TRAFFICLIMIT_REACHED.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerAbortReasons::ABORTREASON_TRAFFICLIMIT_REACHED 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerAbortReasons::ABORTREASON_TRAFFICLIMIT_REACHED 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawling-process aborted because the traffic-limit set by user was reached. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ABORTREASON_TRAFFICLIMIT_REACHED = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerAbortReasons/constant_detail_tpl_constant_ABORTREASON_USERABORT.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerAbortReasons::ABORTREASON_USERABORT 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerAbortReasons::ABORTREASON_USERABORT 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawling-process aborted because the handleDocumentInfo-method returned a negative value 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ABORTREASON_USERABORT = 4 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerAbortReasons/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerAbortReasons/expand.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerAbortReasons/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 51 | 52 |
53 | 54 |
55 | 56 | The time the cookie was sent 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $cookie_send_time = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
float  time in secs and microseconds
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_domain.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::domain 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::domain 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Cookie-domain 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $domain = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_expire_timestamp.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::expire_timestamp 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::expire_timestamp 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Expire-date as unix-timestamp 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $expire_timestamp = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_expires.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::expires 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::expires 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Expire-string, e.g. "Sat, 08-Aug-2020 23:59:08 GMT" 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $expires = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_name.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::name 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::name 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Cookie-name 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $name 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_path.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::path 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::path 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Cookie-path 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $path = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_source_domain.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::source_domain 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::source_domain 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The domain the cookie was sent from 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $source_domain = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_source_url.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::source_url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::source_url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The URL the cookie was sent from 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $source_url = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerCookieDescriptor/property_detail_tpl_property_value.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerCookieDescriptor::value 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerCookieDescriptor::value 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Cookie-value 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $value 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerDocumentInfo/collapse.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerDocumentInfo/expand.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 51 | 52 |
53 | 54 |
55 | 56 | The number of bytes the crawler received of the content of the document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $bytes_received = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int  Received bytes
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_content_type.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::content_type 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::content_type 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The content-type of the page or file, e.g. "text/html" or "image/gif". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $content_type = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string  The content-type
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_error_occured.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::error_occured 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::error_occured 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Indicates whether an error occurred while requesting/receiving the document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $error_occured = false 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
bool  TRUE if an error occurred.
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_file.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::file 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::file 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The name of the requested page or file, e.g. "page.html". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $file = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_header.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::header 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::header 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete HTTP-header the webserver responded with for this page or file. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $header = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_header_bytes_received.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::header_bytes_received 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::header_bytes_received 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The number of bytes the crawler received of the header of the document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $header_bytes_received = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int  Received bytes
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_header_send.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::header_send 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::header_send 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete HTTP-request-header the crawler sent to the server (debugging info). 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $header_send = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_host.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::host 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::host 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The host-part of the URL of the requested page or file, e.g. "www.foo.com". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $host = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_http_status_code.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::http_status_code 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::http_status_code 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The HTTP-statuscode the webserver responded with for the request, e.g. 200 (OK) or 404 (file not found). 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $http_status_code = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_path.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::path 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::path 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The path in the URL of the requested page or file, e.g. "/page/". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $path = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_port.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::port 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::port 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The port of the URL the request was sent to, e.g. 80 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $port 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_protocol.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::protocol 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::protocol 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The protocol-part of the URL of the page or file, e.g. "http://" 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $protocol = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_query.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::query 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::query 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The query-part of the URL of the requested page or file, e.g. "?x=y". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $query = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_referer_url.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::referer_url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::referer_url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete URL of the page that contained the link to this document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $referer_url = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_refering_link_raw.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::refering_link_raw 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::refering_link_raw 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Contains the raw link as it was found in the content of the referring URL. (E.g. "../foo.html") 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $refering_link_raw = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_source.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::source 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::source 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Same as "content", the content of the requested document. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $source = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_url.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete, fully qualified URL of the page or file, e.g. "http://www.foo.com/bar/page.html?x=y". 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $url = "" 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerDocumentInfo/property_detail_tpl_property_url_link_depth.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerDocumentInfo::url_link_depth 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerDocumentInfo::url_link_depth 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The linking-depth of the URL related to the entry-URL of the crawling-process. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $url_link_depth = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerHTTPProtocols/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerHTTPProtocols/collapse.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerHTTPProtocols/constant_detail_tpl_constant_HTTP_1_0.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerHTTPProtocols::HTTP_1_0 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerHTTPProtocols::HTTP_1_0 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | HTTP protocol 1.0 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const HTTP_1_0 = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerHTTPProtocols/constant_detail_tpl_constant_HTTP_1_1.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerHTTPProtocols::HTTP_1_1 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerHTTPProtocols::HTTP_1_1 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | HTTP protocol 1.1 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const HTTP_1_1 = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerHTTPProtocols/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerHTTPProtocols/expand.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerHTTPProtocols/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 48 | 49 |
50 | 51 |
52 | 53 | All of the listed sections 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ALL_SPECIAL_SECTIONS = 7 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerLinkSearchDocumentSections/constant_detail_tpl_constant_HTML_COMMENT_SECTIONS.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerLinkSearchDocumentSections::HTML_COMMENT_SECTIONS 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerLinkSearchDocumentSections::HTML_COMMENT_SECTIONS 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | HTML-comments of html-documents (<!-- ... -->) 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const HTML_COMMENT_SECTIONS = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerLinkSearchDocumentSections/constant_detail_tpl_constant_JS_TRIGGERING_SECTIONS.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerLinkSearchDocumentSections::JS_TRIGGERING_SECTIONS 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerLinkSearchDocumentSections::JS_TRIGGERING_SECTIONS 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Javascript-triggering attributes like onClick, onMouseOver etc. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const JS_TRIGGERING_SECTIONS = 4 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerLinkSearchDocumentSections/constant_detail_tpl_constant_SCRIPT_SECTIONS.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerLinkSearchDocumentSections::SCRIPT_SECTIONS 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerLinkSearchDocumentSections::SCRIPT_SECTIONS 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Script-parts of html-documents (<script>...</script>) 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const SCRIPT_SECTIONS = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerLinkSearchDocumentSections/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerLinkSearchDocumentSections/expand.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerLinkSearchDocumentSections/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 48 | 49 |
50 | 51 |
52 | 53 | Crawler runs in multiprocess-mode, usercode is executed by child-processes directly. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const MPMODE_CHILDS_EXECUTES_USERCODE = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerMultiProcessModes/constant_detail_tpl_constant_MPMODE_NONE.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerMultiProcessModes::MPMODE_NONE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerMultiProcessModes::MPMODE_NONE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawler runs in a single process 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const MPMODE_NONE = 0 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerMultiProcessModes/constant_detail_tpl_constant_MPMODE_PARENT_EXECUTES_USERCODE.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerMultiProcessModes::MPMODE_PARENT_EXECUTES_USERCODE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerMultiProcessModes::MPMODE_PARENT_EXECUTES_USERCODE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Crawler runs in multiprocess-mode, usercode is executed by parent-process only. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const MPMODE_PARENT_EXECUTES_USERCODE = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerMultiProcessModes/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerMultiProcessModes/expand.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerMultiProcessModes/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 51 | 52 |
53 | 54 |
55 | 56 | The total number of bytes the crawler received altogether. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $bytes_received = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_data_throughput.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::data_throughput 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::data_throughput 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The total data-throughput of the crawler 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $data_throughput = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
float  The rate in bytes/second
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_file_limit_reached.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::file_limit_reached 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::file_limit_reached 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Will be TRUE if the page/file-limit was reached. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $file_limit_reached = false 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
bool 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_files_received.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::files_received 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::files_received 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The total number of documents the crawler received. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $files_received = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_links_followed.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::links_followed 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::links_followed 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The total number of links/URLs the crawler found and followed. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $links_followed = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_process_runtime.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::process_runtime 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::process_runtime 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The total time the crawling-process was running in seconds. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $process_runtime = 0 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
float  Process-runtime in seconds.
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerProcessReport/property_detail_tpl_property_traffic_limit_reached.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerProcessReport::traffic_limit_reached 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerProcessReport::traffic_limit_reached 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | Will be TRUE if the crawling-process stopped because the traffic-limit was reached. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $traffic_limit_reached = false 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
bool 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerRequestErrors/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerRequestErrors/collapse.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_HOST_UNREACHABLE.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_HOST_UNREACHABLE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_HOST_UNREACHABLE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Host not reachable 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_HOST_UNREACHABLE = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_NO_HTTP_HEADER.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_NO_HTTP_HEADER 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_NO_HTTP_HEADER 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Host didn't respond with a valid HTTP-header. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_NO_HTTP_HEADER = 3 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_PROXY_UNREACHABLE.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_PROXY_UNREACHABLE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_PROXY_UNREACHABLE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Proxy not reachable 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_PROXY_UNREACHABLE = 6 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_SOCKET_TIMEOUT.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_SOCKET_TIMEOUT 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_SOCKET_TIMEOUT 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Socket timed out while reading data. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_SOCKET_TIMEOUT = 5 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_SSL_NOT_SUPPORTED.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_SSL_NOT_SUPPORTED 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_SSL_NOT_SUPPORTED 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: SSL/HTTPS not supported (probably openssl-extension not installed) 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_SSL_NOT_SUPPORTED = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerRequestErrors/constant_detail_tpl_constant_ERROR_TMP_FILE_NOT_WRITEABLE.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerRequestErrors::ERROR_TMP_FILE_NOT_WRITEABLE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerRequestErrors::ERROR_TMP_FILE_NOT_WRITEABLE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | Error-Code: Could not write or create TMP-file. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const ERROR_TMP_FILE_NOT_WRITEABLE = 4 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerRequestErrors/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerRequestErrors/expand.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerRequestErrors/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | 51 | 52 |
53 | 54 |
55 | 56 | The content-encoding as stated in the header. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $content_encoding 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_content_length.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::content_length 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::content_length 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The content-length as stated in the header. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $content_length 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_content_type.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::content_type 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::content_type 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The content-type 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $content_type 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_header_raw.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::header_raw 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::header_raw 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The raw HTTP-header as it was sent by the server 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $header_raw 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_http_status_code.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::http_status_code 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::http_status_code 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The HTTP-statuscode 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $http_status_code 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_source_url.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::source_url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::source_url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The URL of the website the header was received from. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $source_url 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerResponseHeader/property_detail_tpl_property_transfer_encoding.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerResponseHeader::transfer_encoding 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerResponseHeader::transfer_encoding 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The transfer-encoding as stated in the header. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $transfer_encoding 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerURLDescriptor/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerURLDescriptor/collapse.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerURLDescriptor/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerURLDescriptor/expand.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerURLDescriptor/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 51 | 52 |
53 | 54 |
55 | 56 | Flag indicating whether this URL was target of an HTTP-redirect. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $is_redirect_url = false 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
bool 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_link_raw.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::link_raw 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::link_raw 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The raw link to this URL as it was found in the HTML-source, i.e. "../dunno/index.php" 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $link_raw = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
No information
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_linkcode.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::linkcode 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::linkcode 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The html-codepart that contained the link to this URL, i.e. "<a href="../foo.html">LINKTEXT</a>" 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $linkcode = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
No information
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_linktext.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::linktext 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::linktext 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The linktext or html-code the link to this URL was laid over. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $linktext = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
No information
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_refering_url.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::refering_url 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::refering_url 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The URL of the page that contained the link to the URL described here. 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $refering_url 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_url_link_depth.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::url_link_depth 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::url_link_depth 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The URL/link-depth of this URL relative to the entry-URL of the crawling-process 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $url_link_depth 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
int 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerURLDescriptor/property_detail_tpl_property_url_rebuild.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for property: 9 | PHPCrawlerURLDescriptor::url_rebuild 10 | 11 | 12 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Property: 37 | PHPCrawlerURLDescriptor::url_rebuild 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | 56 | The complete, fully qualified and normalized URL 57 |
58 | 59 |
60 | Signature: 61 |

62 | 63 | public $url_rebuild = null 64 |

65 |
66 | 67 |
68 | Type: 69 |

70 | 71 | 72 | 73 |
string 
74 |

75 |
76 | 77 |
78 | Description: 79 |

80 | 81 | 82 | - 83 | 84 |

85 |
86 | 87 | 88 | 89 | 90 |
91 | 92 | 93 | 94 | 95 |
96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerUrlCacheTypes/collapse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerUrlCacheTypes/collapse.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerUrlCacheTypes/constant_detail_tpl_constant_URLCACHE_MEMORY.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerUrlCacheTypes::URLCACHE_MEMORY 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerUrlCacheTypes::URLCACHE_MEMORY 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | URLs get cached in local RAM. Best performance. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const URLCACHE_MEMORY = 1 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerUrlCacheTypes/constant_detail_tpl_constant_URLCACHE_SQLITE.htm: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Documentation for constant: 9 | PHPCrawlerUrlCacheTypes::URLCACHE_SQLITE 10 | 11 | 12 | 13 | 14 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |

36 | Constant: 37 | PHPCrawlerUrlCacheTypes::URLCACHE_SQLITE 38 |

39 | 40 | 43 | 44 |
45 | 46 | 47 | 48 | 49 |
50 | 51 |
52 | 53 | URLs get cached in a SQLite-database-file. Recommended for spidering huge websites. 54 |
55 | 56 |
57 | Signature: 58 |

59 | 60 | const URLCACHE_SQLITE = 2 61 |

62 |
63 | 64 |
65 | Description: 66 |

67 | 68 | 69 | - 70 | 71 |

72 |
73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerUrlCacheTypes/expand.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/documentation/classreferences/PHPCrawlerUrlCacheTypes/expand.gif -------------------------------------------------------------------------------- /documentation/classreferences/PHPCrawlerUrlCacheTypes/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 7 |
8 | 16 | 19 |

20 | 21 | 6 | 7 |
8 | 16 | 19 |

20 | 21 | \r\n"+ 12 | "\r\n"+ 13 | "\r\n"+ 16 | "\r\n
"); 17 | } -------------------------------------------------------------------------------- /documentation/google_code.php: -------------------------------------------------------------------------------- 1 | 5 | --> 6 | 13 | 14 | 27 | 28 | 32 | ...<-->) 16 | */ 17 | const HTML_COMMENT_SECTIONS = 2; 18 | 19 | /** 20 | * Javascript-triggering attributes like onClick, onMouseOver etc. 21 | */ 22 | const JS_TRIGGERING_SECTIONS = 4; 23 | 24 | /** 25 | * All of the listed sections 26 | */ 27 | const ALL_SPECIAL_SECTIONS = 7; 28 | } 29 | ?> -------------------------------------------------------------------------------- /libs/Enums/PHPCrawlerMultiProcessModes.class.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /libs/Enums/PHPCrawlerRequestErrors.class.php: -------------------------------------------------------------------------------- 1 | host_ip_array[$hostname])) 30 | { 31 | return $this->host_ip_array[$hostname]; 32 | } 33 | 34 | // Else do DNS-query 35 | else 36 | { 37 | $ip = gethostbyname($hostname); 38 | $this->host_ip_array[$hostname] = $ip; 39 | return $ip; 40 | } 41 | } 42 | 43 | /** 44 | * Checks whether a hostname is already cached. 
45 | * 46 | * @param string $hostname The hostname 47 | * @return bool 48 | */ 49 | public function hostInCache($hostname) 50 | { 51 | if (isset($this->host_ip_array[$hostname])) return true; 52 | else return false; 53 | } 54 | 55 | /** 56 | * Checks whether the hostname of the given URL is already cached 57 | * 58 | * @param PHPCrawlerURLDescriptor $URL The URL 59 | * @return bool 60 | */ 61 | public function urlHostInCache(PHPCrawlerURLDescriptor $URL) 62 | { 63 | $url_parts = PHPCrawlerUtils::splitURL($URL->url_rebuild); 64 | return $this->hostInCache($url_parts["host"]); 65 | } 66 | } 67 | ?> -------------------------------------------------------------------------------- /libs/PHPCrawlerStatus.class.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /libs/PHPCrawlerURLDescriptor.class.php: -------------------------------------------------------------------------------- 1 | LINKTEXT" 23 | */ 24 | public $linkcode = null; 25 | 26 | /** 27 | * The linktext or html-code the link to this URL was layed over. 28 | */ 29 | public $linktext = null; 30 | 31 | /** 32 | * The URL of the page that contained the link to the URL described here. 33 | * 34 | * @var string 35 | */ 36 | public $refering_url; 37 | 38 | /** 39 | * Flag indicating whether this URL was target of an HTTP-redirect. 
40 | * 41 | * @var string 42 | */ 43 | public $is_redirect_url = false; 44 | 45 | /** 46 | * The URL/link-depth of this URL relevant to the entry-URL of the crawling-process 47 | * 48 | * @var int 49 | */ 50 | public $url_link_depth; 51 | 52 | /** 53 | * Initiates an URL-descriptor 54 | * 55 | * @internal 56 | */ 57 | public function __construct($url_rebuild, $link_raw = null, $linkcode = null, $linktext = null, $refering_url = null, $url_link_depth = null) 58 | { 59 | $this->url_rebuild = $url_rebuild; 60 | 61 | if (!empty($link_raw)) $this->link_raw = $link_raw; 62 | if (!empty($linkcode)) $this->linkcode = $linkcode; 63 | if (!empty($linktext)) $this->linktext = $linktext; 64 | if (!empty($refering_url)) $this->refering_url = $refering_url; 65 | if ($url_link_depth !== null) $this->url_link_depth = (int)$url_link_depth; 66 | } 67 | } 68 | ?> -------------------------------------------------------------------------------- /libs/PHPCrawlerUrlPartsDescriptor.class.php: -------------------------------------------------------------------------------- 1 | protocol = $parts["protocol"]; 38 | $tmp->host = $parts["host"]; 39 | $tmp->path = $parts["path"]; 40 | $tmp->file = $parts["file"]; 41 | $tmp->domain = $parts["domain"]; 42 | $tmp->port = $parts["port"]; 43 | $tmp->auth_username = $parts["auth_username"]; 44 | $tmp->auth_password = $parts["auth_password"]; 45 | 46 | return $tmp; 47 | } 48 | 49 | public function toArray() 50 | { 51 | return get_object_vars($this); 52 | } 53 | } 54 | ?> -------------------------------------------------------------------------------- /test_interface/info.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmerian/phpcrawl/1c5e07ff33cf079c69191eb9540a3ced64d392dc/test_interface/info.gif -------------------------------------------------------------------------------- /test_interface/phpcrawl_testinterface.conf.php: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_interface/setups/Example_Setup.psf: -------------------------------------------------------------------------------- 1 | a:3:{s:5:"setup";a:21:{s:6:"setURL";s:43:"http://www.php.net/manual/en/book.mysql.php";s:7:"setPort";s:0:"";s:13:"setFollowMode";s:1:"2";s:18:"setFollowRedirects";s:1:"1";s:17:"setCookieHandling";s:1:"1";s:27:"setAggressiveLinkExtraction";s:1:"1";s:13:"obeyRobotsTxt";s:1:"0";s:12:"setPageLimit";s:0:"";s:15:"setTrafficLimit";s:0:"";s:19:"setContentSizeLimit";s:0:"";s:20:"setConnectionTimeout";s:0:"";s:16:"setStreamTimeout";s:0:"";s:21:"addReceiveContentType";a:3:{i:0;s:11:"#text/html#";i:1;s:0:"";i:2;s:0:"";}s:14:"addFollowMatch";a:3:{i:0;s:27:"#manual/en/.*mysql[^a-z]# i";i:1;s:0:"";i:2;s:0:"";}s:17:"addNonFollowMatch";a:3:{i:0;s:34:"#(jpg|gif|png|pdf|jpeg|css|js)$# i";i:1;s:0:"";i:2;s:0:"";}s:15:"addLinkPriority";a:3:{i:0;a:2:{i:0;s:0:"";i:1;s:0:"";}i:1;a:2:{i:0;s:0:"";i:1;s:0:"";}i:2;a:2:{i:0;s:0:"";i:1;s:0:"";}}s:24:"addReceiveToTmpFileMatch";a:3:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";}s:21:"addLinkExtractionTags";a:9:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";i:3;s:0:"";i:4;s:0:"";i:5;s:0:"";i:6;s:0:"";i:7;s:0:"";i:8;s:0:"";}s:22:"addBasicAuthentication";a:3:{i:0;a:3:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";}i:1;a:3:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";}i:2;a:3:{i:0;s:0:"";i:1;s:0:"";i:2;s:0:"";}}s:19:"setWorkingDirectory";s:0:"";s:18:"setUserAgentString";s:0:"";}s:6:"output";a:3:{s:13:"requested_url";s:1:"1";s:16:"http_status_code";s:1:"1";s:14:"bytes_received";s:1:"1";}s:4:"misc";a:2:{s:7:"comment";s:625:"The example-setup 'spiders' the documentation 2 | of the php-mysql-extension on php.net 3 | (http://php.net/manual/en/book.mysql.php) including all it's subsections and links. 
4 | 5 | By defining some rules is it assured that all other links leading to other sites and sections on php.net get ignored. 6 | 7 | Every URL within the mysql-documentation looks like "http://www.php.net/manual/en/function.mysql-affected-rows.php" or "http://www.php.net/manual/en/mysql.setup.php", they all contain "http://www.php.net/manual/en/" followed by "mysql" somewhere. 8 | So we add a corresponding follow-rule "#manual/en/.*mysql[^a-z]# i" to the crawler";s:11:"force_flush";s:1:"1";}} -------------------------------------------------------------------------------- /test_interface/style.css: -------------------------------------------------------------------------------- 1 | td, input, select, textarea { 2 | font-family: verdana; 3 | font-size: 10px; 4 | } 5 | 6 | td.red { 7 | color: red; 8 | } 9 | 10 | td.head { 11 | background-color: #ffffff; 12 | font-size: 12px; 13 | font-weight: bold; 14 | } 15 | 16 | td.white { 17 | background-color: #ffffff; 18 | } 19 | 20 | table { 21 | background-color:#e1e1e1; 22 | } 23 | 24 | table.bordered { 25 | border-color:#000000; 26 | border-width:1px; 27 | border-style:solid; 28 | } 29 | 30 | a { 31 | font-size: 10px; 32 | font-weight: normal; 33 | } 34 | 35 | .warning { 36 | color: red; 37 | font-size: 12px; 38 | } 39 | 40 | div#comment_div { 41 | position:absolute; 42 | visibility:hidden; 43 | border-right:3px solid black; 44 | border-bottom:3px solid black; 45 | border-left:1px solid black; 46 | border-top:1px solid black; 47 | background-color:#e1e1e1; 48 | padding:3px; 49 | display: inline; 50 | } --------------------------------------------------------------------------------