├── data_vnexpress ├── data │ └── img1.jpg ├── learnGetData │ ├── content.php │ ├── function.php │ ├── headers.php │ ├── index.php │ └── proxy.txt └── useLibs │ ├── SimpleHTMLDom │ ├── CHANGELOG.md │ ├── LICENSE │ ├── example │ │ ├── example_advanced_selector.php │ │ ├── example_basic_selector.php │ │ ├── example_callback.php │ │ ├── example_extract_html.php │ │ ├── example_modify_contents.php │ │ ├── scraping │ │ │ ├── example_scraping_digg.php │ │ │ ├── example_scraping_general.php │ │ │ ├── example_scraping_imdb.php │ │ │ └── example_scraping_slashdot.php │ │ └── simple_html_dom_utility.php │ ├── manual │ │ ├── README.md │ │ ├── custom_theme │ │ │ └── main.html │ │ ├── docs │ │ │ ├── api │ │ │ │ ├── api.md │ │ │ │ ├── constants.md │ │ │ │ ├── definitions.md │ │ │ │ ├── file_get_html.md │ │ │ │ ├── simple_html_dom │ │ │ │ │ ├── __construct.md │ │ │ │ │ ├── __destruct.md │ │ │ │ │ ├── __get.md │ │ │ │ │ ├── __toString.md │ │ │ │ │ ├── as_text_node.md │ │ │ │ │ ├── childNodes.md │ │ │ │ │ ├── clear.md │ │ │ │ │ ├── copy_skip.md │ │ │ │ │ ├── copy_until.md │ │ │ │ │ ├── copy_until_char.md │ │ │ │ │ ├── createElement.md │ │ │ │ │ ├── createTextNode.md │ │ │ │ │ ├── dump.md │ │ │ │ │ ├── find.md │ │ │ │ │ ├── firstChild.md │ │ │ │ │ ├── getElementById.md │ │ │ │ │ ├── getElementByTagName.md │ │ │ │ │ ├── getElementsById.md │ │ │ │ │ ├── getElementsByTagName.md │ │ │ │ │ ├── lastChild.md │ │ │ │ │ ├── link_nodes.md │ │ │ │ │ ├── load.md │ │ │ │ │ ├── loadFile.md │ │ │ │ │ ├── load_file.md │ │ │ │ │ ├── parse.md │ │ │ │ │ ├── parse_attr.md │ │ │ │ │ ├── parse_charset.md │ │ │ │ │ ├── prepare.md │ │ │ │ │ ├── read_tag.md │ │ │ │ │ ├── remove_callback.md │ │ │ │ │ ├── remove_noise.md │ │ │ │ │ ├── restore_noise.md │ │ │ │ │ ├── save.md │ │ │ │ │ ├── search_noise.md │ │ │ │ │ ├── set_callback.md │ │ │ │ │ ├── simple_html_dom.md │ │ │ │ │ └── skip.md │ │ │ │ ├── simple_html_dom_node │ │ │ │ │ ├── __construct.md │ │ │ │ │ ├── __destruct.md │ │ │ │ │ ├── __get.md │ │ │ │ │ 
├── __isset.md │ │ │ │ │ ├── __set.md │ │ │ │ │ ├── __toString.md │ │ │ │ │ ├── __unset.md │ │ │ │ │ ├── addClass.md │ │ │ │ │ ├── appendChild.md │ │ │ │ │ ├── childNodes.md │ │ │ │ │ ├── children.md │ │ │ │ │ ├── clear.md │ │ │ │ │ ├── convert_text.md │ │ │ │ │ ├── dump.md │ │ │ │ │ ├── dump_node.md │ │ │ │ │ ├── find.md │ │ │ │ │ ├── find_ancestor_tag.md │ │ │ │ │ ├── firstChild.md │ │ │ │ │ ├── first_child.md │ │ │ │ │ ├── getAllAttributes.md │ │ │ │ │ ├── getAttribute.md │ │ │ │ │ ├── getElementById.md │ │ │ │ │ ├── getElementByTagName.md │ │ │ │ │ ├── getElementsById.md │ │ │ │ │ ├── getElementsByTagName.md │ │ │ │ │ ├── get_display_size.md │ │ │ │ │ ├── hasAttribute.md │ │ │ │ │ ├── hasChildNodes.md │ │ │ │ │ ├── hasClass.md │ │ │ │ │ ├── has_child.md │ │ │ │ │ ├── innertext.md │ │ │ │ │ ├── is_utf8.md │ │ │ │ │ ├── lastChild.md │ │ │ │ │ ├── last_child.md │ │ │ │ │ ├── makeup.md │ │ │ │ │ ├── match.md │ │ │ │ │ ├── nextSibling.md │ │ │ │ │ ├── next_sibling.md │ │ │ │ │ ├── nodeName.md │ │ │ │ │ ├── outertext.md │ │ │ │ │ ├── parent.md │ │ │ │ │ ├── parentNode.md │ │ │ │ │ ├── parse_selector.md │ │ │ │ │ ├── prevSibling.md │ │ │ │ │ ├── prev_sibling.md │ │ │ │ │ ├── remove.md │ │ │ │ │ ├── removeAttribute.md │ │ │ │ │ ├── removeChild.md │ │ │ │ │ ├── removeClass.md │ │ │ │ │ ├── save.md │ │ │ │ │ ├── seek.md │ │ │ │ │ ├── setAttribute.md │ │ │ │ │ ├── simple_html_dom_node.md │ │ │ │ │ ├── text.md │ │ │ │ │ └── xmltext.md │ │ │ │ └── str_get_html.md │ │ │ ├── faq.md │ │ │ ├── index.md │ │ │ ├── manual │ │ │ │ ├── accessing-element-attributes.md │ │ │ │ ├── adding-nodes.md │ │ │ │ ├── creating-dom-objects.md │ │ │ │ ├── customizing-parsing-behavior.md │ │ │ │ ├── finding-html-elements.md │ │ │ │ ├── saving-dom-objects.md │ │ │ │ └── traversing-dom-tree.md │ │ │ ├── quick-start.md │ │ │ └── requirements.md │ │ ├── extra.css │ │ ├── mkdocs.yml │ │ └── site │ │ │ └── .gitkeep │ ├── phpcompatibility.xml │ ├── phpcs.xml │ ├── simple_html_dom.php │ └── 
simplehtmldom_1_9_1.zip │ ├── connection.php │ ├── content.php │ ├── library │ ├── Curl │ │ ├── ArrayUtil.php │ │ ├── CaseInsensitiveArray.php │ │ ├── Curl.php │ │ ├── Decoder.php │ │ ├── Encoder.php │ │ ├── MultiCurl.php │ │ ├── StringUtil.php │ │ └── Url.php │ └── php-curl-class-master.zip │ └── text.php └── first_learning ├── api.php ├── controller.php ├── index.php ├── showResult.php └── view.php /data_vnexpress/data/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bacdong/PHP_Web-crawler/c94118d752a6a39026f3a542ec00c1315059a745/data_vnexpress/data/img1.jpg -------------------------------------------------------------------------------- /data_vnexpress/learnGetData/content.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_vnexpress/learnGetData/function.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_vnexpress/learnGetData/headers.php: -------------------------------------------------------------------------------- 1 | '; 22 | // var_dump($headers); 23 | // echo ''; die; 24 | 25 | // curl_setopt($ch, CURLOPT_HEADER, true); 26 | // curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); 27 | ?> -------------------------------------------------------------------------------- /data_vnexpress/learnGetData/index.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_vnexpress/learnGetData/proxy.txt: -------------------------------------------------------------------------------- 1 | 157.230.33.138:8080 -------------------------------------------------------------------------------- 
/data_vnexpress/useLibs/SimpleHTMLDom/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 5 | 6 | ## [1.9.1] - 2019-10-20 7 | ### Fixed 8 | - Fixed broken "text" selectors [#175](https://sourceforge.net/p/simplehtmldom/bugs/175/) 9 | 10 | ## [1.9] - 2019-05-30 11 | ### Added 12 | - Added unit test for bug reports 13 | - Added test for bug [#153](https://sourceforge.net/p/simplehtmldom/bugs/153/) 14 | - Added test for bug [#163](https://sourceforge.net/p/simplehtmldom/bugs/163/) 15 | - Added test for bug [#166](https://sourceforge.net/p/simplehtmldom/bugs/166/) 16 | - Added test for bug [#169](https://sourceforge.net/p/simplehtmldom/bugs/169/) 17 | - Added unit test for character sets UTF-8, CP1251 and CP1252 (#142) 18 | - Added support for meta charset to parse_charset 19 | - Added detection for CP1251 to parse_charset, using iconv 20 | - Added LICENSE file (MIT) to the project root 21 | - Added functions to `simple_html_dom_node` 22 | - `remove`: Removes the current node recursively from the DOM tree 23 | - `removeChild`: Removes a child node recursively from the DOM tree 24 | - `hasClass`: Checks if the current node has the specified class name 25 | - `addClass`: Adds one or more classes to the current node 26 | - `removeClass`: Removes one or more classes from the current node 27 | - `save`: Saves the current node to disk 28 | ### Changed 29 | - Changed manual from custom implementation to MkDocs (https://www.mkdocs.org/) 30 | ### Fixed 31 | - Fixed warning when trying to clear() the DOM on a null nodes list (#153) 32 | - Fixed missing whitespace when returning plaintext (#163) 33 | - Fixed broken detection of duplicate attributes (#166) 34 | - Fixed broken detection of CP1252 (ISO-8859-1) documents (#142) 35 | - Fixed error using next-sibling 
combinator ('E + F') on last child 36 | - Fixed selector parsing for attribute selectors ending on "s" or "i" (#169) 37 | 38 | ## [1.8.1] - 2019-01-13 39 | ### Fixed 40 | - Fixed various bugs related to parsing classes and ids 41 | 42 | ## [1.8] - 2019-01-13 43 | ### Added 44 | - Added documentation for `simple_html_dom_node::find` 45 | - Added documentation for `simple_html_dom_node::parse_selector` 46 | - Added documentation for `simple_html_dom_node::seek` 47 | - Added documentation for `simple_html_dom_node::match` 48 | - Added unit tests for bug reports 49 | - Added test for bug [#62](https://sourceforge.net/p/simplehtmldom/bugs/62/) 50 | - Added test for bug [#79](https://sourceforge.net/p/simplehtmldom/bugs/79/) 51 | - Added test for bug [#144](https://sourceforge.net/p/simplehtmldom/bugs/144/) 52 | - Added unit tests for CSS selectors 53 | - Added ability to define constants before simple_html_dom does 54 | - 'DEFAULT_TARGET_CHARSET' 55 | - 'DEFAULT_BR_TEXT' 56 | - 'DEFAULT_SPAN_TEXT' 57 | - 'MAX_FILE_SIZE' 58 | - Added support for CSS combinators 59 | - Added support for Child Combinator (`>`) 60 | - Added support for Next Sibling Combinator (`+`) 61 | - Added support for Subsequent Sibling Combinator (`~`) 62 | - Added support for multiclass selectors (`.class.class.class`) 63 | - Added support for multiattribute selectors (`[attr1][attr2][attribute3]`) 64 | - Added support for attribute selectors 65 | - Added support for pipe selectors (`|=`) 66 | - Added support for tilde selectors (`~=`) 67 | - Added support for case sensitivity selectors (`i` and `s`) 68 | - Added unit tests for PHP compatibility to PHP 5.6+ 69 | - Added coding standard using PHP_CodeSniffer 70 | ### Changed 71 | - Removed automatic filtering of 'tbody' selectors (#79) 72 | > Remove 'tbody' from all selectors to maintain the previous state! 
73 | - Coding standard using PHP_CodeSniffer 74 | ### Fixed 75 | - Fixed broken CSS selector attributes with value "0" (#62) 76 | - Fixed broken simple_html_dom::load_file 77 | - Fixed forward slashes in CSS selector breaks value matching using '*=' (#144) 78 | - Fixed Universal Selectors 79 | 80 | ## [1.7] - 2018-12-10 81 | ### Added 82 | - Added code documentation to improve readability 83 | - Added unit tests for `simple_html_dom::$self_closing_tags` 84 | - Added unit tests for `simple_html_dom::$optional_closing_tags` 85 | - Added unit tests for bug reports 86 | - Added test for bug [#56](https://sourceforge.net/p/simplehtmldom/bugs/56/) 87 | - Added test for bug [#97](https://sourceforge.net/p/simplehtmldom/bugs/97/) 88 | - Added test for bug [#116](https://sourceforge.net/p/simplehtmldom/bugs/116/) 89 | - Added test for bug [#121](https://sourceforge.net/p/simplehtmldom/bugs/127/) 90 | - Added test for bug [#127](https://sourceforge.net/p/simplehtmldom/bugs/127/) 91 | - Added test for bug [#154](https://sourceforge.net/p/simplehtmldom/bugs/154/) 92 | - Added test for bug [#160](https://sourceforge.net/p/simplehtmldom/bugs/160/) 93 | - Added unit tests for memory management of the parser 94 | - Added bit flags to `simple_html_dom::load()` 95 | - Added bit flag `HDOM_SMARTY_AS_TEXT` to optionally filter Smarty scripts (#154)\ 96 | **Note**: Smarty scripts are no longer filtered by default!\ 97 | - Added build script to automate releases 98 | - Added support for attributes without whitespace to separate them 99 | ### Changed 100 | - Improved documentation and readability for `$self_closing_tags` 101 | - Improved documentation and readability for `$block_tags` 102 | - Improved documentation and readability for `$optional_closing_tags` 103 | - Updated list of `simple_html_dom::$self_closing_tags` 104 | - Removed 'spacer' (obsolete) 105 | - Added 'area' 106 | - Added 'col' 107 | - Added 'meta' 108 | - Added 'param' 109 | - Added 'source' 110 | - Added 'track' 111 | 
- Added 'wbr' 112 | - Updated list of `simple_html_dom::$optional_closing_tags` 113 | - Removed "nobr" (obsolete) 114 | - Added 'th' as closable element to 'td' 115 | - Added 'td' as closable element to 'th' 116 | - Added 'optgroup' with 'optgroup' and 'option' as closable elements 117 | - Added 'optgroup' as closable element to 'option' 118 | - Added 'rp' with 'rp' and 'rt' as closable elements 119 | - Added 'rt' with 'rt' and 'rp' as closable elements 120 | - Clarified meaning of `simple_html_dom->parent` 121 | - Changed default `$offset` for `file_get_html()` from -1 to 0 (#161) 122 | - Changed `simple_html_dom::load()` to remove script tags before replacing newline characters 123 | - `simple_html_dom_node::text()` no longer adds whitespace to top level span elements (only to sub-elements) 124 | - `simple_html_dom_node::text()` adds blank lines between paragraphs 125 | - Normalized line endings in the repository to LF via `.gitattributes` 126 | - Improved performance of `simple_html_dom::parse_charset()` by approximately 25% 127 | - Improved performance of `simple_html_dom::parse()` by approximately 10% 128 | ### Deprecated 129 | - `str_get_html()` is deprecated and should be replaced by `new simple_html_dom()` 130 | ### Removed 131 | - Removed protected function `simple_html_dom::copy_until_char_escaped()` 132 | ### Fixed 133 | - Fixed compatibility issues with PHP 7.3 134 | - Fixed typo (#147) 135 | - Fixed handling of incorrectly escaped text (#160) 136 | - Restore functionality of `$maxLen` in `file_get_html()` 137 | - Fixed load_file breaks if an error ocurred in another script 138 | 139 | ## [1.6] - 2014-05-28 140 | ### Added 141 | - Added some ability to insert and create nodes 142 | - Add ability to search the "noise" array 143 | 144 | ## [1.5] - 2012-09-10 145 | ### Added 146 | - Added flag: LOCK_EX while calling "file_put_contents()" 147 | - Added support for detecting the source html character set. 
This is used to convert characters when plaintext is requested. 148 | - Other little fixes and features, too numerous to categorize 149 | ### Changed 150 | - Error of "file_get_contents()" will be thrown as an exception 151 | ### Fixed 152 | - Fixed the typo of "token_blank_t" 153 | - Memory leak fixed 154 | 155 | ## [1.11] - 2008-12-14 156 | ### Added 157 | - Supports xpath generated from Firebug 158 | - New method "dump" of "simple_html_dom_node" 159 | - New attribute "xmltext" of "simple_html_dom_node" 160 | ### Changed 161 | - Remove preg_quote on selector match function: `[attribute*=value]` 162 | - Element "Comment" will treat as children 163 | ### Fixed 164 | - Fixed the problem with `
` 165 | - Fixed bug #2207477 (does not load some pages properly) 166 | - Fixed bug #2315853 (Error with character after < sign) 167 | 168 | ## [1.10] - 2008-10-25 169 | ### Changed 170 | - Negative indexes supports of "find" method, thanks for Vadim Voituk 171 | - Constructor with automatically load contents either text or file/url, thanks for Antcs 172 | - Fully supports wildcard in selectors 173 | ### Fixed 174 | - Fixed bug of confusing by the < symbol inside the text 175 | - Fixed bug of dash in selectors 176 | - Fixed bug of `` 177 | - Fixed bug #2155883 (Nested List Parses Incorrectly) 178 | - Fixed bug #2155113 (error with unclosed html tags) 179 | 180 | ## [1.00] - 2008-09-05 181 | ### Added 182 | - New method "getAllAttributes" of "simple_html_dom_node" 183 | - Supports full javascript string in selector: `$e->find("a[onclick=alert('hello')]")` 184 | ### Changed 185 | - Changed selector "*=" to case-insensitive 186 | ### Fixed 187 | - Fixed the bug of selector in some critical conditions 188 | - Fixed the bug of stripping php tags 189 | - Fixed the bug of remove_noise() 190 | - Fixed the bug of noise in attributes 191 | 192 | ## [0.99] - 2008-08-03 193 | ### Changed 194 | - Performance tuning (boost 10%) 195 | - Memory requirement reduced by 25% 196 | - Changed function name from "file_get_dom()" to "file_get_html()" 197 | - Changed function name from "str_get_dom()" to "str_get_html()" 198 | ### Fixed 199 | - Fixed bug #2011286 (Error with unclosed html tags) 200 | - Fixed bug #2012551 (Error parsing divs) 201 | - Fixed bug #2020924 (Error for missed tag) 202 | - Fixed bug (problem with `` tag's innertext) 203 | 204 | ## [0.98] - 2008-06-24 205 | ### Added 206 | - Supports "multiple class" selector feature: `` 207 | - New "callback function" feature 208 | - New "multiple selectors" feature: $dom->find('p,a,b') 209 | - New examples 210 | - Supports extract contents from HTML features: $dom->plaintext 211 | ### Changed 212 | - Performance tuning (boost 20%) 
213 | - Changed simple_html_dom_node method name from "text()" to "makeup()" 214 | ### Fixed 215 | - Fixed the bug of $dom->clear() 216 | - Fixed the bug of text nodes' innertext 217 | - Fixed the bug of comment nodes' innertext 218 | - Fixed the bug of descendant selector with optional tags 219 | 220 | ## [0.97] - 2008-05-09 221 | ### Added 222 | - New node type "comment" (eg. $dom->find('comment')) 223 | - Add self-closing tags: 'base', 'spacer' 224 | - New example "simple_html_dom_utility.php" 225 | ### Changed 226 | - File and class name changed (html_dom_parser->simple_html_dom) 227 | ### Removed 228 | - ($dom->save_file) is no longer supported 229 | - Remove example "example_customize_parser.php" 230 | ### Fixed 231 | - Fixed the bug of outertext (th) 232 | - Fixed the bug of regular expression escaping chars ($dom->find) 233 | - Fixed the bug while line-breaker and "\t" in tags 234 | 235 | ## [0.96] - 2008-04-27 236 | ### Added 237 | - Reference section in manual 238 | - Added traverse section in manual 239 | - Added the solution while server behind proxy in FAQ (Thanks to Yousuke Shaggy) 240 | - New method to remove attribute. 
241 | - New DOM operations(first_child, last_child, next_sibling, previous_sibling) (Request #1936000) 242 | ### Changed 243 | - Now file_get_dom supports full file_get_contents parameters 244 | ### Fixed 245 | - Fixed the bug of self-closing tags in the end of file 246 | - Fixed the bug of blanks in the end of tag 247 | - Fixed some typo of testcase 248 | 249 | ## [0.95] - 2008-04-13 250 | ### Added 251 | - Supports tag name with namespace 252 | ### Changed 253 | - New attribute filters (Thanks to Yousuke Kumakura) 254 | - Refine structure of testcase 255 | ### Fixed 256 | - Fix the bug of optional-closing tags 257 | - Fix the bug of parsing the line break next to the tag's name 258 | 259 | ## [0.94] - 2008-04-06 260 | ### Added 261 | - Add FAQ section in manual 262 | ### Fixed 263 | - Fixed infinity loop while the source content is BAD HTML 264 | - Fixed the bug of adding new attributes to self closing tags 265 | - Fixed the bug of customize parser without $dom->remove_noise() -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 S.C. Chen, John Schlick, logmanoriginal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/example_advanced_selector.php: -------------------------------------------------------------------------------- 1 | 9 | 10 |12 | 13 | HTML; 14 | 15 | $html = str_get_html($str); 16 | echo $html->find('div div div', 0)->innertext . ' 11 |
'; // result: "ok" 17 | 18 | // ----------------------------------------------------------------------------- 19 | // nested selector 20 | $str = << 22 |item:1 23 |item:2 24 | 25 |26 |
29 | HTML; 30 | 31 | $html = str_get_html($str); 32 | foreach($html->find('ul') as $ul) { 33 | foreach($ul->find('li') as $li) 34 | echo $li->innertext . '- item:3
27 |- item:4
28 |
'; 35 | } 36 | 37 | // ----------------------------------------------------------------------------- 38 | // parsing checkbox 39 | $str = << 41 | item1
42 | item2
43 | item3
44 | 45 | HTML; 46 | 47 | $html = str_get_html($str); 48 | foreach($html->find('input[type=checkbox]') as $checkbox) { 49 | if ($checkbox->checked) 50 | echo $checkbox->name . ' is checked
'; 51 | else 52 | echo $checkbox->name . ' is not checked
'; 53 | } 54 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/example_basic_selector.php: -------------------------------------------------------------------------------- 1 | find('a') as $e) 10 | echo $e->href . '
'; 11 | 12 | // find all image 13 | foreach($html->find('img') as $e) 14 | echo $e->src . '
'; 15 | 16 | // find all image with full tag 17 | foreach($html->find('img') as $e) 18 | echo $e->outertext . '
'; 19 | 20 | // find all div tags with id=gbar 21 | foreach($html->find('div#gbar') as $e) 22 | echo $e->innertext . '
'; 23 | 24 | // find all span tags with class=gb1 25 | foreach($html->find('span.gb1') as $e) 26 | echo $e->outertext . '
'; 27 | 28 | // find all td tags with attribite align=center 29 | foreach($html->find('td[align=center]') as $e) 30 | echo $e->innertext . '
'; 31 | 32 | // extract text from table 33 | echo $html->find('td[align="center"]', 1)->plaintext.'
'; 34 | 35 | // extract text from HTML 36 | echo $html->plaintext; 37 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/example_callback.php: -------------------------------------------------------------------------------- 1 | tag=='input') 8 | $element->outertext = 'input'; 9 | 10 | if ($element->tag=='img') 11 | $element->outertext = 'img'; 12 | 13 | if ($element->tag=='a') 14 | $element->outertext = 'a'; 15 | } 16 | 17 | 18 | // 2. create HTML Dom 19 | $html = file_get_html('http://www.google.com/'); 20 | 21 | 22 | // 3. Register the callback function with it's function name 23 | $html->set_callback('my_callback'); 24 | 25 | 26 | // 4. Callback function will be invoked while dumping 27 | echo $html; 28 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/example_extract_html.php: -------------------------------------------------------------------------------- 1 | plaintext; 5 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/example_modify_contents.php: -------------------------------------------------------------------------------- 1 | find('img') as $e) 10 | $e->outertext = ''; 11 | 12 | // replace all input 13 | foreach($html->find('input') as $e) 14 | $e->outertext = '[INPUT]'; 15 | 16 | // dump contents 17 | echo $html; 18 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/scraping/example_scraping_digg.php: -------------------------------------------------------------------------------- 1 | find('div.news-summary') as $article) { 10 | // get title 11 | $item['title'] = trim($article->find('h3', 0)->plaintext); 12 | // get details 13 | $item['details'] = trim($article->find('p', 0)->plaintext); 14 | // get intro 15 | $item['diggs'] = 
trim($article->find('li a strong', 0)->plaintext); 16 | 17 | $ret[] = $item; 18 | } 19 | 20 | // clean up memory 21 | $html->clear(); 22 | unset($html); 23 | 24 | return $ret; 25 | } 26 | 27 | 28 | // ----------------------------------------------------------------------------- 29 | // test it! 30 | 31 | // "http://digg.com" will check user_agent header... 32 | ini_set('user_agent', 'My-Application/2.5'); 33 | 34 | $ret = scraping_digg(); 35 | 36 | foreach($ret as $v) { 37 | echo $v['title'].'
'; 38 | echo ''; 39 | echo '
'; 42 | } 43 | 44 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/scraping/example_scraping_general.php: -------------------------------------------------------------------------------- 1 | "; 9 | // create HTML DOM 10 | $html = file_get_html($url); 11 | echo "url has been read.- '.$v['details'].'
'; 40 | echo '- Diggs: '.$v['diggs'].'
'; 41 | echo '
"; 12 | 13 | // get article block 14 | foreach($html->find($search) as $found) { 15 | // Found at least one. 16 | $return - true; 17 | echo "found a: " . $search . ""; 18 | $found->dump(); 19 | echo "
"; 20 | } 21 | 22 | // clean up memory 23 | $html->clear(); 24 | unset($html); 25 | 26 | return $return; 27 | } 28 | 29 | 30 | // ------------------------------------------ 31 | error_log ("post:" . print_r($_POST, true)); 32 | $url = ""; 33 | if (isset($_POST['url'])) 34 | { 35 | $url = $_POST['url']; 36 | } 37 | $search = ""; 38 | if (isset($_POST['search'])) 39 | { 40 | $search = $_POST['search']; 41 | } 42 | ?> 43 | 48 | "; 57 | } 58 | } 59 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/scraping/example_scraping_imdb.php: -------------------------------------------------------------------------------- 1 | find('title', 0)->innertext; 10 | 11 | // get rating 12 | $ret['Rating'] = $html->find('div[class="general rating"] b', 0)->innertext; 13 | 14 | // get overview 15 | foreach($html->find('div[class="info"]') as $div) { 16 | // skip user comments 17 | if($div->find('h5', 0)->innertext=='User Comments:') 18 | return $ret; 19 | 20 | $key = ''; 21 | $val = ''; 22 | 23 | foreach($div->find('*') as $node) { 24 | if ($node->tag=='h5') 25 | $key = $node->plaintext; 26 | 27 | if ($node->tag=='a' && $node->plaintext!='more') 28 | $val .= trim(str_replace("\n", '', $node->plaintext)); 29 | 30 | if ($node->tag=='text') 31 | $val .= trim(str_replace("\n", '', $node->plaintext)); 32 | } 33 | 34 | $ret[$key] = $val; 35 | } 36 | 37 | // clean up memory 38 | $html->clear(); 39 | unset($html); 40 | 41 | return $ret; 42 | } 43 | 44 | 45 | // ----------------------------------------------------------------------------- 46 | // test it! 47 | $ret = scraping_IMDB('http://imdb.com/title/tt0335266/'); 48 | 49 | foreach($ret as $k=>$v) 50 | echo ''.$k.' '.$v.'
'; 51 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/scraping/example_scraping_slashdot.php: -------------------------------------------------------------------------------- 1 | find('div[id^=firehose-]') as $article) { 10 | // get title 11 | $item['title'] = trim($article->find('a.datitle', 0)->plaintext); 12 | // get body 13 | $item['body'] = trim($article->find('div.body', 0)->plaintext); 14 | 15 | $ret[] = $item; 16 | } 17 | 18 | // clean up memory 19 | $html->clear(); 20 | unset($html); 21 | 22 | return $ret; 23 | } 24 | 25 | // ----------------------------------------------------------------------------- 26 | // test it! 27 | $ret = scraping_slashdot(); 28 | 29 | foreach($ret as $v) { 30 | echo $v['title'].'
'; 31 | echo ''; 32 | echo '
'; 34 | } 35 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/example/simple_html_dom_utility.php: -------------------------------------------------------------------------------- 1 | find('comment') as $e) 12 | $e->outertext = ''; 13 | 14 | $ret = $html->save(); 15 | 16 | // clean up memory 17 | $html->clear(); 18 | unset($html); 19 | 20 | return $ret; 21 | } 22 | 23 | // ----------------------------------------------------------------------------- 24 | // search elements that contains an specific text 25 | function find_contains($html, $selector, $keyword, $index=-1) { 26 | $ret = array(); 27 | foreach ($html->find($selector) as $e) { 28 | if (strpos($e->innertext, $keyword)!==false) 29 | $ret[] = $e; 30 | } 31 | 32 | if ($index<0) return $ret; 33 | return (isset($ret[$index])) ? $ret[$index] : null; 34 | } 35 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the source files for http://simplehtmldom.sourceforge.net/, 2 | the project page for PHP Simple HTML DOM Parser. 3 | 4 | Source files are written in Markdown: https://en.wikipedia.org/wiki/Markdown 5 | 6 | Site data is generated by MkDocs, a lightweight static site generator for project 7 | documentation: https://www.mkdocs.org/ 8 | 9 | # Folder structure 10 | 11 | `custom_theme` : Contains customizations to the theme provided by MkDocs. 12 | `docs` : Contains the source files for the project page (the actual pages). 13 | `site` : Contains the output files for the project page when build with MkDocs. 14 | `extra.css` : Customizations to the styles provided by MkDocs. 15 | `mkdocs.yml` : The configuration file that is used by MkDocs to generate pages. 16 | 17 | # Adding new pages 18 | 19 | Place new files in `source`. 
Use subfolders (as few levels as possible) to 20 | separate categories. 21 | 22 | Files added to the manual will **not** appear on the project page automatically. 23 | All pages need to be specified in the _mkdocs.yml_ file under "nav:". Simply add 24 | the relative path to the new file where appropriate. 25 | 26 | Note: Files are not added automatically because they are sorted by name if not 27 | specified manually. Since readability is a key factor for manuals, the files must 28 | be sorted in a way that makes it clear to users. 29 | 30 | # Setting up MkDocs 31 | 32 | The installation instructions for MkDocs are provided on their homepage: 33 | https://www.mkdocs.org/#installation 34 | 35 | MkDocs automatically builds the project based on the _mkdocs.yml_ file. Find the 36 | specification for this file at https://www.mkdocs.org/user-guide/configuration/. 37 | 38 | # Building project pages 39 | 40 | The build process depends on your installation of MkDocs. Typically MkDocs is 41 | made available via the command line. 42 | 43 | ## Step 1 - Check your version of MkDocs 44 | 45 | To check your version of MkDocs run this command: 46 | 47 | `mkdocs --version` or 48 | `python3 -m mkdocs --version` 49 | 50 | Should return `version 1.0.4` or higher. If it doesn't, make sure to install the 51 | latest version using `pip install mkdocs` or `python3 -m pip install mkdocs`. If 52 | you don't have pip installed, install it via package manager or follow the 53 | instructions at https://pip.pypa.io/en/stable/installing/ 54 | 55 | ## Step 2 - View the project locally 56 | 57 | MkDocs allows you to view the project files in a browser on your local machine: 58 | 59 | `mkdocs serve` or 60 | `python3 -m mkdocs serve` 61 | 62 | If the process is successful you can access the site at http://127.0.0.1:8000. 
63 | 64 | ## Step 3 - Build the project 65 | 66 | If you are satisfied with the results of the project, build the final project 67 | with this command: 68 | 69 | `mkdocs build` or 70 | `python3 -m mkdocs build` 71 | 72 | Find the output files in the `site` folder. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/custom_theme/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block footer %} 4 | {% include "footer.html" %} 5 |- '.$v['body'].'
'; 33 | echo '
6 |7 | {% endblock %} -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/api.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: API Reference 3 | --- 4 | 5 | # Parsing documents 6 | 7 | The parser accepts documents in the form of URLs, files and strings. The document 8 | must be accessible for reading and cannot exceed [`MAX_FILE_SIZE`](constants.md#max_file_size). 9 | 10 | Name | Description 11 | ---- | ----------- 12 | `str_get_html( string $content ) : object` | Creates a DOM object from string. 13 | `file_get_html( string $filename ) : object` | Creates a DOM object from file or URL. 14 | 15 | # DOM methods & properties 16 | 17 | Name | Description 18 | ---- | ----------- 19 | `__construct( [string $filename] ) : void` | Constructor. Setting the filename parameter automatically loads the contents, either text or file/URL. 20 | `plaintext : string` | Returns the contents extracted from HTML. 21 | `clear() : void` | Clean up memory. 22 | `load( string $content ) : void` | Load contents from string. 23 | `save( [string $filename] ) : string` | Dumps the internal DOM tree back into a string. If `$filename` is set, the resulting string is also saved to that file. 24 | `load_file( string $filename ) : void` | Load contents from a file or a URL. 25 | `set_callback( string $function_name ) : void` | Set a callback function. 26 | `find( string $selector [, int $index] ) : mixed` | Find elements by the CSS selector. Returns the Nth element object if index is set, otherwise returns an array of objects. 27 | 28 | # Element methods & properties 29 | 30 | Name | Description 31 | ---- | ----------- 32 | `[attribute] : string` | Read or write element's attribute value. 33 | `tag : string` | Read or write the tag name of element. 34 | `outertext : string` | Read or write the outer HTML text of element.
35 | `innertext : string` | Read or write the inner HTML text of element. 36 | `plaintext : string` | Read or write the plain text of element. 37 | `find( string $selector [, int $index] ) : mixed` | Find children by the CSS selector. Returns the Nth element object if index is set, otherwise returns an array of objects. 38 | 39 | # DOM traversing 40 | 41 | Name | Description 42 | ---- | ----------- 43 | `$e->children( [int $index] ) : mixed` | Returns the Nth child object if index is set, otherwise returns an array of children. 44 | `$e->parent() : element` | Returns the parent of element. 45 | `$e->first_child() : element` | Returns the first child of element, or null if not found. 46 | `$e->last_child() : element` | Returns the last child of element, or null if not found. 47 | `$e->next_sibling() : element` | Returns the next sibling of element, or null if not found. 48 | `$e->prev_sibling() : element` | Returns the previous sibling of element, or null if not found. 49 | 50 | # Camel naming conventions 51 | 52 | Method | Mapping 53 | ------ | ------- 54 | `$e->getAllAttributes()` | `$e->attr` 55 | `$e->getAttribute( $name )` | `$e->attribute` 56 | `$e->setAttribute( $name, $value)` | `$e->attribute = $value` 57 | `$e->hasAttribute( $name )` | `isset($e->attribute)` 58 | `$e->removeAttribute ( $name )` | `$e->attribute = null` 59 | `$e->getElementById ( $id )` | `$e->find ( "#$id", 0 )` 60 | `$e->getElementsById ( $id [,$index] )` | `$e->find ( "#$id" [, int $index] )` 61 | `$e->getElementByTagName ($name )` | `$e->find ( $name, 0 )` 62 | `$e->getElementsByTagName ( $name [, $index] )` | `$e->find ( $name [, int $index] )` 63 | `$e->parentNode ()` | `$e->parent ()` 64 | `$e->childNodes ( [$index] )` | `$e->children ( [int $index] )` 65 | `$e->firstChild ()` | `$e->first_child ()` 66 | `$e->lastChild ()` | `$e->last_child ()` 67 | `$e->nextSibling ()` | `$e->next_sibling ()` 68 | `$e->prevSibling ()` | `$e->prev_sibling ()`
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/constants.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Constants 3 | --- 4 | 5 | # Constants 6 | 7 | Constants define how the parser treats documents. They can be defined before 8 | loading the parser to globally replace the default values. 9 | 10 | ## DEFAULT_TARGET_CHARSET 11 | 12 | Defines the default target charset for text returned by the parser. 13 | 14 | Default: `'UTF-8'` 15 | 16 | ## DEFAULT_BR_TEXT 17 | 18 | Defines the default text to return for `
` elements. 19 | 20 | Default: `"\r\n"` 21 | 22 | ## DEFAULT_SPAN_TEXT 23 | 24 | Defines the default text to return for `` elements. 25 | 26 | Default: `' '` 27 | 28 | ## MAX_FILE_SIZE 29 | 30 | Defines the maximum number of bytes the parser can load into memory. This limit 31 | only applies to the source file or string. 32 | 33 | Default: `600000` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/definitions.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Definitions 3 | --- 4 | 5 | # Definitions 6 | 7 | The definitions below are an essential part of the parser. 8 | 9 | ## Node Types 10 | 11 | The type of a node is determined during parsing and represented by one of the elements in the list below. 12 | 13 | | Type | Description 14 | | ---- | ----------- 15 | | `HDOM_TYPE_ELEMENT` | Start tag (i.e. ``) 16 | | `HDOM_TYPE_COMMENT` | HTML comment (i.e. ``) 17 | | `HDOM_TYPE_TEXT` | Plain text (i.e. `Hello, World!`) 18 | | `HDOM_TYPE_ENDTAG` | End tag (i.e. ``) 19 | | `HDOM_TYPE_ROOT` | Root element. There can always only be one root element in the DOM. 20 | | `HDOM_TYPE_UNKNOWN` | Unknown type (i.e. CDATA, DOCTYPE, etc...) 21 | 22 | ### Example 23 | 24 | ```html 25 | Hello, World! 26 | ``` 27 | 28 | _Note_: `HDOM_TYPE_ROOT` always exists regardless of the actual document structure. 29 | 30 | | HTML | Node Type 31 | | ---- | --------- 32 | | | `HDOM_TYPE_ROOT` 33 | | `` | `HDOM_TYPE_UNKNOWN` 34 | | `` | `HDOM_TYPE_ELEMENT` 35 | | `` | `HDOM_TYPE_COMMENT` 36 | | `` | `HDOM_TYPE_ENDTAG` 37 | | `Hello, World!` | `HDOM_TYPE_TEXT` 38 | 39 | ## Quote Types 40 | 41 | Identifies the quoting type on attribute values. 
42 | 43 | | Type | Description 44 | | ---- | ----------- 45 | | `HDOM_QUOTE_DOUBLE` | Double quotes (`""`) 46 | | `HDOM_QUOTE_SINGLE` | Single quotes (`''`) 47 | | `HDOM_QUOTE_NO` | Not quoted (flag) 48 | 49 | _Note_: Attributes with no values (flags) are stored as `HDOM_QUOTE_NO`. 50 | 51 | ### Example 52 | 53 | ```html 54 |Hello, World!
55 | ``` 56 | 57 | | Attribute | Description 58 | | --------- | ----------- 59 | | `class="paragraph"` | `HDOM_QUOTE_DOUBLE` 60 | | `id='info1'` | `HDOM_QUOTE_SINGLE` 61 | | `hidden` | `HDOM_QUOTE_NO` 62 | 63 | ## Node Info Types 64 | 65 | Each node stores additional information (metadata) that is identified by the elements below. 66 | 67 | | Type | Description 68 | | ---- | ----------- 69 | | `HDOM_INFO_BEGIN` | Cursor position for the start tag of a node. 70 | | `HDOM_INFO_END` | Cursor position for the end tag of a node. A value of zero indicates a node with no end tag (missing closing tag). 71 | | `HDOM_INFO_QUOTE` | Quote type for attribute values. The value must be an element of [Quote Type](#quote-types). 72 | | `HDOM_INFO_SPACE` | Array of whitespace around attributes (see [Attribute Whitespace](#attribute-whitespace)). 73 | | `HDOM_INFO_TEXT` | Non-HTML text in tags (i.e. comments, doctype, etc...). 74 | | `HDOM_INFO_INNER` | Inner text of a node. 75 | | `HDOM_INFO_OUTER` | Outer text of a node. 76 | | `HDOM_INFO_ENDSPACE` | Whitespace at the end of a tag before the closing bracket. 77 | 78 | ## Attribute Whitespace 79 | 80 | Whitespace around attributes is stored in the form of an array with three elements: 81 | 82 | | Element | Description 83 | | ------- | ----------- 84 | | `0` | Whitespace before the attribute name. 85 | | `1` | Whitespace between attribute name and the equal sign. 86 | | `2` | Whitespace between the equal sign and the attribute value 87 | 88 | ### Example 89 | 90 | ```html 91 |Hello, World!
92 | ``` 93 | 94 | _Note_: Whitespace before attribute names is not displayed in the browser. It is, however, part of the attributes. 95 | 96 | | Attribute | Description 97 | | --------- | ----------- 98 | | ` class="paragraph"` | `[0] => ' ', [1] => '', [2] => ''` 99 | | ` id = 'info1'` | `[0] => ' ', [1] => ' ', [2] => ' '` 100 | | `hidden` | `[0] => '', [1] => '', [2] => ''` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/file_get_html.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: file_get_html 3 | --- 4 | 5 | # file_get_html 6 | 7 | ```php 8 | file_get_html ( string $url [, bool $use_include_path = false [, resource $context = null [, int $offset = 0 [, int $maxLen = -1 [, bool $lowercase = true [, bool $forceTagsClosed = true [, string $target_charset = DEFAULT_TARGET_CHARSET [, bool $stripRN = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT ]]]]]]]]]] ) 9 | ``` 10 | 11 | Parses the provided file and returns the DOM object. 12 | 13 | | Parameter | Description 14 | | --------- | ----------- 15 | | `url` | Name or URL of the file to read. 16 | | `use_include_path` | See [`file_get_contents`](http://php.net/manual/en/function.file-get-contents.php#refsect1-function.file-get-contents-parameters) 17 | | `context` | See [`file_get_contents`](http://php.net/manual/en/function.file-get-contents.php#refsect1-function.file-get-contents-parameters) 18 | | `offset` | See [`file_get_contents`](http://php.net/manual/en/function.file-get-contents.php#refsect1-function.file-get-contents-parameters) 19 | | `maxLen` | See [`file_get_contents`](http://php.net/manual/en/function.file-get-contents.php#refsect1-function.file-get-contents-parameters) 20 | | `lowercase` | Forces lowercase matching of tags if enabled. This is very useful when loading documents with mixed naming conventions.
21 | | `forceTagsClosed` | Obsolete. This parameter is no longer used by the parser. 22 | | `target_charset` | Defines the target charset when returning text from the document. 23 | | `stripRN` | If enabled, removes newlines before parsing the document. 24 | | `defaultBRText` | Defines the default text to return for `
` elements. 25 | | `defaultSpanText` | Defines the default text to return for `` elements. 26 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/__construct.md: -------------------------------------------------------------------------------- 1 | # __construct 2 | 3 | ```php 4 | __construct ( [ string $str = null [, bool $lowercase = true [, bool $forceTagsClosed = true [, string $target_charset = DEFAULT_TARGET_CHARSET [, bool $stripRN = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT [, int $options = 0 ]]]]]]]]) : object 5 | ``` 6 | 7 | Creates a new `simple_html_dom` object. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `str` | The HTML document string. 12 | | `lowercase` | Tag names are parsed in lowercase letters if enabled. 13 | | `forceTagsClosed` | Tags inside block tags are forcefully closed if the closing tag was omitted. 14 | | `target_charset` | Defines the target charset for text returned by the parser. 15 | | `stripRN` | Newline characters are replaced by whitespace if enabled. 16 | | `defaultBRText` | Defines the default text to return for `
` elements. 17 | | `defaultSpanText` | Defines the default text to return for `` elements. 18 | | `options` | Additional options for the parser. Currently supports `'HDOM_SMARTY_AS_TEXT'` to remove [Smarty](https://www.smarty.net/) scripts. 19 | 20 | Returns the object. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/__destruct.md: -------------------------------------------------------------------------------- 1 | # __destruct 2 | 3 | ```php 4 | __destruct () 5 | ``` 6 | 7 | Destroys the current object and clears memory. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/__get.md: -------------------------------------------------------------------------------- 1 | # __get 2 | 3 | ```php 4 | __get ( string $name ) : mixed 5 | ``` 6 | 7 | See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.get) 8 | 9 | Supports following names: 10 | 11 | | Name | Description 12 | | ---- | ----------- 13 | | `outertext` | Returns the outer text of the root element. 14 | | `innertext` | Returns the inner text of the root element. 15 | | `plaintext` | Returns the plain text of the root element. 16 | | `charset` | Returns the charset for the document. 17 | | `target_charset` | Returns the target charset for the document. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/__toString.md: -------------------------------------------------------------------------------- 1 | # __toString 2 | 3 | ```php 4 | __toString () : string 5 | ``` 6 | 7 | Returns the inner text of the root element of the DOM. 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/as_text_node.md: -------------------------------------------------------------------------------- 1 | # as_text_node (protected) 2 | 3 | ```php 4 | as_text_node ( string $tag ) : bool 5 | ``` 6 | 7 | Adds a tag as text node. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `tag` | The element's tag name. 12 | 13 | Returns true on success. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/childNodes.md: -------------------------------------------------------------------------------- 1 | # childNodes 2 | 3 | ```php 4 | childNodes ( [ int $idx = -1 ] ) : mixed 5 | ``` 6 | 7 | Returns children of the root element. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `idx` | Index of the child element to return. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/clear.md: -------------------------------------------------------------------------------- 1 | # clear 2 | 3 | ```php 4 | clear () 5 | ``` 6 | 7 | Cleans up memory to prevent [PHP 5 circular references memory leak](https://bugs.php.net/bug.php?id=33595). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/copy_skip.md: -------------------------------------------------------------------------------- 1 | # copy_skip (protected) 2 | 3 | ```php 4 | copy_skip ( string $chars ) : string 5 | ``` 6 | 7 | Skips characters starting at the current parsing position in the document. Sets the parsing position to the first character not in the provided list of characters. 
8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `chars` | A list of characters to skip. 12 | 13 | Returns the skipped characters. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/copy_until.md: -------------------------------------------------------------------------------- 1 | # copy_until (protected) 2 | 3 | ```php 4 | copy_until ( string $chars ) : string 5 | ``` 6 | 7 | Copies all characters starting at the current parsing position in the document. Sets the parsing position to the first character that matches any of the characters in the provided list of characters. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `chars` | A list of characters to stop copying at. 12 | 13 | Returns the copied characters. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/copy_until_char.md: -------------------------------------------------------------------------------- 1 | # copy_until_char (protected) 2 | 3 | ```php 4 | copy_until_char ( string $char ) : string 5 | ``` 6 | 7 | Copies all characters starting at the current parsing position in the document. Sets the parsing position to the first character that matches the provided character. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `char` | A character to stop copying at. 12 | 13 | Returns the copied characters. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/createElement.md: -------------------------------------------------------------------------------- 1 | # createElement 2 | 3 | ```php 4 | createElement ( string $name [, string $value = null ] ) : object 5 | ``` 6 | 7 | Creates a new element.
8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `name` | Name of the element 12 | | `value` | Value of the element 13 | 14 | Returns the element. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/createTextNode.md: -------------------------------------------------------------------------------- 1 | # createTextNode 2 | 3 | ```php 4 | createTextNode ( string $value ) : object 5 | ``` 6 | 7 | Creates a new text element. 8 | 9 | Returns the element. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/dump.md: -------------------------------------------------------------------------------- 1 | # dump 2 | 3 | ```php 4 | dump ( [ bool $show_attr = true ] ) : string 5 | ``` 6 | 7 | Dumps the entire DOM into a string. Useful for debugging purposes. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `show_attr` | Attributes are included in the dump when enabled. 12 | 13 | Returns the DOM tree as a string. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/find.md: -------------------------------------------------------------------------------- 1 | # find 2 | 3 | ```php 4 | find ( string $selector [, int $idx = null [, bool $lowercase = false ]] ) : mixed 5 | ``` 6 | 7 | Finds elements in the DOM. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `selector` | A [CSS style selector](/manual/selectors). 12 | | `idx` | Index of the element to return. 13 | | `lowercase` | Matches tag names case-insensitively when enabled. 14 | 15 | Returns an array of matches or a single element if `idx` is defined.
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/firstChild.md: -------------------------------------------------------------------------------- 1 | # firstChild 2 | 3 | ```php 4 | firstChild () : object 5 | ``` 6 | 7 | Returns the first child of the root element. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/getElementById.md: -------------------------------------------------------------------------------- 1 | # getElementById 2 | 3 | ```php 4 | getElementById ( string $id ) : object 5 | ``` 6 | 7 | Searches an element by id. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `id` | ID of the element to find. 12 | 13 | Returns the element or null if no match was found. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/getElementByTagName.md: -------------------------------------------------------------------------------- 1 | # getElementByTagName 2 | 3 | ```php 4 | getElementByTagName ( string $name ) : object 5 | ``` 6 | 7 | Searches an element by tag name. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `name` | Tag name of the element to find. 12 | 13 | Returns the element or null if no match was found. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/getElementsById.md: -------------------------------------------------------------------------------- 1 | # getElementsById 2 | 3 | ```php 4 | getElementsById ( string $id [, int $idx = null ] ) : object 5 | ``` 6 | 7 | Searches elements by id. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `id` | ID of the element to find. 
12 | | `idx` | Returns the element at the specified index if defined. 13 | 14 | Returns the element(s) or null if no match was found. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/getElementsByTagName.md: -------------------------------------------------------------------------------- 1 | # getElementsByTagName 2 | 3 | ```php 4 | getElementsByTagName ( string $name [, int $idx = -1 ] ) : object 5 | ``` 6 | 7 | Searches elements by tag name. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `name` | Tag name of the element to find. 12 | | `idx` | Returns the element at the specified index. 13 | 14 | Returns the element(s) or null if no match was found. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/lastChild.md: -------------------------------------------------------------------------------- 1 | # lastChild 2 | 3 | ```php 4 | lastChild () : object 5 | ``` 6 | 7 | Returns the last child of the root element. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/link_nodes.md: -------------------------------------------------------------------------------- 1 | # link_nodes (protected) 2 | 3 | ```php 4 | link_nodes ( object &$node, bool $is_child ) 5 | ``` 6 | 7 | Links the provided node to the DOM tree. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `node` | The node to link to the DOM tree. 12 | | `is_child` | If active, makes the node a sibling of the current node (child of parent). 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/load.md: -------------------------------------------------------------------------------- 1 | # load 2 | 3 | ```php 4 | load ( string $str [, bool $lowercase = true [, bool $stripRN = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT [, int $options = 0 ]]]]]) : object 5 | ``` 6 | 7 | Loads the provided HTML document string. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `str` | The HTML document string. 12 | | `lowercase` | Tag names are parsed in lowercase letters if enabled. 13 | | `stripRN` | Newline characters are replaced by whitespace if enabled. 14 | | `defaultBRText` | Defines the default text to return for `
` elements. 15 | | `defaultSpanText` | Defines the default text to return for `` elements. 16 | | `options` | Additional options for the parser. Currently supports `'HDOM_SMARTY_AS_TEXT'` to remove [Smarty](https://www.smarty.net/) scripts. 17 | 18 | Returns the object. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/loadFile.md: -------------------------------------------------------------------------------- 1 | # loadFile 2 | 3 | ```php 4 | loadFile (...) 5 | ``` 6 | 7 | This function is a wrapper for [`load_file`](#load_file) -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/load_file.md: -------------------------------------------------------------------------------- 1 | # load_file 2 | 3 | ```php 4 | load_file (...) : object 5 | ``` 6 | 7 | Loads a HTML document from file. Supports arguments of [`file_get_contents`](http://php.net/manual/en/function.file-get-contents.php). 8 | 9 | Returns the object. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/parse.md: -------------------------------------------------------------------------------- 1 | # parse (protected) 2 | 3 | ```php 4 | parse () 5 | ``` 6 | 7 | Parses the document. This function is called after the document was loaded into `$this->doc`. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/parse_attr.md: -------------------------------------------------------------------------------- 1 | # parse_attr (protected) 2 | 3 | ```php 4 | parse_attr ( object $node, string $name, array &$space ) 5 | ``` 6 | 7 | Parses a single attribute starting at the current parsing position in the document. 
8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `node` | The current element (node). 12 | | `name` | The attribute name. 13 | | `space` | An array of whitespace surrounding the current attribute (see [Attribute Whitespace](../definitions/#attribute-whitespace)). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/parse_charset.md: -------------------------------------------------------------------------------- 1 | # parse_charset (protected) 2 | 3 | ```php 4 | parse_charset () 5 | ``` 6 | 7 | Parses the charset. 8 | 9 | If the callback function `get_last_retrieve_url_contents_content_type` exists, it is assumed to return the content type header for the current document as a string. 10 | 11 | Uses the charset from the metadata of the page if defined. 12 | 13 | If none of the previous conditions are met, the charset is determined by `mb_detect_encoding` if multi-byte support is active. 14 | 15 | If multi-byte support is not active, the charset is assumed to be `'UTF-8'`. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/prepare.md: -------------------------------------------------------------------------------- 1 | # prepare (protected) 2 | 3 | ```php 4 | prepare ( string $str [, bool $lowercase = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT ]]] ) 5 | ``` 6 | 7 | Initializes the DOM object. 8 | 9 | | Parameters | Description 10 | | ---------- | ----------- 11 | | `str` | The HTML document string. 12 | | `lowercase` | Tag names are parsed in lowercase letters if enabled. 13 | | `defaultBRText` | Defines the default text to return for `
` elements. 14 | | `defaultSpanText` | Defines the default text to return for `` elements. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/read_tag.md: -------------------------------------------------------------------------------- 1 | # read_tag (protected) 2 | 3 | ```php 4 | read_tag () : bool 5 | ``` 6 | 7 | Reads a single tag starting at the current parsing position in the document. The tag is automatically added to the DOM. 8 | 9 | Returns true if a tag was found. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/remove_callback.md: -------------------------------------------------------------------------------- 1 | # remove_callback 2 | 3 | ```php 4 | remove_callback () 5 | ``` 6 | 7 | Removes the callback set by [`set_callback`](#set_callback). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/remove_noise.md: -------------------------------------------------------------------------------- 1 | # remove_noise (protected) 2 | 3 | ```php 4 | remove_noise ( string $pattern [, bool $remove_tag = false] ) 5 | ``` 6 | 7 | Replaces noise in the document (i.e. scripts) by placeholders and adds the removed contents to `$this->noise`. 8 | 9 | _Note_: Noise is replaced by placeholders in order to allow restoring the original contents. Placeholders take the form of `'___noise___1000'` where the number is increased by one for each removed noise. 10 | 11 | | Parameter | Description 12 | | --------- | ----------- 13 | | `pattern` | A regular expression that matches the noise to remove. 14 | | `remove_tag` | Removes the entire match when enabled or submatches when disabled. 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/restore_noise.md: -------------------------------------------------------------------------------- 1 | # restore_noise (protected) 2 | 3 | ```php 4 | restore_noise ( string $text ) : string 5 | ``` 6 | 7 | Restores noise in the provided string by replacing noise placeholders by their original contents. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `text` | A string (potentially) containing noise placeholders. 12 | 13 | Returns the string with original contents restored or the original string if it doesn't contain noise placeholders. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/save.md: -------------------------------------------------------------------------------- 1 | # save 2 | 3 | ```php 4 | save ( [ string $filepath = '' ] ) : string 5 | ``` 6 | 7 | Writes the current DOM to file. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `filepath` | Writes to file if the provided file path is not empty. 12 | 13 | Returns the document string. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/search_noise.md: -------------------------------------------------------------------------------- 1 | # search_noise (protected) 2 | 3 | ```php 4 | search_noise ( string $text ) : string 5 | ``` 6 | 7 | Find a single noise element by providing the noise placeholder text. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `text` | The noise placeholder to find. 12 | 13 | Returns the original contents for the placeholder. 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/set_callback.md: -------------------------------------------------------------------------------- 1 | # set_callback 2 | 3 | ```php 4 | set_callback ( string $function_name ) 5 | ``` 6 | 7 | Sets the callback function which is called on each element of the DOM when building outertext. 8 | The function must accept a single parameter of type `simple_html_dom_node`. 9 | 10 | | Parameter | Description 11 | | --------- | ----------- 12 | | `function_name` | Name of the function. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/simple_html_dom.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: simple_html_dom 3 | --- 4 | 5 | # simple_html_dom 6 | 7 | Represents the [DOM](https://en.wikipedia.org/wiki/Document_Object_Model) in memory. Provides functions to parse documents and access individual elements (see [`simple_html_dom_node`](../simple_html_dom_node/simple_html_dom_node.md)). 8 | 9 | # Public Properties 10 | 11 | | Property | Description 12 | | -------- | ----------- 13 | | `root` | Root node of the document. 14 | | `nodes` | List of top-level nodes in the document. 15 | | `callback` | Callback function that is called for each element in the DOM when generating outertext. 16 | | `lowercase` | If enabled, all tag names are converted to lowercase when parsing documents. 17 | | `original_size` | Original document size in bytes. 18 | | `size` | Current document size in bytes. 19 | | `_charset` | Charset of the original document. 20 | | `_target_charset` | Target charset for the current document. 21 | | `default_span_text` | Text to return for `<span>` elements.
22 | 23 | # Protected Properties 24 | 25 | | Property | Description 26 | | -------- | ----------- 27 | | `pos` | Current parsing position within `doc`. 28 | | `doc` | The original document. 29 | | `char` | Character at position `pos` in `doc`. 30 | | `cursor` | Current element cursor in the document. 31 | | `parent` | Parent element node. 32 | | `noise` | Noise from the original document (i.e. scripts, comments, etc...). 33 | | `token_blank` | Tokens that are considered whitespace in HTML. 34 | | `token_equal` | Tokens to identify the equal sign for attributes, stopping either at the closing tag ("/" i.e. `<html />`) or the end of an opening tag (">" i.e. `<html>`). 35 | | `token_slash` | Tokens to identify the end of a tag name. A tag name either ends on the ending slash ("/" i.e. `<html />`) or whitespace (`"\s\r\n\t"`). 36 | | `token_attr` | Tokens to identify the end of an attribute. 37 | | `default_br_text` | Text to return for `<br>` elements. 38 | | `self_closing_tags` | A list of tag names where the closing tag is omitted. 39 | | `block_tags` | A list of tag names where remaining unclosed tags are forcibly closed. 40 | | `optional_closing_tags` | A list of tag names where the closing tag can be omitted. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom/skip.md: -------------------------------------------------------------------------------- 1 | 2 | # skip (protected) 3 | 4 | ```php 5 | skip ( string $chars ) 6 | ``` 7 | 8 | Skips characters starting at the current parsing position in the document. Sets the parsing position to the first character not in the provided list of characters. 9 | 10 | | Parameter | Description 11 | | --------- | ----------- 12 | | `chars` | A list of characters to skip. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/__construct.md: -------------------------------------------------------------------------------- 1 | # __construct 2 | 3 | ```php 4 | __construct ( [ object $dom ] ) : object 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `dom` | An object of type [`simple_html_dom`](api/simple_html_dom/). 10 | 11 | Constructs a new object of type `simple_html_dom_node`, assigns `$dom` as DOM object and adds itself to the list of nodes in `$dom`. 12 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/__destruct.md: -------------------------------------------------------------------------------- 1 | # __destruct 2 | 3 | ```php 4 | __destruct ( ) 5 | ``` 6 | 7 | Destructs the current object and frees memory.
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/__get.md: -------------------------------------------------------------------------------- 1 | # __get 2 | 3 | ```php 4 | __get ( string $name ) : mixed 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `name` | `outertext`, `innertext`, `plaintext`, `xmltext` or attribute name. 10 | 11 | See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.get) 12 | 13 | If the provided name is a valid attribute name, returns the attribute value. Otherwise a value according to the table below. 14 | 15 | | Name | Description 16 | | ---- | ----------- 17 | | `outertext` | Returns the outer text of the current node. 18 | | `innertext` | Returns the inner text of the current node. 19 | | `plaintext` | Returns the plain text of the current node. 20 | | `xmltext` | Returns the xml representation for the inner text of the current node as a CDATA section. 21 | 22 | Returns nothing if the provided name is neither a valid attribute name, nor a valid parameter name. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/__isset.md: -------------------------------------------------------------------------------- 1 | # __isset 2 | 3 | ```php 4 | __isset ( string $name ) : bool 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `name` | `outertext`, `innertext`, `plaintext` or attribute name. 10 | 11 | See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.get) 12 | 13 | Returns true if the provided name is a valid attribute name or any of the values in the table below. False otherwise. 14 | 15 | | Name | Description 16 | | ---- | ----------- 17 | | `outertext` | Returns the outer text of the current node. 
18 | | `innertext` | Returns the inner text of the current node. 19 | | `plaintext` | Returns the plain text of the current node. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/__set.md: -------------------------------------------------------------------------------- 1 | # __set 2 | 3 | ```php 4 | __set ( string $name, mixed $value ) 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `name` | `outertext`, `innertext` or attribute name. 10 | | `value` | Value to set. 11 | 12 | See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.get) 13 | 14 | Sets the outer text of the current node to `$value` if `$name` is `outertext`. 15 | 16 | Sets the inner text of the current node to `$value` if `$name` is `innertext`. 17 | 18 | Otherwise, adds or updates an attribute with name `$name` and value `$value` to the current node. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/__toString.md: -------------------------------------------------------------------------------- 1 | # __toString 2 | 3 | ```php 4 | __toString ( ) : string 5 | ``` 6 | 7 | Returns the outer text of the current node. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/__unset.md: -------------------------------------------------------------------------------- 1 | # __unset 2 | 3 | ```php 4 | __unset ( string $name ) 5 | ``` 6 | 7 | Removes the attribute with name `$name` from the current node if it exists. 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/addClass.md: -------------------------------------------------------------------------------- 1 | # addClass 2 | 3 | ```php 4 | addClass ( mixed $class ) 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `class` | Specifies one or more class names to be added. 10 | 11 | Adds one or more class names to the current node. 12 | 13 | **Remarks** 14 | 15 | * To add more than one class, separate the class names with space or provide them as an array. 16 | 17 | **Examples** 18 | 19 | ```php 20 | $node->addClass('hidden'); 21 | $node->addClass('article important'); 22 | $node->addClass(array('article', 'new')); 23 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/appendChild.md: -------------------------------------------------------------------------------- 1 | # appendChild 2 | 3 | ```php 4 | appendChild ( object $node ) : object 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `node` | An object of type [`simple_html_dom_node`](../simple_html_dom_node/) 10 | 11 | Makes the current node parent of the node provided to this function. 12 | 13 | Returns the provided node. 14 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/childNodes.md: -------------------------------------------------------------------------------- 1 | # childNodes 2 | 3 | ```php 4 | childNodes ( [ int $idx = -1 ] ) : mixed 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `idx` | Index of the node to return or `-1` to return all nodes. 10 | 11 | Returns all or one specific child node from the current node. 
12 | 13 | ## Remarks 14 | 15 | This function is a wrapper for [`children`](../children/) -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/children.md: -------------------------------------------------------------------------------- 1 | # children 2 | 3 | ```php 4 | children ( [ int $idx = -1 ] ) : mixed 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `idx` | Index of the node to return or `-1` to return all nodes. 10 | 11 | Returns all or one specific child node from the current node. 12 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/clear.md: -------------------------------------------------------------------------------- 1 | # clear 2 | 3 | ```php 4 | clear ( ) 5 | ``` 6 | 7 | Sets all properties in the current node, which contain objects, to null. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/convert_text.md: -------------------------------------------------------------------------------- 1 | # convert_text 2 | 3 | ```php 4 | convert_text ( string $text ) : string 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `text` | Text to convert. 10 | 11 | Assumes that the provided text is in the form of the configured source character set (see [`sourceCharset`](../simple_html_dom_node/)) and converts it to the specified target character set (see [`targetCharset`](../simple_html_dom_node/)). 12 | 13 | Returns the converted text.
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/dump.md: -------------------------------------------------------------------------------- 1 | # dump 2 | 3 | ```php 4 | dump ( [ bool $show_attr = false [, int $depth = 0 ]] ) 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `show_attr` | Attribute names are included in the output if enabled. 10 | | `depth` | Depth of the current element 11 | 12 | Dumps information about the current node and all child nodes recursively. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/dump_node.md: -------------------------------------------------------------------------------- 1 | # dump_node 2 | 3 | ```php 4 | dump_node ( [ bool $echo = true ] ) : mixed 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `echo` | Echoes the dump details directly if enabled. 10 | 11 | Dumps information about the current document node. Returns a string if `$echo` is set to false, null otherwise. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/find.md: -------------------------------------------------------------------------------- 1 | # find 2 | 3 | ```php 4 | find ( 5 | string $selector 6 | [, int $idx = null ] 7 | [, bool $lowercase = false ] 8 | ) : mixed 9 | ``` 10 | 11 | | Parameter | Description 12 | | --------- | ----------- 13 | | `selector` | [CSS](https://www.w3.org/TR/selectors/) selector. 14 | | `idx` | Index of element to return. 15 | | `lowercase` | Matches tag names case insensitive (lowercase) if enabled. 16 | 17 | Finds one or more nodes in the current document, using CSS selectors. 18 | 19 | * Returns null if no match was found. 
20 | * Returns an array of [`simple_html_dom_node`](../simple_html_dom_node/) if `$idx` is null. 21 | * Returns an object of type [`simple_html_dom_node`](../simple_html_dom_node/) if `$idx` is anything __but__ null. 22 | 23 | ## Supported Selectors 24 | 25 | | Selector | Description 26 | | --------- | ----------- 27 | | `*` | [Universal selector](https://www.w3.org/TR/selectors/#the-universal-selector) 28 | | `E` | [Type (tag name) selector](https://www.w3.org/TR/selectors/#type-selectors) 29 | | `E#id` | [ID selector](https://www.w3.org/TR/selectors/#id-selectors) 30 | | `E.class` | [Class selector](https://www.w3.org/TR/selectors/#class-html) 31 | | `E[attr]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors) 32 | | `E[attr="value"]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors) 33 | | `E[attr="value"] i` | [Case-sensitivity](https://www.w3.org/TR/selectors/#attribute-case) 34 | | `E[attr="value"] s` | [Case-sensitivity](https://www.w3.org/TR/selectors/#attribute-case) 35 | | `E[attr~="value"]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors) 36 | | `E[attr^="value"]` | [Substring matching attribute selector](https://www.w3.org/TR/selectors/#attribute-substrings) 37 | | `E[attr$="value"]` | [Substring matching attribute selector](https://www.w3.org/TR/selectors/#attribute-substrings) 38 | | `E[attr*="value"]` | [Substring matching attribute selector](https://www.w3.org/TR/selectors/#attribute-substrings) 39 | | `E[attr|="value"]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors) 40 | | `E F` | [Descendant combinator](https://www.w3.org/TR/selectors/#descendant-combinators) 41 | | `E > F` | [Child combinator](https://www.w3.org/TR/selectors/#child-combinators) 42 | | `E + F` | [Next-sibling combinator](https://www.w3.org/TR/selectors/#adjacent-sibling-combinators) 43 | | `E ~ F` | [Subsequent-sibling 
combinator](https://www.w3.org/TR/selectors/#general-sibling-combinators) 44 | | `E, F` | [Selector list](https://www.w3.org/TR/selectors/#selector-list) -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/find_ancestor_tag.md: -------------------------------------------------------------------------------- 1 | # find_ancestor_tag 2 | 3 | ```php 4 | find_ancestor_tag ( string $tag ) : object 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `tag` | Tag name of the element to find. 10 | 11 | Returns the first matching node that matches the specified tag name or null if no match was found. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/firstChild.md: -------------------------------------------------------------------------------- 1 | # firstChild 2 | 3 | ```php 4 | firstChild ( ) : mixed 5 | ``` 6 | 7 | This function is a wrapper for [`first_child`](../first_child/) -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/first_child.md: -------------------------------------------------------------------------------- 1 | # first_child 2 | 3 | ```php 4 | first_child ( ) : mixed 5 | ``` 6 | 7 | Returns the first child node of the current node or null if the current node has no child nodes. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/getAllAttributes.md: -------------------------------------------------------------------------------- 1 | # getAllAttributes 2 | 3 | ```php 4 | getAllAttributes ( ) : array 5 | ``` 6 | 7 | Returns all attributes for the current node.
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/getAttribute.md: -------------------------------------------------------------------------------- 1 | # getAttribute 2 | 3 | ```php 4 | getAttribute ( string $name ) : mixed 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `name` | Attribute name. 10 | 11 | Returns the value for the attribute `$name`. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/getElementById.md: -------------------------------------------------------------------------------- 1 | # getElementById 2 | 3 | ```php 4 | getElementById ( string $id ) : object 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `id` | Element id. 10 | 11 | Returns the first element with the specified id. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/getElementByTagName.md: -------------------------------------------------------------------------------- 1 | # getElementByTagName 2 | 3 | ```php 4 | getElementByTagName ( string $name ) : object 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `name` | Tag name. 10 | 11 | Returns the first element with the specified tag name. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/getElementsById.md: -------------------------------------------------------------------------------- 1 | # getElementsById 2 | 3 | ```php 4 | getElementsById ( string $id [, int $idx = null] ) : mixed 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `id` | Element id. 10 | | `idx` | Index of element to return. 
11 | 12 | Returns all elements with the specified id if `$idx` is null, or a specific one if `$idx` is a valid index. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/getElementsByTagName.md: -------------------------------------------------------------------------------- 1 | # getElementsByTagName 2 | 3 | ```php 4 | getElementsByTagName ( string $name [, int $idx = null ] ) : mixed 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `name` | Tag name. 10 | | `idx` | Index of the element to return. 11 | 12 | Returns all elements with the specified tag name if `$idx` is null, or a specific one if `$idx` is a valid index. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/get_display_size.md: -------------------------------------------------------------------------------- 1 | # get_display_size 2 | 3 | ```php 4 | get_display_size ( ) : mixed 5 | ``` 6 | 7 | Returns false if the current node is not an image. 8 | 9 | Returns an associative array of two elements - `height` and `width` - that represent the display size of the image. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/hasAttribute.md: -------------------------------------------------------------------------------- 1 | # hasAttribute 2 | 3 | ```php 4 | hasAttribute ( string $name ) : bool 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `name` | Name of the attribute. 10 | 11 | Returns true if the current node has an attribute with the specified name. 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/hasChildNodes.md: -------------------------------------------------------------------------------- 1 | # hasChildNodes 2 | 3 | ```php 4 | hasChildNodes ( ) : bool 5 | ``` 6 | 7 | This is a wrapper function for [`has_child`](../has_child/). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/hasClass.md: -------------------------------------------------------------------------------- 1 | # hasClass 2 | 3 | ```php 4 | hasClass ( string $class ) : bool 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `class` | Specifies the class name to search for. 10 | 11 | Returns true if the current node has the specified class name. 12 | 13 | **Examples** 14 | 15 | ```php 16 | $node->hasClass('article'); 17 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/has_child.md: -------------------------------------------------------------------------------- 1 | # has_child 2 | 3 | ```php 4 | has_child ( ) : bool 5 | ``` 6 | 7 | Returns true if the current node has one or more child nodes. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/innertext.md: -------------------------------------------------------------------------------- 1 | # innertext 2 | 3 | ```php 4 | innertext ( ) : string 5 | ``` 6 | 7 | Returns the inner text (everything inside the opening and closing tags) of the current node. 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/is_utf8.md: -------------------------------------------------------------------------------- 1 | # is_utf8 (static) 2 | 3 | ```php 4 | is_utf8 ( string $str ) : bool 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `str` | String to test. 10 | 11 | Returns true if the provided string is a valid UTF-8 string. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/lastChild.md: -------------------------------------------------------------------------------- 1 | # lastChild 2 | 3 | ```php 4 | lastChild ( ) : object 5 | ``` 6 | 7 | This is a wrapper for [`last_child`](../last_child/). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/last_child.md: -------------------------------------------------------------------------------- 1 | # last_child 2 | 3 | ```php 4 | last_child ( ) : object 5 | ``` 6 | 7 | Returns the last child of the current node or null if the current node has no child elements. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/makeup.md: -------------------------------------------------------------------------------- 1 | # makeup 2 | 3 | ```php 4 | makeup ( ) : string 5 | ``` 6 | 7 | Returns the HTML representation of the current node. 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/match.md: -------------------------------------------------------------------------------- 1 | # match (protected) 2 | 3 | ```php 4 | match ( 5 | string $exp 6 | , string $pattern 7 | , string $value 8 | , string $case_sensitivity 9 | ) : bool 10 | ``` 11 | 12 | | Parameter | Description 13 | | --------- | ----------- 14 | | `exp` | Expression 15 | | `pattern` | Pattern 16 | | `value` | Value 17 | | `case_sensitivity` | Case sensitivity 18 | 19 | Matches a single attribute value against the specified attribute selector. See also [`find`](../find/). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/nextSibling.md: -------------------------------------------------------------------------------- 1 | # nextSibling 2 | 3 | ```php 4 | nextSibling ( ) : object 5 | ``` 6 | 7 | This is a wrapper for [`next_sibling`](../next_sibling/). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/next_sibling.md: -------------------------------------------------------------------------------- 1 | # next_sibling 2 | 3 | ```php 4 | next_sibling ( ) : object 5 | ``` 6 | 7 | Returns the next sibling of the current node or null if the current node has no next sibling. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/nodeName.md: -------------------------------------------------------------------------------- 1 | # nodeName 2 | 3 | ```php 4 | nodeName ( ) : string 5 | ``` 6 | 7 | Returns the name of the current node (tag name). 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/outertext.md: -------------------------------------------------------------------------------- 1 | # outertext 2 | 3 | ```php 4 | outertext ( ) : string 5 | ``` 6 | 7 | Returns the outer text (everything including the opening and closing tags) of the current node. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/parent.md: -------------------------------------------------------------------------------- 1 | # parent 2 | 3 | ```php 4 | parent ( [ object $parent = null ] ) : object 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `parent` | The parent node 10 | 11 | * Returns the parent node of the current node if `$parent` is null. 12 | * Sets the parent node of the current node if `$parent` is not null. In this case the current node is automatically added to the list of nodes in the parent node. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/parentNode.md: -------------------------------------------------------------------------------- 1 | # parentNode 2 | 3 | ```php 4 | parentNode () : object 5 | ``` 6 | 7 | Returns the current node's parent.
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/parse_selector.md: -------------------------------------------------------------------------------- 1 | # parse_selector (protected) 2 | 3 | ```php 4 | parse_selector ( string $selector_string ) : array 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `selector_string` | The selector string 10 | 11 | Parses a CSS selector into an internal format for further use. See also [`find`](../find/). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/prevSibling.md: -------------------------------------------------------------------------------- 1 | # prevSibling 2 | 3 | ```php 4 | prevSibling ( ) : object 5 | ``` 6 | 7 | This is a wrapper for [`prev_sibling`](../prev_sibling/). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/prev_sibling.md: -------------------------------------------------------------------------------- 1 | # prev_sibling 2 | 3 | ```php 4 | prev_sibling ( ) : object 5 | ``` 6 | 7 | Returns the previous sibling of the current node, or null if the current node has no previous sibling. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/remove.md: -------------------------------------------------------------------------------- 1 | # remove 2 | 3 | ```php 4 | remove ( ) 5 | ``` 6 | 7 | Removes the current node recursively from the DOM. 8 | Does nothing if the node has no parent (root node). 9 | 10 | **Example** 11 | 12 | ```php 13 | $html = str_get_html(<<15 | 16 | 17 |
20 | 21 | 22 | EOD 23 | ); 24 | 25 | $table = $html->find('table', 0); 26 | $table->remove(); 27 | 28 | echo $html; 29 | 30 | /** 31 | * Returns 32 | * 33 | * 34 | */ 35 | ``` 36 | 37 | **Remarks** 38 | 39 | * Whitespace immediately **before** the removed node will remain in the DOM. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/removeAttribute.md: -------------------------------------------------------------------------------- 1 | # removeAttribute 2 | 3 | ```php 4 | removeAttribute ( string $name ) 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `name` | Name of the attribute to remove. 10 | 11 | Removes the attribute with the specified name from the current node. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/removeChild.md: -------------------------------------------------------------------------------- 1 | # removeChild 2 | 3 | ```php 4 | removeChild ( object $node ) 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `node` | Node to remove from current element, must be a child of the current element. 10 | 11 | Removes the node recursively from the DOM. 12 | Does nothing if the provided node is not a child of the current node. 13 | 14 | **Example** 15 | 16 | ```php 17 | $html = str_get_html(<<18 | Title 19 | Row 1 19 | 20 | 21 |
24 | 25 | 26 | EOD 27 | ); 28 | 29 | $body = $html->find('body', 0); 30 | $body->removeChild($body->find('table', 0)); 31 | 32 | echo $html; 33 | 34 | /** 35 | * Returns 36 | * 37 | * 38 | */ 39 | ``` 40 | 41 | **Remarks** 42 | 43 | * Whitespace immediately **before** the removed node will remain in the DOM. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/removeClass.md: -------------------------------------------------------------------------------- 1 | # removeClass 2 | 3 | ```php 4 | removeClass ( [ mixed $class = null ] ) 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `class` | Specifies one or more class names to be removed. 10 | 11 | Removes one or more class names from the current node. 12 | 13 | **Remarks** 14 | 15 | * To remove more than one class, separate the class names with space or provide them as an array. 16 | * If no parameter is specified, this method will remove all class names from the current node. 17 | 18 | **Examples** 19 | 20 | ```php 21 | $node->removeClass('hidden'); 22 | $node->removeClass('article important'); 23 | $node->removeClass(array('article', 'new')); 24 | $node->removeClass(); 25 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/save.md: -------------------------------------------------------------------------------- 1 | # save 2 | 3 | ```php 4 | save ( [ string $filepath = '' ] ) : string 5 | ``` 6 | 7 | Writes the current node to file. 8 | 9 | | Parameter | Description 10 | | --------- | ----------- 11 | | `filepath` | Writes to file if the provided file path is not empty. 12 | 13 | Returns the document string. 
14 | 15 | **Examples** 16 | 17 | ```php 18 | $string = $node->save(); 19 | $string = $node->save($file); 20 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/seek.md: -------------------------------------------------------------------------------- 1 | # seek (protected) 2 | 3 | ```php 4 | seek ( 5 | string $selector 6 | , array &$ret 7 | , string $parent_cmd 8 | [, bool $lowercase = false ] 9 | ) 10 | ``` 11 | 12 | | Parameter | Description 13 | | --------- | ----------- 14 | | `selector` | The current selector. 15 | | `ret` | Previous return value (starting point). 16 | | `parent_cmd` | The combinator used before the current selector. 17 | | `lowercase` | Matches tag names case insensitive (lowercase) if enabled. 18 | 19 | Starts by searching for child elements of `$ret` that match the specified selector. Adds matching elements to `$ret` (for the next iteration). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/setAttribute.md: -------------------------------------------------------------------------------- 1 | # setAttribute 2 | 3 | ```php 4 | setAttribute ( string $name, string $value ) 5 | ``` 6 | 7 | | Parameter | Description 8 | | --------- | ----------- 9 | | `name` | Attribute name 10 | | `value` | Attribute value 11 | 12 | Adds or sets an attribute in the current node to the specified value. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/simple_html_dom_node.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: simple_html_dom_node 3 | --- 4 | 5 | # simple_html_dom_node 6 | 7 | Represents a single node in the DOM tree (see [`simple_html_dom`](../../simple_html_dom/simple_html_dom/)). 
8 | 9 | # Public Properties 10 | 11 | | Property | Description 12 | | -------- | ----------- 13 | | `_` | Node meta data (i.e. type of node). 14 | | `attr` | List of attributes. 15 | | `children` | List of child nodes. 16 | | `nodes` | List of nodes. 17 | | `nodetype` | Node type. 18 | | `parent` | Parent node object. 19 | | `tag` | Node's tag name. 20 | | `tag_start` | Start position of the tag name in the original document. 21 | 22 | # Protected Properties 23 | 24 | None. 25 | 26 | # Private Properties 27 | 28 | | Property | Description 29 | | -------- | ----------- 30 | | `dom` | The DOM object (see [`simple_html_dom`](../../simple_html_dom/simple_html_dom/)). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/text.md: -------------------------------------------------------------------------------- 1 | # text 2 | 3 | ```php 4 | text ( ) : string 5 | ``` 6 | 7 | Returns the (HTML) text representation for the current node recursively. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/simple_html_dom_node/xmltext.md: -------------------------------------------------------------------------------- 1 | # xmltext 2 | 3 | ```php 4 | xmltext ( ) : string 5 | ``` 6 | 7 | Returns the xml representation for the inner text of the current node as a CDATA section. 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/api/str_get_html.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: str_get_html 3 | --- 4 | 5 | # str_get_html 6 | 7 | ```php 8 | str_get_html ( string $str [, bool $lowercase = true [, bool $forceTagsClosed = true [, string $target_charset = DEFAULT_TARGET_CHARSET [, bool $stripRN = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT ]]]]]] ) 9 | ``` 10 | 11 | Parses the provided string and returns the DOM object. 12 | 13 | | Parameter | Description 14 | | --------- | ----------- 15 | | `str` | The HTML document string. 16 | | `lowercase` | Forces lowercase matching of tags if enabled. This is very useful when loading documents with mixed naming conventions. 17 | | `forceTagsClosed` | Obsolete. This parameter is no longer used by the parser. 18 | | `target_charset` | Defines the target charset when returning text from the document. 19 | | `stripRN` | If enabled, removes newlines before parsing the document. 20 | | `defaultBRText` | Defines the default text to return for `22 | Title 23 | Row 1
` elements. 21 | | `defaultSpanText` | Defines the default text to return for `<span>` elements. 22 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## Problem with finding 4 | 5 | Q: Element not found in such case: `$html->find('div[style=padding: 0px 2px;] span[class=rf]');` 6 | 7 | A: If an attribute value in the selector contains blanks, quote it! 8 | $html->find('div[style="padding: 0px 2px;"] span[class=rf]'); 9 | 10 | ## Problem with hosting 11 | 12 | Q: On my local server everything works fine, but when I put it on my external server it doesn't work. 13 | 14 | A: The "file_get_html" function is a wrapper of the "file_get_contents" function, so you must set "allow_url_fopen" to TRUE in "php.ini" to allow accessing files via HTTP or FTP. However, some hosting vendors disable PHP's "allow_url_fopen" flag for security reasons. PHP provides excellent support for the "curl" library to do the same job: use curl to get the page, then call "str_get_html" to create the DOM object. 15 | 16 | Example: 17 | 18 | $curl = curl_init(); 19 | curl_setopt($curl, CURLOPT_URL, 'http://????????'); 20 | curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); 21 | curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10); 22 | $str = curl_exec($curl); 23 | curl_close($curl); 24 | 25 | $html= str_get_html($str); 26 | ... 27 | 28 | ## Behind a proxy 29 | 30 | Q: My server is behind a proxy and I can't use file_get_contents because it returns an unauthorized error. 31 | 32 | A: Thanks to Shaggy for providing the solution: 33 | 34 | // Define a context for HTTP. 35 | $context = array 36 | ( 37 | 'http' => array 38 | ( 39 | 'proxy' => 'addresseproxy:portproxy', // This needs to be the server and the port of the NTLM Authentication Proxy Server. 
40 | 'request_fulluri' => true, 41 | ), 42 | ); 43 | 44 | $context = stream_context_create($context); 45 | 46 | $html= file_get_html('http://www.php.net', false, $context); 47 | ... 48 | 49 | ## Memory leak 50 | 51 | Q: This script is leaking memory seriously... After it finished running, it's not cleaning up the DOM object properly from memory. 52 | 53 | A: Due to the PHP 5 circular-reference memory leak, after creating a DOM object you must call $dom->clear() to free memory if you call file_get_html() more than once. 54 | 55 | Example: 56 | 57 | $html = file_get_html(...); 58 | // do something... 59 | $html->clear(); 60 | unset($html); -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/index.md: -------------------------------------------------------------------------------- 1 | # PHP Simple HTML DOM Parser 2 | 3 | A fast, simple and reliable HTML document parser for PHP. 4 | 5 | Created by S.C. Chen, based on [HTML Parser for PHP 4](http://php-html.sourceforge.net/) by Jose 6 | Solorzano. 7 | 8 | # Parse any HTML document 9 | 10 | PHP Simple HTML DOM Parser handles any HTML document, even ones that are considered 11 | invalid by the [HTML](https://www.w3.org/TR/html/) specification. 12 | 13 | # Select elements using CSS selectors 14 | 15 | PHP Simple HTML DOM Parser supports CSS style selectors to navigate the DOM, 16 | similar to [jQuery](https://jquery.com/). 
17 | 18 | # Download 19 | 20 | * Download the latest version from [SourceForge](https://sourceforge.net/projects/simplehtmldom/) 21 | 22 | # Contributing 23 | 24 | * Request features on the [Feature Request Tracker](https://sourceforge.net/p/simplehtmldom/feature-requests/) 25 | * Report bugs on the [Bug Tracker](https://sourceforge.net/p/simplehtmldom/bugs/) 26 | * Get involved with the community on the [Discussions Board](https://sourceforge.net/p/simplehtmldom/discussion/) 27 | 28 | # License 29 | 30 | PHP Simple HTML DOM Parser is [Free Software](https://en.wikipedia.org/wiki/Free_software) 31 | licensed under the [MIT License](https://opensource.org/licenses/MIT). -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/manual/accessing-element-attributes.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Accessing HTML element attributes 3 | --- 4 | 5 | # Get, Set and Remove attributes 6 | 7 | ```php 8 | // Get a attribute ( If the attribute is non-value attribute (eg. checked, selected...), it will returns true or false) 9 | $value = $e->href; 10 | 11 | // Set a attribute(If the attribute is non-value attribute (eg. checked, selected...), set it's value as true or false) 12 | $e->href = 'my link'; 13 | 14 | // Remove a attribute, set it's value as null! 15 | $e->href = null; 16 | 17 | // Determine whether a attribute exist? 18 | if(isset($e->href)) 19 | echo 'href exist!'; 20 | ``` 21 | 22 | # Magic attributes 23 | 24 | ```php 25 | // Example 26 | $html = str_get_html("foo bar"); 27 | $e = $html->find("div", 0); 28 | 29 | echo $e->tag; // Returns: " div" 30 | echo $e->outertext; // Returns: "foo bar" 31 | echo $e->innertext; // Returns: " foo bar" 32 | echo $e->plaintext; // Returns: " foo bar" 33 | ``` 34 | 35 | Attribute name | Description 36 | -------------- | ----------- 37 | `$e->tag` | Read or write the **tag name** of element. 
38 | `$e->outertext`| Read or write the **outer HTML text** of element. 39 | `$e->innertext`| Read or write the **inner HTML text** of element. 40 | `$e->plaintext`| Read or write the **plain text** of element. 41 | 42 | # Tips 43 | 44 | ```php 45 | // Extract contents from HTML 46 | echo $html->plaintext; 47 | 48 | // Wrap a element 49 | $e->outertext = '' . $e->outertext . ''; 50 | 51 | // Remove a element, set it's outertext as an empty string 52 | $e->outertext = ''; 53 | 54 | // Append a element 55 | $e->outertext = $e->outertext . 'foo'; 56 | 57 | // Insert a element 58 | $e->outertext = 'foo' . $e->outertext; 59 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/manual/adding-nodes.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Adding Nodes to the DOM 3 | --- 4 | 5 | The parser allows you to add new elements to an existing document. Find below an 6 | example for that. 7 | 8 | **Remarks** 9 | 10 | - It is not possible to create nodes via `->outertext`, `->innertext`, or 11 | `->plaintext`. These properties only change the text representation of a node 12 | and will return undesired results if used incorrectly. 13 | - Use [`$html->createElement`](/api/simple_html_dom/createElement) and 14 | [`$html->createTextNode`](/api/simple_html_dom/createTextNode) to create 15 | new nodes. 16 | - Use [`$node->appendChild`](/api/simple_html_dom_node/appendChild) to add a 17 | node as child to another node. 18 | - Nodes can be combined in any order. 19 | 20 | **Example** 21 | 22 | ```php 23 | 46 | 47 | 53 | 54 | 55 |Volumes of the World's Oceans
56 | 57 | 58 | EOD; 59 | 60 | /***************************** code *******************************************/ 61 | 62 | $html = str_get_html($doc); 63 | $body = $html->find('body', 0); 64 | $table = $html->createElement('table'); 65 | 66 | // Header row 67 | $tr = $html->createElement('tr'); 68 | foreach ($header as $entry) { 69 | $th = $html->createElement('th', $entry); 70 | $tr->appendChild($th); 71 | } 72 | $table->appendChild($tr); 73 | 74 | // Table data 75 | foreach ($data as $row) { 76 | $tr = $html->createElement('tr'); 77 | foreach ($row as $entry) { 78 | 79 | // (optional) Add info to the volume column 80 | if (is_numeric($entry)) { 81 | $value = number_format($entry); 82 | $td = $html->createElement('td', $value); 83 | $td->setAttribute('volume', $entry); 84 | } else { 85 | $td = $html->createElement('td', $entry); 86 | } 87 | 88 | $tr->appendChild($td); 89 | } 90 | $table->appendChild($tr); 91 | } 92 | 93 | $body->appendChild($table); 94 | 95 | echo $html . PHP_EOL; 96 | 97 | /** 98 | * Output (beautified) 99 | * 100 | * 101 | * 102 | * 107 | * 108 | * 109 | *Volumes of the World's Oceans
110 | *111 | *
119 | * 120 | * 121 | */ 122 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/manual/creating-dom-objects.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Creating HTML DOM objects 3 | --- 4 | 5 | ## Functional 6 | 7 | ```php 8 | // Create a DOM object from a string 9 | $html = str_get_html('Hello!'); 10 | 11 | // Create a DOM object from a URL 12 | $html = file_get_html('http://www.google.com/'); 13 | 14 | // Create a DOM object from a HTML file 15 | $html = file_get_html('test.htm'); 16 | ``` 17 | 18 | ## Object Oriented 19 | 20 | ```php 21 | // Create a DOM object 22 | $html = new simple_html_dom(); 23 | 24 | // Load HTML from a string 25 | $html->load('Hello!'); 26 | 27 | // Load HTML from a URL 28 | $html->load_file('http://www.google.com/'); 29 | 30 | // Load HTML from a HTML file 31 | $html->load_file('test.htm'); 32 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/manual/customizing-parsing-behavior.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Customizing Parsing behavior 3 | --- 4 | 5 | ```php 6 | // Write a function with parameter "$element" 7 | function my_callback($element) { 8 | // Hide all <b> tags 9 | if ($element->tag=='b') 10 | $element->outertext = ''; 11 | } 12 | 13 | // Register the callback function with its function name 14 | $html->set_callback('my_callback'); 15 | 16 | // Callback function will be invoked while dumping 17 | echo $html; 18 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/manual/finding-html-elements.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Finding HTML Elements 3 | --- 4 | 5 | # Finding elements 
by tag name 6 | 7 | ```php 8 | // Find all anchors, returns a array of element objects 9 | $ret = $html->find('a'); 10 | 11 | // Find all anchors and images, returns an array of element objects 12 | $ret = $html->find('a, img'); 13 | 14 | // Find (N)th anchor, returns element object or null if not found (zero based) 15 | $ret = $html->find('a', 0); 16 | 17 | // Find last anchor, returns element object or null if not found (zero based) 18 | $ret = $html->find('a', -1); 19 | ``` 20 | 21 | # Finding elements by class name or id 22 | 23 | ```php 24 | // Find all element which id=foo 25 | $ret = $html->find('#foo'); 26 | 27 | // Find all element which class=foo 28 | $ret = $html->find('.foo'); 29 | ``` 30 | 31 | # Finding elements by attribute 32 | 33 | ```php 34 | // Find all112 | * Ocean Volume (km^3) 113 | * Arctic Ocean 18,750,000 114 | * Atlantic Ocean 310,410,900 115 | * Indian Ocean 264,000,000 116 | * Pacific Ocean 660,000,000 117 | * Souce China Sea 9,880,000 118 | * Southern Ocean 71,800,000 with the id attribute 35 | $ret = $html->find('div[id]'); 36 | 37 | // Find allwhich attribute id=foo 38 | $ret = $html->find('div[id=foo]'); 39 | 40 | // Find all anchors and images with the "title" attribute 41 | $ret = $html->find('a[title], img[title]'); 42 | 43 | // Find all element has attribute id 44 | $ret = $html->find('*[id]'); 45 | ``` 46 | 47 | ## Attribute filters 48 | 49 | Supports these operators in attribute selectors: 50 | 51 | Filter | Description | 52 | -----|-----------| 53 | `[attribute]` | Matches elements that **have** the specified attribute. 54 | `[!attribute]` | Matches elements that **don't have** the specified attribute. 55 | `[attribute=value]` | Matches elements that have the specified attribute with a **certain value**. 56 | `[attribute!=value]` | Matches elements that **don't have** the specified attribute with a certain value. 
57 | `[attribute^=value]` | Matches elements that have the specified attribute and it **starts** with a certain value. 58 | `[attribute$=value]` | Matches elements that have the specified attribute and it **ends** with a certain value. 59 | `[attribute*=value]` | Matches elements that have the specified attribute and it **contains** a certain value. 60 | 61 | # Finding descendants 62 | 63 | ```php 64 | // Find allin 65 | $es = $html->find('ul li'); 66 | 67 | // Find Nested
tags 68 | $es = $html->find('div div div'); 69 | 70 | // Find all <td> in <table> which class=hello 71 | $es = $html->find('table.hello td'); 72 | 73 | // Find all td tags with attribute align=center in table tags 74 | $es = $html->find('table td[align=center]'); 75 | ``` 76 | 77 | # Finding nested elements 78 | 79 | ```php 80 | // Find all
- in
81 | foreach($html->find('ul') as $ul) 82 | { 83 | foreach($ul->find('li') as $li) 84 | { 85 | // do something... 86 | } 87 | } 88 | 89 | // Find first
- in first
90 | $e = $html->find('ul', 0)->find('li', 0); 91 | ``` 92 | 93 | # Finding text blocks and comments 94 | 95 | ```php 96 | // Find all text blocks 97 | $es = $html->find('text'); 98 | 99 | // Find all comment () blocks 100 | $es = $html->find('comment'); 101 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/manual/saving-dom-objects.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Saving DOM objects 3 | --- 4 | 5 | # Functional 6 | 7 | ```php 8 | // Dumps the internal DOM tree back into string 9 | $str = $html; 10 | 11 | // Print it! 12 | echo $html; 13 | ``` 14 | 15 | # Object Oriented 16 | 17 | ```php 18 | // Dumps the internal DOM tree back into string 19 | $str = $html->save(); 20 | 21 | // Dumps the internal DOM tree back into a file 22 | $html->save('result.htm'); 23 | ``` -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/manual/traversing-dom-tree.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Traversing the DOM tree 3 | --- 4 | 5 | If you are not so familiar with HTML DOM, check this [link](http://php.net/manual/en/book.dom.php) to learn more... 6 | 7 | ```php 8 | // Example 9 | echo $html->find("#div1", 0)->children(1)->children(1)->children(2)->id; 10 | // or 11 | echo $html->getElementById("div1")->childNodes(1)->childNodes(1)->childNodes(2)->getAttribute('id'); 12 | ``` 13 | 14 | You can also call methods using the **Camel naming conventions**. 15 | 16 | Method | Description 17 | ------ | ----------- 18 | `$e->children( [int $index] ) : mixed` | Returns the Nth **child object** if **index** is set, otherwise return an **array of children**. 19 | `$e->parent() : element` | Returns the **parent** of element. 
20 | `$e->first_child() : element` | Returns the **first child** of element, or **null** if not found. 21 | `$e->last_child() : element` | Returns the **last child** of element, or **null** if not found. 22 | `$e->next_sibling() : element` | Returns the **next sibling** of element, or **null** if not found. 23 | `$e->prev_sibling() : element` | Returns the **previous sibling** of element, or **null** if not found. -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/quick-start.md: -------------------------------------------------------------------------------- 1 | # Quick Start 2 | 3 | Find below sample code that demonstrates the fundamental features of PHP Simple 4 | HTML DOM Parser. 5 | 6 | ## Read plain text from HTML document 7 | 8 | ```php 9 | echo file_get_html('https://www.google.com/')->plaintext; 10 | ``` 11 | 12 | Loads the specified HTML **document** into memory, parses it and returns the 13 | plain text. Note that [`file_get_html`](api/api.md) supports local files as well 14 | as remote files! 15 | 16 | ## Read plain text from HTML string 17 | 18 | ```php 19 | echo str_get_html('
')->plaintext; 20 | ``` 21 | 22 | Parses the provided HTML **string** and returns the plain text. Note that the 23 | parser handles partial documents as well as full documents. 24 | 25 | ## Read specific elements from HTML document 26 | 27 | ```php 28 | $html = file_get_html('https://www.google.com/'); 29 | 30 | foreach($html->find('img') as $element) 31 | echo $element->src . '
- Hello, World!
'; 32 | 33 | foreach($html->find('a') as $element) 34 | echo $element->href . '
'; 35 | ``` 36 | 37 | Loads the specified document into memory and returns a list of image sources as 38 | well as anchor links. Note that [`find`](manual/finding-html-elements.md) 39 | supports [CSS](https://www.w3.org/TR/selectors/) selectors to find elements in 40 | the DOM. 41 | 42 | ## Modify HTML documents 43 | 44 | ```php 45 | $doc = 'Hello,World!'; 46 | 47 | $html = str_get_html($doc); 48 | 49 | $html->find('div', 1)->class = 'bar'; 50 | $html->find('div[id=hello]', 0)->innertext = 'foo'; 51 | 52 | echo $html; //foo53 | ``` 54 | 55 | Parses the provided HTML string and replaces elements in the DOM before returning 56 | the updated HTML string. In this example, the class for the second `div` element 57 | is set to `bar` and the inner text for the first `div` element to `foo`. 58 | 59 | Note that [`find`](manual/finding-html-elements.md) supports a second parameter 60 | to return a single element from the array of matches. 61 | 62 | Note that attributes can be accessed directly by the means of magic methods 63 | (`->class` and `->innertext` in the example above). 64 | 65 | ## Collect information from Slashdot 66 | 67 | ```php 68 | $html = file_get_html('https://slashdot.org/'); 69 | 70 | $articles = $html->find('article[data-fhtype="story"]'); 71 | 72 | foreach($articles as $article) { 73 | $item['title'] = $article->find('.story-title', 0)->plaintext; 74 | $item['intro'] = $article->find('.p', 0)->plaintext; 75 | $item['details'] = $article->find('.details', 0)->plaintext; 76 | $items[] = $item; 77 | } 78 | 79 | print_r($items); 80 | ``` 81 | 82 | Collects information from [Slashdot](https://slashdot.org/) for further processing. 83 | 84 | Note that the combination of CSS selectors and magic methods make the process of 85 | parsing HTML documents a simple task that is easy to understand. 
-------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/docs/requirements.md: -------------------------------------------------------------------------------- 1 | | Requirement | Minimum | Recommended | 2 | | ----------- |:-------:|:-----------: | 3 | | PHP Version | 5.6.0 | [Latest stable release](https://www.php.net/supported-versions.php) | 4 | | PHP Extensions | [iconv](https://www.php.net/manual/en/book.iconv.php) | [iconv](https://www.php.net/manual/en/book.iconv.php),
[mbstring](https://www.php.net/manual/en/book.mbstring.php) 5 | | PHP INI Settings | --- | [allow_url_fopen = 1](https://www.php.net/manual/en/filesystem.configuration.php#ini.allow-url-fopen) ** 6 | 7 | ** This makes it possible to load files from URL using [`file_get_html`](/api/file_get_html/) -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/extra.css: -------------------------------------------------------------------------------- 1 | code { 2 | white-space: nowrap; 3 | } 4 | 5 | .logo { 6 | display: block; 7 | margin: auto; 8 | text-align: center; 9 | } -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Simple HTML DOM documentation 2 | site_url: http://simplehtmldom.sourceforge.net/ 3 | site_description: A simple HTML DOM parser written in PHP 4 | 5 | repo_name: SourceForge 6 | repo_url: https://sourceforge.net/projects/simplehtmldom/ 7 | 8 | theme: 9 | name: readthedocs 10 | custom_dir: 'custom_theme/' 11 | 12 | google_analytics: ['UA-3452027-2', 'simplehtmldom.sourceforge.net'] 13 | 14 | extra_css: [extra.css] 15 | 16 | nav: 17 | - 'index.md' 18 | - 'requirements.md' 19 | - 'quick-start.md' 20 | - 'faq.md' 21 | - Manual: 22 | - 'manual/creating-dom-objects.md' 23 | - 'manual/finding-html-elements.md' 24 | - 'manual/accessing-element-attributes.md' 25 | - 'manual/traversing-dom-tree.md' 26 | - 'manual/saving-dom-objects.md' 27 | - 'manual/customizing-parsing-behavior.md' 28 | - 'manual/adding-nodes.md' 29 | - API: 30 | - 'api/api.md' 31 | - 'api/constants.md' 32 | - 'api/definitions.md' 33 | - 'api/str_get_html.md' 34 | - 'api/file_get_html.md' 35 | - simple_html_dom: 36 | - 'api/simple_html_dom/simple_html_dom.md' 37 | - 'api/simple_html_dom/__construct.md' 38 | - 'api/simple_html_dom/__destruct.md' 39 
| - 'api/simple_html_dom/load.md' 40 | - 'api/simple_html_dom/load_file.md' 41 | - 'api/simple_html_dom/set_callback.md' 42 | - 'api/simple_html_dom/remove_callback.md' 43 | - 'api/simple_html_dom/save.md' 44 | - 'api/simple_html_dom/find.md' 45 | - 'api/simple_html_dom/clear.md' 46 | - 'api/simple_html_dom/dump.md' 47 | - 'api/simple_html_dom/prepare.md' 48 | - 'api/simple_html_dom/parse.md' 49 | - 'api/simple_html_dom/parse_charset.md' 50 | - 'api/simple_html_dom/read_tag.md' 51 | - 'api/simple_html_dom/parse_attr.md' 52 | - 'api/simple_html_dom/link_nodes.md' 53 | - 'api/simple_html_dom/as_text_node.md' 54 | - 'api/simple_html_dom/skip.md' 55 | - 'api/simple_html_dom/copy_skip.md' 56 | - 'api/simple_html_dom/copy_until.md' 57 | - 'api/simple_html_dom/copy_until_char.md' 58 | - 'api/simple_html_dom/remove_noise.md' 59 | - 'api/simple_html_dom/restore_noise.md' 60 | - 'api/simple_html_dom/search_noise.md' 61 | - 'api/simple_html_dom/__toString.md' 62 | - 'api/simple_html_dom/__get.md' 63 | - 'api/simple_html_dom/childNodes.md' 64 | - 'api/simple_html_dom/firstChild.md' 65 | - 'api/simple_html_dom/lastChild.md' 66 | - 'api/simple_html_dom/createElement.md' 67 | - 'api/simple_html_dom/createTextNode.md' 68 | - 'api/simple_html_dom/getElementById.md' 69 | - 'api/simple_html_dom/getElementsById.md' 70 | - 'api/simple_html_dom/getElementByTagName.md' 71 | - 'api/simple_html_dom/getElementsByTagName.md' 72 | - 'api/simple_html_dom/loadFile.md' 73 | - simple_html_dom_node: 74 | - 'api/simple_html_dom_node/simple_html_dom_node.md' 75 | - 'api/simple_html_dom_node/__construct.md' 76 | - 'api/simple_html_dom_node/__destruct.md' 77 | - 'api/simple_html_dom_node/__get.md' 78 | - 'api/simple_html_dom_node/__isset.md' 79 | - 'api/simple_html_dom_node/__set.md' 80 | - 'api/simple_html_dom_node/__toString.md' 81 | - 'api/simple_html_dom_node/__unset.md' 82 | - 'api/simple_html_dom_node/addClass.md' 83 | - 'api/simple_html_dom_node/appendChild.md' 84 | - 
'api/simple_html_dom_node/childNodes.md' 85 | - 'api/simple_html_dom_node/children.md' 86 | - 'api/simple_html_dom_node/clear.md' 87 | - 'api/simple_html_dom_node/convert_text.md' 88 | - 'api/simple_html_dom_node/dump.md' 89 | - 'api/simple_html_dom_node/dump_node.md' 90 | - 'api/simple_html_dom_node/find.md' 91 | - 'api/simple_html_dom_node/find_ancestor_tag.md' 92 | - 'api/simple_html_dom_node/first_child.md' 93 | - 'api/simple_html_dom_node/firstChild.md' 94 | - 'api/simple_html_dom_node/get_display_size.md' 95 | - 'api/simple_html_dom_node/getAllAttributes.md' 96 | - 'api/simple_html_dom_node/getAttribute.md' 97 | - 'api/simple_html_dom_node/getElementById.md' 98 | - 'api/simple_html_dom_node/getElementByTagName.md' 99 | - 'api/simple_html_dom_node/getElementsById.md' 100 | - 'api/simple_html_dom_node/getElementsByTagName.md' 101 | - 'api/simple_html_dom_node/has_child.md' 102 | - 'api/simple_html_dom_node/hasAttribute.md' 103 | - 'api/simple_html_dom_node/hasChildNodes.md' 104 | - 'api/simple_html_dom_node/hasClass.md' 105 | - 'api/simple_html_dom_node/innertext.md' 106 | - 'api/simple_html_dom_node/is_utf8.md' 107 | - 'api/simple_html_dom_node/last_child.md' 108 | - 'api/simple_html_dom_node/lastChild.md' 109 | - 'api/simple_html_dom_node/makeup.md' 110 | - 'api/simple_html_dom_node/match.md' 111 | - 'api/simple_html_dom_node/next_sibling.md' 112 | - 'api/simple_html_dom_node/nextSibling.md' 113 | - 'api/simple_html_dom_node/nodeName.md' 114 | - 'api/simple_html_dom_node/outertext.md' 115 | - 'api/simple_html_dom_node/parent.md' 116 | - 'api/simple_html_dom_node/parentNode.md' 117 | - 'api/simple_html_dom_node/parse_selector.md' 118 | - 'api/simple_html_dom_node/prev_sibling.md' 119 | - 'api/simple_html_dom_node/prevSibling.md' 120 | - 'api/simple_html_dom_node/remove.md' 121 | - 'api/simple_html_dom_node/removeAttribute.md' 122 | - 'api/simple_html_dom_node/removeChild.md' 123 | - 'api/simple_html_dom_node/removeClass.md' 124 | - 
'api/simple_html_dom_node/save.md' 125 | - 'api/simple_html_dom_node/seek.md' 126 | - 'api/simple_html_dom_node/setAttribute.md' 127 | - 'api/simple_html_dom_node/text.md' 128 | - 'api/simple_html_dom_node/xmltext.md' 129 | 130 | docs_dir: 'docs' -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/manual/site/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bacdong/PHP_Web-crawler/c94118d752a6a39026f3a542ec00c1315059a745/data_vnexpress/useLibs/SimpleHTMLDom/manual/site/.gitkeep -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/phpcompatibility.xml: -------------------------------------------------------------------------------- 1 | 2 |3 | 12 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/phpcs.xml: -------------------------------------------------------------------------------- 1 | 2 |Defines rules for PHPCompatibility 4 |./app 5 |./example 6 |./manual 7 |./testcase 8 |./tests 9 |10 | 11 | 3 | 49 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/SimpleHTMLDom/simplehtmldom_1_9_1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bacdong/PHP_Web-crawler/c94118d752a6a39026f3a542ec00c1315059a745/data_vnexpress/useLibs/SimpleHTMLDom/simplehtmldom_1_9_1.zip -------------------------------------------------------------------------------- /data_vnexpress/useLibs/connection.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/content.php: -------------------------------------------------------------------------------- 1 | 
load($html ->save()); 11 | 12 | // get latest news 13 | $items = $html->find('article.item-news.full-thumb.article-topstory', 0); 14 | $linkItems = $items->find('.thumb-art a', 0)->href; 15 | $thumbnail = $items->find('img', 0)->src; 16 | $title = $items->find('h3', 0)->plaintext; 17 | $description = $items->find('p.description a', 0)->plaintext; 18 | // $created_at = $items->find('p.meta-news span.time-public span.time-ago', 0)->datetime; 19 | 20 | 21 | // get news detail 22 | $urlPost = $linkItems.'/'; 23 | $htmlDetail = file_get_html($urlPost); 24 | $htmlDetail ->load($htmlDetail ->save()); 25 | $content = $htmlDetail->find('p'); 26 | $string = ""; 27 | foreach ($content as $item) { 28 | $string .= $item->plaintext.'Created with the PHP Coding Standard Generator. http://edorian.github.com/php-coding-standard-generator/ 4 |./app 5 |./example 6 |./manual 7 |./testcase 8 |9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 30 |26 | 29 |27 | 28 | 31 | 35 |32 | 34 |33 | 36 | 41 |37 | 40 |38 | 39 | 42 | 43 | 44 | 46 |45 | 47 | 48 |
'.'
'; 29 | } 30 | 31 | $string = str_replace('"', "", $string); 32 | $string = str_replace(';', ",", $string); 33 | $qry = "SELECT * FROM latestnews"; 34 | $rs = $connection->query($qry)->fetch_assoc(); 35 | $id = $rs['id']; 36 | 37 | $queryContent = 'INSERT INTO news_detail(content, news_id) VALUES("'.$string.'", "'.$id.'")'; 38 | // die($queryContent); 39 | $rsQryContent = $connection->query($queryContent); 40 | if ($rsQryContent) { 41 | echo 'Success'; 42 | } else { 43 | echo 'Failed'; 44 | } 45 | die; 46 | $query = 'INSERT INTO latestnews(title, thumbnail, content, created_at) VALUES("'.$title.'", "'.$thumbnail.'", "'.$description.'", "'.$created_at.'")'; 47 | ?> -------------------------------------------------------------------------------- /data_vnexpress/useLibs/library/Curl/ArrayUtil.php: -------------------------------------------------------------------------------- 1 | isArrayAssoc($array); 32 | } 33 | 34 | /** 35 | * Is Array Multidim 36 | * 37 | * @access public 38 | * @param $array 39 | * 40 | * @return boolean 41 | */ 42 | public static function isArrayMultidim($array) 43 | { 44 | if (!is_array($array)) { 45 | return false; 46 | } 47 | 48 | return (bool)count(array_filter($array, 'is_array')); 49 | } 50 | 51 | /** 52 | * Is Array Multidim 53 | * 54 | * @deprecated Use ArrayUtil::isArrayMultidim(). 
* @access public
     * @param  $array
     *
     * @return boolean
     */
    public static function is_array_multidim($array)
    {
        // Static methods have no $this; forward through the class so the
        // deprecated alias no longer fails with "Using $this when not in
        // object context".
        return self::isArrayMultidim($array);
    }

    /**
     * Array Flatten Multidim
     *
     * Collapses a nested array (or an object iterated with foreach) into a
     * single level. Nested keys are rendered in bracket notation, e.g.
     * array('a' => array('b' => 1)) becomes array('a[b]' => 1).
     *
     * - Scalar leaves are stored under "prefix[key]", or "key" without a prefix.
     * - \CURLFile leaves are stored under their own key, without the prefix.
     * - An empty array is stored as '' under the prefix.
     * - A null input is stored as null under the prefix.
     * - Any other non-array, non-object input yields an empty array.
     *
     * @access public
     * @param  $array  Array or object to flatten.
     * @param  $prefix Key prefix accumulated during recursion (false at top level).
     *
     * @return array
     */
    public static function arrayFlattenMultidim($array, $prefix = false)
    {
        $return = array();
        if (is_array($array) || is_object($array)) {
            if (empty($array)) {
                $return[$prefix] = '';
            } else {
                foreach ($array as $key => $value) {
                    // Bracketed key used for scalar leaves and for recursion.
                    $flat_key = $prefix ? $prefix . '[' . $key . ']' : $key;

                    if (is_scalar($value)) {
                        $return[$flat_key] = $value;
                    } elseif ($value instanceof \CURLFile) {
                        // File uploads keep their plain key (no prefix applied).
                        $return[$key] = $value;
                    } else {
                        $return = array_merge(
                            $return,
                            self::arrayFlattenMultidim($value, $flat_key)
                        );
                    }
                }
            }
        } elseif ($array === null) {
            $return[$prefix] = $array;
        }
        return $return;
    }

    /**
     * Array Flatten Multidim
     *
     * @deprecated Use ArrayUtil::arrayFlattenMultidim().
     * @access public
     * @param  $array
     * @param  $prefix
     *
     * @return array
     */
    public static function array_flatten_multidim($array, $prefix = false)
    {
        // No $this in a static method; call the canonical implementation statically.
        return self::arrayFlattenMultidim($array, $prefix);
    }

    /**
     * Array Random
     *
     * Returns one randomly chosen value from the array. The array is re-indexed
     * first, so string-keyed and sparse arrays work as well as plain lists.
     *
     * @access public
     * @param  $array Non-empty array to pick from.
     *
     * @return mixed
     */
    public static function arrayRandom($array)
    {
        $values = array_values($array);
        return $values[mt_rand(0, count($values) - 1)];
    }

    /**
     * Array Random
     *
     * @deprecated Use ArrayUtil::arrayRandom().
141 | * @access public 142 | * @param $array 143 | * 144 | * @return mixed 145 | */ 146 | public static function array_random($array) 147 | { 148 | return $this->arrayRandom($array); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/library/Curl/CaseInsensitiveArray.php: -------------------------------------------------------------------------------- 1 | $value) { 46 | $this->offsetSet($key, $value); 47 | } 48 | } 49 | } 50 | 51 | /** 52 | * Offset Set 53 | * 54 | * Set data at a specified offset. Converts the offset to lowercase, and 55 | * stores the case-sensitive offset and the data at the lowercase indexes in 56 | * $this->keys and @this->data. 57 | * 58 | * @see https://secure.php.net/manual/en/arrayaccess.offsetset.php 59 | * 60 | * @param string $offset The offset to store the data at (case-insensitive). 61 | * @param mixed $value The data to store at the specified offset. 62 | * 63 | * @return void 64 | * 65 | * @access public 66 | */ 67 | public function offsetSet($offset, $value) 68 | { 69 | if ($offset === null) { 70 | $this->data[] = $value; 71 | } else { 72 | $offsetlower = strtolower($offset); 73 | $this->data[$offsetlower] = $value; 74 | $this->keys[$offsetlower] = $offset; 75 | } 76 | } 77 | 78 | /** 79 | * Offset Exists 80 | * 81 | * Checks if the offset exists in data storage. The index is looked up with 82 | * the lowercase version of the provided offset. 83 | * 84 | * @see https://secure.php.net/manual/en/arrayaccess.offsetexists.php 85 | * 86 | * @param string $offset Offset to check 87 | * 88 | * @return bool If the offset exists. 89 | * 90 | * @access public 91 | */ 92 | public function offsetExists($offset) 93 | { 94 | return (bool) array_key_exists(strtolower($offset), $this->data); 95 | } 96 | 97 | /** 98 | * Offset Unset 99 | * 100 | * Unsets the specified offset. 
The offset is lowercased first, then both the stored
     * value and the remembered original-case key are removed.
     *
     * @see https://secure.php.net/manual/en/arrayaccess.offsetunset.php
     *
     * @param string $offset The offset to unset.
     *
     * @return void
     *
     * @access public
     */
    public function offsetUnset($offset)
    {
        $lookup = strtolower($offset);
        unset($this->data[$lookup], $this->keys[$lookup]);
    }

    /**
     * Offset Get
     *
     * Looks up the value stored under the lowercased offset. Yields null when
     * nothing is set there.
     *
     * @see https://secure.php.net/manual/en/arrayaccess.offsetget.php
     *
     * @param string $offset Offset to lookup.
     *
     * @return mixed The data stored at the offset.
     *
     * @access public
     */
    public function offsetGet($offset)
    {
        $lookup = strtolower($offset);
        if (!isset($this->data[$lookup])) {
            return null;
        }

        return $this->data[$lookup];
    }

    /**
     * Count
     *
     * @see https://secure.php.net/manual/en/countable.count.php
     *
     * @param void
     *
     * @return int The number of elements stored in the array.
     *
     * @access public
     */
    public function count()
    {
        $total = count($this->data);

        return (int) $total;
    }

    /**
     * Current
     *
     * @see https://secure.php.net/manual/en/iterator.current.php
     *
     * @param void
     *
     * @return mixed Data at the current position.
162 | * 163 | * @access public 164 | */ 165 | public function current() 166 | { 167 | return current($this->data); 168 | } 169 | 170 | /** 171 | * Next 172 | * 173 | * @see https://secure.php.net/manual/en/iterator.next.php 174 | * 175 | * @param void 176 | * 177 | * @return void 178 | * 179 | * @access public 180 | */ 181 | public function next() 182 | { 183 | next($this->data); 184 | } 185 | 186 | /** 187 | * Key 188 | * 189 | * @see https://secure.php.net/manual/en/iterator.key.php 190 | * 191 | * @param void 192 | * 193 | * @return mixed Case-sensitive key at current position. 194 | * 195 | * @access public 196 | */ 197 | public function key() 198 | { 199 | $key = key($this->data); 200 | return isset($this->keys[$key]) ? $this->keys[$key] : $key; 201 | } 202 | 203 | /** 204 | * Valid 205 | * 206 | * @see https://secure.php.net/manual/en/iterator.valid.php 207 | * 208 | * @return bool If the current position is valid. 209 | * 210 | * @access public 211 | */ 212 | public function valid() 213 | { 214 | return (bool) (key($this->data) !== null); 215 | } 216 | 217 | /** 218 | * Rewind 219 | * 220 | * @see https://secure.php.net/manual/en/iterator.rewind.php 221 | * 222 | * @param void 223 | * 224 | * @return void 225 | * 226 | * @access public 227 | */ 228 | public function rewind() 229 | { 230 | reset($this->data); 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /data_vnexpress/useLibs/library/Curl/Decoder.php: -------------------------------------------------------------------------------- 1 | multiCurl = curl_multi_init(); 52 | $this->headers = new CaseInsensitiveArray(); 53 | $this->setUrl($base_url); 54 | } 55 | 56 | /** 57 | * Add Delete 58 | * 59 | * @access public 60 | * @param $url 61 | * @param $query_parameters 62 | * @param $data 63 | * 64 | * @return object 65 | */ 66 | public function addDelete($url, $query_parameters = array(), $data = array()) 67 | { 68 | if (is_array($url)) { 69 | $data = 
$query_parameters;
            $query_parameters = $url;
            $url = $this->baseUrl;
        }
        $curl = new Curl();
        $this->queueHandle($curl);
        $curl->setUrl($url, $query_parameters);
        $curl->setOpt(CURLOPT_CUSTOMREQUEST, 'DELETE');
        $curl->setOpt(CURLOPT_POSTFIELDS, $curl->buildPostData($data));
        return $curl;
    }

    /**
     * Add Download
     *
     * Queues a GET request whose response body is written to a file handle.
     *
     * When $mixed_filename is callable it is stored as the download-complete
     * callback and the body goes to a tmpfile(). Otherwise it is treated as the
     * destination path: a non-empty "<path>.pccdownload" file is resumed with a
     * Range request and renamed to the destination on completion; if no such
     * file exists the body is buffered in php://temp and written to the
     * destination on completion.
     *
     * @access public
     * @param  $url
     * @param  $mixed_filename Callable completion callback, or destination file path.
     *
     * @return object The queued Curl instance.
     */
    public function addDownload($url, $mixed_filename)
    {
        $curl = new Curl();
        $this->queueHandle($curl);
        $curl->setUrl($url);

        // Use tmpfile() or php://temp to avoid "Too many open files" error.
        if (is_callable($mixed_filename)) {
            $curl->downloadCompleteCallback = $mixed_filename;
            $curl->downloadFileName = null;
            $curl->fileHandle = tmpfile();
        } else {
            $filename = $mixed_filename;

            // Use a temporary file when downloading. Not using a temporary file can cause an error when an existing
            // file has already fully completed downloading and a new download is started with the same destination save
            // path. The download request will include header "Range: bytes=$filesize-" which is syntactically valid,
            // but unsatisfiable.
            $download_filename = $filename . '.pccdownload';

            // Record the temporary name on the request itself (as the callable
            // branch does), not on the shared MultiCurl, where every queued
            // download would overwrite the previous one.
            // NOTE(review): presumably Curl reads its own downloadFileName when
            // finishing a download; confirm against Curl.php.
            $curl->downloadFileName = $download_filename;

            // Attempt to resume download only when a temporary download file exists and is not empty.
            if (is_file($download_filename) && $filesize = filesize($download_filename)) {
                $first_byte_position = $filesize;
                $range = $first_byte_position . '-';
                $curl->setRange($range);
                $curl->fileHandle = fopen($download_filename, 'ab');

                // Move the downloaded temporary file to the destination save path.
                $curl->downloadCompleteCallback = function ($instance, $fh) use ($download_filename, $filename) {
                    // Close the open file handle before renaming the file.
                    if (is_resource($fh)) {
                        fclose($fh);
                    }

                    rename($download_filename, $filename);
                };
            } else {
                $curl->fileHandle = fopen('php://temp', 'wb');
                $curl->downloadCompleteCallback = function ($instance, $fh) use ($filename) {
                    file_put_contents($filename, stream_get_contents($fh));
                };
            }
        }

        $curl->setFile($curl->fileHandle);
        $curl->setOpt(CURLOPT_CUSTOMREQUEST, 'GET');
        $curl->setOpt(CURLOPT_HTTPGET, true);
        return $curl;
    }

    /**
     * Add Get
     *
     * Queues a GET request. When the first argument is an array it is taken as
     * the query data and the request goes to the base URL.
     *
     * @access public
     * @param  $url
     * @param  $data Query parameters passed to Curl::setUrl().
     *
     * @return object The queued Curl instance.
     */
    public function addGet($url, $data = array())
    {
        if (is_array($url)) {
            $data = $url;
            $url = $this->baseUrl;
        }
        $curl = new Curl();
        $this->queueHandle($curl);
        $curl->setUrl($url, $data);
        $curl->setOpt(CURLOPT_CUSTOMREQUEST, 'GET');
        $curl->setOpt(CURLOPT_HTTPGET, true);
        return $curl;
    }

    /**
     * Add Head
     *
     * Queues a HEAD request (no response body). When the first argument is an
     * array it is taken as the query data and the request goes to the base URL.
     *
     * @access public
     * @param  $url
     * @param  $data Query parameters passed to Curl::setUrl().
     *
     * @return object The queued Curl instance.
     */
    public function addHead($url, $data = array())
    {
        if (is_array($url)) {
            $data = $url;
            $url = $this->baseUrl;
        }
        $curl = new Curl();
        $this->queueHandle($curl);
        $curl->setUrl($url, $data);
        $curl->setOpt(CURLOPT_CUSTOMREQUEST, 'HEAD');
        $curl->setOpt(CURLOPT_NOBODY, true);
        return $curl;
    }

    /**
     * Add Options
     *
     * @access public
     * @param  $url
     * @param  $data
     *
     * @return object
     */
    public function addOptions($url, $data = array())
    {
        if (is_array($url)) {
            $data = $url;
            $url = $this->baseUrl;
} 202 | $curl = new Curl(); 203 | $this->queueHandle($curl); 204 | $curl->setUrl($url, $data); 205 | $curl->removeHeader('Content-Length'); 206 | $curl->setOpt(CURLOPT_CUSTOMREQUEST, 'OPTIONS'); 207 | return $curl; 208 | } 209 | 210 | /** 211 | * Add Patch 212 | * 213 | * @access public 214 | * @param $url 215 | * @param $data 216 | * 217 | * @return object 218 | */ 219 | public function addPatch($url, $data = array()) 220 | { 221 | if (is_array($url)) { 222 | $data = $url; 223 | $url = $this->baseUrl; 224 | } 225 | 226 | $curl = new Curl(); 227 | 228 | if (is_array($data) && empty($data)) { 229 | $curl->removeHeader('Content-Length'); 230 | } 231 | 232 | $this->queueHandle($curl); 233 | $curl->setUrl($url); 234 | $curl->setOpt(CURLOPT_CUSTOMREQUEST, 'PATCH'); 235 | $curl->setOpt(CURLOPT_POSTFIELDS, $curl->buildPostData($data)); 236 | return $curl; 237 | } 238 | 239 | /** 240 | * Add Post 241 | * 242 | * @access public 243 | * @param $url 244 | * @param $data 245 | * @param $follow_303_with_post 246 | * If true, will cause 303 redirections to be followed using GET requests (default: false). 247 | * Note: Redirections are only followed if the CURLOPT_FOLLOWLOCATION option is set to true. 248 | * 249 | * @return object 250 | */ 251 | public function addPost($url, $data = '', $follow_303_with_post = false) 252 | { 253 | if (is_array($url)) { 254 | $follow_303_with_post = (bool)$data; 255 | $data = $url; 256 | $url = $this->baseUrl; 257 | } 258 | 259 | $curl = new Curl(); 260 | $this->queueHandle($curl); 261 | 262 | if (is_array($data) && empty($data)) { 263 | $curl->removeHeader('Content-Length'); 264 | } 265 | 266 | $curl->setUrl($url); 267 | 268 | /* 269 | * For post-redirect-get requests, the CURLOPT_CUSTOMREQUEST option must not 270 | * be set, otherwise cURL will perform POST requests for redirections. 
271 | */ 272 | if (!$follow_303_with_post) { 273 | $curl->setOpt(CURLOPT_CUSTOMREQUEST, 'POST'); 274 | } 275 | 276 | $curl->setOpt(CURLOPT_POST, true); 277 | $curl->setOpt(CURLOPT_POSTFIELDS, $curl->buildPostData($data)); 278 | return $curl; 279 | } 280 | 281 | /** 282 | * Add Put 283 | * 284 | * @access public 285 | * @param $url 286 | * @param $data 287 | * 288 | * @return object 289 | */ 290 | public function addPut($url, $data = array()) 291 | { 292 | if (is_array($url)) { 293 | $data = $url; 294 | $url = $this->baseUrl; 295 | } 296 | $curl = new Curl(); 297 | $this->queueHandle($curl); 298 | $curl->setUrl($url); 299 | $curl->setOpt(CURLOPT_CUSTOMREQUEST, 'PUT'); 300 | $put_data = $curl->buildPostData($data); 301 | if (is_string($put_data)) { 302 | $curl->setHeader('Content-Length', strlen($put_data)); 303 | } 304 | $curl->setOpt(CURLOPT_POSTFIELDS, $put_data); 305 | return $curl; 306 | } 307 | 308 | /** 309 | * Add Search 310 | * 311 | * @access public 312 | * @param $url 313 | * @param $data 314 | * 315 | * @return object 316 | */ 317 | public function addSearch($url, $data = array()) 318 | { 319 | if (is_array($url)) { 320 | $data = $url; 321 | $url = $this->baseUrl; 322 | } 323 | $curl = new Curl(); 324 | $this->queueHandle($curl); 325 | $curl->setUrl($url); 326 | $curl->setOpt(CURLOPT_CUSTOMREQUEST, 'SEARCH'); 327 | $put_data = $curl->buildPostData($data); 328 | if (is_string($put_data)) { 329 | $curl->setHeader('Content-Length', strlen($put_data)); 330 | } 331 | $curl->setOpt(CURLOPT_POSTFIELDS, $put_data); 332 | return $curl; 333 | } 334 | 335 | /** 336 | * Add Curl 337 | * 338 | * Add a Curl instance to the handle queue. 
*
     * @access public
     * @param  $curl
     *
     * @return object
     */
    public function addCurl(Curl $curl)
    {
        $this->queueHandle($curl);

        return $curl;
    }

    /**
     * Before Send
     *
     * Stores the callback kept in $this->beforeSendCallback.
     *
     * @access public
     * @param  $callback
     */
    public function beforeSend($callback)
    {
        $this->beforeSendCallback = $callback;
    }

    /**
     * Close
     *
     * Closes every queued Curl instance, then the multi handle when it is
     * still a resource.
     *
     * @access public
     */
    public function close()
    {
        foreach ($this->curls as $queued) {
            $queued->close();
        }

        if (!is_resource($this->multiCurl)) {
            return;
        }

        curl_multi_close($this->multiCurl);
    }

    /**
     * Complete
     *
     * Stores the callback kept in $this->completeCallback.
     *
     * @access public
     * @param  $callback
     */
    public function complete($callback)
    {
        $this->completeCallback = $callback;
    }

    /**
     * Error
     *
     * Stores the callback kept in $this->errorCallback.
     *
     * @access public
     * @param  $callback
     */
    public function error($callback)
    {
        $this->errorCallback = $callback;
    }

    /**
     * Get Opt
     *
     * Reads back an option previously recorded with setOpt(); null when unset.
     *
     * @access public
     * @param  $option
     *
     * @return mixed
     */
    public function getOpt($option)
    {
        if (!isset($this->options[$option])) {
            return null;
        }

        return $this->options[$option];
    }

    /**
     * Set Basic Authentication
     *
     * @access public
     * @param  $username
     * @param  $password
     */
    public function setBasicAuthentication($username, $password = '')
    {
        $credentials = $username . ':' . $password;

        $this->setOpt(CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
        $this->setOpt(CURLOPT_USERPWD, $credentials);
    }

    /**
     * Set Concurrency
     *
     * Stores the value start() compares the number of active requests against.
     *
     * @access public
     * @param  $concurrency
     */
    public function setConcurrency($concurrency)
    {
        $this->concurrency = $concurrency;
    }

    /**
     * Set Digest Authentication
     *
     * @access public
     * @param  $username
     * @param  $password
     */
    public function setDigestAuthentication($username, $password = '')
    {
        $credentials = $username . ':' . $password;

        $this->setOpt(CURLOPT_HTTPAUTH, CURLAUTH_DIGEST);
        $this->setOpt(CURLOPT_USERPWD, $credentials);
    }

    /**
     * Set Cookie
     *
     * @access public
     * @param  $key
     * @param  $value
     */
    public function setCookie($key, $value)
    {
        $this->cookies[$key] = $value;
    }

    /**
     * Set Cookies
     *
     * Records every name/value pair of the given map, overwriting same-named
     * cookies already recorded.
     *
     * @access public
     * @param  $cookies
     */
    public function setCookies($cookies)
    {
        foreach ($cookies as $name => $content) {
            $this->cookies[$name] = $content;
        }
    }

    /**
     * Set Port
     *
     * @access public
     * @param  $port
     */
    public function setPort($port)
    {
        $port_number = intval($port);

        $this->setOpt(CURLOPT_PORT, $port_number);
    }

    /**
     * Set Connect Timeout
     *
     * @access public
     * @param  $seconds
     */
    public function setConnectTimeout($seconds)
    {
        $this->setOpt(CURLOPT_CONNECTTIMEOUT, $seconds);
    }

    /**
     * Set Cookie String
     *
     * @access public
     * @param  $string
     */
    public function setCookieString($string)
    {
        $this->setOpt(CURLOPT_COOKIE, $string);
    }

    /**
     * Set Cookie File
     *
     * @access public
     * @param  $cookie_file
     */
    public function setCookieFile($cookie_file)
    {
        $this->setOpt(CURLOPT_COOKIEFILE, $cookie_file);
    }

    /**
     * Set Cookie Jar
     *
     * @access public
     *
@param  $cookie_jar
     */
    public function setCookieJar($cookie_jar)
    {
        $this->setOpt(CURLOPT_COOKIEJAR, $cookie_jar);
    }

    /**
     * Set File
     *
     * @access public
     * @param  $file
     */
    public function setFile($file)
    {
        $this->setOpt(CURLOPT_FILE, $file);
    }

    /**
     * Set Header
     *
     * Records one extra request header and pushes the full header set to all
     * queued requests.
     *
     * @access public
     * @param  $key
     * @param  $value
     */
    public function setHeader($key, $value)
    {
        $this->headers[$key] = $value;
        $this->updateHeaders();
    }

    /**
     * Set Headers
     *
     * Records several extra request headers, then pushes the full header set
     * to all queued requests once.
     *
     * @access public
     * @param  $headers
     */
    public function setHeaders($headers)
    {
        foreach ($headers as $name => $content) {
            $this->headers[$name] = $content;
        }

        $this->updateHeaders();
    }

    /**
     * Set JSON Decoder
     *
     * Accepts false or a callable; any other value is ignored.
     *
     * @access public
     * @param  $mixed boolean|callable
     */
    public function setJsonDecoder($mixed)
    {
        if ($mixed === false || is_callable($mixed)) {
            $this->jsonDecoder = $mixed;
        }
    }

    /**
     * Set XML Decoder
     *
     * Accepts false or a callable; any other value is ignored.
     *
     * @access public
     * @param  $mixed boolean|callable
     */
    public function setXmlDecoder($mixed)
    {
        if ($mixed === false || is_callable($mixed)) {
            $this->xmlDecoder = $mixed;
        }
    }

    /**
     * Set Proxy
     *
     * Set an HTTP proxy to tunnel requests through.
     *
     * @access public
     * @param  $proxy - The HTTP proxy to tunnel requests through. May include port number.
     * @param  $port - The port number of the proxy to connect to. This port number can also be set in $proxy.
* @param  $username - The username to use for the connection to the proxy.
     * @param  $password - The password to use for the connection to the proxy.
     */
    public function setProxy($proxy, $port = null, $username = null, $password = null)
    {
        $this->setOpt(CURLOPT_PROXY, $proxy);

        if ($port !== null) {
            $this->setOpt(CURLOPT_PROXYPORT, $port);
        }

        // Credentials are only applied when both parts were supplied.
        if ($username === null || $password === null) {
            return;
        }

        $credentials = $username . ':' . $password;
        $this->setOpt(CURLOPT_PROXYUSERPWD, $credentials);
    }

    /**
     * Set Proxies
     *
     * Stores a list of proxies. When set, one is picked at random for each
     * request that has no proxy of its own.
     *
     * @access public
     * @param  $proxies array - A list of HTTP proxies to tunnel requests
     *     through. May include port number.
     */
    public function setProxies($proxies)
    {
        $this->proxies = $proxies;
    }

    /**
     * Set Proxy Auth
     *
     * Chooses the HTTP authentication method(s) for the proxy connection.
     *
     * @access public
     * @param  $auth
     */
    public function setProxyAuth($auth)
    {
        $this->setOpt(CURLOPT_PROXYAUTH, $auth);
    }

    /**
     * Set Proxy Type
     *
     * Chooses the proxy protocol type.
     *
     * @access public
     * @param  $type
     */
    public function setProxyType($type)
    {
        $this->setOpt(CURLOPT_PROXYTYPE, $type);
    }

    /**
     * Set Proxy Tunnel
     *
     * Toggles tunneling through the HTTP proxy.
     *
     * @access public
     * @param  $tunnel boolean
     */
    public function setProxyTunnel($tunnel = true)
    {
        $this->setOpt(CURLOPT_HTTPPROXYTUNNEL, $tunnel);
    }

    /**
     * Unset Proxy
     *
     * Disable use of the proxy.
*
     * @access public
     */
    public function unsetProxy()
    {
        $this->setOpt(CURLOPT_PROXY, null);
    }

    /**
     * Set Opt
     *
     * Records an option value in $this->options.
     *
     * @access public
     * @param  $option
     * @param  $value
     */
    public function setOpt($option, $value)
    {
        $this->options[$option] = $value;
    }

    /**
     * Set Opts
     *
     * Records every option/value pair of the given map via setOpt().
     *
     * @access public
     * @param  $options
     */
    public function setOpts($options)
    {
        foreach ($options as $name => $setting) {
            $this->setOpt($name, $setting);
        }
    }

    /**
     * Set Range
     *
     * @access public
     * @param  $range
     */
    public function setRange($range)
    {
        $this->setOpt(CURLOPT_RANGE, $range);
    }

    /**
     * Set Rate Limit
     *
     * Parses "<max requests>/<interval><unit>" where the interval is optional
     * (defaults to 1) and the unit is s, m or h in either case. Stores the
     * normalized limit, its parts, and the interval converted to seconds, and
     * enables rate limiting.
     *
     * @access public
     * @param  $rate_limit string (e.g. "60/1m").
     * @throws \UnexpectedValueException When the string does not match the format.
     */
    public function setRateLimit($rate_limit)
    {
        // digits, slash, optional digits, unit; anchored, case-insensitive.
        $rate_limit_pattern = '/^(\d+)\/(\d+)?(s|m|h)$/i';

        if (!preg_match($rate_limit_pattern, $rate_limit, $matches)) {
            throw new \UnexpectedValueException(
                'rate limit must be formatted as $max_requests/$interval(s|m|h) ' .
                '(e.g. "60/1m" for a maximum of 60 requests per 1 minute)'
            );
        }

        $max_requests = (int)$matches[1];
        $interval = ($matches[2] === '') ? 1 : (int)$matches[2];
        $unit = strtolower($matches[3]);

        // Seconds per unit; the pattern guarantees the unit is one of these.
        $seconds_per_unit = array('s' => 1, 'm' => 60, 'h' => 3600);
        $interval_seconds = $interval * $seconds_per_unit[$unit];

        $this->rateLimit = $max_requests . '/' . $interval . $unit;
        $this->rateLimitEnabled = true;
        $this->maxRequests = $max_requests;
        $this->interval = $interval;
        $this->intervalSeconds = $interval_seconds;
        $this->unit = $unit;
    }

    /**
     * Set Referer
     *
     * Alias of setReferrer() using the HTTP header's spelling.
     *
     * @access public
     * @param  $referer
     */
    public function setReferer($referer)
    {
        $this->setReferrer($referer);
    }

    /**
     * Set Referrer
     *
     * @access public
     * @param  $referrer
     */
    public function setReferrer($referrer)
    {
        $this->setOpt(CURLOPT_REFERER, $referrer);
    }

    /**
     * Set Retry
     *
     * Number of retries to attempt or decider callable.
     *
     * When using a number of retries to attempt, the maximum number of attempts
     * for the request is $maximum_number_of_retries + 1.
     *
     * When using a callable decider, the request will be retried until the
     * function returns a value which evaluates to false.
809 | * 810 | * @access public 811 | * @param $mixed 812 | */ 813 | public function setRetry($mixed) 814 | { 815 | $this->retry = $mixed; 816 | } 817 | 818 | /** 819 | * Set Timeout 820 | * 821 | * @access public 822 | * @param $seconds 823 | */ 824 | public function setTimeout($seconds) 825 | { 826 | $this->setOpt(CURLOPT_TIMEOUT, $seconds); 827 | } 828 | 829 | /** 830 | * Disable Timeout 831 | * 832 | * @access public 833 | */ 834 | public function disableTimeout() 835 | { 836 | $this->setTimeout(null); 837 | } 838 | 839 | /** 840 | * Set Url 841 | * 842 | * @access public 843 | * @param $url 844 | */ 845 | public function setUrl($url) 846 | { 847 | $this->baseUrl = $url; 848 | } 849 | 850 | /** 851 | * Set User Agent 852 | * 853 | * @access public 854 | * @param $user_agent 855 | */ 856 | public function setUserAgent($user_agent) 857 | { 858 | $this->setOpt(CURLOPT_USERAGENT, $user_agent); 859 | } 860 | 861 | /** 862 | * Set Interface 863 | * 864 | * The name of the outgoing network interface to use. 865 | * This can be an interface name, an IP address or a host name. 
866 | * 867 | * @access public 868 | * @param $interface 869 | */ 870 | public function setInterface($interface) 871 | { 872 | $this->setOpt(CURLOPT_INTERFACE, $interface); 873 | } 874 | 875 | /** 876 | * Start 877 | * 878 | * @access public 879 | */ 880 | public function start() 881 | { 882 | if ($this->isStarted) { 883 | return; 884 | } 885 | 886 | $this->isStarted = true; 887 | $this->currentStartTime = microtime(true); 888 | $this->currentRequestCount = 0; 889 | 890 | do { 891 | while (count($this->curls) && 892 | count($this->activeCurls) < $this->concurrency && 893 | (!$this->rateLimitEnabled || $this->hasRequestQuota()) 894 | ) { 895 | $this->initHandle(); 896 | } 897 | 898 | if ($this->rateLimitEnabled && !count($this->activeCurls) && !$this->hasRequestQuota()) { 899 | $this->waitUntilRequestQuotaAvailable(); 900 | } 901 | 902 | // Wait for activity on any curl_multi connection when curl_multi_select (libcurl) fails to correctly block. 903 | // https://bugs.php.net/bug.php?id=63411 904 | if (curl_multi_select($this->multiCurl) === -1) { 905 | usleep(100000); 906 | } 907 | 908 | curl_multi_exec($this->multiCurl, $active); 909 | 910 | while (($info_array = curl_multi_info_read($this->multiCurl)) !== false) { 911 | if ($info_array['msg'] === CURLMSG_DONE) { 912 | foreach ($this->activeCurls as $key => $curl) { 913 | if ($curl->curl === $info_array['handle']) { 914 | // Set the error code for multi handles using the "result" key in the array returned by 915 | // curl_multi_info_read(). Using curl_errno() on a multi handle will incorrectly return 0 916 | // for errors. 917 | $curl->curlErrorCode = $info_array['result']; 918 | $curl->exec($curl->curl); 919 | 920 | if ($curl->attemptRetry()) { 921 | // Remove completed handle before adding again in order to retry request. 
// NOTE(review): the statements down to the first closing method brace are the
// tail of a method that begins before this chunk; they are reproduced unchanged.
                                curl_multi_remove_handle($this->multiCurl, $curl->curl);

                                $curlm_error_code = curl_multi_add_handle($this->multiCurl, $curl->curl);
                                if ($curlm_error_code !== CURLM_OK) {
                                    throw new \ErrorException(
                                        'cURL multi add handle error: ' . curl_multi_strerror($curlm_error_code)
                                    );
                                }
                            } else {
                                $curl->execDone();

                                // Remove completed instance from active curls.
                                unset($this->activeCurls[$key]);

                                // Remove handle of the completed instance.
                                curl_multi_remove_handle($this->multiCurl, $curl->curl);

                                // Clean up completed instance.
                                $curl->close();
                            }

                            break;
                        }
                    }
                }
            }
        } while ($active || count($this->activeCurls) || count($this->curls));

        $this->isStarted = false;
    }

    /**
     * Success
     *
     * Store the default success callback. initHandle() hands it to every
     * queued instance that has no success callback of its own.
     *
     * @access public
     * @param  $callback
     */
    public function success($callback)
    {
        $this->successCallback = $callback;
    }

    /**
     * Unset Header
     *
     * Remove extra header previously set using Curl::setHeader().
     *
     * @access public
     * @param  $key
     */
    public function unsetHeader($key)
    {
        unset($this->headers[$key]);
    }

    /**
     * Remove Header
     *
     * Remove an internal header from the request by setting it to an empty value.
     * Using `curl -H "Host:" ...' is equivalent to $curl->removeHeader('Host');.
     *
     * @access public
     * @param  $key
     */
    public function removeHeader($key)
    {
        $this->setHeader($key, '');
    }

    /**
     * Verbose
     *
     * Toggle CURLOPT_VERBOSE and direct the verbose output to $output.
     *
     * NOTE(review): the STDERR constant used as the default is only defined by
     * the CLI SAPI; callers running under another SAPI should pass $output
     * explicitly.
     *
     * @access public
     * @param  bool $on
     * @param  resource $output
     */
    public function verbose($on = true, $output = STDERR)
    {
        // Turn off CURLINFO_HEADER_OUT for verbose to work. This has the side
        // effect of causing Curl::requestHeaders to be empty.
        if ($on) {
            $this->setOpt(CURLINFO_HEADER_OUT, false);
        }
        $this->setOpt(CURLOPT_VERBOSE, $on);
        $this->setOpt(CURLOPT_STDERR, $output);
    }

    /**
     * Destruct
     *
     * Calls close() when the object is destroyed.
     *
     * @access public
     */
    public function __destruct()
    {
        $this->close();
    }

    /**
     * Update Headers
     *
     * Push the shared header list to every instance that is still queued.
     *
     * @access private
     */
    private function updateHeaders()
    {
        foreach ($this->curls as $curl) {
            $curl->setHeaders($this->headers);
        }
    }

    /**
     * Queue Handle
     *
     * Assign the next sequential id to the instance, mark it as a child of
     * this multi handle, queue it and give it the shared headers.
     *
     * @access private
     * @param  $curl
     */
    private function queueHandle($curl)
    {
        // Use sequential ids to allow for ordered post processing.
        $curl->id = $this->nextCurlId++;
        $curl->childOfMultiCurl = true;
        $this->curls[$curl->id] = $curl;

        $curl->setHeaders($this->headers);
    }

    /**
     * Init Handle
     *
     * Take the next queued instance (if any), register it as active, fill in
     * the callbacks and decoders it has not set itself, apply the shared
     * options, retry setting and cookies, optionally pick a random proxy, add
     * its handle to the multi handle and finally invoke its beforeSend callback.
     *
     * Takes no arguments: the instance comes from the front of $this->curls.
     *
     * @access private
     * @throws \ErrorException when the handle cannot be added to the multi handle
     */
    private function initHandle()
    {
        $curl = array_shift($this->curls);
        if ($curl === null) {
            // Queue is empty; nothing to start.
            return;
        }

        // Add instance to list of active curls.
        $this->currentRequestCount += 1;
        $this->activeCurls[$curl->id] = $curl;

        // Set callbacks if not already individually set.
        if ($curl->beforeSendCallback === null) {
            $curl->beforeSend($this->beforeSendCallback);
        }
        if ($curl->successCallback === null) {
            $curl->success($this->successCallback);
        }
        if ($curl->errorCallback === null) {
            $curl->error($this->errorCallback);
        }
        if ($curl->completeCallback === null) {
            $curl->complete($this->completeCallback);
        }

        // Set decoders if not already individually set.
        if ($curl->jsonDecoder === null) {
            $curl->setJsonDecoder($this->jsonDecoder);
        }
        if ($curl->xmlDecoder === null) {
            $curl->setXmlDecoder($this->xmlDecoder);
        }

        $curl->setOpts($this->options);
        $curl->setRetry($this->retry);
        $curl->setCookies($this->cookies);

        // Use a random proxy for the curl instance when proxies have been set
        // and the curl instance doesn't already have a proxy set.
        if (is_array($this->proxies) && $curl->getOpt(CURLOPT_PROXY) === null) {
            $random_proxy = ArrayUtil::arrayRandom($this->proxies);
            $curl->setProxy($random_proxy);
        }

        $curlm_error_code = curl_multi_add_handle($this->multiCurl, $curl->curl);
        if ($curlm_error_code !== CURLM_OK) {
            throw new \ErrorException('cURL multi add handle error: ' . curl_multi_strerror($curlm_error_code));
        }

        $curl->call($curl->beforeSendCallback);
    }

    /**
     * Has Request Quota
     *
     * Checks if there is any available quota to make additional requests while
     * rate limiting is enabled. Always true when rate limiting is disabled.
     *
     * @access private
     * @return bool
     */
    private function hasRequestQuota()
    {
        // Without rate limiting there is always quota.
        if (!$this->rateLimitEnabled) {
            return true;
        }

        // Determine if the limit of requests per interval has been reached.
        if ($this->currentRequestCount >= $this->maxRequests) {
            $elapsed_seconds = microtime(true) - $this->currentStartTime;
            if ($elapsed_seconds <= $this->intervalSeconds) {
                // Still inside the current interval: flag it and refuse.
                $this->rateLimitReached = true;
                return false;
            } elseif ($this->rateLimitReached) {
                // The interval that was flagged has passed: start a new one.
                $this->rateLimitReached = false;
                $this->currentStartTime = microtime(true);
                $this->currentRequestCount = 0;
            }
            // NOTE(review): when the cap is reached after the interval has
            // already expired and rateLimitReached is false, the counters are
            // not reset here — confirm the caller accounts for that.
        }

        return true;
    }

    /**
     * Wait Until Request Quota Available
     *
     * Sleeps until the current rate-limit interval is over, then starts a new
     * interval with a request count of zero.
     *
     * @access private
     */
    private function waitUntilRequestQuotaAvailable()
    {
        $sleep_until = $this->currentStartTime + $this->intervalSeconds;
        $sleep_seconds = $sleep_until - microtime(true);

        // The interval may already be over by the time this runs. usleep()
        // rejects negative values (ValueError as of PHP 8.0), so only sleep
        // when time actually remains.
        if ($sleep_seconds > 0) {
            // Avoid using time_sleep_until() as it appears to be less precise and not sleep long enough.
            // usleep() takes an integer; round up so the wait is never cut short.
            usleep((int) ceil($sleep_seconds * 1000000));
        }

        $this->currentStartTime = microtime(true);
        $this->currentRequestCount = 0;
    }
}
--------------------------------------------------------------------------------
/data_vnexpress/useLibs/library/Curl/StringUtil.php:
--------------------------------------------------------------------------------
// NOTE(review): extraction appears to have dropped everything from this file's
// opening tag up to the `$this->` that preceded `baseUrl` (class header and
// constructor signature included) — restore it from the original repository.
// The surviving fragment is kept as found.
baseUrl = $base_url;
        $this->relativeUrl = $relative_url;
    }

    public function __toString()
    {
        return $this->absolutizeUrl();
    }

    /**
     * Remove dot segments.
     *
     * Interpret and remove the special "." and ".." path segments from a referenced path.
     */
    public static function removeDotSegments($input)
    {
        // 1. The input buffer is initialized with the now-appended path
        //    components and the output buffer is initialized to the empty
        //    string.
        $output = '';

        // 2.
// While the input buffer is not empty, loop as follows:
        while (!empty($input)) {
            // A. If the input buffer begins with a prefix of "../" or "./",
            //    then remove that prefix from the input buffer; otherwise,
            if (StringUtil::startsWith($input, '../')) {
                $input = substr($input, 3);
            } elseif (StringUtil::startsWith($input, './')) {
                $input = substr($input, 2);

            // B. if the input buffer begins with a prefix of "/./" or "/.",
            //    where "." is a complete path segment, then replace that
            //    prefix with "/" in the input buffer; otherwise,
            } elseif (StringUtil::startsWith($input, '/./')) {
                $input = substr($input, 2);
            } elseif ($input === '/.') {
                $input = '/';

            // C. if the input buffer begins with a prefix of "/../" or "/..",
            //    where ".." is a complete path segment, then replace that
            //    prefix with "/" in the input buffer and remove the last
            //    segment and its preceding "/" (if any) from the output
            //    buffer; otherwise,
            } elseif (StringUtil::startsWith($input, '/../')) {
                $input = substr($input, 3);
                $output = substr_replace($output, '', StringUtil::reversePosition($output, '/'));
            } elseif ($input === '/..') {
                $input = '/';
                $output = substr_replace($output, '', StringUtil::reversePosition($output, '/'));

            // D. if the input buffer consists only of "." or "..", then remove
            //    that from the input buffer; otherwise,
            } elseif ($input === '.' || $input === '..') {
                $input = '';

            // E. move the first path segment in the input buffer to the end of
            //    the output buffer, including the initial "/" character (if
            //    any) and any subsequent characters up to, but not including,
            //    the next "/" character or the end of the input buffer.
            } elseif (!(($pos = StringUtil::position($input, '/', 1)) === false)) {
                $output .= substr($input, 0, $pos);
                $input = substr_replace($input, '', 0, $pos);
            } else {
                $output .= $input;
                $input = '';
            }
        }

        // 3. Finally, the output buffer is returned as the result of
        //    remove_dot_segments.
        return $output . $input;
    }

    /**
     * Absolutize url.
     *
     * Combine the base and relative url into an absolute url.
     *
     * A relative reference that carries its own scheme or authority replaces
     * the base's authority, path and query; otherwise the base authority is
     * kept and the relative path is merged onto the base path. The fragment
     * comes from the base only when the relative url is the empty string.
     *
     * @return string
     */
    private function absolutizeUrl()
    {
        $b = $this->parseUrl($this->baseUrl);
        if (!isset($b['path'])) {
            $b['path'] = '/';
        }
        if ($this->relativeUrl === null) {
            return $this->unparseUrl($b);
        }
        $r = $this->parseUrl($this->relativeUrl);

        $authority_keys = array('host', 'port', 'user', 'pass');
        $has_scheme = isset($r['scheme']);
        $has_authority = false;
        foreach ($authority_keys as $key) {
            if (isset($r[$key])) {
                $has_authority = true;
                break;
            }
        }

        $target = array();
        if ($has_scheme || $has_authority) {
            // The reference names its own scheme and/or authority: everything
            // below the scheme is taken from the reference.
            if ($has_scheme) {
                $target['scheme'] = $r['scheme'];
            } else {
                $target['scheme'] = isset($b['scheme']) ? $b['scheme'] : null;
            }
            foreach ($authority_keys as $key) {
                $target[$key] = isset($r[$key]) ? $r[$key] : null;
            }
            $target['path'] = isset($r['path']) ? self::removeDotSegments($r['path']) : null;
            $target['query'] = isset($r['query']) ? $r['query'] : null;
        } else {
            // Scheme and authority are inherited from the base.
            $target['scheme'] = isset($b['scheme']) ? $b['scheme'] : null;
            foreach ($authority_keys as $key) {
                $target[$key] = isset($b[$key]) ? $b[$key] : null;
            }
            if (!isset($r['path']) || $r['path'] === '') {
                // No path in the reference: keep the base path, and the base
                // query unless the reference supplies one.
                $target['path'] = $b['path'];
                $target['query'] = isset($r['query']) ? $r['query'] : (isset($b['query']) ? $b['query'] : null);
            } else {
                if (StringUtil::startsWith($r['path'], '/')) {
                    $target['path'] = self::removeDotSegments($r['path']);
                } else {
                    // Merge the relative path onto the base path.
                    $base = StringUtil::characterReversePosition($b['path'], '/', true);
                    if ($base === false) {
                        $base = '';
                    }
                    $target['path'] = self::removeDotSegments($base . '/' . $r['path']);
                }
                $target['query'] = isset($r['query']) ? $r['query'] : null;
            }
        }

        if ($this->relativeUrl === '') {
            $target['fragment'] = isset($b['fragment']) ? $b['fragment'] : null;
        } else {
            $target['fragment'] = isset($r['fragment']) ? $r['fragment'] : null;
        }

        return $this->unparseUrl($target);
    }

    /**
     * Parse url.
     *
     * Parse url into components of a URI as specified by RFC 3986.
     *
     * Only components that are present and non-empty get a key in the result
     * (scheme, user, pass, host, port, path, query, fragment); the port key
     * may hold an empty string when the authority ends in ":".
     *
     * @param  $url
     * @return array
     */
    private function parseUrl($url)
    {
        // RFC 3986 - Parsing a URI Reference with a Regular Expression.
        //       ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
        //        12            3  4          5       6  7        8 9
        //
        // "http://www.ics.uci.edu/pub/ietf/uri/#Related"
        // $1 = http: (scheme)
        // $2 = http (scheme)
        // $3 = //www.ics.uci.edu (ignore)
        // $4 = www.ics.uci.edu (authority)
        // $5 = /pub/ietf/uri/ (path)
        // $6 = (ignore)
        // $7 = (query)
        // $8 = #Related (ignore)
        // $9 = Related (fragment)
        preg_match('/^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/', $url, $output_array);

        $parts = array();
        // Group 2 is the scheme without the trailing ":".
        if (isset($output_array[2]) && $output_array[2] !== '') {
            $parts['scheme'] = $output_array[2];
        }
        if (isset($output_array[4]) && $output_array[4] !== '') {
            // authority = [ userinfo "@" ] host [ ":" port ]
            $authority = $output_array[4];

            // Split off userinfo first so that a ":" inside it is never taken
            // for the port separator, and so that it is recognised even when
            // no port follows the host.
            $at = strrpos($authority, '@');
            if ($at !== false) {
                $user_parts = explode(':', substr($authority, 0, $at), 2);
                $parts['user'] = $user_parts[0];
                if (isset($user_parts[1])) {
                    $parts['pass'] = $user_parts[1];
                }
                $authority = (string) substr($authority, $at + 1);
            }

            // A port is a trailing ":" followed only by digits. A bracketed
            // IPv6 literal without a port ends in "]" and is left intact.
            if (preg_match('/^(.*):(\d*)$/', $authority, $host_port)) {
                $parts['host'] = $host_port[1];
                $parts['port'] = $host_port[2];
            } else {
                $parts['host'] = $authority;
            }
        }
        if (isset($output_array[5]) && $output_array[5] !== '') {
            $parts['path'] = $this->percentEncodeChars($output_array[5]);
        }
        if (isset($output_array[7]) && $output_array[7] !== '') {
            $parts['query'] = $output_array[7];
        }
        if (isset($output_array[9]) && $output_array[9] !== '') {
            $parts['fragment'] = $output_array[9];
        }
        return $parts;
    }

    /**
     * Percent-encode characters.
     *
     * Percent-encode characters to represent a data octet in a component when
     * that octet's corresponding character is outside the allowed set. A "%"
     * that is already followed by two hex digits is left untouched.
     *
     * @param  $chars
     * @return string
     */
    private function percentEncodeChars($chars)
    {
        // ALPHA         = A-Z / a-z
        $alpha = 'A-Za-z';

        // DIGIT         = 0-9
        $digit = '0-9';

        // unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
        $unreserved = $alpha . $digit . preg_quote('-._~');

        // sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
        //               / "*" / "+" / "," / ";" / "="
        // "#" is not an RFC 3986 sub-delim, but this implementation also
        // leaves it unencoded.
        $sub_delims = preg_quote('!$&\'()*+,;=#');

        // HEXDIG         =  DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
        $hexdig = $digit . 'A-F';
        // "The uppercase hexadecimal digits 'A' through 'F' are equivalent to
        // the lowercase digits 'a' through 'f', respectively."
        $hexdig .= 'a-f';

        // Match either a run of disallowed characters or a "%" that does not
        // start a valid percent-encoded triplet.
        $pattern = '/(?:[^' . $unreserved . $sub_delims . preg_quote(':@%/?', '/') . ']++|%(?![' . $hexdig . ']{2}))/';
        $percent_encoded_chars = preg_replace_callback(
            $pattern,
            function ($matches) {
                return rawurlencode($matches[0]);
            },
            $chars
        );
        return $percent_encoded_chars;
    }

    /**
     * Unparse url.
     *
     * Combine url components into a url. Components that are not set
     * contribute nothing to the result.
     *
     * @param  $parsed_url
     * @return string
     */
    private function unparseUrl($parsed_url)
    {
        $scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
        $user = isset($parsed_url['user']) ? $parsed_url['user'] : '';
        $pass = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
        // Compare against '' rather than relying on truthiness so that a user
        // name of "0" still gets its "@" separator.
        $pass = ($user !== '' || $pass !== '') ? $pass . '@' : '';
        $host = isset($parsed_url['host']) ? $parsed_url['host'] : '';
        $port = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';
        $path = isset($parsed_url['path']) ? $parsed_url['path'] : '';
        $query = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : '';
        $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : '';
        $unparsed_url = $scheme . $user . $pass . $host . $port . $path . $query . $fragment;
        return $unparsed_url;
    }
}
--------------------------------------------------------------------------------
/data_vnexpress/useLibs/library/php-curl-class-master.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bacdong/PHP_Web-crawler/c94118d752a6a39026f3a542ec00c1315059a745/data_vnexpress/useLibs/library/php-curl-class-master.zip
--------------------------------------------------------------------------------
/data_vnexpress/useLibs/text.php:
--------------------------------------------------------------------------------
1 | load($html ->save()); 11 | 12 | // get latest news 13 | $items = $html->find('article.item-news.full-thumb.article-topstory', 0); 14 | $linkItems = $items->find('.thumb-art a', 0)->href; 15 | $thumbnail = $items->find('img', 0)->src; 16 | $title = $items->find('h3', 0)->plaintext; 17 | $description = $items->find('p.description a', 0)->plaintext; 18 | // $created_at = $items->find('p.meta-news span.time-public span.time-ago', 0)->datetime; 19 | 20 | 21 | // get news detail 22 | $urlPost = $linkItems.'/'; 23 | $htmlDetail = file_get_html($urlPost); 24 | $htmlDetail ->load($htmlDetail ->save()); 25 | $content = $htmlDetail->find('p'); 26 | $string = ""; 27 | foreach ($content as $item) { 28 | $string .= $item->plaintext.'
'.'
'; 29 | // } 30 | 31 | $qry = "SELECT * FROM latestnews"; 32 | $rs = $connection->query($qry)->fetch_assoc(); 33 | $id = $rs['id']; 34 | 35 | $queryContent = "INSERT INTO news_detail(content) VALUES('".$string."') WHERE news_id = ".$id; 36 | die($queryContent); 37 | 38 | $query = 'INSERT INTO latestnews(title, thumbnail, content, created_at) VALUES("'.$title.'", "'.$thumbnail.'", "'.$description.'", "'.$created_at.'")'; 39 | ?> -------------------------------------------------------------------------------- /first_learning/api.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /first_learning/controller.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /first_learning/index.php: -------------------------------------------------------------------------------- 1 | 0, 6 | CURLOPT_URL => $url, 7 | CURLOPT_USERAGENT => 'thongvang.local/php_curl', 8 | CURLOPT_SSL_VERIFYPEER => false 9 | )); 10 | 11 | $resp = curl_exec($curl); 12 | 13 | //Dữ liệu thời tiết ở dạng JSON 14 | $weather = json_decode($resp); 15 | var_dump($weather); 16 | 17 | curl_close($curl); 18 | ?> -------------------------------------------------------------------------------- /first_learning/showResult.php: -------------------------------------------------------------------------------- 1 | 2 | $items): ?> 3 |4 |12 | 13 | 14 |Thời gian:
5 |Nhiệt độ: °C
6 |Độ ẩm: %
7 |Mực nước biển: m
8 |Trạng thái:
9 |10 |
Sức gió: m/h
11 |Không có kết quả
15 | -------------------------------------------------------------------------------- /first_learning/view.php: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |Thời tiết 13 | 14 | 15 |16 |34 | 35 | --------------------------------------------------------------------------------17 | 33 |18 | 19 |20 | 26 |27 | 28 |29 |30 | 31 | 32 |