├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── composer.json └── src └── OneTitle.php /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | vendor 3 | test.php 4 | composer.lock 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.0.0 - 2016-06-11 4 | 5 | * First release 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | 1. Fork this repository on GitHub 4 | 2. Make your changes on a separate feature branch 5 | 3. Send a pull request from your feature branch to this repository's master branch. 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 xu42 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Imdb 2 | 3 | [![Latest Version on Packagist][ico-version]][link-packagist] 4 | [![Software License][ico-license]](LICENSE) 5 | [![Total Downloads][ico-downloads]][link-downloads] 6 | 7 | Crawl IMDb movie rating, content rating, release date, poster, presentation, director, actor, duration and other information 8 | 9 | ## Install 10 | 11 | Via Composer 12 | 13 | ``` bash 14 | $ composer require xu42/imdb 15 | ``` 16 | 17 | ## Usage 18 | 19 | ``` php 20 | require_once 'vendor/autoload.php'; 21 | use Xu42\Imdb\OneTitle; 22 | $res = OneTitle::singleton()->get('tt0111161'); 23 | echo json_encode($res) . PHP_EOL; 24 | ``` 25 | 26 | ## Change log 27 | 28 | Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. 29 | 30 | ## Testing 31 | 32 | Tests unavailable. 33 | 34 | ## Contributing 35 | 36 | Please see [CONTRIBUTING](CONTRIBUTING.md) for details. 37 | 38 | ## Security 39 | 40 | If you discover any security-related issues, please use the issue tracker. 41 | 42 | ## Credits 43 | 44 | - [Xu42](https://github.com/xu42) 45 | - [All Contributors](https://github.com/xu42/IMDb/contributors) 46 | 47 | ## License 48 | 49 | The MIT License (MIT). Please see [License File](LICENSE) for more information. 
50 | 51 | [ico-version]: https://img.shields.io/packagist/v/xu42/imdb.svg?style=flat-square 52 | [ico-license]: https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square 53 | [ico-downloads]: https://img.shields.io/packagist/dt/xu42/imdb.svg?style=flat-square 54 | 55 | [link-packagist]: https://packagist.org/packages/xu42/imdb 56 | [link-travis]: https://travis-ci.org/xu42/imdb 57 | [link-scrutinizer]: https://scrutinizer-ci.com/g/xu42/imdb/code-structure 58 | [link-code-quality]: https://scrutinizer-ci.com/g/xu42/imdb 59 | [link-downloads]: https://packagist.org/packages/xu42/imdb 60 | [link-author]: https://github.com/xu42 61 | [link-contributors]: ../../contributors 62 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "xu42/imdb", 3 | "description": "Crawl IMDb movie rating, content rating, release date, poster, presentation, director, actor, duration and other information", 4 | "keywords": [ 5 | "xu42", 6 | "imdb" 7 | ], 8 | "homepage": "https://github.com/xu42/IMDb", 9 | "license": "MIT", 10 | "authors": [ 11 | { 12 | "name": "xu42", 13 | "homepage": "http://xu42.cn", 14 | "role": "Developer" 15 | } 16 | ], 17 | "require": { 18 | "php" : "~5.5|~7.0", 19 | "xu42/php-utils": "^0.0.1" 20 | }, 21 | "autoload": { 22 | "psr-4": { 23 | "Xu42\\Imdb\\": "src" 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/OneTitle.php: -------------------------------------------------------------------------------- 1 | getMsgOfOneTitle($title); 37 | }

    /**
     * Download the web page found at $this->oneTitleUrl.
     *
     * The response body is stored in $this->oneTitleWebPage and returned.
     * curl_exec() yields false when the transfer fails; that case is stored
     * as an empty string so callers always receive a string and can treat
     * "empty" as "nothing fetched".
     *
     * @return string Raw response body, or '' when the request failed
     */
    private function getWebPageOfOneTitle()
    {
        $headers = [
            'Host: www.imdb.com',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Upgrade-Insecure-Requests: 1',
            'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.108 Safari/537.36',
            'Referer: http://www.imdb.com',
            'Accept-Language: zh-CN,zh;q=0.8'
        ];

        $response = false;
        $ch = curl_init();
        if ($ch !== false) {
            curl_setopt($ch, CURLOPT_URL, $this->oneTitleUrl);
            // CURLOPT_BINARYTRANSFER is intentionally not set: it has had no
            // effect since PHP 5.1.3 (CURLOPT_RETURNTRANSFER already returns
            // the raw body).
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, 60);
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

            $response = curl_exec($ch);
            curl_close($ch);
        }

        $this->oneTitleWebPage = is_string($response) ? $response : '';
        return $this->oneTitleWebPage;
    }

    /**
     * Scrape the details of one IMDb title from its web page.
     *
     * The page is downloaded when no page is cached yet or when $title differs
     * from the title requested by the previous call; repeated calls for the
     * same title reuse the cached page. Every field that cannot be found in
     * the page is returned as an empty string (scalar fields) or null
     * (director / writers / cast lists).
     *
     * @param string $title IMDb title identifier, e.g. 'tt0111161'
     * @return array Keys: title, rating_value, rating_count, content_rating,
     *               date_published, poster_small, poster_big, description,
     *               director (list of [id, name] or null),
     *               writers (list of [id, name, note] or null),
     *               cast (list of [id, name, photo_small, photo_big] or null),
     *               storyline, taglines, genres (list of strings),
     *               details (country, language, release_date),
     *               box_office (budget, gross),
     *               technical_specs (duration, sound_mix, color, aspect_ratio)
     */
    public function getMsgOfOneTitle($title)
    {
        // The cached page belongs to the previously requested title. Without
        // the title comparison a second call with a different title would
        // silently return the first title's data.
        $needsFetch = $this->oneTitle !== $title
            || strlen((string) $this->oneTitleWebPage) === 0;

        $this->oneTitle = $title;
        $this->oneTitleUrl = $this->oneTitlePrefixUrl . $title . '/?ref_=nv_mv_dflt_1';

        if ($needsFetch) {
            $this->getWebPageOfOneTitle();
        }
        $page = (string) $this->oneTitleWebPage;

        // Poster: the big variant is derived from the thumbnail URL.
        $posterSmall = $this->matchField('#Poster"\nsrc="(.*?)"#', $page);
        $posterBig = $this->fullSizeImageUrl($posterSmall, '.jpg');

        // Director: ids and names are collected separately and paired by position.
        $directorHtml = $this->matchGroup('#Director:(.*?)</span#s', $page, 0);
        preg_match_all('#/name/(.*?)\?#', $directorHtml, $directorIds);
        preg_match_all('#name">(.*?)</span#', $directorHtml, $directorNames);
        $directors = [];
        foreach ($directorIds[1] as $i => $id) {
            $directors[] = [
                $id,
                $directorNames[1][$i] ?? null
            ];
        }

        // Writers: id, name and the parenthesised note, paired by position.
        // NOTE(review): the notes are matched independently of the names, so a
        // writer without a "(...)" note shifts the notes of later writers.
        $writersHtml = $this->matchGroup('#Writers:(.*?)</div#s', $page, 0);
        preg_match_all('#/name/(.*?)\?#', $writersHtml, $writerIds);
        preg_match_all('#name">(.*?)</span#', $writersHtml, $writerNames);
        preg_match_all('#\((.*?)\)#', $writersHtml, $writerNotes);
        $writers = [];
        foreach ($writerIds[1] as $i => $id) {
            $writers[] = [
                $id,
                $writerNames[1][$i] ?? null,
                $writerNotes[1][$i] ?? null
            ];
        }

        // Cast: one entry per table row whose class starts with "odd" or "even".
        // (The original "[odd|even]" was a character class matching a single
        // character, not the two words.)
        $castHtml = $this->matchGroup('#cast_list">(.*?)<\/table#s', $page);
        preg_match_all('#class="(?:odd|even)(.*?)<\/tr#s', $castHtml, $castRows);
        $cast = [];
        foreach ($castRows[1] as $rowHtml) {
            $photoSmall = $this->matchField('#loadlate="(.*?)"#', $rowHtml);
            $cast[] = [
                'id' => $this->matchField('#/name/(.*?)/\?#', $rowHtml),
                'name' => $this->matchField('#alt="(.*?)"#', $rowHtml),
                'photo_small' => $photoSmall,
                // NOTE(review): the poster appends '.jpg' but cast photos append
                // 'jpg' (no dot). Both suffixes are kept as they were; confirm
                // which one yields a valid URL.
                'photo_big' => $this->fullSizeImageUrl($photoSmall, 'jpg')
            ];
        }

        // Genres
        preg_match_all('#href="/genre/(.*?)\?ref_=tt_stry_gnr#', $page, $genres);

        // Details
        $details = [
            'country' => $this->matchField(
                '#href="/country/\w.*\?ref_=tt_dt_dt"\nitemprop=\'url\'>(\w.*)<#',
                $page
            ),
            'language' => $this->matchField(
                '#href="/language/\w.*\?ref_=tt_dt_dt"\nitemprop=\'url\'>(\w.*)<#',
                $page
            ),
            'release_date' => $this->matchField('#Release Date:</h4>(.*?)\n#', $page)
        ];

        // Box Office
        $boxOffice = [
            'budget' => $this->matchField('#Budget:</h4>\s(.*?)\s\n#', $page),
            'gross' => $this->matchField('#Gross:</h4>\s(.*?)\s\n#', $page)
        ];

        // Technical Specs: narrowed to their own section first.
        $specsHtml = $this->matchGroup('#Technical Specs</h3>(.*?)technical specs#s', $page);
        $technicalSpecs = [
            'duration' => $this->matchField('#>(\d.*\smin)<#', $specsHtml),
            'sound_mix' => $this->matchField('#sound_mixes=(\w.*)&#', $specsHtml),
            'color' => $this->matchField('#colors=(\w.*)&#', $specsHtml),
            'aspect_ratio' => $this->matchField('#Aspect Ratio:</h4>\s(.*?)\n#', $specsHtml)
        ];

        return [
            // Anchored on the <title> element: the unanchored '(.*?) -' matched
            // whatever text preceded the first " -" anywhere in the page.
            'title' => $this->matchField('#<title>(.*?) -#', $page),
            'rating_value' => $this->matchField('#ratingValue">(\d\.\d)<#', $page),
            'rating_count' => $this->matchField('#ratingCount">(.*?)<#', $page),
            'content_rating' => $this->matchField('#contentRating" content="(.*?)"#', $page),
            'date_published' => $this->matchField('#datePublished" content="(.*?)"#', $page),
            'poster_small' => $posterSmall,
            'poster_big' => $posterBig,
            'description' => $this->matchField('#description">(.*?)<#s', $page),
            // Empty lists are reported as null, as before.
            'director' => $directors ?: null,
            'writers' => $writers ?: null,
            'cast' => $cast ?: null,
            'storyline' => $this->matchField('#<p>\n(.*?)<em#', $page),
            'taglines' => $this->matchField('#Taglines:</h4>\n(.*?)</div#s', $page),
            'genres' => $genres[1] ?? null,
            'details' => $details,
            'box_office' => $boxOffice,
            'technical_specs' => $technicalSpecs,
        ];
    }

    /**
     * Return capture group $group of the first match of $pattern in $subject.
     *
     * @param string $pattern PCRE pattern including delimiters
     * @param string $subject Text to search
     * @param int    $group   Capture group index (0 = the whole match)
     * @return string The captured text, or '' when the pattern does not match
     */
    private function matchGroup($pattern, $subject, $group = 1)
    {
        if (preg_match($pattern, (string) $subject, $match) !== 1) {
            return '';
        }
        return $match[$group] ?? '';
    }

    /**
     * Return the trimmed first capture group of $pattern in $subject.
     *
     * @param string $pattern PCRE pattern including delimiters
     * @param string $subject Text to search
     * @return string Trimmed capture, or '' when the pattern does not match
     */
    private function matchField($pattern, $subject)
    {
        return trim($this->matchGroup($pattern, $subject, 1));
    }

    /**
     * Derive the "big" image URL from a thumbnail URL by cutting the URL at
     * its first underscore and appending $suffix.
     *
     * @param string $thumbnailUrl Thumbnail URL, may be ''
     * @param string $suffix       Text appended after the cut, e.g. '.jpg'
     * @return string '' for an empty input; the input itself when it contains
     *                no underscore; otherwise the cut URL plus $suffix
     */
    private function fullSizeImageUrl($thumbnailUrl, $suffix)
    {
        if (empty($thumbnailUrl)) {
            return '';
        }
        $base = strstr($thumbnailUrl, '_', true);
        if ($base === false) {
            // Nothing to cut: the old code returned just the bare suffix here,
            // which is never a usable URL.
            return $thumbnailUrl;
        }
        return trim($base . $suffix);
    }
}
--------------------------------------------------------------------------------