├── .gitignore ├── README.md ├── blacklist.txt ├── cache └── .gitkeep ├── common-sites.json ├── composer.json ├── composer.lock ├── composer.phar ├── example.env ├── index.php ├── lib ├── FileSystemCache │ ├── .gitignore │ ├── .travis.yml │ ├── README.md │ ├── composer.json │ ├── lib │ │ └── FileSystemCache.php │ ├── phpunit.xml.dist │ └── tests │ │ └── FileSystemCacheTest.php ├── ansi-color.php └── fivefilters-php-readability │ ├── JSLikeHTMLElement.php │ ├── README.md │ ├── Readability.php │ ├── composer.json │ └── examples │ ├── JSLikeHTMLElement.php │ └── Readability.php ├── src ├── Fetcher.php ├── Generator.php ├── Parser.php ├── Uploader.php └── templates │ ├── fullhn.manifest.mustache │ ├── index.mustache │ ├── latest.mustache │ └── partials │ └── head.mustache └── www ├── apple-touch-icon.png ├── css ├── img │ └── loader.gif └── style.css ├── favicon.ico ├── js ├── app.js ├── jquery-2.0.3.min.js ├── moment.min.js └── waypoints.min.js └── robots.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env 3 | index.html 4 | latest.html 5 | cache.manifest 6 | cache/* 7 | vendor/* 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FullHackerNews 2 | 3 | Read all Hacker News articles in one single static page, optimized for reading. 4 | I use it to load all articles for offline reading on my iPhone. 5 | 6 | Can work with any other feed. 
7 | 8 | # Requirements 9 | * PHP >= 7.3 10 | * Amazon S3 account 11 | 12 | # Installing 13 | 14 | * Make the `cache` folder writable 15 | * Create an S3 bucket, configured as a Web server 16 | * Upload the contents of `www` to the S3 bucket 17 | * Copy `example.env` to `.env` and update the values, or set environment variables 18 | * Install dependencies: `$ php composer.phar install` 19 | * Run `php index.php` periodically 20 | * Enjoy 21 | 22 | # License 23 | 24 | This project is released under the BSD license. 25 | -------------------------------------------------------------------------------- /blacklist.txt: -------------------------------------------------------------------------------- 1 | www.fullhn.com 2 | dolphin-emu.org -------------------------------------------------------------------------------- /cache/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mauricesvay/FullHackerNews/73117bd1f81ddb163048e5828ee6da1edabd157e/cache/.gitkeep -------------------------------------------------------------------------------- /common-sites.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "github", 4 | "pattern": "/http(s)?:\\/\\/github.com\\/([^\\/]+)\\/([^\\/]+)/", 5 | "path": "#readme > article.entry-content" 6 | }, 7 | { 8 | "name": "tweet", 9 | "pattern": "/http(s)?:\\/\\/twitter.com\\/(\\S+)\\/status\\/\\d+/", 10 | "path": ".js-tweet-text-container > .TweetTextSize--jumbo" 11 | }, 12 | { 13 | "name": "tweet (mobile)", 14 | "pattern": "/http(s)?:\\/\\/mobile.twitter.com\\/(\\S+)\\/status\\/\\d+/", 15 | "path": "[data-testid='tweetDetail']" 16 | }, 17 | { 18 | "name": "arxiv.org", 19 | "pattern": "/http(s)?:\\/\\/arxiv.org\\/abs\\/(\\S+)/", 20 | "path": ".abstract" 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /composer.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "require": { 3 | "mustache/mustache": "^2.12", 4 | "ezyang/htmlpurifier": "^4.10", 5 | "simplepie/simplepie": "^1.5", 6 | "guzzlehttp/guzzle": "^7.0", 7 | "euskadi31/opengraph": "^1.0", 8 | "aws/aws-sdk-php": "^3.99", 9 | "vlucas/phpdotenv": "^3.3", 10 | "paquettg/php-html-parser": "^3.1" 11 | } 12 | } -------------------------------------------------------------------------------- /composer.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_readme": [ 3 | "This file locks the dependencies of your project to a known state", 4 | "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", 5 | "This file is @generated automatically" 6 | ], 7 | "content-hash": "137a1b4f54362a7dee1624a111fdbb25", 8 | "packages": [ 9 | { 10 | "name": "aws/aws-crt-php", 11 | "version": "v1.0.2", 12 | "source": { 13 | "type": "git", 14 | "url": "https://github.com/awslabs/aws-crt-php.git", 15 | "reference": "3942776a8c99209908ee0b287746263725685732" 16 | }, 17 | "dist": { 18 | "type": "zip", 19 | "url": "https://api.github.com/repos/awslabs/aws-crt-php/zipball/3942776a8c99209908ee0b287746263725685732", 20 | "reference": "3942776a8c99209908ee0b287746263725685732", 21 | "shasum": "" 22 | }, 23 | "require": { 24 | "php": ">=5.5" 25 | }, 26 | "require-dev": { 27 | "phpunit/phpunit": "^4.8.35|^5.4.3" 28 | }, 29 | "type": "library", 30 | "autoload": { 31 | "classmap": [ 32 | "src/" 33 | ] 34 | }, 35 | "notification-url": "https://packagist.org/downloads/", 36 | "license": [ 37 | "Apache-2.0" 38 | ], 39 | "authors": [ 40 | { 41 | "name": "AWS SDK Common Runtime Team", 42 | "email": "aws-sdk-common-runtime@amazon.com" 43 | } 44 | ], 45 | "description": "AWS Common Runtime for PHP", 46 | "homepage": "http://aws.amazon.com/sdkforphp", 47 | "keywords": [ 48 | "amazon", 49 | "aws", 50 | "crt", 51 | "sdk" 52 | ], 53 | "support": { 
54 | "issues": "https://github.com/awslabs/aws-crt-php/issues", 55 | "source": "https://github.com/awslabs/aws-crt-php/tree/v1.0.2" 56 | }, 57 | "time": "2021-09-03T22:57:30+00:00" 58 | }, 59 | { 60 | "name": "aws/aws-sdk-php", 61 | "version": "3.194.5", 62 | "source": { 63 | "type": "git", 64 | "url": "https://github.com/aws/aws-sdk-php.git", 65 | "reference": "33f755378debdbc7e010157811fc47aebf090c53" 66 | }, 67 | "dist": { 68 | "type": "zip", 69 | "url": "https://api.github.com/repos/aws/aws-sdk-php/zipball/33f755378debdbc7e010157811fc47aebf090c53", 70 | "reference": "33f755378debdbc7e010157811fc47aebf090c53", 71 | "shasum": "" 72 | }, 73 | "require": { 74 | "aws/aws-crt-php": "^1.0.2", 75 | "ext-json": "*", 76 | "ext-pcre": "*", 77 | "ext-simplexml": "*", 78 | "guzzlehttp/guzzle": "^5.3.3|^6.2.1|^7.0", 79 | "guzzlehttp/promises": "^1.4.0", 80 | "guzzlehttp/psr7": "^1.7.0", 81 | "mtdowling/jmespath.php": "^2.6", 82 | "php": ">=5.5" 83 | }, 84 | "require-dev": { 85 | "andrewsville/php-token-reflection": "^1.4", 86 | "aws/aws-php-sns-message-validator": "~1.0", 87 | "behat/behat": "~3.0", 88 | "doctrine/cache": "~1.4", 89 | "ext-dom": "*", 90 | "ext-openssl": "*", 91 | "ext-pcntl": "*", 92 | "ext-sockets": "*", 93 | "nette/neon": "^2.3", 94 | "paragonie/random_compat": ">= 2", 95 | "phpunit/phpunit": "^4.8.35|^5.4.3", 96 | "psr/cache": "^1.0", 97 | "psr/simple-cache": "^1.0", 98 | "sebastian/comparator": "^1.2.3" 99 | }, 100 | "suggest": { 101 | "aws/aws-php-sns-message-validator": "To validate incoming SNS notifications", 102 | "doctrine/cache": "To use the DoctrineCacheAdapter", 103 | "ext-curl": "To send requests using cURL", 104 | "ext-openssl": "Allows working with CloudFront private distributions and verifying received SNS messages", 105 | "ext-sockets": "To use client-side monitoring" 106 | }, 107 | "type": "library", 108 | "extra": { 109 | "branch-alias": { 110 | "dev-master": "3.0-dev" 111 | } 112 | }, 113 | "autoload": { 114 | "psr-4": { 115 | "Aws\\": 
"src/" 116 | }, 117 | "files": [ 118 | "src/functions.php" 119 | ] 120 | }, 121 | "notification-url": "https://packagist.org/downloads/", 122 | "license": [ 123 | "Apache-2.0" 124 | ], 125 | "authors": [ 126 | { 127 | "name": "Amazon Web Services", 128 | "homepage": "http://aws.amazon.com" 129 | } 130 | ], 131 | "description": "AWS SDK for PHP - Use Amazon Web Services in your PHP project", 132 | "homepage": "http://aws.amazon.com/sdkforphp", 133 | "keywords": [ 134 | "amazon", 135 | "aws", 136 | "cloud", 137 | "dynamodb", 138 | "ec2", 139 | "glacier", 140 | "s3", 141 | "sdk" 142 | ], 143 | "support": { 144 | "forum": "https://forums.aws.amazon.com/forum.jspa?forumID=80", 145 | "issues": "https://github.com/aws/aws-sdk-php/issues", 146 | "source": "https://github.com/aws/aws-sdk-php/tree/3.194.5" 147 | }, 148 | "time": "2021-09-24T18:25:24+00:00" 149 | }, 150 | { 151 | "name": "euskadi31/opengraph", 152 | "version": "v1.0.0", 153 | "source": { 154 | "type": "git", 155 | "url": "https://github.com/euskadi31/Opengraph.git", 156 | "reference": "35645b1bbe9309f8b46734e3eb8f01e6613aeb52" 157 | }, 158 | "dist": { 159 | "type": "zip", 160 | "url": "https://api.github.com/repos/euskadi31/Opengraph/zipball/35645b1bbe9309f8b46734e3eb8f01e6613aeb52", 161 | "reference": "35645b1bbe9309f8b46734e3eb8f01e6613aeb52", 162 | "shasum": "" 163 | }, 164 | "require": { 165 | "php": ">=5.3.0" 166 | }, 167 | "require-dev": { 168 | "atoum/atoum": "dev-master" 169 | }, 170 | "type": "library", 171 | "autoload": { 172 | "psr-0": { 173 | "Opengraph": "src/" 174 | } 175 | }, 176 | "notification-url": "https://packagist.org/downloads/", 177 | "license": [ 178 | "MIT" 179 | ], 180 | "authors": [ 181 | { 182 | "name": "Axel Etcheverry", 183 | "email": "axel@etcheverry.biz", 184 | "homepage": "http://www.axel-etcheverry.com" 185 | } 186 | ], 187 | "description": "A PHP 5.3+ framework for OpenGraph Protocol", 188 | "keywords": [ 189 | "OpenGraph Protocol", 190 | "og", 191 | "opengraph", 192 | "sdk" 
193 | ], 194 | "support": { 195 | "issues": "https://github.com/euskadi31/Opengraph/issues", 196 | "source": "https://github.com/euskadi31/Opengraph/tree/master" 197 | }, 198 | "time": "2013-11-25T14:33:37+00:00" 199 | }, 200 | { 201 | "name": "ezyang/htmlpurifier", 202 | "version": "v4.13.0", 203 | "source": { 204 | "type": "git", 205 | "url": "https://github.com/ezyang/htmlpurifier.git", 206 | "reference": "08e27c97e4c6ed02f37c5b2b20488046c8d90d75" 207 | }, 208 | "dist": { 209 | "type": "zip", 210 | "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/08e27c97e4c6ed02f37c5b2b20488046c8d90d75", 211 | "reference": "08e27c97e4c6ed02f37c5b2b20488046c8d90d75", 212 | "shasum": "" 213 | }, 214 | "require": { 215 | "php": ">=5.2" 216 | }, 217 | "require-dev": { 218 | "simpletest/simpletest": "dev-master#72de02a7b80c6bb8864ef9bf66d41d2f58f826bd" 219 | }, 220 | "type": "library", 221 | "autoload": { 222 | "psr-0": { 223 | "HTMLPurifier": "library/" 224 | }, 225 | "files": [ 226 | "library/HTMLPurifier.composer.php" 227 | ], 228 | "exclude-from-classmap": [ 229 | "/library/HTMLPurifier/Language/" 230 | ] 231 | }, 232 | "notification-url": "https://packagist.org/downloads/", 233 | "license": [ 234 | "LGPL-2.1-or-later" 235 | ], 236 | "authors": [ 237 | { 238 | "name": "Edward Z. 
Yang", 239 | "email": "admin@htmlpurifier.org", 240 | "homepage": "http://ezyang.com" 241 | } 242 | ], 243 | "description": "Standards compliant HTML filter written in PHP", 244 | "homepage": "http://htmlpurifier.org/", 245 | "keywords": [ 246 | "html" 247 | ], 248 | "support": { 249 | "issues": "https://github.com/ezyang/htmlpurifier/issues", 250 | "source": "https://github.com/ezyang/htmlpurifier/tree/master" 251 | }, 252 | "time": "2020-06-29T00:56:53+00:00" 253 | }, 254 | { 255 | "name": "guzzlehttp/guzzle", 256 | "version": "7.3.0", 257 | "source": { 258 | "type": "git", 259 | "url": "https://github.com/guzzle/guzzle.git", 260 | "reference": "7008573787b430c1c1f650e3722d9bba59967628" 261 | }, 262 | "dist": { 263 | "type": "zip", 264 | "url": "https://api.github.com/repos/guzzle/guzzle/zipball/7008573787b430c1c1f650e3722d9bba59967628", 265 | "reference": "7008573787b430c1c1f650e3722d9bba59967628", 266 | "shasum": "" 267 | }, 268 | "require": { 269 | "ext-json": "*", 270 | "guzzlehttp/promises": "^1.4", 271 | "guzzlehttp/psr7": "^1.7 || ^2.0", 272 | "php": "^7.2.5 || ^8.0", 273 | "psr/http-client": "^1.0" 274 | }, 275 | "provide": { 276 | "psr/http-client-implementation": "1.0" 277 | }, 278 | "require-dev": { 279 | "bamarni/composer-bin-plugin": "^1.4.1", 280 | "ext-curl": "*", 281 | "php-http/client-integration-tests": "^3.0", 282 | "phpunit/phpunit": "^8.5.5 || ^9.3.5", 283 | "psr/log": "^1.1" 284 | }, 285 | "suggest": { 286 | "ext-curl": "Required for CURL handler support", 287 | "ext-intl": "Required for Internationalized Domain Name (IDN) support", 288 | "psr/log": "Required for using the Log middleware" 289 | }, 290 | "type": "library", 291 | "extra": { 292 | "branch-alias": { 293 | "dev-master": "7.3-dev" 294 | } 295 | }, 296 | "autoload": { 297 | "psr-4": { 298 | "GuzzleHttp\\": "src/" 299 | }, 300 | "files": [ 301 | "src/functions_include.php" 302 | ] 303 | }, 304 | "notification-url": "https://packagist.org/downloads/", 305 | "license": [ 306 | "MIT" 
307 | ], 308 | "authors": [ 309 | { 310 | "name": "Michael Dowling", 311 | "email": "mtdowling@gmail.com", 312 | "homepage": "https://github.com/mtdowling" 313 | }, 314 | { 315 | "name": "Márk Sági-Kazár", 316 | "email": "mark.sagikazar@gmail.com", 317 | "homepage": "https://sagikazarmark.hu" 318 | } 319 | ], 320 | "description": "Guzzle is a PHP HTTP client library", 321 | "homepage": "http://guzzlephp.org/", 322 | "keywords": [ 323 | "client", 324 | "curl", 325 | "framework", 326 | "http", 327 | "http client", 328 | "psr-18", 329 | "psr-7", 330 | "rest", 331 | "web service" 332 | ], 333 | "support": { 334 | "issues": "https://github.com/guzzle/guzzle/issues", 335 | "source": "https://github.com/guzzle/guzzle/tree/7.3.0" 336 | }, 337 | "funding": [ 338 | { 339 | "url": "https://github.com/GrahamCampbell", 340 | "type": "github" 341 | }, 342 | { 343 | "url": "https://github.com/Nyholm", 344 | "type": "github" 345 | }, 346 | { 347 | "url": "https://github.com/alexeyshockov", 348 | "type": "github" 349 | }, 350 | { 351 | "url": "https://github.com/gmponos", 352 | "type": "github" 353 | } 354 | ], 355 | "time": "2021-03-23T11:33:13+00:00" 356 | }, 357 | { 358 | "name": "guzzlehttp/promises", 359 | "version": "1.4.1", 360 | "source": { 361 | "type": "git", 362 | "url": "https://github.com/guzzle/promises.git", 363 | "reference": "8e7d04f1f6450fef59366c399cfad4b9383aa30d" 364 | }, 365 | "dist": { 366 | "type": "zip", 367 | "url": "https://api.github.com/repos/guzzle/promises/zipball/8e7d04f1f6450fef59366c399cfad4b9383aa30d", 368 | "reference": "8e7d04f1f6450fef59366c399cfad4b9383aa30d", 369 | "shasum": "" 370 | }, 371 | "require": { 372 | "php": ">=5.5" 373 | }, 374 | "require-dev": { 375 | "symfony/phpunit-bridge": "^4.4 || ^5.1" 376 | }, 377 | "type": "library", 378 | "extra": { 379 | "branch-alias": { 380 | "dev-master": "1.4-dev" 381 | } 382 | }, 383 | "autoload": { 384 | "psr-4": { 385 | "GuzzleHttp\\Promise\\": "src/" 386 | }, 387 | "files": [ 388 | 
"src/functions_include.php" 389 | ] 390 | }, 391 | "notification-url": "https://packagist.org/downloads/", 392 | "license": [ 393 | "MIT" 394 | ], 395 | "authors": [ 396 | { 397 | "name": "Michael Dowling", 398 | "email": "mtdowling@gmail.com", 399 | "homepage": "https://github.com/mtdowling" 400 | } 401 | ], 402 | "description": "Guzzle promises library", 403 | "keywords": [ 404 | "promise" 405 | ], 406 | "support": { 407 | "issues": "https://github.com/guzzle/promises/issues", 408 | "source": "https://github.com/guzzle/promises/tree/1.4.1" 409 | }, 410 | "time": "2021-03-07T09:25:29+00:00" 411 | }, 412 | { 413 | "name": "guzzlehttp/psr7", 414 | "version": "1.8.2", 415 | "source": { 416 | "type": "git", 417 | "url": "https://github.com/guzzle/psr7.git", 418 | "reference": "dc960a912984efb74d0a90222870c72c87f10c91" 419 | }, 420 | "dist": { 421 | "type": "zip", 422 | "url": "https://api.github.com/repos/guzzle/psr7/zipball/dc960a912984efb74d0a90222870c72c87f10c91", 423 | "reference": "dc960a912984efb74d0a90222870c72c87f10c91", 424 | "shasum": "" 425 | }, 426 | "require": { 427 | "php": ">=5.4.0", 428 | "psr/http-message": "~1.0", 429 | "ralouphie/getallheaders": "^2.0.5 || ^3.0.0" 430 | }, 431 | "provide": { 432 | "psr/http-message-implementation": "1.0" 433 | }, 434 | "require-dev": { 435 | "ext-zlib": "*", 436 | "phpunit/phpunit": "~4.8.36 || ^5.7.27 || ^6.5.14 || ^7.5.20 || ^8.5.8 || ^9.3.10" 437 | }, 438 | "suggest": { 439 | "laminas/laminas-httphandlerrunner": "Emit PSR-7 responses" 440 | }, 441 | "type": "library", 442 | "extra": { 443 | "branch-alias": { 444 | "dev-master": "1.7-dev" 445 | } 446 | }, 447 | "autoload": { 448 | "psr-4": { 449 | "GuzzleHttp\\Psr7\\": "src/" 450 | }, 451 | "files": [ 452 | "src/functions_include.php" 453 | ] 454 | }, 455 | "notification-url": "https://packagist.org/downloads/", 456 | "license": [ 457 | "MIT" 458 | ], 459 | "authors": [ 460 | { 461 | "name": "Michael Dowling", 462 | "email": "mtdowling@gmail.com", 463 | 
"homepage": "https://github.com/mtdowling" 464 | }, 465 | { 466 | "name": "Tobias Schultze", 467 | "homepage": "https://github.com/Tobion" 468 | } 469 | ], 470 | "description": "PSR-7 message implementation that also provides common utility methods", 471 | "keywords": [ 472 | "http", 473 | "message", 474 | "psr-7", 475 | "request", 476 | "response", 477 | "stream", 478 | "uri", 479 | "url" 480 | ], 481 | "support": { 482 | "issues": "https://github.com/guzzle/psr7/issues", 483 | "source": "https://github.com/guzzle/psr7/tree/1.8.2" 484 | }, 485 | "time": "2021-04-26T09:17:50+00:00" 486 | }, 487 | { 488 | "name": "mtdowling/jmespath.php", 489 | "version": "2.6.1", 490 | "source": { 491 | "type": "git", 492 | "url": "https://github.com/jmespath/jmespath.php.git", 493 | "reference": "9b87907a81b87bc76d19a7fb2d61e61486ee9edb" 494 | }, 495 | "dist": { 496 | "type": "zip", 497 | "url": "https://api.github.com/repos/jmespath/jmespath.php/zipball/9b87907a81b87bc76d19a7fb2d61e61486ee9edb", 498 | "reference": "9b87907a81b87bc76d19a7fb2d61e61486ee9edb", 499 | "shasum": "" 500 | }, 501 | "require": { 502 | "php": "^5.4 || ^7.0 || ^8.0", 503 | "symfony/polyfill-mbstring": "^1.17" 504 | }, 505 | "require-dev": { 506 | "composer/xdebug-handler": "^1.4 || ^2.0", 507 | "phpunit/phpunit": "^4.8.36 || ^7.5.15" 508 | }, 509 | "bin": [ 510 | "bin/jp.php" 511 | ], 512 | "type": "library", 513 | "extra": { 514 | "branch-alias": { 515 | "dev-master": "2.6-dev" 516 | } 517 | }, 518 | "autoload": { 519 | "psr-4": { 520 | "JmesPath\\": "src/" 521 | }, 522 | "files": [ 523 | "src/JmesPath.php" 524 | ] 525 | }, 526 | "notification-url": "https://packagist.org/downloads/", 527 | "license": [ 528 | "MIT" 529 | ], 530 | "authors": [ 531 | { 532 | "name": "Michael Dowling", 533 | "email": "mtdowling@gmail.com", 534 | "homepage": "https://github.com/mtdowling" 535 | } 536 | ], 537 | "description": "Declaratively specify how to extract elements from a JSON document", 538 | "keywords": [ 539 | 
"json", 540 | "jsonpath" 541 | ], 542 | "support": { 543 | "issues": "https://github.com/jmespath/jmespath.php/issues", 544 | "source": "https://github.com/jmespath/jmespath.php/tree/2.6.1" 545 | }, 546 | "time": "2021-06-14T00:11:39+00:00" 547 | }, 548 | { 549 | "name": "mustache/mustache", 550 | "version": "v2.13.0", 551 | "source": { 552 | "type": "git", 553 | "url": "https://github.com/bobthecow/mustache.php.git", 554 | "reference": "e95c5a008c23d3151d59ea72484d4f72049ab7f4" 555 | }, 556 | "dist": { 557 | "type": "zip", 558 | "url": "https://api.github.com/repos/bobthecow/mustache.php/zipball/e95c5a008c23d3151d59ea72484d4f72049ab7f4", 559 | "reference": "e95c5a008c23d3151d59ea72484d4f72049ab7f4", 560 | "shasum": "" 561 | }, 562 | "require": { 563 | "php": ">=5.2.4" 564 | }, 565 | "require-dev": { 566 | "friendsofphp/php-cs-fixer": "~1.11", 567 | "phpunit/phpunit": "~3.7|~4.0|~5.0" 568 | }, 569 | "type": "library", 570 | "autoload": { 571 | "psr-0": { 572 | "Mustache": "src/" 573 | } 574 | }, 575 | "notification-url": "https://packagist.org/downloads/", 576 | "license": [ 577 | "MIT" 578 | ], 579 | "authors": [ 580 | { 581 | "name": "Justin Hileman", 582 | "email": "justin@justinhileman.info", 583 | "homepage": "http://justinhileman.com" 584 | } 585 | ], 586 | "description": "A Mustache implementation in PHP.", 587 | "homepage": "https://github.com/bobthecow/mustache.php", 588 | "keywords": [ 589 | "mustache", 590 | "templating" 591 | ], 592 | "support": { 593 | "issues": "https://github.com/bobthecow/mustache.php/issues", 594 | "source": "https://github.com/bobthecow/mustache.php/tree/master" 595 | }, 596 | "time": "2019-11-23T21:40:31+00:00" 597 | }, 598 | { 599 | "name": "myclabs/php-enum", 600 | "version": "1.8.3", 601 | "source": { 602 | "type": "git", 603 | "url": "https://github.com/myclabs/php-enum.git", 604 | "reference": "b942d263c641ddb5190929ff840c68f78713e937" 605 | }, 606 | "dist": { 607 | "type": "zip", 608 | "url": 
"https://api.github.com/repos/myclabs/php-enum/zipball/b942d263c641ddb5190929ff840c68f78713e937", 609 | "reference": "b942d263c641ddb5190929ff840c68f78713e937", 610 | "shasum": "" 611 | }, 612 | "require": { 613 | "ext-json": "*", 614 | "php": "^7.3 || ^8.0" 615 | }, 616 | "require-dev": { 617 | "phpunit/phpunit": "^9.5", 618 | "squizlabs/php_codesniffer": "1.*", 619 | "vimeo/psalm": "^4.6.2" 620 | }, 621 | "type": "library", 622 | "autoload": { 623 | "psr-4": { 624 | "MyCLabs\\Enum\\": "src/" 625 | } 626 | }, 627 | "notification-url": "https://packagist.org/downloads/", 628 | "license": [ 629 | "MIT" 630 | ], 631 | "authors": [ 632 | { 633 | "name": "PHP Enum contributors", 634 | "homepage": "https://github.com/myclabs/php-enum/graphs/contributors" 635 | } 636 | ], 637 | "description": "PHP Enum implementation", 638 | "homepage": "http://github.com/myclabs/php-enum", 639 | "keywords": [ 640 | "enum" 641 | ], 642 | "support": { 643 | "issues": "https://github.com/myclabs/php-enum/issues", 644 | "source": "https://github.com/myclabs/php-enum/tree/1.8.3" 645 | }, 646 | "funding": [ 647 | { 648 | "url": "https://github.com/mnapoli", 649 | "type": "github" 650 | }, 651 | { 652 | "url": "https://tidelift.com/funding/github/packagist/myclabs/php-enum", 653 | "type": "tidelift" 654 | } 655 | ], 656 | "time": "2021-07-05T08:18:36+00:00" 657 | }, 658 | { 659 | "name": "paquettg/php-html-parser", 660 | "version": "3.1.1", 661 | "source": { 662 | "type": "git", 663 | "url": "https://github.com/paquettg/php-html-parser.git", 664 | "reference": "4e01a438ad5961cc2d7427eb9798d213c8a12629" 665 | }, 666 | "dist": { 667 | "type": "zip", 668 | "url": "https://api.github.com/repos/paquettg/php-html-parser/zipball/4e01a438ad5961cc2d7427eb9798d213c8a12629", 669 | "reference": "4e01a438ad5961cc2d7427eb9798d213c8a12629", 670 | "shasum": "" 671 | }, 672 | "require": { 673 | "ext-curl": "*", 674 | "ext-mbstring": "*", 675 | "ext-zlib": "*", 676 | "guzzlehttp/guzzle": "^7.0", 677 | 
"guzzlehttp/psr7": "^1.6", 678 | "myclabs/php-enum": "^1.7", 679 | "paquettg/string-encode": "~1.0.0", 680 | "php": ">=7.2", 681 | "php-http/httplug": "^2.1" 682 | }, 683 | "require-dev": { 684 | "friendsofphp/php-cs-fixer": "^2.16", 685 | "infection/infection": "^0.13.4", 686 | "mockery/mockery": "^1.2", 687 | "phan/phan": "^2.4", 688 | "phpunit/phpunit": "^7.5.1" 689 | }, 690 | "type": "library", 691 | "autoload": { 692 | "psr-4": { 693 | "PHPHtmlParser\\": "src/PHPHtmlParser" 694 | } 695 | }, 696 | "notification-url": "https://packagist.org/downloads/", 697 | "license": [ 698 | "MIT" 699 | ], 700 | "authors": [ 701 | { 702 | "name": "Gilles Paquette", 703 | "email": "paquettg@gmail.com", 704 | "homepage": "http://gillespaquette.ca" 705 | } 706 | ], 707 | "description": "An HTML DOM parser. It allows you to manipulate HTML. Find tags on an HTML page with selectors just like jQuery.", 708 | "homepage": "https://github.com/paquettg/php-html-parser", 709 | "keywords": [ 710 | "dom", 711 | "html", 712 | "parser" 713 | ], 714 | "support": { 715 | "issues": "https://github.com/paquettg/php-html-parser/issues", 716 | "source": "https://github.com/paquettg/php-html-parser/tree/3.1.1" 717 | }, 718 | "funding": [ 719 | { 720 | "url": "https://tidelift.com/funding/github/packagist/paquettg/php-html-parser", 721 | "type": "tidelift" 722 | } 723 | ], 724 | "time": "2020-11-01T20:34:43+00:00" 725 | }, 726 | { 727 | "name": "paquettg/string-encode", 728 | "version": "1.0.1", 729 | "source": { 730 | "type": "git", 731 | "url": "https://github.com/paquettg/string-encoder.git", 732 | "reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee" 733 | }, 734 | "dist": { 735 | "type": "zip", 736 | "url": "https://api.github.com/repos/paquettg/string-encoder/zipball/a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee", 737 | "reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee", 738 | "shasum": "" 739 | }, 740 | "require": { 741 | "php": ">=7.1" 742 | }, 743 | "require-dev": { 744 | 
"phpunit/phpunit": "^7.5.1" 745 | }, 746 | "type": "library", 747 | "autoload": { 748 | "psr-0": { 749 | "stringEncode": "src/" 750 | } 751 | }, 752 | "notification-url": "https://packagist.org/downloads/", 753 | "license": [ 754 | "MIT" 755 | ], 756 | "authors": [ 757 | { 758 | "name": "Gilles Paquette", 759 | "email": "paquettg@gmail.com", 760 | "homepage": "http://gillespaquette.ca" 761 | } 762 | ], 763 | "description": "Facilitating the process of altering string encoding in PHP.", 764 | "homepage": "https://github.com/paquettg/string-encoder", 765 | "keywords": [ 766 | "charset", 767 | "encoding", 768 | "string" 769 | ], 770 | "support": { 771 | "issues": "https://github.com/paquettg/string-encoder/issues", 772 | "source": "https://github.com/paquettg/string-encoder/tree/1.0.1" 773 | }, 774 | "time": "2018-12-21T02:25:09+00:00" 775 | }, 776 | { 777 | "name": "php-http/httplug", 778 | "version": "2.2.0", 779 | "source": { 780 | "type": "git", 781 | "url": "https://github.com/php-http/httplug.git", 782 | "reference": "191a0a1b41ed026b717421931f8d3bd2514ffbf9" 783 | }, 784 | "dist": { 785 | "type": "zip", 786 | "url": "https://api.github.com/repos/php-http/httplug/zipball/191a0a1b41ed026b717421931f8d3bd2514ffbf9", 787 | "reference": "191a0a1b41ed026b717421931f8d3bd2514ffbf9", 788 | "shasum": "" 789 | }, 790 | "require": { 791 | "php": "^7.1 || ^8.0", 792 | "php-http/promise": "^1.1", 793 | "psr/http-client": "^1.0", 794 | "psr/http-message": "^1.0" 795 | }, 796 | "require-dev": { 797 | "friends-of-phpspec/phpspec-code-coverage": "^4.1", 798 | "phpspec/phpspec": "^5.1 || ^6.0" 799 | }, 800 | "type": "library", 801 | "extra": { 802 | "branch-alias": { 803 | "dev-master": "2.x-dev" 804 | } 805 | }, 806 | "autoload": { 807 | "psr-4": { 808 | "Http\\Client\\": "src/" 809 | } 810 | }, 811 | "notification-url": "https://packagist.org/downloads/", 812 | "license": [ 813 | "MIT" 814 | ], 815 | "authors": [ 816 | { 817 | "name": "Eric GELOEN", 818 | "email": 
"geloen.eric@gmail.com" 819 | }, 820 | { 821 | "name": "Márk Sági-Kazár", 822 | "email": "mark.sagikazar@gmail.com", 823 | "homepage": "https://sagikazarmark.hu" 824 | } 825 | ], 826 | "description": "HTTPlug, the HTTP client abstraction for PHP", 827 | "homepage": "http://httplug.io", 828 | "keywords": [ 829 | "client", 830 | "http" 831 | ], 832 | "support": { 833 | "issues": "https://github.com/php-http/httplug/issues", 834 | "source": "https://github.com/php-http/httplug/tree/master" 835 | }, 836 | "time": "2020-07-13T15:43:23+00:00" 837 | }, 838 | { 839 | "name": "php-http/promise", 840 | "version": "1.1.0", 841 | "source": { 842 | "type": "git", 843 | "url": "https://github.com/php-http/promise.git", 844 | "reference": "4c4c1f9b7289a2ec57cde7f1e9762a5789506f88" 845 | }, 846 | "dist": { 847 | "type": "zip", 848 | "url": "https://api.github.com/repos/php-http/promise/zipball/4c4c1f9b7289a2ec57cde7f1e9762a5789506f88", 849 | "reference": "4c4c1f9b7289a2ec57cde7f1e9762a5789506f88", 850 | "shasum": "" 851 | }, 852 | "require": { 853 | "php": "^7.1 || ^8.0" 854 | }, 855 | "require-dev": { 856 | "friends-of-phpspec/phpspec-code-coverage": "^4.3.2", 857 | "phpspec/phpspec": "^5.1.2 || ^6.2" 858 | }, 859 | "type": "library", 860 | "extra": { 861 | "branch-alias": { 862 | "dev-master": "1.1-dev" 863 | } 864 | }, 865 | "autoload": { 866 | "psr-4": { 867 | "Http\\Promise\\": "src/" 868 | } 869 | }, 870 | "notification-url": "https://packagist.org/downloads/", 871 | "license": [ 872 | "MIT" 873 | ], 874 | "authors": [ 875 | { 876 | "name": "Joel Wurtz", 877 | "email": "joel.wurtz@gmail.com" 878 | }, 879 | { 880 | "name": "Márk Sági-Kazár", 881 | "email": "mark.sagikazar@gmail.com" 882 | } 883 | ], 884 | "description": "Promise used for asynchronous HTTP requests", 885 | "homepage": "http://httplug.io", 886 | "keywords": [ 887 | "promise" 888 | ], 889 | "support": { 890 | "issues": "https://github.com/php-http/promise/issues", 891 | "source": 
"https://github.com/php-http/promise/tree/1.1.0" 892 | }, 893 | "time": "2020-07-07T09:29:14+00:00" 894 | }, 895 | { 896 | "name": "phpoption/phpoption", 897 | "version": "1.8.0", 898 | "source": { 899 | "type": "git", 900 | "url": "https://github.com/schmittjoh/php-option.git", 901 | "reference": "5455cb38aed4523f99977c4a12ef19da4bfe2a28" 902 | }, 903 | "dist": { 904 | "type": "zip", 905 | "url": "https://api.github.com/repos/schmittjoh/php-option/zipball/5455cb38aed4523f99977c4a12ef19da4bfe2a28", 906 | "reference": "5455cb38aed4523f99977c4a12ef19da4bfe2a28", 907 | "shasum": "" 908 | }, 909 | "require": { 910 | "php": "^7.0 || ^8.0" 911 | }, 912 | "require-dev": { 913 | "bamarni/composer-bin-plugin": "^1.4.1", 914 | "phpunit/phpunit": "^6.5.14 || ^7.0.20 || ^8.5.19 || ^9.5.8" 915 | }, 916 | "type": "library", 917 | "extra": { 918 | "branch-alias": { 919 | "dev-master": "1.8-dev" 920 | } 921 | }, 922 | "autoload": { 923 | "psr-4": { 924 | "PhpOption\\": "src/PhpOption/" 925 | } 926 | }, 927 | "notification-url": "https://packagist.org/downloads/", 928 | "license": [ 929 | "Apache-2.0" 930 | ], 931 | "authors": [ 932 | { 933 | "name": "Johannes M. 
Schmitt", 934 | "email": "schmittjoh@gmail.com" 935 | }, 936 | { 937 | "name": "Graham Campbell", 938 | "email": "hello@gjcampbell.co.uk" 939 | } 940 | ], 941 | "description": "Option Type for PHP", 942 | "keywords": [ 943 | "language", 944 | "option", 945 | "php", 946 | "type" 947 | ], 948 | "support": { 949 | "issues": "https://github.com/schmittjoh/php-option/issues", 950 | "source": "https://github.com/schmittjoh/php-option/tree/1.8.0" 951 | }, 952 | "funding": [ 953 | { 954 | "url": "https://github.com/GrahamCampbell", 955 | "type": "github" 956 | }, 957 | { 958 | "url": "https://tidelift.com/funding/github/packagist/phpoption/phpoption", 959 | "type": "tidelift" 960 | } 961 | ], 962 | "time": "2021-08-28T21:27:29+00:00" 963 | }, 964 | { 965 | "name": "psr/http-client", 966 | "version": "1.0.1", 967 | "source": { 968 | "type": "git", 969 | "url": "https://github.com/php-fig/http-client.git", 970 | "reference": "2dfb5f6c5eff0e91e20e913f8c5452ed95b86621" 971 | }, 972 | "dist": { 973 | "type": "zip", 974 | "url": "https://api.github.com/repos/php-fig/http-client/zipball/2dfb5f6c5eff0e91e20e913f8c5452ed95b86621", 975 | "reference": "2dfb5f6c5eff0e91e20e913f8c5452ed95b86621", 976 | "shasum": "" 977 | }, 978 | "require": { 979 | "php": "^7.0 || ^8.0", 980 | "psr/http-message": "^1.0" 981 | }, 982 | "type": "library", 983 | "extra": { 984 | "branch-alias": { 985 | "dev-master": "1.0.x-dev" 986 | } 987 | }, 988 | "autoload": { 989 | "psr-4": { 990 | "Psr\\Http\\Client\\": "src/" 991 | } 992 | }, 993 | "notification-url": "https://packagist.org/downloads/", 994 | "license": [ 995 | "MIT" 996 | ], 997 | "authors": [ 998 | { 999 | "name": "PHP-FIG", 1000 | "homepage": "http://www.php-fig.org/" 1001 | } 1002 | ], 1003 | "description": "Common interface for HTTP clients", 1004 | "homepage": "https://github.com/php-fig/http-client", 1005 | "keywords": [ 1006 | "http", 1007 | "http-client", 1008 | "psr", 1009 | "psr-18" 1010 | ], 1011 | "support": { 1012 | "source": 
"https://github.com/php-fig/http-client/tree/master" 1013 | }, 1014 | "time": "2020-06-29T06:28:15+00:00" 1015 | }, 1016 | { 1017 | "name": "psr/http-message", 1018 | "version": "1.0.1", 1019 | "source": { 1020 | "type": "git", 1021 | "url": "https://github.com/php-fig/http-message.git", 1022 | "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363" 1023 | }, 1024 | "dist": { 1025 | "type": "zip", 1026 | "url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363", 1027 | "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363", 1028 | "shasum": "" 1029 | }, 1030 | "require": { 1031 | "php": ">=5.3.0" 1032 | }, 1033 | "type": "library", 1034 | "extra": { 1035 | "branch-alias": { 1036 | "dev-master": "1.0.x-dev" 1037 | } 1038 | }, 1039 | "autoload": { 1040 | "psr-4": { 1041 | "Psr\\Http\\Message\\": "src/" 1042 | } 1043 | }, 1044 | "notification-url": "https://packagist.org/downloads/", 1045 | "license": [ 1046 | "MIT" 1047 | ], 1048 | "authors": [ 1049 | { 1050 | "name": "PHP-FIG", 1051 | "homepage": "http://www.php-fig.org/" 1052 | } 1053 | ], 1054 | "description": "Common interface for HTTP messages", 1055 | "homepage": "https://github.com/php-fig/http-message", 1056 | "keywords": [ 1057 | "http", 1058 | "http-message", 1059 | "psr", 1060 | "psr-7", 1061 | "request", 1062 | "response" 1063 | ], 1064 | "support": { 1065 | "source": "https://github.com/php-fig/http-message/tree/master" 1066 | }, 1067 | "time": "2016-08-06T14:39:51+00:00" 1068 | }, 1069 | { 1070 | "name": "ralouphie/getallheaders", 1071 | "version": "3.0.3", 1072 | "source": { 1073 | "type": "git", 1074 | "url": "https://github.com/ralouphie/getallheaders.git", 1075 | "reference": "120b605dfeb996808c31b6477290a714d356e822" 1076 | }, 1077 | "dist": { 1078 | "type": "zip", 1079 | "url": "https://api.github.com/repos/ralouphie/getallheaders/zipball/120b605dfeb996808c31b6477290a714d356e822", 1080 | "reference": "120b605dfeb996808c31b6477290a714d356e822", 
1081 | "shasum": "" 1082 | }, 1083 | "require": { 1084 | "php": ">=5.6" 1085 | }, 1086 | "require-dev": { 1087 | "php-coveralls/php-coveralls": "^2.1", 1088 | "phpunit/phpunit": "^5 || ^6.5" 1089 | }, 1090 | "type": "library", 1091 | "autoload": { 1092 | "files": [ 1093 | "src/getallheaders.php" 1094 | ] 1095 | }, 1096 | "notification-url": "https://packagist.org/downloads/", 1097 | "license": [ 1098 | "MIT" 1099 | ], 1100 | "authors": [ 1101 | { 1102 | "name": "Ralph Khattar", 1103 | "email": "ralph.khattar@gmail.com" 1104 | } 1105 | ], 1106 | "description": "A polyfill for getallheaders.", 1107 | "support": { 1108 | "issues": "https://github.com/ralouphie/getallheaders/issues", 1109 | "source": "https://github.com/ralouphie/getallheaders/tree/develop" 1110 | }, 1111 | "time": "2019-03-08T08:55:37+00:00" 1112 | }, 1113 | { 1114 | "name": "simplepie/simplepie", 1115 | "version": "1.5.6", 1116 | "source": { 1117 | "type": "git", 1118 | "url": "https://github.com/simplepie/simplepie.git", 1119 | "reference": "1c68e14ca3ac84346b6e6fe3c5eedf725d0f92c6" 1120 | }, 1121 | "dist": { 1122 | "type": "zip", 1123 | "url": "https://api.github.com/repos/simplepie/simplepie/zipball/1c68e14ca3ac84346b6e6fe3c5eedf725d0f92c6", 1124 | "reference": "1c68e14ca3ac84346b6e6fe3c5eedf725d0f92c6", 1125 | "shasum": "" 1126 | }, 1127 | "require": { 1128 | "ext-pcre": "*", 1129 | "ext-xml": "*", 1130 | "ext-xmlreader": "*", 1131 | "php": ">=5.6.0" 1132 | }, 1133 | "require-dev": { 1134 | "phpunit/phpunit": "~5.4.3 || ~6.5" 1135 | }, 1136 | "suggest": { 1137 | "ext-curl": "", 1138 | "ext-iconv": "", 1139 | "ext-intl": "", 1140 | "ext-mbstring": "", 1141 | "mf2/mf2": "Microformat module that allows for parsing HTML for microformats" 1142 | }, 1143 | "type": "library", 1144 | "autoload": { 1145 | "psr-0": { 1146 | "SimplePie": "library" 1147 | } 1148 | }, 1149 | "notification-url": "https://packagist.org/downloads/", 1150 | "license": [ 1151 | "BSD-3-Clause" 1152 | ], 1153 | "authors": [ 1154 | { 
1155 | "name": "Ryan Parman", 1156 | "homepage": "http://ryanparman.com/", 1157 | "role": "Creator, alumnus developer" 1158 | }, 1159 | { 1160 | "name": "Sam Sneddon", 1161 | "homepage": "https://gsnedders.com/", 1162 | "role": "Alumnus developer" 1163 | }, 1164 | { 1165 | "name": "Ryan McCue", 1166 | "email": "me@ryanmccue.info", 1167 | "homepage": "http://ryanmccue.info/", 1168 | "role": "Developer" 1169 | } 1170 | ], 1171 | "description": "A simple Atom/RSS parsing library for PHP", 1172 | "homepage": "http://simplepie.org/", 1173 | "keywords": [ 1174 | "atom", 1175 | "feeds", 1176 | "rss" 1177 | ], 1178 | "support": { 1179 | "issues": "https://github.com/simplepie/simplepie/issues", 1180 | "source": "https://github.com/simplepie/simplepie/tree/1.5.6" 1181 | }, 1182 | "time": "2020-10-14T07:17:22+00:00" 1183 | }, 1184 | { 1185 | "name": "symfony/polyfill-ctype", 1186 | "version": "v1.23.0", 1187 | "source": { 1188 | "type": "git", 1189 | "url": "https://github.com/symfony/polyfill-ctype.git", 1190 | "reference": "46cd95797e9df938fdd2b03693b5fca5e64b01ce" 1191 | }, 1192 | "dist": { 1193 | "type": "zip", 1194 | "url": "https://api.github.com/repos/symfony/polyfill-ctype/zipball/46cd95797e9df938fdd2b03693b5fca5e64b01ce", 1195 | "reference": "46cd95797e9df938fdd2b03693b5fca5e64b01ce", 1196 | "shasum": "" 1197 | }, 1198 | "require": { 1199 | "php": ">=7.1" 1200 | }, 1201 | "suggest": { 1202 | "ext-ctype": "For best performance" 1203 | }, 1204 | "type": "library", 1205 | "extra": { 1206 | "branch-alias": { 1207 | "dev-main": "1.23-dev" 1208 | }, 1209 | "thanks": { 1210 | "name": "symfony/polyfill", 1211 | "url": "https://github.com/symfony/polyfill" 1212 | } 1213 | }, 1214 | "autoload": { 1215 | "psr-4": { 1216 | "Symfony\\Polyfill\\Ctype\\": "" 1217 | }, 1218 | "files": [ 1219 | "bootstrap.php" 1220 | ] 1221 | }, 1222 | "notification-url": "https://packagist.org/downloads/", 1223 | "license": [ 1224 | "MIT" 1225 | ], 1226 | "authors": [ 1227 | { 1228 | "name": "Gert 
de Pagter", 1229 | "email": "BackEndTea@gmail.com" 1230 | }, 1231 | { 1232 | "name": "Symfony Community", 1233 | "homepage": "https://symfony.com/contributors" 1234 | } 1235 | ], 1236 | "description": "Symfony polyfill for ctype functions", 1237 | "homepage": "https://symfony.com", 1238 | "keywords": [ 1239 | "compatibility", 1240 | "ctype", 1241 | "polyfill", 1242 | "portable" 1243 | ], 1244 | "support": { 1245 | "source": "https://github.com/symfony/polyfill-ctype/tree/v1.23.0" 1246 | }, 1247 | "funding": [ 1248 | { 1249 | "url": "https://symfony.com/sponsor", 1250 | "type": "custom" 1251 | }, 1252 | { 1253 | "url": "https://github.com/fabpot", 1254 | "type": "github" 1255 | }, 1256 | { 1257 | "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", 1258 | "type": "tidelift" 1259 | } 1260 | ], 1261 | "time": "2021-02-19T12:13:01+00:00" 1262 | }, 1263 | { 1264 | "name": "symfony/polyfill-mbstring", 1265 | "version": "v1.23.1", 1266 | "source": { 1267 | "type": "git", 1268 | "url": "https://github.com/symfony/polyfill-mbstring.git", 1269 | "reference": "9174a3d80210dca8daa7f31fec659150bbeabfc6" 1270 | }, 1271 | "dist": { 1272 | "type": "zip", 1273 | "url": "https://api.github.com/repos/symfony/polyfill-mbstring/zipball/9174a3d80210dca8daa7f31fec659150bbeabfc6", 1274 | "reference": "9174a3d80210dca8daa7f31fec659150bbeabfc6", 1275 | "shasum": "" 1276 | }, 1277 | "require": { 1278 | "php": ">=7.1" 1279 | }, 1280 | "suggest": { 1281 | "ext-mbstring": "For best performance" 1282 | }, 1283 | "type": "library", 1284 | "extra": { 1285 | "branch-alias": { 1286 | "dev-main": "1.23-dev" 1287 | }, 1288 | "thanks": { 1289 | "name": "symfony/polyfill", 1290 | "url": "https://github.com/symfony/polyfill" 1291 | } 1292 | }, 1293 | "autoload": { 1294 | "psr-4": { 1295 | "Symfony\\Polyfill\\Mbstring\\": "" 1296 | }, 1297 | "files": [ 1298 | "bootstrap.php" 1299 | ] 1300 | }, 1301 | "notification-url": "https://packagist.org/downloads/", 1302 | "license": [ 1303 | 
"MIT" 1304 | ], 1305 | "authors": [ 1306 | { 1307 | "name": "Nicolas Grekas", 1308 | "email": "p@tchwork.com" 1309 | }, 1310 | { 1311 | "name": "Symfony Community", 1312 | "homepage": "https://symfony.com/contributors" 1313 | } 1314 | ], 1315 | "description": "Symfony polyfill for the Mbstring extension", 1316 | "homepage": "https://symfony.com", 1317 | "keywords": [ 1318 | "compatibility", 1319 | "mbstring", 1320 | "polyfill", 1321 | "portable", 1322 | "shim" 1323 | ], 1324 | "support": { 1325 | "source": "https://github.com/symfony/polyfill-mbstring/tree/v1.23.1" 1326 | }, 1327 | "funding": [ 1328 | { 1329 | "url": "https://symfony.com/sponsor", 1330 | "type": "custom" 1331 | }, 1332 | { 1333 | "url": "https://github.com/fabpot", 1334 | "type": "github" 1335 | }, 1336 | { 1337 | "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", 1338 | "type": "tidelift" 1339 | } 1340 | ], 1341 | "time": "2021-05-27T12:26:48+00:00" 1342 | }, 1343 | { 1344 | "name": "vlucas/phpdotenv", 1345 | "version": "v3.6.8", 1346 | "source": { 1347 | "type": "git", 1348 | "url": "https://github.com/vlucas/phpdotenv.git", 1349 | "reference": "5e679f7616db829358341e2d5cccbd18773bdab8" 1350 | }, 1351 | "dist": { 1352 | "type": "zip", 1353 | "url": "https://api.github.com/repos/vlucas/phpdotenv/zipball/5e679f7616db829358341e2d5cccbd18773bdab8", 1354 | "reference": "5e679f7616db829358341e2d5cccbd18773bdab8", 1355 | "shasum": "" 1356 | }, 1357 | "require": { 1358 | "php": "^5.4 || ^7.0 || ^8.0", 1359 | "phpoption/phpoption": "^1.5.2", 1360 | "symfony/polyfill-ctype": "^1.17" 1361 | }, 1362 | "require-dev": { 1363 | "ext-filter": "*", 1364 | "ext-pcre": "*", 1365 | "phpunit/phpunit": "^4.8.36 || ^5.7.27 || ^6.5.14 || ^7.5.20" 1366 | }, 1367 | "suggest": { 1368 | "ext-filter": "Required to use the boolean validator.", 1369 | "ext-pcre": "Required to use most of the library." 
1370 | }, 1371 | "type": "library", 1372 | "extra": { 1373 | "branch-alias": { 1374 | "dev-master": "3.6-dev" 1375 | } 1376 | }, 1377 | "autoload": { 1378 | "psr-4": { 1379 | "Dotenv\\": "src/" 1380 | } 1381 | }, 1382 | "notification-url": "https://packagist.org/downloads/", 1383 | "license": [ 1384 | "BSD-3-Clause" 1385 | ], 1386 | "authors": [ 1387 | { 1388 | "name": "Graham Campbell", 1389 | "email": "graham@alt-three.com", 1390 | "homepage": "https://gjcampbell.co.uk/" 1391 | }, 1392 | { 1393 | "name": "Vance Lucas", 1394 | "email": "vance@vancelucas.com", 1395 | "homepage": "https://vancelucas.com/" 1396 | } 1397 | ], 1398 | "description": "Loads environment variables from `.env` to `getenv()`, `$_ENV` and `$_SERVER` automagically.", 1399 | "keywords": [ 1400 | "dotenv", 1401 | "env", 1402 | "environment" 1403 | ], 1404 | "support": { 1405 | "issues": "https://github.com/vlucas/phpdotenv/issues", 1406 | "source": "https://github.com/vlucas/phpdotenv/tree/v3.6.8" 1407 | }, 1408 | "funding": [ 1409 | { 1410 | "url": "https://github.com/GrahamCampbell", 1411 | "type": "github" 1412 | }, 1413 | { 1414 | "url": "https://tidelift.com/funding/github/packagist/vlucas/phpdotenv", 1415 | "type": "tidelift" 1416 | } 1417 | ], 1418 | "time": "2021-01-20T14:39:46+00:00" 1419 | } 1420 | ], 1421 | "packages-dev": [], 1422 | "aliases": [], 1423 | "minimum-stability": "stable", 1424 | "stability-flags": [], 1425 | "prefer-stable": false, 1426 | "prefer-lowest": false, 1427 | "platform": [], 1428 | "platform-dev": [], 1429 | "plugin-api-version": "2.1.0" 1430 | } 1431 | -------------------------------------------------------------------------------- /composer.phar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mauricesvay/FullHackerNews/73117bd1f81ddb163048e5828ee6da1edabd157e/composer.phar -------------------------------------------------------------------------------- /example.env: 
-------------------------------------------------------------------------------- 1 | # Copy this file to .env 2 | 3 | # Feed 4 | FEED_URL="http://news.ycombinator.com/rss" 5 | 6 | # AWS 7 | AWS_BUCKET="www.fullhn.com" 8 | AWS_ACCESS_KEY_ID="" 9 | AWS_SECRET_ACCESS_KEY="" 10 | -------------------------------------------------------------------------------- /index.php: -------------------------------------------------------------------------------- 1 | load(); 6 | 7 | require_once __DIR__ . "/src/Fetcher.php"; 8 | require_once __DIR__ . "/src/Parser.php"; 9 | require_once __DIR__ . "/src/Generator.php"; 10 | require_once __DIR__ . "/src/Uploader.php"; 11 | require_once __DIR__ . "/lib/ansi-color.php"; 12 | 13 | use PhpAnsiColor\Color; 14 | 15 | $out_folder = __DIR__ . '/www'; 16 | 17 | $feed = new SimplePie(); 18 | $feed->set_cache_duration(600); 19 | $feed->set_cache_location(__DIR__ . '/cache'); 20 | $feed->set_feed_url(getenv('FEED_URL')); 21 | $feed->init(); 22 | 23 | $articles = []; 24 | 25 | foreach ($feed->get_items() as $i => $item) { 26 | $parsed_url = parse_url($item->get_permalink()); 27 | $comment_tags = $item->get_item_tags('', 'comments'); 28 | $articles[] = [ 29 | 'index' => $i, 30 | 'url' => $item->get_permalink(), 31 | 'domain' => $parsed_url['host'], 32 | 'title' => $item->get_title(), 33 | 'comments' => count($comment_tags) ? $comment_tags[0]['data'] : '', 34 | ]; 35 | } 36 | 37 | foreach ($articles as $i => $article) { 38 | error_log("================================================================================"); 39 | error_log(Color::set($article['url'], "yellow")); 40 | error_log("title: " . $articles[$i]['title'] . " (" . $articles[$i]['domain'] . 
")"); 41 | try { 42 | $articles[$i]['content'] = FullFeed\Fetcher::fetch($article['url']); 43 | $articles[$i]['parsed'] = FullFeed\Parser::parse($article['url'], $articles[$i]['content']); 44 | $articles[$i]['image'] = FullFeed\Parser::extractImage($articles[$i]['content']); 45 | } catch (Exception $e) { 46 | error_log(Color::set($e->getMessage(), "red")); 47 | $articles[$i]['content'] = ""; 48 | $articles[$i]['parsed'] = ""; 49 | $articles[$i]['image'] = ""; 50 | } 51 | error_log("comments: " . $articles[$i]['comments']); 52 | error_log("image: " . $articles[$i]['image']); 53 | error_log("content: " . strlen($articles[$i]['content'])); 54 | error_log("parsed: " . strlen($articles[$i]['parsed'])); 55 | } 56 | 57 | error_log("================================================================================"); 58 | error_log(Color::set("Uploading to S3", "yellow")); 59 | $out_index = FullFeed\Generator::renderTemplateWithArticles('index', $articles); 60 | file_put_contents($out_folder . '/index.html', $out_index); 61 | $out_latest = FullFeed\Generator::renderTemplateWithArticles('latest', $articles); 62 | file_put_contents($out_folder . '/latest.html', $out_latest); 63 | $manifest = FullFeed\Generator::generateManifest($out_folder, date('r')); 64 | file_put_contents($out_folder . 
'/cache.manifest', $manifest); 65 | 66 | FullFeed\Uploader::upload($out_folder); -------------------------------------------------------------------------------- /lib/FileSystemCache/.gitignore: -------------------------------------------------------------------------------- 1 | cache/ 2 | examples/cache/ 3 | vendor/ 4 | tests/cache/ 5 | -------------------------------------------------------------------------------- /lib/FileSystemCache/.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | php: 3 | - 5.4 4 | - 5.3 5 | before_script: mkdir tests/cache; mkdir tests/cache/test; mkdir tests/cache/test2; mkdir tests/cache/test/test; chmod -R 777 tests/cache; 6 | script: phpunit --coverage-text 7 | -------------------------------------------------------------------------------- /lib/FileSystemCache/README.md: -------------------------------------------------------------------------------- 1 | FileSystemCache 2 | =============== 3 | 4 | A simple PHP class for caching data in the filesystem. Major features include: 5 | 6 | * Support for TTL when storing data 7 | * Support for "Newer Than" parameter when retrieving data 8 | * Every call is an atomic operation with proper file locking 9 | * Can group cache keys together for easy invalidation 10 | * Composer support 11 | * PHPUnit tests 12 | 13 | [![Build Status](https://secure.travis-ci.org/jdorn/FileSystemCache.png?branch=master)](http://travis-ci.org/jdorn/FileSystemCache) 14 | 15 | Getting Started 16 | ------------------ 17 | FileSystemCache can be installed with Composer or downloaded manually. 18 | 19 | ### With Composer 20 | 21 | If you're already using Composer, just add `jdorn/file-system-cache` to your `composer.json` file. 22 | FileSystemCache works with Composer's autoloader out of the box. 
23 | ```js 24 | { 25 | "require": { 26 | "jdorn/file-system-cache": "dev-master" 27 | } 28 | } 29 | ``` 30 | 31 | ### Manually 32 | 33 | If you aren't using Composer, you just need to include `lib/FileSystemCache.php` in your script. 34 | 35 | ```php 36 | require_once("path/to/FileSystemCache.php"); 37 | ``` 38 | 39 | Setting the Cache Directory 40 | ----------------------- 41 | 42 | By default, all cached data is stored in the `cache` directory relative to the currently executing script. 43 | You can change this by setting the $cacheDir static property. 44 | 45 | ```php 46 | 1001, 65 | 'ip address'=>'10.1.1.1' 66 | ); 67 | 68 | //string 69 | $key_data = 'my_key'; 70 | 71 | //object 72 | $key_data = new SomeObject(); 73 | 74 | //number 75 | $key_data = 1005; 76 | 77 | 78 | //generate a key object 79 | $key = FileSystemCache::generateCacheKey($key_data); 80 | ``` 81 | 82 | You can group cache keys together to better organize your data and make invalidation easier. 83 | 84 | ```php 85 | 'is some data I want to cache', 116 | 'it'=>'can be a string, array, object, or number.' 117 | ); 118 | 119 | $key = FileSystemCache::generateCacheKey('mykey'); 120 | 121 | FileSystemCache::store($key, $data); 122 | ``` 123 | 124 | If you want the data to expire automatically after a set amount of time, use the optional `ttl` parameter. 125 | 126 | ```php 127 | // Expire automatically after 1 hour (3600 seconds) 128 | FileSystemCache::store($key, $data, 3600); 129 | ``` 130 | 131 | Retrieve 132 | -------------------- 133 | You retrieve data using the same cache key you used to store it. `False` will be returned if the data was not cached or expired. 134 | 135 | ```php 136 | $data = FileSystemCache::retrieve($key); 137 | 138 | // If there was a cache miss 139 | if($data === false) { 140 | ... 141 | } 142 | ``` 143 | 144 | You can specify a `newer than` timestamp to only retrieve cached data that was stored after a certain time. 
145 | This is useful for storing a compiled version of a source file. 146 | 147 | ```php 148 | $file = 'source_file.txt'; 149 | $modified = filemtime($file); 150 | 151 | $key = FileSystemCache::generateCacheKey($file); 152 | 153 | $data = FileSystemCache::retrieve($key, $modified); 154 | 155 | // If there was a cache miss 156 | if($data === false) { 157 | ... 158 | } 159 | ``` 160 | 161 | Get and Modify 162 | ------------------ 163 | There is an atomic `Get and Modify` method as well. 164 | 165 | ```php 166 | FileSystemCache::getAndModify($key, function($value) { 167 | $value->count++; 168 | 169 | return $value; 170 | }); 171 | ``` 172 | 173 | If the data was originally cached with a TTL, you can pass `true` as the 3rd parameter to reset the TTL. 174 | Otherwise, the expiration will still be based on the original time it was stored. 175 | 176 | 177 | Invalidate 178 | ------------------- 179 | You can invalidate a single cache key or a group of cache keys. 180 | 181 | ```php 182 | FileSystemCache::invalidate($key); 183 | 184 | FileSystemCache::invalidateGroup('mygroup'); 185 | ``` 186 | 187 | Invalidating a group is done recursively by default and all sub-groups will also be invalidated. 188 | If you pass `false` as the 2nd parameter, you can make it non-recursive. 189 | 190 | ```php 191 | FileSystemCache::invalidateGroup('mygroup', false); 192 | ``` 193 | 194 | Running the Tests 195 | ------------------ 196 | You need PHPUnit installed to run the tests. Configuration is defined in `phpunit.xml.dist`. 
Running the tests is easy: 197 | 198 | ``` 199 | phpunit 200 | ``` 201 | -------------------------------------------------------------------------------- /lib/FileSystemCache/composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jdorn/file-system-cache", 3 | "description": "an easy way to cache data in the file system", 4 | "homepage": "https://github.com/jdorn/FileSystemCache/", 5 | "keywords": ["cache", "file system"], 6 | "minimum-stability": "dev", 7 | "license": "LGPL", 8 | "type": "library", 9 | "require": { 10 | "php": ">=5.3.0" 11 | }, 12 | "authors": [ 13 | { 14 | "name": "Jeremy Dorn", 15 | "email": "jeremy@jeremydorn.com", 16 | "homepage": "http://jeremydorn.com/" 17 | } 18 | ], 19 | "autoload": { 20 | "classmap": ["lib"] 21 | }, 22 | "extra": { 23 | "branch-alias": { 24 | "dev-master": "1.0.x-dev" 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /lib/FileSystemCache/lib/FileSystemCache.php: -------------------------------------------------------------------------------- 1 | 7 | * @copyright 2012 Jeremy Dorn 8 | * @license http://www.opensource.org/licenses/lgpl-license.php LGPL 9 | * @link http://github.com/jdorn/FileSystemCache 10 | * @version 1.0.0 11 | */ 12 | class FileSystemCache { 13 | /** 14 | * The root cache directory. Everything will be cached relative to this directory. 15 | * @var string 16 | */ 17 | public static $cacheDir = 'cache'; 18 | 19 | /** 20 | * Generates a cache key to use with store, retrieve, getAndModify, and invalidate 21 | * @param mixed $key_data Unique data that identifies the key. Can be a string, array, number, or object. 22 | * @param String $group An optional group to put the cache key in. Must be in the format "groupname" or "groupname/subgroupname". 23 | * @return FileSystemCacheKey The cache key object. 
24 | */ 25 | public static function generateCacheKey($key_data, $group=null) { 26 | return new FileSystemCacheKey($key_data,$group); 27 | } 28 | 29 | /** 30 | * Stores data in the cache 31 | * @param FileSystemCacheKey $key The cache key 32 | * @param mixed $data The data to store (will be serialized before storing) 33 | * @param int $ttl The number of seconds until the cache expires. (optional) 34 | * @return boolean True on success, false on failure 35 | */ 36 | public static function store(FileSystemCacheKey $key, $data, $ttl=null) { 37 | $filename = $key->getFileName(); 38 | 39 | $data = new FileSystemCacheValue($key,$data,$ttl); 40 | 41 | $fh = self::getFileHandle($filename,'c'); 42 | 43 | if(!$fh) return false; 44 | 45 | if(!self::putContents($fh,$data)) return false; 46 | 47 | return true; 48 | } 49 | 50 | /** 51 | * Retrieve data from cache 52 | * @param FileSystemCacheKey $key The cache key 53 | * @param int $newer_than If passed, only return if the cached value was created after this time 54 | * @return mixed The cached data or FALSE if not found or expired 55 | */ 56 | public static function retrieve(FileSystemCacheKey $key, $newer_than=null) { 57 | $filename = $key->getFileName(); 58 | 59 | if(!file_exists($filename)) return false; 60 | 61 | //if cached data is not newer than $newer_than 62 | if($newer_than && filemtime($filename) < $newer_than) return false; 63 | 64 | $fh = self::getFileHandle($filename,'r'); 65 | if(!$fh) return false; 66 | 67 | $data = self::getContents($fh,$key); 68 | if(!$data) return false; 69 | 70 | 71 | self::closeFile($fh); 72 | return $data->value; 73 | } 74 | 75 | /** 76 | * Atomically retrieve data from cache, modify it, and store it back 77 | * @param FileSystemCacheKey $key The cache key 78 | * @param Closure $callback A closure function to modify the cache value. 79 | * Takes the old value as an argument and returns new value. 80 | * If this function returns false, the cached value will be invalidated. 
81 | * @param bool $resetTtl If set to true, the expiration date will be recalculated using the previous TTL 82 | * @return mixed The new value if it was stored successfully or false if it wasn't 83 | * @throws Exception If an invalid callback method is given 84 | */ 85 | public static function getAndModify(FileSystemCacheKey $key, Closure $callback, $resetTtl=false) { 86 | $filename = $key->getFileName(); 87 | 88 | if(!file_exists($filename)) return false; 89 | 90 | //open a file handle 91 | $fh = self::getFileHandle($filename,'c+'); 92 | if(!$fh) return false; 93 | 94 | //get the data 95 | $data = self::getContents($fh,$key); 96 | if(!$data) return false; 97 | 98 | //get new value from callback function 99 | $old_value = $data->value; 100 | $data->value = $callback($data->value); 101 | 102 | //if the callback function returns false 103 | if($data->value === false) { 104 | self::closeFile($fh); 105 | 106 | //delete the cache file 107 | self::invalidate($key); 108 | return false; 109 | } 110 | 111 | //if value didn't change 112 | if(!$resetTtl && $data->value === $old_value) { 113 | self::closeFile($fh); 114 | return $data->value; 115 | } 116 | 117 | //if we're resetting the ttl to now 118 | if($resetTtl) { 119 | $data->created = time(); 120 | if($data->ttl) { 121 | $data->expires = $data->created + $data->ttl; 122 | } 123 | } 124 | 125 | if(!self::emptyFile($fh)) return false; 126 | 127 | //write contents and close the file handle 128 | self::putContents($fh,$data); 129 | 130 | //return the new value after modifying 131 | return $data->value; 132 | } 133 | 134 | /** 135 | * Invalidate a specific cache key 136 | * @param FileSystemCacheKey $key The cache key 137 | * @return boolean True on success. Currently never returns false. 
138 | */ 139 | public static function invalidate(FileSystemCacheKey $key) { 140 | $filename = $key->getFileName(); 141 | if(file_exists($filename)) { 142 | unlink($filename); 143 | } 144 | return true; 145 | } 146 | 147 | /** 148 | * Invalidate a group of cache keys 149 | * @param string $name The name of the group to invalidate (e.g. 'groupname', 'groupname/subgroupname', etc.). If null, the entire cache will be invalidated. 150 | * @param boolean $recursive If set to false, none of the subgroups will be invalidated. 151 | * @throws Exception If an invalid group name is given 152 | */ 153 | public static function invalidateGroup($name=null, $recursive=true) { 154 | //if invalidating a group, make sure it's valid 155 | if($name) { 156 | //it needs to have a trailing slash and no leading slashes 157 | $name = trim($name,'/').'/'; 158 | 159 | //make sure the key isn't going up a directory 160 | if(strpos($name,'..') !== false) { 161 | throw new Exception("Invalidate path cannot go up directories."); 162 | } 163 | } 164 | 165 | array_map("unlink", glob(self::$cacheDir.'/'.$name.'*.cache')); 166 | 167 | //if recursively invalidating 168 | if($recursive) { 169 | $subdirs = glob(self::$cacheDir.'/'.$name.'*',GLOB_ONLYDIR); 170 | 171 | foreach($subdirs as $dir) { 172 | $dir = basename($dir); 173 | 174 | //skip all subdirectories that start with '.' 175 | if($dir[0] == '.') continue; 176 | 177 | self::invalidateGroup($name.$dir,true); 178 | } 179 | } 180 | } 181 | 182 | 183 | /** 184 | * Get a file handle from a file name. Will create the directory if it doesn't exist already. Also, automatically locks the file with the proper read or write lock. 185 | * @param String $filename The full file path. 186 | * @param String $mode The file mode. Accepted modes are 'c', 'c+', and 'r'. 
187 | * @return resource The file handle 188 | */ 189 | private static function getFileHandle($filename, $mode='c') { 190 | $write = in_array($mode,array('c','c+')); 191 | 192 | if($write) { 193 | //make sure the directory exists and is writable 194 | $directory = dirname($filename); 195 | if(!file_exists($directory)) { 196 | if(!mkdir($directory,0777,true)) { 197 | return false; 198 | } 199 | } 200 | elseif(!is_dir($directory)) { 201 | return false; 202 | } 203 | elseif(!is_writable($directory)) { 204 | return false; 205 | } 206 | } 207 | 208 | //get file pointer 209 | $fh = fopen($filename,$mode); 210 | 211 | if(!$fh) return false; 212 | 213 | //lock file with appropriate lock type 214 | if($write) { 215 | if(!flock($fh,LOCK_EX)) { 216 | self::closeFile($fh); 217 | return false; 218 | } 219 | } 220 | else { 221 | if(!flock($fh,LOCK_SH)) { 222 | self::closeFile($fh); 223 | return false; 224 | } 225 | } 226 | 227 | return $fh; 228 | } 229 | 230 | /** 231 | * Empties a file. If empty fails, the file will be closed and it will return false. 232 | * @param resource $fh The file handle 233 | * @return boolean true for success, false for failure 234 | */ 235 | private static function emptyFile($fh) { 236 | rewind($fh); 237 | if(!ftruncate($fh,0)) { 238 | //release lock 239 | self::closeFile($fh); 240 | return false; 241 | } 242 | else { 243 | return true; 244 | } 245 | } 246 | 247 | /** 248 | * Closes a file. Also releases any locks on the file. 249 | * @param resource $fh The file handle 250 | */ 251 | private static function closeFile($fh) { 252 | flock($fh,LOCK_UN); 253 | fclose($fh); 254 | } 255 | 256 | /** 257 | * Returns the contents of a cache file. If the data is not in the right form or expired, it will be invalidated. 258 | * @param resource $fh The file handle 259 | * @param FileSystemCacheKey $key The cache key. This is used to invalidate the key when the data is expired. 
260 | * @return boolean|FileSystemCacheValue FALSE if something went wrong or the data is expired. Otherwise, a FileSystemCacheValue object will be returned. 261 | */ 262 | private static function getContents($fh,FileSystemCacheKey $key) { 263 | //get the existing file contents 264 | $contents = stream_get_contents($fh); 265 | $data = @unserialize($contents); 266 | 267 | //if we can't unserialize the data or if the data is expired 268 | if(!$data || !($data instanceof FileSystemCacheValue) || $data->isExpired()) { 269 | //release lock 270 | self::closeFile($fh); 271 | 272 | //delete the cache file so we don't try to retrieve it again 273 | self::invalidate($key); 274 | 275 | return false; 276 | } 277 | 278 | return $data; 279 | } 280 | 281 | /** 282 | * Writes to a file. Also closes and releases any locks on the file. 283 | * @param resource $fh The file handle 284 | * @param FileSystemCacheValue $data The cache value to store in the file. 285 | * @return boolean True on success. Currently, never returns false. 286 | */ 287 | private static function putContents($fh,FileSystemCacheValue $data) { 288 | fwrite($fh,serialize($data)); 289 | fflush($fh); 290 | 291 | //release lock 292 | self::closeFile($fh); 293 | 294 | return true; 295 | } 296 | } 297 | 298 | /** 299 | * Class that represents a cache key. 300 | * Most of the time, you would get a FileSystemCacheKey object from FileSystemCache::generateCacheKey(); 301 | */ 302 | class FileSystemCacheKey { 303 | /** 304 | * @var mixed The key data used to generate the cache key 305 | */ 306 | public $key; 307 | /** 308 | * @var string The group (if any) that the key will be stored in. Can be null. 309 | */ 310 | public $group; 311 | 312 | /** 313 | * Creates a FileSystemCacheKey object 314 | * @param mixed $key Key data that will be used to generate a cache key 315 | * @param string $group The group (if any) that the key will be stored in. Can be null. 
316 | */ 317 | public function __construct($key,$group) { 318 | $this->key = $key; 319 | $this->group = $group; 320 | } 321 | 322 | /** 323 | * Returns the generated cache key. 324 | * Non-string key data will be serialized and hashed 325 | * @return string The generated cache key. 326 | */ 327 | public function __toString() { 328 | $key = $this->key; 329 | 330 | //convert arrays and objects into strings 331 | if(!is_string($key)) { 332 | $key = serialize($key); 333 | } 334 | 335 | //if we can't use the key directly, md5 it 336 | if(preg_match('/[^a-zA-Z0-9_\-\.]/',$key)) { 337 | $key = md5($key); 338 | } 339 | 340 | //if it contains a group 341 | if($this->group) { 342 | //sanitize the group part 343 | $parts = explode('/',$this->group); 344 | foreach($parts as $i=>&$part) { 345 | $part = preg_replace('/[^a-zA-Z0-9_\-]/','',$part); 346 | 347 | if(!$part) unset($parts[$i]); 348 | } 349 | 350 | $group = implode('/',$parts); 351 | 352 | $key = $group.'/'.$key; 353 | } 354 | 355 | return $key; 356 | } 357 | 358 | /** 359 | * Returns the full path to the cache file for this key. 360 | * @return string The full path to the cache file for this key. 361 | */ 362 | public function getFileName() { 363 | return FileSystemCache::$cacheDir . '/' . $this->__toString() . '.cache'; 364 | } 365 | } 366 | 367 | /** 368 | * This class represents the actual data stored in the cache file. 369 | * You should never need to use this class directly. 370 | */ 371 | class FileSystemCacheValue { 372 | /** 373 | * @var FileSystemCacheKey The cache key the file is stored under. 374 | */ 375 | public $key; 376 | /** 377 | * @var mixed The value being cached 378 | */ 379 | public $value; 380 | /** 381 | * @var int The max number of seconds to store the data. If null, the data won't expire. 382 | */ 383 | public $ttl; 384 | /** 385 | * @var int The timestamp of when the data will expire. If null, the data won't expire. 
386 | */ 387 | public $expires; 388 | /** 389 | * @var int The timestamp of when the value was created. 390 | */ 391 | public $created; 392 | 393 | /** 394 | * Creates a FileSystemCacheValue object. 395 | * @param FileSystemCacheKey $key The cache key the file is stored under. 396 | * @param mixed $value The data being stored 397 | * @param int $ttl The timestamp of when the data will expire. If null, the data won't expire. 398 | */ 399 | public function __construct($key,$value,$ttl = null) { 400 | $this->key = $key; 401 | $this->value = $value; 402 | $this->ttl = $ttl; 403 | $this->created = time(); 404 | 405 | if($ttl) $this->expires = $this->created + $ttl; 406 | else $this->expires = null; 407 | } 408 | 409 | /** 410 | * Checks if a value is expired 411 | * @return bool True if the value is expired. False if it is not. 412 | */ 413 | public function isExpired() { 414 | //value doesn't expire 415 | if(!$this->expires) return false; 416 | 417 | //if it is after the expire time 418 | return time() > $this->expires; 419 | } 420 | } 421 | -------------------------------------------------------------------------------- /lib/FileSystemCache/phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | ./tests 7 | 8 | 9 | 10 | 11 | 12 | ./ 13 | 14 | ./tests 15 | ./vendor 16 | ./examples 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /lib/FileSystemCache/tests/FileSystemCacheTest.php: -------------------------------------------------------------------------------- 1 | assertInstanceOf('FileSystemCacheKey', $key); 13 | } 14 | 15 | /** 16 | * @dataProvider dataProvider 17 | */ 18 | function testStoreDataTypes($data) { 19 | $key = FileSystemCache::generateCacheKey('mytestkey'); 20 | 21 | FileSystemCache::invalidate($key); 22 | 23 | $this->assertFalse(FileSystemCache::retrieve($key)); 24 | 25 | FileSystemCache::store($key, $data); 26 | 27 | 
$this->assertEquals($data, FileSystemCache::retrieve($key)); 28 | 29 | FileSystemCache::invalidate($key); 30 | 31 | $this->assertFalse(FileSystemCache::retrieve($key)); 32 | } 33 | 34 | /** 35 | * @dataProvider keyDataProvider 36 | */ 37 | function testStore($key_data, $group) { 38 | $key = FileSystemCache::generateCacheKey($key_data, $group); 39 | 40 | $data = 'test'.microtime(true); 41 | 42 | FileSystemCache::invalidate($key); 43 | 44 | $this->assertFalse(FileSystemCache::retrieve($key)); 45 | 46 | FileSystemCache::store($key, $data); 47 | 48 | $this->assertEquals($data, FileSystemCache::retrieve($key)); 49 | 50 | FileSystemCache::invalidate($key); 51 | 52 | $this->assertFalse(FileSystemCache::retrieve($key)); 53 | } 54 | 55 | function testStoreTtl() { 56 | $key = FileSystemCache::generateCacheKey('ttl test'); 57 | $data = 'test ttl '.microtime(true); 58 | 59 | FileSystemCache::invalidate($key); 60 | 61 | $this->assertFalse(FileSystemCache::retrieve($key)); 62 | 63 | FileSystemCache::store($key, $data, 1); 64 | 65 | $this->assertEquals($data, FileSystemCache::retrieve($key)); 66 | 67 | sleep(2); 68 | 69 | $this->assertFalse(FileSystemCache::retrieve($key)); 70 | } 71 | 72 | function testRetrieveNewerThan() { 73 | $key = FileSystemCache::generateCacheKey('newer than test'); 74 | $data = 'test newer than data'; 75 | FileSystemCache::store($key, $data); 76 | 77 | $this->assertFalse(FileSystemCache::retrieve($key, time() + 5)); 78 | $this->assertEquals($data, FileSystemCache::retrieve($key, time() - 5)); 79 | 80 | FileSystemCache::invalidate($key); 81 | $this->assertFalse(FileSystemCache::retrieve($key)); 82 | } 83 | 84 | function testGetAndModifyReturnFalse() { 85 | $key = FileSystemCache::generateCacheKey('get and modify key'); 86 | $data = 'get and modify data'; 87 | 88 | FileSystemCache::store($key, $data, 1); 89 | $this->assertEquals($data, FileSystemCache::retrieve($key)); 90 | 91 | FileSystemCache::getAndModify($key, function($value) { 92 | return false; 93 | 
}); 94 | 95 | $this->assertFalse(FileSystemCache::retrieve($key)); 96 | } 97 | 98 | function testGetAndModify() { 99 | $key = FileSystemCache::generateCacheKey('get and modify key'); 100 | $data = 'get and modify data'; 101 | 102 | FileSystemCache::store($key, $data, 1); 103 | $this->assertEquals($data, FileSystemCache::retrieve($key)); 104 | 105 | FileSystemCache::getAndModify($key, function($value) { 106 | $value .= 'test'; 107 | return $value; 108 | }); 109 | 110 | $this->assertEquals($data.'test', FileSystemCache::retrieve($key)); 111 | 112 | sleep(2); 113 | 114 | $this->assertFalse(FileSystemCache::retrieve($key)); 115 | } 116 | 117 | function testGetAndModifyResetTtl() { 118 | $key = FileSystemCache::generateCacheKey('get and modify reset ttl key'); 119 | $data = 'get and modify reset ttl data'; 120 | 121 | FileSystemCache::store($key, $data, 3); 122 | sleep(2); 123 | // At this point, the key expires in 1 seconds 124 | $this->assertEquals($data, FileSystemCache::retrieve($key)); 125 | 126 | FileSystemCache::getAndModify($key, function($value) { 127 | $value .= 'test'; 128 | return $value; 129 | }, true); 130 | 131 | sleep(2); 132 | 133 | // The original expiration has hit, but getAndModify should have extended it 134 | $this->assertEquals($data.'test', FileSystemCache::retrieve($key)); 135 | 136 | sleep(2); 137 | 138 | $this->assertFalse(FileSystemCache::retrieve($key)); 139 | } 140 | 141 | function testGetAndModifyUnchanged() { 142 | $key = FileSystemCache::generateCacheKey('get and modify unchanged'); 143 | $data = 'get and modify unchanged'; 144 | 145 | FileSystemCache::store($key, $data); 146 | 147 | $return = FileSystemCache::getAndModify($key, function($value) { 148 | return $value; 149 | }); 150 | 151 | $this->assertEquals($data, $return); 152 | 153 | $this->assertEquals($data, FileSystemCache::retrieve($key)); 154 | } 155 | 156 | /** 157 | * @expectedException Exception 158 | */ 159 | function testHackedGroupInvalidation() { 160 | 
FileSystemCache::invalidateGroup('this/../../is/a/hack'); 161 | } 162 | 163 | function testGroupInvalidation() { 164 | $key_root = FileSystemCache::generateCacheKey('mykey'); 165 | $key_group1 = FileSystemCache::generateCacheKey('mykey1','test'); 166 | $key_group2 = FileSystemCache::generateCacheKey('mykey2','test'); 167 | $key_sub = FileSystemCache::generateCacheKey('mykey','test/test'); 168 | $key_other = FileSystemCache::generateCacheKey('mykey','test2'); 169 | 170 | $data = 'group invalidation'; 171 | 172 | FileSystemCache::store($key_root, $data); 173 | FileSystemCache::store($key_group1, $data); 174 | FileSystemCache::store($key_group2, $data); 175 | FileSystemCache::store($key_sub, $data); 176 | FileSystemCache::store($key_other, $data); 177 | 178 | $this->assertEquals($data, FileSystemCache::retrieve($key_root)); 179 | $this->assertEquals($data, FileSystemCache::retrieve($key_group1)); 180 | $this->assertEquals($data, FileSystemCache::retrieve($key_group2)); 181 | $this->assertEquals($data, FileSystemCache::retrieve($key_sub)); 182 | $this->assertEquals($data, FileSystemCache::retrieve($key_other)); 183 | 184 | FileSystemCache::invalidateGroup('test', false); 185 | 186 | $this->assertEquals($data, FileSystemCache::retrieve($key_root)); 187 | $this->assertFalse(FileSystemCache::retrieve($key_group1)); 188 | $this->assertFalse(FileSystemCache::retrieve($key_group2)); 189 | $this->assertEquals($data, FileSystemCache::retrieve($key_sub)); 190 | $this->assertEquals($data, FileSystemCache::retrieve($key_other)); 191 | 192 | FileSystemCache::invalidate($key_root); 193 | FileSystemCache::invalidate($key_sub); 194 | FileSystemCache::invalidate($key_other); 195 | 196 | $this->assertFalse(FileSystemCache::retrieve($key_root)); 197 | $this->assertFalse(FileSystemCache::retrieve($key_sub)); 198 | $this->assertFalse(FileSystemCache::retrieve($key_other)); 199 | } 200 | 201 | 202 | function testGroupInvalidationRecursive() { 203 | $key_root = 
FileSystemCache::generateCacheKey('mykey'); 204 | $key_group1 = FileSystemCache::generateCacheKey('mykey1','test'); 205 | $key_group2 = FileSystemCache::generateCacheKey('mykey2','test'); 206 | $key_sub = FileSystemCache::generateCacheKey('mykey','test/test'); 207 | $key_other = FileSystemCache::generateCacheKey('mykey','test2'); 208 | 209 | $data = 'group invalidation recursive'; 210 | 211 | FileSystemCache::store($key_root, $data); 212 | FileSystemCache::store($key_group1, $data); 213 | FileSystemCache::store($key_group2, $data); 214 | FileSystemCache::store($key_sub, $data); 215 | FileSystemCache::store($key_other, $data); 216 | 217 | $this->assertEquals($data, FileSystemCache::retrieve($key_root)); 218 | $this->assertEquals($data, FileSystemCache::retrieve($key_group1)); 219 | $this->assertEquals($data, FileSystemCache::retrieve($key_group2)); 220 | $this->assertEquals($data, FileSystemCache::retrieve($key_sub)); 221 | $this->assertEquals($data, FileSystemCache::retrieve($key_other)); 222 | 223 | FileSystemCache::invalidateGroup('test'); 224 | 225 | $this->assertEquals($data, FileSystemCache::retrieve($key_root)); 226 | $this->assertFalse(FileSystemCache::retrieve($key_group1)); 227 | $this->assertFalse(FileSystemCache::retrieve($key_group2)); 228 | $this->assertFalse(FileSystemCache::retrieve($key_sub)); 229 | $this->assertEquals($data, FileSystemCache::retrieve($key_other)); 230 | 231 | FileSystemCache::invalidate($key_root); 232 | FileSystemCache::invalidate($key_other); 233 | 234 | $this->assertFalse(FileSystemCache::retrieve($key_root)); 235 | $this->assertFalse(FileSystemCache::retrieve($key_other)); 236 | } 237 | 238 | function keyProvider() { 239 | return array( 240 | array(FileSystemCache::generateCacheKey('mykey')), 241 | array(FileSystemCache::generateCacheKey('mykey','test')), 242 | array(FileSystemCache::generateCacheKey('mykey','test/test')), 243 | ); 244 | } 245 | 246 | function keyDataProvider() { 247 | $data = $this->dataProvider(); 248 | 
$groups = $this->groupProvider(); 249 | 250 | $keys = array(); 251 | foreach($data as $key_data) { 252 | foreach($groups as $group) { 253 | $keys[] = array( 254 | $key_data[0], 255 | $group[0] 256 | ); 257 | } 258 | } 259 | 260 | return $keys; 261 | } 262 | function dataProvider() { 263 | $temp = new DateTime(); 264 | 265 | return array( 266 | array(99), 267 | array('string'), 268 | array(array('an','array','with'=>'data')), 269 | array( $temp ) 270 | ); 271 | } 272 | function groupProvider() { 273 | return array( 274 | array(null), 275 | array('test'), 276 | array('test/test') 277 | ); 278 | } 279 | } 280 | -------------------------------------------------------------------------------- /lib/ansi-color.php: -------------------------------------------------------------------------------- 1 | 0, 30 | "bold" => 1, 31 | "italic" => 3, 32 | "underline" => 4, 33 | "blink" => 5, 34 | "inverse" => 7, 35 | "hidden" => 8, 36 | "black" => 30, 37 | "red" => 31, 38 | "green" => 32, 39 | "yellow" => 33, 40 | "blue" => 34, 41 | "magenta" => 35, 42 | "cyan" => 36, 43 | "white" => 37, 44 | "black_bg" => 40, 45 | "red_bg" => 41, 46 | "green_bg" => 42, 47 | "yellow_bg" => 43, 48 | "blue_bg" => 44, 49 | "magenta_bg" => 45, 50 | "cyan_bg" => 46, 51 | "white_bg" => 47 52 | ); 53 | 54 | public static function set($str, $color) 55 | { 56 | $color_attrs = explode("+", $color); 57 | $ansi_str = ""; 58 | foreach ($color_attrs as $attr) { 59 | $ansi_str .= "\033[" . self::$ANSI_CODES[$attr] . "m"; 60 | } 61 | $ansi_str .= $str . "\033[" . self::$ANSI_CODES["off"] . 
"m"; 62 | return $ansi_str; 63 | } 64 | 65 | public static function log($message, $color) 66 | { 67 | error_log(self::set($message, $color)); 68 | } 69 | 70 | public static function replace($full_text, $search_regexp, $color) 71 | { 72 | $new_text = preg_replace_callback( 73 | "/($search_regexp)/", 74 | function ($matches) use ($color) { 75 | return Color::set($matches[1], $color); 76 | }, 77 | $full_text 78 | ); 79 | return is_null($new_text) ? $full_text : $new_text; 80 | } 81 | } 82 | 83 | -------------------------------------------------------------------------------- /lib/fivefilters-php-readability/JSLikeHTMLElement.php: -------------------------------------------------------------------------------- 1 | registerNodeClass('DOMElement', 'JSLikeHTMLElement'); 16 | * $doc->loadHTML('

<div><p>Para 1</p><p>Para 2</p></div>

'); 17 | * $elem = $doc->getElementsByTagName('div')->item(0); 18 | * 19 | * // print innerHTML 20 | * echo $elem->innerHTML; // prints '

<p>Para 1</p><p>Para 2</p>

' 21 | * echo "\n\n"; 22 | * 23 | * // set innerHTML 24 | * $elem->innerHTML = 'FiveFilters.org'; 25 | * echo $elem->innerHTML; // prints 'FiveFilters.org' 26 | * echo "\n\n"; 27 | * 28 | * // print document (with our changes) 29 | * echo $doc->saveXML(); 30 | * @endcode 31 | * 32 | * @author Keyvan Minoukadeh - http://www.keyvan.net - keyvan@keyvan.net 33 | * @see http://fivefilters.org (the project this was written for) 34 | */ 35 | class JSLikeHTMLElement extends DOMElement 36 | { 37 | /** 38 | * Used for setting innerHTML like it's done in JavaScript: 39 | * @code 40 | * $div->innerHTML = '

<h2>Chapter 2</h2><p>The story begins...</p>

'; 41 | * @endcode 42 | */ 43 | public function __set($name, $value) { 44 | if ($name == 'innerHTML') { 45 | // first, empty the element 46 | for ($x=$this->childNodes->length-1; $x>=0; $x--) { 47 | $this->removeChild($this->childNodes->item($x)); 48 | } 49 | // $value holds our new inner HTML 50 | if ($value != '') { 51 | $f = $this->ownerDocument->createDocumentFragment(); 52 | // appendXML() expects well-formed markup (XHTML) 53 | $result = @$f->appendXML($value); // @ to suppress PHP warnings 54 | if ($result) { 55 | if ($f->hasChildNodes()) $this->appendChild($f); 56 | } else { 57 | // $value is probably ill-formed 58 | $f = new DOMDocument(); 59 | $value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8'); 60 | // Using will generate a warning, but so will bad HTML 61 | // (and by this point, bad HTML is what we've got). 62 | // We use it (and suppress the warning) because an HTML fragment will 63 | // be wrapped around tags which we don't really want to keep. 64 | // Note: despite the warning, if loadHTML succeeds it will return true. 65 | $result = @$f->loadHTML(''.$value.''); 66 | if ($result) { 67 | $import = $f->getElementsByTagName('htmlfragment')->item(0); 68 | foreach ($import->childNodes as $child) { 69 | $importedNode = $this->ownerDocument->importNode($child, true); 70 | $this->appendChild($importedNode); 71 | } 72 | } else { 73 | // oh well, we tried, we really did. :( 74 | // this element is now empty 75 | } 76 | } 77 | } 78 | } else { 79 | $trace = debug_backtrace(); 80 | trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' 
on line '.$trace[0]['line'], E_USER_NOTICE); 81 | } 82 | } 83 | 84 | /** 85 | * Used for getting innerHTML like it's done in JavaScript: 86 | * @code 87 | * $string = $div->innerHTML; 88 | * @endcode 89 | */ 90 | public function __get($name) 91 | { 92 | if ($name == 'innerHTML') { 93 | $inner = ''; 94 | foreach ($this->childNodes as $child) { 95 | $inner .= $this->ownerDocument->saveXML($child); 96 | } 97 | return $inner; 98 | } 99 | 100 | $trace = debug_backtrace(); 101 | trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE); 102 | return null; 103 | } 104 | 105 | public function __toString() 106 | { 107 | return '['.$this->tagName.']'; 108 | } 109 | } -------------------------------------------------------------------------------- /lib/fivefilters-php-readability/README.md: -------------------------------------------------------------------------------- 1 | PHP Readability 2 | ================ 3 | 4 | This is a PHP port of Arc90's original Javascript version of Readability. (Arc90 has since relaunched the project.) 5 | 6 | For instructions on how to use this, please see 7 | 8 | For a more flexible and robust solution to article extraction, take a look at [Full-Text RSS](http://fivefilters.org/content-only/) - it makes use of PHP Readability, but offers much more. 9 | 10 | Feel free to fork this and change/improve it. I would love to see your results. Please do share them and I'll consider pulling them in. 11 | 12 | PHP Readability is licensed under the Apache License, Version 2.0 (the same license as the original JS version). The original Javascript version can be found here: (readability.js) 13 | 14 | ### Simple example 15 | 16 | cleanRepair(); 36 | $html = $tidy->value; 37 | } 38 | 39 | // give it to Readability 40 | $readability = new Readability($html, $url); 41 | // print debug output? 
42 | // useful to compare against Arc90's original JS version - 43 | // simply click the bookmarklet with FireBug's console window open 44 | $readability->debug = false; 45 | // convert links to footnotes? 46 | $readability->convertLinksToFootnotes = true; 47 | // process it 48 | $result = $readability->init(); 49 | // does it look like we found what we wanted? 50 | if ($result) { 51 | echo "== Title =====================================\n"; 52 | echo $readability->getTitle()->textContent, "\n\n"; 53 | echo "== Body ======================================\n"; 54 | $content = $readability->getContent()->innerHTML; 55 | // if we've got Tidy, let's clean it up for output 56 | if (function_exists('tidy_parse_string')) { 57 | $tidy = tidy_parse_string($content, array('indent'=>true, 'show-body-only' => true), 'UTF8'); 58 | $tidy->cleanRepair(); 59 | $content = $tidy->value; 60 | } 61 | echo $content; 62 | } else { 63 | echo 'Looks like we couldn\'t find the content. :('; 64 | } -------------------------------------------------------------------------------- /lib/fivefilters-php-readability/Readability.php: -------------------------------------------------------------------------------- 1 | init(); 64 | echo $r->articleContent->innerHTML; 65 | */ 66 | 67 | class Readability 68 | { 69 | public $version = '1.7.1-without-multi-page'; 70 | public $convertLinksToFootnotes = false; 71 | public $revertForcedParagraphElements = true; 72 | public $articleTitle; 73 | public $articleContent; 74 | public $dom; 75 | public $url = null; // optional - URL where HTML was retrieved 76 | public $debug = false; 77 | public $lightClean = true; // preserves more content (experimental) added 2012-09-19 78 | protected $body = null; // 79 | protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later 80 | protected $flags = 7; // 1 | 2 | 4; // Start with all flags set. 
81 | protected $success = false; // indicates whether we were able to extract or not 82 | 83 | /** 84 | * All of the regular expressions in use within readability. 85 | * Defined up here so we don't instantiate them repeatedly in loops. 86 | **/ 87 | public $regexps = array( 88 | 'unlikelyCandidates' => '/combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i', 89 | 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', 90 | 'positive' => '/article|body|content|entry|hentry|main|page|attachment|pagination|post|text|blog|story/i', 91 | 'negative' => '/combx|comment|com-|contact|foot|footer|_nav|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i', 92 | 'divToPElements' => '/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i', 93 | 'replaceBrs' => '/(]*>[ \n\r\t]*){2,}/i', 94 | 'replaceFonts' => '/<(\/?)font[^>]*>/i', 95 | // 'trimRe' => '/^\s+|\s+$/g', // PHP has trim() 96 | 'normalize' => '/\s{2,}/', 97 | 'killBreaks' => '/((\s| ?)*){1,}/', 98 | 'video' => '!//(player\.|www\.)?(youtube|vimeo|viddler)\.com!i', 99 | 'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i' 100 | ); 101 | 102 | /* constants */ 103 | const FLAG_STRIP_UNLIKELYS = 1; 104 | const FLAG_WEIGHT_CLASSES = 2; 105 | const FLAG_CLEAN_CONDITIONALLY = 4; 106 | 107 | /** 108 | * Create instance of Readability 109 | * @param string UTF-8 encoded string 110 | * @param string (optional) URL associated with HTML (used for footnotes) 111 | * @param string which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib') 112 | */ 113 | function __construct($html, $url=null, $parser='libxml') 114 | { 115 | $this->url = $url; 116 | /* Turn all double br's into p's */ 117 | $html = preg_replace($this->regexps['replaceBrs'], '

', $html); 118 | $html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html); 119 | $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"); 120 | if (trim($html) == '') $html = ''; 121 | if ($parser=='html5lib' && ($this->dom = HTML5_Parser::parse($html))) { 122 | // all good 123 | } else { 124 | $this->dom = new DOMDocument(); 125 | $this->dom->preserveWhiteSpace = false; 126 | @$this->dom->loadHTML($html); 127 | } 128 | $this->dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement'); 129 | } 130 | 131 | /** 132 | * Get article title element 133 | * @return DOMElement 134 | */ 135 | public function getTitle() { 136 | return $this->articleTitle; 137 | } 138 | 139 | /** 140 | * Get article content element 141 | * @return DOMElement 142 | */ 143 | public function getContent() { 144 | return $this->articleContent; 145 | } 146 | 147 | /** 148 | * Runs readability. 149 | * 150 | * Workflow: 151 | * 1. Prep the document by removing script tags, css, etc. 152 | * 2. Build readability's DOM tree. 153 | * 3. Grab the article content from the current dom tree. 154 | * 4. Replace the current DOM tree with the new one. 155 | * 5. Read peacefully. 
156 | * 157 | * @return boolean true if we found content, false otherwise 158 | **/ 159 | public function init() 160 | { 161 | if (!isset($this->dom->documentElement)) return false; 162 | $this->removeScripts($this->dom); 163 | //die($this->getInnerHTML($this->dom->documentElement)); 164 | 165 | // Assume successful outcome 166 | $this->success = true; 167 | 168 | $bodyElems = $this->dom->getElementsByTagName('body'); 169 | if ($bodyElems->length > 0) { 170 | if ($this->bodyCache == null) { 171 | $this->bodyCache = $bodyElems->item(0)->innerHTML; 172 | } 173 | if ($this->body == null) { 174 | $this->body = $bodyElems->item(0); 175 | } 176 | } 177 | 178 | $this->prepDocument(); 179 | 180 | //die($this->dom->documentElement->parentNode->nodeType); 181 | //$this->setInnerHTML($this->dom->documentElement, $this->getInnerHTML($this->dom->documentElement)); 182 | //die($this->getInnerHTML($this->dom->documentElement)); 183 | 184 | /* Build readability's DOM tree */ 185 | $overlay = $this->dom->createElement('div'); 186 | $innerDiv = $this->dom->createElement('div'); 187 | $articleTitle = $this->getArticleTitle(); 188 | $articleContent = $this->grabArticle(); 189 | 190 | if (!$articleContent) { 191 | $this->success = false; 192 | $articleContent = $this->dom->createElement('div'); 193 | $articleContent->setAttribute('id', 'readability-content'); 194 | $articleContent->innerHTML = '

Sorry, Readability was unable to parse this page for content.

'; 195 | } 196 | 197 | $overlay->setAttribute('id', 'readOverlay'); 198 | $innerDiv->setAttribute('id', 'readInner'); 199 | 200 | /* Glue the structure of our document together. */ 201 | $innerDiv->appendChild($articleTitle); 202 | $innerDiv->appendChild($articleContent); 203 | $overlay->appendChild($innerDiv); 204 | 205 | /* Clear the old HTML, insert the new content. */ 206 | $this->body->innerHTML = ''; 207 | $this->body->appendChild($overlay); 208 | //document.body.insertBefore(overlay, document.body.firstChild); 209 | $this->body->removeAttribute('style'); 210 | 211 | $this->postProcessContent($articleContent); 212 | 213 | // Set title and content instance variables 214 | $this->articleTitle = $articleTitle; 215 | $this->articleContent = $articleContent; 216 | 217 | return $this->success; 218 | } 219 | 220 | /** 221 | * Debug 222 | */ 223 | protected function dbg($msg) { 224 | if ($this->debug) echo '* ',$msg, "\n"; 225 | } 226 | 227 | /** 228 | * Run any post-process modifications to article content as necessary. 229 | * 230 | * @param DOMElement 231 | * @return void 232 | */ 233 | public function postProcessContent($articleContent) { 234 | if ($this->convertLinksToFootnotes && !preg_match('/wikipedia\.org/', @$this->url)) { 235 | $this->addFootnotes($articleContent); 236 | } 237 | } 238 | 239 | /** 240 | * Get the article title as an H1. 
241 | * 242 | * @return DOMElement 243 | */ 244 | protected function getArticleTitle() { 245 | $curTitle = ''; 246 | $origTitle = ''; 247 | 248 | try { 249 | $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0)); 250 | } catch(Exception $e) {} 251 | 252 | if (preg_match('/ [\|\-] /', $curTitle)) 253 | { 254 | $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle); 255 | 256 | if (count(explode(' ', $curTitle)) < 3) { 257 | $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle); 258 | } 259 | } 260 | else if (strpos($curTitle, ': ') !== false) 261 | { 262 | $curTitle = preg_replace('/.*:(.*)/i', '$1', $origTitle); 263 | 264 | if (count(explode(' ', $curTitle)) < 3) { 265 | $curTitle = preg_replace('/[^:]*[:](.*)/i','$1', $origTitle); 266 | } 267 | } 268 | else if(strlen($curTitle) > 150 || strlen($curTitle) < 15) 269 | { 270 | $hOnes = $this->dom->getElementsByTagName('h1'); 271 | if($hOnes->length == 1) 272 | { 273 | $curTitle = $this->getInnerText($hOnes->item(0)); 274 | } 275 | } 276 | 277 | $curTitle = trim($curTitle); 278 | 279 | if (count(explode(' ', $curTitle)) <= 4) { 280 | $curTitle = $origTitle; 281 | } 282 | 283 | $articleTitle = $this->dom->createElement('h1'); 284 | $articleTitle->innerHTML = $curTitle; 285 | 286 | return $articleTitle; 287 | } 288 | 289 | /** 290 | * Prepare the HTML document for readability to scrape it. 291 | * This includes things like stripping javascript, CSS, and handling terrible markup. 292 | * 293 | * @return void 294 | **/ 295 | protected function prepDocument() { 296 | /** 297 | * In some cases a body element can't be found (if the HTML is totally hosed for example) 298 | * so we create a new body node and append it to the document. 
299 | */ 300 | if ($this->body == null) 301 | { 302 | $this->body = $this->dom->createElement('body'); 303 | $this->dom->documentElement->appendChild($this->body); 304 | } 305 | $this->body->setAttribute('id', 'readabilityBody'); 306 | 307 | /* Remove all style tags in head */ 308 | $styleTags = $this->dom->getElementsByTagName('style'); 309 | for ($i = $styleTags->length-1; $i >= 0; $i--) 310 | { 311 | $styleTags->item($i)->parentNode->removeChild($styleTags->item($i)); 312 | } 313 | 314 | /* Turn all double br's into p's */ 315 | /* Note, this is pretty costly as far as processing goes. Maybe optimize later. */ 316 | //document.body.innerHTML = document.body.innerHTML.replace(readability.regexps.replaceBrs, '

').replace(readability.regexps.replaceFonts, '<$1span>'); 317 | // We do this in the constructor for PHP as that's when we have raw HTML - before parsing it into a DOM tree. 318 | // Manipulating innerHTML as it's done in JS is not possible in PHP. 319 | } 320 | 321 | /** 322 | * For easier reading, convert this document to have footnotes at the bottom rather than inline links. 323 | * @see http://www.roughtype.com/archives/2010/05/experiments_in.php 324 | * 325 | * @return void 326 | **/ 327 | public function addFootnotes($articleContent) { 328 | $footnotesWrapper = $this->dom->createElement('div'); 329 | $footnotesWrapper->setAttribute('id', 'readability-footnotes'); 330 | $footnotesWrapper->innerHTML = '

References

'; 331 | 332 | $articleFootnotes = $this->dom->createElement('ol'); 333 | $articleFootnotes->setAttribute('id', 'readability-footnotes-list'); 334 | $footnotesWrapper->appendChild($articleFootnotes); 335 | 336 | $articleLinks = $articleContent->getElementsByTagName('a'); 337 | 338 | $linkCount = 0; 339 | for ($i = 0; $i < $articleLinks->length; $i++) 340 | { 341 | $articleLink = $articleLinks->item($i); 342 | $footnoteLink = $articleLink->cloneNode(true); 343 | $refLink = $this->dom->createElement('a'); 344 | $footnote = $this->dom->createElement('li'); 345 | $linkDomain = @parse_url($footnoteLink->getAttribute('href'), PHP_URL_HOST); 346 | if (!$linkDomain && isset($this->url)) $linkDomain = @parse_url($this->url, PHP_URL_HOST); 347 | //linkDomain = footnoteLink.host ? footnoteLink.host : document.location.host, 348 | $linkText = $this->getInnerText($articleLink); 349 | 350 | if ((strpos($articleLink->getAttribute('class'), 'readability-DoNotFootnote') !== false) || preg_match($this->regexps['skipFootnoteLink'], $linkText)) { 351 | continue; 352 | } 353 | 354 | $linkCount++; 355 | 356 | /** Add a superscript reference after the article link */ 357 | $refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount); 358 | $refLink->innerHTML = '[' . $linkCount . ']'; 359 | $refLink->setAttribute('class', 'readability-DoNotFootnote'); 360 | $refLink->setAttribute('style', 'color: inherit;'); 361 | 362 | //TODO: does this work or should we use DOMNode.isSameNode()? 363 | if ($articleLink->parentNode->lastChild == $articleLink) { 364 | $articleLink->parentNode->appendChild($refLink); 365 | } else { 366 | $articleLink->parentNode->insertBefore($refLink, $articleLink->nextSibling); 367 | } 368 | 369 | $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;'); 370 | $articleLink->setAttribute('name', 'readabilityLink-' . 
$linkCount); 371 | 372 | $footnote->innerHTML = '^ '; 373 | 374 | $footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText); 375 | $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount); 376 | 377 | $footnote->appendChild($footnoteLink); 378 | if ($linkDomain) $footnote->innerHTML = $footnote->innerHTML . ' (' . $linkDomain . ')'; 379 | 380 | $articleFootnotes->appendChild($footnote); 381 | } 382 | 383 | if ($linkCount > 0) { 384 | $articleContent->appendChild($footnotesWrapper); 385 | } 386 | } 387 | 388 | /** 389 | * Reverts P elements with class 'readability-styled' 390 | * to text nodes - which is what they were before. 391 | * 392 | * @param DOMElement 393 | * @return void 394 | */ 395 | function revertReadabilityStyledElements($articleContent) { 396 | $xpath = new DOMXPath($articleContent->ownerDocument); 397 | $elems = $xpath->query('.//p[@class="readability-styled"]', $articleContent); 398 | //$elems = $articleContent->getElementsByTagName('p'); 399 | for ($i = $elems->length-1; $i >= 0; $i--) { 400 | $e = $elems->item($i); 401 | $e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e); 402 | //if ($e->hasAttribute('class') && $e->getAttribute('class') == 'readability-styled') { 403 | // $e->parentNode->replaceChild($this->dom->createTextNode($e->textContent), $e); 404 | //} 405 | } 406 | } 407 | 408 | /** 409 | * Prepare the article node for display. Clean out any inline styles, 410 | * iframes, forms, strip extraneous

tags, etc. 411 | * 412 | * @param DOMElement 413 | * @return void 414 | */ 415 | function prepArticle($articleContent) { 416 | $this->cleanStyles($articleContent); 417 | $this->killBreaks($articleContent); 418 | if ($this->revertForcedParagraphElements) { 419 | $this->revertReadabilityStyledElements($articleContent); 420 | } 421 | 422 | /* Clean out junk from the article content */ 423 | $this->cleanConditionally($articleContent, 'form'); 424 | $this->clean($articleContent, 'object'); 425 | $this->clean($articleContent, 'h1'); 426 | 427 | /** 428 | * If there is only one h2, they are probably using it 429 | * as a header and not a subheader, so remove it since we already have a header. 430 | ***/ 431 | if (!$this->lightClean && ($articleContent->getElementsByTagName('h2')->length == 1)) { 432 | $this->clean($articleContent, 'h2'); 433 | } 434 | $this->clean($articleContent, 'iframe'); 435 | 436 | $this->cleanHeaders($articleContent); 437 | 438 | /* Do these last as the previous stuff may have removed junk that will affect these */ 439 | $this->cleanConditionally($articleContent, 'table'); 440 | $this->cleanConditionally($articleContent, 'ul'); 441 | $this->cleanConditionally($articleContent, 'div'); 442 | 443 | /* Remove extra paragraphs */ 444 | $articleParagraphs = $articleContent->getElementsByTagName('p'); 445 | for ($i = $articleParagraphs->length-1; $i >= 0; $i--) 446 | { 447 | $imgCount = $articleParagraphs->item($i)->getElementsByTagName('img')->length; 448 | $embedCount = $articleParagraphs->item($i)->getElementsByTagName('embed')->length; 449 | $objectCount = $articleParagraphs->item($i)->getElementsByTagName('object')->length; 450 | $iframeCount = $articleParagraphs->item($i)->getElementsByTagName('iframe')->length; 451 | 452 | if ($imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $iframeCount === 0 && $this->getInnerText($articleParagraphs->item($i), false) == '') 453 | { 454 | 
$articleParagraphs->item($i)->parentNode->removeChild($articleParagraphs->item($i)); 455 | } 456 | } 457 | 458 | try { 459 | $articleContent->innerHTML = preg_replace('/]*>\s*

innerHTML); 460 | //articleContent.innerHTML = articleContent.innerHTML.replace(/]*>\s*

dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " . $e); 464 | } 465 | } 466 | 467 | /** 468 | * Initialize a node with the readability object. Also checks the 469 | * className/id for special names to add to its score. 470 | * 471 | * @param Element 472 | * @return void 473 | **/ 474 | protected function initializeNode($node) { 475 | $readability = $this->dom->createAttribute('readability'); 476 | $readability->value = 0; // this is our contentScore 477 | $node->setAttributeNode($readability); 478 | 479 | switch (strtoupper($node->tagName)) { // unsure if strtoupper is needed, but using it just in case 480 | case 'DIV': 481 | $readability->value += 5; 482 | break; 483 | 484 | case 'PRE': 485 | case 'TD': 486 | case 'BLOCKQUOTE': 487 | $readability->value += 3; 488 | break; 489 | 490 | case 'ADDRESS': 491 | case 'OL': 492 | case 'UL': 493 | case 'DL': 494 | case 'DD': 495 | case 'DT': 496 | case 'LI': 497 | case 'FORM': 498 | $readability->value -= 3; 499 | break; 500 | 501 | case 'H1': 502 | case 'H2': 503 | case 'H3': 504 | case 'H4': 505 | case 'H5': 506 | case 'H6': 507 | case 'TH': 508 | $readability->value -= 5; 509 | break; 510 | } 511 | $readability->value += $this->getClassWeight($node); 512 | } 513 | 514 | /*** 515 | * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is 516 | * most likely to be the stuff a user wants to read. Then return it wrapped up in a div. 517 | * 518 | * @return DOMElement 519 | **/ 520 | protected function grabArticle($page=null) { 521 | $stripUnlikelyCandidates = $this->flagIsActive(self::FLAG_STRIP_UNLIKELYS); 522 | if (!$page) $page = $this->dom; 523 | $allElements = $page->getElementsByTagName('*'); 524 | /** 525 | * First, node prepping. 
Trash nodes that look cruddy (like ones with the class name "comment", etc), and turn divs 526 | * into P tags where they have been used inappropriately (as in, where they contain no other block level elements.) 527 | * 528 | * Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5 529 | * TODO: Shouldn't this be a reverse traversal? 530 | **/ 531 | $node = null; 532 | $nodesToScore = array(); 533 | for ($nodeIndex = 0; ($node = $allElements->item($nodeIndex)); $nodeIndex++) { 534 | //for ($nodeIndex=$targetList->length-1; $nodeIndex >= 0; $nodeIndex--) { 535 | //$node = $targetList->item($nodeIndex); 536 | $tagName = strtoupper($node->tagName); 537 | /* Remove unlikely candidates */ 538 | if ($stripUnlikelyCandidates) { 539 | $unlikelyMatchString = $node->getAttribute('class') . $node->getAttribute('id'); 540 | if ( 541 | preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) && 542 | !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) && 543 | $tagName != 'BODY' 544 | ) 545 | { 546 | $this->dbg('Removing unlikely candidate - ' . $unlikelyMatchString); 547 | //$nodesToRemove[] = $node; 548 | $node->parentNode->removeChild($node); 549 | $nodeIndex--; 550 | continue; 551 | } 552 | } 553 | 554 | if ($tagName == 'P' || $tagName == 'TD' || $tagName == 'PRE') { 555 | $nodesToScore[] = $node; 556 | } 557 | 558 | /* Turn all divs that don't have children block level elements into p's */ 559 | if ($tagName == 'DIV') { 560 | if (!preg_match($this->regexps['divToPElements'], $node->innerHTML)) { 561 | //$this->dbg('Altering div to p'); 562 | $newNode = $this->dom->createElement('p'); 563 | try { 564 | $newNode->innerHTML = $node->innerHTML; 565 | //$nodesToReplace[] = array('new'=>$newNode, 'old'=>$node); 566 | $node->parentNode->replaceChild($newNode, $node); 567 | $nodeIndex--; 568 | $nodesToScore[] = $node; // or $newNode? 
569 | } 570 | catch(Exception $e) { 571 | $this->dbg('Could not alter div to p, reverting back to div.: ' . $e); 572 | } 573 | } 574 | else 575 | { 576 | /* EXPERIMENTAL */ 577 | // TODO: change these p elements back to text nodes after processing 578 | for ($i = 0, $il = $node->childNodes->length; $i < $il; $i++) { 579 | $childNode = $node->childNodes->item($i); 580 | if ($childNode->nodeType == 3) { // XML_TEXT_NODE 581 | //$this->dbg('replacing text node with a p tag with the same content.'); 582 | $p = $this->dom->createElement('p'); 583 | $p->innerHTML = $childNode->nodeValue; 584 | $p->setAttribute('style', 'display: inline;'); 585 | $p->setAttribute('class', 'readability-styled'); 586 | $childNode->parentNode->replaceChild($p, $childNode); 587 | } 588 | } 589 | } 590 | } 591 | } 592 | 593 | /** 594 | * Loop through all paragraphs, and assign a score to them based on how content-y they look. 595 | * Then add their score to their parent node. 596 | * 597 | * A score is determined by things like number of commas, class names, etc. Maybe eventually link density. 598 | **/ 599 | $candidates = array(); 600 | for ($pt=0; $pt < count($nodesToScore); $pt++) { 601 | $parentNode = $nodesToScore[$pt]->parentNode; 602 | // $grandParentNode = $parentNode ? $parentNode->parentNode : null; 603 | $grandParentNode = !$parentNode ? null : (($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null); 604 | $innerText = $this->getInnerText($nodesToScore[$pt]); 605 | 606 | if (!$parentNode || !isset($parentNode->tagName)) { 607 | continue; 608 | } 609 | 610 | /* If this paragraph is less than 25 characters, don't even count it. */ 611 | if(strlen($innerText) < 25) { 612 | continue; 613 | } 614 | 615 | /* Initialize readability data for the parent. */ 616 | if (!$parentNode->hasAttribute('readability')) 617 | { 618 | $this->initializeNode($parentNode); 619 | $candidates[] = $parentNode; 620 | } 621 | 622 | /* Initialize readability data for the grandparent. 
*/ 623 | if ($grandParentNode && !$grandParentNode->hasAttribute('readability') && isset($grandParentNode->tagName)) 624 | { 625 | $this->initializeNode($grandParentNode); 626 | $candidates[] = $grandParentNode; 627 | } 628 | 629 | $contentScore = 0; 630 | 631 | /* Add a point for the paragraph itself as a base. */ 632 | $contentScore++; 633 | 634 | /* Add points for any commas within this paragraph */ 635 | $contentScore += count(explode(',', $innerText)); 636 | 637 | /* For every 100 characters in this paragraph, add another point. Up to 3 points. */ 638 | $contentScore += min(floor(strlen($innerText) / 100), 3); 639 | 640 | /* Add the score to the parent. The grandparent gets half. */ 641 | $parentNode->getAttributeNode('readability')->value += $contentScore; 642 | 643 | if ($grandParentNode) { 644 | $grandParentNode->getAttributeNode('readability')->value += $contentScore/2; 645 | } 646 | } 647 | 648 | /** 649 | * After we've calculated scores, loop through all of the possible candidate nodes we found 650 | * and find the one with the highest score. 651 | **/ 652 | $topCandidate = null; 653 | for ($c=0, $cl=count($candidates); $c < $cl; $c++) 654 | { 655 | /** 656 | * Scale the final candidates score based on link density. Good content should have a 657 | * relatively small link density (5% or less) and be mostly unaffected by this operation. 658 | **/ 659 | $readability = $candidates[$c]->getAttributeNode('readability'); 660 | $readability->value = $readability->value * (1-$this->getLinkDensity($candidates[$c])); 661 | 662 | $this->dbg('Candidate: ' . $candidates[$c]->tagName . ' (' . $candidates[$c]->getAttribute('class') . ':' . $candidates[$c]->getAttribute('id') . ') with score ' . $readability->value); 663 | 664 | if (!$topCandidate || $readability->value > (int)$topCandidate->getAttribute('readability')) { 665 | $topCandidate = $candidates[$c]; 666 | } 667 | } 668 | 669 | /** 670 | * If we still have no top candidate, just use the body as a last resort. 
671 | * We also have to copy the body node so it is something we can modify. 672 | **/ 673 | if ($topCandidate === null || strtoupper($topCandidate->tagName) == 'BODY') 674 | { 675 | $topCandidate = $this->dom->createElement('div'); 676 | if ($page instanceof DOMDocument) { 677 | if (!isset($page->documentElement)) { 678 | // we don't have a body either? what a mess! :) 679 | } else { 680 | $topCandidate->innerHTML = $page->documentElement->innerHTML; 681 | $page->documentElement->innerHTML = ''; 682 | $page->documentElement->appendChild($topCandidate); 683 | } 684 | } else { 685 | $topCandidate->innerHTML = $page->innerHTML; 686 | $page->innerHTML = ''; 687 | $page->appendChild($topCandidate); 688 | } 689 | $this->initializeNode($topCandidate); 690 | } 691 | 692 | /** 693 | * Now that we have the top candidate, look through its siblings for content that might also be related. 694 | * Things like preambles, content split by ads that we removed, etc. 695 | **/ 696 | $articleContent = $this->dom->createElement('div'); 697 | $articleContent->setAttribute('id', 'readability-content'); 698 | $siblingScoreThreshold = max(10, ((int)$topCandidate->getAttribute('readability')) * 0.2); 699 | $siblingNodes = $topCandidate->parentNode->childNodes; 700 | if (!isset($siblingNodes)) { 701 | $siblingNodes = new stdClass; 702 | $siblingNodes->length = 0; 703 | } 704 | 705 | for ($s=0, $sl=$siblingNodes->length; $s < $sl; $s++) 706 | { 707 | $siblingNode = $siblingNodes->item($s); 708 | $append = false; 709 | 710 | $this->dbg('Looking at sibling node: ' . $siblingNode->nodeName . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : '')); 711 | 712 | //dbg('Sibling has score ' . ($siblingNode->readability ? 
siblingNode.readability.contentScore : 'Unknown')); 713 | 714 | if ($siblingNode === $topCandidate) 715 | // or if ($siblingNode->isSameNode($topCandidate)) 716 | { 717 | $append = true; 718 | } 719 | 720 | $contentBonus = 0; 721 | /* Give a bonus if sibling nodes and top candidates have the example same classname */ 722 | if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') { 723 | $contentBonus += ((int)$topCandidate->getAttribute('readability')) * 0.2; 724 | } 725 | 726 | if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability') && (((int)$siblingNode->getAttribute('readability')) + $contentBonus) >= $siblingScoreThreshold) 727 | { 728 | $append = true; 729 | } 730 | 731 | if (strtoupper($siblingNode->nodeName) == 'P') { 732 | $linkDensity = $this->getLinkDensity($siblingNode); 733 | $nodeContent = $this->getInnerText($siblingNode); 734 | $nodeLength = strlen($nodeContent); 735 | 736 | if ($nodeLength > 80 && $linkDensity < 0.25) 737 | { 738 | $append = true; 739 | } 740 | else if ($nodeLength < 80 && $linkDensity === 0 && preg_match('/\.( |$)/', $nodeContent)) 741 | { 742 | $append = true; 743 | } 744 | } 745 | 746 | if ($append) 747 | { 748 | $this->dbg('Appending node: ' . $siblingNode->nodeName); 749 | 750 | $nodeToAppend = null; 751 | $sibNodeName = strtoupper($siblingNode->nodeName); 752 | if ($sibNodeName != 'DIV' && $sibNodeName != 'P') { 753 | /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */ 754 | 755 | $this->dbg('Altering siblingNode of ' . $sibNodeName . 
' to div.'); 756 | $nodeToAppend = $this->dom->createElement('div'); 757 | try { 758 | $nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id')); 759 | $nodeToAppend->innerHTML = $siblingNode->innerHTML; 760 | } 761 | catch(Exception $e) 762 | { 763 | $this->dbg('Could not alter siblingNode to div, reverting back to original.'); 764 | $nodeToAppend = $siblingNode; 765 | $s--; 766 | $sl--; 767 | } 768 | } else { 769 | $nodeToAppend = $siblingNode; 770 | $s--; 771 | $sl--; 772 | } 773 | 774 | /* To ensure a node does not interfere with readability styles, remove its classnames */ 775 | $nodeToAppend->removeAttribute('class'); 776 | 777 | /* Append sibling and subtract from our list because it removes the node when you append to another node */ 778 | $articleContent->appendChild($nodeToAppend); 779 | } 780 | } 781 | 782 | /** 783 | * So we have all of the content that we need. Now we clean it up for presentation. 784 | **/ 785 | $this->prepArticle($articleContent); 786 | 787 | /** 788 | * Now that we've gone through the full algorithm, check to see if we got any meaningful content. 789 | * If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher 790 | * likelihood of finding the content, and the sieve approach gives us a higher likelihood of 791 | * finding the -right- content. 792 | **/ 793 | if (strlen($this->getInnerText($articleContent, false)) < 250) 794 | { 795 | // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7 796 | // in the meantime, we check and create an empty element if it's not there. 
797 | if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body'); 798 | $this->body->innerHTML = $this->bodyCache; 799 | 800 | if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) { 801 | $this->removeFlag(self::FLAG_STRIP_UNLIKELYS); 802 | return $this->grabArticle($this->body); 803 | } 804 | else if ($this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) { 805 | $this->removeFlag(self::FLAG_WEIGHT_CLASSES); 806 | return $this->grabArticle($this->body); 807 | } 808 | else if ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) { 809 | $this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY); 810 | return $this->grabArticle($this->body); 811 | } 812 | else { 813 | return false; 814 | } 815 | } 816 | return $articleContent; 817 | } 818 | 819 | /** 820 | * Remove script tags from document 821 | * 822 | * @param DOMElement 823 | * @return void 824 | */ 825 | public function removeScripts($doc) { 826 | $scripts = $doc->getElementsByTagName('script'); 827 | for($i = $scripts->length-1; $i >= 0; $i--) 828 | { 829 | $scripts->item($i)->parentNode->removeChild($scripts->item($i)); 830 | } 831 | } 832 | 833 | /** 834 | * Get the inner text of a node. 835 | * This also strips out any excess whitespace to be found. 836 | * 837 | * @param DOMElement $ 838 | * @param boolean $normalizeSpaces (default: true) 839 | * @return string 840 | **/ 841 | public function getInnerText($e, $normalizeSpaces=true) { 842 | $textContent = ''; 843 | 844 | if (!isset($e->textContent) || $e->textContent == '') { 845 | return ''; 846 | } 847 | 848 | $textContent = trim($e->textContent); 849 | 850 | if ($normalizeSpaces) { 851 | return preg_replace($this->regexps['normalize'], ' ', $textContent); 852 | } else { 853 | return $textContent; 854 | } 855 | } 856 | 857 | /** 858 | * Get the number of times a string $s appears in the node $e. 859 | * 860 | * @param DOMElement $e 861 | * @param string - what to count. 
Default is "," 862 | * @return number (integer) 863 | **/ 864 | public function getCharCount($e, $s=',') { 865 | return substr_count($this->getInnerText($e), $s); 866 | } 867 | 868 | /** 869 | * Remove the style attribute on every $e and under. 870 | * 871 | * @param DOMElement $e 872 | * @return void 873 | */ 874 | public function cleanStyles($e) { 875 | if (!is_object($e)) return; 876 | $elems = $e->getElementsByTagName('*'); 877 | foreach ($elems as $elem) { 878 | $elem->removeAttribute('style'); 879 | } 880 | } 881 | 882 | /** 883 | * Get the density of links as a percentage of the content 884 | * This is the amount of text that is inside a link divided by the total text in the node. 885 | * 886 | * @param DOMElement $e 887 | * @return number (float) 888 | */ 889 | public function getLinkDensity($e) { 890 | $links = $e->getElementsByTagName('a'); 891 | $textLength = strlen($this->getInnerText($e)); 892 | $linkLength = 0; 893 | for ($i=0, $il=$links->length; $i < $il; $i++) 894 | { 895 | $linkLength += strlen($this->getInnerText($links->item($i))); 896 | } 897 | if ($textLength > 0) { 898 | return $linkLength / $textLength; 899 | } else { 900 | return 0; 901 | } 902 | } 903 | 904 | /** 905 | * Get an elements class/id weight. Uses regular expressions to tell if this 906 | * element looks good or bad. 
907 | * 908 | * @param DOMElement $e 909 | * @return number (Integer) 910 | */ 911 | public function getClassWeight($e) { 912 | if(!$this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) { 913 | return 0; 914 | } 915 | 916 | $weight = 0; 917 | 918 | /* Look for a special classname */ 919 | if ($e->hasAttribute('class') && $e->getAttribute('class') != '') 920 | { 921 | if (preg_match($this->regexps['negative'], $e->getAttribute('class'))) { 922 | $weight -= 25; 923 | } 924 | if (preg_match($this->regexps['positive'], $e->getAttribute('class'))) { 925 | $weight += 25; 926 | } 927 | } 928 | 929 | /* Look for a special ID */ 930 | if ($e->hasAttribute('id') && $e->getAttribute('id') != '') 931 | { 932 | if (preg_match($this->regexps['negative'], $e->getAttribute('id'))) { 933 | $weight -= 25; 934 | } 935 | if (preg_match($this->regexps['positive'], $e->getAttribute('id'))) { 936 | $weight += 25; 937 | } 938 | } 939 | return $weight; 940 | } 941 | 942 | /** 943 | * Remove extraneous break tags from a node. 944 | * 945 | * @param DOMElement $node 946 | * @return void 947 | */ 948 | public function killBreaks($node) { 949 | $html = $node->innerHTML; 950 | $html = preg_replace($this->regexps['killBreaks'], '
', $html); 951 | $node->innerHTML = $html; 952 | } 953 | 954 | /** 955 | * Clean a node of all elements of type "tag". 956 | * (Unless it's a youtube/vimeo video. People love movies.) 957 | * 958 | * Updated 2012-09-18 to preserve youtube/vimeo iframes 959 | * 960 | * @param DOMElement $e 961 | * @param string $tag 962 | * @return void 963 | */ 964 | public function clean($e, $tag) { 965 | $targetList = $e->getElementsByTagName($tag); 966 | $isEmbed = ($tag == 'iframe' || $tag == 'object' || $tag == 'embed'); 967 | 968 | for ($y=$targetList->length-1; $y >= 0; $y--) { 969 | /* Allow youtube and vimeo videos through as people usually want to see those. */ 970 | if ($isEmbed) { 971 | $attributeValues = ''; 972 | for ($i=0, $il=$targetList->item($y)->attributes->length; $i < $il; $i++) { 973 | $attributeValues .= $targetList->item($y)->attributes->item($i)->value . '|'; // DOMAttr? (TODO: test) 974 | } 975 | 976 | /* First, check the elements attributes to see if any of them contain youtube or vimeo */ 977 | if (preg_match($this->regexps['video'], $attributeValues)) { 978 | continue; 979 | } 980 | 981 | /* Then check the elements inside this element for the same. */ 982 | if (preg_match($this->regexps['video'], $targetList->item($y)->innerHTML)) { 983 | continue; 984 | } 985 | } 986 | $targetList->item($y)->parentNode->removeChild($targetList->item($y)); 987 | } 988 | } 989 | 990 | /** 991 | * Clean an element of all tags of type "tag" if they look fishy. 992 | * "Fishy" is an algorithm based on content length, classnames, 993 | * link density, number of images & embeds, etc. 
994 | * 995 | * @param DOMElement $e 996 | * @param string $tag 997 | * @return void 998 | */ 999 | public function cleanConditionally($e, $tag) { 1000 | if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) { 1001 | return; 1002 | } 1003 | 1004 | $tagsList = $e->getElementsByTagName($tag); 1005 | $curTagsLength = $tagsList->length; 1006 | 1007 | /** 1008 | * Gather counts for other typical elements embedded within. 1009 | * Traverse backwards so we can remove nodes at the same time without effecting the traversal. 1010 | * 1011 | * TODO: Consider taking into account original contentScore here. 1012 | */ 1013 | for ($i=$curTagsLength-1; $i >= 0; $i--) { 1014 | $weight = $this->getClassWeight($tagsList->item($i)); 1015 | $contentScore = ($tagsList->item($i)->hasAttribute('readability')) ? (int)$tagsList->item($i)->getAttribute('readability') : 0; 1016 | 1017 | $this->dbg('Cleaning Conditionally ' . $tagsList->item($i)->tagName . ' (' . $tagsList->item($i)->getAttribute('class') . ':' . $tagsList->item($i)->getAttribute('id') . ')' . (($tagsList->item($i)->hasAttribute('readability')) ? (' with score ' . $tagsList->item($i)->getAttribute('readability')) : '')); 1018 | 1019 | if ($weight + $contentScore < 0) { 1020 | $tagsList->item($i)->parentNode->removeChild($tagsList->item($i)); 1021 | } 1022 | else if ( $this->getCharCount($tagsList->item($i), ',') < 10) { 1023 | /** 1024 | * If there are not very many commas, and the number of 1025 | * non-paragraph elements is more than paragraphs or other ominous signs, remove the element. 
1026 | **/ 1027 | $p = $tagsList->item($i)->getElementsByTagName('p')->length; 1028 | $img = $tagsList->item($i)->getElementsByTagName('img')->length; 1029 | $li = $tagsList->item($i)->getElementsByTagName('li')->length-100; 1030 | $input = $tagsList->item($i)->getElementsByTagName('input')->length; 1031 | $a = $tagsList->item($i)->getElementsByTagName('a')->length; 1032 | 1033 | $embedCount = 0; 1034 | $embeds = $tagsList->item($i)->getElementsByTagName('embed'); 1035 | for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) { 1036 | if (preg_match($this->regexps['video'], $embeds->item($ei)->getAttribute('src'))) { 1037 | $embedCount++; 1038 | } 1039 | } 1040 | $embeds = $tagsList->item($i)->getElementsByTagName('iframe'); 1041 | for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) { 1042 | if (preg_match($this->regexps['video'], $embeds->item($ei)->getAttribute('src'))) { 1043 | $embedCount++; 1044 | } 1045 | } 1046 | 1047 | $linkDensity = $this->getLinkDensity($tagsList->item($i)); 1048 | $contentLength = strlen($this->getInnerText($tagsList->item($i))); 1049 | $toRemove = false; 1050 | 1051 | if ($this->lightClean) { 1052 | $this->dbg('Light clean...'); 1053 | if ( ($img > $p) && ($img > 4) ) { 1054 | $this->dbg(' more than 4 images and more image elements than paragraph elements'); 1055 | $toRemove = true; 1056 | } else if ($li > $p && $tag != 'ul' && $tag != 'ol') { 1057 | $this->dbg(' too many

  • elements, and parent is not
      or
        '); 1058 | $toRemove = true; 1059 | } else if ( $input > floor($p/3) ) { 1060 | $this->dbg(' too many elements'); 1061 | $toRemove = true; 1062 | } else if ($contentLength < 25 && ($embedCount === 0 && ($img === 0 || $img > 2))) { 1063 | $this->dbg(' content length less than 25 chars, 0 embeds and either 0 images or more than 2 images'); 1064 | $toRemove = true; 1065 | } else if($weight < 25 && $linkDensity > 0.2) { 1066 | $this->dbg(' weight smaller than 25 and link density above 0.2'); 1067 | $toRemove = true; 1068 | } else if($a > 2 && ($weight >= 25 && $linkDensity > 0.5)) { 1069 | $this->dbg(' more than 2 links and weight above 25 but link density greater than 0.5'); 1070 | $toRemove = true; 1071 | } else if($embedCount > 3) { 1072 | $this->dbg(' more than 3 embeds'); 1073 | $toRemove = true; 1074 | } 1075 | } else { 1076 | $this->dbg('Standard clean...'); 1077 | if ( $img > $p ) { 1078 | $this->dbg(' more image elements than paragraph elements'); 1079 | $toRemove = true; 1080 | } else if ($li > $p && $tag != 'ul' && $tag != 'ol') { 1081 | $this->dbg(' too many
      1. elements, and parent is not
          or
            '); 1082 | $toRemove = true; 1083 | } else if ( $input > floor($p/3) ) { 1084 | $this->dbg(' too many elements'); 1085 | $toRemove = true; 1086 | } else if ($contentLength < 25 && ($img === 0 || $img > 2) ) { 1087 | $this->dbg(' content length less than 25 chars and 0 images, or more than 2 images'); 1088 | $toRemove = true; 1089 | } else if($weight < 25 && $linkDensity > 0.2) { 1090 | $this->dbg(' weight smaller than 25 and link density above 0.2'); 1091 | $toRemove = true; 1092 | } else if($weight >= 25 && $linkDensity > 0.5) { 1093 | $this->dbg(' weight above 25 but link density greater than 0.5'); 1094 | $toRemove = true; 1095 | } else if(($embedCount == 1 && $contentLength < 75) || $embedCount > 1) { 1096 | $this->dbg(' 1 embed and content length smaller than 75 chars, or more than one embed'); 1097 | $toRemove = true; 1098 | } 1099 | } 1100 | 1101 | if ($toRemove) { 1102 | //$this->dbg('Removing: '.$tagsList->item($i)->innerHTML); 1103 | $tagsList->item($i)->parentNode->removeChild($tagsList->item($i)); 1104 | } 1105 | } 1106 | } 1107 | } 1108 | 1109 | /** 1110 | * Clean out spurious headers from an Element. Checks things like classnames and link density. 1111 | * 1112 | * @param DOMElement $e 1113 | * @return void 1114 | */ 1115 | public function cleanHeaders($e) { 1116 | for ($headerIndex = 1; $headerIndex < 3; $headerIndex++) { 1117 | $headers = $e->getElementsByTagName('h' . 
$headerIndex); 1118 | for ($i=$headers->length-1; $i >=0; $i--) { 1119 | if ($this->getClassWeight($headers->item($i)) < 0 || $this->getLinkDensity($headers->item($i)) > 0.33) { 1120 | $headers->item($i)->parentNode->removeChild($headers->item($i)); 1121 | } 1122 | } 1123 | } 1124 | } 1125 | 1126 | public function flagIsActive($flag) { 1127 | return ($this->flags & $flag) > 0; 1128 | } 1129 | 1130 | public function addFlag($flag) { 1131 | $this->flags = $this->flags | $flag; 1132 | } 1133 | 1134 | public function removeFlag($flag) { 1135 | $this->flags = $this->flags & ~$flag; 1136 | } 1137 | } -------------------------------------------------------------------------------- /lib/fivefilters-php-readability/composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fivefilters/php-readability", 3 | "type": "library", 4 | "description": "Automatic article extraction from HTML", 5 | "keywords": ["article extraction","content extraction","extraction","article","content","html"], 6 | "homepage": "http://code.fivefilters.org/php-readability/", 7 | "license": "Apache-2.0", 8 | "authors": [ 9 | { 10 | "name": "Keyvan Minoukadeh", 11 | "email": "keyvan@keyvan.net", 12 | "homepage": "http://keyvan.net", 13 | "role": "Developer (ported original JS code to PHP)" 14 | }, 15 | { 16 | "name": "Arc90", 17 | "homepage": "http://arc90.com", 18 | "role": "Developer (original JS version)" 19 | } 20 | ], 21 | "require": { 22 | "php": ">=5.2" 23 | }, 24 | "autoload": { 25 | "psr-0": { "Readability": "" } 26 | } 27 | } -------------------------------------------------------------------------------- /lib/fivefilters-php-readability/examples/JSLikeHTMLElement.php: -------------------------------------------------------------------------------- 1 | registerNodeClass('DOMElement', 'JSLikeHTMLElement'); 6 | $doc->loadHTML('

            Para 1

            Para 2

            '); 7 | $elem = $doc->getElementsByTagName('div')->item(0); 8 | 9 | // print innerHTML 10 | echo $elem->innerHTML; // prints '

            Para 1

            Para 2

            ' 11 | echo "\n\n"; 12 | 13 | // set innerHTML 14 | $elem->innerHTML = 'FiveFilters.org'; 15 | echo $elem->innerHTML; // prints 'FiveFilters.org' 16 | echo "\n\n"; 17 | 18 | // print document (with our changes) 19 | echo $doc->saveXML(); 20 | ?> -------------------------------------------------------------------------------- /lib/fivefilters-php-readability/examples/Readability.php: -------------------------------------------------------------------------------- 1 | cleanRepair(); 21 | $html = $tidy->value; 22 | } 23 | 24 | // give it to Readability 25 | $readability = new Readability($html, $url); 26 | // print debug output? 27 | // useful to compare against Arc90's original JS version - 28 | // simply click the bookmarklet with FireBug's console window open 29 | $readability->debug = false; 30 | // convert links to footnotes? 31 | $readability->convertLinksToFootnotes = true; 32 | // process it 33 | $result = $readability->init(); 34 | // does it look like we found what we wanted? 35 | if ($result) { 36 | echo "== Title =====================================\n"; 37 | echo $readability->getTitle()->textContent, "\n\n"; 38 | echo "== Body ======================================\n"; 39 | $content = $readability->getContent()->innerHTML; 40 | // if we've got Tidy, let's clean it up for output 41 | if (function_exists('tidy_parse_string')) { 42 | $tidy = tidy_parse_string($content, array('indent'=>true, 'show-body-only' => true), 'UTF8'); 43 | $tidy->cleanRepair(); 44 | $content = $tidy->value; 45 | } 46 | echo $content; 47 | } else { 48 | echo 'Looks like we couldn\'t find the content. 
:('; 49 | } 50 | ?> -------------------------------------------------------------------------------- /src/Fetcher.php: -------------------------------------------------------------------------------- 1 | true]); 34 | $response = $client->request('GET', $url, [ 35 | 'headers' => [ 36 | 'User-Agent' => USER_AGENT, 37 | ], 38 | ]); 39 | $code = $response->getStatusCode(); 40 | if ($code >= 200 && $code < 300) { 41 | error_log("Fetching: success"); 42 | $html = (string) $response->getBody(); 43 | \FileSystemCache::store($key, $html); 44 | return $html; 45 | } else { 46 | error_log(Color::set("Fetching: error (HTTP $code)", "red")); 47 | return ""; 48 | } 49 | } else { 50 | error_log(Color::set("Fetching: cached ($key)", "cyan")); 51 | return $html; 52 | } 53 | return ""; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/Generator.php: -------------------------------------------------------------------------------- 1 | new \Mustache_Loader_FilesystemLoader(__DIR__ . '/templates'), 10 | 'partials_loader' => new \Mustache_Loader_FilesystemLoader(__DIR__ . '/templates/partials'), 11 | )); 12 | $tpl = $mustache->loadTemplate($template); 13 | $out = $tpl->render([ 14 | 'title' => 'Hacker News', 15 | 'lastupdate' => date('r'), 16 | "articles" => $articles 17 | ]); 18 | 19 | if ($options["gzip"]) { 20 | $out = gzencode($out); 21 | } 22 | 23 | return $out; 24 | } 25 | 26 | function generateManifest($path, $version) { 27 | $cachedfiles = array(); 28 | foreach (new \RecursiveIteratorIterator(new \RecursiveDirectoryIterator($path)) as $filename) { 29 | $basename = basename($filename); 30 | if (preg_match("/^\./", $basename) || $basename === 'cache.manifest' || $basename === 'latest.html') { 31 | continue; 32 | } 33 | $cachedfiles[] = str_replace($path, "", $filename); 34 | } 35 | 36 | $mustache = new \Mustache_Engine(array( 37 | 'loader' => new \Mustache_Loader_FilesystemLoader(__DIR__ . 
'/templates'), 38 | 'partials_loader' => new \Mustache_Loader_FilesystemLoader(__DIR__ . '/templates/partials'), 39 | )); 40 | $tpl = $mustache->loadTemplate('fullhn.manifest'); 41 | $out = $tpl->render([ 42 | 'version' => $version, 43 | 'cachedfiles' => implode("\n", $cachedfiles), 44 | ]); 45 | return $out; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/Parser.php: -------------------------------------------------------------------------------- 1 | loadStr($strHtml); 26 | $domNode = $dom->find($commonSite['path']); 27 | if (count($domNode)) { 28 | error_log(Color::set("Common site extractor", "cyan")); 29 | return (string) $domNode[0]; 30 | } 31 | } 32 | } 33 | 34 | return false; 35 | } 36 | 37 | public static function parse($url, $strHtml) 38 | { 39 | 40 | if (ARTICLE_MAXSIZE < strlen($strHtml)) { 41 | throw new \Exception("Cannot parse (HTML is too large)"); 42 | } 43 | 44 | // Convert plain text to HTML 45 | if (preg_match('/\.txt$/', $url)) { 46 | return "
            " . htmlentities($strHtml, ENT_QUOTES, "UTF-8") . "
            "; 47 | } 48 | 49 | $out = $strHtml; 50 | 51 | //check common site 52 | $commonSiteExtraction = Parser::extractFromCommonSite($url, $strHtml); 53 | if ($commonSiteExtraction !== false) { 54 | $out = $commonSiteExtraction; 55 | } 56 | 57 | // Purify 58 | $preConfig = \HTMLPurifier_Config::createDefault(); 59 | $preConfig->set('HTML.TidyLevel', 'heavy'); 60 | $preConfig->set('HTML.ForbiddenElements', array('style', 'script', 'link')); 61 | $preConfig->set('URI.Base', $url); 62 | $preConfig->set('URI.DefaultScheme', 'https'); 63 | $preConfig->set('URI.MakeAbsolute', true); 64 | $prePurifier = new \HTMLPurifier($preConfig); 65 | $out = $prePurifier->purify($out); 66 | 67 | $readability = new \Readability($out, $url); 68 | $result = $readability->init(); 69 | if ($result) { 70 | error_log("Extracting: readability success"); 71 | $out = $readability->getContent()->innerHTML; 72 | } else { 73 | // error_log("Extracting: readability error"); 74 | error_log(Color::set("Extracting: readability error", "red")); 75 | $out = ''; 76 | } 77 | 78 | // Reformat bad HTML from Readability 79 | $postConfig = \HTMLPurifier_Config::createDefault(); 80 | $postConfig->set('HTML.TidyLevel', 'none'); 81 | $postPurifier = new \HTMLPurifier($postConfig); 82 | $out = $postPurifier->purify($out); 83 | 84 | return $out; 85 | } 86 | 87 | public static function extractImage($strHtml) 88 | { 89 | $reader = new Opengraph\Reader(); 90 | $image = ""; 91 | try { 92 | $reader->parse($strHtml); 93 | $og = $reader->getArrayCopy(); 94 | if (array_key_exists('og:image', $og) && is_array($og['og:image']) && count($og['og:image']) > 0) { 95 | $image = $og['og:image'][0]["og:image:url"]; 96 | } 97 | } catch (\RuntimeException $e) { 98 | error_log($e->getMessage()); 99 | } 100 | return $image; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/Uploader.php: -------------------------------------------------------------------------------- 1 | 
'latest', 14 | 'region' => 'eu-west-1', 15 | ]); 16 | 17 | $files = [ 18 | [ 19 | "Bucket" => getenv('AWS_BUCKET'), 20 | "Body" => file_get_contents(__DIR__ . "/../www/index.html"), 21 | 'Key' => "index.html", 22 | "ContentType" => "text/html", 23 | "ACL" => "public-read", 24 | ], 25 | [ 26 | "Bucket" => getenv('AWS_BUCKET'), 27 | "Body" => file_get_contents(__DIR__ . "/../www/latest.html"), 28 | "Key" => "latest.html", 29 | "ContentType" => "text/html", 30 | "ACL" => "public-read", 31 | ], 32 | [ 33 | "Bucket" => getenv('AWS_BUCKET'), 34 | "Body" => file_get_contents(__DIR__ . "/../www/cache.manifest"), 35 | "Key" => "cache.manifest", 36 | "ContentType" => "text/cache-manifest", 37 | "ACL" => "public-read", 38 | ], 39 | ]; 40 | 41 | foreach ($files as $object) { 42 | $s3->putObject($object); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/templates/fullhn.manifest.mustache: -------------------------------------------------------------------------------- 1 | CACHE MANIFEST 2 | 3 | # {{ version }} 4 | 5 | CACHE: 6 | {{ cachedfiles }} 7 | 8 | NETWORK: 9 | latest.html 10 | * 11 | -------------------------------------------------------------------------------- /src/templates/index.mustache: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Full Hacker News 6 | 7 | 8 | 9 | 10 | 11 | {{> head }} 12 | 13 | 14 | 17 |
            18 |

            Full {{ title }}

            19 |

            20 | Last update: {{lastupdate}} 21 |

            22 |
            23 |
            24 | 42 | 43 | {{# articles }} 44 | 59 | {{/ articles}} 60 |
            61 |
            62 |
            63 |

            About this site

            64 |

            65 | This page displays articles from {{ title }} in one single page for offline reading. 66 |
            67 | Get the source on github. 68 |
            69 | It was made by Maurice Svay <maurice@svay.com>. 70 |

            71 |
            72 |
            73 | 74 | 79 | 80 | 81 | 82 | 83 | 84 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /src/templates/latest.mustache: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Full Hacker News 6 | 7 | 8 | 9 | 10 | 11 | {{> head }} 12 | 13 | 14 | 17 |
            18 |

            Full {{ title }}

            19 |

            20 | Last update: {{lastupdate}} 21 |

            22 |
            23 |
            24 |
              25 | {{# articles }} 26 |
            • {{ title }}
            • 27 | {{/ articles}} 28 |
            29 | 30 | {{# articles }} 31 | 46 | {{/ articles}} 47 |
            48 |
            49 |
            50 |

            About this site

            51 |

            52 | This page displays articles from {{ title }} in one single page for offline reading. 53 |
            54 | Get the source on github. 55 |
            56 | It was made by Maurice Svay <maurice@svay.com>. 57 |

            58 |
            59 |
            60 | 61 | 66 | 67 | 68 | 69 | 70 | 71 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /src/templates/partials/head.mustache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mauricesvay/FullHackerNews/73117bd1f81ddb163048e5828ee6da1edabd157e/src/templates/partials/head.mustache -------------------------------------------------------------------------------- /www/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mauricesvay/FullHackerNews/73117bd1f81ddb163048e5828ee6da1edabd157e/www/apple-touch-icon.png -------------------------------------------------------------------------------- /www/css/img/loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mauricesvay/FullHackerNews/73117bd1f81ddb163048e5828ee6da1edabd157e/www/css/img/loader.gif -------------------------------------------------------------------------------- /www/css/style.css: -------------------------------------------------------------------------------- 1 | /*! 
normalize.css v2.1.2 | MIT License | git.io/normalize */ 2 | article, 3 | aside, 4 | details, 5 | figcaption, 6 | figure, 7 | footer, 8 | header, 9 | hgroup, 10 | main, 11 | nav, 12 | section, 13 | summary { 14 | display: block; 15 | } 16 | audio, 17 | canvas, 18 | video { 19 | display: inline-block; 20 | } 21 | audio:not([controls]) { 22 | display: none; 23 | height: 0; 24 | } 25 | [hidden] { 26 | display: none; 27 | } 28 | html { 29 | font-family: sans-serif; 30 | -ms-text-size-adjust: 100%; 31 | -webkit-text-size-adjust: 100%; 32 | } 33 | body { 34 | margin: 0; 35 | } 36 | a:focus { 37 | outline: thin dotted; 38 | } 39 | a:active, 40 | a:hover { 41 | outline: 0; 42 | } 43 | h1 { 44 | font-size: 2em; 45 | margin: 0.67em 0; 46 | } 47 | abbr[title] { 48 | border-bottom: 1px dotted; 49 | } 50 | b, 51 | strong { 52 | font-weight: bold; 53 | } 54 | dfn { 55 | font-style: italic; 56 | } 57 | hr { 58 | -moz-box-sizing: content-box; 59 | box-sizing: content-box; 60 | height: 0; 61 | } 62 | mark { 63 | background: #ff0; 64 | color: #000; 65 | } 66 | code, 67 | kbd, 68 | pre, 69 | samp { 70 | font-family: monospace, serif; 71 | font-size: 1em; 72 | } 73 | pre { 74 | white-space: pre-wrap; 75 | } 76 | q { 77 | quotes: "\201C" "\201D" "\2018" "\2019"; 78 | } 79 | small { 80 | font-size: 80%; 81 | } 82 | sub, 83 | sup { 84 | font-size: 75%; 85 | line-height: 0; 86 | position: relative; 87 | vertical-align: baseline; 88 | } 89 | sup { 90 | top: -0.5em; 91 | } 92 | sub { 93 | bottom: -0.25em; 94 | } 95 | img { 96 | border: 0; 97 | } 98 | svg:not(:root) { 99 | overflow: hidden; 100 | } 101 | figure { 102 | margin: 0; 103 | } 104 | fieldset { 105 | border: 1px solid silver; 106 | margin: 0 2px; 107 | padding: 0.35em 0.625em 0.75em; 108 | } 109 | legend { 110 | border: 0; 111 | padding: 0; 112 | } 113 | button, 114 | input, 115 | select, 116 | textarea { 117 | font-family: inherit; 118 | font-size: 100%; 119 | margin: 0; 120 | } 121 | button, 122 | input { 123 | line-height: 
normal; 124 | } 125 | button, 126 | select { 127 | text-transform: none; 128 | } 129 | button, 130 | html input[type="button"], 131 | input[type="reset"], 132 | input[type="submit"] { 133 | -webkit-appearance: button; 134 | cursor: pointer; 135 | } 136 | button[disabled], 137 | html input[disabled] { 138 | cursor: default; 139 | } 140 | input[type="checkbox"], 141 | input[type="radio"] { 142 | box-sizing: border-box; 143 | padding: 0; 144 | } 145 | input[type="search"] { 146 | -webkit-appearance: textfield; 147 | -moz-box-sizing: content-box; 148 | -webkit-box-sizing: content-box; 149 | box-sizing: content-box; 150 | } 151 | input[type="search"]::-webkit-search-cancel-button, 152 | input[type="search"]::-webkit-search-decoration { 153 | -webkit-appearance: none; 154 | } 155 | button::-moz-focus-inner, 156 | input::-moz-focus-inner { 157 | border: 0; 158 | padding: 0; 159 | } 160 | textarea { 161 | overflow: auto; 162 | vertical-align: top; 163 | } 164 | table { 165 | border-collapse: collapse; 166 | border-spacing: 0; 167 | } 168 | 169 | /** 170 | * General 171 | ******************************************************************************/ 172 | body { 173 | margin: 0; 174 | font-family: Avenir, sans-serif; 175 | word-wrap: break-word; 176 | line-height: 1.5; 177 | background: #fff; 178 | color: #2c3e50; 179 | border-top: 6px solid #f60; 180 | } 181 | 182 | header, 183 | .main { 184 | max-width: 45em; 185 | margin: 0 auto; 186 | padding: 1em; 187 | } 188 | 189 | /** 190 | * Header 191 | ******************************************************************************/ 192 | header { 193 | overflow: hidden; 194 | } 195 | header h1 { 196 | float: left; 197 | font-size: 1.5em; 198 | } 199 | header p { 200 | float: right; 201 | margin-top: 1.25em; 202 | color: #999; 203 | } 204 | 205 | a { 206 | color: #2980b9; 207 | } 208 | a:visited { 209 | color: #8e44ad; 210 | } 211 | #reload, 212 | .loader { 213 | display: none; 214 | } 215 | #reload a { 216 | display: block; 217 | 
text-decoration: none; 218 | background: #f60; 219 | color: #fff; 220 | text-align: center; 221 | padding: 0.5em; 222 | } 223 | #reload a:hover { 224 | background: #f40; 225 | } 226 | 227 | /** 228 | * Table of contents 229 | ******************************************************************************/ 230 | .toc { 231 | margin: 0 0 1em 0; 232 | padding: 0; 233 | } 234 | .toc li { 235 | padding: 0.5em 0; 236 | list-style: none; 237 | border-top: 1px solid #eee; 238 | } 239 | .toc a { 240 | color: #2980b9; 241 | text-decoration: none; 242 | display: flex; 243 | } 244 | 245 | .toc .preview { 246 | flex: 0 0 48px; 247 | width: 48px; 248 | height: 48px; 249 | background-size: cover; 250 | background-position: center center; 251 | background-color: #fff; 252 | align-items: center; 253 | border-radius: 2px; 254 | } 255 | 256 | .toc .info { 257 | flex: 1 1 auto; 258 | margin-left: 16px; 259 | } 260 | 261 | .toc .title { 262 | display: block; 263 | line-height: 24px; 264 | } 265 | 266 | .toc .site { 267 | line-height: 24px; 268 | text-decoration: none; 269 | color: #999; 270 | font-size: small; 271 | } 272 | 273 | /** 274 | * Articles 275 | ******************************************************************************/ 276 | .hn-article { 277 | border-top: 3px solid #ecf0f1; 278 | padding: 1em 0; 279 | } 280 | .title { 281 | line-height: 1.2em; 282 | } 283 | .title a { 284 | color: #2c3e50; 285 | text-decoration: none; 286 | font-size: 1em; 287 | } 288 | 289 | .meta { 290 | font-size: 0.8em; 291 | margin: 1em 0; 292 | } 293 | .meta a { 294 | color: #7f8c8d; 295 | } 296 | blockquote { 297 | margin: 1em 0 1em 0; 298 | padding-left: 1em; 299 | border-left: 3px solid #eee; 300 | } 301 | 302 | table { 303 | border: 1px solid #eee; 304 | width: 100%; 305 | } 306 | tr { 307 | border-top: 1px solid #eee; 308 | } 309 | td { 310 | padding: 1ex; 311 | } 312 | 313 | img { 314 | max-width: 100%; 315 | height: auto; 316 | } 317 | 318 | pre { 319 | border: 1px solid #ccc; 320 | 
padding: 1em; 321 | background: #f3f3f3; 322 | } 323 | 324 | iframe { 325 | max-width: 100%; 326 | } 327 | 328 | hr { 329 | border: 0; 330 | border-top: 1px solid #eee; 331 | } 332 | 333 | /** 334 | * Navigation 335 | ******************************************************************************/ 336 | .toolbar { 337 | position: fixed; 338 | bottom: 0; 339 | left: 0; 340 | right: 0; 341 | background: rgba(0, 0, 0, 0.2); 342 | color: #fff; 343 | text-align: center; 344 | padding: 0.4em 0.4em 1em 0.4em; 345 | font-family: Avenir; 346 | } 347 | .toolbar a { 348 | text-transform: uppercase; 349 | color: #fff; 350 | display: inline-block; 351 | padding: 0.3em 1em; 352 | text-decoration: none; 353 | background: #000; 354 | border-radius: 24px; 355 | } 356 | .toolbar a:active { 357 | -o-transform: translateY(1px); 358 | -ms-transform: translateY(1px); 359 | -moz-transform: translateY(1px); 360 | -webkit-transform: translateY(1px); 361 | transform: translateY(1px); 362 | } 363 | 364 | /** 365 | * Footer 366 | ******************************************************************************/ 367 | footer { 368 | background: #eee; 369 | padding: 1em 1em 3em 1em; 370 | margin: 0; 371 | } 372 | footer div { 373 | max-width: 45em; 374 | margin: 0 auto; 375 | } 376 | 377 | /** 378 | * Responsive 379 | ******************************************************************************/ 380 | @media only screen and (max-width: 767px) and (orientation: portrait) { 381 | header h1, 382 | header p { 383 | float: none; 384 | text-align: center; 385 | margin: 0.5em 0; 386 | } 387 | header h1 { 388 | margin-bottom: 0; 389 | } 390 | pre { 391 | font-size: 0.9em; 392 | } 393 | 394 | h1 { 395 | font-size: 1.25em; 396 | } 397 | h2 { 398 | font-size: 1.15em; 399 | } 400 | h3 { 401 | font-size: 1em; 402 | } 403 | } 404 | -------------------------------------------------------------------------------- /www/favicon.ico: -------------------------------------------------------------------------------- 
/**
 * Page controller for the article list.
 *
 * Relies on globals loaded by the page: jQuery ($), moment and the jQuery
 * waypoints plugin. Call App.init() once the DOM is ready.
 */
window.App = {

    // Index of the article currently considered "in view". Always kept numeric
    // so that `App.current + 1` is an addition, never a string concatenation.
    current : 0,
    // Number of .hn-article elements found when init() ran.
    nbArticles : 0,

    /**
     * Count the articles, humanize the "last update" label, wire the
     * navigation controls and start listening to application-cache events.
     */
    init: function() {
        App.nbArticles = $('.hn-article').length;
        this.makeLastUpdateReadable();
        this.buildNavigation();
        this.checkCache();
    },

    /**
     * Subscribe to every application-cache event so the loader / reload
     * banner can follow the cache state.
     *
     * Does nothing when window.applicationCache is unavailable: the API has
     * been removed from current browsers, and dereferencing it would throw
     * and abort init().
     */
    checkCache : function() {
        var appCache = window.applicationCache;
        if (!appCache) {
            return;
        }

        var statusEvents = [
            'cached',
            'checking',
            'downloading',
            'noupdate',
            'obsolete',
            'progress',
            'updateready'
        ];
        for (var i = 0; i < statusEvents.length; i++) {
            appCache.addEventListener(statusEvents[i], App.handleCacheEvent, false);
        }
        appCache.addEventListener('error', App.handleCacheError, false);
    },

    /** Hide the loader when the cache update fails. */
    handleCacheError: function() {
        $('.loader').hide();
    },

    /**
     * Reflect the current application-cache status in the UI:
     * loader while checking/downloading, reload banner when an update is
     * ready, loader hidden in every other state.
     */
    handleCacheEvent: function() {
        var appCache = window.applicationCache;
        if (!appCache) {
            $('.loader').hide();
            return;
        }

        switch (appCache.status) {
            case appCache.CHECKING:
            case appCache.DOWNLOADING:
                $('.loader').show();
                break;
            case appCache.UPDATEREADY:
                $('.loader').hide();
                $('#reload').show();
                break;
            default :
                $('.loader').hide();
        }
    },

    /**
     * Scroll the window to the article whose data-index equals `idx`, move
     * focus to its title link and record it as the current article.
     *
     * @param {number|string} idx Article index; strings (e.g. taken from an
     *     href) are converted to an integer. Unknown or non-numeric indexes
     *     are ignored.
     */
    scrollToIndex : function(idx) {
        var index = parseInt(idx, 10);
        if (isNaN(index)) {
            return;
        }

        var $article = $('.hn-article[data-index=' + index + ']');
        if (!$article.length) {
            return;
        }

        $(window).scrollTop($article.position().top);
        $article.children('.title').find('a').focus();

        // NOTE(review): the delay presumably lets waypoint callbacks fired by
        // the programmatic scroll run first, so this assignment wins — confirm.
        setTimeout(function(){
            App.current = index;
        }, 100);
    },

    /**
     * Replace the content of #lastupdate with a relative time ("3 hours ago")
     * computed by moment from the element's title attribute.
     */
    makeLastUpdateReadable: function() {
        var $lastUpdate = $('#lastupdate');
        var lastupdate = moment($lastUpdate.attr("title"));
        $lastUpdate.html(lastupdate.fromNow());
    },

    /**
     * Wire the table of contents, the waypoints that track the article in
     * view, and the next / previous / index controls.
     */
    buildNavigation: function() {
        $('.toc a').on('click', function(e){
            e.preventDefault();
            // The href has the form "#article-<index>"; scrollToIndex converts
            // the remaining string to a number.
            var index = $(this).attr('href').replace('#article-','');
            App.scrollToIndex(index);
        });
        $('.hn-article').waypoint(function() {
            App.current = parseInt($(this).attr('data-index'),10);
        });
        $('#next').on('click', function(e){
            e.preventDefault();
            // NOTE(review): if data-index is 0-based the tightest bound would be
            // nbArticles - 1; scrollToIndex ignores unknown indexes, so the
            // current bound is harmless either way.
            var next = Math.min(App.nbArticles, App.current + 1);
            App.scrollToIndex(next);
        });
        $('#prev').on('click', function(e){
            e.preventDefault();
            var prev = Math.max(0, (App.current - 1));
            App.scrollToIndex(prev);
        });
        $('#index').on('click', function(e) {
            e.preventDefault();
            $(window).scrollTop(0);
            // NOTE(review): the rest of this file and the stylesheet address the
            // table of contents as `.toc`; verify that an element with id "toc"
            // exists in the templates, otherwise this focus call is a no-op.
            $('#toc a').first().focus();
        });
    }
};
t=this._data={},n=e.years||e.year||e.y||0,r=e.months||e.month||e.M||0,i=e.weeks||e.week||e.w||0,s=e.days||e.day||e.d||0,o=e.hours||e.hour||e.h||0,u=e.minutes||e.minute||e.m||0,a=e.seconds||e.second||e.s||0,f=e.milliseconds||e.millisecond||e.ms||0;this._milliseconds=f+a*1e3+u*6e4+o*36e5,this._days=s+i*7,this._months=r+n*12,t.milliseconds=f%1e3,a+=B(f/1e3),t.seconds=a%60,u+=B(a/60),t.minutes=u%60,o+=B(u/60),t.hours=o%24,s+=B(o/24),s+=i*7,t.days=s%30,r+=B(s/30),t.months=r%12,n+=B(r/12),t.years=n}function H(e,t){for(var n in t)t.hasOwnProperty(n)&&(e[n]=t[n]);return e}function B(e){return e<0?Math.ceil(e):Math.floor(e)}function j(e,t){var n=e+"";while(n.length68?1900:2e3);break;case"YYYY":case"YYYYY":s[0]=~~t;break;case"a":case"A":n._isPm=(t+"").toLowerCase()==="pm";break;case"H":case"HH":case"h":case"hh":s[3]=~~t;break;case"m":case"mm":s[4]=~~t;break;case"s":case"ss":s[5]=~~t;break;case"S":case"SS":case"SSS":s[6]=~~(("0."+t)*1e3);break;case"X":n._d=new Date(parseFloat(t)*1e3);break;case"Z":case"ZZ":n._useUTC=!0,r=(t+"").match(x),r&&r[1]&&(n._tzh=~~r[1]),r&&r[2]&&(n._tzm=~~r[2]),r&&r[0]==="+"&&(n._tzh=-n._tzh,n._tzm=-n._tzm)}t==null&&(n._isValid=!1)}function J(e){var t,n,r=[];if(e._d)return;for(t=0;t<7;t++)e._a[t]=r[t]=e._a[t]==null?t===2?1:0:e._a[t];r[3]+=e._tzh||0,r[4]+=e._tzm||0,n=new Date(0),e._useUTC?(n.setUTCFullYear(r[0],r[1],r[2]),n.setUTCHours(r[3],r[4],r[5],r[6])):(n.setFullYear(r[0],r[1],r[2]),n.setHours(r[3],r[4],r[5],r[6])),e._d=n}function K(e){var t=e._f.match(a),n=e._i,r,i;e._a=[];for(r=0;r0,f[4]=n,Z.apply({},f)}function tt(e,n,r){var i=r-n,s=r-e.day();return s>i&&(s-=7),s11?n?"pm":"PM":n?"am":"AM"},_calendar:{sameDay:"[Today at] LT",nextDay:"[Tomorrow at] LT",nextWeek:"dddd [at] LT",lastDay:"[Yesterday at] LT",lastWeek:"[last] dddd [at] LT",sameElse:"L"},calendar:function(e,t){var n=this._calendar[e];return typeof n=="function"?n.apply(t):n},_relativeTime:{future:"in %s",past:"%s ago",s:"a few seconds",m:"a minute",mm:"%d minutes",h:"an hour",hh:"%d 
hours",d:"a day",dd:"%d days",M:"a month",MM:"%d months",y:"a year",yy:"%d years"},relativeTime:function(e,t,n,r){var i=this._relativeTime[n];return typeof i=="function"?i(e,t,n,r):i.replace(/%d/i,e)},pastFuture:function(e,t){var n=this._relativeTime[e>0?"future":"past"];return typeof n=="function"?n(t):n.replace(/%s/i,t)},ordinal:function(e){return this._ordinal.replace("%d",e)},_ordinal:"%d",preparse:function(e){return e},postformat:function(e){return e},week:function(e){return tt(e,this._week.dow,this._week.doy)},_week:{dow:0,doy:6}},t=function(e,t,n){return nt({_i:e,_f:t,_l:n,_isUTC:!1})},t.utc=function(e,t,n){return nt({_useUTC:!0,_isUTC:!0,_l:n,_i:e,_f:t})},t.unix=function(e){return t(e*1e3)},t.duration=function(e,n){var r=t.isDuration(e),i=typeof e=="number",s=r?e._data:i?{}:e,o;return i&&(n?s[n]=e:s.milliseconds=e),o=new P(s),r&&e.hasOwnProperty("_lang")&&(o._lang=e._lang),o},t.version=n,t.defaultFormat=E,t.lang=function(e,n){var r;if(!e)return t.fn._lang._abbr;n?R(e,n):s[e]||U(e),t.duration.fn._lang=t.fn._lang=U(e)},t.langData=function(e){return e&&e._lang&&e._lang._abbr&&(e=e._lang._abbr),U(e)},t.isMoment=function(e){return e instanceof D},t.isDuration=function(e){return e instanceof P},t.fn=D.prototype={clone:function(){return t(this)},valueOf:function(){return+this._d},unix:function(){return Math.floor(+this._d/1e3)},toString:function(){return this.format("ddd MMM DD YYYY HH:mm:ss [GMT]ZZ")},toDate:function(){return this._d},toJSON:function(){return t.utc(this).format("YYYY-MM-DD[T]HH:mm:ss.SSS[Z]")},toArray:function(){var e=this;return[e.year(),e.month(),e.date(),e.hours(),e.minutes(),e.seconds(),e.milliseconds()]},isValid:function(){return this._isValid==null&&(this._a?this._isValid=!q(this._a,(this._isUTC?t.utc(this._a):t(this._a)).toArray()):this._isValid=!isNaN(this._d.getTime())),!!this._isValid},utc:function(){return this._isUTC=!0,this},local:function(){return this._isUTC=!1,this},format:function(e){var n=X(this,e||t.defaultFormat);return 
this.lang().postformat(n)},add:function(e,n){var r;return typeof e=="string"?r=t.duration(+n,e):r=t.duration(e,n),F(this,r,1),this},subtract:function(e,n){var r;return typeof e=="string"?r=t.duration(+n,e):r=t.duration(e,n),F(this,r,-1),this},diff:function(e,n,r){var i=this._isUTC?t(e).utc():t(e).local(),s=(this.zone()-i.zone())*6e4,o,u;return n&&(n=n.replace(/s$/,"")),n==="year"||n==="month"?(o=(this.daysInMonth()+i.daysInMonth())*432e5,u=(this.year()-i.year())*12+(this.month()-i.month()),u+=(this-t(this).startOf("month")-(i-t(i).startOf("month")))/o,n==="year"&&(u/=12)):(o=this-i-s,u=n==="second"?o/1e3:n==="minute"?o/6e4:n==="hour"?o/36e5:n==="day"?o/864e5:n==="week"?o/6048e5:o),r?u:B(u)},from:function(e,n){return t.duration(this.diff(e)).lang(this.lang()._abbr).humanize(!n)},fromNow:function(e){return this.from(t(),e)},calendar:function(){var e=this.diff(t().startOf("day"),"days",!0),n=e<-6?"sameElse":e<-1?"lastWeek":e<0?"lastDay":e<1?"sameDay":e<2?"nextDay":e<7?"nextWeek":"sameElse";return this.format(this.lang().calendar(n,this))},isLeapYear:function(){var e=this.year();return e%4===0&&e%100!==0||e%400===0},isDST:function(){return this.zone()+t(e).startOf(n)},isBefore:function(e,n){return n=typeof n!="undefined"?n:"millisecond",+this.clone().startOf(n)<+t(e).startOf(n)},isSame:function(e,n){return n=typeof n!="undefined"?n:"millisecond",+this.clone().startOf(n)===+t(e).startOf(n)},zone:function(){return this._isUTC?0:this._d.getTimezoneOffset()},daysInMonth:function(){return t.utc([this.year(),this.month()+1,0]).date()},dayOfYear:function(e){var n=r((t(this).startOf("day")-t(this).startOf("year"))/864e5)+1;return e==null?n:this.add("d",e-n)},isoWeek:function(e){var t=tt(this,1,4);return e==null?t:this.add("d",(e-t)*7)},week:function(e){var t=this.lang().week(this);return e==null?t:this.add("d",(e-t)*7)},lang:function(t){return 
t===e?this._lang:(this._lang=U(t),this)}};for(i=0;i=0;s={horizontal:{},vertical:{}};f=1;a={};u="waypoints-context-id";p="resize.waypoints";y="scroll.waypoints";v=1;w="waypoints-waypoint-ids";g="waypoint";m="waypoints";o=function(){function t(t){var e=this;this.$element=t;this.element=t[0];this.didResize=false;this.didScroll=false;this.id="context"+f++;this.oldScroll={x:t.scrollLeft(),y:t.scrollTop()};this.waypoints={horizontal:{},vertical:{}};t.data(u,this.id);a[this.id]=this;t.bind(y,function(){var t;if(!(e.didScroll||c)){e.didScroll=true;t=function(){e.doScroll();return e.didScroll=false};return r.setTimeout(t,n[m].settings.scrollThrottle)}});t.bind(p,function(){var t;if(!e.didResize){e.didResize=true;t=function(){n[m]("refresh");return e.didResize=false};return r.setTimeout(t,n[m].settings.resizeThrottle)}})}t.prototype.doScroll=function(){var t,e=this;t={horizontal:{newScroll:this.$element.scrollLeft(),oldScroll:this.oldScroll.x,forward:"right",backward:"left"},vertical:{newScroll:this.$element.scrollTop(),oldScroll:this.oldScroll.y,forward:"down",backward:"up"}};if(c&&(!t.vertical.oldScroll||!t.vertical.newScroll)){n[m]("refresh")}n.each(t,function(t,r){var i,o,l;l=[];o=r.newScroll>r.oldScroll;i=o?r.forward:r.backward;n.each(e.waypoints[t],function(t,e){var n,i;if(r.oldScroll<(n=e.offset)&&n<=r.newScroll){return l.push(e)}else if(r.newScroll<(i=e.offset)&&i<=r.oldScroll){return l.push(e)}});l.sort(function(t,e){return t.offset-e.offset});if(!o){l.reverse()}return n.each(l,function(t,e){if(e.options.continuous||t===l.length-1){return e.trigger([i])}})});return this.oldScroll={x:t.horizontal.newScroll,y:t.vertical.newScroll}};t.prototype.refresh=function(){var 
t,e,r,i=this;r=n.isWindow(this.element);e=this.$element.offset();this.doScroll();t={horizontal:{contextOffset:r?0:e.left,contextScroll:r?0:this.oldScroll.x,contextDimension:this.$element.width(),oldScroll:this.oldScroll.x,forward:"right",backward:"left",offsetProp:"left"},vertical:{contextOffset:r?0:e.top,contextScroll:r?0:this.oldScroll.y,contextDimension:r?n[m]("viewportHeight"):this.$element.height(),oldScroll:this.oldScroll.y,forward:"down",backward:"up",offsetProp:"top"}};return n.each(t,function(t,e){return n.each(i.waypoints[t],function(t,r){var i,o,l,s,f;i=r.options.offset;l=r.offset;o=n.isWindow(r.element)?0:r.$element.offset()[e.offsetProp];if(n.isFunction(i)){i=i.apply(r.element)}else if(typeof i==="string"){i=parseFloat(i);if(r.options.offset.indexOf("%")>-1){i=Math.ceil(e.contextDimension*i/100)}}r.offset=o-e.contextOffset+e.contextScroll-i;if(r.options.onlyOnScroll&&l!=null||!r.enabled){return}if(l!==null&&l<(s=e.oldScroll)&&s<=r.offset){return r.trigger([e.backward])}else if(l!==null&&l>(f=e.oldScroll)&&f>=r.offset){return r.trigger([e.forward])}else if(l===null&&e.oldScroll>=r.offset){return r.trigger([e.forward])}})})};t.prototype.checkEmpty=function(){if(n.isEmptyObject(this.waypoints.horizontal)&&n.isEmptyObject(this.waypoints.vertical)){this.$element.unbind([p,y].join(" "));return delete a[this.id]}};return t}();l=function(){function t(t,e,r){var i,o;r=n.extend({},n.fn[g].defaults,r);if(r.offset==="bottom-in-view"){r.offset=function(){var t;t=n[m]("viewportHeight");if(!n.isWindow(e.element)){t=e.$element.height()}return 
t-n(this).outerHeight()}}this.$element=t;this.element=t[0];this.axis=r.horizontal?"horizontal":"vertical";this.callback=r.handler;this.context=e;this.enabled=r.enabled;this.id="waypoints"+v++;this.offset=null;this.options=r;e.waypoints[this.axis][this.id]=this;s[this.axis][this.id]=this;i=(o=t.data(w))!=null?o:[];i.push(this.id);t.data(w,i)}t.prototype.trigger=function(t){if(!this.enabled){return}if(this.callback!=null){this.callback.apply(this.element,t)}if(this.options.triggerOnce){return this.destroy()}};t.prototype.disable=function(){return this.enabled=false};t.prototype.enable=function(){this.context.refresh();return this.enabled=true};t.prototype.destroy=function(){delete s[this.axis][this.id];delete this.context.waypoints[this.axis][this.id];return this.context.checkEmpty()};t.getWaypointsByElement=function(t){var e,r;r=n(t).data(w);if(!r){return[]}e=n.extend({},s.horizontal,s.vertical);return n.map(r,function(t){return e[t]})};return t}();d={init:function(t,e){var r;if(e==null){e={}}if((r=e.handler)==null){e.handler=t}this.each(function(){var t,r,i,s;t=n(this);i=(s=e.context)!=null?s:n.fn[g].defaults.context;if(!n.isWindow(i)){i=t.closest(i)}i=n(i);r=a[i.data(u)];if(!r){r=new o(i)}return new l(t,r,e)});n[m]("refresh");return this},disable:function(){return d._invoke(this,"disable")},enable:function(){return d._invoke(this,"enable")},destroy:function(){return d._invoke(this,"destroy")},prev:function(t,e){return d._traverse.call(this,t,e,function(t,e,n){if(e>0){return t.push(n[e-1])}})},next:function(t,e){return d._traverse.call(this,t,e,function(t,e,n){if(et.oldScroll.y})},left:function(t){if(t==null){t=r}return h._filter(t,"horizontal",function(t,e){return e.offset<=t.oldScroll.x})},right:function(t){if(t==null){t=r}return h._filter(t,"horizontal",function(t,e){return e.offset>t.oldScroll.x})},enable:function(){return h._invoke("enable")},disable:function(){return h._invoke("disable")},destroy:function(){return 
h._invoke("destroy")},extendFn:function(t,e){return d[t]=e},_invoke:function(t){var e;e=n.extend({},s.vertical,s.horizontal);return n.each(e,function(e,n){n[t]();return true})},_filter:function(t,e,r){var i,o;i=a[n(t).data(u)];if(!i){return[]}o=[];n.each(i.waypoints[e],function(t,e){if(r(i,e)){return o.push(e)}});o.sort(function(t,e){return t.offset-e.offset});return n.map(o,function(t){return t.element})}};n[m]=function(){var t,n;n=arguments[0],t=2<=arguments.length?e.call(arguments,1):[];if(h[n]){return h[n].apply(null,t)}else{return h.aggregate.call(null,n)}};n[m].settings={resizeThrottle:100,scrollThrottle:30};return i.load(function(){return n[m]("refresh")})})}).call(this); -------------------------------------------------------------------------------- /www/robots.txt: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: / 3 | --------------------------------------------------------------------------------