├── .gitignore ├── README.md ├── composer.json ├── examples └── example.php ├── index.php └── src └── WBMScrapper.php /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor/ 2 | vendor/ 3 | *.swp 4 | .idea/ 5 | tmp/ 6 | composer.lock 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## WMB Scrapper 2 | 3 | 4 | ========================================= 5 | 6 | [![Latest Stable Version](https://poser.pugx.org/dawood/wmb-scrapper/v/stable.svg)](https://packagist.org/packages/dawood/wmb-scrapper) 7 | [![Total Downloads](https://poser.pugx.org/dawood/wmb-scrapper/downloads)](https://packagist.org/packages/dawood/wmb-scrapper) 8 | [![License](https://poser.pugx.org/dawood/wmb-scrapper/license.svg)](https://packagist.org/packages/dawood/wmb-scrapper) 9 | 10 | ## Introduction 11 | A small PHP package for fetching the snapshot URLs that archive.org (the Wayback Machine) holds for a URL. 12 | With it you can fetch the complete list of snapshot URLs for a single year, or 13 | the complete list across all years that have snapshots. 14 | **This package can be used to do recon of any target.** 15 | 16 | 17 | 18 | ## Installation 19 | 20 | Install the package through [composer](http://getcomposer.org): 21 | 22 | ``` 23 | composer require dawood/wmb-scrapper 24 | ``` 25 | 26 | Make sure that you include the composer [autoloader](https://getcomposer.org/doc/01-basic-usage.md#autoloading) 27 | somewhere in your codebase. 28 | 29 | ## Examples 30 | 31 | Several examples are also provided in the examples folder.
32 | 33 | ### Get first/last snapshot year of a domain 34 | include "vendor/autoload.php"; 35 | use dawood\WBMScrapper\WBMScrapper; 36 | 37 | $url = 'https://github.com/'; 38 | $firstSnapShotYear = WBMScrapper::firstSnapshotYear($url); 39 | $lastSnapShotYear = WBMScrapper::lastSnapshotYear($url); 40 | echo $lastSnapShotYear .PHP_EOL; 41 | echo $firstSnapShotYear.PHP_EOL; 42 | 43 | 44 | ### Get snapshots of a domain for a given year 45 | include "vendor/autoload.php"; 46 | use dawood\WBMScrapper\WBMScrapper; 47 | 48 | $url = 'https://github.com/'; 49 | $snapshotsOf2012 = WBMScrapper::getSnapShotUrlsOfYear($url, 2012); 50 | print_r($snapshotsOf2012); 51 | // outputs a list of Wayback Machine snapshot URLs; each entry looks like 52 | // e.g. 53 | // https://web.archive.org/web/20091226225818/http://www.github.com/ 54 | 55 | ### Get snapshots of a domain for all years 56 | include "vendor/autoload.php"; 57 | use dawood\WBMScrapper\WBMScrapper; 58 | 59 | $url = 'https://github.com/'; 60 | $allSnapshots = WBMScrapper::getAllSnapShotUrls($url); 61 | print_r($allSnapshots); 62 | 63 | // outputs the complete list of Wayback Machine snapshot URLs; each entry looks like 64 | // e.g. 65 | // https://web.archive.org/web/20091226225818/http://www.github.com/ 66 | 67 | 68 | ## License 69 | The **WMB Scrapper** is open-sourced software licensed under the [MIT license](https://opensource.org/licenses/MIT).
70 | 71 | ## Contribution 72 | Thanks to all of the contributors , 73 | 74 | ## Author 75 | Dawood Ikhlaq and Open source community 76 | 77 | 78 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dawood/wmb-scrapper", 3 | "description": "A small Php package to fetch archive url snapshots from archive.org.using it you can fetch complete list of snapshot urls of any year orcomplete list of all years possible.This package can be used to do recon of any target.", 4 | "type": "library", 5 | "license": "MIT", 6 | "keywords": ["shell","php","wayBackMachine","archive.org","snapshots"], 7 | "authors": [ 8 | { 9 | "name": "Dawood Ikhlaq", 10 | "email": "daudmalik06@gmail.com" 11 | } 12 | ], 13 | "minimum-stability": "stable", 14 | "autoload": { 15 | "psr-4": { 16 | "dawood\\WBMScrapper\\": "src/" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /examples/example.php: -------------------------------------------------------------------------------- 1 | prepareWebArchiveUrlsFromArray($url, $finalTimestamps); 55 | } 56 |
    /**
     * Returns the year of the first snapshot archive.org reports for $url.
     *
     * @param string $url address whose snapshot history is looked up
     * @return int the year, or 0 when the lookup fails or the response carries no usable timestamp
     */
    public static function firstSnapshotYear($url)
    {
        $scrapper = new self();
        $years = $scrapper->getYears($url);

        return (int)$years['first'];
    }

    /**
     * Returns the year of the last snapshot archive.org reports for $url.
     *
     * @param string $url address whose snapshot history is looked up
     * @return int the year, or 0 when the lookup fails or the response carries no usable timestamp
     */
    public static function lastSnapshotYear($url)
    {
        $scrapper = new self();
        $years = $scrapper->getYears($url);

        return (int)$years['last'];
    }

    /**
     * Fetches the first and last snapshot years of $url from the archive.org sparkline endpoint.
     *
     * The year is taken as the first four characters of the "first_ts" / "last_ts" fields of the
     * JSON response. If the request fails, the body does not decode to an array, or a field is
     * missing, the matching entry stays an empty string; the public wrappers cast that to 0.
     *
     * @param string $url
     * @return array ['first' => string, 'last' => string]
     */
    private function getYears($url)
    {
        $years = ['first' => '', 'last' => ''];

        $infoAddress = 'https://web.archive.org/__wb/sparkline?url='.urlencode($url).'&collection=web&output=json';
        $jsonResponse = file_get_contents($infoAddress);
        if ($jsonResponse === false) {
            // The request failed; file_get_contents() has already raised its own warning.
            return $years;
        }

        $summary = json_decode($jsonResponse, true);
        if (!is_array($summary)) {
            // Not a JSON object/array (invalid JSON decodes to null), so there is nothing to read.
            return $years;
        }

        foreach (['first' => 'first_ts', 'last' => 'last_ts'] as $key => $field) {
            if (isset($summary[$field]) && is_scalar($summary[$field])) {
                // Outer cast: substr() returns false instead of '' on older PHP versions.
                $years[$key] = (string)substr((string)$summary[$field], 0, 4);
            }
        }

        return $years;
    }

    /**
     * Builds one web.archive.org snapshot address per timestamp for $url.
     *
     * Every address has the form https://web.archive.org/web/<timestamp>/<url>. The parts are
     * concatenated directly rather than substituted into a placeholder template, so a $url that
     * happens to contain the text "TIME_STAMP" is passed through unchanged.
     *
     * @param string $url
     * @param array $timestamps
     * @return array snapshot addresses, in the order of $timestamps
     */
    private function prepareWebArchiveUrlsFromArray($url, array $timestamps)
    {
        $webArchiveUrls = [];
        foreach ($timestamps as $timestamp) {
            $webArchiveUrls[] = 'https://web.archive.org/web/'.$timestamp.'/'.$url;
        }

        return $webArchiveUrls;
    }

}
--------------------------------------------------------------------------------