├── LICENSE ├── README.md ├── getfile.php └── process.php /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PHP-Wayback-Machine-Downloader 2 | There are excellent shell scripts for downloading websites from Archive.org's Wayback Machine, but I could not find an existing, publicly available script for doing it with PHP. So, using a shell script as an example, I wrote one in PHP. 
3 | 4 | INSTRUCTIONS 5 | 6 | Create a folder with a random name on your site for the scripts to run inside. 7 | 8 | Edit variables in getfile.php and process.php 9 | 10 | Run getfile.php to create the rawlist.json file. 11 | 12 | Then run process.php to download the site you want. 13 | 14 | Questions: LilPeck@gmail.com 15 | -------------------------------------------------------------------------------- /getfile.php: -------------------------------------------------------------------------------- 1 | $url, 34 | CURLOPT_TIMEOUT => 3600, 35 | CURLOPT_FILE => $fp 36 | )); 37 | $contents = curl_exec($ci); // Returns '1' if successful 38 | curl_close($ci); 39 | fclose($fp); 40 | 41 | ## AFTER RUNNING THIS SCRIPT, OPEN THE RESULTING rawlist.json FILE IN A TEXT EDITOR TO MAKE SURE IT HAS THE URLS YOU CAN USE 42 | ## NEXT, EDIT THE process.php FILE. 43 | ?> 44 | -------------------------------------------------------------------------------- /process.php: -------------------------------------------------------------------------------- 1 | "; 42 | 43 | foreach ($json as $key => $value) { 44 | if (!is_array($value)) { 45 | 46 | echo $key . '=>' . $value . '
'; 47 | } else { 48 | 49 | foreach ($value as $key => $val) { 50 | 51 | echo "Key: $key; Value: $val
\n"; 52 | 53 | //PUT TIMESTAMP AS A KEY NAME AND ORIGINAL AS ITS VALUE INTO MYARRAY 54 | if ($key== 0){ 55 | $keyname = $val; 56 | } 57 | if ($key== 1){ 58 | $bname = basename($val); // $name == '1.jpg' 59 | $bExt = pathinfo($bname, PATHINFO_EXTENSION); 60 | if (in_array(".".$bExt, $fileext)) 61 | { 62 | echo $key . '=>' . $val . '
'; 63 | $keyvalue = $val; 64 | } else { 65 | echo $key . '=>' . $val . 'index.html
'; 66 | $keyvalue = $val . 'index.html'; 67 | } 68 | } 69 | $myArray[$keyname] = $keyvalue ; 70 | $without80=str_replace(":80","",$myArray[$keyname]); //:80 in the archive.org file urls appear to be redirectors 71 | array_push($myArray, $without80); 72 | //END of PUT TIMESTAMP AS A KEY NAME AND ORIGINAL AS ITS VALUE INTO MYARRAY 73 | 74 | } 75 | } 76 | } 77 | $result = array_unique($myArray); 78 | function myFilter($string) { 79 | return strpos($string, ':80') === false; 80 | } 81 | $result = array_filter($result, 'myFilter'); 82 | 83 | //GET URLS THAT HAVE FILE NAMES ONLY 84 | //get urls with folders only to create local folders 85 | $fileArr = array_unique($result); 86 | 87 | echo "
"; 88 | echo "
";
 89 |            
 90 |            echo "NEW ARRAY WITH TIMESTAMP AS KEYS AND ORIGINAL AS VALUE". "
\n"; 91 | echo "EXTRACT FOLDER NAMES AND PUT INTO FOLDER NAME ARRAY". "
\n"; 92 | 93 | echo "
";
 94 |            $xn = 0;
 95 |            foreach ($result as $fileKey => $fileValue) {
 96 |            echo $xn. '
'; 97 | if ($xn > 1) { 98 | echo $fileKey ." = ". $fileValue . '
'; 99 | 100 | $filename = basename($fileValue); // $name == '1.jpg' 101 | $myFolderVal=str_replace($filename,"",$fileValue); 102 | $myFolderVal=str_replace($domainurl,$domain,$myFolderVal); 103 | echo " FOLDER: ".$myFolderVal. '
'; 104 | 105 | $myFolderArray[] = $myFolderVal;//ADD ITEM TO ARRAY 106 | } 107 | echo '
'; 108 | $xn = $xn +1; 109 | } 110 | echo "
"; 111 | echo "
"; 112 | 113 | //CREATE LOCAL FOLDERS 114 | $myFolderArray = array_unique($myFolderArray); 115 | echo "FOLDER NAMES FOR LOCAL DIRECTORIES". '
'; 116 | //get urls with folders only to create local folders 117 | echo "
";
118 |            foreach ($myFolderArray as $fKey => $fValue) {
119 |            echo $fKey . '=>' . $fValue . '
'; 120 | if (!file_exists($fValue)) { 121 | mkdir($fValue, 0775, true); 122 | } 123 | } 124 | echo "
"; 125 | echo "
"; 126 | 127 | echo "FILE NAMES ARRAY FOR DOWNLOADING FILES."; 128 | echo "
";
129 |            $xa = 0;
130 |            foreach ($result as $aKey => $aValue) {
131 |             echo $xa. '
'; 132 | if ($xa > 1) { 133 | echo $aKey . '=>' . $aValue . '
'; 134 | $makeURLa=$aKey ."id_/"; 135 | $makeURLb=$aValue; 136 | $filestring = $webarchive.$makeURLa.$makeURLb; 137 | echo "URL: ".$filestring. "
\n"; 138 | $myFolderVal=str_replace($domainurl,$domain,$makeURLb); 139 | 140 | echo " SAVE TO FOLDER: ".$myFolderVal. '
'; 141 | $url = trim($filestring); 142 | $path = trim($myFolderVal); 143 | $returned_content = get_data($url); //running curl function 144 | // the following lines write the contents to a file 145 | if (($fp = fopen($path, "w")) !== false) { //new line 146 | $fp = fopen($path, 'w'); 147 | fwrite($fp, $returned_content); 148 | fclose($fp); 149 | } 150 | sleep(5); //to avoid excessive use of resources 151 | } 152 | echo '
'; 153 | $xa = $xa +1;} 154 | echo "
"; 155 | ?> 156 | --------------------------------------------------------------------------------