├── example.php ├── README.md ├── TODO.md └── zippy_batch_dl.php /example.php: -------------------------------------------------------------------------------- 1 | '; 8 | print_r($stuff); 9 | echo ''; 10 | } 11 | 12 | $zippy_links = [ 13 | "Rock Band ACDC" => [ 14 | "http://www38.zippyshare.com/v/30819272/file.html", 15 | "http://www15.zippyshare.com/v/98285835/file.html", 16 | "http://www46.zippyshare.com/v/37059986/file.html", 17 | "http://www53.zippyshare.com/v/57652376/file.html", 18 | "http://www41.zippyshare.com/v/10556896/file.html", 19 | "http://www40.zippyshare.com/v/81927964/file.html" 20 | ], 21 | "Green Day Rock Band" => [ 22 | "http://www72.zippyshare.com/v/27314844/file.html", 23 | "http://www6.zippyshare.com/v/5353353/file.html", 24 | "http://www49.zippyshare.com/v/49586179/file.html", 25 | "http://www25.zippyshare.com/v/75260868/file.html" 26 | ] 27 | ]; 28 | 29 | //example usage 30 | $parent_folder = 'DL_new'; 31 | 32 | $start_folder = 0; 33 | $end_folder = 'end'; 34 | 35 | $start_link = 0; 36 | $end_link = 'end'; 37 | 38 | print_r2($zippy_links); 39 | 40 | zippy_download_batch($zippy_links, $parent_folder, $start_folder, $end_folder, $start_link, $end_link); 41 | 42 | ?> -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **This repository is no longer maintained and contains some seriously low quality code!** 2 | ______________________________________________________ 3 | 4 | # Zippyshare-batch-download-PHP-cURL 5 | A PHP (5.6.3) script that uses cURL to automatically download an array of Zippyshare links. 6 | 7 | [GitHub project](https://github.com/Quirinus/Zippyshare-batch-download-PHP-cURL) 8 | _____________________________________________________________________ 9 | 10 | Run example.php to start downloading. To use the script yourself, call the `zippy_download_batch` function (defined at the end of zippy_batch_dl.php) and pass it your arguments. 
log.php will be created in the download folder. 11 | 12 | _____________________________________________________________________ 13 | Features: 14 | - Accepts arrays of Zippyshare links, grouped in an associative array keyed by DL folder name. 15 | - Tries to automatically solve the anti-bot code to get the validation number, in a number of ways. (Other scripts require manually specifying it, which means having to stop downloading - possibly several times for longer batches - and prevents AFK downloading.) 16 | - Names files by index, as they appear in the array, then by download link number, and then by file name. 17 | - While downloading, the file name is modified to be `*.part.file_name.part` - when finished, it is renamed to remove both `.part` markers. 18 | - When starting a new session, always deletes partially downloaded files and starts downloading them again. 19 | - Can choose if you want to overwrite existing files. 20 | - Checks if files already exist by searching the folder for index number and download link number. Skips already existing complete downloads (unless overwrite is on). 21 | - Checks again whether the file exists after fetching the file name from the Zippyshare page. Skips already existing complete downloads (unless overwrite is on). 22 | - Can specify various timeout, wait and delay times. 23 | - Can specify from which folder/link to start, and where to end (can choose 'end' to go till the end). 24 | - Can specify download folder. 25 | - Has a pretty extensive log (not very well formatted). 26 | - Has a lot of error detection. 27 | - Example included: example.php just contains a bunch of working Zippyshare links/folders for testing/example purposes. zippy_batch_dl.php is the main file. 
28 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # Zippyshare-batch-download-PHP-cURL 2 | A PHP (5.6.3) script that uses cURL to automatically download an array of Zippyshare links. 3 | 4 | [GitHub project](https://github.com/Quirinus/Zippyshare-batch-download-PHP-cURL) 5 | _____________________________________________________________________ 6 | To-Do: 7 | - Cosmetic/speed: 8 | - add datetimes to each log entry 9 | - clean up code... it's a mess atm. 10 | - make a nicer display of download links and their structure/number than it currently is. 11 | - format log to make it actually easy to read 12 | - check and note which PHP version and libraries this script uses (I'm running it on Windows 7 with: ApacheFriends XAMPP Version 5.6.3 which uses PHP 5.6.3 (VC11 X86 32bit thread safe) + PEAR, Apache 2.4.4, libraries: glob, curl...) 13 | - optimize code (eg. remove various preg matches if it can be done by quicker functions, maybe a faster approach than using the glob function, refactor some code...) 
14 | - Features/functional: 15 | - add server index to the file name, between array index and url number 16 | - check file size first (read from site, CURLOPT_RANGE, CURLOPT_WRITEFUNCTION, CURLOPT_HEADERFUNCTION, or to get exact size, curl to get file size from header of the temp dl link - but that requires an additional curl request (if you do it, make it not request body, only header, and add other curl options to make it simpler)). If you read the file size from the site, it's here: `<font style="line-height:18px; font-size: 13px; font-weight: bold;">Size:</font><font style="line-height:18px; font-size: 13px;">29.52 MB</font><br />`. See: [Link 1](http://curl.haxx.se/libcurl/php/examples/callbacks.html), [Link 2](http://stackoverflow.com/questions/10991443/curl-get-remote-file-and-force-download-at-same-time) 17 | - re-download files if size doesn't match (give an optional argument for it, that overrides the overwrite argument) 18 | - make it constantly check a specified file for a 1 or 0 value. If it's 0, stop running the script after finishing the current download. 19 | - add arbitrarily nested arrays of folder/links, along with support to specify download start/end folder/link indexes by having them nested like the folder/link array 20 | - increase the dl timeout according to the dl speed, only if script execution time isn't set to 0 (infinite) 21 | - download resume: [Link 1](http://www.ankur.com/blog/106/php/resume-http-downloads-php-curl-fsockopen/), [Link 2](http://stackoverflow.com/questions/2032924/how-to-partially-download-a-remote-file-with-curl "byteserving") 22 | - support for running two or more of these scripts in parallel 23 | - put variables from eval(algorithm_variables_code) in their own namespace? 
-------------------------------------------------------------------------------- /zippy_batch_dl.php: -------------------------------------------------------------------------------- 1 | ', ':', '"', "/", "\\", '|', '?', '*')); //cannot be contained in file/folder names 16 | return trim(str_replace($bad, '', $path)); 17 | } 18 | 19 | function left_to_right_slash($text) 20 | { 21 | return str_replace('\\','/',$text); 22 | } 23 | 24 | //formatting for the log file 25 | function red($text) //error 26 | { 27 | return "$text
\r\n"; 28 | } 29 | function green($text) //success 30 | { 31 | return "$text
\r\n"; 32 | } 33 | function strike($text) //skip 34 | { 35 | return "$text
\r\n"; 36 | } 37 | 38 | //create just one level of folders 39 | function create_folders($paths) 40 | { 41 | if (is_array($paths)) 42 | { 43 | foreach ($paths as $path) 44 | { 45 | if (!file_exists($path)) 46 | mkdir($path, 0777, true); 47 | } 48 | } 49 | else 50 | { 51 | if (!file_exists($paths)) 52 | mkdir($paths, 0777, true); 53 | } 54 | } 55 | 56 | //set time limit for the dl according to the file size and dl speed 57 | /*$filesize = curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD); //in bytes 58 | if ($filesize) 59 | set_time_limit(($filesize/(8*1024))/$dl_speed); //dl speed in kb/s*/ 60 | 61 | //get zippyshare download page 62 | function _zippy_get_page($url, $response_time, $timeout, &$p_error) 63 | { 64 | if ($response_time > $timeout) 65 | $timeout = $response_time; 66 | 67 | $ch = curl_init(); 68 | curl_setopt($ch, CURLOPT_URL, $url); 69 | curl_setopt($ch, CURLOPT_HEADER, 1); 70 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 71 | curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $response_time); //sec limited response time 72 | curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); //sec limited time 73 | $page = curl_exec($ch); 74 | if (curl_error($ch)) 75 | $p_error = curl_error($ch); 76 | curl_close($ch); 77 | return $page; 78 | } 79 | 80 | //check if the zippyshare page is ok for processing 81 | function _page_has_errors($zippy_page, &$p_error) 82 | { 83 | //check for error while curling the page 84 | if (($p_error !== '') || ($zippy_page === false)) 85 | $p_error = "Error fetching: $p_error"; 86 | 87 | //check for empty page 88 | elseif (trim($zippy_page) == '') 89 | $p_error = 'Error: Page empty.'; 90 | 91 | //check for title 92 | elseif (!(preg_match('/([^\n\<]*)<\/title>/i', $zippy_page, $title))) 93 | $p_error = 'Error: No title. Wrong page? 
Stop.'; 94 | 95 | //check if the title contains 'zippyshare.com -' 96 | elseif (stripos($title[1],'Zippyshare.com - ') === false) 97 | $p_error = "Error: Wrong page - title '$title' doesn't contain 'Zippyshare.com - '."; 98 | 99 | 100 | elseif (stripos($zippy_page,'File does not exist on this server') !== false) 101 | $p_error = 'Error: File removed/deleted from zippy share or wrong zippyshare link.'; 102 | 103 | if ($p_error !== '') 104 | return true; 105 | return false; 106 | } 107 | 108 | //download zippyshare file 109 | function _zippy_get_file($dl_url, $referral_url, $cookie_jsid, $dl_path, $dl_response_time, $dl_timeout, &$c_error) 110 | { 111 | if ($dl_response_time > $dl_timeout) 112 | $dl_timeout = $dl_response_time; 113 | 114 | $ch = curl_init(); 115 | curl_setopt($ch, CURLOPT_URL, $dl_url); 116 | //curl_setopt($ch, CURLOPT_COOKIESESSION, 1); 117 | //curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file); 118 | //curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file); 119 | curl_setopt($ch, CURLOPT_HTTPHEADER, array("Cookie: JSESSIONID=$cookie_jsid")); 120 | //curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0'); 121 | //curl_setopt($ch, CURLOPT_HEADER, 1); 122 | //curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows NT 6.0) Gecko/20100101 Firefox/14.0.1'); 123 | //curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13'); 124 | //curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0); 125 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 126 | curl_setopt($ch, CURLOPT_REFERER, $referral_url); 127 | 128 | $fp = fopen ($dl_path, 'w+'); 129 | curl_setopt($ch, CURLOPT_FILE, $fp); // write curl response to file 130 | curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $dl_response_time); //sec limited response time 131 | curl_setopt($ch, CURLOPT_TIMEOUT, $dl_timeout); //sec limited time 132 | curl_exec($ch); 133 | if (curl_error($ch)) 134 | $c_error = 
curl_error($ch); 135 | curl_close($ch); 136 | fclose($fp); 137 | } 138 | 139 | //download one file and write to log file 140 | function _zippy_download_one($zippy_page_url, $folder_path, $file_index, $dl_response_time, $dl_timeout, $dl_fetch_delay, $overwrite=false) 141 | { 142 | //get the unique url number 143 | preg_match('/\/v\/([^\n\\/]+)\/file\./i',$zippy_page_url, $zippy_url_number); 144 | 145 | //get the server number 146 | if (!preg_match('/www([0-9]*)\./i',$zippy_page_url, $zippy_server_number)) 147 | return red("Error finding the server number. ($zippy_page_url)"); 148 | 149 | //check if the file is already downloaded (by url number) 150 | $dirname = dirname(__FILE__); 151 | $glob_match = left_to_right_slash("$dirname/$folder_path")."/$file_index.{$zippy_url_number[1]}.*"; 152 | $glob = glob($glob_match, GLOB_NOSORT); //GLOB_NOSORT | GLOB_NOESCAPE 153 | if (count($glob)) 154 | { 155 | if (file_exists($glob[0]) && !$overwrite) 156 | { 157 | $skip_file = $glob[0]; 158 | $glob_match = left_to_right_slash("$dirname/$folder_path")."/$file_index.{$zippy_url_number[1]}.part.*"; 159 | $glob = glob($glob_match, GLOB_NOSORT); // GLOB_NOSORT | GLOB_NOESCAPE 160 | if (!count($glob)) 161 | return strike("Skipping $skip_file. Pre-url-fetch check: File already exists. ($zippy_page_url)"); 162 | } 163 | } 164 | 165 | //fetch the download page, and check for errors 166 | $p_error = ''; 167 | $zippy_page = _zippy_get_page($zippy_page_url, $dl_response_time, $dl_timeout, $p_error); 168 | if (_page_has_errors($zippy_page, $p_error)) 169 | return red("$p_error ($zippy_page_url)"); 170 | 171 | //get cookie 172 | if (!preg_match('/Set\-Cookie: JSESSIONID=([^\n\;]+); Path=/i', $zippy_page, $zippy_cookie_jsid)) 173 | return red("Error matching cookie. 
($zippy_page_url)"); 174 | 175 | //try to solve the anti-bot algorithm to get the dl link's variable verification number 176 | $algorithm_script_code = end(explode('<script type="text/javascript">',explode('document.getElementById(\'fimage\').href',$zippy_page)[0])); 177 | $algorithm_variables_code = explode('document.getElementById(\'dlbutton\').href', $algorithm_script_code)[0]; 178 | if (stripos($algorithm_variables_code,'Math') !== false) 179 | return red("Error matching algorithm, JS Math function used. ($zippy_page_url)"); 180 | if (!preg_match('/\/\s*([^\n\/]*)"\s*\+\s*([^\n]+)\s*\+\s*"([^\n\/]*)\//i',$algorithm_script_code, $algorithm_number_code)) 181 | return red("Error finding algorithm number generating code. ($zippy_page_url)"); 182 | if (stripos($algorithm_variables_code,'var ') !== false) 183 | { 184 | if (!preg_match_all('/var ([^\n \$\=]+) \=/i',$algorithm_variables_code, $algorithm_variable_names, PREG_PATTERN_ORDER)) 185 | return red("Error finding algorithm variable names. 
($zippy_page_url)"); 186 | $algorithm_variable_names = $algorithm_variable_names[1]; 187 | $algorithm_variable_names_dollar = $algorithm_variable_names; 188 | array_walk($algorithm_variable_names_dollar, function(&$value, $key) {$value = "$$value";}); //add $ in front of variable names 189 | $algorithm_number_code[2] = str_replace($algorithm_variable_names,$algorithm_variable_names_dollar,$algorithm_number_code[2]); //add $ to variable names in code 190 | $algorithm_variables_code = str_replace($algorithm_variable_names,$algorithm_variable_names_dollar,$algorithm_variables_code); 191 | $algorithm_variables_code = str_replace('var ','',$algorithm_variables_code); 192 | eval($algorithm_variables_code); //eval is dangerous, but without using it, I'd have to write a whole JS interpreter 193 | } 194 | 195 | //get file name and human verification number 196 | if (!preg_match("/\+\s*\"[^\n\/]*\/([^\n\"]+)\";/i",$algorithm_script_code, $zippy_dl_url_name)) 197 | return red("Error evaluating the variable number code part from the url. ($zippy_page_url)"); 198 | eval('$human_check_number = "'.$algorithm_number_code[1].'".'.$algorithm_number_code[2].'."'.$algorithm_number_code[3].'";'); 199 | if (!$human_check_number) 200 | return red("Error finding the name of the file in the download page. 
($zippy_page_url)"); 201 | 202 | //make dl url and path 203 | $referral_url = $zippy_page_url; 204 | $dl_url = "http://www{$zippy_server_number[1]}.zippyshare.com/d/{$zippy_url_number[1]}/$human_check_number/{$zippy_dl_url_name[1]}"; 205 | $cookie_jsid = $zippy_cookie_jsid[1]; 206 | $dl_path = "$folder_path\\$file_index.{$zippy_url_number[1]}.".fix_bad_path_names(rawurldecode($zippy_dl_url_name[1])); 207 | $dl_path_part = "$folder_path\\$file_index.{$zippy_url_number[1]}.part.".fix_bad_path_names(rawurldecode($zippy_dl_url_name[1])).".part"; 208 | 209 | //download file if it doesn't already exist, or incomplete, or if overwrite on 210 | if (file_exists($dl_path) && !$overwrite) 211 | return strike("Skipping: $dl_path. File already exists and overwrite off. ($zippy_page_url)"); 212 | else 213 | { 214 | create_folders($folder_path); 215 | sleep($dl_fetch_delay); 216 | 217 | if (file_exists($dl_path)) 218 | unlink($dl_path); //delete 219 | if (file_exists($dl_path_part)) 220 | unlink($dl_path_part); 221 | 222 | //download the file 223 | $c_error = ''; 224 | set_time_limit($dl_timeout); //0 = unlimited 225 | _zippy_get_file($dl_url, $referral_url, $cookie_jsid, $dl_path_part, $dl_response_time, $dl_timeout, $c_error); 226 | if ($c_error !== '') //if error while downloading 227 | { 228 | //full file name already exists, delete it 229 | if (file_exists($dl_path)) 230 | { 231 | unlink($dl_path); 232 | return red("Error downloading: $c_error Deleted file: $dl_path ($zippy_page_url)"); 233 | } 234 | //partialy downloaded file already exists, delete it 235 | if (file_exists($dl_path_part)) 236 | { 237 | unlink($dl_path_part); 238 | return red("Error downloading: $c_error Deleted partial file: $dl_path_part ($zippy_page_url)"); 239 | } 240 | 241 | return red("Error downloading: $c_error File path: $dl_path_part ($zippy_page_url)"); 242 | } 243 | 244 | //when dl over, if present, delete old full file, and remove "part" from the name of the now finished file 245 | if 
(file_exists($dl_path)) 246 | unlink($dl_path); 247 | if (file_exists($dl_path_part)) 248 | rename($dl_path_part, $dl_path); 249 | $file_size = round(filesize($dl_path)/(1024*1024),2); 250 | return green("Download complete: $dl_path ($file_size MB) ($referral_url)"); 251 | } 252 | } 253 | 254 | //download multiple files 255 | function zippy_download_batch($zippy_links, $parent_folder = 'DL', $start_folder = 0, $end_folder = 'end', $start_link = 0, $end_link = 'end', $dl_response_time=0, $dl_timeout=0, $sleep_between=2, $dl_fetch_delay=1, $overwrite=0) 256 | { 257 | //turn folder array into a flat array with paths, and create them 258 | $folder_names = array_keys($zippy_links); 259 | $folder_paths = $folder_names; 260 | array_walk($folder_paths, function(&$value, $key, $parent_folder) {$value = "$parent_folder\\$key.$value";}, $parent_folder); 261 | create_folders($folder_paths); 262 | $folder_end = $end_folder === 'end' ? count($folder_names) - 1 : $end_folder; //if the number of folders to download is 'end' then download till the last folder 263 | 264 | //create log txt for unformatted logging 265 | $datetime = new DateTime(); 266 | $datetime = $datetime->format('d-m-Y H:i:s (P \U\T\C)'); 267 | file_put_contents("$parent_folder\\log.txt", "Session started: $datetime (script $version_number). Download folders: $start_folder-$end_folder, download files: $end_link-$end_link. Overwrite: $overwrite.<br>\r\n", FILE_APPEND); 268 | 269 | //create log php for viewing formatted log 270 | if (!file_exists("$parent_folder\\log.php")) 271 | { 272 | $log_php = "<!DOCTYPE html>\r\n<html>\r\n<head><meta charset='utf-8'><title>DL Log\r\n\r\n\r\n\r\n"; 273 | file_put_contents("$parent_folder\\log.php", $log_php); 274 | } 275 | 276 | //loop over the folders and links to download, and do the download + log 277 | for ($i = $start_folder; $i <= $folder_end; $i++) 278 | { 279 | $link_end = $end_link === 'end' ? 
count($zippy_links[$folder_names[$i]]) - 1 : $end_link; //if the number of links to download is 'end' then download till the end 280 | for ($j = $start_link; $j <= $link_end; $j++) 281 | { 282 | file_put_contents("$parent_folder\\log.txt", "Download link: {$zippy_links[$folder_names[$i]][$j]}. Path: {$folder_paths[$i]}. Folder/Link index: $i/$j.
\r\n", FILE_APPEND); 283 | $result = _zippy_download_one($zippy_links[$folder_names[$i]][$j], $folder_paths[$i], $j, $dl_response_time, $dl_timeout, $dl_fetch_delay, $overwrite); 284 | file_put_contents("$parent_folder\\log.txt", $result, FILE_APPEND); 285 | sleep($sleep_between); 286 | } 287 | } 288 | } 289 | 290 | ?> --------------------------------------------------------------------------------