├── .gitignore ├── LICENSE ├── README.md ├── cache └── .gitignore ├── check.php ├── composer.json ├── composer.lock ├── config.default.php ├── index.html.php ├── optimize.php ├── parallel.php └── src └── ExpiredFileManager.php /.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | vendor 3 | config.php 4 | packagist.tar.bz2 5 | optimize.db 6 | optimized 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 
27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. 
other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. 
In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | 123 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | packagist-crawler 2 | ======================== 3 | 4 | packagist.orgをクロールして、全てのpackage.jsonをダウンロードします。 5 | ダウンロードし終わったあとでstaticなweb serverで配信すれば、packagist.orgのミラーを作ることができます。 6 | 7 | Requirement 8 | ------------------ 9 | - PHP > 5.3 10 | - ext-curl 11 | - ext-hash 12 | - ext-json 13 | - ext-zlib 14 | - ext-PDO 15 | - ext-pdo\_sqlite 16 | 17 | 18 | Install 19 | ------------------ 20 | 21 | ```sh 22 | $ git clone https://github.com/hirak/packagist-crawler 23 | $ cd packagist-crawler 24 | $ composer install 25 | ``` 26 | 27 | Download! 28 | ------------------ 29 | 30 | ```sh 31 | $ php parallel.php 32 | 33 | (...few minutes...) 34 | 35 | $ ls cache/ 36 | p/ 37 | packages.json 38 | ``` 39 | 40 | 41 | Configuration 42 | ------------------ 43 | 44 | - config.default.php 45 | - config.php 46 | 47 | このどちらかのファイルがあると、挙動を変えることができます。 48 | 修正したいときはconfig.default.phpをconfig.phpにコピーして、 49 | config.phpの方をカスタマイズしてください。 50 | 51 | ```php 52 | __DIR__ . '/cache/', 55 | //'cachedir' => '/usr/share/nginx/html/', 56 | //'cachedir' => '/usr/local/apache2/htdocs/', 57 | 'packagistUrl' => 'https://packagist.org', 58 | 'maxConnections' => 4, 59 | 'lockfile' => __DIR__ . '/cache/.lock', 60 | 'expiredDb' => __DIR__ . 
'/cache/.expired.db', 61 | ); 62 | ``` 63 | 64 | ### cachedir 65 | ダウンロードしたpackages.jsonを格納するディレクトリです。 66 | 67 | ### packagistUrl 68 | ダウンロード元のpackagist.orgのURLです。 69 | デフォルトではオリジンからダウンロードしますが、 70 | 既に存在する他のミラーサイトを指定することができます。 71 | 72 | ### maxConnections 73 | 並列ダウンロードの並列数です。 74 | 増やした方が速くダウンロードできますが、 75 | オリジンに負荷をかけるので適当なところにしてください。 76 | 77 | ### expiredDb 78 | ファイル更新によって古くなったjsonが記録されています。 79 | 80 | ## License 81 | 82 | 著作権は放棄するものとします。 83 | 利用に際して制限はありませんし、作者への連絡や著作権表示なども必要ありません。 84 | スニペット的にコードをコピーして使っても問題ありません。 85 | 86 | [ライセンスの原文](LICENSE) 87 | 88 | CC0-1.0 (No Rights Reserved) 89 | - https://creativecommons.org/publicdomain/zero/1.0/ 90 | - http://sciencecommons.jp/cc0/about (Japanese) 91 | 92 | -------------------------------------------------------------------------------- /cache/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /check.php: -------------------------------------------------------------------------------- 1 | cachedir; 18 | 19 | $packagejson = json_decode(file_get_contents($cachedir.'packages.json')); 20 | 21 | 22 | $j = 0; 23 | $errors = array(); 24 | $providerCounter = 1; 25 | $numberOfProviders = count( (array)$packagejson->{'provider-includes'} ); 26 | 27 | foreach ($packagejson->{'provider-includes'} as $tpl => $provider) { 28 | $providerjson = str_replace('%hash%', $provider->sha256, $tpl); 29 | $packages = json_decode(file_get_contents($cachedir.$providerjson)); 30 | 31 | $progressBar = new ProgressBarManager(0, count( (array)$packages->providers )); 32 | $progressBar->setFormat(" - Package: %current%/%max% [%bar%] %percent%%"); 33 | echo " - Check Provider {$providerCounter}/{$numberOfProviders}:\n"; 34 | 35 | foreach ($packages->providers as $tpl2 => $sha) { 36 | if (!file_exists($file = $cachedir . 
"p/$tpl2\$$sha->sha256.json")) { 37 | $errors[] = " - $tpl\t$tpl2 file not exists\n"; 38 | } elseif ($sha->sha256 !== hash_file('sha256', $file)) { 39 | unlink($file); /* remove the corrupt file; the crawler only downloads packages whose cache file is absent */ 40 | $errors[] = " - $tpl\t$tpl2\tsha256 not match: {$sha->sha256}\n"; 41 | } else { 42 | ++$j; 43 | } 44 | $progressBar->advance(); 45 | } 46 | 47 | ++$providerCounter; 48 | } 49 | 50 | if (count($errors)) { 51 | echo "Errors: \n", implode('', $errors); 52 | } 53 | 54 | exit(count($errors) ? 1 : 0); /* exit status 1 only when a file was missing or its sha256 differed; 0 when every package verified */ 55 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hirak/packagist-crawler", 3 | "description": "build mirror of packagist.org", 4 | "license": "CC0-1.0", 5 | "authors": [ 6 | { 7 | "name": "Hiraku NAKANO", 8 | "email": "hiraku@tojiru.net" 9 | } 10 | ], 11 | "require": { 12 | "spindle/httpclient": "1.*", 13 | "guiguiboy/php-cli-progress-bar": "dev-master", 14 | "php": ">=5.4", 15 | "ext-json": "*", 16 | "ext-hash": "*", 17 | "ext-curl": "*", 18 | "ext-PDO": "*", 19 | "ext-zlib": "*", 20 | "ext-pdo_sqlite": "*" 21 | }, 22 | "autoload": { 23 | "psr-4": { 24 | "hirak\\PackagistCrawler\\": "src/" 25 | } 26 | }, 27 | "bin": [ 28 | "parallel.php" 29 | ], 30 | "scripts": { 31 | "crawl": "php parallel.php", 32 | "test": "php check.php", 33 | "compress": "tar cjf packagist.tar.bz2 cache/" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /composer.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_readme": [ 3 | "This file locks the dependencies of your project to a known state", 4 | "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", 5 | "This 
file is @generated automatically" 6 | ], 7 | "hash": "2e937a4f120a0b748759af9dd8377cf2", 8 | "content-hash": "325314e6a447f35b59eaeff8d1bca3c5", 9 | "packages": [ 10 | { 11 | "name": 
"guiguiboy/php-cli-progress-bar", 12 | "version": "dev-master", 13 | "source": { 14 | "type": "git", 15 | "url": "https://github.com/guiguiboy/PHP-CLI-Progress-Bar.git", 16 | "reference": "7d3eb61c1f0c164b9c3139af694b2d38171e4d04" 17 | }, 18 | "dist": { 19 | "type": "zip", 20 | "url": "https://api.github.com/repos/guiguiboy/PHP-CLI-Progress-Bar/zipball/7d3eb61c1f0c164b9c3139af694b2d38171e4d04", 21 | "reference": "7d3eb61c1f0c164b9c3139af694b2d38171e4d04", 22 | "shasum": "" 23 | }, 24 | "require": { 25 | "ext-mbstring": "*", 26 | "php": ">=5.3.0" 27 | }, 28 | "type": "library", 29 | "autoload": { 30 | "psr-0": { 31 | "ProgressBar": "." 32 | } 33 | }, 34 | "notification-url": "https://packagist.org/downloads/", 35 | "license": [ 36 | "MIT" 37 | ], 38 | "authors": [ 39 | { 40 | "name": "Guillaume", 41 | "email": "guillaume.bretou@gmail.com" 42 | } 43 | ], 44 | "description": "Progress bar for PHP CLI scripts", 45 | "homepage": "https://github.com/guiguiboy/PHP-CLI-Progress-Bar", 46 | "keywords": [ 47 | "bar", 48 | "cli", 49 | "command-line", 50 | "progress" 51 | ], 52 | "time": "2014-11-19 13:12:00" 53 | }, 54 | { 55 | "name": "spindle/httpclient", 56 | "version": "1.0.1", 57 | "source": { 58 | "type": "git", 59 | "url": "https://github.com/spindle/spindle-httpclient.git", 60 | "reference": "aeda3e390e6c1006d5772b8867960aeaf5d9bac5" 61 | }, 62 | "dist": { 63 | "type": "zip", 64 | "url": "https://api.github.com/repos/spindle/spindle-httpclient/zipball/aeda3e390e6c1006d5772b8867960aeaf5d9bac5", 65 | "reference": "aeda3e390e6c1006d5772b8867960aeaf5d9bac5", 66 | "shasum": "" 67 | }, 68 | "require": { 69 | "ext-curl": "*", 70 | "php": ">=5.3.0" 71 | }, 72 | "require-dev": { 73 | "apigen/apigen": "*", 74 | "dg/texy": "=5.4", 109 | "ext-json": "*", 110 | "ext-hash": "*", 111 | "ext-curl": "*", 112 | "ext-pdo": "*", 113 | "ext-zlib": "*", 114 | "ext-pdo_sqlite": "*" 115 | }, 116 | "platform-dev": [] 117 | } 118 | 
-------------------------------------------------------------------------------- /config.default.php: -------------------------------------------------------------------------------- 1 | __DIR__ . '/cache/', 8 | //'cachedir' => '/usr/share/nginx/html/', 9 | //'cachedir' => '/usr/local/apache2/htdocs/', 10 | 'packagistUrl' => 'https://packagist.org', 11 | 'lockfile' => __DIR__ . '/cache/.lock', 12 | 'expiredDb' => __DIR__ . '/cache/.expired.db', 13 | 'maxConnections' => 2, 14 | 'generateGz' => true, 15 | 'expireMinutes' => 24 * 60, 16 | 'url' => 'http://localhost', 17 | 'cfemail' => null, 18 | 'cfkey' => null, 19 | 'zoneid' => null, 20 | ); 21 | -------------------------------------------------------------------------------- /index.html.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Packagist.JP 8 | 9 | 50 | 51 | 52 |
53 |

PackagistJP

54 |

最終同期: (JST) (2分毎に同期)

55 |
56 | 57 | 58 |
59 |

PHPのライブラリリポジトリであるhttps://packagist.orgのミラーサイトです。packagist.orgの代わりにこちらを参照することで、composer updateの応答速度が速くなります。特にフランスから遠い、アジア圏では顕著な効果が得られます。

60 |

有効にするには以下のコマンドを打ち込んでください。

61 |
62 | 63 | 64 |

enable

65 |
$ composer config -g repos.packagist composer <?= $url ?>
66 | 67 |

disable

68 |
$ composer config -g --unset repos.packagist
69 | 70 | 71 |
72 | 73 |

なお、このサイトではcomposer自体やpackagist.orgにあるパッケージ情報ページ、検索機能などはミラーしておりません。それぞれ本家サイトをご利用ください。

74 | 75 |

仕組み

76 | 77 |

composer updateを実行すると、composerはpackagist.orgからパッケージ情報が書かれたJSONファイルをダウンロードし、必要なパッケージやそれに依存するパッケージのJSONファイルを個別にダウンロードしていきます。パッケージの複雑さにもよりますが、update時にダウンロードするJSONファイルは数十から数百に達します。composerは現状全ファイルに対してTLSのコネクション確立からやり直すので、packagist.orgとcomposerを実行しているクライアントとの物理的な距離(RTT)が大きく影響します。

78 | 79 |

本サイトは日本のさくらVPSを使って配信しています。hirak/packagist-crawlerというスクリプトを使って、あらかじめpackagist.orgをクロールし、同期時点でのパッケージの情報が書かれた全JSONファイルをダウンロードしてあります。

80 | 81 |

配信は普通のnginxを使い、高負荷時の対策として手前にCDN(CloudFlare)を置いてあります。単にそれだけのサイトです。

82 | 83 |

このため、ミラーサイトを使った場合に高速化するのはcomposer update, composer require, composer removeなどメタファイルのやり取りが発生する場合だけになります。

84 | 85 |

Travis-CIなどでcomposer installする際は、github.comなどとのやり取りになっており、ミラーを有効にしたところで全く高速化されません。

86 | 87 | 88 |

免責事項

89 | 90 |

このサイトは @Hiraku が個人的に運営しています。スペック的には今の数万倍のアクセスが来ようが余裕で捌けますので自由に使っていただいて構いません。利用に際して料金等はかかりませんが、個人運営ですので、障害が起きても何ら保障は致しかねます。その点だけご了承ください。

91 | 92 |

packagist.jpはただのミラーサイトで、JSONの加工は行っていないので、何か不具合があれば上記disableコマンドで設定を外し、本家packagist.orgを参照するようにしてみてください。

93 | 94 |

使い方の疑問や要望など、答えられる範囲では答えますので、お尋ねください。

95 | 96 |
97 | 98 |
Copyright (C) 2014, Hiraku (hiraku at tojiru.net)
99 | 100 | 101 | -------------------------------------------------------------------------------- /optimize.php: -------------------------------------------------------------------------------- 1 | PDO::FETCH_ASSOC, 14 | PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION, 15 | ]); 16 | $pdo->exec( 17 | 'CREATE TABLE IF NOT EXISTS providers (' 18 | .'file TEXT' 19 | .',hash TEXT' 20 | .')' 21 | ); 22 | $pdo->exec( 23 | 'CREATE TABLE IF NOT EXISTS packages (' 24 | .'provider TEXT' 25 | .',file TEXT' 26 | .',hash TEXT' 27 | .')' 28 | ); 29 | $pdo->beginTransaction(); 30 | 31 | $muda = 0; 32 | foreach ($packagesjson->{'provider-includes'} as $providerpath => $providerinfo) { 33 | $providerjson = json_decode(file_get_contents(BASEPATH . '/' . str_replace('%hash%', $providerinfo->sha256, $providerpath))); 34 | 35 | foreach ($providerjson->providers as $packagename => $packageinfo) { 36 | $packagejson = json_decode(file_get_contents(BASEPATH . "/p/$packagename\${$packageinfo->sha256}.json"), true); 37 | 38 | foreach ($packagejson['packages'] as $versionname => $info) { 39 | if ($versionname !== $packagename) { 40 | echo "むだな $versionname が $packagename の中に含まれています\n"; 41 | $muda += strlen(json_encode($info)); 42 | unset($packagejson['packages'][$versionname]); 43 | } 44 | } 45 | 46 | if (empty($packagejson['packages'])) { 47 | echo "$packagename は パッケージ情報を何も含んでいません。。要らないんじゃね?\n"; 48 | continue; 49 | } 50 | 51 | // package.jsonを作りなおす 52 | $packagestr = json_encode($packagejson, JSON_UNESCAPED_SLASHES); 53 | $packagehash = hash('sha256', $packagestr); 54 | 55 | // 新しいファイルとして書き出し 56 | $path = OPTPATH . 
"/p/{$packagename}\${$packagehash}.json"; 57 | $dir = dirname($path); 58 | if (!file_exists($dir)) { 59 | mkdir($dir, 0755, true); 60 | } 61 | file_put_contents($path, $packagestr); 62 | 63 | // DB上のhash値を更新 64 | $stmt = $pdo->prepare('INSERT INTO packages (provider, file, hash) VALUES (:provider, :file, :hash)'); 65 | $stmt->bindValue(':provider', $providerpath); 66 | $stmt->bindValue(':file', $packagename); 67 | $stmt->bindValue(':hash', $packagehash); 68 | $stmt->execute(); 69 | } 70 | 71 | // provider.jsonを作りなおす 72 | // DBから結果的にどうなったか抽出する 73 | $stmt = $pdo->prepare('SELECT file, hash FROM packages WHERE provider = :provider'); 74 | $stmt->bindValue(':provider', $providerpath); 75 | $stmt->execute(); 76 | $stmt->setFetchMode(PDO::FETCH_ASSOC); 77 | $newpackages = []; 78 | foreach ($stmt as $row) { 79 | $newpackages[$row['file']] = ['sha256'=>$row['hash']]; 80 | } 81 | 82 | $providerjson = ['providers' => $newpackages]; 83 | $providerstr = json_encode($providerjson, JSON_UNESCAPED_SLASHES); 84 | $providerhash = hash('sha256', $providerstr); 85 | 86 | // 新しいファイルとして書き出し 87 | $path = OPTPATH . '/' . 
str_replace('%hash%', $providerhash, $providerpath); 88 | $dir = dirname($path); 89 | if (!file_exists($dir)) { 90 | mkdir($dir, 0755, true); 91 | } 92 | file_put_contents($path, $providerstr); 93 | 94 | // DB上のhash値を更新 95 | $stmt = $pdo->prepare('INSERT INTO providers (file, hash) VALUES (:file, :hash)'); 96 | $stmt->bindValue(':file', $providerpath); 97 | $stmt->bindValue(':hash', $providerhash); 98 | $stmt->execute(); 99 | } 100 | $pdo->commit(); 101 | 102 | // 最後にpackages.jsonを作る 103 | // DBから結果的にどうなったか抽出する 104 | $stmt = $pdo->query('SELECT file, hash FROM providers'); 105 | $stmt->setFetchMode(PDO::FETCH_ASSOC); 106 | $newproviders = []; 107 | foreach ($stmt as $row) { 108 | $newproviders[$row['file']] = ['sha256'=>$row['hash']]; 109 | } 110 | $packagesjson->{'provider-includes'} = $newproviders; 111 | $packagesstr = json_encode($packagesjson, JSON_UNESCAPED_SLASHES); 112 | 113 | // 新しいファイルとして書き出し 114 | $path = OPTPATH . '/packages.json'; 115 | file_put_contents($path, $packagesstr); 116 | 117 | 118 | echo "全部で $muda byte 無駄です\n"; 119 | -------------------------------------------------------------------------------- /parallel.php: -------------------------------------------------------------------------------- 1 | lockfile)) { 19 | throw new \RuntimeException("$config->lockfile exists"); 20 | } 21 | 22 | 23 | touch($config->lockfile); 24 | register_shutdown_function(function() use($config) { 25 | unlink($config->lockfile); 26 | }); 27 | 28 | $globals = new \stdClass; 29 | $globals->q = new \SplQueue; 30 | $globals->expiredManager = new ExpiredFileManager($config->expiredDb, $config->expireMinutes); 31 | for ($i=0; $i<$config->maxConnections; ++$i) { 32 | $req = new Request; 33 | $req->setOption('encoding', 'gzip'); 34 | $req->setOption('userAgent', 'https://github.com/hirak/packagist-crawler'); 35 | $globals->q->enqueue($req); 36 | } 37 | 38 | $globals->mh = new Multi; 39 | clearExpiredFiles($globals->expiredManager); 40 | 41 | do { 42 | $globals->retry = 
false; 43 | $providers = downloadProviders($config, $globals); 44 | $mods = downloadPackages($config, $globals, $providers); 45 | //$globals->retry = checkFiles($config); 46 | if (isset($config->cfemail, $config->cfkey, $config->zoneid)) { 47 | clearCloudFlareCache($mods, $config->cfemail, $config->cfkey, $config->zoneid); 48 | } 49 | generateHtml($config); 50 | } while ($globals->retry); 51 | 52 | flushFiles($config); 53 | exit; 54 | 55 | /** 56 | * packages.json & provider-xxx$xxx.json downloader 57 | */ 58 | function downloadProviders($config, $globals) 59 | { 60 | $cachedir = $config->cachedir; 61 | 62 | $packagesCache = $cachedir . 'packages.json'; 63 | 64 | $req = new Request($config->packagistUrl . '/packages.json'); 65 | $req->setOption('encoding', 'gzip'); 66 | 67 | $res = $req->send(); 68 | 69 | if (200 === $res->getStatusCode()) { 70 | $packages = json_decode($res->getBody()); 71 | foreach (explode(' ', 'notify notify-batch search') as $k) { 72 | if (0 === strpos($packages->$k, '/')) { 73 | $packages->$k = 'https://packagist.org' . $packages->$k; 74 | } 75 | } 76 | file_put_contents($packagesCache . '.new', json_encode($packages)); 77 | } else { 78 | //no changes'; 79 | copy($packagesCache, $packagesCache . '.new'); 80 | $packages = json_decode(file_get_contents($packagesCache)); 81 | } 82 | 83 | if (empty($packages->{'provider-includes'})) { 84 | throw new \RuntimeException('packages.json schema changed?'); 85 | } 86 | 87 | $providers = []; 88 | 89 | $numberOfProviders = count( (array)$packages->{'provider-includes'} ); 90 | $progressBar = new ProgressBarManager(0, $numberOfProviders); 91 | $progressBar->setFormat('Downloading Providers: %current%/%max% [%bar%] %percent%%'); 92 | 93 | foreach ($packages->{'provider-includes'} as $tpl => $version) { 94 | $fileurl = str_replace('%hash%', $version->sha256, $tpl); 95 | $cachename = $cachedir . 
$fileurl; 96 | $providers[] = $cachename; 97 | 98 | if (!file_exists($cachename)){ 99 | $req->setOption('url', $config->packagistUrl . '/' . $fileurl); 100 | $res = $req->send(); 101 | 102 | if (200 === $res->getStatusCode()) { 103 | $oldcache = $cachedir . str_replace('%hash%.json', '*', $tpl); 104 | if ($glob = glob($oldcache)) { 105 | foreach ($glob as $old) { 106 | $globals->expiredManager->add($old, time()); 107 | } 108 | } 109 | if (!file_exists(dirname($cachename))) { 110 | mkdir(dirname($cachename), 0777, true); 111 | } 112 | file_put_contents($cachename, $res->getBody()); 113 | if ($config->generateGz) { 114 | file_put_contents($cachename . '.gz', gzencode($res->getBody())); 115 | } 116 | } else { 117 | $globals->retry = true; 118 | } 119 | } 120 | 121 | $progressBar->advance(); 122 | } 123 | 124 | return $providers; 125 | } 126 | 127 | /** 128 | * composer.json downloader 129 | * 130 | */ 131 | function downloadPackages($config, $globals, $providers) 132 | { 133 | $cachedir = $config->cachedir; 134 | $i = 1; 135 | $numberOfProviders = count($providers); 136 | $urls = []; 137 | 138 | foreach ($providers as $providerjson) { 139 | $list = json_decode(file_get_contents($providerjson)); 140 | if (!$list || empty($list->providers)) continue; 141 | 142 | $list = $list->providers; 143 | $all = count((array)$list); 144 | 145 | $progressBar = new ProgressBarManager(0, $all); 146 | echo " - Provider {$i}/{$numberOfProviders}:\n"; 147 | $progressBar->setFormat(" - Package: %current%/%max% [%bar%] %percent%%"); 148 | 149 | $sum = 0; 150 | foreach ($list as $packageName => $provider) { 151 | $progressBar->advance(); 152 | ++$sum; 153 | $url = "$config->packagistUrl/p/$packageName\$$provider->sha256.json"; 154 | $cachefile = $cachedir . 
str_replace("$config->packagistUrl/", '', $url); 155 | if (file_exists($cachefile)) continue; 156 | 157 | $req = $globals->q->dequeue(); 158 | $req->packageName = $packageName; 159 | $req->sha256 = $provider->sha256; 160 | $req->setOption('url', $url); 161 | $globals->mh->attach($req); 162 | $globals->mh->start(); //non block 163 | 164 | if (count($globals->q)) continue; 165 | 166 | /** @type Request[] $requests */ 167 | do { 168 | $requests = $globals->mh->getFinishedResponses(); //block 169 | } while (0 === count($requests)); 170 | 171 | foreach ($requests as $req) { 172 | $res = $req->getResponse(); 173 | $globals->q->enqueue($req); 174 | 175 | if (200 !== $res->getStatusCode() || $req->sha256 !== hash('sha256', $res)) { 176 | error_log($res->getStatusCode(). "\t". $res->getUrl()); 177 | $globals->retry = true; 178 | continue; 179 | } 180 | 181 | $cachefile = $cachedir 182 | . str_replace("$config->packagistUrl/", '', $res->getUrl()); 183 | $cachefile2 = $cachedir . '/p/' . $req->packageName . '.json'; 184 | $urls[] = $config->url . '/p/' . $req->packageName . '.json'; 185 | 186 | if ($glob = glob("{$cachedir}p/$req->packageName\$*")) { 187 | foreach ($glob as $old) { 188 | $globals->expiredManager->add($old, time()); 189 | } 190 | } 191 | if (!file_exists(dirname($cachefile))) { 192 | mkdir(dirname($cachefile), 0777, true); 193 | } 194 | file_put_contents($cachefile, $res->getBody()); 195 | file_put_contents($cachefile2, $res->getBody()); 196 | if ($config->generateGz) { 197 | $gz = gzencode($res->getBody()); 198 | file_put_contents($cachefile . '.gz', $gz); 199 | file_put_contents($cachefile2 . 
'.gz', $gz); 200 | } 201 | } 202 | } 203 | 204 | ++$i; 205 | } 206 | 207 | 208 | if (0 === count($globals->mh)) return $urls; /* always return the list of modified URLs: the caller hands it to clearCloudFlareCache(array ...), which rejects null */ 209 | $globals->mh->waitResponse(); 210 | 211 | $progressBar = new ProgressBarManager(0, count($globals->mh)); 212 | $progressBar->setFormat(" - Remained packages: %current%/%max% [%bar%] %percent%%"); 213 | 214 | foreach ($globals->mh as $req) { 215 | $res = $req->getResponse(); 216 | 217 | if (200 !== $res->getStatusCode() || $req->sha256 !== hash('sha256', $res)) { 218 | error_log($res->getStatusCode(). "\t". $res->getUrl()); 219 | $globals->retry = true; 220 | continue; 221 | } 222 | 223 | $cachefile = $cachedir 224 | . str_replace("$config->packagistUrl/", '', $res->getUrl()); 225 | $cachefile2 = $cachedir . '/p/' . $req->packageName . '.json'; 226 | $urls[] = $config->url . '/p/' . $req->packageName . '.json'; 227 | 228 | if ($glob = glob("{$cachedir}p/$req->packageName\$*")) { 229 | foreach ($glob as $old) { 230 | $globals->expiredManager->add($old, time()); 231 | } 232 | } 233 | if (!file_exists(dirname($cachefile))) { 234 | mkdir(dirname($cachefile), 0777, true); 235 | } 236 | file_put_contents($cachefile, $res->getBody()); file_put_contents($cachefile2, $res->getBody()); /* also refresh the un-hashed copy, as the main loop does, so it never lags behind its .gz twin */ 237 | if ($config->generateGz) { 238 | $gz = gzencode($res->getBody()); 239 | file_put_contents($cachefile . '.gz', $gz); 240 | file_put_contents($cachefile2 . '.gz', $gz); 241 | } 242 | 243 | $progressBar->advance(); 244 | } 245 | 246 | return $urls; 247 | } 248 | 249 | function flushFiles($config) 250 | { 251 | rename( 252 | $config->cachedir . 'packages.json.new', 253 | $config->cachedir . 'packages.json' 254 | ); 255 | file_put_contents( 256 | $config->cachedir . 'packages.json.gz', 257 | gzencode(file_get_contents($config->cachedir . 
'packages.json')) 258 | ); 259 | 260 | error_log('finished! 
flushing...'); 261 | } 262 | 263 | /** 264 | * check sha256 265 | */ 266 | function checkFiles($config) 267 | { 268 | $cachedir = $config->cachedir; 269 | 270 | $packagejson = json_decode(file_get_contents($cachedir.'packages.json.new')); 271 | 272 | $i = $j = 0; 273 | foreach ($packagejson->{'provider-includes'} as $tpl => $provider) { 274 | $providerjson = str_replace('%hash%', $provider->sha256, $tpl); 275 | $packages = json_decode(file_get_contents($cachedir.$providerjson)); 276 | 277 | foreach ($packages->providers as $tpl2 => $sha) { 278 | if (!file_exists($file = $cachedir . "p/$tpl2\$$sha->sha256.json")) { 279 | ++$i; 280 | } elseif ($sha->sha256 !== hash_file('sha256', $file)) { 281 | ++$i; 282 | unlink($file); 283 | } else { 284 | ++$j; 285 | } 286 | } 287 | } 288 | 289 | error_log($i . ' / ' . ($i + $j)); 290 | return $i; 291 | } 292 | 293 | function clearExpiredFiles(ExpiredFileManager $expiredManager) 294 | { 295 | $expiredFiles = $expiredManager->getExpiredFileList(); 296 | 297 | $progressBar = new ProgressBarManager(0, count($expiredFiles)); 298 | $progressBar->setFormat(" - Clearing Expired Files: %current%/%max% [%bar%] %percent%%"); 299 | 300 | foreach ($expiredFiles as $file) { 301 | if (file_exists($file)) { 302 | unlink($file) and $expiredManager->delete($file); 303 | } else { 304 | $expiredManager->delete($file); 305 | } 306 | $progressBar->advance(); 307 | } 308 | } 309 | 310 | function clearCloudFlareCache(array $modifiedFiles, $email, $key, $identifier) 311 | { 312 | $req = new Request("https://api.cloudflare.com/client/v4/zones/$identifier/purge_cache"); 313 | foreach (array_chunk($modifiedFiles, 30) as $mods) { 314 | $req->setOptions([ 315 | 'customRequest' => 'DELETE', 316 | 'verbose' => true, 317 | 'httpHeader' => [ 318 | "X-Auth-Email: $email", 319 | "X-Auth-Key: $key", 320 | 'Content-Type: application/json', 321 | ], 322 | 'postFields' => json_encode([ 323 | 'files' => $mods, 324 | ]), 325 | ]); 326 | $req->send(); 327 | } 328 | } 329 
| 330 | function generateHtml($_config) 331 | { 332 | $url = $_config->url; /* stays in scope for the included template */ 333 | ob_start(); 334 | include __DIR__ . '/index.html.php'; 335 | file_put_contents($_config->cachedir . '/index.html', ob_get_clean()); 336 | } 337 | -------------------------------------------------------------------------------- /src/ExpiredFileManager.php: -------------------------------------------------------------------------------- 1 | expire = $expire; 29 | 30 | $this->pdo = $pdo = new PDO("sqlite:$dbpath", null, null, array( 31 | PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC, 32 | PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION, 33 | )); 34 | $pdo->beginTransaction(); /* one transaction for the object's lifetime; committed in __destruct() */ 35 | $pdo->exec( 36 | 'CREATE TABLE IF NOT EXISTS expired (' 37 | .'path TEXT PRIMARY KEY, expiredAt INTEGER' 38 | .')' 39 | ); 40 | $pdo->exec( 41 | 'CREATE INDEX IF NOT EXISTS expiredAtIndex' 42 | .' ON expired (expiredAt)' 43 | ); 44 | } 45 | 46 | function __destruct() 47 | { 48 | $this->pdo->commit(); 49 | $this->pdo->exec('VACUUM'); 50 | } 51 | 52 | /** 53 | * add record into expired.db (INSERT OR IGNORE: an already recorded path keeps its first timestamp) 54 | * @param string $fullpath expired json file path 55 | * @param integer $now timestamp (optional, defaults to the request start time) 56 | * @return void 57 | */ 58 | function add($fullpath, $now=null) 59 | { 60 | static $insert, $path, $expiredAt; 61 | isset($now) or $now = $_SERVER['REQUEST_TIME']; /* fall back to the request time only when no timestamp was passed; the previous empty() test was inverted */ 62 | 63 | if (empty($insert)) { 64 | $insert = $this->pdo->prepare( 65 | 'INSERT OR IGNORE INTO expired(path,expiredAt)' 66 | .' 
VALUES(:path, :expiredAt)' 67 | ); 68 | $insert->bindParam(':path', $path, PDO::PARAM_STR); 69 | $insert->bindParam(':expiredAt', $expiredAt, PDO::PARAM_INT); 70 | } 71 | 72 | $path = $fullpath; 73 | $expiredAt = $now; 74 | $insert->execute(); 75 | } 76 | 77 | /** 78 | * delete record from expired.db 79 | * @param string $fullpath expired json file path 80 | * @return void 81 | */ 82 | function delete($fullpath) 83 | { 84 | static $delete, $path; 85 | 86 | if (empty($delete)) { 87 | $delete = $this->pdo->prepare( 88 | 'DELETE FROM expired WHERE path = :path' 89 | ); 90 | $delete->bindParam(':path', $path, PDO::PARAM_STR); 91 | } 92 | 93 | $path = $fullpath; 94 | $delete->execute(); 95 | } 96 | 97 | /** 98 | * get file list from expired.db 99 | * @param integer $until timestamp (optional); defaults to request time minus the configured expire minutes 100 | * @return array list of paths whose expiredAt is at or before $until 101 | */ 102 | function getExpiredFileList($until=null) 103 | { 104 | isset($until) or $until = $_SERVER['REQUEST_TIME'] - $this->expire * 60; 105 | 106 | $stmt = $this->pdo->prepare( 107 | 'SELECT path FROM expired WHERE expiredAt <= :expiredAt' 108 | ); 109 | $stmt->bindValue(':expiredAt', $until, PDO::PARAM_INT); 110 | $stmt->execute(); 111 | $stmt->setFetchMode(PDO::FETCH_COLUMN, 0); 112 | $list = array(); 113 | 114 | foreach ($stmt as $file){ 115 | $list[] = $file; 116 | } 117 | 118 | return $list; 119 | } 120 | } 121 | --------------------------------------------------------------------------------