├── .gitattributes ├── .github ├── FUNDING.yml └── workflows │ └── release.yml ├── .gitignore ├── .mocharc.js ├── .travis.yml ├── data ├── metadata.json └── packages.json ├── example.js ├── hostnames.js ├── lib └── stats-tpl.js ├── package-lock.json ├── package.json ├── readme.md ├── scripts ├── build.js ├── stats.js └── update.js └── test ├── metadata.js ├── repos.js └── stats.js /.gitattributes: -------------------------------------------------------------------------------- 1 | data/packages.json filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | github: jsdelivr 3 | open_collective: jsdelivr 4 | custom: ['https://www.jsdelivr.com/sponsors'] 5 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | schedule: 5 | - cron: "0 */12 * * *" 6 | workflow_dispatch: {} 7 | 8 | jobs: 9 | lint: 10 | name: lint 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | token: ${{ secrets.GITHUB_PAT_FROM_ZEKE }} 16 | lfs: true 17 | - name: Checkout LFS objects 18 | run: git lfs checkout 19 | - name: npm ci 20 | run: npm ci 21 | - name: release 22 | run: | 23 | git checkout master 24 | git config --global user.name "github-actions" 25 | git config --global user.email "github-actions@users.noreply.github.com" 26 | 27 | npm run update 28 | npm test 29 | [[ `git status --porcelain` ]] || exit 30 | 31 | git add . 
32 | 33 | # Publish only if data/packages.json changed 34 | if [[ -z `git status --porcelain 'data/packages.json'` ]]; then 35 | git commit -m "update metadata" 36 | git push origin master 37 | else 38 | echo "//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}" > ~/.npmrc 39 | 40 | # bump the version, commit, and create a tag 41 | npm version patch -f -m "update all-the-package-repos to %s" 42 | 43 | git push origin master --follow-tags 44 | npm publish 45 | fi 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .npmrc 3 | .cache/ 4 | .idea/ 5 | node_modules/ 6 | -------------------------------------------------------------------------------- /.mocharc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | timeout: 10000, 3 | }; 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "8" 4 | notifications: 5 | email: false -------------------------------------------------------------------------------- /data/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "error": true, 3 | "packages": 3454782, 4 | "last": 64607674, 5 | "latest": 43064782, 6 | "runs": { 7 | "total": 2277, 8 | "status": { 9 | "init": 2013, 10 | "timeout": 84, 11 | "killed": 175, 12 | "restart": 2 13 | } 14 | }, 15 | "repos": { 16 | "unsets": 1429966, 17 | "github": 1988953, 18 | "gitlab": 6457, 19 | "bitbucket": 1397, 20 | "others": 28009 21 | }, 22 | "stats": { 23 | "changes": 26351507, 24 | "inserts": 4853736, 25 | "updates": 18971370, 26 | "deletes": 2015256, 27 | "invalid": 511145, 28 | "ignored": 47, 29 | "notFound": 8 30 | }, 31 | "batch": { 32 | "status": "killed", 33 | 
"limit": -21542892, 34 | "since": 64607674, 35 | "index": 64607674, 36 | "until": 43064782, 37 | "started": "2025-06-06T00:21:38.436Z", 38 | "finished": "2025-06-06T06:06:38.538Z", 39 | "took_ms": 20700102, 40 | "found": 0 41 | }, 42 | "ignored": [ 43 | 20012191, 44 | 20468062, 45 | 20908815, 46 | 21553560, 47 | 21660508, 48 | 38167386, 49 | 64607633, 50 | 64607634, 51 | 64607635, 52 | 64607636, 53 | 64607637, 54 | 64607638, 55 | 64607639, 56 | 64607640, 57 | 64607641, 58 | 64607642, 59 | 64607643, 60 | 64607644, 61 | 64607645, 62 | 64607646, 63 | 64607647, 64 | 64607648, 65 | 64607649, 66 | 64607650, 67 | 64607651, 68 | 64607652, 69 | 64607653, 70 | 64607654, 71 | 64607655, 72 | 64607656, 73 | 64607657, 74 | 64607658, 75 | 64607659, 76 | 64607660, 77 | 64607661, 78 | 64607662, 79 | 64607663, 80 | 64607664, 81 | 64607665, 82 | 64607666, 83 | 64607667, 84 | 64607668, 85 | 64607669, 86 | 64607670, 87 | 64607671, 88 | 64607672, 89 | 64607673 90 | ], 91 | "notFound": [ 92 | [ 93 | "@bodycheck/body-check-core", 94 | [ 95 | 41381630 96 | ] 97 | ], 98 | [ 99 | "@dsr-rollback-org-raits-mufti-roopy-ulnar/dsr-rollback-raits-mufti-roopy-ulnar", 100 | [ 101 | 41386262 102 | ] 103 | ], 104 | [ 105 | "@primess/package_poc", 106 | [ 107 | 41403706 108 | ] 109 | ], 110 | [ 111 | "@character-tech/client-common", 112 | [ 113 | 41430887 114 | ] 115 | ], 116 | [ 117 | "@abdul_samad_12/ez-js", 118 | [ 119 | 41448952 120 | ] 121 | ], 122 | [ 123 | "rulesthatvalidnpmpackagenameshouldconformtopackagenamelengthshouldbegreaterthanzeroallthecharactersinthepackagenamemustbelowercasenouppercaseormixedcasenamesareallowedpackagenamecanconsistofhyphenspackagenamemustno", 124 | [ 125 | 62237333 126 | ] 127 | ], 128 | [ 129 | "rulesthatvalidnpmpackagenameshouldconformtopackagenamelengthshouldbegreaterthanzeroallthecharactersinthepackagenamemustbelowercasenouppercaseormixedcasenamesareallowedpackagenamecanconsistofhyphenspackagenamemustn", 130 | [ 131 | 62249536 132 | ] 133 | ], 134 | [ 135 | 
"rulesthatvalidnpmpackagenameshouldconformtopackagenamelengthshouldbegreaterthanzeroallthecharactersinthepackagenamemustbelowercasenouppercaseormixedcasenamesareallowedpackagenamecanconsistofhyphenspackagename", 136 | [ 137 | 62249587 138 | ] 139 | ] 140 | ] 141 | } -------------------------------------------------------------------------------- /data/packages.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e595af1081e6ced06971172f9a1a22fa8b9dc4ef55776c112f561e93a08ef11d 3 | size 193890085 4 | -------------------------------------------------------------------------------- /example.js: -------------------------------------------------------------------------------- 1 | const repos = require('.') 2 | const packages = require('all-the-package-names') 3 | const urls = Object.values(repos) 4 | const github = urls.filter(url => (/github\.com/i).test(url)) 5 | const bitbucket = urls.filter(url => (/bitbucket\.org/i).test(url)) 6 | const gitlab = urls.filter(url => (/gitlab\.com/i).test(url)) 7 | 8 | function percentage (collection) { 9 | return (collection.length / packages.length * 100).toFixed(2) + '%' 10 | } 11 | 12 | console.log(` 13 | Packages | Count | Percentage of Total Packages 14 | ---- | ----- | ---------- 15 | All | ${packages.length} | 100% 16 | With repository in package.json | ${urls.length} | ${percentage(urls)} 17 | On GitHub | ${github.length} | ${percentage(github)} 18 | On BitBucket | ${bitbucket.length} | ${percentage(bitbucket)} 19 | On GitLab | ${gitlab.length} | ${percentage(gitlab)} 20 | `) 21 | -------------------------------------------------------------------------------- /hostnames.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const URL = require('url') 4 | const countValues = require('count-array-values') 5 | const urls = Object.values(require('.')) 6 | // 
/**
 * Read the hostname of one repository URL.
 *
 * @param {string} url
 * @return {string|null} hostname without a leading "www.", or null when
 *   url.parse() finds no host or rejects the value
 */
const toHostname = (url) => {
  try {
    // eslint-disable-next-line n/no-deprecated-api
    const { hostname } = URL.parse(url)
    return hostname ? hostname.replace(/^www\./i, '') : null
  } catch (err) {
    // Best effort over registry data: an unparseable entry is left out of the
    // tally instead of aborting the whole run.
    return null
  }
}

// One hostname per package that has a usable repository URL.
const hostnames = urls
  .filter(Boolean)
  .map(toHostname)
  .filter(Boolean)

// Entries are read below as { value, count }.
const counts = countValues(hostnames)

module.exports = counts

// Print the table only when run as a script. `require.main === module` stays
// false when this file is loaded by another module, whereas the deprecated
// `module.parent` is also undefined when the loader is an ES module.
if (require.main === module) {
  // Column width: the longest hostname plus padding, found without
  // sorting (and reordering) the whole array.
  const width = hostnames.reduce((max, name) => Math.max(max, name.length), 0) + 3

  counts.forEach(hostname => {
    console.log(String(hostname.value).padEnd(width) + String(hostname.count))
  })
}
"version": "2.0.2304", 4 | "description": "All the repository URLs in the npm registry as an object whose keys are package names and values are URLs", 5 | "main": "data/packages.json", 6 | "repository": "https://github.com/nice-registry/all-the-package-repos", 7 | "author": "Zeke Sikelianos ", 8 | "license": "MIT", 9 | "files": [ 10 | "data", 11 | "hostnames.js" 12 | ], 13 | "scripts": { 14 | "build": "node scripts/build.js", 15 | "stats": "node scripts/stats.js", 16 | "update": "node scripts/update.js", 17 | "test": "mocha && standard" 18 | }, 19 | "devDependencies": { 20 | "all-the-package-names": "^2.0.2129", 21 | "bitbucket-url-to-object": "^0.3.0", 22 | "chai": "^5.2.0", 23 | "debug": "^4.4.0", 24 | "dedent": "^1.5.3", 25 | "github-url-to-object": "^4.0.2", 26 | "got": "^14.4.7", 27 | "is-url": "^1.2.2", 28 | "mocha": "^11.1.0", 29 | "ora": "^8.2.0", 30 | "package-stream": "^3.0.1", 31 | "standard": "^17.1.2" 32 | }, 33 | "dependencies": { 34 | "count-array-values": "^1.2.1" 35 | }, 36 | "bin": { 37 | "all-the-package-repo-hostnames": "hostnames.js" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # all-the-package-repos 2 | 3 | *Maintained by [jsDelivr](https://github.com/jsdelivr). Please consider [becoming a sponsor](https://github.com/sponsors/jsdelivr) to support us.* 4 | 5 | All the repository URLs in the npm registry as an object whose keys are package names and values are URLs. 6 | 7 | This package weighs in at about 194 MB (the size of `data/packages.json`). 
8 | 9 | ## Stats 10 | 11 | 12 | Packages | Count | Percentage 13 | :------- | -----:| ----------: 14 | With repository | 2024816 | 58.61% 15 | Null repository | 1429966 | 41.39% 16 | **Total** | 3454782 | 100.00% 17 | 18 | Providers | Count | Percentage 19 | :-------- | -----:| ----------: 20 | GitHub | 1988953 | 57.57% 21 | GitLab | 6457 | 0.19% 22 | Bitbucket | 1397 | 0.04% 23 | Others | 28009 | 0.81% 24 | **Total** | 2024816 | 58.61% 25 | 26 | 27 | ## Installation 28 | 29 | ```sh 30 | npm install all-the-package-repos --save 31 | ``` 32 | 33 | ## Usage 34 | 35 | ```js 36 | const repos = require('all-the-package-repos') 37 | 38 | repos.express 39 | // https://github.com/expressjs/express 40 | ``` 41 | 42 | See [example.js](example.js) for more usage details. 43 | 44 | GitHub URLs are normalized to their `https` form using 45 | [github-url-to-object](http://ghub.io/github-url-to-object): 46 | 47 | - `git@github.com:foo/bar.git` becomes `https://github.com/foo/bar` 48 | - `foo/bar` becomes `https://github.com/foo/bar` 49 | - [etc...](http://ghub.io/github-url-to-object) 50 | 51 | ### Repository Hostnames 52 | 53 | For the curious, there's a submodule that collects all the hostnames of all the 54 | repository URLs: 55 | 56 | ```js 57 | require('./hostnames').slice(0,10) 58 | 59 | [ 60 | { value: 'github.com', count: 452768 }, 61 | { value: 'bitbucket.org', count: 553 }, 62 | { value: 'git.oschina.net', count: 219 }, 63 | { value: 'gitlab.com', count: 116 }, 64 | { value: 'git.coding.net', count: 114 }, 65 | { value: 'archive.voodoowarez.com', count: 81 }, 66 | { value: 'gitee.com', count: 60 }, 67 | { value: 'gitlab.baidu.com', count: 49 }, 68 | { value: 'git-wip-us.apache.org', count: 38 }, 69 | { value: 'gitlab.alibaba-inc.com', count: 36 } 70 | ] 71 | ``` 72 | 73 | It also has a CLI: 74 | 75 | ```sh 76 | all-the-package-repo-hostnames | head -n 10 77 | 78 | github.com 452768 79 | bitbucket.org 553 80 | git.oschina.net 219 81 | gitlab.com 116 82 | git.coding.net 114 83 
| archive.voodoowarez.com 81 84 | gitee.com 60 85 | gitlab.baidu.com 49 86 | git-wip-us.apache.org 38 87 | gitlab.alibaba-inc.com 36 88 | ``` 89 | 90 | ## Tests 91 | 92 | ```sh 93 | npm install 94 | npm test 95 | ``` 96 | 97 | ## Dependencies 98 | 99 | - [count-array-values](https://www.npmjs.com/package/count-array-values): used by `hostnames.js` to count how often each hostname occurs 100 | 101 | ## Dev Dependencies 102 | 103 | - [all-the-packages](https://github.com/zeke/all-the-packages): All the npm registry metadata as an offline event stream. 104 | - [github-url-to-object](https://github.com/zeke/github-url-to-object): Extract user, repo, and other interesting properties from GitHub URLs 105 | - [object-values](https://github.com/sindresorhus/object-values): Get the values of an object 106 | - [standard](https://github.com/feross/standard): JavaScript Standard Style 107 | - [tap-spec](https://github.com/scottcorgan/tap-spec): Formatted TAP output like Mocha's spec reporter 108 | - [tape](https://github.com/substack/tape): tap-producing test harness for node and browsers 109 | 110 | 111 | ## License 112 | 113 | MIT 114 | 115 | _Generated by [package-json-to-readme](https://github.com/zeke/package-json-to-readme)_ 116 | -------------------------------------------------------------------------------- /scripts/build.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const path = require('path') 3 | const registry = require('package-stream')() 4 | const ora = require('ora') 5 | const spinner = ora('Loading').start() 6 | const parseGitHubUrl = require('github-url-to-object') 7 | const isUrl = require('is-url') 8 | const repos = {} 9 | let totalPackages = 0 10 | 11 | registry 12 | .on('package', (pkg) => { 13 | spinner.text = String(++totalPackages) 14 | if (!pkg || !pkg.name || !pkg.repository) return 15 | 16 | /* uncomment for debug 17 | if (totalPackages > 500 * 1000) { 18 | console.log(pkg.name) 19 | } 20 | // */ 21 | 22 | const repo = (pkg.repository.url) ? 
/**
 * Finish the build: write every collected repository URL to `index.json`
 * (one directory above this script), keyed by package name in sorted order,
 * then end the process.
 *
 * NOTE(review): package.json points `main` at data/packages.json; confirm
 * that index.json is still the intended output of this script.
 */
function done () {
  console.log('\ndone!')

  const target = path.join(__dirname, '../index.json')
  const ordered = {}

  // Each entry is removed from `repos` as soon as it has been copied, so the
  // collection is empty once the loop ends.
  Object.keys(repos).sort().forEach((name) => {
    ordered[name] = repos[name]
    delete repos[name]
  })

  fs.writeFileSync(target, JSON.stringify(ordered, null, 2))
  process.exit()
}
'npm-replication-opt-in': 'true' // See https://github.com/orgs/community/discussions/152515 13 | }, 14 | timeout: { 15 | request: 60 * 1000 16 | } 17 | }) 18 | 19 | const to = { 20 | github: require('github-url-to-object'), 21 | bitbucket: require('bitbucket-url-to-object') 22 | } 23 | 24 | const replicateUrl = 'https://replicate.npmjs.com/registry' 25 | const registryUrl = 'https://registry.npmjs.org' 26 | 27 | events.setMaxListeners(Infinity) 28 | 29 | /** 30 | * Where the support files are stored 31 | */ 32 | const files = { 33 | packages: path.join(__dirname, '../data/packages.json'), 34 | metadata: path.join(__dirname, '../data/metadata.json') 35 | } 36 | 37 | const packages = new Map(fs.existsSync(files.packages) ? Object.entries(require(files.packages)) : []) 38 | const metadata = fs.existsSync(files.metadata) ? require(files.metadata) : {} 39 | metadata.notFound = new Map(metadata.notFound) 40 | 41 | /** 42 | * State info about the changes being processed 43 | */ 44 | const batch = { 45 | status: 'init', 46 | // How many changes to apply during the process (or `<= 0` to disable) 47 | limit: process.env.BATCH_LIMIT * 1 || 0 48 | } 49 | 50 | /** 51 | * Statistics gathered during the process 52 | */ 53 | const stats = { 54 | // revisions processed 55 | changes: 0, 56 | // operations executed 57 | inserts: 0, 58 | updates: 0, 59 | deletes: 0, 60 | // wrong repo urls 61 | invalid: 0, 62 | // unable to process 63 | ignored: 0, 64 | // 404 response from the registry; most likely deleted in a later change 65 | notFound: 0 66 | } 67 | 68 | /** 69 | * Stats about the repos 70 | */ 71 | const repos = { 72 | unsets: 0, 73 | github: 0, 74 | gitlab: 0, 75 | bitbucket: 0, 76 | others: 0 77 | } 78 | 79 | /** 80 | * Cache changes to disk, for faster historical rebuild 81 | */ 82 | const caches = { 83 | 84 | // Directory to temporarily store changes, or `null` to disable 85 | path: process.env.CACHE_DIR 86 | ? 
path.resolve(process.env.CACHE_DIR) 87 | : null, 88 | 89 | // How many entries per file 90 | size: 10000, 91 | 92 | // process cached files when starting up 93 | allowRead: process.env.CACHE_READ === '1' || false, 94 | 95 | // write consumed files to the file system 96 | allowWrite: process.env.CACHE_WRITE === '1' || false 97 | } 98 | 99 | /** 100 | * Display update progress animation 101 | */ 102 | const progress = { 103 | // minimum time to shown something (in millis) 104 | delay: 1000 * 60 * 5, 105 | // space between changes, range (0, 100) 106 | steps: 5.0, 107 | // min percent change 108 | scale: 0.01 109 | } 110 | 111 | const millis = 1 112 | const seconds = 1000 * millis 113 | const minutes = 60 * seconds 114 | const hours = 60 * minutes 115 | 116 | /** 117 | * Maximum allowed run time 118 | */ 119 | const killAfter = process.env.KILL_AFTER_MILLIS * 1 || 5.75 * hours 120 | 121 | const setupBatch = async () => { 122 | const remoteMeta = await got(`${replicateUrl}/`).json() 123 | 124 | // 1. was the previous ran an error? 125 | 126 | if (metadata.error) { 127 | const previousLimit = (metadata.batch && 128 | metadata.batch.limit) || 129 | batch.limit 130 | 131 | if (previousLimit > 1) { 132 | // attempt only the problematic change 133 | batch.limit = 1 134 | } else { 135 | // whatever, skip the bad one 136 | metadata.ignored = metadata.ignored || [] 137 | metadata.ignored.push(metadata.last) 138 | metadata.last += 1 139 | stats.ignored += 1 140 | } 141 | } 142 | 143 | // 2. setup batch limits 144 | 145 | batch.latest = remoteMeta.update_seq 146 | batch.index = 147 | batch.since = metadata.last || 0 148 | batch.until = Math.min( 149 | batch.limit > 0 150 | ? batch.since + batch.limit 151 | : Infinity 152 | , 153 | batch.latest 154 | ) 155 | 156 | // 3. 
/**
 * Turn the packages map into a plain object whose keys were inserted in
 * sorted order.
 *
 * The map is drained while it is copied: every key is deleted right after
 * its value has been moved, so `packagesMap` is empty when this returns.
 *
 * @param {Map} packagesMap
 * @return {object}
 */
const toSortedObject = (packagesMap) => {
  const names = Array.from(packagesMap.keys())
  names.sort()

  return names.reduce((result, name) => {
    result[name] = packagesMap.get(name)
    packagesMap.delete(name)
    return result
  }, {})
}
235 | if (metadata.notFound.has(change.id)) { 236 | stats.notFound -= 1 237 | metadata.notFound.delete(change.id) 238 | debug('deleting previous error for', name, change) 239 | } 240 | 241 | if (change.deleted) { 242 | if (typeof curr !== 'undefined') { 243 | updateRepoStats(curr, -1) 244 | } 245 | 246 | stats.deletes += 1 247 | return packages.delete(name) 248 | } 249 | 250 | const changeUrl = extractUrl(change) 251 | const parsedUrl = parseUrl(changeUrl) || null 252 | 253 | if (changeUrl && !parsedUrl) { 254 | stats.invalid += 1 255 | return 256 | } 257 | 258 | if (typeof curr === 'undefined') { 259 | stats.inserts += 1 260 | } else { 261 | stats.updates += 1 262 | updateRepoStats(curr, -1) 263 | } 264 | 265 | packages.set(name, parsedUrl) 266 | updateRepoStats(parsedUrl, +1) 267 | } 268 | 269 | /** 270 | * @return {string} - repo url 271 | */ 272 | const extractUrl = (change) => { 273 | const repo = change.doc && change.doc.repository 274 | 275 | return typeof repo === 'string' 276 | ? 
// Providers recognised by the start of a repository hostname.
const TYPES = [
  { name: 'github', rule: /^github\./ },
  { name: 'gitlab', rule: /^gitlab\./ },
  { name: 'bitbucket', rule: /^bitbucket\./ }
]

/**
 * Read the hostname of a repository URL.
 *
 * @param {string} repoUrl
 * @return {string|undefined} hostname without a leading "www.", or
 *   `undefined` when the value cannot be parsed or has no hostname
 */
const extractDomain = (repoUrl) => {
  let parsed

  try {
    // eslint-disable-next-line n/no-deprecated-api
    parsed = urlParser.parse(repoUrl)
  } catch (err) {
    return undefined
  }

  const { hostname } = parsed

  return typeof hostname === 'string'
    ? hostname.replace(/^www\./i, '')
    : undefined
}

/**
 * Classify a stored repository value into one of the repo counters.
 *
 * @param {string|null} url - stored repository URL; `null` means no repository
 * @return {string} 'unsets' for `null`, the name of the first matching
 *   entry in TYPES, or 'others' when nothing matches
 */
const extractType = (url) => {
  if (url === null) {
    return 'unsets'
  }

  const domain = extractDomain(url)
  const provider = domain
    ? TYPES.find(({ rule }) => rule.test(domain))
    : undefined

  return provider ? provider.name : 'others'
}
/**
 * Decide the status label recorded for this run.
 *
 * An already-set `batch.status` always wins. Otherwise a run that found no
 * changes is flagged as an error and labelled 'empty'; anything else is 'ok'.
 *
 * NOTE(review): `batch` is created with status 'init' in this file, so unless
 * something clears it the two fallbacks below are not reached. Confirm that
 * is intended.
 *
 * @return {string}
 */
const buildStatus = () => {
  const { status } = batch

  if (status) {
    return status
  }

  if (batch.found !== 0) {
    return 'ok'
  }

  batch.error = true
  return 'empty'
}
/**
 * Flush the buffered change entries to the next free `<n>.json` file in the
 * cache directory.
 *
 * Does nothing when caching is disabled (no `caches.path`) or when there is
 * nothing buffered. Otherwise it creates the directory if needed, writes one
 * file, empties `caches.buffer` and remembers the file number in
 * `caches.index` so the next flush starts searching after it.
 */
const writeCache = () => {
  if (!caches.path) {
    return // cache is disabled
  }

  if (!caches.buffer || caches.buffer.length === 0) {
    return // empty cache
  }

  // The directory may not exist yet; without this the first write fails
  // with ENOENT.
  fs.mkdirSync(caches.path, { recursive: true })

  // `??` rather than `||`: index 0 is a real file number, not "unset".
  let next = caches.index ?? -1
  let file

  // Never overwrite: skip numbers already taken by earlier flushes or runs.
  do {
    next += 1
    file = path.join(caches.path, `${next}.json`)
  } while (fs.existsSync(file))

  // Tab indentation instead of the default two spaces, to keep files smaller.
  fs.writeFileSync(file, toJson(caches.buffer, '\t'))

  caches.buffer.length = 0
  caches.index = next
}
/**
 * Follows the registry change feed one page at a time and re-emits every
 * entry in feed order.
 *
 * Events:
 *  - 'change'  (change)         entry with its registry document attached as
 *                               `doc` (deleted entries are passed through
 *                               without a document)
 *  - 'error'   (error, change)  fetching the document for an entry failed
 *  - 'catchup'                  nothing left to process: the limit is used up
 *                               or missing, or the feed returned an empty page
 *  - 'stop'                     `stop()` was called
 */
class Follower extends EventEmitter {
  /**
   * @param {object} options
   * @param {number} options.since - sequence to resume after
   * @param {number} [options.limit] - maximum number of entries to process;
   *   when missing or not positive there is nothing to fetch
   */
  constructor ({ since, limit }) {
    super()
    this.since = since
    this.limit = limit
    this.abortController = null
  }

  /**
   * Begin following. Calling it again while already running is a no-op.
   *
   * @return {Follower} this
   */
  start () {
    if (this.abortController) {
      return this
    }

    this.abortController = new AbortController()
    this.startInternal().catch(console.error)
    return this
  }

  /**
   * Fetch pages until aborted, the limit is used up, or the feed is drained.
   */
  async startInternal () {
    const signal = this.abortController.signal
    let retry = 0
    let drained = false

    while (!signal.aborted && this.limit > 0) {
      try {
        const body = await got(
          `${replicateUrl}/_changes`,
          {
            timeout: {
              request: 5 * 60 * 1000
            },
            searchParams: {
              since: this.since,
              limit: Math.min(this.limit, 10000)
            },
            retry: {
              limit: 0
            },
            signal
          }
        ).json()

        retry = 0

        if (body.last_seq) {
          this.since = body.last_seq
        }

        // An empty page means the feed has nothing newer than `since`.
        // Asking again straight away would only spin on the same request.
        if (!body.results || body.results.length === 0) {
          drained = true
          break
        }

        const lazyResults = asyncQueue(body.results, (change) => {
          // Don't attempt to fetch the doc for deleted packages.
          if (change.deleted) {
            return change
          }

          return got(`${registryUrl}/${change.id}`, {
            retry: {
              limit: 10
            },
            signal
          }).json().then((doc) => {
            return { ...change, doc }
          }).catch((error) => {
            return { ...change, error }
          })
        }, { concurrency: 40 })

        // The async generator ensures we process the changes in the right order,
        // while making multiple registry fetches in parallel.
        for await (const result of lazyResults) {
          if (signal.aborted) {
            break
          }

          if (result.error) {
            this.emit('error', result.error, result)
          } else {
            this.emit('change', result)
          }

          this.limit -= 1
        }

        debug(`processed ${body.results.length} changes`)
      } catch (e) {
        debug('[error]', e)

        if (!signal.aborted) {
          await backoff(++retry)
        }
      }
    }

    // `!(limit > 0)` also covers a missing limit. Without it a follower that
    // was given nothing to do never reports back and its owner waits forever.
    if (drained || !(this.limit > 0)) {
      this.emit('catchup')
    }

    this.abortController = null
  }

  /**
   * Abort any request in flight and announce the stop.
   */
  stop () {
    this.abortController?.abort()
    this.emit('stop')
  }
}
/**
 * Main
 *
 * Sets up the batch, replays the cached changes, then follows the registry
 * feed from `batch.since`. The feed is stopped when the batch reaches
 * `batch.until`, when the follower reports `catchup`, on a non-404 error,
 * on SIGTERM/SIGINT, or when the `killAfter` timer fires. The first `stop`
 * event hands the pending promise callbacks to `writeChanges`.
 */
// eslint-disable-next-line no-unused-expressions
!(async () => {
  await setupBatch()
  await processCached()

  console.log('batch:')
  const { limit, since, index, until, started } = batch
  console.log({ limit, since, index, until, started })

  // Only forward a limit that is a positive, finite number.
  const followerOptions = { since: batch.since }
  const hasUsableLimit = batch.limit > 0 && Number.isFinite(batch.limit)

  if (hasUsableLimit) {
    followerOptions.limit = batch.limit
  }

  const feed = new Follower(followerOptions)

  const onChange = (change) => {
    cache(change)
    apply(change)

    printProgress()

    if (batch.index >= batch.until) {
      console.log('finish!')
      feed.stop()
    }
  }

  const onCatchup = () => {
    console.log('up to date!')
    feed.stop()
  }

  const onError = (err, maybeChange) => {
    const isMissingDoc = err?.response?.statusCode === 404 &&
      maybeChange?.seq &&
      maybeChange?.id

    if (!isMissingDoc) {
      console.log('error!')
      batch.status = 'error'
      batch.error = err
      feed.stop()
      return
    }

    // A 404 for a known change is recorded instead of failing the batch.
    const { id, seq } = maybeChange
    const seqs = metadata.notFound.get(id) || []

    // Count each package once, the first time a 404 is seen for it.
    if (!seqs.length) {
      stats.notFound += 1
    }

    seqs.push(seq)
    metadata.notFound.set(id, seqs)
    debug(`ignoring 404 for ${id} (${seq})`)
  }

  // Shared by both termination signals.
  const onCancel = () => {
    console.log('cancelled!')
    batch.status = 'cancelled'
    feed.stop()
  }

  const onKillTimeout = () => {
    console.log('killed!')
    batch.error = true
    batch.status = 'killed'
    feed.stop()
  }

  feed.on('change', onChange)
  feed.on('catchup', onCatchup)
  feed.on('error', onError)

  process.once('SIGTERM', onCancel)
  process.once('SIGINT', onCancel)

  // SIGUSR1 forces a progress report.
  process.on('SIGUSR1', () => {
    printProgress(true)
  })

  setTimeout(onKillTimeout, killAfter)

  // Filled with the promise callbacks below, then handed to writeChanges.
  const deferred = {}

  feed.once('stop', () => {
    writeChanges(deferred)
  })

  return new Promise((resolve, reject) => {
    Object.assign(deferred, { resolve, reject })
    feed.start()
  })
})().then(
  () => {
    console.log(metadata)
    process.exit(0)
  },
  (err) => {
    console.error(err)
    process.exit(1)
  }
)
const { describe, it } = require('mocha')
const { expect } = require('chai')

const packages = require('../data/packages.json')
const metadata = require('../data/metadata.json')

// Adds up every bucket of `metadata.repos`.
const totalRepos = () => {
  let total = 0

  for (const amount of Object.values(metadata.repos)) {
    total = total + amount
  }

  return total
}

// Counts the entries of `packages` whose value satisfies `predicate`.
const countPackages = (predicate) => Object
  .values(packages)
  .filter(predicate)
  .length

// The `unsets` bucket, treated as zero when absent.
const unsetRepos = () => metadata.repos.unsets || 0

describe('metadata', () => {
  describe('repos', () => {
    it('should match repos', () => {
      const expected = Object.keys(packages).length

      expect(totalRepos()).to.equal(expected)
    })

    it('should match unsets', () => {
      const expected = countPackages(url => url === null)

      expect(unsetRepos()).to.equal(expected)
    })

    it('should match urls', () => {
      const expected = countPackages(Boolean)

      // Everything that is not an unset repository must have a URL.
      const withUrl = totalRepos() - unsetRepos()

      expect(withUrl).to.equal(expected)
    })
  })

  describe('stats', () => {
    it('should correctly count changes', () => {
      const { changes, inserts, updates, deletes, invalid } = metadata.stats

      expect(changes).to.equal(inserts + updates + deletes + invalid)
    })
  })
})
const { describe, it } = require('mocha')
const { expect } = require('chai')

const isUrl = require('is-url')
const repos = require('..')

// Lower bounds the published data set is expected to exceed.
const MIN_PACKAGES = 455 * 1000
const MIN_SCOPED_PACKAGES = 32 * 1000

describe('repos', () => {
  it('is an object with lots of values', () => {
    const names = Object.keys(repos)

    expect(names.length).to.be.above(MIN_PACKAGES)
  })

  it('sets URLs as values', () => {
    expect(repos.moby).to.equal('https://github.com/zeke/moby')
  })

  it('sets shorthand GitHub URLS to full URL', () => {
    expect(repos.express).to.equal('https://github.com/expressjs/express')
  })

  // A regular function is required here: `this.timeout` needs mocha's context.
  it('is always a URL or null', function () {
    this.timeout(30 * 1000)

    // This only asserts that both kinds of value occur in the data set;
    // it does not validate every single entry.
    const urls = Object.values(repos)
    const hasUrl = urls.some(url => isUrl(url))
    const hasNull = urls.some(url => url === null)

    expect(hasUrl, 'should contain packages with repos').to.equal(true)
    expect(hasNull, 'should contain packages without repos').to.equal(true)
  })

  it('includes scoped package names', () => {
    expect(repos['@angular/core']).to.equal('https://github.com/angular/angular')

    const scopedCount = Object
      .keys(repos)
      .filter(name => name.startsWith('@'))
      .length

    expect(scopedCount).to.be.above(MIN_SCOPED_PACKAGES)
  })
})
const { describe, it } = require('mocha')
const { expect } = require('chai')

describe('stats', () => {
  const fs = require('fs')
  const path = require('path')
  const tpl = require('../lib/stats-tpl')

  it('should match the readme table', () => {
    const readmePath = path.join(__dirname, '../readme.md')
    const readme = fs.readFileSync(readmePath)

    expect(readme).to.match(tpl.regex)
  })

  it('should build the correct table', () => {
    const repos = {
      github: 1,
      gitlab: 2,
      bitbucket: 3,
      unsets: 4,
      others: 5
    }

    // `packages` is the grand total over every repository bucket (15 here).
    let packages = 0
    for (const amount of Object.values(repos)) {
      packages += amount
    }

    const table = tpl.build({ repos, packages })

    expect(table).to.be.a('string')

    // Each row of the generated table, checked in the original order.
    const expectedPatterns = [
      tpl.regex,
      /With repository \| 11 \| 73.33%/,
      /Null repository \| 4 \| 26.67%/,
      /\*\*Total\*\* \| 15 \| 100.00%/,
      /GitHub \| 1 \| 6.67%/,
      /GitLab \| 2 \| 13.33%/,
      /Bitbucket \| 3 \| 20.00%/,
      /Others \| 5 \| 33.33%/,
      /\*\*Total\*\* \| 11 \| 73.33%/
    ]

    for (const pattern of expectedPatterns) {
      expect(table).to.match(pattern)
    }
  })
})