├── .vscode ├── settings.json └── launch.json ├── .prettierrc.js ├── .gitignore ├── package.json ├── constants.js ├── ver-pos-os-generator.js ├── position-crawler.js ├── README.md ├── .github └── workflows │ └── ci.yml └── version-position-crawler.js /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.defaultFormatter": "esbenp.prettier-vscode", 3 | "[javascript]": { 4 | "editor.defaultFormatter": "esbenp.prettier-vscode" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | // default 3 | printWidth: 80, 4 | useTabs: false, 5 | tabWidth: 2, 6 | bracketSpacing: true, 7 | 8 | // modify 9 | semi: false, 10 | singleQuote: true, 11 | trailingComma: 'es5', 12 | endOfLint: 'lf', 13 | } 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | /dist 4 | 5 | 6 | # local env files 7 | .env.local 8 | .env.*.local 9 | 10 | # Log files 11 | npm-debug.log* 12 | yarn-debug.log* 13 | yarn-error.log* 14 | pnpm-debug.log* 15 | 16 | # Editor directories and files 17 | .idea 18 | *.suo 19 | *.ntvs* 20 | *.njsproj 21 | *.sln 22 | *.sw? 
23 | 24 | # JSON data 25 | json/all-version.json 26 | json/position 27 | json/ver-pos-os 28 | json/ver-pos-os-link 29 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "download-chromium-old-version", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "version-position-crawler.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "license": "ISC", 12 | "dependencies": { 13 | "axios": "^0.21.1", 14 | "cheerio": "^1.0.0-rc.5", 15 | "crawler": "^1.3.0", 16 | "got": "^11.8.1", 17 | "html-dom-parser": "^1.0.0", 18 | "stdio": "^2.1.1" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "node", 9 | "request": "launch", 10 | "name": "Launch version-position-crawler.js", 11 | "skipFiles": ["<node_internals>/**"], 12 | "program": "${workspaceFolder}/version-position-crawler.js", 13 | "args": ["-m", "inc"] 14 | }, 15 | { 16 | "type": "node", 17 | "request": "launch", 18 | "name": "Launch position-crawler.js", 19 | "skipFiles": ["<node_internals>/**"], 20 | "program": "${workspaceFolder}/position-crawler.js" 21 | }, 22 | { 23 | "type": "node", 24 | "request": "launch", 25 | "name": "Launch ver-pos-os-generator.js", 26 | "skipFiles": ["<node_internals>/**"], 27 | "program": "${workspaceFolder}/ver-pos-os-generator.js" 28 | } 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /constants.js: -------------------------------------------------------------------------------- 1 | exports.OSList = [ 2 | 'Mac', 3 | 'Mac_Arm', 4 | 'Win_x64', 5 | 'Win', 6 | 'Linux_x64', 7 | 'Linux', 8 | 'Android', 9 | // 'Arm', 10 | ] 11 | 12 | exports.DownloadUrl = 13 | 'https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=' 14 | 15 | exports.VersionUrl = 'https://chromium.googlesource.com/chromium/src/+refs' 16 | 17 | exports.VersionPositionUrl = 'https://omahaproxy.appspot.com/deps.json?version=' 18 | 19 | exports.PosReplaceExample = 'prefix=Mac' 20 | exports.PosReplaceStr = 'prefix=' 21 | exports.PosQueryPageToken = 'pageToken=' 22 | 23 | exports.PositionUrl = 24 | 'https://www.googleapis.com/storage/v1/b/chromium-browser-snapshots/o?delimiter=/&prefix=Mac/&fields=items(kind,mediaLink,metadata,name,size,updated),kind,prefixes,nextPageToken' 25 | 26 | exports.PosRegex = /^[0-9]+$/ 27 | exports.VerRegex = /^[0-9][0-9.]*[0-9]$/ 28 | 29 | const FileName = { 30 | allVersion: 'all-version.json', 31 | versionPosition: 'version-position.json', 32 | positionPrefix: 'position-', 33 | osVerPosPrefix: 
'version-position-', 34 | osVerPosLinkPrefix: 'version-position-link-', 35 | } 36 | exports.FileName = FileName 37 | 38 | exports.getPosOsJson = (os) => { 39 | return `${FileName.positionPrefix}${os}.json` 40 | } 41 | exports.getVerPosOsJson = (os) => { 42 | return `${FileName.osVerPosPrefix}${os}.json` 43 | } 44 | exports.getVerPosLinkOsJson = (os) => { 45 | return `${FileName.osVerPosLinkPrefix}${os}.json` 46 | } 47 | 48 | exports.Dir = { 49 | base: 'json', 50 | verPosOs: 'ver-pos-os', 51 | verPosOSLink: 'ver-pos-os-link', 52 | position: 'position', 53 | } 54 | -------------------------------------------------------------------------------- /ver-pos-os-generator.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const path = require('path') 3 | 4 | const { 5 | OSList, 6 | PosRegex, 7 | getVerPosOsJson, 8 | getVerPosLinkOsJson, 9 | getPosOsJson, 10 | Dir, 11 | FileName, 12 | DownloadUrl, 13 | } = require('./constants') 14 | 15 | const osVerPosDir = path.join(Dir.base, Dir.verPosOs) 16 | 17 | main() 18 | 19 | function main() { 20 | let VerPosMap = fs.readFileSync( 21 | path.join(Dir.base, FileName.versionPosition), 22 | 'utf8' 23 | ) 24 | VerPosMap = JSON.parse(VerPosMap) 25 | Object.keys(VerPosMap).forEach((ver) => { 26 | const pos = VerPosMap[ver] 27 | // remove invalid format 28 | if (!PosRegex.test(pos)) { 29 | delete VerPosMap[ver] 30 | } 31 | }) 32 | 33 | OSList.forEach((os) => { 34 | let PosArr = fs.readFileSync( 35 | path.join(Dir.base, Dir.position, getPosOsJson(os)), 36 | 'utf8' 37 | ) 38 | PosArr = JSON.parse(PosArr) 39 | let posMap = {} 40 | PosArr.filter((pos) => PosRegex.test(pos)).forEach((pos) => { 41 | posMap[pos] = pos 42 | }) 43 | 44 | let verPosMap = {} 45 | Object.keys(VerPosMap).forEach((ver) => { 46 | const pos = VerPosMap[ver] 47 | if (posMap[pos]) { 48 | verPosMap[ver] = pos 49 | } 50 | }) 51 | if (!fs.existsSync(osVerPosDir)) { 52 | fs.mkdirSync(osVerPosDir, { recursive: 
true }) 53 | } 54 | 55 | fs.writeFileSync( 56 | path.join(osVerPosDir, getVerPosOsJson(os)), 57 | JSON.stringify(verPosMap, null, 2) 58 | ) 59 | 60 | const verPosLinkMap = Object.keys(verPosMap).reduce((all, item) => { 61 | const pos = verPosMap[item] 62 | all[item] = `${DownloadUrl}${os}/${pos}/` 63 | return all 64 | }, {}) 65 | const osVerPosLinkDir = path.join(Dir.base, Dir.verPosOSLink) 66 | if (!fs.existsSync(osVerPosLinkDir)) { 67 | fs.mkdirSync(osVerPosLinkDir, { recursive: true }) 68 | } 69 | fs.writeFileSync( 70 | path.join(osVerPosLinkDir, getVerPosLinkOsJson(os)), 71 | JSON.stringify(verPosLinkMap, null, 2) 72 | ) 73 | 74 | console.log(`${os} finish all -----------------------------`) 75 | }) 76 | } 77 | -------------------------------------------------------------------------------- /position-crawler.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const path = require('path') 3 | const got = require('got') 4 | 5 | const { 6 | OSList, 7 | PosReplaceExample, 8 | PosReplaceStr, 9 | PosQueryPageToken, 10 | getPosOsJson, 11 | PositionUrl, 12 | PosRegex, 13 | Dir, 14 | } = require('./constants') 15 | 16 | main() 17 | 18 | // about sort: https://stackoverflow.com/a/38641281/2752670 19 | const collator = new Intl.Collator(undefined, { 20 | numeric: true, 21 | sensitivity: 'base', 22 | }) 23 | 24 | async function main() { 25 | let promiseAll = [] 26 | OSList.forEach((os) => { 27 | promiseAll.push(doIt(os)) 28 | }) 29 | await Promise.all(promiseAll) 30 | // for (let i = 0; i < 1; i++) { 31 | // await doIt('Mac') 32 | // } 33 | } 34 | 35 | async function doIt(os, posArr, pageToken) { 36 | try { 37 | if (!posArr) { 38 | posArr = [] 39 | } 40 | let url = PositionUrl.replace(PosReplaceExample, `${PosReplaceStr}${os}`) 41 | if (pageToken) { 42 | url = `${url}&${PosQueryPageToken}${pageToken}` 43 | } 44 | const resp = await got(url) 45 | const json = JSON.parse(resp.body) 46 | if (!json.prefixes) { 
47 | console.log(`${os} no prefixes`) 48 | return 49 | } 50 | json.prefixes.forEach((item) => { 51 | const arr = item.split('/') 52 | if (arr.length < 2) { 53 | console.log(`${os} position not correct: ${item}`) 54 | return 55 | } 56 | const posStr = arr[1] 57 | if (!PosRegex.test(posStr)) { 58 | console.log(`${os} position not correct: ${item}`) 59 | return 60 | } 61 | posArr.push(posStr) 62 | }) 63 | 64 | // console.log(url) 65 | console.log(`${os} success count: ${posArr.length}`) 66 | console.log('-------------------------------') 67 | 68 | if (json.nextPageToken) { 69 | return doIt(os, posArr, json.nextPageToken) 70 | } else { 71 | posArr.sort((a, b) => collator.compare(a, b)) 72 | const posDir = path.join(Dir.base, Dir.position) 73 | if (!fs.existsSync(posDir)) { 74 | fs.mkdirSync(posDir, { recursive: true }) 75 | } 76 | 77 | fs.writeFileSync( 78 | path.join(posDir, getPosOsJson(os)), 79 | JSON.stringify(posArr, null, 2) 80 | ) 81 | console.log(`${os} finish all -----------------------------`) 82 | return posArr 83 | } 84 | } catch (e) { 85 | console.log(e) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # chromium-history-version-crawler 2 | 3 | Crawlers that produce a mapping from Chromium versions to `chromium_base_position`. 4 | 5 | Result JSON data: https://github.com/vikyd/chromium-history-version-position 6 | 7 | View page: https://vikyd.github.io/download-chromium-history-version/ 8 | 9 | [TOC] 10 | 11 | # Steps 12 | 13 | All output JSON files are located in the `json` folder. 14 | 15 | ## Step1 16 | 17 | Find all available Chromium versions, then find the `chromium_base_position` of each version. Then generate `all-version.json` and `version-position.json`. 
18 | 19 | ```sh 20 | # doc: `node version-position-crawler.js -h` 21 | # `all mode` or `incremental mode` 22 | node version-position-crawler.js 23 | ``` 24 | 25 | `all-version.json`: 26 | 27 | ```json 28 | [ 29 | "90.0.4399.1", 30 | "90.0.4399.0", 31 | "90.0.4398.1", 32 | "90.0.4398.0" 33 | // ... 34 | ] 35 | ``` 36 | 37 | `version-position.json`: 38 | 39 | ```json 40 | { 41 | "90.0.4399.1": "846615", 42 | "90.0.4399.0": "846615", 43 | "90.0.4398.1": "846545", 44 | "90.0.4398.0": "846545" 45 | // ... 46 | } 47 | ``` 48 | 49 | ## Step2 50 | 51 | Find all available Chromium `chromium_base_position` values for each OS, then generate `position/position-Mac.json` etc. 52 | 53 | ```sh 54 | node position-crawler.js 55 | # about 90 seconds 56 | ``` 57 | 58 | `position-Mac.json`: 59 | 60 | ```json 61 | [ 62 | "15734", 63 | "15749", 64 | "15839", 65 | "15942" 66 | // ... 67 | ] 68 | ``` 69 | 70 | ## Step3 71 | 72 | Compare `version-position.json` with each `position/position-[os].json`, extract the intersection of the `chromium_base_position` values, and generate the final JSON files: `ver-pos-os/version-position-Mac.json` etc. 73 | 74 | ```sh 75 | node ver-pos-os-generator.js 76 | ``` 77 | 78 | `version-position-Mac.json`: 79 | 80 | ```json 81 | { 82 | "90.0.4398.1": "846545", 83 | "90.0.4398.0": "846545", 84 | "90.0.4396.2": "845872", 85 | "90.0.4396.1": "845872" 86 | // ... 87 | } 88 | ``` 89 | 90 | # json steps 91 | 92 | ``` 93 | all-version.json -> version-position.json -> 94 | -> version-position-[os].json 95 | position-[os].json -> 96 | ``` 97 | 98 | # Full mode 99 | 100 | It is hard to fetch all positions successfully in a single loop. 101 | 102 | If you really want a full fetch, you may need to modify the parameters in the js file. 
103 | 104 | # Missing data 105 | 106 | Because of some problems of the position api: 107 | 108 | - data OK: https://omahaproxy.appspot.com/deps.json?version=87.0.4253.0 109 | - no data: https://omahaproxy.appspot.com/deps.json?version=33.0.1733.0 110 | 111 | Most version before v38.0.0.0 is missing. 112 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: 'Crawl and deploy' 2 | 3 | on: 4 | schedule: 5 | # - cron: '0 0,4,8,12,16,20 * * *' 6 | - cron: '0 10 1 * *' 7 | push: 8 | paths-ignore: 9 | - 'json/version-position.json' 10 | branches: 11 | - master 12 | jobs: 13 | crawler: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v3 18 | - uses: actions/setup-node@v3 19 | with: 20 | node-version: 20 21 | - name: Install 22 | run: npm install 23 | - name: 'Get position by version' 24 | run: node version-position-crawler.js 25 | - name: 'Get position list' 26 | run: node position-crawler.js 27 | - name: 'Generate Finial JSON' 28 | run: node ver-pos-os-generator.js 29 | - uses: stefanzweifel/git-auto-commit-action@v4 30 | with: 31 | file_pattern: json/version-position.json 32 | commit_message: 'feat: update JSON file' 33 | commit_user_name: vikyd 34 | commit_user_email: zwj_fhys@qq.com 35 | - name: 'Push to JSON repo' 36 | uses: crykn/copy_folder_to_another_repo_action@v1.0.6 37 | env: 38 | API_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN }} 39 | with: 40 | source_folder: 'json' 41 | destination_repo: 'vikyd/chromium-history-version-position' 42 | destination_folder: 'json' 43 | user_email: 'zwj_fhys@qq.com' 44 | user_name: 'vikyd' 45 | commit_msg: 'feat: update JSON data' 46 | - name: 'Push to frontend repo' 47 | uses: crykn/copy_folder_to_another_repo_action@v1.0.6 48 | env: 49 | API_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN }} 50 | with: 51 | source_folder: 'json/ver-pos-os' 52 | 
destination_repo: 'vikyd/download-chromium-history-version-src' 53 | destination_folder: 'public/json/ver-pos-os' 54 | user_email: 'zwj_fhys@qq.com' 55 | user_name: 'vikyd' 56 | commit_msg: 'feat: update JSON data' 57 | frontend: 58 | needs: crawler 59 | runs-on: ubuntu-latest 60 | steps: 61 | - name: Checkout 62 | uses: actions/checkout@v3 63 | with: 64 | repository: 'vikyd/download-chromium-history-version-src' 65 | - uses: actions/setup-node@v3 66 | with: 67 | node-version: 20 68 | - name: Install 69 | run: npm install 70 | - name: Build 71 | run: npm run build 72 | - name: 'Get not modify from github.io repo' 73 | run: curl https://raw.githubusercontent.com/vikyd/download-chromium-history-version/master/README.md --output dist/README.md 74 | - name: 'Push to github.io repo' 75 | uses: cpina/github-action-push-to-another-repository@main 76 | env: 77 | API_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN }} 78 | with: 79 | source-directory: 'dist' 80 | destination-repository-name: 'download-chromium-history-version' 81 | target-branch: 'master' 82 | destination-github-username: 'vikyd' 83 | user-email: 'zwj_fhys@qq.com' 84 | commit-message: 'feat: update JSON data' 85 | 86 | 87 | -------------------------------------------------------------------------------- /version-position-crawler.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const path = require('path') 3 | const cheerio = require('cheerio') 4 | const got = require('got') 5 | const Crawler = require('crawler') 6 | const { getopt } = require('stdio') 7 | const { 8 | VersionUrl, 9 | VersionPositionUrl, 10 | FileName, 11 | Dir, 12 | VerRegex, 13 | } = require('./constants') 14 | 15 | const Modes = { 16 | inc: 'inc', 17 | all: 'all', 18 | } 19 | 20 | const AllDefaultOpts = { 21 | mode: Modes.inc, 22 | maxConnections: 10, 23 | loopTimes: 10, 24 | loopInterval: 5, 25 | } 26 | 27 | const IncDefaultOpts = { 28 | mode: Modes.inc, 29 | maxConnections: 3, 30 
| loopTimes: 3, 31 | loopInterval: 3, 32 | } 33 | 34 | const receivedOpts = getopt({ 35 | mode: { 36 | key: 'm', 37 | description: 38 | '`all`: first crawl positions of all version, or `inc`: incremental mode(much less request and less error)', 39 | default: Modes.inc, 40 | }, 41 | maxConnections: { 42 | key: 'c', 43 | description: 44 | 'max connections of crawling positions, default `all`: 10, `inc`: 3', 45 | }, 46 | loopTimes: { 47 | key: 'l', 48 | description: 49 | 'times to loop all needed fetch versions, default `all`: 10, `inc`: 3', 50 | }, 51 | loopInterval: { 52 | key: 'i', 53 | description: 'seconds between each loop, default `all`: 5, `inc`: 3', 54 | }, 55 | beginVerIndex: { 56 | key: 'b', 57 | default: 0, 58 | }, 59 | saveStepSize: { 60 | key: 's', 61 | description: 62 | 'used for `all` only, save to file after this count of request', 63 | default: 1000, 64 | }, 65 | }) 66 | 67 | let opts = 68 | receivedOpts.mode === Modes.all 69 | ? { ...AllDefaultOpts } 70 | : { ...IncDefaultOpts } 71 | opts = { ...opts, ...receivedOpts } 72 | 73 | // entry 74 | main() 75 | 76 | async function main() { 77 | if (!fs.existsSync(Dir.base)) { 78 | fs.mkdirSync(Dir.base, { recursive: true }) 79 | } 80 | if (opts.mode === Modes.all) { 81 | console.log('mode: all') 82 | await mainFirstFull() 83 | } else { 84 | console.log('mode: increment') 85 | await mainIncreace() 86 | } 87 | console.log('all finished --------------------') 88 | } 89 | 90 | // ------------------------- 91 | 92 | async function mainIncreace() { 93 | for (let i = 1; i <= opts.loopTimes; i++) { 94 | console.log(`round ${i} begin ...`) 95 | try { 96 | const isFinish = await doIt(opts.beginVerIndex) 97 | if (isFinish) { 98 | break 99 | } 100 | } catch (e) { 101 | console.error(e) 102 | } 103 | await sleep(opts.loopInterval * 1000) 104 | console.error(`round ${i} end ---------------`) 105 | } 106 | } 107 | 108 | async function mainFirstFull() { 109 | for (let i = 1; i <= opts.loopTimes; i++) { 110 | 
console.log(`round ${i} begin ...`) 111 | const MaxVerCount = 30000 112 | const halfStep = Math.floor(opts.saveStepSize / 2) 113 | for (let j = opts.beginVerIndex; j < MaxVerCount; j += halfStep) { 114 | try { 115 | await doIt(j) 116 | } catch (e) { 117 | console.error(e) 118 | } 119 | } 120 | await sleep(opts.loopInterval * 1000) 121 | console.error(`round ${i} end ---------------`) 122 | } 123 | } 124 | 125 | function sleep(ms) { 126 | return new Promise((resolve) => { 127 | setTimeout(resolve, ms) 128 | }) 129 | } 130 | 131 | // about sort: https://stackoverflow.com/a/38641281/2752670 132 | const collator = new Intl.Collator(undefined, { 133 | numeric: true, 134 | sensitivity: 'base', 135 | }) 136 | 137 | async function doIt(beginIdx) { 138 | const resp = await got(VersionUrl) 139 | const $ = cheerio.load(resp.body) 140 | const $titles = $('.RefList-title') 141 | let $RefListItems = [] 142 | $titles.each(function (i, item) { 143 | const $item = $(this) 144 | const text = $item.text() 145 | console.log(`version page: ${text}`) 146 | if (text === 'Tags') { 147 | $RefListItems = $item.next().children('li') 148 | } 149 | }) 150 | if ($RefListItems.length === 0) { 151 | console.error('no version data') 152 | return false 153 | } 154 | console.log(`version count raw: ${$RefListItems.length}`) 155 | 156 | let versions = [] 157 | $RefListItems.each(function (i, item) { 158 | const $item = $(this) 159 | const ver = $item.text() 160 | const v = ver.trim() 161 | if (VerRegex.test(v)) { 162 | versions.push(ver.trim()) 163 | } else { 164 | console.log(`invalid version: ${ver}`) 165 | } 166 | }) 167 | console.log(`version count valid: ${versions.length}`) 168 | versions.sort((a, b) => collator.compare(b, a)) 169 | fs.writeFileSync( 170 | path.join(Dir.base, FileName.allVersion), 171 | JSON.stringify(versions, null, 2) 172 | ) 173 | 174 | // ---- begin to find position --------------- 175 | let verPosMap = {} 176 | const crawler = new Crawler({ 177 | jQuery: false, 178 | 
maxConnections: 3, 179 | // This will be called for each crawled data 180 | callback: function (error, res, done) { 181 | if (error) { 182 | console.log(error) 183 | done() 184 | return 185 | } 186 | let $ = res.$ 187 | let bodyObj = '' 188 | const v = res.options.myData 189 | if (res.body.includes('Traceback')) { 190 | console.log(`----- ${v} ---- ${res.body} ----`) 191 | verPosMap[v] = 'ErrorWithTraceback' 192 | done() 193 | return 194 | } 195 | try { 196 | bodyObj = JSON.parse(res.body) 197 | } catch (e) { 198 | console.log(`----- ${v} ---- ${res.body} ----`) 199 | done() 200 | return 201 | } 202 | const pos = bodyObj.chromium_base_position 203 | if (!pos) { 204 | // `null` pos will also be recorded 205 | // `null` pos is not changed after any times of request 206 | console.log(`version ${v} position ${pos}`) 207 | } else { 208 | console.log(`version ${v} match position ${pos}`) 209 | } 210 | verPosMap[v] = pos 211 | done() 212 | }, 213 | }) 214 | 215 | // read cached json, can avoid lots of request 216 | const oldVerPosStr = fs.readFileSync( 217 | path.join(Dir.base, FileName.versionPosition), 218 | 'utf8' 219 | ) 220 | const oldVerPosMap = JSON.parse(oldVerPosStr) 221 | versions = versions.filter((v) => !oldVerPosMap.hasOwnProperty(v)) 222 | 223 | const needToFetchCount = versions.length 224 | console.log(`need to fetch count: ${needToFetchCount}`) 225 | 226 | if (versions.length === 0) { 227 | return true 228 | } 229 | 230 | let count = 0 231 | for (let i = beginIdx; i < versions.length; i++) { 232 | const v = versions[i] 233 | if (i === 1) { 234 | console.log(`first fetch version: ${v}`) 235 | } 236 | const url = `${VersionPositionUrl}${v}` 237 | crawler.queue({ uri: url, myData: v }) 238 | count++ 239 | if (opts.mode === Modes.all) { 240 | // each step size save to file, in case of loss of crawled data 241 | if (count >= opts.saveStepSize) { 242 | break 243 | } 244 | } 245 | } 246 | 247 | return await new Promise((resolve, reject) => { 248 | 
crawler.on('drain', function () { 249 | console.log('crawler drain event') 250 | let allVerPosMap = Object.assign({}, oldVerPosMap, verPosMap) 251 | const successCount = 252 | Object.keys(allVerPosMap).length - Object.keys(oldVerPosMap).length 253 | console.log(`success count: ${successCount}`) 254 | 255 | // sort keys 256 | allVerPosMap = Object.entries(allVerPosMap) 257 | .sort((a, b) => collator.compare(b, a)) 258 | .reduce((o, [k, v]) => ((o[k] = v), o), {}) 259 | fs.writeFileSync( 260 | path.join(Dir.base, FileName.versionPosition), 261 | JSON.stringify(allVerPosMap, null, 2) 262 | ) 263 | 264 | const FName = __filename.slice(__dirname.length + 1) 265 | console.log(`${FName}: finish`) 266 | 267 | // remember to resolve 268 | resolve(successCount === needToFetchCount) 269 | }) 270 | }) 271 | } 272 | --------------------------------------------------------------------------------