├── .dockerignore ├── .env.example ├── .eslintignore ├── .eslintrc.js ├── .github └── workflows │ ├── api-control.yml │ ├── publish.yml │ ├── renovate.yml │ └── validate.yml ├── .gitignore ├── .husky ├── .gitignore └── pre-commit ├── .nvmrc ├── .prettierrc ├── .renovaterc.json ├── .yarn ├── plugins │ └── @yarnpkg │ │ └── plugin-workspace-tools.cjs └── releases │ └── yarn-3.2.2.cjs ├── .yarnrc.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── Procfile ├── README.md ├── algolia.png ├── elastic-apm-node.js ├── jest.config.js ├── jsdelivr.png ├── package.json ├── release.config.js ├── scripts ├── publish-check.mjs ├── publish-docker ├── publish-github └── test-api-control ├── src ├── @types │ ├── nice-package.ts │ └── pkg.ts ├── StateManager.ts ├── __tests__ │ ├── StateManager.test.ts │ ├── __snapshots__ │ │ └── formatPkg.test.ts.snap │ ├── api-control │ │ └── npm.test.ts │ ├── bootstrap.test.ts │ ├── changelog.test.ts │ ├── config.test.ts │ ├── formatPkg.test.ts │ ├── preact-simplified.ts │ ├── rawPackages.ts │ └── saveDocs.test.ts ├── algolia │ └── index.ts ├── api.ts ├── bootstrap.ts ├── changelog.ts ├── config.ts ├── errors.ts ├── formatPkg.ts ├── index.ts ├── indexers │ ├── Indexer.ts │ ├── MainBootstrapIndexer.ts │ ├── MainIndexer.ts │ ├── MainWatchIndexer.ts │ ├── OneTimeBackgroundIndexer.ts │ └── PeriodicBackgroundIndexer.ts ├── jsDelivr │ ├── __test__ │ │ ├── __snapshots__ │ │ │ └── index.test.ts.snap │ │ ├── index.test.ts │ │ └── pkgTypes.test.ts │ ├── index.ts │ └── pkgTypes.ts ├── npm │ ├── ChangesReader.ts │ ├── Prefetcher.ts │ ├── __tests__ │ │ └── index.test.ts │ ├── index.ts │ └── types.ts ├── saveDocs.ts ├── typescript │ ├── index.test.ts │ └── index.ts ├── utils │ ├── MetricCollector.ts │ ├── datadog.ts │ ├── log.ts │ ├── request.ts │ ├── sentry.ts │ ├── time.ts │ └── wait.ts └── watch.ts ├── tsconfig.json └── yarn.lock /.dockerignore: -------------------------------------------------------------------------------- 1 | # 
Dependencies 2 | **/node_modules 3 | 4 | # Useless and heavy folders 5 | **/dist 6 | coverage/ 7 | junit/ 8 | 9 | # Logs 10 | **/*.log 11 | **/.env* 12 | 13 | # Other useless files in the image 14 | cypress/ 15 | .git/ 16 | .github/ 17 | .githooks/ 18 | .husky/ 19 | .nodemon.json 20 | .editorconfig 21 | .gitattributes 22 | .coveralls.yml 23 | .prettierignore 24 | .prettierrc.js 25 | .eslintrc.js 26 | .nvmrc 27 | .npmrc 28 | .eslintignore 29 | .eslinrcjs 30 | .tern-project 31 | cypress.json 32 | cloudbuild.yaml 33 | docker-compose.yml 34 | MAINTAINERS.md 35 | README.md 36 | CHANGELOG.md 37 | CONTRIBUTING.md 38 | **/*.test.ts 39 | **/*.test.tsx 40 | **/*.test.js 41 | **/*.stories.tsx 42 | **/*.spec.ts 43 | **/*.spec.js 44 | **/*.perf.ts 45 | package-lock.json 46 | renovate.json 47 | **/jest* 48 | **/.DS_Store 49 | .vscode 50 | **/.storybook/ 51 | **/__fixtures__/ 52 | **/__snapshots__/ 53 | **/__mocks__/ 54 | **/__mock__/ 55 | **/__tests__/ 56 | **/tsconfig.tsbuildinfo 57 | Procfile 58 | release.config.js 59 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | appId= 2 | apiKey= 3 | indexName=npm-search 4 | bootstrapIndexName=npm-search-bootstrap 5 | DOGSTATSD_HOST="localhost" 6 | SENTRY_DSN= 7 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | lib/ 2 | dist/ 3 | coverage/ 4 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable import/no-commonjs */ 2 | 3 | /** 4 | * @type {import('eslint').Linter.Config} 5 | */ 6 | const config = { 7 | extends: ['algolia', 'algolia/jest'], 8 | rules: { 9 | 'no-continue': 'off', 10 | 'valid-jsdoc': 'off', 11 | 'require-await': 
'off', 12 | }, 13 | overrides: [ 14 | { 15 | files: ['**/*.ts'], 16 | extends: ['algolia/typescript'], 17 | parser: '@typescript-eslint/parser', 18 | parserOptions: { 19 | sourceType: 'module', 20 | project: './tsconfig.json', 21 | }, 22 | rules: { 23 | 'consistent-return': 'off', 24 | 'no-dupe-class-members': 'off', 25 | 'import/extensions': [ 26 | 'error', 27 | { 28 | ignorePackages: true, 29 | pattern: { 30 | js: 'always', 31 | ts: 'never', 32 | }, 33 | }, 34 | ], 35 | }, 36 | }, 37 | ], 38 | }; 39 | 40 | module.exports = config; 41 | -------------------------------------------------------------------------------- /.github/workflows/api-control.yml: -------------------------------------------------------------------------------- 1 | name: API CONTROL 2 | on: 3 | schedule: 4 | # Everyday, at 10am 5 | - cron: '0 10 * * *' 6 | 7 | jobs: 8 | api-control: 9 | name: 🛂 API Control 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: 📁 Checkout code 13 | uses: actions/checkout@v4 14 | 15 | - name: ⚙️ Setup node 16 | uses: actions/setup-node@v3 17 | with: 18 | node-version: '18.18' 19 | cache: 'yarn' 20 | 21 | - name: 📦 Install dependencies 22 | run: yarn install --frozen-lockfile 23 | 24 | - name: 🛂 API Control 25 | run: GITHUB_RUN_ID="${{ github.run_id }}" yarn test:api-control 26 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: PUBLISH 2 | run-name: 🚀 Commit on ${{ github.ref_name }} 3 | 4 | on: 5 | push: 6 | branches: 7 | - master 8 | 9 | jobs: 10 | validate: 11 | name: ↪️ Validate 12 | uses: ./.github/workflows/validate.yml 13 | publish: 14 | name: 📦 Publish 15 | needs: validate 16 | runs-on: ubuntu-latest 17 | permissions: 18 | actions: write # To cancel the run 19 | contents: write # To add a new tag and push a new release 20 | packages: write # To add a new Docker package 21 | env: 22 | GITHUB_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} 23 | RUN_ID: ${{ github.run_id }} 24 | 25 | steps: 26 | - name: 📁 Checkout code 27 | uses: actions/checkout@v4 28 | 29 | - name: ⚙️ Setup node 30 | uses: actions/setup-node@v3 31 | with: 32 | node-version: '18.18' 33 | cache: 'yarn' 34 | 35 | - name: 📦 Install dependencies 36 | run: yarn install --frozen-lockfile 37 | 38 | - name: ❓ Check if a new version needs to be published 39 | id: publish-check 40 | # Note: We can't do: 41 | # echo "SHOULD_PUBLISH=$(yarn publish:check)" >> "$GITHUB_OUTPUT" 42 | # Because the echo command will always be considered a success, even if 43 | # the $(yarn publish:check) fails. This is why we need an intermediate 44 | # variable. 45 | run: > 46 | SHOULD_PUBLISH="$(yarn publish:check)" 47 | && echo "SHOULD_PUBLISH=$SHOULD_PUBLISH" >> "$GITHUB_OUTPUT" 48 | 49 | - name: ⏹️ Cancel run 50 | if: ${{ steps.publish-check.outputs.SHOULD_PUBLISH == 'no' }} 51 | run: gh run cancel "$RUN_ID" 52 | 53 | - name: 🆙 Bump version, tag commit, publish GitHub Release 54 | if: ${{ steps.publish-check.outputs.SHOULD_PUBLISH == 'yes' }} 55 | run: yarn publish:github 56 | 57 | - name: 🐋 Publish Docker image 58 | if: ${{ steps.publish-check.outputs.SHOULD_PUBLISH == 'yes' }} 59 | run: yarn publish:docker 60 | -------------------------------------------------------------------------------- /.github/workflows/renovate.yml: -------------------------------------------------------------------------------- 1 | name: RENOVATE 2 | on: 3 | schedule: 4 | - cron: '0 14 * * 4' 5 | workflow_dispatch: 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - name: Checkout Repo 13 | uses: actions/checkout@v3 14 | 15 | - name: Renovate Automatic Branch 16 | uses: bodinsamuel/renovate-automatic-branch@v1 17 | with: 18 | github-token: ${{ secrets.GITHUB_TOKEN }} 19 | repo-owner: algolia 20 | repo-name: npm-search 21 | branch-base: master 22 | -------------------------------------------------------------------------------- 
/.github/workflows/validate.yml: -------------------------------------------------------------------------------- 1 | name: VALIDATE 2 | run-name: 🤖 Validating code on ${{ github.ref_name }} 3 | 4 | on: 5 | pull_request: 6 | workflow_call: 7 | 8 | jobs: 9 | validate: 10 | name: 🤖 Validate 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: 📁 Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: ⚙️ Setup node 17 | uses: actions/setup-node@v3 18 | with: 19 | node-version: '18.18' 20 | cache: 'yarn' 21 | 22 | - name: 📦 Install dependencies 23 | run: yarn install --frozen-lockfile 24 | 25 | - name: 📝 Lint 26 | run: yarn lint 27 | 28 | - name: 🏗️ Build 29 | run: yarn build 30 | 31 | - name: 🚦 Test 32 | run: yarn test 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | coverage/ 2 | node_modules/ 3 | lib/ 4 | .env 5 | junit/ 6 | dist/ 7 | 8 | *.log 9 | 10 | # https://yarnpkg.com/getting-started/qa#which-files-should-be-gitignored 11 | .yarn/* 12 | !.yarn/releases 13 | !.yarn/plugins 14 | -------------------------------------------------------------------------------- /.husky/.gitignore: -------------------------------------------------------------------------------- 1 | _ 2 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . 
"$(dirname "$0")/_/husky.sh" 3 | 4 | npm run lint 5 | npm run test 6 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | 18.18.0 2 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "trailingComma": "es5" 4 | } -------------------------------------------------------------------------------- /.renovaterc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "algolia" 4 | ], 5 | "baseBranches": [ 6 | "chore/renovateBaseBranch" 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /.yarnrc.yml: -------------------------------------------------------------------------------- 1 | nodeLinker: node-modules 2 | 3 | plugins: 4 | - path: .yarn/plugins/@yarnpkg/plugin-workspace-tools.cjs 5 | spec: "@yarnpkg/plugin-workspace-tools" 6 | 7 | yarnPath: .yarn/releases/yarn-3.2.2.cjs 8 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Dev 4 | 5 | ```sh 6 | cp .env.example .env 7 | # Fill appId and apiKye 8 | 9 | yarn 10 | yarn build:hot 11 | yarn dev 12 | ``` 13 | 14 | ## Tests & Lint 15 | 16 | ```sh 17 | yarn test 18 | yarn lint 19 | ``` 20 | 21 | ## Env variables 22 | 23 | Everything in [src/config.ts](./src/config.ts) can be overriden via Env vars. 
24 | You may want to override at least in your `.env`: 25 | 26 | - `apiKey`: [Algolia](https://www.algolia.com/) apiKey - **required** 27 | - `appId`: [Algolia](https://www.algolia.com/) appId - _default `OFCNCOG2CU`_ 28 | - `indexName`: [Algolia](https://www.algolia.com/) indexName - _default `npm-search`_ 29 | - `DOGSTATSD_HOST`: Metrics reporting - _default `localhost`_ 30 | - `SENTRY_DSN`: Error reporting - _default `empty`_ 31 | 32 | ## Releasing New Version 33 | 34 | > This step is done by the CI 35 | 36 | ```sh 37 | GH_TOKEN="token" yarn semantic-release --ci=false 38 | ``` 39 | 40 | ## Releasing Docker 41 | 42 | > This step is done by the CI 43 | 44 | ```sh 45 | yarn docker:build 46 | yarn docker:release 47 | ``` 48 | 49 | ## Deploying new version 50 | 51 | > Showing for GCP, but the image can be used anywhere 52 | 53 | - Go to "Compute Engine > VM Instances > `name_of_the_vm` 54 | - Edit 55 | - Change container image with new version 56 | - Save 57 | 58 | ## Deploying first time 59 | 60 | > You need to replace value with `PUT_` 61 | 62 | ```sh 63 | gcloud beta compute \ 64 | --project=npm-search-2 instances create-with-container npm-search-3 \ 65 | --zone=us-central1-a \ 66 | --machine-type=e2-medium \ 67 | --subnet=default \ 68 | --network-tier=STANDARD \ 69 | --metadata=google-logging-enabled=true \ 70 | --maintenance-policy=MIGRATE \ 71 | --service-account=PUT_YOUR_SERVICE_ACCOUNT 72 | --scopes=https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring.write,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append \ 73 | --image=cos-stable-89-16108-470-1 \ 74 | --image-project=cos-cloud \ 75 | --boot-disk-size=10GB \ 76 | --boot-disk-type=pd-balanced \ 77 | --boot-disk-device-name=npm-search-3 \ 78 | --no-shielded-secure-boot \ 79 | --shielded-vtpm \ 80 | 
--shielded-integrity-monitoring \ 81 | --container-image=docker.io/algolia/npm-search:PUT_VERSION \ 82 | --container-restart-policy=always \ 83 | --container-command=node \ 84 | --container-arg=--async-stack-traces \ 85 | --container-arg=--max-semi-space-size=32 \ 86 | --container-arg=--max-old-space-size=3000 \ 87 | --container-arg=dist/index.js \ 88 | --container-env=indexName=npm-search,bootstrapIndexName=npm-search-bootstrap,bootstrapConcurrency=40,apiKey=PUT_ALGOLIA_API_KEY,UV_THREADPOOL_SIZE=128,SENTRY_DSN=PUT_SENTRY_URL,DOGSTATSD_HOST=datadog \ 89 | --labels=container-vm=cos-stable-89-16108-470-1 \ 90 | --reservation-affinity=any 91 | ``` 92 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # ---- Base ---- 2 | FROM node:18.18.0-alpine AS base 3 | 4 | # ------------------ 5 | # package.json cache 6 | # ------------------ 7 | FROM apteno/alpine-jq:2022-03-27 AS deps 8 | 9 | # To prevent cache invalidation from changes in fields other than dependencies 10 | COPY package.json /tmp 11 | RUN jq 'walk(if type == "object" then with_entries(select(.key | test("^jest|prettier|eslint|semantic|dotenv|nodemon|renovate") | not)) else . 
end) | { name, dependencies, devDependencies, packageManager }' < /tmp/package.json > /tmp/deps.json 12 | 13 | # ------------------ 14 | # New base image 15 | # ------------------ 16 | FROM base as tmp 17 | 18 | ENV IN_DOCKER true 19 | ENV PLAYWRIGHT_BROWSERS_PATH="/ms-playwright" 20 | ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD="true" 21 | 22 | # Setup the app WORKDIR 23 | WORKDIR /app/tmp 24 | 25 | # Copy and install dependencies separately from the app's code 26 | # To leverage Docker's cache when no dependency has change 27 | COPY --from=deps /tmp/deps.json ./package.json 28 | COPY yarn.lock .yarnrc.yml ./ 29 | COPY .yarn .yarn 30 | 31 | # Install dependencies for native deps 32 | RUN apk add --no-cache bash python3 33 | 34 | # Install dev dependencies 35 | RUN true \ 36 | # Use local version instead of letting yarn auto upgrade itself 37 | && yarn set version $(ls -d $PWD/.yarn/releases/*) \ 38 | && yarn install 39 | 40 | # This step will invalidates cache 41 | COPY . ./ 42 | RUN ls -lah /app/tmp 43 | 44 | # Builds the code and reinstall node_modules in prod mode 45 | RUN true \ 46 | && yarn build \ 47 | # Finally remove all dev packages 48 | && yarn workspaces focus --all --production \ 49 | && rm -rf src/ \ 50 | && rm -rf .yarn/ 51 | 52 | # ---- Final ---- 53 | # Resulting new, minimal image 54 | # This image must have the minimum amount of layers 55 | FROM node:18.18.0-alpine as final 56 | 57 | ENV NODE_ENV production 58 | 59 | # Do not use root to run the app 60 | USER node 61 | 62 | WORKDIR /app 63 | 64 | COPY --from=tmp --chown=node:node /app/tmp /app 65 | 66 | EXPOSE 8000 67 | 68 | CMD [ "node", "dist/src/index.js" ] 69 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-present Algolia, Inc. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | worker: yarn start 2 | worker_new: yarn start_new 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # npm-search 2 | 3 | npm ↔️ Algolia replication tool. 4 | Maintained by Algolia and jsDelivr. 5 | 6 |

7 |
8 | Algolia logo 9 |  &  10 | jsDelivr logo 11 |
12 |

13 | 14 | [//]: # ([![CircleCI](https://circleci.com/gh/algolia/npm-search/tree/master.svg?style=svg)](https://circleci.com/gh/algolia/npm-search/tree/master) ) 15 | 16 | [//]: # ( Datadog Status) 17 | 18 | [//]: # () 19 | 20 | [//]: # (---) 21 | 22 | This is a failure resilient npm registry to Algolia index replication process. 23 | It will replicate all npm packages to an Algolia index and keep it up to date. 24 | The state of the replication is saved in Algolia index settings. 25 | 26 | The replication should always be running. **Only one instance per Algolia index must run at the same time**. 27 | If the process fails, restart it and the replication process will continue at the last point it remembers. 28 | 29 | 30 | 31 | 32 | - [🗿 npm-search ⛷ 🐌 🛰](#-npm-search---) 33 | - [Algolia Index](#algolia-index) 34 | - [Using the public index](#using-the-public-index) 35 | - [Schema](#schema) 36 | - [Ranking](#ranking) 37 | - [Textual relevance](#textual-relevance) 38 | - [Searchable Attributes](#searchable-attributes) 39 | - [Prefix Search](#prefix-search) 40 | - [Typo-tolerance](#typo-tolerance) 41 | - [Exact Boosting](#exact-boosting) 42 | - [Custom/Business relevance](#custombusiness-relevance) 43 | - [Number of downloads](#number-of-downloads) 44 | - [Popular packages](#popular-packages) 45 | - [Usage](#usage) 46 | - [Production](#production) 47 | - [Restart](#restart) 48 | - [How does it work?](#how-does-it-work) 49 | - [Contributing](#contributing) 50 | 51 | 52 | 53 | ## Algolia Index 54 | 55 | ### Using the public index 56 | 57 | The Algolia index is currently used, for free, by a few selected projects (e.g: [yarnpkg.com](https://yarnpkg.com), [codesandbox.io](https://codesandbox.io), [jsdelivr.com](https://www.jsdelivr.com/), etc...). 58 | 59 | If you want to include this index to your project please create a support request here: [Algolia Support](https://support.algolia.com/hc/en-us/requests/new). 
60 | 61 | This product is an open source product for the community and not supported by Algolia. 62 | 63 | To be eligible your project must meet these requirements: 64 | 65 | - Publicly available: The project must be publicly usable and, if applicable, include documentation or instructions on how the community can use it. 66 | - Non-commercial: The project cannot be used to promote a product or service; it has to provide something of value to the community at no cost. Applications for non-commercial projects backed by commercial entities will be reviewed on a case-by-base basis. 67 | 68 | 69 | You can also use the code or the [public docker image](https://hub.docker.com/r/algolia/npm-search) to run your own (as of September 2021 it will create ~3M records x4). 70 | 71 | ### Schema 72 | 73 | For every single NPM package, we create a record in the Algolia index. The resulting records have the following schema: 74 | 75 | ```json5 76 | { 77 | name: 'babel-core', 78 | downloadsLast30Days: 10978749, 79 | downloadsRatio: 0.08310651682685861, 80 | humanDownloadsLast30Days: '11m', 81 | jsDelivrHits: 11684192, 82 | popular: true, 83 | version: '6.26.0', 84 | versions: { 85 | // [...] 86 | '7.0.0-beta.3': '2017-10-15T13:12:35.166Z', 87 | }, 88 | tags: { 89 | latest: '6.26.0', 90 | old: '5.8.38', 91 | next: '7.0.0-beta.3', 92 | }, 93 | description: 'Babel compiler core.', 94 | dependencies: { 95 | 'babel-code-frame': '^6.26.0', 96 | // [...] 97 | }, 98 | devDependencies: { 99 | 'babel-helper-fixtures': '^6.26.0', 100 | // [...] 101 | }, 102 | repository: { 103 | url: 'https://github.com/babel/babel/tree/master/packages/babel-core', 104 | host: 'github.com', 105 | user: 'babel', 106 | project: 'babel', 107 | path: '/tree/master/packages/babel-core', 108 | branch: 'master', 109 | }, 110 | readme: '# babel-core\n\n> Babel compiler core.\n\n\n [... 
truncated at 200kb]', 111 | owner: { 112 | // either GitHub owner or npm owner 113 | name: 'babel', 114 | avatar: 'https://github.com/babel.png', 115 | link: 'https://github.com/babel', 116 | }, 117 | deprecated: 'Deprecated', // This field will be removed, please use `isDeprecated` instead 118 | isDeprecated: true, 119 | deprecatedReason: 'Deprecated', 120 | isSecurityHeld: false, // See https://github.com/npm/security-holder 121 | badPackage: false, 122 | homepage: 'https://babeljs.io/', 123 | license: 'MIT', 124 | keywords: [ 125 | '6to5', 126 | 'babel', 127 | 'classes', 128 | 'const', 129 | 'es6', 130 | 'harmony', 131 | 'let', 132 | 'modules', 133 | 'transpile', 134 | 'transpiler', 135 | 'var', 136 | 'babel-core', 137 | 'compiler', 138 | ], 139 | created: 1424009748555, 140 | modified: 1508833762239, 141 | lastPublisher: { 142 | name: 'hzoo', 143 | email: 'hi@henryzoo.com', 144 | avatar: 'https://gravatar.com/avatar/851fb4fa7ca479bce1ae0cdf80d6e042', 145 | link: 'https://www.npmjs.com/~hzoo', 146 | }, 147 | owners: [ 148 | { 149 | email: 'me@thejameskyle.com', 150 | name: 'thejameskyle', 151 | avatar: 'https://gravatar.com/avatar/8a00efb48d632ae449794c094f7d5c38', 152 | link: 'https://www.npmjs.com/~thejameskyle', 153 | }, 154 | // [...] 
155 | ], 156 | lastCrawl: '2017-10-24T08:29:24.672Z', 157 | dependents: 3321, 158 | types: { 159 | ts: 'definitely-typed', // definitely-typed | included | false 160 | definitelyTyped: '@types/babel__core', 161 | }, 162 | moduleTypes: ['unknown'], // esm | cjs | none | unknown 163 | styleTypes: ['none'], // file extensions like css, less, scss or none if no style files present 164 | humanDependents: '3.3k', 165 | changelogFilename: null, // if babel-core had a changelog, it would be the raw GitHub url here 166 | objectID: 'babel-core', 167 | // the following fields are considered internal and may change at any time 168 | _downloadsMagnitude: 8, 169 | _jsDelivrPopularity: 5, 170 | _popularName: 'babel-core', 171 | _searchInternal: { 172 | alternativeNames: [ 173 | // alternative versions of this name, to show up on confused searches 174 | ], 175 | }, 176 | } 177 | ``` 178 | 179 | ### Ranking 180 | 181 | If you want to learn more about how Algolia's ranking algorithm is working, you can read [this blog post](https://blog.algolia.com/search-ranking-algorithm-unveiled/). 182 | 183 | #### Textual relevance 184 | 185 | ##### Searchable Attributes 186 | 187 | We're restricting the search to use a subset of the attributes only: 188 | 189 | - `_popularName` 190 | - `name` 191 | - `description` 192 | - `keywords` 193 | - `owner.name` 194 | - `owners.name` 195 | 196 | ##### Prefix Search 197 | 198 | Algolia provides default prefix search capabilities (matching words with only the beginning). This is disabled for the `owner.name` and `owners.name` attributes. 199 | 200 | ##### Typo-tolerance 201 | 202 | Algolia provides default typo-tolerance. 203 | 204 | ##### Exact Boosting 205 | 206 | Using the `optionalFacetFilters` feature of Algolia, we're boosting exact matches on the name of a package to always be on top of the results. 
207 | 208 | #### Custom/Business relevance 209 | 210 | ##### Number of downloads 211 | 212 | For each package, we use the number of downloads in the last 30 days as Algolia's `customRanking` setting. This will be used to sort the results having the same textual-relevance against each others. 213 | 214 | For instance, search for `babel` with match both `babel-core` and `babel-messages`. From a textual-relevance point of view, those 2 packages are exactly matching in the same way. In such case, Algolia will rely on the `customRanking` setting and therefore put the package with the highest number of downloads in the past 30 days first. 215 | 216 | ##### Popular packages 217 | 218 | Some packages will be considered as popular if they have been downloaded "more" than others. We currently consider a package popular if it either: 219 | - has more than `0.005%` of the total number of npm downloads, 220 | - is in the top thousand of packages at [jsDelivr](https://github.com/jsdelivr/data.jsdelivr.com). 221 | 222 | This `popular` flag is also used to boost some records over non-popular ones. 223 | 224 | ## Usage 225 | 226 | ### Production 227 | 228 | ```sh 229 | yarn 230 | apiKey=... yarn start 231 | ``` 232 | 233 | ### Restart 234 | To restart from a particular point (or from the beginning): 235 | 236 | ```sh 237 | seq=0 apiKey=... yarn start 238 | ``` 239 | 240 | This is useful when you want to completely resync the npm registry because: 241 | 242 | - you changed the way you format packages 243 | - you added more metadata (like GitHub stars) 244 | - you are in an unsure state and you just want to restart everything 245 | 246 | `seq` represents a [change sequence](http://docs.couchdb.org/en/2.0.0/json-structure.html#changes-information-for-a-database) 247 | in CouchDB lingo. 248 | 249 | ## How does it work? 
250 | 251 | Our goal with this project is to: 252 | 253 | - be able to quickly do a complete rebuild 254 | - be resilient to failures 255 | - clean the package data 256 | 257 | When the process starts with `seq=0`: 258 | 259 | - save the [current sequence](https://replicate.npmjs.com/) of the npm registry in the state (Algolia settings) 260 | - bootstrap the initial index content by using [/\_all_docs](http://docs.couchdb.org/en/2.0.0/api/database/bulk-api.html) 261 | - replicate registry changes since the current sequence 262 | - watch for registry changes continuously and replicate them 263 | 264 | Replicate and watch are separated because: 265 | 266 | 1. In replicate we want to replicate a batch of documents in a fast way 267 | 2. In watch we want new changes as fast as possible, one by one. If watch was 268 | asking for batches of 100, new packages would be added too late to the index 269 | 270 | ## Contributing 271 | 272 | See [CONTRIBUTING.md](./CONTRIBUTING.md) 273 | -------------------------------------------------------------------------------- /algolia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/algolia/npm-search/1b9ef2e27dce872a377466f4a935fd2525e6e687/algolia.png -------------------------------------------------------------------------------- /elastic-apm-node.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable import/no-commonjs */ 2 | module.exports = { 3 | active: true, 4 | serviceName: 'jsdelivr-npm-search', 5 | serviceVersion: require('./package.json').version, 6 | logLevel: 'fatal', 7 | centralConfig: false, 8 | captureExceptions: false, 9 | captureErrorLogStackTraces: 'always', 10 | ignoreUrls: [ 11 | '/favicon.ico', 12 | '/heartbeat', 13 | '/amp_preconnect_polyfill_404_or_other_error_expected._Do_not_worry_about_it', 14 | ], 15 | errorOnAbortedRequests: false, 16 | transactionSampleRate: 1, 17 | }; 18 | 
-------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | // eslint-disable-next-line import/no-commonjs 2 | module.exports = { 3 | transform: { 4 | '^.+\\.[jt]sx?$': [ 5 | 'ts-jest', 6 | { 7 | diagnostics: false, 8 | tsconfig: `tsconfig.json`, 9 | }, 10 | ], 11 | }, 12 | testMatch: ['/src/**/*.test.[jt]s'], 13 | // By default, ignore the slow and flaky tests testing external APIs. Those 14 | // will be run specifically with `yarn run test:api-control` 15 | testPathIgnorePatterns: ['api-control'], 16 | 17 | testEnvironment: 'node', 18 | modulePaths: ['src'], 19 | 20 | snapshotFormat: { 21 | escapeString: true, 22 | printBasicPrototype: true, 23 | }, 24 | }; 25 | -------------------------------------------------------------------------------- /jsdelivr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/algolia/npm-search/1b9ef2e27dce872a377466f4a935fd2525e6e687/jsdelivr.png -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "npm-search", 3 | "version": "1.8.4", 4 | "private": true, 5 | "author": { 6 | "name": "Algolia, Inc.", 7 | "url": "https://www.algolia.com" 8 | }, 9 | "scripts": { 10 | "build:hot": "tsc -w --preserveWatchOutput", 11 | "build": "tsc -b", 12 | "clean": "rm -rf dist/", 13 | "dev": "node -r ts-node/register/transpile-only -r dotenv/config --max-old-space-size=1500 src/index.ts", 14 | "lint": "eslint --ext=jsx,ts,tsx,js .", 15 | "prepare": "husky install", 16 | "start": "UV_THREADPOOL_SIZE=64 node --max-old-space-size=1500 dist/index.js", 17 | "start_new": "indexName=npm-search-new bootstrapIndexName=npm-search-new.tmp UV_THREADPOOL_SIZE=64 node --max-old-space-size=1500 dist/index.js", 18 | "test:watch": "jest 
--watchAll --no-watchman", 19 | "test:api-control": "./scripts/test-api-control", 20 | "test": "jest --forceExit", 21 | "publish:check": "node ./scripts/publish-check.mjs", 22 | "publish:github": "./scripts/publish-github", 23 | "publish:docker": "./scripts/publish-docker" 24 | }, 25 | "license": "MIT", 26 | "dependencies": { 27 | "@algolia/requester-node-http": "4.14.2", 28 | "@types/bluebird": "^3.5.39", 29 | "algoliasearch": "4.14.2", 30 | "bluebird": "^3.7.2", 31 | "bunyan": "1.8.15", 32 | "bunyan-debug-stream": "2.0.1", 33 | "chalk": "4.1.2", 34 | "dtrace-provider": "0.8.8", 35 | "elastic-apm-node": "^3.49.1", 36 | "escape-html": "1.0.3", 37 | "got": "11.8.5", 38 | "gravatar-url": "3.1.0", 39 | "hosted-git-info": "2.7.1", 40 | "lodash": "4.17.21", 41 | "ms": "2.1.3", 42 | "nano": "^10.1.2", 43 | "nice-package": "3.1.2", 44 | "numeral": "2.0.6", 45 | "object-sizeof": "2.6.3", 46 | "p-queue": "6.6.2", 47 | "promise-rat-race": "1.5.1", 48 | "throttled-queue": "^2.1.4", 49 | "traverse": "0.6.7", 50 | "truncate-utf8-bytes": "1.0.2" 51 | }, 52 | "devDependencies": { 53 | "@semantic-release/changelog": "6.0.3", 54 | "@semantic-release/git": "10.0.1", 55 | "@types/escape-html": "1.0.3", 56 | "@types/hosted-git-info": "3.0.4", 57 | "@types/jest": "28.1.8", 58 | "@types/lodash": "4.14.184", 59 | "@types/ms": "0.7.33", 60 | "@types/numeral": "2.0.2", 61 | "@types/traverse": "0.6.34", 62 | "@types/truncate-utf8-bytes": "1.0.0", 63 | "@typescript-eslint/eslint-plugin": "6.9.0", 64 | "@typescript-eslint/parser": "6.9.0", 65 | "dotenv": "16.0.1", 66 | "eslint": "8.22.0", 67 | "eslint-config-algolia": "22.0.0", 68 | "eslint-config-prettier": "8.5.0", 69 | "eslint-plugin-eslint-comments": "3.2.0", 70 | "eslint-plugin-import": "2.26.0", 71 | "eslint-plugin-jasmine": "4.1.3", 72 | "eslint-plugin-jest": "26.8.2", 73 | "eslint-plugin-jsdoc": "46.8.2", 74 | "eslint-plugin-prettier": "5.0.1", 75 | "eslint-plugin-react": "7.30.1", 76 | "husky": "8.0.1", 77 | "jest": "29.7.0", 78 | 
"lint-staged": "13.0.3", 79 | "pre-commit": "1.2.2", 80 | "prettier": "3.0.3", 81 | "renovate-config-algolia": "2.1.10", 82 | "semantic-release": "22.0.5", 83 | "ts-jest": "29.1.1", 84 | "ts-node": "10.9.1", 85 | "typescript": "5.2.2", 86 | "validator": "13.7.0" 87 | }, 88 | "engines": { 89 | "node": "^18.0.0", 90 | "yarn": "^1.22.0" 91 | }, 92 | "repository": { 93 | "type": "git", 94 | "url": "https://github.com/algolia/npm-search.git" 95 | }, 96 | "homepage": "https://github.com/algolia/npm-search", 97 | "packageManager": "yarn@3.2.2" 98 | } 99 | -------------------------------------------------------------------------------- /release.config.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable import/no-commonjs */ 2 | /* eslint-disable no-template-curly-in-string */ 3 | /** 4 | * We use semantic-release to automate the publishing of new versions based on 5 | * the commit history: whenever a commit is pushed to the master branch, it 6 | * checks if any commit had a BREAKING CHANGE / feat() / fix() message, and 7 | * publishes (or not) a new major.minor/patch version accordingly. 8 | * 9 | * See: https://github.com/semantic-release/semantic-release. 10 | * 11 | * Semantic-release executes steps in order (from verifyConditions to 12 | * success/fail). For each step, it execute the matching code in each plugin (if 13 | * such exists). If any step fails, the whole process stop. 14 | * 15 | * As we are using a mix of core and community plugins, as well as slightly 16 | * diverging from the default use-case, we explictly define the order of plugins 17 | * in each step instead of relying on the default order. 
18 | * 19 | * The current configuration will: 20 | * - Check if a new version needs to be published (and stop if not) 21 | * - Update the version number in package.json accordingly 22 | * - Update the CHANGELOG.md with the changes 23 | * - Create a new commit, and tag it with the version number 24 | * - Publish the code source to GitHub Releases (not very useful). 25 | * 26 | * Specifically, it does not: 27 | * - Publish the code to npm (this is not an npm module) 28 | * - Publish the Docker image (yarn publish:docker takes care of that). 29 | **/ 30 | module.exports = { 31 | branches: 'master', 32 | plugins: [ 33 | // Those 4 plugins are part of the core of semantic-release 34 | '@semantic-release/commit-analyzer', 35 | '@semantic-release/release-notes-generator', 36 | '@semantic-release/npm', 37 | '@semantic-release/github', 38 | // Those 2 are additional plugins 39 | '@semantic-release/changelog', 40 | '@semantic-release/git', 41 | ], 42 | // Below are the various steps 43 | // Source: https://semantic-release.gitbook.io/semantic-release/usage/plugins 44 | // We explicitly define because it allows us to: 45 | // - remove steps that we don't need (for example verifying npm credentials as 46 | // we don't publish on npm) 47 | // - put steps in order (for example updating the changelog file before 48 | // committing it) 49 | verifyConditions: ['@semantic-release/github', '@semantic-release/git'], 50 | analyzeCommits: ['@semantic-release/commit-analyzer'], 51 | verifyRelease: [], 52 | generateNotes: ['@semantic-release/release-notes-generator'], 53 | prepare: [ 54 | '@semantic-release/changelog', 55 | '@semantic-release/npm', 56 | { 57 | path: '@semantic-release/git', 58 | assets: ['package.json', 'CHANGELOG.md'], 59 | message: 60 | 'chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}', 61 | }, 62 | ], 63 | publish: ['@semantic-release/github'], 64 | addChannel: [], 65 | success: [], 66 | fail: [], 67 | }; 68 | 
-------------------------------------------------------------------------------- /scripts/publish-check.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-console */ 2 | 3 | import { Writable } from 'node:stream'; 4 | 5 | import semanticRelease from 'semantic-release'; 6 | 7 | const stream = new Writable({ 8 | write(_chunk, _encoding, callback) { 9 | setImmediate(callback); 10 | }, 11 | }); 12 | 13 | // Execute semantic-release with only the commit-analyzer step, to see if 14 | // a new release is needed 15 | const { nextRelease } = await semanticRelease( 16 | { 17 | dryRun: true, 18 | plugins: ['@semantic-release/commit-analyzer'], 19 | verifyConditions: [], 20 | analyzeCommits: ['@semantic-release/commit-analyzer'], 21 | verifyRelease: [], 22 | generateNotes: [], 23 | prepare: [], 24 | publish: [], 25 | addChannel: [], 26 | success: [], 27 | fail: [], 28 | }, 29 | // Redirect output to new streams, to make the script silent 30 | { 31 | stdout: stream, 32 | stderr: stream, 33 | } 34 | ); 35 | 36 | // Display yes if a new release should be published, or no otherwise 37 | // The output of this script is used by the publishing workflow, to 38 | // conditionally either cancel the run, or actually publish to Docker/GitHub. 39 | // Make sure it only ever output either yes or no 40 | console.info(nextRelease?.version ? 'yes' : 'no'); 41 | -------------------------------------------------------------------------------- /scripts/publish-docker: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Publish the project on GitHub Packages 3 | # See: https://github.com/algolia/npm-search/pkgs/container/npm-search 4 | # 5 | # This script will be automatically run from GitHub Actions on each commits on 6 | # the main branch that warrants a release (ie. feat() and fix() commits). 
7 | # 8 | # You can also run the script locally, but you'll need a GITHUB_TOKEN with the 9 | # write:packages scope. 10 | # See: https://github.com/settings/tokens 11 | set -e 12 | 13 | # Get version from package.json 14 | version=$(node -e "console.log(require('./package.json').version)") 15 | echo "Publishing: $version" 16 | echo "" 17 | 18 | # Build the image 19 | docker build \ 20 | --platform linux/amd64 \ 21 | --label "org.opencontainers.image.source=https://github.com/algolia/npm-search" \ 22 | --tag "ghcr.io/algolia/npm-search" \ 23 | --tag "ghcr.io/algolia/npm-search:${version}" \ 24 | . 25 | 26 | # Login to ghcr.io 27 | echo "${GITHUB_TOKEN}" | 28 | docker login ghcr.io \ 29 | --username $ \ 30 | --password-stdin 31 | 32 | # Push the image 33 | docker push "ghcr.io/algolia/npm-search" 34 | docker push "ghcr.io/algolia/npm-search:${version}" 35 | 36 | # Output 37 | echo "Version $version published" 38 | echo "https://github.com/algolia/npm-search/pkgs/container/npm-search" 39 | -------------------------------------------------------------------------------- /scripts/publish-github: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Publish a new version on GitHub, including: 3 | # - Update package.json and CHANGELOG.md with new version and changes 4 | # - Tag the commit with the version number 5 | # - Release the source code on GitHub Releases (https://github.com/algolia/npm-search/releases) 6 | # 7 | # This script doesn't do anything if there is no new version to publish 8 | set -e 9 | 10 | yarn run semantic-release 11 | -------------------------------------------------------------------------------- /scripts/test-api-control: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # API Control tests assert that the external third party APIs we are using 3 | # return data in an expected format. They query the relevant API with real (not 4 | # mocked) HTTP calls. 
5 | # 6 | # As those tests are slow, and have a higher probability of flakiness because of 7 | # network issues or timeouts, we don't want to run them on each pre-commit hook 8 | # or CI commit. They can instead be run manually, or periodically from the CI. 9 | # 10 | # When it fails on the CI, it will generate a GitHub issue with the failure details 11 | # as well as a link to the run. 12 | 13 | # Running locally, with colors and live output 14 | if [ "$GITHUB_RUN_ID" = "" ]; then 15 | jest \ 16 | ./src/__tests__/api-control \ 17 | --forceExit \ 18 | --testPathIgnorePatterns='' 19 | exit $? 20 | fi 21 | 22 | # Running on CI, creating an issue on failure 23 | echo "Wait while we run the tests" 24 | output=$(jest \ 25 | ./src/__tests__/api-control \ 26 | --forceExit \ 27 | --testPathIgnorePatterns='' 2>&1) 28 | exitCode=$? 29 | echo "$output" 30 | 31 | # Stop on success 32 | if [ "$exitCode" = "0" ]; then 33 | exit 0 34 | fi 35 | 36 | # Create the issue on failure 37 | gh issue create \ 38 | --title "API Control failed" \ 39 | --body "\ 40 | One of the external APIs we depend on failed to return coherent data in our periodic test. 41 | Maybe it's a temporary issue, maybe they changed their format. 
42 | 43 | https://github.com/algolia/npm-search/actions/runs/$GITHUB_RUN_ID 44 | 45 | \`\`\` 46 | $output 47 | \`\`\`" 48 | 49 | # Still mark the job as failed 50 | exit 1 51 | -------------------------------------------------------------------------------- /src/@types/nice-package.ts: -------------------------------------------------------------------------------- 1 | import type { 2 | GetPackage, 3 | GetUser, 4 | GetVersion, 5 | PackageRepo, 6 | } from '../npm/types'; 7 | 8 | export interface NicePackageType { 9 | _hasShrinkwrap?: false; 10 | bin?: Record; 11 | browser?: string; 12 | bundlesize?: Array>; 13 | created: string; 14 | dependencies?: Record; 15 | deprecated?: boolean | string; 16 | description: string; 17 | devDependencies?: Record; 18 | gitHead?: string; 19 | homepage?: string; 20 | keywords: string[]; 21 | lastPublisher?: GetUser; 22 | license?: string | { type: string }; 23 | licenseText?: string; 24 | main?: string[] | string; 25 | modified: string; 26 | module?: string; 27 | exports?: GetVersion['exports']; 28 | name: string; 29 | other: { 30 | _id?: string; 31 | _rev: string; 32 | 'dist-tags': Record; 33 | author?: GetUser; 34 | time?: GetPackage['time']; 35 | }; 36 | owners?: GetUser[]; 37 | readme?: string; 38 | repository?: Array> | Partial | string; 39 | scripts: Record; 40 | schematics?: string; 41 | starsCount?: number; 42 | style?: string; 43 | type?: 'commonjs' | 'module'; 44 | types?: string; 45 | typings?: string; 46 | unpkg?: string; 47 | version?: string; 48 | versions?: Array<{ 49 | date: string; 50 | number: string; 51 | }>; 52 | } 53 | -------------------------------------------------------------------------------- /src/@types/pkg.ts: -------------------------------------------------------------------------------- 1 | import type { GetUser } from '../npm/types'; 2 | 3 | export interface Owner { 4 | name: string; 5 | email?: string; 6 | avatar?: string; 7 | link?: string; 8 | } 9 | 10 | export interface Repo { 11 | url: string; 12 
| host: string; 13 | user: string; 14 | project: string; 15 | path: string; 16 | head?: string; 17 | branch?: string; 18 | } 19 | 20 | export interface GithubRepo { 21 | user: string; 22 | project: string; 23 | path: string; 24 | head: string; 25 | } 26 | 27 | export type TsType = 28 | | { 29 | ts: 'definitely-typed'; 30 | definitelyTyped: string; 31 | } 32 | | { 33 | ts: 'included' | false | { possible: true }; 34 | }; 35 | 36 | export type ModuleType = 'cjs' | 'esm' | 'none' | 'unknown'; 37 | 38 | export type StyleType = string | 'none'; 39 | 40 | export type ComputedMeta = { 41 | computedKeywords: string[]; 42 | computedMetadata: Record; 43 | }; 44 | 45 | export interface RawPkg { 46 | objectID: string; 47 | rev: string; 48 | name: string; 49 | downloadsLast30Days: number; 50 | downloadsRatio: number; 51 | humanDownloadsLast30Days: string; 52 | jsDelivrHits: number; 53 | popular: boolean; 54 | version: string; 55 | versions: Record; 56 | tags: Record; 57 | description: string | null; 58 | dependencies: Record; 59 | devDependencies: Record; 60 | originalAuthor?: GetUser; 61 | repository: Repo | null; 62 | githubRepo: GithubRepo | null; 63 | gitHead: string | null; 64 | readme: string; 65 | owner: Owner | null; 66 | deprecated: boolean | string; 67 | isDeprecated: boolean; 68 | deprecatedReason: string | null; 69 | isSecurityHeld: boolean; 70 | homepage: string | null; 71 | license: string | null; 72 | keywords: string[]; 73 | computedKeywords: ComputedMeta['computedKeywords']; 74 | computedMetadata: ComputedMeta['computedMetadata']; 75 | created: number; 76 | modified: number; 77 | lastPublisher: Owner | null; 78 | owners: Owner[]; 79 | bin: Record; 80 | dependents: number; 81 | types: TsType; 82 | moduleTypes: ModuleType[]; 83 | styleTypes: StyleType[]; 84 | humanDependents: string; 85 | changelogFilename: string | null; 86 | lastCrawl: string; 87 | _revision: number; 88 | _searchInternal: { 89 | alternativeNames: string[]; 90 | popularAlternativeNames: 
string[]; 91 | }; 92 | } 93 | 94 | export type FinalPkg = RawPkg & { 95 | _oneTimeDataToUpdateAt?: number; 96 | _periodicDataUpdatedAt?: number; 97 | _jsDelivrPopularity?: number; 98 | _downloadsMagnitude?: number; 99 | _popularName?: string; 100 | }; 101 | -------------------------------------------------------------------------------- /src/StateManager.ts: -------------------------------------------------------------------------------- 1 | import type { SearchIndex } from 'algoliasearch'; 2 | 3 | import { config } from './config'; 4 | import { datadog } from './utils/datadog'; 5 | 6 | export type State = { 7 | seq: number | undefined; 8 | bootstrapDone: boolean; 9 | bootstrapLastDone: number | null; 10 | bootstrapLastId: string | null; 11 | stage: 'bootstrap' | 'watch'; 12 | }; 13 | 14 | const defaultState: State = { 15 | seq: config.seq ? Number(config.seq) : config.seq, 16 | bootstrapDone: false, 17 | bootstrapLastDone: null, 18 | bootstrapLastId: null, 19 | stage: 'bootstrap', 20 | }; 21 | 22 | export class StateManager { 23 | algoliaIndex; 24 | currentState: State = { ...defaultState }; 25 | refreshed: boolean = false; 26 | 27 | constructor(algoliaIndex: SearchIndex) { 28 | this.algoliaIndex = algoliaIndex; 29 | } 30 | 31 | async check(): Promise { 32 | const state = await this.get(); 33 | 34 | if (config.seq !== undefined) { 35 | return this.set({ ...state, seq: Number(config.seq) }); 36 | } 37 | 38 | if (state === undefined) { 39 | return this.reset(); 40 | } 41 | 42 | return state; 43 | } 44 | 45 | async get(): Promise { 46 | if (this.currentState && this.refreshed) { 47 | return this.currentState; 48 | } 49 | 50 | const start = Date.now(); 51 | const { userData } = await this.algoliaIndex.getSettings(); 52 | datadog.timing('stateManager.get', Date.now() - start); 53 | 54 | this.currentState = userData; 55 | this.refreshed = true; 56 | return userData; 57 | } 58 | 59 | async set(state: State): Promise { 60 | this.currentState = state; 61 | 62 | const start 
= Date.now(); 63 | await this.algoliaIndex.setSettings({ 64 | userData: state, 65 | }); 66 | datadog.timing('stateManager.set', Date.now() - start); 67 | 68 | return state; 69 | } 70 | 71 | async reset(): Promise { 72 | return await this.set(defaultState); 73 | } 74 | 75 | async save(partial: Partial): Promise { 76 | const current = await this.get(); 77 | 78 | return await this.set({ 79 | ...current, 80 | ...partial, 81 | }); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/__tests__/StateManager.test.ts: -------------------------------------------------------------------------------- 1 | import { StateManager } from '../StateManager'; 2 | 3 | describe('stateManager', () => { 4 | describe('get()', () => { 5 | it('should get userData from algolia', async () => { 6 | const mock = { 7 | getSettings: jest.fn(() => { 8 | return { 9 | userData: 'foobar', 10 | }; 11 | }), 12 | } as any; 13 | const stateManager = new StateManager(mock); 14 | const userData = await stateManager.get(); 15 | 16 | expect(mock.getSettings).toHaveBeenCalled(); 17 | expect(userData).toBe('foobar'); 18 | }); 19 | }); 20 | 21 | describe('set()', () => { 22 | it('should set userData to algolia', async () => { 23 | const mock = { 24 | setSettings: jest.fn(), 25 | } as any; 26 | const stateManager = new StateManager(mock); 27 | await stateManager.set({ 28 | seq: 1, 29 | bootstrapDone: false, 30 | bootstrapLastDone: 1635196220508, 31 | bootstrapLastId: '', 32 | stage: 'bootstrap', 33 | }); 34 | 35 | expect(mock.setSettings).toHaveBeenCalledWith({ 36 | userData: { 37 | seq: 1, 38 | bootstrapDone: false, 39 | bootstrapLastDone: 1635196220508, 40 | bootstrapLastId: '', 41 | stage: 'bootstrap', 42 | }, 43 | }); 44 | }); 45 | }); 46 | 47 | describe('reset()', () => { 48 | it('should reset userData', async () => { 49 | const mock = { 50 | setSettings: jest.fn(), 51 | } as any; 52 | const stateManager = new StateManager(mock); 53 | await 
stateManager.reset(); 54 | 55 | expect(mock.setSettings).toHaveBeenCalled(); 56 | }); 57 | }); 58 | 59 | describe('save()', () => { 60 | it('should save userData to algolia', async () => { 61 | const mock = { 62 | getSettings: jest.fn(() => { 63 | return { 64 | userData: { bar: 'foo' }, 65 | }; 66 | }), 67 | setSettings: jest.fn(), 68 | } as any; 69 | const stateManager = new StateManager(mock); 70 | await stateManager.save({ foo: 'bar' } as any); 71 | 72 | expect(mock.getSettings).toHaveBeenCalled(); 73 | expect(mock.setSettings).toHaveBeenCalledWith({ 74 | userData: { 75 | bar: 'foo', 76 | foo: 'bar', 77 | }, 78 | }); 79 | }); 80 | }); 81 | }); 82 | -------------------------------------------------------------------------------- /src/__tests__/api-control/npm.test.ts: -------------------------------------------------------------------------------- 1 | import * as api from '../../npm/index'; 2 | 3 | jest.setTimeout(15000); 4 | 5 | describe('findAll()', () => { 6 | it('contains the correct keys', async () => { 7 | const all = await api.findAll({ limit: 2, startkey: '0' }); 8 | 9 | expect(all).toEqual( 10 | expect.objectContaining({ 11 | offset: expect.any(Number), 12 | total_rows: expect.any(Number), 13 | }) 14 | ); 15 | 16 | expect(all.rows).toHaveLength(2); 17 | 18 | expect(all.rows[0]).toEqual( 19 | expect.objectContaining({ 20 | id: '0', 21 | key: '0', 22 | value: { rev: '11-61bb2c49ce3202a3e0ab9a65646b4b4d' }, 23 | }) 24 | ); 25 | }); 26 | }); 27 | 28 | describe('getDocFromRegistry()', () => { 29 | it('retrieves a single doc', async () => { 30 | const doc = await api.getDocFromRegistry('jsdelivr'); 31 | 32 | expect(doc.name).toBe('jsdelivr'); 33 | expect(Object.keys(doc.versions)).toHaveLength(2); 34 | }); 35 | }); 36 | 37 | describe('getInfo()', () => { 38 | let registryInfo; 39 | beforeAll(async () => { 40 | registryInfo = await api.getInfo(); 41 | }); 42 | 43 | it('contains the correct keys', () => { 44 | expect(registryInfo).toEqual( 45 | 
expect.objectContaining({ 46 | nbDocs: expect.any(Number), 47 | seq: expect.any(Number), 48 | }) 49 | ); 50 | }); 51 | }); 52 | -------------------------------------------------------------------------------- /src/__tests__/bootstrap.test.ts: -------------------------------------------------------------------------------- 1 | import type { State } from '../StateManager'; 2 | import { StateManager } from '../StateManager'; 3 | import type { AlgoliaStore } from '../algolia'; 4 | import { Bootstrap } from '../bootstrap'; 5 | 6 | function getAlgoliaMock(): any { 7 | return { 8 | setSettings: (): Promise => { 9 | return Promise.resolve(); 10 | }, 11 | saveSynonyms: (): Promise => { 12 | return Promise.resolve(); 13 | }, 14 | saveRules: (): Promise<{ taskID: string }> => { 15 | return Promise.resolve({ taskID: 'A' }); 16 | }, 17 | waitTask: (): Promise => { 18 | return Promise.resolve(); 19 | }, 20 | }; 21 | } 22 | 23 | describe('isDone', () => { 24 | it('should return true', async () => { 25 | const mock = { 26 | ...getAlgoliaMock(), 27 | getSettings: jest.fn(() => { 28 | const state: State = { 29 | bootstrapDone: true, 30 | bootstrapLastDone: Date.now(), 31 | bootstrapLastId: '1', 32 | seq: 1, 33 | stage: 'watch', 34 | }; 35 | return { 36 | userData: state, 37 | }; 38 | }), 39 | } as any; 40 | const stateManager = new StateManager(mock); 41 | const bootstrap = new Bootstrap(stateManager, { 42 | mainIndex: mock, 43 | } as AlgoliaStore); 44 | 45 | expect(await bootstrap.isDone()).toBe(true); 46 | }); 47 | 48 | it('should return false', async () => { 49 | const mock = { 50 | ...getAlgoliaMock(), 51 | getSettings: jest.fn(() => { 52 | const state: State = { 53 | bootstrapDone: false, 54 | bootstrapLastDone: Date.now(), 55 | bootstrapLastId: '1', 56 | seq: 1, 57 | stage: 'watch', 58 | }; 59 | return { 60 | userData: state, 61 | }; 62 | }), 63 | } as any; 64 | const stateManager = new StateManager(mock); 65 | const bootstrap = new Bootstrap(stateManager, { 66 | mainIndex: 
mock, 67 | } as AlgoliaStore); 68 | 69 | expect(await bootstrap.isDone()).toBe(false); 70 | }); 71 | }); 72 | -------------------------------------------------------------------------------- /src/__tests__/changelog.test.ts: -------------------------------------------------------------------------------- 1 | import { baseUrlMap, getChangelog, getChangelogBackground } from '../changelog'; 2 | 3 | jest.mock('got', () => { 4 | const gotSnapshotUrls = new Set([ 5 | 'https://gitlab.com/janslow/gitlab-fetch/raw/master/CHANGELOG.md', 6 | 'https://raw.githubusercontent.com/algolia/algoliasearch-netlify/master/CHANGELOG.md', 7 | 'https://bitbucket.org/atlassian/aui/raw/master/changelog.md', 8 | 'https://raw.githubusercontent.com/expressjs/body-parser/master/HISTORY.md', 9 | ]); 10 | 11 | return Object.assign( 12 | (url: string): Promise<{ url: string; redirectUrls: string[] }> => { 13 | return gotSnapshotUrls.has(url) 14 | ? Promise.resolve({ url, redirectUrls: [], statusCode: 200 }) 15 | : Promise.reject(new Error(`got mock does not exist for ${url}`)); 16 | }, 17 | { 18 | HTTPError: TypeError, 19 | } 20 | ); 21 | }); 22 | 23 | describe('should test baseUrlMap', () => { 24 | it('should work with paths', () => { 25 | const bitbucketRepo = { 26 | host: 'bitbucket.org', 27 | user: 'user', 28 | project: 'project', 29 | path: '/src/master/packages/project1', 30 | head: 'master', 31 | branch: 'master', 32 | }; 33 | 34 | const gitlabRepo = { 35 | host: 'gitlab.com', 36 | path: '/tree/master/foo/bar', 37 | project: 'project', 38 | user: 'user', 39 | }; 40 | 41 | const githubRepo = { 42 | host: 'github.com', 43 | user: 'babel', 44 | project: 'babel', 45 | path: '/tree/master/packages/babel-core', 46 | head: 'master', 47 | }; 48 | 49 | expect(baseUrlMap.get('bitbucket.org')!.buildUrl(bitbucketRepo)).toBe( 50 | 'https://bitbucket.org/user/project/raw/master/packages/project1' 51 | ); 52 | 53 | expect(baseUrlMap.get('gitlab.com')!.buildUrl(gitlabRepo)).toBe( 54 | 
'https://gitlab.com/user/project/raw/master/foo/bar' 55 | ); 56 | 57 | expect(baseUrlMap.get('github.com')!.buildUrl(githubRepo)).toBe( 58 | 'https://raw.githubusercontent.com/babel/babel/master/packages/babel-core' 59 | ); 60 | }); 61 | 62 | it('should work without paths', () => { 63 | const bitbucketRepo = { 64 | host: 'bitbucket.org', 65 | user: 'user', 66 | path: '', 67 | project: 'project', 68 | branch: 'master', 69 | }; 70 | 71 | const gitlabRepo = { 72 | host: 'gitlab.com', 73 | project: 'project', 74 | path: '', 75 | user: 'user', 76 | branch: 'master', 77 | }; 78 | 79 | const githubRepo = { 80 | host: 'github.com', 81 | user: 'babel', 82 | project: 'babel', 83 | path: '', 84 | branch: 'master', 85 | }; 86 | 87 | expect(baseUrlMap.get('bitbucket.org')!.buildUrl(bitbucketRepo)).toBe( 88 | 'https://bitbucket.org/user/project/raw/master' 89 | ); 90 | 91 | expect(baseUrlMap.get('gitlab.com')!.buildUrl(gitlabRepo)).toBe( 92 | 'https://gitlab.com/user/project/raw/master' 93 | ); 94 | 95 | expect(baseUrlMap.get('github.com')!.buildUrl(githubRepo)).toBe( 96 | 'https://raw.githubusercontent.com/babel/babel/master' 97 | ); 98 | }); 99 | }); 100 | 101 | describe('hosts', () => { 102 | it('should handle not found changelog for github', async () => { 103 | const pkg = { 104 | name: 'foo', 105 | version: '0.0.0', 106 | repository: { 107 | url: '', 108 | host: 'github.com', 109 | user: 'visionmedia', 110 | project: 'debug', 111 | path: '', 112 | head: 'master', 113 | branch: 'master', 114 | }, 115 | }; 116 | 117 | const { changelogFilename } = await getChangelogBackground(pkg); 118 | expect(changelogFilename).toBeNull(); 119 | }); 120 | 121 | it('should get changelog for github', async () => { 122 | const pkg = { 123 | name: 'foo', 124 | version: '0.0.0', 125 | repository: { 126 | url: '', 127 | host: 'github.com', 128 | user: 'algolia', 129 | project: 'algoliasearch-netlify', 130 | path: '', 131 | head: 'master', 132 | branch: 'master', 133 | }, 134 | }; 135 | 136 | 
const { changelogFilename } = await getChangelogBackground(pkg); 137 | expect(changelogFilename).toBe( 138 | 'https://raw.githubusercontent.com/algolia/algoliasearch-netlify/master/CHANGELOG.md' 139 | ); 140 | }); 141 | 142 | it('should get changelog for gitlab', async () => { 143 | const pkg = { 144 | name: 'foo', 145 | version: '0.0.0', 146 | repository: { 147 | url: '', 148 | host: 'gitlab.com', 149 | user: 'janslow', 150 | project: 'gitlab-fetch', 151 | path: '', 152 | head: 'master', 153 | branch: 'master', 154 | }, 155 | }; 156 | 157 | const { changelogFilename } = await getChangelogBackground(pkg); 158 | expect(changelogFilename).toBe( 159 | 'https://gitlab.com/janslow/gitlab-fetch/raw/master/CHANGELOG.md' 160 | ); 161 | }); 162 | 163 | it('should get changelog for bitbucket', async () => { 164 | const pkg = { 165 | name: 'foo', 166 | version: '0.0.0', 167 | repository: { 168 | url: '', 169 | host: 'bitbucket.org', 170 | user: 'atlassian', 171 | project: 'aui', 172 | path: '', 173 | head: 'master', 174 | branch: 'master', 175 | }, 176 | }; 177 | 178 | const { changelogFilename } = await getChangelogBackground(pkg); 179 | expect(changelogFilename).toBe( 180 | 'https://bitbucket.org/atlassian/aui/raw/master/changelog.md' 181 | ); 182 | }); 183 | }); 184 | 185 | describe('jsDelivr', () => { 186 | it('should early return when finding changelog', async () => { 187 | const { changelogFilename } = await getChangelog( 188 | { 189 | name: 'foo', 190 | version: '1.0.0', 191 | repository: { 192 | url: '', 193 | host: 'github.com', 194 | user: 'expressjs', 195 | project: 'body-parser', 196 | path: '', 197 | head: 'master', 198 | branch: 'master', 199 | }, 200 | }, 201 | [ 202 | { name: '/package.json', hash: '', time: '1', size: 1 }, 203 | { name: '/CHANGELOG.md', hash: '', time: '1', size: 1 }, 204 | ] 205 | ); 206 | expect(changelogFilename).toBe( 207 | 'https://cdn.jsdelivr.net/npm/foo@1.0.0/CHANGELOG.md' 208 | ); 209 | }); 210 | 211 | it('should early return when 
finding changelog in nested file', async () => { 212 | const { changelogFilename } = await getChangelog( 213 | { 214 | name: 'foo', 215 | version: '1.0.0', 216 | repository: { 217 | url: '', 218 | host: 'github.com', 219 | user: 'expressjs', 220 | project: 'body-parser', 221 | path: '', 222 | head: 'master', 223 | branch: 'master', 224 | }, 225 | }, 226 | [{ name: '/pkg/CHANGELOG.md', hash: '', time: '1', size: 1 }] 227 | ); 228 | expect(changelogFilename).toBe( 229 | 'https://cdn.jsdelivr.net/npm/foo@1.0.0/pkg/CHANGELOG.md' 230 | ); 231 | }); 232 | 233 | it('should not register a file looking like a changelog', async () => { 234 | const { changelogFilename } = await getChangelog( 235 | { 236 | name: 'foo', 237 | version: '1.0.0', 238 | repository: { 239 | url: '', 240 | host: 'github.com', 241 | user: 'hello', 242 | project: 'foo', 243 | path: '', 244 | head: 'master', 245 | branch: 'master', 246 | }, 247 | }, 248 | [{ name: '/dist/changelog.js', hash: '', time: '1', size: 1 }] 249 | ); 250 | expect(changelogFilename).toBeNull(); 251 | }); 252 | }); 253 | 254 | describe('filename', () => { 255 | it('should work with HISTORY.md', async () => { 256 | const pkg = { 257 | name: 'foo', 258 | version: '0.0.0', 259 | repository: { 260 | url: '', 261 | host: 'github.com', 262 | user: 'expressjs', 263 | project: 'body-parser', 264 | path: '', 265 | head: 'master', 266 | branch: 'master', 267 | }, 268 | }; 269 | 270 | const { changelogFilename } = await getChangelogBackground(pkg); 271 | expect(changelogFilename).toBe( 272 | 'https://raw.githubusercontent.com/expressjs/body-parser/master/HISTORY.md' 273 | ); 274 | }); 275 | }); 276 | -------------------------------------------------------------------------------- /src/__tests__/config.test.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable import/first */ 2 | process.env.apiKey = 'fake-api-key'; 3 | 4 | import { config } from '../config'; 5 | 6 | describe('config', () 
=> { 7 | it('gets the correct keys from env variables', () => { 8 | // from mocked .env 9 | expect(config.apiKey).toBe('fake-api-key'); 10 | // from config.js 11 | expect(config.maxObjSize).toBe(450000); 12 | }); 13 | 14 | const objectIDRe = /^[A-Za-z0-9_-]+$/; 15 | 16 | it('sets correct objectIDs for query rules', () => { 17 | config.indexRules.forEach(({ objectID }) => { 18 | expect(objectID).toMatch(objectIDRe); 19 | }); 20 | }); 21 | 22 | it('sets correct objectIDs for synonyms', () => { 23 | config.indexSynonyms.forEach(({ objectID }) => { 24 | expect(objectID).toMatch(objectIDRe); 25 | }); 26 | }); 27 | }); 28 | -------------------------------------------------------------------------------- /src/__tests__/saveDocs.test.ts: -------------------------------------------------------------------------------- 1 | import algoliasearch from 'algoliasearch'; 2 | 3 | import { formatPkg } from '../formatPkg'; 4 | import { hits } from '../jsDelivr'; 5 | import { cacheTotalDownloads } from '../npm'; 6 | import { saveDoc } from '../saveDocs'; 7 | 8 | import preact from './preact-simplified'; 9 | 10 | jest.setTimeout(15000); 11 | 12 | const FINAL_BASE = { 13 | _revision: expect.any(Number), 14 | // _downloadsMagnitude: 7, 15 | // _jsDelivrPopularity: 0, 16 | _searchInternal: { 17 | alternativeNames: ['preact', 'preact.js', 'preactjs'], 18 | // popularAlternativeNames: ['preact', 'preact.js', 'preactjs'], 19 | }, 20 | bin: {}, 21 | changelogFilename: null, 22 | computedKeywords: [], 23 | computedMetadata: {}, 24 | created: 1441939293521, 25 | dependencies: {}, 26 | dependents: 0, 27 | deprecated: false, 28 | deprecatedReason: null, 29 | description: 30 | 'Fast 3kb React alternative with the same modern API. 
Components & Virtual DOM.', 31 | devDependencies: { 32 | '@types/chai': '^4.1.7', 33 | '@types/mocha': '^5.2.5', 34 | '@types/node': '^9.6.40', 35 | 'babel-cli': '^6.24.1', 36 | 'babel-core': '^6.24.1', 37 | 'babel-eslint': '^8.2.6', 38 | 'babel-loader': '^7.0.0', 39 | 'babel-plugin-transform-object-rest-spread': '^6.23.0', 40 | 'babel-plugin-transform-react-jsx': '^6.24.1', 41 | 'babel-preset-env': '^1.6.1', 42 | bundlesize: '^0.17.0', 43 | chai: '^4.2.0', 44 | copyfiles: '^2.1.0', 45 | 'core-js': '^2.6.0', 46 | coveralls: '^3.0.0', 47 | 'cross-env': '^5.1.4', 48 | diff: '^3.0.0', 49 | eslint: '^4.18.2', 50 | 'eslint-plugin-react': '^7.11.1', 51 | 'flow-bin': '^0.89.0', 52 | 'gzip-size-cli': '^2.0.0', 53 | 'istanbul-instrumenter-loader': '^3.0.0', 54 | jscodeshift: '^0.5.0', 55 | karma: '^3.1.3', 56 | 'karma-babel-preprocessor': '^7.0.0', 57 | 'karma-chai-sinon': '^0.1.5', 58 | 'karma-chrome-launcher': '^2.2.0', 59 | 'karma-coverage': '^1.1.2', 60 | 'karma-mocha': '^1.3.0', 61 | 'karma-mocha-reporter': '^2.2.5', 62 | 'karma-sauce-launcher': '^1.2.0', 63 | 'karma-sinon': '^1.0.5', 64 | 'karma-source-map-support': '^1.3.0', 65 | 'karma-sourcemap-loader': '^0.3.6', 66 | 'karma-webpack': '^3.0.5', 67 | mocha: '^5.0.4', 68 | 'npm-run-all': '^4.1.5', 69 | puppeteer: '^1.11.0', 70 | rimraf: '^2.5.3', 71 | rollup: '^0.57.1', 72 | 'rollup-plugin-babel': '^3.0.2', 73 | 'rollup-plugin-memory': '^3.0.0', 74 | 'rollup-plugin-node-resolve': '^3.4.0', 75 | sinon: '^4.4.2', 76 | 'sinon-chai': '^3.3.0', 77 | typescript: '^3.0.1', 78 | 'uglify-js': '^2.7.5', 79 | webpack: '^4.27.1', 80 | }, 81 | downloadsLast30Days: 2874638, 82 | downloadsRatio: 0.0023, 83 | gitHead: 'master', 84 | githubRepo: { 85 | head: 'master', 86 | path: '', 87 | project: 'preact', 88 | user: 'developit', 89 | }, 90 | homepage: null, 91 | humanDependents: '0', 92 | humanDownloadsLast30Days: '2.9m', 93 | isDeprecated: false, 94 | jsDelivrHits: 0, 95 | keywords: [ 96 | 'preact', 97 | 'react', 98 | 'virtual 
dom', 99 | 'vdom', 100 | 'components', 101 | 'virtual', 102 | 'dom', 103 | ], 104 | lastCrawl: '2021-07-11T12:31:18.112Z', 105 | lastPublisher: { 106 | avatar: 'https://gravatar.com/avatar/ad82ff1463f3e3b7b4a44c5f499912ae', 107 | email: 'npm.leah@hrmny.sh', 108 | link: 'https://www.npmjs.com/~harmony', 109 | name: 'harmony', 110 | }, 111 | license: 'MIT', 112 | modified: 1564778088321, 113 | moduleTypes: ['esm'], 114 | name: 'preact', 115 | objectID: 'preact', 116 | originalAuthor: { 117 | email: 'jason@developit.ca', 118 | name: 'Jason Miller', 119 | }, 120 | owner: { 121 | avatar: 'https://github.com/developit.png', 122 | link: 'https://github.com/developit', 123 | name: 'developit', 124 | }, 125 | owners: [ 126 | { 127 | avatar: 'https://gravatar.com/avatar/85ed8e6da2fbf39abeb4995189be324c', 128 | email: 'jason@developit.ca', 129 | link: 'https://www.npmjs.com/~developit', 130 | name: 'developit', 131 | }, 132 | { 133 | avatar: 'https://gravatar.com/avatar/52401c37bc5c4d54a051c619767fdbf8', 134 | email: 'ulliftw@gmail.com', 135 | link: 'https://www.npmjs.com/~harmony', 136 | name: 'harmony', 137 | }, 138 | { 139 | avatar: 'https://gravatar.com/avatar/308439e12701ef85245dc0632dd07c2a', 140 | email: 'luke@lukeed.com', 141 | link: 'https://www.npmjs.com/~lukeed', 142 | name: 'lukeed', 143 | }, 144 | { 145 | avatar: 'https://gravatar.com/avatar/4ed639a3ea6219b80b58e2e81ff9ba47', 146 | email: 'marvin@marvinhagemeister.de', 147 | link: 'https://www.npmjs.com/~marvinhagemeister', 148 | name: 'marvinhagemeister', 149 | }, 150 | { 151 | avatar: 'https://gravatar.com/avatar/83589d88ac76ddc2853562f9a817fe27', 152 | email: 'prateek89born@gmail.com', 153 | link: 'https://www.npmjs.com/~prateekbh', 154 | name: 'prateekbh', 155 | }, 156 | { 157 | avatar: 'https://gravatar.com/avatar/88747cce15801e9e96bcb76895fcd7f9', 158 | email: 'hello@preactjs.com', 159 | link: 'https://www.npmjs.com/~preactjs', 160 | name: 'preactjs', 161 | }, 162 | { 163 | avatar: 
'https://gravatar.com/avatar/d279821c96bb49eeaef68b5456f42074', 164 | email: 'allamsetty.anup@gmail.com', 165 | link: 'https://www.npmjs.com/~reznord', 166 | name: 'reznord', 167 | }, 168 | ], 169 | popular: false, 170 | readme: '', 171 | repository: { 172 | branch: 'master', 173 | head: undefined, 174 | host: 'github.com', 175 | path: '', 176 | project: 'preact', 177 | type: 'git', 178 | url: 'https://github.com/developit/preact', 179 | user: 'developit', 180 | }, 181 | tags: { 182 | latest: '8.5.0', 183 | next: '10.0.0-rc.1', 184 | }, 185 | types: { 186 | ts: 'included', 187 | }, 188 | version: '8.5.0', 189 | versions: { 190 | '10.0.0-rc.1': '2019-08-02T20:34:45.123Z', 191 | '8.5.0': '2019-08-02T18:34:23.572Z', 192 | }, 193 | }; 194 | 195 | describe('saveDoc', () => { 196 | beforeAll(async () => { 197 | cacheTotalDownloads.total = 1e15; 198 | hits.set('preact', { hits: 12345, popular: true }); 199 | hits.set('reactjs', { hits: 1234, popular: false }); 200 | }); 201 | 202 | it('should always produce the same records', async () => { 203 | const client = algoliasearch('e', ''); 204 | const index = client.initIndex('a'); 205 | const oneTimeDataIndex = client.initIndex('a'); 206 | const periodicDataIndex = client.initIndex('a'); 207 | jest.spyOn(index, 'saveObject').mockImplementationOnce(() => { 208 | return true as any; 209 | }); 210 | 211 | const final = { 212 | ...FINAL_BASE, 213 | }; 214 | const clean = expect.objectContaining({ 215 | ...final, 216 | jsDelivrHits: 12345, 217 | lastCrawl: expect.any(String), 218 | downloadsLast30Days: 0, 219 | downloadsRatio: 0, 220 | humanDownloadsLast30Days: '0', 221 | modified: expect.any(Number), 222 | _searchInternal: expect.objectContaining({ 223 | ...final._searchInternal, 224 | popularAlternativeNames: ['preact', 'preact.js', 'preactjs'], 225 | }), 226 | _jsDelivrPopularity: 2, 227 | popular: true, 228 | }); 229 | 230 | await saveDoc({ 231 | formatted: formatPkg(preact)!, 232 | index, 233 | oneTimeDataIndex, 234 | 
periodicDataIndex, 235 | }); 236 | 237 | expect(index.saveObject).toHaveBeenCalledWith(clean); 238 | }); 239 | 240 | it('should reuse existing changelog and downloads data', async () => { 241 | const client = algoliasearch('e', ''); 242 | const index = client.initIndex('a'); 243 | jest.spyOn(index, 'saveObject').mockImplementationOnce(() => { 244 | return true as any; 245 | }); 246 | 247 | const oneTimeDataIndex = client.initIndex('b'); 248 | jest.spyOn(oneTimeDataIndex, 'getObject').mockImplementationOnce(() => { 249 | return { changelogFilename: '/resolved-from-index.md' } as any; 250 | }); 251 | 252 | const periodicDataIndex = client.initIndex('c'); 253 | jest.spyOn(periodicDataIndex, 'getObject').mockImplementationOnce(() => { 254 | return { packageNpmDownloads: 2233, totalNpmDownloads: 1e10 } as any; 255 | }); 256 | 257 | const final = { 258 | ...FINAL_BASE, 259 | }; 260 | const clean = expect.objectContaining({ 261 | ...final, 262 | jsDelivrHits: 12345, 263 | changelogFilename: '/resolved-from-index.md', 264 | lastCrawl: expect.any(String), 265 | downloadsLast30Days: 2233, 266 | downloadsRatio: expect.any(Number), 267 | humanDownloadsLast30Days: '2.2k', 268 | modified: expect.any(Number), 269 | _searchInternal: expect.objectContaining({ 270 | ...final._searchInternal, 271 | popularAlternativeNames: ['preact', 'preact.js', 'preactjs'], 272 | }), 273 | _jsDelivrPopularity: 2, 274 | popular: true, 275 | }); 276 | 277 | await saveDoc({ 278 | formatted: formatPkg(preact)!, 279 | index, 280 | oneTimeDataIndex, 281 | periodicDataIndex, 282 | }); 283 | 284 | expect(index.saveObject).toHaveBeenCalledWith(clean); 285 | }); 286 | 287 | it('should not add popular alternative names for non-popular packages', async () => { 288 | const client = algoliasearch('e', ''); 289 | const index = client.initIndex('a'); 290 | const oneTimeDataIndex = client.initIndex('a'); 291 | const periodicDataIndex = client.initIndex('a'); 292 | jest.spyOn(index, 
'saveObject').mockImplementationOnce(() => { 293 | return true as any; 294 | }); 295 | 296 | const final = { 297 | ...FINAL_BASE, 298 | name: 'reactjs', 299 | objectID: 'reactjs', 300 | tags: { 301 | latest: '1.0.0', 302 | }, 303 | version: '1.0.0', 304 | versions: { 305 | '1.0.0': '2019-08-02T18:34:23.572Z', 306 | }, 307 | }; 308 | const clean = expect.objectContaining({ 309 | ...final, 310 | jsDelivrHits: 1234, 311 | lastCrawl: expect.any(String), 312 | downloadsLast30Days: 0, 313 | downloadsRatio: 0, 314 | humanDownloadsLast30Days: '0', 315 | modified: expect.any(Number), 316 | _searchInternal: expect.objectContaining({ 317 | popularAlternativeNames: [], 318 | }), 319 | }); 320 | 321 | await saveDoc({ 322 | formatted: formatPkg({ 323 | ...preact, 324 | name: 'reactjs', 325 | 'dist-tags': { latest: '1.0.0' }, 326 | versions: { 327 | '1.0.0': { 328 | ...preact.versions['8.5.0'], 329 | name: 'reactjs', 330 | version: '1.0.0', 331 | }, 332 | }, 333 | time: { 334 | ...preact.time, 335 | '1.0.0': '2019-08-02T18:34:23.572Z', 336 | }, 337 | })!, 338 | index, 339 | periodicDataIndex, 340 | oneTimeDataIndex, 341 | }); 342 | 343 | expect(index.saveObject).toHaveBeenCalledWith(clean); 344 | }); 345 | 346 | it('should skip getting extra data for security held packages', async () => { 347 | const client = algoliasearch('e', ''); 348 | const index = client.initIndex('a'); 349 | const oneTimeDataIndex = client.initIndex('a'); 350 | const periodicDataIndex = client.initIndex('a'); 351 | jest.spyOn(index, 'saveObject').mockImplementationOnce(() => { 352 | return true as any; 353 | }); 354 | 355 | const final = { 356 | ...FINAL_BASE, 357 | name: 'trello-enterprises', 358 | objectID: 'trello-enterprises', 359 | tags: { 360 | latest: '1000.1000.1000', 361 | }, 362 | version: '1000.1000.1000', 363 | versions: { 364 | '1000.1000.1000': '2019-08-02T18:34:23.572Z', 365 | }, 366 | repository: { 367 | branch: 'master', 368 | head: undefined, 369 | host: 'github.com', 370 | path: '', 371 | 
project: 'security-holder', 372 | type: 'git', 373 | url: 'https://github.com/npm/security-holder', 374 | user: 'npm', 375 | }, 376 | githubRepo: { 377 | head: 'master', 378 | path: '', 379 | project: 'security-holder', 380 | user: 'npm', 381 | }, 382 | downloadsLast30Days: 0, 383 | humanDownloadsLast30Days: '0', 384 | isSecurityHeld: true, 385 | }; 386 | const clean = expect.objectContaining({ 387 | ...final, 388 | owner: expect.any(Object), 389 | homepage: expect.any(String), 390 | lastCrawl: expect.any(String), 391 | downloadsRatio: expect.any(Number), 392 | modified: expect.any(Number), 393 | _searchInternal: expect.objectContaining({ 394 | popularAlternativeNames: [], 395 | }), 396 | }); 397 | 398 | await saveDoc({ 399 | formatted: formatPkg({ 400 | ...preact, 401 | name: 'trello-enterprises', 402 | 'dist-tags': { latest: '1000.1000.1000' }, 403 | versions: { 404 | '1000.1000.1000': { 405 | ...preact.versions['8.5.0'], 406 | name: 'trello-enterprises', 407 | version: '1000.1000.1000', 408 | }, 409 | }, 410 | time: { 411 | ...preact.time, 412 | '1000.1000.1000': '2019-08-02T18:34:23.572Z', 413 | }, 414 | repository: { 415 | type: 'git', 416 | url: 'https://github.com/npm/security-holder', 417 | }, 418 | })!, 419 | index, 420 | oneTimeDataIndex, 421 | periodicDataIndex, 422 | }); 423 | 424 | expect(index.saveObject).toHaveBeenCalledWith(clean); 425 | }); 426 | }); 427 | -------------------------------------------------------------------------------- /src/algolia/index.ts: -------------------------------------------------------------------------------- 1 | import { createNodeHttpRequester } from '@algolia/requester-node-http'; 2 | import type { SearchClient, SearchIndex } from 'algoliasearch'; 3 | import algoliasearch from 'algoliasearch'; 4 | 5 | import type { Config } from '../config'; 6 | import { httpAgent, httpsAgent, USER_AGENT } from '../utils/request'; 7 | 8 | export interface AlgoliaStore { 9 | mainIndex: SearchIndex; 10 | mainQueueIndex: SearchIndex; 11 
| mainLostIndex: SearchIndex; 12 | mainNotFoundIndex: SearchIndex; 13 | bootstrapIndex: SearchIndex; 14 | bootstrapQueueIndex: SearchIndex; 15 | bootstrapLostIndex: SearchIndex; 16 | bootstrapNotFoundIndex: SearchIndex; 17 | oneTimeDataIndex: SearchIndex; 18 | periodicDataIndex: SearchIndex; 19 | client: SearchClient; 20 | } 21 | 22 | const requester = createNodeHttpRequester({ 23 | agent: httpsAgent, 24 | httpAgent, 25 | httpsAgent, 26 | }); 27 | 28 | function createClient({ 29 | appId, 30 | apiKey, 31 | indexName, 32 | }: { 33 | appId: string; 34 | apiKey: string; 35 | indexName: string; 36 | }): { index: SearchIndex; client: SearchClient } { 37 | const client = algoliasearch(appId, apiKey, { 38 | requester, 39 | }); 40 | client.addAlgoliaAgent(USER_AGENT); 41 | return { 42 | index: client.initIndex(indexName), 43 | client, 44 | }; 45 | } 46 | 47 | /** 48 | * Prepare algolia for indexing. 49 | */ 50 | export async function prepare(config: Config): Promise { 51 | if (!config.apiKey) { 52 | throw new Error( 53 | 'npm-search: Please provide the `apiKey` env variable and restart' 54 | ); 55 | } 56 | 57 | // Get main index and boostrap algolia client 58 | const { index: mainIndex, client } = createClient(config); 59 | const { index: mainQueueIndex } = createClient({ 60 | appId: config.appId, 61 | apiKey: config.apiKey, 62 | indexName: `${config.indexName}.queue`, 63 | }); 64 | const { index: mainLostIndex } = createClient({ 65 | appId: config.appId, 66 | apiKey: config.apiKey, 67 | indexName: `${config.indexName}.lost`, 68 | }); 69 | const { index: mainNotFoundIndex } = createClient({ 70 | appId: config.appId, 71 | apiKey: config.apiKey, 72 | indexName: `${config.indexName}.not-found`, 73 | }); 74 | const { index: bootstrapIndex } = createClient({ 75 | appId: config.appId, 76 | apiKey: config.apiKey, 77 | indexName: config.bootstrapIndexName, 78 | }); 79 | const { index: bootstrapQueueIndex } = createClient({ 80 | appId: config.appId, 81 | apiKey: config.apiKey, 82 | 
indexName: `${config.bootstrapIndexName}.queue`, 83 | }); 84 | const { index: bootstrapLostIndex } = createClient({ 85 | appId: config.appId, 86 | apiKey: config.apiKey, 87 | indexName: `${config.bootstrapIndexName}.lost`, 88 | }); 89 | const { index: bootstrapNotFoundIndex } = createClient({ 90 | appId: config.appId, 91 | apiKey: config.apiKey, 92 | indexName: `${config.bootstrapIndexName}.not-found`, 93 | }); 94 | const { index: oneTimeDataIndex } = createClient({ 95 | appId: config.appId, 96 | apiKey: config.apiKey, 97 | indexName: `${config.indexName}.one-time-data`, 98 | }); 99 | const { index: periodicDataIndex } = createClient({ 100 | appId: config.appId, 101 | apiKey: config.apiKey, 102 | indexName: `${config.indexName}.periodic-data`, 103 | }); 104 | 105 | // Ensure indices exists by calling an empty setSettings() 106 | await mainIndex.setSettings({}).wait(); 107 | await mainQueueIndex 108 | .setSettings({ 109 | attributesForFaceting: ['isProcessed', 'retries'], 110 | }) 111 | .wait(); 112 | await bootstrapIndex.setSettings({}).wait(); 113 | await bootstrapQueueIndex 114 | .setSettings({ 115 | attributesForFaceting: ['retries'], 116 | }) 117 | .wait(); 118 | await mainLostIndex.setSettings({}).wait(); 119 | await mainNotFoundIndex.setSettings({}).wait(); 120 | await bootstrapLostIndex.setSettings({}).wait(); 121 | await bootstrapNotFoundIndex.setSettings({}).wait(); 122 | await oneTimeDataIndex.setSettings({}).wait(); 123 | await periodicDataIndex.setSettings({}).wait(); 124 | 125 | return { 126 | client, 127 | mainIndex, 128 | mainQueueIndex, 129 | mainLostIndex, 130 | mainNotFoundIndex, 131 | bootstrapIndex, 132 | bootstrapQueueIndex, 133 | bootstrapLostIndex, 134 | bootstrapNotFoundIndex, 135 | oneTimeDataIndex, 136 | periodicDataIndex, 137 | }; 138 | } 139 | 140 | export async function putDefaultSettings( 141 | index: SearchIndex, 142 | config: Config 143 | ): Promise { 144 | await index.setSettings(config.indexSettings); 145 | 146 | await 
index.saveSynonyms(config.indexSynonyms, { 147 | replaceExistingSynonyms: true, 148 | }); 149 | const { taskID } = await index.saveRules(config.indexRules, { 150 | replaceExistingRules: true, 151 | }); 152 | 153 | await index.waitTask(taskID); 154 | } 155 | -------------------------------------------------------------------------------- /src/api.ts: -------------------------------------------------------------------------------- 1 | import http from 'http'; 2 | 3 | // import { datadog } from './utils/datadog'; 4 | import { log } from './utils/log'; 5 | 6 | // Used for health check 7 | export function createAPI(): http.Server { 8 | const server = http.createServer((_req, res) => { 9 | // datadog.check('main', datadog.CHECKS.OK); 10 | res.writeHead(200, { 'Content-Type': 'application/json' }); 11 | res.end( 12 | JSON.stringify({ 13 | code: 200, 14 | }) 15 | ); 16 | }); 17 | 18 | server.listen(8000, () => { 19 | log.info(`⛑ API started on port 8000`); 20 | }); 21 | return server; 22 | } 23 | -------------------------------------------------------------------------------- /src/bootstrap.ts: -------------------------------------------------------------------------------- 1 | import { EventEmitter } from 'events'; 2 | 3 | import chalk from 'chalk'; 4 | 5 | import type { StateManager } from './StateManager'; 6 | import type { AlgoliaStore } from './algolia'; 7 | import { putDefaultSettings } from './algolia'; 8 | import { config } from './config'; 9 | import { MainBootstrapIndexer } from './indexers/MainBootstrapIndexer'; 10 | import { OneTimeBackgroundIndexer } from './indexers/OneTimeBackgroundIndexer'; 11 | import { PeriodicBackgroundIndexer } from './indexers/PeriodicBackgroundIndexer'; 12 | import * as npm from './npm'; 13 | import { Prefetcher } from './npm/Prefetcher'; 14 | import { datadog } from './utils/datadog'; 15 | import { log } from './utils/log'; 16 | import * as sentry from './utils/sentry'; 17 | 18 | export class Bootstrap extends EventEmitter { 19 | 
stateManager: StateManager; 20 | algoliaStore: AlgoliaStore; 21 | prefetcher: Prefetcher | undefined; 22 | interval: NodeJS.Timer | undefined; 23 | oneTimeIndexer: OneTimeBackgroundIndexer | undefined; 24 | periodicDataIndexer: PeriodicBackgroundIndexer | undefined; 25 | mainBootstrapIndexer: MainBootstrapIndexer | undefined; 26 | 27 | constructor(stateManager: StateManager, algoliaStore: AlgoliaStore) { 28 | super(); 29 | this.stateManager = stateManager; 30 | this.algoliaStore = algoliaStore; 31 | } 32 | 33 | override on(param: 'finished', cb: () => any): this; 34 | override on(param: string, cb: () => void): this { 35 | return super.on(param, cb); 36 | } 37 | 38 | async stop(): Promise { 39 | log.info('Stopping Bootstrap...'); 40 | 41 | if (this.interval) { 42 | clearInterval(this.interval); 43 | } 44 | 45 | if (this.prefetcher) { 46 | this.prefetcher.stop(); 47 | await this.oneTimeIndexer!.stop(); 48 | await this.periodicDataIndexer!.stop(); 49 | await this.mainBootstrapIndexer!.stop(); 50 | } 51 | 52 | log.info('Stopped Bootstrap gracefully'); 53 | } 54 | 55 | /** 56 | * Bootstrap is the mode that goes from 0 to all the packages in NPM 57 | * In other word it is reindexing everything from scratch. 58 | * 59 | * It is useful if: 60 | * - you are starting this project for the first time 61 | * - you messed up with your Algolia index 62 | * - you lagged too much behind. 63 | * 64 | * Watch mode should/can be reliably left running for weeks/months as CouchDB is made for that. 
65 | */ 66 | async run(): Promise { 67 | log.info('-----'); 68 | log.info('⛷ Bootstrap: starting'); 69 | const state = await this.stateManager.check(); 70 | 71 | await this.stateManager.save({ 72 | stage: 'bootstrap', 73 | }); 74 | 75 | const { seq, nbDocs: totalDocs } = await npm.getInfo(); 76 | if (!state.bootstrapLastId) { 77 | // Start from 0 78 | log.info('⛷ Bootstrap: starting from the first doc'); 79 | // first time this launches, we need to remember the last seq our bootstrap can trust 80 | await this.stateManager.save({ seq }); 81 | await putDefaultSettings(this.algoliaStore.bootstrapIndex, config); 82 | } else { 83 | log.info('⛷ Bootstrap: starting at doc %s', state.bootstrapLastId); 84 | } 85 | 86 | log.info('-----'); 87 | log.info(chalk.yellowBright`Total packages: ${totalDocs}`); 88 | log.info('-----'); 89 | 90 | this.prefetcher = new Prefetcher( 91 | this.stateManager, 92 | this.algoliaStore.bootstrapQueueIndex, 93 | { 94 | nextKey: state.bootstrapLastId, 95 | } 96 | ); 97 | 98 | this.oneTimeIndexer = new OneTimeBackgroundIndexer( 99 | this.algoliaStore, 100 | this.algoliaStore.bootstrapIndex 101 | ); 102 | 103 | this.periodicDataIndexer = new PeriodicBackgroundIndexer( 104 | this.algoliaStore, 105 | this.algoliaStore.bootstrapIndex, 106 | this.algoliaStore.bootstrapNotFoundIndex 107 | ); 108 | 109 | this.mainBootstrapIndexer = new MainBootstrapIndexer(this.algoliaStore); 110 | 111 | this.prefetcher.run(); 112 | this.oneTimeIndexer.run(); 113 | this.periodicDataIndexer.run(); 114 | this.mainBootstrapIndexer.run(); 115 | 116 | let done = 0; 117 | 118 | this.interval = setInterval(async () => { 119 | this.logProgress(done).catch(() => {}); 120 | 121 | try { 122 | if ( 123 | this.prefetcher!.isFinished && 124 | (await this.mainBootstrapIndexer!.isFinished()) 125 | ) { 126 | clearInterval(this.interval!); 127 | await this.afterProcessing(); 128 | return; 129 | } 130 | } catch (e) { 131 | sentry.report(e); 132 | } 133 | 134 | done = 0; 135 | }, 
config.prefetchWaitBetweenPage); 136 | } 137 | 138 | /** 139 | * Tell if we need to execute bootstrap or not. 140 | */ 141 | async isDone(): Promise { 142 | const state = await this.stateManager.check(); 143 | 144 | if (state.seq && state.seq > 0 && state.bootstrapDone) { 145 | await putDefaultSettings(this.algoliaStore.mainIndex, config); 146 | log.info('⛷ Bootstrap: already done, skipping'); 147 | 148 | return true; 149 | } 150 | 151 | return false; 152 | } 153 | 154 | /** 155 | * Last step after everything has been processed. 156 | */ 157 | private async afterProcessing(): Promise { 158 | await this.oneTimeIndexer!.stop(); 159 | await this.periodicDataIndexer!.stop(); 160 | await this.mainBootstrapIndexer!.stop(); 161 | 162 | await this.stateManager.save({ 163 | bootstrapDone: true, 164 | bootstrapLastDone: Date.now(), 165 | }); 166 | 167 | await this.moveToProduction(); 168 | 169 | log.info('-----'); 170 | log.info('⛷ Bootstrap: done'); 171 | log.info('-----'); 172 | 173 | this.emit('finished'); 174 | } 175 | 176 | /** 177 | * Move algolia index to prod. 178 | */ 179 | private async moveToProduction(): Promise { 180 | log.info('🚚 starting move to production'); 181 | 182 | const currentState = await this.stateManager.get(); 183 | // Backup current prod index 184 | await this.algoliaStore.client 185 | .copyIndex( 186 | config.indexName, 187 | `${config.indexName}.bak-${new Date().toISOString()}` 188 | ) 189 | .wait(); 190 | 191 | // Replace prod with bootstrap 192 | await this.algoliaStore.client 193 | .copyIndex(config.bootstrapIndexName, config.indexName) 194 | .wait(); 195 | 196 | // Remove bootstrap so we don't end up reusing a partial index 197 | await this.algoliaStore.bootstrapIndex.delete(); 198 | 199 | await this.stateManager.save(currentState); 200 | } 201 | 202 | /** 203 | * Log approximate progress. 
204 | */ 205 | private async logProgress(nbDocs: number): Promise { 206 | const { nbDocs: totalDocs } = await npm.getInfo(); 207 | const queueLength = await this.mainBootstrapIndexer!.fetchQueueLength(); 208 | const offset = this.prefetcher!.offset; 209 | 210 | datadog.gauge('sequence.total', totalDocs); 211 | datadog.gauge('sequence.current', offset + nbDocs); 212 | datadog.gauge('job.idleCount', queueLength); 213 | 214 | log.info( 215 | chalk.dim.italic 216 | .white`[progress] %d/%d docs queued (%s%) (~%s in queue) (%s processing; %s buffer)`, 217 | offset + nbDocs, 218 | totalDocs, 219 | ((Math.max(offset + nbDocs, 1) / totalDocs) * 100).toFixed(2), 220 | queueLength, 221 | this.mainBootstrapIndexer!.running, 222 | this.mainBootstrapIndexer!.queued 223 | ); 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/changelog.ts: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | 3 | import { HTTPError } from 'got'; 4 | import ms from 'ms'; 5 | import PQueue from 'p-queue'; 6 | import race from 'promise-rat-race'; 7 | 8 | import type { RawPkg, Repo } from './@types/pkg'; 9 | import * as jsDelivr from './jsDelivr/index'; 10 | import { datadog } from './utils/datadog'; 11 | import { request } from './utils/request'; 12 | 13 | type ChangelogResult = { 14 | changelogFilename: string | null; 15 | }; 16 | 17 | type HostObject = { 18 | name: string; 19 | queue: PQueue; 20 | buildUrl: ( 21 | opts: Pick 22 | ) => string; 23 | }; 24 | 25 | export const baseUrlMap = new Map(); 26 | 27 | baseUrlMap.set('github.com', { 28 | name: 'github', 29 | queue: new PQueue({ intervalCap: 20, interval: 1000 }), 30 | buildUrl: ({ user, project, path: pathName, branch }): string => { 31 | return `https://raw.githubusercontent.com/${user}/${project}/${ 32 | pathName ? 
'' : branch 33 | }${pathName.replace('/tree/', '')}`; 34 | }, 35 | }); 36 | 37 | baseUrlMap.set('gitlab.com', { 38 | name: 'gitlab', 39 | queue: new PQueue({ intervalCap: 10, interval: 1000 }), 40 | buildUrl: ({ user, project, path: pathName, branch }): string => { 41 | return `https://gitlab.com/${user}/${project}${ 42 | pathName ? pathName.replace('tree', 'raw') : `/raw/${branch}` 43 | }`; 44 | }, 45 | }); 46 | 47 | baseUrlMap.set('bitbucket.org', { 48 | name: 'bitbucket', 49 | queue: new PQueue({ intervalCap: 10, interval: 1000 }), 50 | buildUrl: ({ user, project, path: pathName, branch }): string => { 51 | return `https://bitbucket.org/${user}/${project}${ 52 | pathName ? pathName.replace('src', 'raw') : `/raw/${branch}` 53 | }`; 54 | }, 55 | }); 56 | 57 | const fileOptions = [ 58 | 'CHANGELOG.md', 59 | 'ChangeLog.md', 60 | 'changelog.md', 61 | 'changelog.markdown', 62 | 'CHANGELOG', 63 | 'ChangeLog', 64 | 'changelog', 65 | 'CHANGES.md', 66 | 'changes.md', 67 | 'Changes.md', 68 | 'CHANGES', 69 | 'changes', 70 | 'Changes', 71 | 'HISTORY.md', 72 | 'history.md', 73 | 'HISTORY', 74 | 'history', 75 | 'RELEASES.md', 76 | 'RELEASES', 77 | ]; 78 | 79 | // https://regex101.com/r/zU2gjr/1 80 | const fileRegex = 81 | /^(((changelogs?)|changes|history|(releases?)))((.(md|markdown))?$)/i; 82 | 83 | async function handledGot(file: string): Promise { 84 | const result = await request(file, { method: 'HEAD' }); 85 | 86 | if ( 87 | // bitbucket returns 200 for private repos 88 | // github returns a 404 89 | // I am unsure what gitlab does 90 | result?.redirectUrls?.find((res) => 91 | res.startsWith('https://bitbucket.org/account/signin') 92 | ) 93 | ) { 94 | throw new Error('Redirect leads to login page'); 95 | } 96 | if (result.statusCode !== 200) { 97 | throw new Error('not found'); 98 | } 99 | 100 | return result.url; 101 | } 102 | 103 | async function raceFromPaths( 104 | host: HostObject, 105 | files: string[] 106 | ): Promise { 107 | const start = Date.now(); 108 | 109 | 
try { 110 | const url = await race( 111 | files.map((file) => { 112 | return host.queue.add(() => { 113 | datadog.increment(`changelogs.requests.${host.name}`); 114 | return handledGot(file); 115 | }); 116 | }) 117 | ); 118 | 119 | datadog.increment(`changelogs.success`); 120 | return { changelogFilename: url }; 121 | } catch (e) { 122 | if ( 123 | e instanceof HTTPError && 124 | (e.response.statusCode === 429 || e.response.statusCode >= 500) 125 | ) { 126 | datadog.increment(`changelogs.throttle.${host.name}`); 127 | 128 | if (!host.queue.isPaused) { 129 | host.queue.pause(); 130 | setTimeout(() => host.queue.start(), ms('1 minute')).unref(); 131 | } 132 | } 133 | 134 | datadog.increment(`changelogs.failure`); 135 | return { changelogFilename: null }; 136 | } finally { 137 | datadog.timing('changelogs.getChangelog', Date.now() - start); 138 | } 139 | } 140 | 141 | export async function getChangelog( 142 | pkg: Pick, 143 | filelist: jsDelivr.File[] 144 | ): Promise<{ 145 | changelogFilename: string | null; 146 | }> { 147 | for (const file of filelist) { 148 | const name = path.basename(file.name); 149 | if (!fileRegex.test(name)) { 150 | continue; 151 | } 152 | 153 | datadog.increment('jsdelivr.getChangelog.hit'); 154 | 155 | return { changelogFilename: jsDelivr.getFullURL(pkg, file) }; 156 | } 157 | 158 | datadog.increment('jsdelivr.getChangelog.miss'); 159 | return { changelogFilename: null }; 160 | } 161 | 162 | export async function getChangelogBackground( 163 | pkg: Pick 164 | ): Promise { 165 | const { repository } = pkg; 166 | 167 | if (!repository?.host) { 168 | return { changelogFilename: null }; 169 | } 170 | 171 | const host = repository.host || ''; 172 | const knownHost = baseUrlMap.get(host); 173 | 174 | // No known git hosts 175 | if (!knownHost) { 176 | return { changelogFilename: null }; 177 | } 178 | 179 | const baseUrl = knownHost.buildUrl(repository); 180 | const files = fileOptions.map((file) => 181 | [baseUrl.replace(/\/$/, ''), file].join('/') 
182 | ); 183 | 184 | // Brute-force from git host 185 | return raceFromPaths(knownHost, [...files]); 186 | } 187 | -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | import type { Settings, Synonym, Rule } from '@algolia/client-search'; 2 | import ms from 'ms'; 3 | 4 | const indexSettings: Settings = { 5 | searchableAttributes: [ 6 | 'unordered(_popularName)', 7 | 'name, description, keywords', 8 | '_searchInternal.popularAlternativeNames', 9 | 'owner.name', 10 | 'owners.name', 11 | ], 12 | attributesForFaceting: [ 13 | 'filterOnly(_searchInternal.popularAlternativeNames)' /* optionalFacetFilters to boost the name */, 14 | 'filterOnly(bin)', 15 | 'searchable(keywords)', 16 | 'searchable(computedKeywords)', 17 | 'searchable(owner.name)', 18 | '_oneTimeDataToUpdateAt', 19 | '_periodicDataUpdatedAt', 20 | 'deprecated', 21 | 'isDeprecated', 22 | 'isSecurityHeld', 23 | 'types.ts', 24 | 'moduleTypes', 25 | 'styleTypes', 26 | 'popular', 27 | ], 28 | customRanking: [ 29 | 'desc(_downloadsMagnitude)', 30 | 'desc(_jsDelivrPopularity)', 31 | 'desc(dependents)', 32 | 'desc(downloadsLast30Days)', 33 | ], 34 | disablePrefixOnAttributes: ['owner.name', 'owners.name'], 35 | disableExactOnAttributes: ['owner.name', 'owners.name'], 36 | exactOnSingleWordQuery: 'word', 37 | ranking: [ 38 | 'filters', 39 | 'typo', 40 | 'words', 41 | 'attribute', 42 | 'proximity', 43 | 'asc(isSecurityHeld)', 44 | 'asc(deprecated)', 45 | 'asc(isDeprecated)', 46 | 'asc(badPackage)', 47 | 'desc(popular)', 48 | 'exact', 49 | 'custom', 50 | ], 51 | minProximity: 5, 52 | optionalWords: ['js', 'javascript'], 53 | separatorsToIndex: '_', 54 | replaceSynonymsInHighlight: false, 55 | maxValuesPerFacet: 1000, 56 | unretrievableAttributes: ['_oneTimeDataToUpdateAt', '_periodicDataUpdatedAt'], 57 | }; 58 | 59 | const indexSynonyms: Synonym[] = [ 60 | { 61 | type: 'synonym', 62 | 
synonyms: ['_', 'underscore'], 63 | objectID: 'underscore', 64 | }, 65 | { 66 | type: 'synonym', 67 | synonyms: ['a11y', 'accessibility', 'accessible'], 68 | objectID: 'a11y', 69 | }, 70 | { 71 | type: 'synonym', 72 | synonyms: [ 73 | 'i18n', 74 | 'internationalisation', 75 | 'internationalization', 76 | 'translation', 77 | 'translate', 78 | ], 79 | objectID: 'i18n', 80 | }, 81 | { 82 | type: 'synonym', 83 | synonyms: ['k8s', 'kubernetes'], 84 | objectID: 'k8s', 85 | }, 86 | ]; 87 | 88 | const indexRules: Rule[] = [ 89 | { 90 | objectID: 'promote-exact', 91 | description: 'promote exact matches', 92 | condition: { 93 | pattern: '{facet:_searchInternal.popularAlternativeNames}', 94 | anchoring: 'is', 95 | }, 96 | consequence: { 97 | params: { 98 | automaticOptionalFacetFilters: [ 99 | { 100 | facet: '_searchInternal.popularAlternativeNames', 101 | }, 102 | ], 103 | }, 104 | }, 105 | }, 106 | { 107 | condition: { 108 | pattern: 'author\\: {facet:owner.name}', 109 | anchoring: 'contains', 110 | }, 111 | consequence: { 112 | params: { 113 | automaticFacetFilters: ['owner.name'], 114 | query: { 115 | remove: ['author\\:', '{facet:owner.name}'], 116 | }, 117 | }, 118 | }, 119 | description: 'filter on author: {owner.name}', 120 | objectID: 'author-filter', 121 | }, 122 | { 123 | condition: { 124 | pattern: 'owner\\: {facet:owner.name}', 125 | anchoring: 'contains', 126 | }, 127 | consequence: { 128 | params: { 129 | automaticFacetFilters: ['owner.name'], 130 | query: { 131 | remove: ['owner\\:', '{facet:owner.name}'], 132 | }, 133 | }, 134 | }, 135 | description: 'filter on owner: {owner.name}', 136 | objectID: 'owner-filter', 137 | }, 138 | { 139 | condition: { 140 | pattern: 'keyword\\: {facet:keywords}', 141 | anchoring: 'contains', 142 | }, 143 | consequence: { 144 | params: { 145 | automaticFacetFilters: ['keywords'], 146 | query: { 147 | remove: ['keyword\\:', '{facet:keywords}'], 148 | }, 149 | }, 150 | }, 151 | description: 'filter on keyword: {keywords}', 152 | 
objectID: 'keyword-filter', 153 | }, 154 | ]; 155 | 156 | export const config = { 157 | npmRegistryEndpoint: 'https://replicate.npmjs.com', 158 | npmRegistryDBName: 'registry', 159 | npmDownloadsEndpoint: 'https://api.npmjs.org/downloads', 160 | npmRootEndpoint: 'https://registry.npmjs.org', 161 | jsDelivrHitsEndpoint: 162 | 'https://data.jsdelivr.com/v1/stats/packages/all?period=month&type=npm', 163 | jsDelivrPackageEndpoint: 'https://data.jsdelivr.com/v1/package/npm', 164 | typescriptTypesIndex: 'https://cdn.jsdelivr.net/npm/all-the-package-types', 165 | maxObjSize: 450000, 166 | popularDownloadsRatio: 0.005, 167 | appId: 'OFCNCOG2CU', 168 | apiKey: '', 169 | indexName: 'npm-search', 170 | bootstrapIndexName: 'npm-search-bootstrap', 171 | bootstrapConcurrency: 25, 172 | timeToRedoBootstrap: ms('30 days'), 173 | seq: undefined, 174 | indexSettings, 175 | indexSynonyms, 176 | indexRules, 177 | prefetchWaitBetweenPage: 5000, 178 | retryMax: 4, 179 | retrySkipped: ms('1 minute'), 180 | retryBackoffPow: 3, 181 | retryBackoffMax: ms('1 minute'), 182 | refreshPeriod: ms('2 minutes'), 183 | alternativeNamesNpmDownloadsThreshold: 5000, 184 | alternativeNamesJsDelivrHitsThreshold: 10000, 185 | 186 | // http 187 | defaultRequestTimeout: ms('30 seconds'), 188 | 189 | // Watch 190 | watchMaxPrefetch: 10, 191 | watchMinUnpause: 5, 192 | }; 193 | 194 | export type Config = typeof config; 195 | 196 | Object.entries(process.env).forEach(([key, value]) => { 197 | if (key in config) { 198 | config[key] = 199 | typeof config[key] === 'number' ? 
parseInt(value!, 10) : value; 200 | } 201 | }); 202 | -------------------------------------------------------------------------------- /src/errors.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable max-classes-per-file */ 2 | export class DeletedError extends Error {} 3 | export class PackageNotFoundError extends Error {} 4 | -------------------------------------------------------------------------------- /src/formatPkg.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable complexity */ 2 | import escape from 'escape-html'; 3 | import gravatarUrl from 'gravatar-url'; 4 | import hostedGitInfo from 'hosted-git-info'; 5 | import NicePackage from 'nice-package'; 6 | import numeral from 'numeral'; 7 | import sizeof from 'object-sizeof'; 8 | import traverse from 'traverse'; 9 | import truncate from 'truncate-utf8-bytes'; 10 | 11 | import type { NicePackageType } from './@types/nice-package'; 12 | import type { 13 | ComputedMeta, 14 | GithubRepo, 15 | ModuleType, 16 | StyleType, 17 | Owner, 18 | RawPkg, 19 | Repo, 20 | } from './@types/pkg'; 21 | import { config } from './config'; 22 | import type { GetPackage, GetUser, PackageRepo } from './npm/types'; 23 | import { datadog } from './utils/datadog'; 24 | 25 | const defaultGravatar = 'https://www.gravatar.com/avatar/'; 26 | 27 | type Subset = { 28 | name: string; 29 | include: boolean; 30 | metadata?: { schematics: string }; 31 | }; 32 | 33 | const registrySubsetRules: Array<(pkg: NicePackageType) => Subset> = [ 34 | ({ name }): Subset => ({ 35 | name: 'babel-plugin', 36 | include: 37 | name.startsWith('@babel/plugin') || name.startsWith('babel-plugin-'), 38 | }), 39 | 40 | ({ name }): Subset => ({ 41 | name: 'vue-cli-plugin', 42 | include: /^(@vue\/|vue-|@[\w-]+\/vue-)cli-plugin-/.test(name), 43 | }), 44 | 45 | ({ name, keywords = [] }): Subset => ({ 46 | name: 'yeoman-generator', 47 | include: 48 | 
/**
 * Convert a raw npm registry document into the Algolia record shape (RawPkg).
 *
 * Returns undefined when the package cannot be indexed: no name after
 * cleaning, nobody to attribute it to (no repo, publisher, or author),
 * or the record is still too big after truncation.
 *
 * Side effects: NicePackage mutates its input, and all string leaves
 * except `readme` are HTML-escaped at the end.
 */
export function formatPkg(pkg: GetPackage): RawPkg | undefined {
  const start = Date.now();
  // Be careful NicePackage modify the Object ref
  const cleaned: NicePackageType | undefined = new NicePackage(pkg);
  if (!cleaned?.name) {
    return;
  }

  if (Array.isArray(cleaned.main)) {
    // https://github.com/angular-ui/bootstrap-bower/issues/52
    cleaned.main = cleaned.main[0];
  }

  const lastPublisher = cleaned.lastPublisher
    ? formatUser(cleaned.lastPublisher)
    : null;
  const author = getAuthor(cleaned);
  const license = getLicense(cleaned);

  // Packages published without a version get a sentinel instead of nothing.
  const version = cleaned.version ? cleaned.version : '0.0.0';
  const versions = getVersions(cleaned, pkg);

  let githubRepo: GithubRepo | null = null;
  let defaultRepository: PackageRepo | undefined;

  if (cleaned.repository) {
    // `repository` may be a string, an object, or an array of either;
    // normalize to a single { type, url } object or undefined.
    let tmp = cleaned.repository;
    if (Array.isArray(tmp) && tmp.length) {
      tmp = tmp[0] as PackageRepo;
    }

    if (typeof tmp === 'string') {
      defaultRepository = { type: 'git', url: tmp };
    } else if (Object.keys(tmp).length > 0) {
      defaultRepository = tmp as PackageRepo;
    }

    // At this point, we are not even sure the source is correct
    if (
      defaultRepository &&
      (!defaultRepository.type || !defaultRepository.url)
    ) {
      defaultRepository = undefined;
    }

    if (defaultRepository) {
      githubRepo = getGitHubRepoInfo({
        repository: defaultRepository,
        gitHead: cleaned.gitHead,
      });
    }
  }

  if (!githubRepo && !lastPublisher && !author) {
    return; // ignore this package, we cannot link it to anyone
  }

  const repoInfo = getRepositoryInfo(defaultRepository);
  // If defaultRepository is undefined or it does not have an URL
  // we don't include it.
  const repository: Repo | null =
    defaultRepository?.url && repoInfo
      ? {
          ...defaultRepository, // Default info: type, url
          ...repoInfo, // Extra info: host, project, user...
          head: cleaned.gitHead,
          branch: cleaned.gitHead || 'master',
        }
      : null;

  const types = getTypes(cleaned);

  const owner = getOwner({ repository, lastPublisher, author }); // always favor the repository owner
  const { computedKeywords, computedMetadata } = getComputedData(cleaned);
  const keywords = getKeywords(cleaned);

  const dependencies = cleaned.dependencies || {};
  const devDependencies = cleaned.devDependencies || {};
  const alternativeNames = getAlternativeNames(cleaned.name);
  const moduleTypes = getModuleTypes(cleaned);
  const styleTypes = getStyleTypes(cleaned);

  const tags = pkg['dist-tags'];
  // `deprecated` can be a message string or `true`; both count as deprecated.
  const isDeprecated =
    cleaned.deprecated !== undefined && cleaned.deprecated !== false;
  // npm parks hijacked/abandoned names under github.com/npm/security-holder.
  const isSecurityHeld =
    repository?.host === 'github.com' &&
    repository?.user === 'npm' &&
    repository?.project === 'security-holder';

  const rawPkg: RawPkg = {
    objectID: cleaned.name,
    rev: cleaned.other._rev,
    name: cleaned.name,
    // Download/popularity figures are filled in by later enrichment steps.
    downloadsLast30Days: 0,
    downloadsRatio: 0,
    humanDownloadsLast30Days: numeral(0).format('0.[0]a'),
    jsDelivrHits: 0,
    popular: false,
    version,
    versions,
    tags,
    description: cleaned.description ? cleaned.description : null,
    dependencies,
    devDependencies,
    originalAuthor: cleaned.other.author,
    repository,
    githubRepo,
    gitHead: githubRepo ? githubRepo.head : null, // remove this when we update to the new schema frontend
    readme: pkg.readme,
    owner,
    deprecated: isDeprecated ? cleaned.deprecated! : false,
    isDeprecated,
    deprecatedReason: isDeprecated ? String(cleaned.deprecated) : null,
    isSecurityHeld,
    homepage: getHomePage(cleaned),
    license,
    keywords,
    computedKeywords,
    computedMetadata,
    // NOTE(review): Date.parse yields NaN when created/modified are absent
    // or malformed — presumably acceptable downstream; confirm.
    created: Date.parse(cleaned.created),
    modified: Date.parse(cleaned.modified),
    lastPublisher,
    owners: (cleaned.owners || []).map(formatUser),
    bin: cleaned.bin || {},
    humanDependents: '0',
    dependents: 0,
    types,
    moduleTypes,
    styleTypes,
    changelogFilename: null,
    lastCrawl: new Date().toISOString(),
    _revision: Date.now(),
    _searchInternal: {
      alternativeNames,
      popularAlternativeNames: [],
    },
  };

  // Shrink to fit Algolia's record-size limit; undefined if still too big.
  const truncated = truncatePackage(rawPkg);

  // HTML-escape every string leaf except the readme.
  const escaped = traverse(truncated).forEach(maybeEscape);

  datadog.timing('formatPkg', Date.now() - start);
  return escaped;
}
{ 246 | [pkg.version]: pkg.versions[pkg.version]!, 247 | } 248 | : {}; 249 | smallerPkg.tags = pkg?.tags?.latest 250 | ? { 251 | latest: pkg.tags.latest, 252 | } 253 | : {}; 254 | smallerPkg.owners = smallerPkg.owner ? [smallerPkg.owner] : []; 255 | } 256 | } 257 | 258 | // This modify the type without warning, 259 | // { 260 | // const { isTooBig } = checkSize(smallerPkg); 261 | // if (isTooBig) { 262 | // smallerPkg = { 263 | // name: smallerPkg.name, 264 | // readme: smallerPkg.readme, 265 | // }; 266 | // } 267 | // } 268 | 269 | { 270 | const { isTooBig } = checkSize(smallerPkg); 271 | if (isTooBig) { 272 | return; 273 | } 274 | } 275 | 276 | return smallerPkg; 277 | } 278 | 279 | function maybeEscape(this: any, node: any): void { 280 | if (this.isLeaf && typeof node === 'string') { 281 | if (this.key === 'readme') { 282 | this.update(node); 283 | } else { 284 | this.update(escape(node)); 285 | } 286 | } 287 | } 288 | 289 | function getAuthor(cleaned: NicePackageType): Owner | null { 290 | if (cleaned.other.author && typeof cleaned.other.author === 'object') { 291 | return formatUser(cleaned.other.author); 292 | } 293 | if (Array.isArray(cleaned.owners) && typeof cleaned.owners[0] === 'object') { 294 | return formatUser(cleaned.owners[0]); 295 | } 296 | return null; 297 | } 298 | 299 | function getLicense(cleaned: NicePackageType): string | null { 300 | if (!cleaned.license) { 301 | return null; 302 | } 303 | if ( 304 | typeof cleaned.license === 'object' && 305 | typeof cleaned.license.type === 'string' 306 | ) { 307 | return cleaned.license.type; 308 | } 309 | 310 | if (typeof cleaned.license === 'string') { 311 | return cleaned.license; 312 | } 313 | return null; 314 | } 315 | 316 | function getOwner({ 317 | repository, 318 | lastPublisher, 319 | author, 320 | }: { 321 | repository: RawPkg['repository'] | null; 322 | lastPublisher: RawPkg['lastPublisher'] | null; 323 | author: NicePackageType['other']['author'] | null; 324 | }): Owner | null { 325 | if 
(repository?.user) { 326 | const { user } = repository; 327 | 328 | if (repository.host === 'github.com') { 329 | return { 330 | name: user, 331 | avatar: `https://github.com/${user}.png`, 332 | link: `https://github.com/${user}`, 333 | }; 334 | } 335 | 336 | if (repository.host === 'gitlab.com') { 337 | return { 338 | name: user, 339 | avatar: lastPublisher?.avatar, 340 | link: `https://gitlab.com/${user}`, 341 | }; 342 | } 343 | 344 | if (repository.host === 'bitbucket.org') { 345 | return { 346 | name: user, 347 | avatar: `https://bitbucket.org/account/${user}/avatar`, 348 | link: `https://bitbucket.org/${user}`, 349 | }; 350 | } 351 | } 352 | 353 | if (lastPublisher) { 354 | return lastPublisher; 355 | } 356 | 357 | return author || null; 358 | } 359 | 360 | function getGravatar(user: GetUser): string { 361 | if ( 362 | !user.email || 363 | typeof user.email !== 'string' || 364 | user.email.indexOf('@') === -1 365 | ) { 366 | return defaultGravatar; 367 | } 368 | 369 | return gravatarUrl(user.email); 370 | } 371 | 372 | export function getVersions( 373 | cleaned: Pick, 374 | rawPkg: Pick 375 | ): Record { 376 | if (cleaned?.other?.time) { 377 | const realVersions = Object.keys(rawPkg.versions); 378 | 379 | return Object.fromEntries( 380 | Object.entries(cleaned.other.time).filter(([key]) => 381 | realVersions.includes(key) 382 | ) 383 | ); 384 | } 385 | return {}; 386 | } 387 | 388 | function getComputedData(cleaned: NicePackageType): ComputedMeta { 389 | const res: ComputedMeta = { computedKeywords: [], computedMetadata: {} }; 390 | registrySubsetRules.forEach((matcher) => { 391 | const { include, metadata, name } = matcher(cleaned); 392 | if (!include) { 393 | return; 394 | } 395 | res.computedKeywords.push(name); 396 | res.computedMetadata = { 397 | ...res.computedMetadata, 398 | ...metadata, 399 | }; 400 | }); 401 | return res; 402 | } 403 | 404 | function getKeywords(cleaned: NicePackageType): string[] { 405 | if (cleaned.keywords) { 406 | if 
(Array.isArray(cleaned.keywords)) { 407 | return [...cleaned.keywords]; 408 | } 409 | if (typeof cleaned.keywords === 'string') { 410 | return [cleaned.keywords]; 411 | } 412 | } 413 | return []; 414 | } 415 | 416 | function getGitHubRepoInfo({ 417 | repository, 418 | gitHead = 'master', 419 | }: { 420 | repository: PackageRepo; 421 | gitHead?: string; 422 | }): GithubRepo | null { 423 | const result = repository.url.match( 424 | /^https:\/\/(?:www\.)?github.com\/([^/]+)\/([^/]+)(\/.+)?$/ 425 | ); 426 | 427 | if (!result) { 428 | return null; 429 | } 430 | 431 | if (result.length < 3) { 432 | return null; 433 | } 434 | 435 | const head = gitHead; 436 | const [, user, project, path = ''] = result; 437 | 438 | return { 439 | user: user!, 440 | project: project!, 441 | path, 442 | head, 443 | }; 444 | } 445 | 446 | function getHomePage(pkg: NicePackageType): string | null { 447 | if ( 448 | pkg.homepage && 449 | typeof pkg.homepage === 'string' && // if there's a homepage 450 | (!pkg.repository || // and there's no repo, 451 | typeof pkg.repository !== 'string' || // or repo is not a string 452 | pkg.homepage.indexOf(pkg.repository) < 0) // or repo is different than homepage 453 | ) { 454 | return pkg.homepage; // then we consider it a valuable homepage 455 | } 456 | 457 | return null; 458 | } 459 | 460 | /** 461 | * Get info from urls like this: (has multiple packages in one repo, like babel does) 462 | * https://github.com/babel/babel/tree/master/packages/babel 463 | * https://gitlab.com/user/repo/tree/master/packages/project1 464 | * https://bitbucket.org/user/repo/src/ae8df4cd0e809a789e3f96fd114075191c0d5c8b/packages/project1/. 465 | * 466 | * This function is like getGitHubRepoInfo (above), but support github, gitlab and bitbucket. 
467 | */ 468 | function getRepositoryInfoFromHttpUrl(repository: string): Repo | null { 469 | const result = repository.match( 470 | /^https?:\/\/(?:www\.)?((?:github|gitlab|bitbucket)).((?:com|org))\/([^/]+)\/([^/]+)(\/.+)?$/ 471 | ); 472 | 473 | if (!result || result.length < 6) { 474 | return null; 475 | } 476 | 477 | const [, domain, domainTld, user, project, path = ''] = result; 478 | 479 | return { 480 | url: repository, 481 | host: `${domain}.${domainTld}`, 482 | user: user!, 483 | project: project!, 484 | path, 485 | }; 486 | } 487 | 488 | export function getRepositoryInfo( 489 | repository: GetPackage['repository'] | string 490 | ): Repo | null { 491 | if (!repository) { 492 | return null; 493 | } 494 | 495 | const url = typeof repository === 'string' ? repository : repository.url; 496 | const path = typeof repository === 'string' ? '' : repository.directory || ''; 497 | 498 | if (!url) { 499 | return null; 500 | } 501 | 502 | /** 503 | * Get information using hosted-git-info. 504 | */ 505 | try { 506 | const repositoryInfo = hostedGitInfo.fromUrl(url); 507 | 508 | if (repositoryInfo) { 509 | const { project, user, domain } = repositoryInfo; 510 | return { 511 | url, 512 | project, 513 | user, 514 | host: domain, 515 | path: path.replace(/^[./]+/, ''), 516 | }; 517 | } 518 | } catch { 519 | // Ignore. 520 | } 521 | 522 | /** 523 | * Unfortunately, hosted-git-info can't handle URL like this: (has path) 524 | * https://github.com/babel/babel/tree/master/packages/babel-core 525 | * so we need to do it. 
526 | */ 527 | const repositoryInfoFromUrl = getRepositoryInfoFromHttpUrl(url); 528 | if (!repositoryInfoFromUrl) { 529 | return null; 530 | } 531 | return { 532 | ...repositoryInfoFromUrl, 533 | path: path.replace(/^[./]+/, '') || repositoryInfoFromUrl.path, 534 | }; 535 | } 536 | 537 | function formatUser(user: GetUser): Owner { 538 | return { 539 | ...user, 540 | avatar: getGravatar(user), 541 | link: `https://www.npmjs.com/~${encodeURIComponent(user.name)}`, 542 | }; 543 | } 544 | 545 | function getTypes(pkg: NicePackageType): RawPkg['types'] { 546 | // The cheap and simple (+ recommended by TS) way 547 | // of adding a types section to your package.json 548 | if (pkg.types) { 549 | return { ts: 'included' }; 550 | } 551 | 552 | // Older, but still works way of defining your types 553 | if (pkg.typings) { 554 | return { ts: 'included' }; 555 | } 556 | 557 | return { 558 | ts: { possible: true }, 559 | }; 560 | } 561 | 562 | function getAlternativeNames(name: string): string[] { 563 | const alternativeNames = new Set(); 564 | 565 | const concatenatedName = name.replace(/[-/@_.]+/g, ''); 566 | alternativeNames.add(concatenatedName); 567 | 568 | const splitName = name.replace(/[-/@_.]+/g, ' '); 569 | alternativeNames.add(splitName); 570 | 571 | const isDotJs = name.endsWith('.js'); 572 | const isJsSuffix = name.match(/\.?js$/); 573 | 574 | if (isDotJs) { 575 | alternativeNames.add(name.substring(0, name.length - 3)); 576 | } else if (isJsSuffix) { 577 | alternativeNames.add(name.substring(0, name.length - 2)); 578 | } else { 579 | alternativeNames.add(`${name}.js`); 580 | alternativeNames.add(`${name}js`); 581 | } 582 | 583 | alternativeNames.add(name); 584 | 585 | return Array.from(alternativeNames); 586 | } 587 | 588 | export function getMains(pkg: Pick): string[] { 589 | if (Array.isArray(pkg.main)) { 590 | // we can not deal with non-string mains for now 591 | return pkg.main.filter((main) => typeof main === 'string'); 592 | } 593 | if (typeof pkg.main === 
'string') { 594 | return [pkg.main]; 595 | } 596 | if (typeof pkg.main === 'undefined') { 597 | return ['index.js']; 598 | } 599 | // we can not deal with non-array ||non-string mains for now 600 | return []; 601 | } 602 | 603 | export function getExportKeys( 604 | exp: NicePackageType['exports'] | string 605 | ): string[] { 606 | if (typeof exp !== 'object' || exp === null) { 607 | return []; 608 | } 609 | const keys = Object.keys(exp); 610 | const nestedKeys = keys.flatMap((key) => getExportKeys(exp[key])); 611 | return [...keys, ...nestedKeys]; 612 | } 613 | 614 | const typeToModuleTypeMapping: Record< 615 | Required['type'], 616 | ModuleType 617 | > = { 618 | commonjs: 'cjs', 619 | module: 'esm', 620 | }; 621 | 622 | function getModuleTypes(pkg: NicePackageType): ModuleType[] { 623 | const moduleTypes: Set = new Set(); 624 | 625 | // type is declared 626 | if (pkg.type) { 627 | moduleTypes.add(typeToModuleTypeMapping[pkg.type]); 628 | } 629 | 630 | // get all explicit exports (supporting cjs in esm or other way round) 631 | // reference: https://nodejs.org/api/packages.html 632 | const exportKeys = getExportKeys(pkg.exports); 633 | if (exportKeys.includes('import')) { 634 | moduleTypes.add('esm'); 635 | } 636 | if (exportKeys.includes('require')) { 637 | moduleTypes.add('cjs'); 638 | } 639 | 640 | // module (non-standard) is declared 641 | if (typeof pkg.module === 'string') { 642 | moduleTypes.add('esm'); 643 | } 644 | 645 | // check the extension of each of the "main" values 646 | getMains(pkg).forEach((main) => { 647 | if (main.endsWith('.mjs')) { 648 | moduleTypes.add('esm'); 649 | } 650 | if (main.endsWith('.cjs')) { 651 | moduleTypes.add('cjs'); 652 | } 653 | }); 654 | 655 | // add a default value to make filtering possible 656 | if (moduleTypes.size === 0) { 657 | moduleTypes.add('unknown'); 658 | } 659 | 660 | return [...moduleTypes]; 661 | } 662 | 663 | function getStyleTypes(pkg: NicePackageType): StyleType[] { 664 | // style not declared - we will 
detect it later based on file list 665 | if (typeof pkg.style !== 'string') { 666 | return []; 667 | } 668 | 669 | const ext = pkg.style.split('.').pop(); 670 | 671 | return ext ? [ext.toLowerCase()] : []; 672 | } 673 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-process-exit */ 2 | 3 | import 'elastic-apm-node/start'; 4 | 5 | import type http from 'http'; 6 | 7 | import ms from 'ms'; 8 | 9 | import { version } from '../package.json'; 10 | 11 | import { StateManager } from './StateManager'; 12 | import * as algolia from './algolia/index'; 13 | import { createAPI } from './api'; 14 | import { Bootstrap } from './bootstrap'; 15 | import { config } from './config'; 16 | import * as jsDelivr from './jsDelivr/index'; 17 | import * as npm from './npm/index'; 18 | import * as typescript from './typescript/index'; 19 | import { datadog } from './utils/datadog'; 20 | import { log } from './utils/log'; 21 | import * as sentry from './utils/sentry'; 22 | import { Watch } from './watch'; 23 | 24 | const KILL_PROCESS_EVERY_MS = ms('4 hours'); 25 | 26 | class Main { 27 | bootstrap: Bootstrap | undefined; 28 | watch: Watch | undefined; 29 | healthApi: http.Server | undefined; 30 | 31 | async preload(): Promise { 32 | await Promise.all([ 33 | jsDelivr.loadHits(), 34 | npm.loadTotalDownloads(), 35 | typescript.loadTypesIndex(), 36 | ]); 37 | } 38 | 39 | async run(): Promise { 40 | log.info('🗿 npm ↔️ Algolia replication starts ⛷ 🐌 🛰', { version }); 41 | let start = Date.now(); 42 | 43 | // We schedule to kill the process: 44 | // - reset cache 45 | // - maybe retrigger bootstrap 46 | setTimeout(() => { 47 | log.info('👋 Scheduled process cleaning'); 48 | close(); 49 | }, KILL_PROCESS_EVERY_MS).unref(); 50 | 51 | this.healthApi = createAPI(); 52 | 53 | // first we make sure the bootstrap index has the correct settings 54 | start 
= Date.now(); 55 | 56 | log.info('💪 Setting up Algolia', config.appId, [ 57 | config.bootstrapIndexName, 58 | config.indexName, 59 | ]); 60 | const algoliaStore = await algolia.prepare(config); 61 | datadog.timing('main.init_algolia', Date.now() - start); 62 | 63 | // Create State Manager that holds progression of indexing 64 | const stateManager = new StateManager(algoliaStore.mainIndex); 65 | 66 | const scheduleRefresh = (delay = ms('1 hour')): void => { 67 | setTimeout(() => { 68 | this.preload() 69 | .then(() => { 70 | scheduleRefresh(); 71 | }) 72 | .catch(() => { 73 | scheduleRefresh(ms('1 minute')); 74 | }); 75 | }, delay).unref(); 76 | }; 77 | 78 | // Preload some useful data 79 | await this.preload(); 80 | scheduleRefresh(); 81 | 82 | this.bootstrap = new Bootstrap(stateManager, algoliaStore); 83 | this.watch = new Watch(stateManager, algoliaStore); 84 | 85 | if (!(await this.bootstrap.isDone())) { 86 | this.bootstrap.on('finished', async () => { 87 | await this.watch!.run(); 88 | }); 89 | 90 | // then we run the bootstrap 91 | // after a bootstrap is done, it's moved to main (with settings) 92 | // if it was already finished, we will set the settings on the main index 93 | await this.bootstrap.run(); 94 | } else { 95 | await this.watch.run(); 96 | } 97 | } 98 | 99 | async stop(): Promise { 100 | if (this.bootstrap) { 101 | await this.bootstrap.stop(); 102 | } 103 | if (this.watch) { 104 | await this.watch.stop(); 105 | } 106 | if (this.healthApi) { 107 | await new Promise((resolve) => { 108 | this.healthApi!.close(resolve); 109 | }); 110 | } 111 | log.info('Stopped Main gracefully'); 112 | } 113 | } 114 | 115 | const main = new Main(); 116 | 117 | process.on('unhandledRejection', (err) => { 118 | sentry.report(new Error('unhandledRejection'), { err }); 119 | close(); 120 | }); 121 | process.on('uncaughtException', (err) => { 122 | sentry.report(new Error('uncauthexception'), { err }); 123 | }); 124 | 125 | (async (): Promise => { 126 | try { 127 | await 
main.run(); 128 | } catch (err) { 129 | sentry.report(new Error('Error during run'), { err }); 130 | close(); 131 | } 132 | })(); 133 | 134 | async function close(): Promise { 135 | log.info('Close was requested'); 136 | setTimeout(() => { 137 | // grace period in case a lot of jobs are pending 138 | process.exit(1); 139 | }, 90000).unref(); 140 | 141 | // datadog.close(); 142 | await sentry.drain(); 143 | await main.stop(); 144 | 145 | process.nextTick(() => { 146 | process.exit(0); 147 | }); 148 | } 149 | 150 | process.once('SIGINT', async () => { 151 | await close(); 152 | }); 153 | 154 | process.once('SIGTERM', async () => { 155 | await close(); 156 | }); 157 | -------------------------------------------------------------------------------- /src/indexers/Indexer.ts: -------------------------------------------------------------------------------- 1 | import { setTimeout } from 'node:timers/promises'; 2 | 3 | import type { SearchIndex } from 'algoliasearch'; 4 | import chalk from 'chalk'; 5 | import type { DebouncedFunc } from 'lodash'; 6 | import _ from 'lodash'; 7 | import ms from 'ms'; 8 | import PQueue from 'p-queue'; 9 | 10 | import type { AlgoliaStore } from '../algolia'; 11 | import { log } from '../utils/log'; 12 | import * as sentry from '../utils/sentry'; 13 | 14 | export abstract class Indexer { 15 | protected mainIndex: SearchIndex; 16 | protected algoliaStore: AlgoliaStore; 17 | 18 | private recordQueue: PQueue; 19 | private recordsQueueConcurrency: number = 240; 20 | 21 | private taskQueue: PQueue; 22 | private taskQueueConcurrency: number = 120; 23 | 24 | private isRunning: boolean = false; 25 | private readonly throttledFetchFacets: DebouncedFunc<() => Promise>; 26 | 27 | protected abstract readonly facetField: string; 28 | 29 | get facetFilter(): string | undefined { 30 | return undefined; 31 | } 32 | 33 | get queued(): number { 34 | return this.taskQueue.size; 35 | } 36 | 37 | get running(): number { 38 | return this.taskQueue.pending; 39 | } 40 
  /**
   * Async generator yielding pages of records from the main index.
   *
   * Iterates facet-by-facet (facet list is throttled to at most one
   * refresh per minute; on error it resolves to []), and for each facet
   * pages through the browse endpoint using its cursor until exhausted
   * or the indexer is stopped.
   */
  async *fetchRecords(): AsyncGenerator {
    const facets = await this.throttledFetchFacets();

    if (!facets?.length) {
      return [];
    }

    for (const facet of facets) {
      let cursor;

      while (this.isRunning) {
        // Using direct API call here because the client library doesn't allow
        // for asynchronous callbacks between pages.
        const response = await this.algoliaStore.client.customRequest({
          method: 'GET',
          path: `/1/indexes/${this.mainIndex.indexName}/browse`,
          data: {
            filters: `${this.facetFilter ? `${this.facetFilter} AND ` : ''}${
              this.facetField
            }:${facet}`,
            // Omit the cursor on the first page of each facet.
            ...(cursor ? { cursor } : {}),
          },
          cacheable: false,
        });

        yield response.hits;

        // No cursor in the response means this facet is exhausted.
        if (!response.cursor) {
          break;
        }

        cursor = response.cursor;
      }
    }
  }
176 | while ( 177 | this.recordQueue.size || 178 | this.recordQueue.pending || 179 | this.taskQueue.size || 180 | this.taskQueue.pending 181 | ) { 182 | await setTimeout(ms('1 second')); 183 | } 184 | 185 | return this.runInternal(); 186 | } 187 | 188 | async stop(force: boolean = false): Promise { 189 | this.isRunning = false; 190 | 191 | if (force) { 192 | this.recordQueue.clear(); 193 | this.taskQueue.clear(); 194 | } 195 | 196 | if (this.recordQueue.size || this.recordQueue.pending) { 197 | await this.recordQueue.onIdle(); 198 | } 199 | 200 | if (this.recordQueue.size || this.taskQueue.pending) { 201 | await this.taskQueue.onIdle(); 202 | } 203 | } 204 | 205 | abstract recordExecutor(record: TMainRecord): Promise; 206 | 207 | abstract taskExecutor(task: TTask): Promise; 208 | } 209 | -------------------------------------------------------------------------------- /src/indexers/MainBootstrapIndexer.ts: -------------------------------------------------------------------------------- 1 | import type { AlgoliaStore } from '../algolia'; 2 | import { PackageNotFoundError } from '../errors'; 3 | import { formatPkg } from '../formatPkg'; 4 | import * as npm from '../npm'; 5 | import type { PrefetchedPkg } from '../npm/Prefetcher'; 6 | import { type GetPackage } from '../npm/types'; 7 | import { saveDoc } from '../saveDocs'; 8 | import { datadog } from '../utils/datadog'; 9 | import { log } from '../utils/log'; 10 | import * as sentry from '../utils/sentry'; 11 | 12 | import { MainIndexer } from './MainIndexer'; 13 | 14 | type TaskType = { pkg: PrefetchedPkg; objectID: string; retries: number }; 15 | 16 | export class MainBootstrapIndexer extends MainIndexer { 17 | protected facetField = 'retries'; 18 | 19 | constructor(algoliaStore: AlgoliaStore) { 20 | super(algoliaStore, algoliaStore.bootstrapQueueIndex); 21 | } 22 | 23 | override async isFinished(): Promise { 24 | if (!(await super.isFinished())) { 25 | return false; 26 | } 27 | 28 | return (await 
this.fetchQueueLength()) === 0; 29 | } 30 | 31 | async markAsProcessed(objectID): Promise { 32 | await this.mainIndex 33 | .deleteObject(objectID) 34 | .wait() 35 | .catch(() => {}); 36 | } 37 | 38 | async recordExecutor(record: TaskType): Promise { 39 | await this.queueTask(record); 40 | } 41 | 42 | async taskExecutor({ pkg, objectID, retries }): Promise { 43 | log.info(`Start:`, pkg.id, retries); 44 | const start = Date.now(); 45 | 46 | try { 47 | datadog.increment('packages'); 48 | 49 | let res: GetPackage; 50 | 51 | try { 52 | res = await npm.getDocFromRegistry(pkg.id); 53 | } catch (error) { 54 | if (error instanceof PackageNotFoundError) { 55 | log.warn('Package not found in the registry', error); 56 | } else { 57 | log.error('Got an error', error); 58 | } 59 | 60 | await this.markAsProcessed(objectID); 61 | return; 62 | } 63 | 64 | const formatted = formatPkg(res); 65 | 66 | if (!formatted) { 67 | log.error('Empty formatted output', pkg); 68 | await this.markAsProcessed(objectID); 69 | return; 70 | } 71 | 72 | await saveDoc({ 73 | formatted, 74 | index: this.algoliaStore.bootstrapIndex, 75 | oneTimeDataIndex: this.algoliaStore.oneTimeDataIndex, 76 | periodicDataIndex: this.algoliaStore.periodicDataIndex, 77 | }); 78 | 79 | await this.markAsProcessed(objectID); 80 | log.info(`Done:`, pkg.id, retries); 81 | } catch (err: any) { 82 | log.info(`Failed:`, pkg.id, retries, err.statusCode); 83 | 84 | if (err.statusCode === 404) { 85 | // Store in not-found index 86 | datadog.increment('job.notFound'); 87 | 88 | await this.algoliaStore.bootstrapNotFoundIndex 89 | .saveObject({ 90 | name: pkg.id, 91 | objectID: pkg.id, 92 | err: err instanceof Error ? 
err.toString() : err, 93 | date: new Date().toISOString(), 94 | movedBy: 'bootstrap', 95 | }) 96 | .catch(() => {}); 97 | 98 | await this.markAsProcessed(objectID); 99 | return; 100 | } 101 | 102 | sentry.report(new Error('Error during job'), { 103 | statusCode: err.statusCode, 104 | err, 105 | }); 106 | 107 | datadog.increment('job.retries'); 108 | 109 | await this.mainIndex 110 | .partialUpdateObject({ 111 | objectID, 112 | retries: retries + 1, 113 | }) 114 | .wait() 115 | .catch(() => {}); 116 | } finally { 117 | datadog.timing('loop', Date.now() - start); 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/indexers/MainIndexer.ts: -------------------------------------------------------------------------------- 1 | import { Indexer } from './Indexer'; 2 | 3 | export abstract class MainIndexer extends Indexer { 4 | async fetchQueueLength(): Promise { 5 | const { nbHits } = await this.mainIndex.search('', { 6 | filters: this.facetFilter, 7 | }); 8 | 9 | return nbHits; 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/indexers/MainWatchIndexer.ts: -------------------------------------------------------------------------------- 1 | import ms from 'ms'; 2 | import type { DatabaseChangesResultItem } from 'nano'; 3 | 4 | import type { AlgoliaStore } from '../algolia'; 5 | import { PackageNotFoundError } from '../errors'; 6 | import { formatPkg } from '../formatPkg'; 7 | import * as npm from '../npm'; 8 | import type { GetPackage } from '../npm/types'; 9 | import { saveDoc } from '../saveDocs'; 10 | import { datadog } from '../utils/datadog'; 11 | import { log } from '../utils/log'; 12 | import * as sentry from '../utils/sentry'; 13 | 14 | import { MainIndexer } from './MainIndexer'; 15 | 16 | type TaskType = { 17 | seq: number; 18 | name: string; 19 | objectID: string; 20 | retries: number; 21 | change: DatabaseChangesResultItem; 22 | }; 23 | 24 | 
export class MainWatchIndexer extends MainIndexer { 25 | protected facetField = 'retries'; 26 | protected cleanupInterval: NodeJS.Timer | undefined; 27 | 28 | override get facetFilter(): string { 29 | return 'NOT isProcessed:1'; 30 | } 31 | 32 | constructor(algoliaStore: AlgoliaStore) { 33 | super(algoliaStore, algoliaStore.mainQueueIndex); 34 | } 35 | 36 | async markAsProcessed(objectID, seq): Promise { 37 | await this.mainIndex 38 | .partialUpdateObject({ 39 | objectID, 40 | isProcessed: 1, 41 | seq: { _operation: 'IncrementFrom', value: seq }, 42 | }) 43 | .wait() 44 | .catch(() => {}); 45 | } 46 | 47 | async recordExecutor(record: TaskType): Promise { 48 | await this.queueTask(record); 49 | } 50 | 51 | override run(): void { 52 | this.cleanupInterval = setInterval(() => { 53 | this.mainIndex 54 | .deleteBy({ 55 | filters: 'isProcessed:1', 56 | }) 57 | .catch((e) => sentry.report(e)); 58 | }, ms('1 minute')); 59 | 60 | super.run(); 61 | } 62 | 63 | override async stop(force: boolean = false): Promise { 64 | clearInterval(this.cleanupInterval); 65 | return super.stop(force); 66 | } 67 | 68 | async taskExecutor({ 69 | seq, 70 | objectID, 71 | retries, 72 | change, 73 | }: TaskType): Promise { 74 | log.info(`Start:`, change.id, retries); 75 | const start = Date.now(); 76 | 77 | try { 78 | datadog.increment('packages'); 79 | 80 | if (change.deleted) { 81 | await this.algoliaStore.mainIndex.deleteObject(change.id); 82 | } else { 83 | if (change.changes.length <= 0) { 84 | log.error('Document without change'); 85 | await this.markAsProcessed(objectID, seq); 86 | return; 87 | } 88 | 89 | let res: GetPackage; 90 | 91 | try { 92 | res = await npm.getDocFromRegistry(change.id); 93 | } catch (error) { 94 | if (error instanceof PackageNotFoundError) { 95 | log.warn('Package not found in the registry', error); 96 | } else { 97 | log.error('Got an error', error); 98 | } 99 | 100 | await this.markAsProcessed(objectID, seq); 101 | return; 102 | } 103 | 104 | const formatted = 
formatPkg(res); 105 | 106 | if (!formatted) { 107 | await this.markAsProcessed(objectID, seq); 108 | return; 109 | } 110 | 111 | await saveDoc({ 112 | formatted, 113 | index: this.algoliaStore.mainIndex, 114 | oneTimeDataIndex: this.algoliaStore.oneTimeDataIndex, 115 | periodicDataIndex: this.algoliaStore.periodicDataIndex, 116 | }); 117 | } 118 | 119 | await this.markAsProcessed(objectID, seq); 120 | log.info(`Done:`, change.id, retries); 121 | } catch (err: any) { 122 | log.info(`Failed:`, change.id, retries, err.statusCode); 123 | 124 | if (err.statusCode === 404) { 125 | // Store in not-found index 126 | datadog.increment('job.notFound'); 127 | 128 | await this.algoliaStore.mainNotFoundIndex 129 | .saveObject({ 130 | name: change.id, 131 | objectID: change.id, 132 | err: err instanceof Error ? err.toString() : err, 133 | date: new Date().toISOString(), 134 | movedBy: 'watch', 135 | }) 136 | .catch(() => {}); 137 | 138 | await this.markAsProcessed(objectID, seq); 139 | return; 140 | } 141 | 142 | sentry.report(new Error('Error during job'), { 143 | statusCode: err.statusCode, 144 | err, 145 | }); 146 | 147 | datadog.increment('job.retries'); 148 | 149 | await this.mainIndex 150 | .partialUpdateObject({ 151 | objectID, 152 | retries: retries + 1, 153 | }) 154 | .wait() 155 | .catch(() => {}); 156 | } finally { 157 | datadog.timing('loop', Date.now() - start); 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/indexers/OneTimeBackgroundIndexer.ts: -------------------------------------------------------------------------------- 1 | import ms from 'ms'; 2 | 3 | import type { FinalPkg } from '../@types/pkg'; 4 | import { getChangelogBackground } from '../changelog'; 5 | import { getFileListMetadata } from '../saveDocs'; 6 | import { datadog } from '../utils/datadog'; 7 | import * as sentry from '../utils/sentry'; 8 | import { offsetToTimestamp } from '../utils/time'; 9 | 10 | import { Indexer } from 
'./Indexer'; 11 | 12 | export type OneTimeDataObject = { 13 | name: string; 14 | objectID: string; 15 | updatedAt: string; 16 | changelogFilename: string | null; 17 | }; 18 | 19 | export class OneTimeBackgroundIndexer extends Indexer { 20 | protected readonly facetField: string = '_oneTimeDataToUpdateAt'; 21 | 22 | override get facetFilter(): string { 23 | const expired = offsetToTimestamp(0); 24 | 25 | // 0 === already processed 26 | // value in the future === errored and scheduled to retry later 27 | return `NOT ${this.facetField}:0 AND ${this.facetField} <= ${expired}`; 28 | } 29 | 30 | async patchObject( 31 | pkg: FinalPkg, 32 | patch: Partial, 33 | facetValue: number 34 | ): Promise { 35 | await this.mainIndex 36 | .partialUpdateObject( 37 | { 38 | objectID: pkg.objectID, 39 | ...patch, 40 | [this.facetField]: facetValue, 41 | _revision: { _operation: 'IncrementFrom', value: pkg._revision }, 42 | }, 43 | { createIfNotExists: false } 44 | ) 45 | .wait(); 46 | } 47 | 48 | async recordExecutor(pkg: FinalPkg): Promise { 49 | await this.queueTask(pkg); 50 | } 51 | 52 | override async stop(): Promise { 53 | return super.stop(true); 54 | } 55 | 56 | async taskExecutor(pkg: FinalPkg): Promise { 57 | try { 58 | const { metadata } = await getFileListMetadata(pkg); 59 | const { changelogFilename } = metadata.changelogFilename 60 | ? 
metadata 61 | : await getChangelogBackground(pkg); 62 | 63 | const data = { 64 | name: `${pkg.name}@${pkg.version}`, 65 | objectID: `${pkg.name}@${pkg.version}`, 66 | updatedAt: new Date().toISOString(), 67 | changelogFilename, 68 | }; 69 | 70 | await Promise.all([ 71 | this.algoliaStore.oneTimeDataIndex.saveObject(data), 72 | this.patchObject( 73 | pkg, 74 | { 75 | ...metadata, 76 | changelogFilename, 77 | }, 78 | 0 79 | ), 80 | ]); 81 | 82 | datadog.increment('oneTimeDataIndex.success'); 83 | } catch (err) { 84 | datadog.increment('oneTimeDataIndex.failure'); 85 | sentry.report(new Error(`Error in ${this.constructor.name}`), { err }); 86 | 87 | await this.patchObject( 88 | pkg, 89 | {}, 90 | offsetToTimestamp(ms('1 week'), new Date(pkg[this.facetField])) 91 | ).catch(() => {}); 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/indexers/PeriodicBackgroundIndexer.ts: -------------------------------------------------------------------------------- 1 | import type { SearchIndex } from 'algoliasearch'; 2 | import Bluebird from 'bluebird'; 3 | import ms from 'ms'; 4 | 5 | import type { FinalPkg } from '../@types/pkg'; 6 | import type { AlgoliaStore } from '../algolia'; 7 | import { PackageNotFoundError } from '../errors'; 8 | import * as jsDelivr from '../jsDelivr'; 9 | import type { DownloadsData } from '../npm'; 10 | import { computeDownload, getDocFromRegistry, getDownloads } from '../npm'; 11 | import { getPopularAlternativeNames } from '../saveDocs'; 12 | import { datadog } from '../utils/datadog'; 13 | import * as sentry from '../utils/sentry'; 14 | import { offsetToTimestamp, round } from '../utils/time'; 15 | 16 | import { Indexer } from './Indexer'; 17 | 18 | export type PeriodicDataObject = DownloadsData & { 19 | name: string; 20 | objectID: string; 21 | updatedAt: string; 22 | }; 23 | 24 | type Task = { pkg: FinalPkg[] }; 25 | 26 | export class PeriodicBackgroundIndexer extends Indexer { 27 | 
protected readonly facetField: string = '_periodicDataUpdatedAt'; 28 | private packagesPerBatch: number = 127; 29 | private unscopedPackages: FinalPkg[]; 30 | private notFoundIndex: SearchIndex; 31 | 32 | override get facetFilter(): string { 33 | const expired = offsetToTimestamp(-ms('30 days')); 34 | return `${this.facetField} < ${expired}`; 35 | } 36 | 37 | constructor( 38 | algoliaStore: AlgoliaStore, 39 | mainIndex: SearchIndex, 40 | notFoundIndex: SearchIndex 41 | ) { 42 | super(algoliaStore, mainIndex); 43 | 44 | this.notFoundIndex = notFoundIndex; 45 | this.unscopedPackages = []; 46 | } 47 | 48 | override async flush(): Promise { 49 | while (this.unscopedPackages.length) { 50 | await this.queueTask({ 51 | pkg: this.unscopedPackages.splice(0, this.packagesPerBatch), 52 | }); 53 | } 54 | 55 | return super.flush(); 56 | } 57 | 58 | async recordExecutor(pkg: FinalPkg): Promise { 59 | if (pkg.objectID.startsWith('@')) { 60 | await this.queueTask({ pkg: [pkg] }); 61 | return; 62 | } 63 | 64 | if (!this.unscopedPackages.find((p) => p.name === pkg.name)) { 65 | this.unscopedPackages.push(pkg); 66 | } 67 | 68 | if (this.unscopedPackages.length >= this.packagesPerBatch) { 69 | await this.queueTask({ 70 | pkg: this.unscopedPackages.splice(0, this.packagesPerBatch), 71 | }); 72 | } 73 | } 74 | 75 | override async stop(): Promise { 76 | return super.stop(true); 77 | } 78 | 79 | async taskExecutor(task: Task): Promise { 80 | try { 81 | const downloads = await getDownloads(task.pkg); 82 | const oneWeekAgo = offsetToTimestamp(-ms('1 week')); 83 | const dataIndexObjects: PeriodicDataObject[] = []; 84 | const patches: Array> = []; 85 | 86 | await Bluebird.map( 87 | task.pkg, 88 | async (pkg) => { 89 | const data: PeriodicDataObject = { 90 | name: pkg.name, 91 | objectID: pkg.name, 92 | updatedAt: new Date().toISOString(), 93 | totalNpmDownloads: downloads[pkg.name]?.totalNpmDownloads, 94 | packageNpmDownloads: downloads[pkg.name]?.packageNpmDownloads, 95 | }; 96 | 97 | 
dataIndexObjects.push(data); 98 | 99 | // The npm replicate API often incorrectly reports packages there were 100 | // actually deleted from the registry. If the downloads API has no 101 | // records for the package, and the package was published more than 102 | // a while ago, we check with the registry. If the registry says the 103 | // package does not exist, we delete it. 104 | if ( 105 | data.packageNpmDownloads === undefined && 106 | pkg.created < oneWeekAgo 107 | ) { 108 | try { 109 | await getDocFromRegistry(pkg.name); 110 | } catch (e) { 111 | if (e instanceof PackageNotFoundError) { 112 | datadog.increment('periodic.notFound'); 113 | 114 | await this.notFoundIndex.saveObject({ 115 | name: pkg.name, 116 | objectID: pkg.name, 117 | date: new Date().toISOString(), 118 | movedBy: 'periodicIndexer', 119 | }); 120 | 121 | await this.algoliaStore.periodicDataIndex.deleteObject( 122 | pkg.name 123 | ); 124 | 125 | await this.mainIndex.deleteObject(pkg.name).wait(); 126 | return; 127 | } 128 | } 129 | } 130 | 131 | const npmDownloads = computeDownload( 132 | pkg, 133 | data.packageNpmDownloads, 134 | data.totalNpmDownloads 135 | ); 136 | 137 | const jsDelivrHits = jsDelivr.getHit(pkg); 138 | const pkgPatch = { 139 | objectID: pkg.objectID, 140 | ...(npmDownloads || {}), 141 | ...jsDelivrHits, 142 | popular: npmDownloads?.popular || jsDelivrHits.popular, 143 | }; 144 | 145 | patches.push({ 146 | ...pkgPatch, 147 | _searchInternal: { 148 | ...pkg._searchInternal, 149 | popularAlternativeNames: getPopularAlternativeNames({ 150 | ...pkg, 151 | ...pkgPatch, 152 | }), 153 | }, 154 | [this.facetField]: round(new Date(data.updatedAt)).valueOf(), 155 | }); 156 | }, 157 | { concurrency: 20 } 158 | ); 159 | 160 | await Promise.all([ 161 | this.algoliaStore.periodicDataIndex.saveObjects(dataIndexObjects), 162 | this.mainIndex.partialUpdateObjects(patches).wait(), 163 | ]); 164 | 165 | datadog.increment('periodicDataIndex.success', task.pkg.length); 166 | } catch (err) { 167 | 
datadog.increment('periodicDataIndex.failure', task.pkg.length); 168 | sentry.report(new Error(`Error in ${this.constructor.name}`), { err }); 169 | 170 | await this.mainIndex 171 | .partialUpdateObjects( 172 | task.pkg.map((pkg) => { 173 | return { 174 | objectID: pkg.objectID, 175 | [this.facetField]: offsetToTimestamp( 176 | ms('1 day'), 177 | new Date(pkg[this.facetField]) 178 | ), 179 | }; 180 | }) 181 | ) 182 | .wait() 183 | .catch(() => {}); 184 | } 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/jsDelivr/__test__/__snapshots__/index.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`files getFilesList() should get a flat list of files 1`] = ` 4 | Array [ 5 | Object { 6 | "hash": "+uxmYs/88pYWmLwFS3M54NGjE+hX6sBmwNOpzaW1LGk=", 7 | "name": "/bin/jest.js", 8 | "size": 343, 9 | "time": "1985-10-26T08:15:00.000Z", 10 | }, 11 | Object { 12 | "hash": "MvOGr1Lc6r8wEe8GNmscm3Sx/QWEFE4Is1AZ5rQzFr8=", 13 | "name": "/build/jest.d.ts", 14 | "size": 291, 15 | "time": "1985-10-26T08:15:00.000Z", 16 | }, 17 | Object { 18 | "hash": "BEQ5sRqArzHCh5sNbwjxHRQunhxkCD1HXcM9EdYAKPc=", 19 | "name": "/build/jest.d.ts.map", 20 | "size": 171, 21 | "time": "1985-10-26T08:15:00.000Z", 22 | }, 23 | Object { 24 | "hash": "m5wVGuVr5Pq4z5L2vpeMVA3rbLV4kQ0MCPuo0newsmY=", 25 | "name": "/build/jest.js", 26 | "size": 1030, 27 | "time": "1985-10-26T08:15:00.000Z", 28 | }, 29 | Object { 30 | "hash": "m/vOMvpK2FU19W9PYavnEExEToN7HHU1mb/f/ooU3eQ=", 31 | "name": "/LICENSE", 32 | "size": 1099, 33 | "time": "1985-10-26T08:15:00.000Z", 34 | }, 35 | Object { 36 | "hash": "9hWvkPsgtCTc1w0lswu1AO+Q+S19Dppeg5bNklG/Khg=", 37 | "name": "/package.json", 38 | "size": 925, 39 | "time": "1985-10-26T08:15:00.000Z", 40 | }, 41 | Object { 42 | "hash": "MPu0d2f8or6adBXZZLUNh6vL7Yeg34MmOBiupdclu10=", 43 | "name": "/README.md", 44 | "size": 551, 45 | 
"time": "1985-10-26T08:15:00.000Z", 46 | }, 47 | Object { 48 | "hash": "CycshPBWVvIRZozw+b1pnAvKYC1Q7aPvcT8tS+HPepU=", 49 | "name": "/tsconfig.json", 50 | "size": 162, 51 | "time": "1985-10-26T08:15:00.000Z", 52 | }, 53 | Object { 54 | "hash": "kt2uoTK/NmyQe2OUiNwpdwxV4RxgS2gW9rEgOtj+lZU=", 55 | "name": "/tsconfig.tsbuildinfo", 56 | "size": 220798, 57 | "time": "1985-10-26T08:15:00.000Z", 58 | }, 59 | ] 60 | `; 61 | -------------------------------------------------------------------------------- /src/jsDelivr/__test__/index.test.ts: -------------------------------------------------------------------------------- 1 | import * as api from '../index'; 2 | 3 | jest.mock('../../utils/log', () => { 4 | return { 5 | log: { 6 | info: jest.fn(), 7 | warn: jest.fn(), 8 | error: jest.fn(), 9 | }, 10 | }; 11 | }); 12 | 13 | jest.setTimeout(10000); 14 | 15 | // eslint-disable-next-line jest/require-top-level-describe 16 | beforeEach(() => { 17 | jest.resetAllMocks(); 18 | }); 19 | 20 | describe('hits', () => { 21 | describe('getHits()', () => { 22 | beforeAll(() => { 23 | api.hits.clear(); 24 | api.hits.set('jquery', { hits: 1234, popular: true }); 25 | }); 26 | 27 | it('should get one formatted hit', () => { 28 | expect(api.getHits([{ name: 'jquery' }])).toEqual([ 29 | { 30 | jsDelivrHits: 1234, 31 | _jsDelivrPopularity: 1, 32 | popular: true, 33 | _popularName: 'jquery', 34 | }, 35 | ]); 36 | }); 37 | it('should get multiple formatted hits', () => { 38 | expect( 39 | api.getHits([{ name: 'jquery' }, { name: 'thispackagedoesnotexist' }]) 40 | ).toEqual([ 41 | { 42 | jsDelivrHits: 1234, 43 | _jsDelivrPopularity: 1, 44 | popular: true, 45 | _popularName: 'jquery', 46 | }, 47 | { 48 | jsDelivrHits: 0, 49 | _jsDelivrPopularity: 0, 50 | popular: false, 51 | }, 52 | ]); 53 | }); 54 | }); 55 | 56 | describe('loadHits()', () => { 57 | beforeAll(async () => { 58 | await api.loadHits(); 59 | }); 60 | it('should download all packages hits', () => { 61 | 
expect(api.hits.size).toBeGreaterThan(30000); // 32509 (2022-11) 62 | }); 63 | 64 | it('should get one hit', () => { 65 | expect(api.hits.get('jquery')?.hits).toBeGreaterThan(1000000000); // 1065750968 (2019-08) 66 | }); 67 | 68 | it('should not get one hit', () => { 69 | expect(api.hits.get('thispackagedoesnotexist')?.hits).toBeUndefined(); 70 | }); 71 | }); 72 | }); 73 | 74 | describe('files', () => { 75 | describe('getFilesList()', () => { 76 | it('should get a flat list of files', async () => { 77 | const files = await api.getFilesList({ 78 | name: 'jest', 79 | version: '24.8.0', 80 | }); 81 | expect(files).toMatchSnapshot(); 82 | }); 83 | 84 | it('should not get a files list for fake package', async () => { 85 | const files = await api.getFilesList({ 86 | name: 'thispackagedoesnotexist', 87 | version: '3.33.0', 88 | }); 89 | expect(files).toEqual([]); 90 | }); 91 | }); 92 | }); 93 | -------------------------------------------------------------------------------- /src/jsDelivr/__test__/pkgTypes.test.ts: -------------------------------------------------------------------------------- 1 | import type { File } from '../index'; 2 | import * as api from '../pkgTypes'; 3 | 4 | const BASE_FILE: File = { 5 | name: '0', 6 | hash: 'sha256:', 7 | size: 0, 8 | time: '1985-10-26T08:15:00.000Z', 9 | }; 10 | 11 | describe('package module/style types', () => { 12 | describe('package style types', () => { 13 | it('should return correct style types for multiple packages', () => { 14 | const styleTypes = api.getStyleTypesForAll( 15 | [ 16 | { styleTypes: [] }, 17 | { styleTypes: [] }, 18 | { styleTypes: [] }, 19 | { styleTypes: [] }, 20 | { styleTypes: ['css'] }, 21 | { styleTypes: [] }, 22 | ], 23 | [ 24 | [], 25 | [{ ...BASE_FILE, name: '/dist/style/style.min.css' }], 26 | [ 27 | { ...BASE_FILE, name: '/src/style/style.less' }, 28 | { ...BASE_FILE, name: '/dist/style/style.min.css' }, 29 | { ...BASE_FILE, name: '/dist/js/lib.min.js' }, 30 | { ...BASE_FILE, name: '/style.scss' 
}, 31 | ], 32 | undefined as any, 33 | [{ ...BASE_FILE, name: '/src/style/style.less' }], 34 | [{ ...BASE_FILE, name: '/DIST/STYLE/STYLE.MIN.CSS' }], 35 | ] 36 | ); 37 | expect(styleTypes).toEqual([ 38 | { styleTypes: ['none'] }, 39 | { styleTypes: ['css'] }, 40 | { styleTypes: ['less', 'css', 'scss'] }, 41 | { styleTypes: ['none'] }, 42 | { styleTypes: ['css', 'less'] }, 43 | { styleTypes: ['css'] }, 44 | ]); 45 | }); 46 | 47 | it('should ignore blacklisted paths', () => { 48 | const styleTypes = api.getStyleTypes({ styleTypes: [] }, [ 49 | { ...BASE_FILE, name: '/dist/style/style.min.css' }, 50 | { ...BASE_FILE, name: '/dist/style/_source.scss' }, 51 | { ...BASE_FILE, name: '/docs/file.scss' }, 52 | { ...BASE_FILE, name: '/test/file.scss' }, 53 | { ...BASE_FILE, name: '/.hidden/file.scss' }, 54 | { ...BASE_FILE, name: '/dist/.hidden.scss' }, 55 | { ...BASE_FILE, name: '/dist/.hidden/style.scss' }, 56 | ]); 57 | expect(styleTypes).toEqual({ styleTypes: ['css'] }); 58 | }); 59 | }); 60 | 61 | describe('package module types', () => { 62 | it('should return correct module types for multiple packages', () => { 63 | const moduleTypes = api.getModuleTypesForAll( 64 | [ 65 | { moduleTypes: ['unknown'] }, 66 | { moduleTypes: ['unknown'] }, 67 | { moduleTypes: ['unknown'] }, 68 | { moduleTypes: ['unknown'] }, 69 | { moduleTypes: ['unknown'] }, 70 | { moduleTypes: ['esm'] }, 71 | { moduleTypes: ['esm', 'cjs'] }, 72 | ], 73 | [ 74 | [], 75 | [{ ...BASE_FILE, name: '/dist/style/style.min.css' }], 76 | [{ ...BASE_FILE, name: '/dist/js/lib.min.js' }], 77 | [{ ...BASE_FILE, name: '/dist/js/lib.min.mjs' }], 78 | [{ ...BASE_FILE, name: '/dist/js/lib.min.cjs' }], 79 | [], 80 | undefined as any, 81 | ] 82 | ); 83 | 84 | expect(moduleTypes).toEqual([ 85 | { moduleTypes: ['none'] }, 86 | { moduleTypes: ['none'] }, 87 | { moduleTypes: ['unknown'] }, 88 | { moduleTypes: ['unknown'] }, 89 | { moduleTypes: ['unknown'] }, 90 | { moduleTypes: ['esm'] }, 91 | { moduleTypes: ['esm', 'cjs'] }, 
92 | ]); 93 | }); 94 | 95 | it('should ignore blacklisted paths', () => { 96 | const moduleTypes = api.getModuleTypes({ moduleTypes: ['unknown'] }, [ 97 | { ...BASE_FILE, name: '/dist/js/_hidden.mjs' }, 98 | { ...BASE_FILE, name: '/dist/js/.hidden.mjs' }, 99 | { ...BASE_FILE, name: '/docs/lib.js' }, 100 | { ...BASE_FILE, name: '/test/lib.js' }, 101 | { ...BASE_FILE, name: '/.hidden/lib.cjs' }, 102 | { ...BASE_FILE, name: '/dist/.hidden/lib.js' }, 103 | ]); 104 | expect(moduleTypes).toEqual({ moduleTypes: ['none'] }); 105 | }); 106 | }); 107 | }); 108 | -------------------------------------------------------------------------------- /src/jsDelivr/index.ts: -------------------------------------------------------------------------------- 1 | import { HTTPError } from 'got/dist/source'; 2 | 3 | import type { RawPkg } from '../@types/pkg'; 4 | import { config } from '../config'; 5 | import { datadog } from '../utils/datadog'; 6 | import { log } from '../utils/log'; 7 | import { request } from '../utils/request'; 8 | import * as sentry from '../utils/sentry'; 9 | 10 | type Hit = { type: 'npm'; name: string; hits: number }; 11 | export type File = { name: string; hash: string; time: string; size: number }; 12 | export type GetHit = { 13 | popular: boolean; 14 | jsDelivrHits: number; 15 | _jsDelivrPopularity: number; 16 | _popularName?: string; 17 | }; 18 | export const hits = new Map(); 19 | 20 | /** 21 | * Load downloads hits. 
22 | */ 23 | export async function loadHits(): Promise { 24 | const start = Date.now(); 25 | log.info('📦 Loading hits from jsDelivr'); 26 | 27 | const res = await request(config.jsDelivrHitsEndpoint, { 28 | responseType: 'json', 29 | }); 30 | 31 | if (!res.body.length) { 32 | throw new Error('Empty jsDelivr data'); 33 | } 34 | 35 | hits.clear(); 36 | 37 | res.body.forEach((pkg, index) => { 38 | hits.set(pkg.name, { hits: pkg.hits, popular: index < 1000 }); 39 | }); 40 | 41 | datadog.timing('jsdelivr.loadHits', Date.now() - start); 42 | } 43 | 44 | /** 45 | * Get download hits. 46 | */ 47 | export function getHits(pkgs: Array>): GetHit[] { 48 | const start = Date.now(); 49 | const all = pkgs.map(getHit); 50 | 51 | datadog.timing('jsdelivr.getHits', Date.now() - start); 52 | return all; 53 | } 54 | 55 | export function getHit(pkg: Pick): GetHit { 56 | const data = hits.get(pkg.name); 57 | const jsDelivrHits = data?.hits || 0; 58 | const popular = data?.popular || false; 59 | 60 | return { 61 | popular, 62 | jsDelivrHits, 63 | // anything below 1000 hits/month is likely to mean that 64 | // someone just made a few random requests so we count that as 0 65 | _jsDelivrPopularity: Math.max(jsDelivrHits.toString().length - 3, 0), 66 | // similar to npm popular but we consider the top 1k packages instead 67 | ...(popular && { 68 | _popularName: pkg.name, 69 | }), 70 | }; 71 | } 72 | 73 | /** 74 | * Get one package files list. 
75 | */ 76 | export async function getFilesList( 77 | pkg: Pick 78 | ): Promise { 79 | const start = Date.now(); 80 | if (!pkg.name || !pkg.version) { 81 | throw new Error( 82 | `Package name should contain a version number: ${pkg.name}` 83 | ); 84 | } 85 | 86 | let files: File[] = []; 87 | const url = `${config.jsDelivrPackageEndpoint}/${pkg.name}@${pkg.version}/flat`; 88 | try { 89 | const response = await request<{ default: string; files: File[] }>(url, { 90 | responseType: 'json', 91 | }); 92 | 93 | if (Array.isArray(response.body.files)) { 94 | files = response.body.files; 95 | } else { 96 | sentry.report(new Error('JsDelivr network error'), { 97 | statusCode: response.statusCode, 98 | files: response.body.files, 99 | url, 100 | }); 101 | } 102 | } catch (err: any) { 103 | if ( 104 | !( 105 | err instanceof HTTPError && [403, 404].includes(err.response.statusCode) 106 | ) 107 | ) { 108 | sentry.report(new Error('JsDelivr network error'), { 109 | statusCode: err?.response?.statusCode, 110 | err, 111 | url, 112 | }); 113 | } 114 | } 115 | 116 | datadog.timing('jsdelivr.getFilesList', Date.now() - start); 117 | return files; 118 | } 119 | 120 | export function getFullURL( 121 | pkg: Pick, 122 | file: File 123 | ): string { 124 | return `https://cdn.jsdelivr.net/npm/${pkg.name}@${pkg.version}${file.name}`; 125 | } 126 | -------------------------------------------------------------------------------- /src/jsDelivr/pkgTypes.ts: -------------------------------------------------------------------------------- 1 | import type { RawPkg, StyleType } from '../@types/pkg'; 2 | import { datadog } from '../utils/datadog'; 3 | 4 | import type { File } from './index'; 5 | 6 | const styleFileExtensions = ['css', 'less', 'scss']; 7 | const styleFilePattern = createFilePattern(styleFileExtensions); 8 | 9 | const jsFileExtensions = ['js', 'mjs', 'cjs']; 10 | const jsFilePattern = createFilePattern(jsFileExtensions); 11 | 12 | function createFilePattern(extensions: string[]): 
RegExp { 13 | const extPattern = extensions.join('|'); 14 | 15 | // https://regex101.com/r/X5jQfH/2 16 | return new RegExp( 17 | `^(?:(?!\\/(docs?|documentation|examples?|samples?|demos?|tests?)\\/)(?!\\/[._]).)+\\.(${extPattern})$`, 18 | 'i' 19 | ); 20 | } 21 | 22 | export function getStyleTypes( 23 | pkg: Pick, 24 | filelist: File[] 25 | ): Pick { 26 | const start = Date.now(); 27 | 28 | try { 29 | const styleTypes = new Set(pkg.styleTypes); 30 | 31 | for (const file of filelist) { 32 | if (!styleFilePattern.test(file.name)) { 33 | continue; 34 | } 35 | 36 | const type = file.name.split('.').pop(); 37 | 38 | if (type) { 39 | styleTypes.add(type.toLowerCase()); 40 | } 41 | } 42 | 43 | if (styleTypes.size === 0) { 44 | styleTypes.add('none'); 45 | } 46 | 47 | return { styleTypes: [...styleTypes] }; 48 | } finally { 49 | datadog.timing('pkgTypes.getStyleTypes', Date.now() - start); 50 | } 51 | } 52 | 53 | export function getStyleTypesForAll( 54 | pkgs: Array>, 55 | filelists: File[][] 56 | ): Array> { 57 | const start = Date.now(); 58 | 59 | const all = pkgs.map((pkg, index) => { 60 | return getStyleTypes(pkg, filelists[index] || []); 61 | }); 62 | 63 | datadog.timing('pkgTypes.getStyleTypesForAll', Date.now() - start); 64 | return all; 65 | } 66 | 67 | export function getModuleTypes( 68 | pkg: Pick, 69 | filelist: File[] 70 | ): Pick { 71 | const start = Date.now(); 72 | 73 | try { 74 | // Module type(s) already detected - it can't be none at that point 75 | if (!pkg.moduleTypes.includes('unknown')) { 76 | return { moduleTypes: pkg.moduleTypes }; 77 | } 78 | 79 | for (const file of filelist) { 80 | // JS file found - it can't be non anymore 81 | if (jsFilePattern.test(file.name)) { 82 | return { moduleTypes: pkg.moduleTypes }; 83 | } 84 | } 85 | 86 | return { moduleTypes: ['none'] }; 87 | } finally { 88 | datadog.timing('pkgTypes.getModuleTypes', Date.now() - start); 89 | } 90 | } 91 | 92 | export function getModuleTypesForAll( 93 | pkgs: Array>, 94 | filelists: 
File[][] 95 | ): Array> { 96 | const start = Date.now(); 97 | 98 | const all = pkgs.map((pkg, index) => { 99 | return getModuleTypes(pkg, filelists[index] || []); 100 | }); 101 | 102 | datadog.timing('pkgTypes.getModuleTypesForAll', Date.now() - start); 103 | return all; 104 | } 105 | -------------------------------------------------------------------------------- /src/npm/ChangesReader.ts: -------------------------------------------------------------------------------- 1 | import { EventEmitter } from 'events'; 2 | import { setTimeout } from 'node:timers/promises'; 3 | 4 | import ms from 'ms'; 5 | import type { DatabaseChangesResponse } from 'nano'; 6 | 7 | import { config } from '../config'; 8 | import { request } from '../utils/request'; 9 | import * as sentry from '../utils/sentry'; 10 | import { backoff } from '../utils/wait'; 11 | 12 | type ChangesReaderOptions = { 13 | since: string; 14 | }; 15 | 16 | export class ChangesReader extends EventEmitter { 17 | protected running: boolean = false; 18 | protected paused: boolean = false; 19 | protected since: string; 20 | 21 | constructor({ since }: ChangesReaderOptions) { 22 | super(); 23 | 24 | this.since = since; 25 | } 26 | 27 | pause(): void { 28 | this.paused = true; 29 | } 30 | 31 | resume(): void { 32 | this.paused = false; 33 | } 34 | 35 | run(): void { 36 | this.running = true; 37 | 38 | this.runInternal().catch((e) => { 39 | sentry.report(e); 40 | }); 41 | } 42 | 43 | async runInternal(): Promise { 44 | let retry = 0; 45 | 46 | while (this.running) { 47 | try { 48 | const { body } = await request( 49 | `${config.npmRegistryEndpoint}/_changes`, 50 | { 51 | timeout: ms('60 seconds'), // Hard timeout after which the client aborts. 
52 | headers: { 53 | 'npm-replication-opt-in': 'true', // See https://github.com/orgs/community/discussions/152515 54 | }, 55 | searchParams: { 56 | since: this.since, 57 | limit: 10, 58 | }, 59 | responseType: 'json', 60 | } 61 | ); 62 | 63 | retry = 0; 64 | 65 | if (body.last_seq) { 66 | this.since = body.last_seq; 67 | } 68 | 69 | if (body.results) { 70 | for (const result of body.results) { 71 | this.emit('change', result); 72 | } 73 | 74 | this.emit('batch', body.results); 75 | } 76 | 77 | // If there are no results, retry in 30 seconds. 78 | if (!body.results?.length) { 79 | await setTimeout(ms('30 seconds')); 80 | } 81 | } catch (e) { 82 | this.emit('error', e); 83 | await backoff(++retry, config.retryBackoffPow, config.retryBackoffMax); 84 | } 85 | 86 | while (this.running && this.paused) { 87 | await setTimeout(100); 88 | } 89 | } 90 | } 91 | 92 | stop(): void { 93 | this.running = false; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/npm/Prefetcher.ts: -------------------------------------------------------------------------------- 1 | import { setTimeout } from 'node:timers/promises'; 2 | 3 | import type { SearchIndex } from 'algoliasearch'; 4 | import ms from 'ms'; 5 | import type { DocumentListParams, DocumentResponseRow } from 'nano'; 6 | 7 | import type { StateManager } from '../StateManager'; 8 | import { config } from '../config'; 9 | import { log } from '../utils/log'; 10 | import * as sentry from '../utils/sentry'; 11 | 12 | import type { GetPackage } from './types'; 13 | 14 | import * as npm from './index'; 15 | 16 | export type PrefetchedPkg = Pick< 17 | DocumentResponseRow, 18 | 'id' | 'value' 19 | > & { offset: number }; 20 | 21 | export class Prefetcher { 22 | private stateManager: StateManager; 23 | private queueIndex: SearchIndex; 24 | 25 | #limit: number = config.bootstrapConcurrency; 26 | #ready: PrefetchedPkg[] = []; 27 | 28 | #nextKey: string | null = null; 29 | #running: 
boolean = false; 30 | #offset: number = 0; 31 | #finished: boolean = false; 32 | 33 | constructor( 34 | stateManager: StateManager, 35 | queueIndex: SearchIndex, 36 | opts: { nextKey: string | null } 37 | ) { 38 | this.stateManager = stateManager; 39 | this.queueIndex = queueIndex; 40 | this.#nextKey = opts.nextKey; 41 | } 42 | 43 | stop(): void { 44 | this.#running = false; 45 | } 46 | 47 | get offset(): number { 48 | return this.#offset + this.#limit - this.#ready.length; 49 | } 50 | 51 | get isFinished(): boolean { 52 | return this.#finished; 53 | } 54 | 55 | run(): void { 56 | this.#running = true; 57 | 58 | this.runInternal().catch((e) => { 59 | sentry.report(e); 60 | }); 61 | } 62 | 63 | async runInternal(): Promise { 64 | while (this.#running) { 65 | await this.queueOnePage(); 66 | await setTimeout(ms('1 second')); 67 | } 68 | } 69 | 70 | private async queueOnePage(): Promise { 71 | const options: Partial = { 72 | limit: this.#limit, 73 | }; 74 | 75 | if (this.#nextKey) { 76 | options.startkey = this.#nextKey; 77 | } 78 | 79 | try { 80 | const { rows: packages, offset } = await npm.findAll(options); 81 | 82 | if (packages.length <= 0) { 83 | this.#finished = true; 84 | this.#running = false; 85 | this.#offset = offset; 86 | log.info('[pf] done'); 87 | return; 88 | } 89 | 90 | // Skip the first item as we already processed it on the previous page. 
91 | if (this.#nextKey && packages.at(0)?.id === this.#nextKey) { 92 | packages.shift(); 93 | } 94 | 95 | await this.queueIndex.saveObjects( 96 | packages.map((pkg) => ({ 97 | name: pkg.id, 98 | objectID: pkg.id, 99 | retries: 0, 100 | pkg, 101 | })) 102 | ); 103 | 104 | const lastId = (await this.stateManager.get()).bootstrapLastId; 105 | const pkg = packages.at(-1); 106 | 107 | if (pkg && (!lastId || lastId < pkg.id)) { 108 | await this.stateManager.save({ 109 | bootstrapLastId: pkg.id, 110 | }); 111 | } 112 | 113 | this.#offset = offset; 114 | this.#nextKey = packages[packages.length - 1]!.id; 115 | } catch (err: any) { 116 | sentry.report(err); 117 | 118 | if (err.statusCode === 429) { 119 | log.info('[pf] waiting'); 120 | await setTimeout(ms('2 minutes')); 121 | } 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/npm/__tests__/index.test.ts: -------------------------------------------------------------------------------- 1 | import { PackageNotFoundError } from '../../errors'; 2 | import type { DownloadsData } from '../index'; 3 | import * as api from '../index'; 4 | import { computeDownload } from '../index'; 5 | 6 | jest.setTimeout(15000); 7 | 8 | describe('getDocFromRegistry()', () => { 9 | it('retrieves a single doc', async () => { 10 | const doc = await api.getDocFromRegistry('jsdelivr'); 11 | 12 | expect(doc.name).toBe('jsdelivr'); 13 | expect(Object.keys(doc.versions).length).toBeGreaterThanOrEqual(2); 14 | }); 15 | 16 | it('throws PackageNotFoundError for non-existent packages', async () => { 17 | await expect(api.getDocFromRegistry('jsdelivrxxxx')).rejects.toBeInstanceOf( 18 | PackageNotFoundError 19 | ); 20 | }); 21 | 22 | it('throws PackageNotFoundError for packages without versions', async () => { 23 | await expect( 24 | api.getDocFromRegistry('ebay-app-meta') 25 | ).rejects.toBeInstanceOf(PackageNotFoundError); 26 | }); 27 | }); 28 | 29 | describe('getDependents()', () => { 30 | let 
dependents; 31 | beforeAll(async () => { 32 | dependents = await api.getDependents([ 33 | { name: 'jest' }, 34 | { name: '@angular/core' }, 35 | { name: 'holmes.js' }, 36 | ]); 37 | }); 38 | 39 | it('contains the correct keys', () => { 40 | expect(dependents).toEqual( 41 | expect.arrayContaining([ 42 | expect.objectContaining({ 43 | dependents: expect.any(Number), 44 | humanDependents: expect.any(String), 45 | }), 46 | expect.objectContaining({ 47 | dependents: expect.any(Number), 48 | humanDependents: expect.any(String), 49 | }), 50 | ]) 51 | ); 52 | }); 53 | 54 | it('has the right fake value', () => { 55 | const [jest, angular, holmes] = dependents.map((pkg) => pkg.dependents); 56 | expect(jest).toBe(0); 57 | expect(angular).toBe(0); 58 | expect(holmes).toBe(0); 59 | }); 60 | }); 61 | 62 | describe('fetchDownload()', () => { 63 | it('should download one package and return correct response', async () => { 64 | const dl = await api.fetchDownload('jest'); 65 | expect(dl).toHaveProperty('jest'); 66 | expect(dl.jest).toEqual({ 67 | packageNpmDownloads: expect.any(Number), 68 | }); 69 | }); 70 | 71 | it('should download one scoped package and return correct response', async () => { 72 | const dl = await api.fetchDownload('@angular/core'); 73 | expect(dl).toHaveProperty('@angular/core'); 74 | expect(dl['@angular/core']).toEqual({ 75 | packageNpmDownloads: expect.any(Number), 76 | }); 77 | }); 78 | 79 | it('should download 2 packages and return correct response', async () => { 80 | const dl = await api.fetchDownload('jest,holmes.js'); 81 | expect(dl).toHaveProperty('jest'); 82 | expect(dl).toHaveProperty(['holmes.js']); 83 | }); 84 | }); 85 | 86 | describe('getDownloads()', () => { 87 | let downloads: Awaited>; 88 | 89 | beforeAll(async () => { 90 | await api.loadTotalDownloads(); 91 | 92 | downloads = await api.getDownloads([ 93 | { name: 'jest' }, 94 | { name: 'holmes.js' }, 95 | ]); 96 | 97 | downloads = { 98 | ...downloads, 99 | ...(await api.getDownloads([{ name: 
'@angular/core' }])), 100 | }; 101 | }); 102 | 103 | it('contains the correct keys', () => { 104 | expect(downloads).toEqual({ 105 | jest: expect.objectContaining({ 106 | packageNpmDownloads: expect.any(Number), 107 | totalNpmDownloads: expect.any(Number), 108 | }), 109 | 'holmes.js': expect.objectContaining({ 110 | packageNpmDownloads: expect.any(Number), 111 | totalNpmDownloads: expect.any(Number), 112 | }), 113 | '@angular/core': expect.objectContaining({ 114 | packageNpmDownloads: expect.any(Number), 115 | totalNpmDownloads: expect.any(Number), 116 | }), 117 | }); 118 | }); 119 | 120 | it('has the right approximate value for downloadsLast30Days', () => { 121 | const [jest, holmes, angular] = Object.values(downloads).map((pkg) => 122 | pkg.packageNpmDownloads!.toString() 123 | ); 124 | 125 | expect(jest!.length).toBeGreaterThanOrEqual(6); 126 | expect(jest!.length).toBeLessThanOrEqual(9); 127 | 128 | expect(angular!.length).toBeGreaterThanOrEqual(6); 129 | expect(angular!.length).toBeLessThanOrEqual(8); 130 | 131 | expect(holmes!.length).toBeGreaterThanOrEqual(2); 132 | expect(holmes!.length).toBeLessThanOrEqual(4); 133 | }); 134 | 135 | it('has the right approximate value for downloadsMagnitude', () => { 136 | const [jest, holmes, angular] = Object.entries( 137 | downloads 138 | ).map( 139 | ([name, pkg]) => 140 | computeDownload( 141 | { name }, 142 | pkg.packageNpmDownloads, 143 | pkg.totalNpmDownloads 144 | )?._downloadsMagnitude 145 | ); 146 | 147 | expect(jest).toBeGreaterThanOrEqual(6); 148 | expect(jest).toBeLessThanOrEqual(9); 149 | 150 | expect(angular).toBeGreaterThanOrEqual(6); 151 | expect(angular).toBeLessThanOrEqual(8); 152 | 153 | expect(holmes).toBeGreaterThanOrEqual(2); 154 | expect(holmes).toBeLessThanOrEqual(4); 155 | }); 156 | 157 | it('validates package batching', async () => { 158 | await expect( 159 | api.getDownloads([{ name: '@scope/p-1' }, { name: '@scope/p-2' }]) 160 | ).rejects.toThrow('one at a time'); 161 | }); 162 | 163 | 
it('returns undefined for non-existent packages without failing the valid ones', async () => { 164 | const result = await api.getDownloads([ 165 | { name: 'jsdelivr' }, 166 | { name: 'jsdelivrxxxx' }, 167 | ]); 168 | 169 | expect(result.jsdelivr!.packageNpmDownloads).toBeGreaterThan(0); 170 | expect(result.jsdelivrxxxx!.packageNpmDownloads).toBeUndefined(); 171 | }); 172 | }); 173 | -------------------------------------------------------------------------------- /src/npm/index.ts: -------------------------------------------------------------------------------- 1 | import { HTTPError } from 'got'; 2 | import _ from 'lodash'; 3 | import ms from 'ms'; 4 | import type { DocumentListParams, DocumentListResponse } from 'nano'; 5 | import nano from 'nano'; 6 | import numeral from 'numeral'; 7 | import PQueue from 'p-queue'; 8 | 9 | import type { RawPkg } from '../@types/pkg'; 10 | import { config } from '../config'; 11 | import { PackageNotFoundError } from '../errors'; 12 | import { datadog } from '../utils/datadog'; 13 | import { log } from '../utils/log'; 14 | import { httpsAgent, request, USER_AGENT } from '../utils/request'; 15 | 16 | import type { GetInfo, GetPackage, PackageDownload } from './types'; 17 | 18 | type GetDependent = { dependents: number; humanDependents: string }; 19 | type GetDownload = { 20 | downloadsLast30Days: number; 21 | humanDownloadsLast30Days: string; 22 | downloadsRatio: number; 23 | popular: boolean; 24 | _downloadsMagnitude: number; 25 | _popularName?: string; 26 | }; 27 | export type DownloadsData = { 28 | totalNpmDownloads?: number; 29 | packageNpmDownloads?: number; 30 | }; 31 | export const cacheTotalDownloads: { total?: number; date?: number } = { 32 | total: undefined, 33 | date: undefined, 34 | }; 35 | 36 | export const registry = nano({ 37 | url: config.npmRegistryEndpoint, 38 | requestDefaults: { 39 | agent: httpsAgent, 40 | timeout: 30000, 41 | headers: { 42 | 'user-agent': USER_AGENT, 43 | 'Accept-Encoding': 'deflate, gzip', 44 
| 'content-type': 'application/json', 45 | accept: 'application/json', 46 | 'npm-replication-opt-in': 'true', // See https://github.com/orgs/community/discussions/152515 47 | }, 48 | }, 49 | }); 50 | 51 | export const db = registry.use(config.npmRegistryDBName); 52 | const registryQueue = new PQueue({ intervalCap: 6, interval: 1000 }); 53 | const downloadsQueue = new PQueue({ intervalCap: 6, interval: 1000 }); 54 | 55 | /** 56 | * Find all packages in registry. 57 | */ 58 | async function findAll( 59 | options: Partial 60 | ): Promise> { 61 | const start = Date.now(); 62 | 63 | const results = await db.list({ 64 | ...options, 65 | }); 66 | 67 | datadog.timing('db.allDocs', Date.now() - start); 68 | 69 | return results; 70 | } 71 | 72 | async function getDocFromRegistry(name: string): Promise { 73 | const start = Date.now(); 74 | 75 | try { 76 | const doc = await registryQueue.add(() => 77 | request(`${config.npmRootEndpoint}/${name}`, { 78 | responseType: 'json', 79 | }) 80 | ); 81 | 82 | // Package without versions means it was unpublished. 83 | // Treat it the same as if it was not found at all. 84 | if (_.isEmpty(doc.body.versions)) { 85 | throw new PackageNotFoundError(); 86 | } 87 | 88 | return doc.body; 89 | } catch (e) { 90 | if (e instanceof HTTPError && e.response.statusCode === 404) { 91 | throw new PackageNotFoundError(); 92 | } 93 | 94 | throw e; 95 | } finally { 96 | datadog.timing('npm.getDocRegistry.one', Date.now() - start); 97 | } 98 | } 99 | 100 | /** 101 | * Get info about registry. 
102 | */ 103 | async function getInfo(): Promise<{ nbDocs: number; seq: number }> { 104 | const start = Date.now(); 105 | 106 | const { 107 | body: { doc_count: nbDocs, update_seq: seq }, 108 | } = await request( 109 | `${config.npmRegistryEndpoint}/${config.npmRegistryDBName}/`, 110 | { 111 | headers: { 112 | 'npm-replication-opt-in': 'true', // See https://github.com/orgs/community/discussions/152515 113 | }, 114 | responseType: 'json', 115 | } 116 | ); 117 | 118 | datadog.timing('npm.info', Date.now() - start); 119 | 120 | return { 121 | nbDocs, 122 | seq, 123 | }; 124 | } 125 | 126 | /** 127 | * Get list of packages that depends of them. 128 | * 129 | * @param pkgs - Package list. 130 | */ 131 | function getDependents( 132 | pkgs: Array> 133 | ): Promise { 134 | // we return 0, waiting for https://github.com/npm/registry/issues/361 135 | return Promise.all(pkgs.map(getDependent)); 136 | } 137 | 138 | function getDependent(_pkg: Pick): GetDependent { 139 | return { dependents: 0, humanDependents: '0' }; 140 | } 141 | 142 | async function loadTotalDownloads(): Promise { 143 | const start = Date.now(); 144 | 145 | const { 146 | body: { downloads: totalNpmDownloadsPerDay }, 147 | } = await request<{ downloads: Array<{ downloads: number }> }>( 148 | `${config.npmDownloadsEndpoint}/range/last-month`, 149 | { 150 | responseType: 'json', 151 | } 152 | ); 153 | 154 | const total = totalNpmDownloadsPerDay.reduce( 155 | (agg, { downloads: dayDownloads }) => agg + dayDownloads, 156 | 0 157 | ); 158 | 159 | cacheTotalDownloads.date = start; 160 | cacheTotalDownloads.total = total; 161 | 162 | datadog.timing('npm.loadTotalDownloads', Date.now() - start); 163 | } 164 | 165 | /** 166 | * Get total npm downloads. 167 | */ 168 | async function getTotalDownloads(): Promise { 169 | return cacheTotalDownloads.total; 170 | } 171 | 172 | /** 173 | * Get download stats for a list of packages. 
174 | */ 175 | async function fetchDownload( 176 | pkgNames: string, 177 | retry: number = 0 178 | ): Promise> { 179 | const start = Date.now(); 180 | 181 | try { 182 | const response = await downloadsQueue.add(() => { 183 | datadog.increment('npm.downloads.requests'); 184 | 185 | return request>( 186 | `${config.npmDownloadsEndpoint}/point/last-month/${pkgNames}`, 187 | { 188 | responseType: 'json', 189 | } 190 | ); 191 | }); 192 | 193 | if (response.statusCode !== 200 || !response.body) { 194 | return {}; 195 | } 196 | 197 | // Single package 198 | if (response.body.downloads) { 199 | return { 200 | [response.body.package as string]: { 201 | packageNpmDownloads: response.body?.downloads as number, 202 | }, 203 | }; 204 | } 205 | 206 | return _.mapValues(response.body, (record) => { 207 | return { 208 | packageNpmDownloads: 209 | (typeof record === 'object' && record?.downloads) || undefined, 210 | }; 211 | }); 212 | } catch (error) { 213 | if ( 214 | error instanceof HTTPError && 215 | (error.response.statusCode === 429 || error.response.statusCode >= 500) 216 | ) { 217 | datadog.increment(`npm.downloads.throttle`); 218 | 219 | if (!downloadsQueue.isPaused) { 220 | downloadsQueue.pause(); 221 | setTimeout(() => downloadsQueue.start(), ms('1 minute')).unref(); 222 | } 223 | 224 | if (retry < config.retryMax) { 225 | return fetchDownload(pkgNames, retry + 1); 226 | } 227 | } 228 | 229 | if (error instanceof HTTPError && error.response.statusCode === 404) { 230 | return {}; 231 | } 232 | 233 | datadog.increment(`npm.downloads.failure`); 234 | log.warn(`An error occurred when getting download of ${pkgNames} ${error}`); 235 | throw error; 236 | } finally { 237 | datadog.timing('npm.fetchDownload', Date.now() - start); 238 | } 239 | } 240 | 241 | export function computeDownload( 242 | pkg: Pick, 243 | downloadsLast30Days: number | undefined, 244 | totalNpmDownloads: number | undefined 245 | ): GetDownload | null { 246 | if (!downloadsLast30Days || !totalNpmDownloads) { 
247 | return null; 248 | } 249 | 250 | const downloadsRatio = Number( 251 | ((downloadsLast30Days / totalNpmDownloads) * 100).toFixed(4) 252 | ); 253 | const popular = downloadsRatio > config.popularDownloadsRatio; 254 | const downloadsMagnitude = downloadsLast30Days 255 | ? downloadsLast30Days.toString().length 256 | : 0; 257 | 258 | return { 259 | downloadsLast30Days, 260 | humanDownloadsLast30Days: numeral(downloadsLast30Days).format('0.[0]a'), 261 | downloadsRatio, 262 | popular, 263 | _downloadsMagnitude: downloadsMagnitude, 264 | // if the package is popular, we copy its name to a dedicated attribute 265 | // which will make popular records' `name` matches to be ranked higher than other matches 266 | // see the `searchableAttributes` index setting 267 | ...(popular && { 268 | _popularName: pkg.name, 269 | }), 270 | }; 271 | } 272 | 273 | /** 274 | * Get downloads for all packages passer in arguments. 275 | */ 276 | async function getDownloads( 277 | pkgs: Array> 278 | ): Promise> { 279 | const start = Date.now(); 280 | 281 | if (pkgs.length > 1 && pkgs.some((pkg) => pkg.name.startsWith('@'))) { 282 | throw new Error( 283 | `Scoped packages can only be requested separately, one at a time.` 284 | ); 285 | } 286 | 287 | const encodedPackageNames = pkgs 288 | .map((pkg) => pkg.name) 289 | .map((name) => encodeURIComponent(name)); 290 | 291 | if (encodedPackageNames.length > 1) { 292 | // why do we do this? 
see https://github.com/npm/registry/issues/104 293 | encodedPackageNames.unshift(''); 294 | } 295 | 296 | const totalNpmDownloads = await getTotalDownloads(); 297 | const packageNpmDownloads = await fetchDownload( 298 | encodedPackageNames.join(',') 299 | ); 300 | 301 | datadog.timing('npm.getDownloads', Date.now() - start); 302 | 303 | return _.mapValues( 304 | _.pickBy(packageNpmDownloads, (value, key) => key), 305 | (pkg) => { 306 | return { ...pkg, totalNpmDownloads }; 307 | } 308 | ); 309 | } 310 | 311 | export { 312 | findAll, 313 | loadTotalDownloads, 314 | getInfo, 315 | getDocFromRegistry, 316 | getDependents, 317 | getDependent, 318 | fetchDownload, 319 | getDownloads, 320 | }; 321 | -------------------------------------------------------------------------------- /src/npm/types.ts: -------------------------------------------------------------------------------- 1 | import type { DocumentLookupFailure } from 'nano'; 2 | 3 | export interface PackageDownload { 4 | downloads: number; 5 | package: string; 6 | // start: string; 7 | // end: string; 8 | } 9 | 10 | export interface GetInfo { 11 | doc_count: number; 12 | update_seq: number; 13 | } 14 | 15 | export interface GetUser { 16 | name: string; 17 | email?: string; 18 | } 19 | 20 | export interface GetVersion { 21 | _from?: string; 22 | _id?: string; 23 | _npmUser?: GetUser; 24 | _npmVersion?: string; 25 | _nodeVersion?: string; 26 | _npmOperationalInternal?: Record; 27 | _shasum?: string; 28 | _resolved?: string; 29 | author?: GetUser; 30 | description?: string; 31 | dist?: { 32 | shasum: string; 33 | tarball: string; 34 | integrity?: string; 35 | [key: string]: any | undefined; 36 | }; 37 | config?: { 38 | access?: 'public'; 39 | }; 40 | license?: string; 41 | 42 | type?: 'commonjs' | 'module'; 43 | module?: string; 44 | main?: string; 45 | exports?: PackageExports; 46 | 47 | repository?: PackageRepo; 48 | maintainers?: GetUser[]; 49 | name: string; 50 | scripts?: Record; 51 | version: string; 52 | 
deprecated?: boolean | string; 53 | schematics?: string; 54 | types?: string; 55 | typings?: string; 56 | style?: string; 57 | dependencies?: Record; 58 | devDependencies?: Record; 59 | peerDependencies?: Record; 60 | optionalDependencies?: Record; 61 | gitHead?: string; 62 | bugs?: { url: string }; 63 | homepage?: string; 64 | files?: string[]; 65 | keywords?: string[]; 66 | 67 | [key: string]: any; 68 | } 69 | 70 | export interface PackageRepo { 71 | type: string; 72 | url: string; 73 | directory?: string; 74 | } 75 | 76 | export interface PackageExports { 77 | [key: string]: PackageExports | string; 78 | } 79 | 80 | export interface GetPackage { 81 | _id: string; 82 | _rev: string; 83 | 'dist-tags': { [key: string]: string }; 84 | license?: string; 85 | maintainers: GetUser[]; 86 | name: string; 87 | description?: string; 88 | homepage?: string; 89 | bugs?: { url: string }; 90 | readme: string; 91 | readmeFilename: string; 92 | time: { 93 | created: string; 94 | modified: string; 95 | [key: string]: string; 96 | }; 97 | author?: GetUser; 98 | users?: Record; 99 | versions: Record; 100 | keywords?: string[] | string; 101 | contributors?: Array<{ name: string }>; 102 | repository?: PackageRepo; 103 | schematics?: string; 104 | types?: string; 105 | typings?: string; 106 | 107 | [key: string]: any; 108 | } 109 | 110 | export interface GetPackageLight { 111 | name: string; 112 | 'dist-tags': Record; 113 | versions: Record>; 114 | modified: string; 115 | } 116 | 117 | export function isFailure(change: any): change is DocumentLookupFailure { 118 | return change.error && !change.id; 119 | } 120 | -------------------------------------------------------------------------------- /src/saveDocs.ts: -------------------------------------------------------------------------------- 1 | import type { SearchIndex } from 'algoliasearch'; 2 | 3 | import type { FinalPkg, RawPkg } from './@types/pkg'; 4 | import { getChangelog } from './changelog'; 5 | import { config } from 
'./config'; 6 | import type { OneTimeDataObject } from './indexers/OneTimeBackgroundIndexer'; 7 | import type { PeriodicDataObject } from './indexers/PeriodicBackgroundIndexer'; 8 | import * as jsDelivr from './jsDelivr'; 9 | import { getModuleTypes, getStyleTypes } from './jsDelivr/pkgTypes'; 10 | import * as npm from './npm'; 11 | import { computeDownload } from './npm'; 12 | import { getTypeScriptSupport } from './typescript'; 13 | import { datadog } from './utils/datadog'; 14 | import { offsetToTimestamp, round } from './utils/time'; 15 | 16 | export async function saveDoc({ 17 | formatted, 18 | index, 19 | oneTimeDataIndex, 20 | periodicDataIndex, 21 | }: { 22 | formatted: RawPkg; 23 | index: SearchIndex; 24 | oneTimeDataIndex: SearchIndex; 25 | periodicDataIndex: SearchIndex; 26 | }): Promise { 27 | const start = Date.now(); 28 | const pkg = await addMetaData(formatted, oneTimeDataIndex, periodicDataIndex); 29 | 30 | const start2 = Date.now(); 31 | await index.saveObject(pkg); 32 | datadog.timing('saveDocs.saveObject.one', Date.now() - start2); 33 | 34 | datadog.timing('saveDocs.one', Date.now() - start); 35 | } 36 | 37 | async function addMetaData( 38 | pkg: RawPkg, 39 | oneTimeDataIndex: SearchIndex, 40 | periodicDataIndex: SearchIndex 41 | ): Promise { 42 | const start = Date.now(); 43 | let periodicDataUpdatedAt = 0; 44 | let download; 45 | 46 | if (pkg.isSecurityHeld) { 47 | return pkg; 48 | } 49 | 50 | const [dependent, hit] = [npm.getDependent(pkg), jsDelivr.getHit(pkg)]; 51 | const { filelist, metadata } = await getFileListMetadata(pkg); 52 | 53 | let hasAllOneTimeData = Boolean(metadata.changelogFilename); 54 | let needsOneTimeReindex = !hasAllOneTimeData || !filelist.length; 55 | 56 | if (!hasAllOneTimeData) { 57 | try { 58 | const data = await oneTimeDataIndex.getObject( 59 | `${pkg.name}@${pkg.version}` 60 | ); 61 | 62 | datadog.increment('oneTimeDataIndex.hit'); 63 | 64 | if (!metadata.changelogFilename) { 65 | metadata.changelogFilename = 
data.changelogFilename; 66 | } 67 | 68 | hasAllOneTimeData = true; 69 | needsOneTimeReindex = !hasAllOneTimeData || !filelist.length; 70 | } catch { 71 | datadog.increment('oneTimeDataIndex.miss'); 72 | } 73 | } 74 | 75 | try { 76 | const data = await periodicDataIndex.getObject( 77 | pkg.name 78 | ); 79 | 80 | datadog.increment('periodicDataIndex.hit'); 81 | 82 | download = computeDownload( 83 | pkg, 84 | data.packageNpmDownloads, 85 | data.totalNpmDownloads 86 | ); 87 | 88 | periodicDataUpdatedAt = round(new Date(data.updatedAt)).valueOf(); 89 | } catch { 90 | datadog.increment('periodicDataIndex.miss'); 91 | } 92 | 93 | const final = { 94 | ...pkg, 95 | ...(download || {}), 96 | ...dependent, 97 | ...metadata, 98 | ...hit, 99 | popular: download?.popular || hit.popular, 100 | _oneTimeDataToUpdateAt: needsOneTimeReindex ? offsetToTimestamp(0) : 0, 101 | _periodicDataUpdatedAt: periodicDataUpdatedAt, 102 | _searchInternal: { 103 | ...pkg._searchInternal, 104 | }, 105 | }; 106 | 107 | final._searchInternal.popularAlternativeNames = 108 | getPopularAlternativeNames(final); 109 | 110 | datadog.timing('saveDocs.addMetaData.one', Date.now() - start); 111 | return final; 112 | } 113 | 114 | export async function getFileListMetadata(pkg: RawPkg): Promise<{ 115 | filelist: Awaited>; 116 | metadata: Awaited> & 117 | Awaited> & 118 | Awaited> & 119 | Awaited>; 120 | }> { 121 | const filelist = await jsDelivr.getFilesList(pkg); 122 | 123 | const [changelog, ts, moduleTypes, styleTypes] = await Promise.all([ 124 | getChangelog(pkg, filelist), 125 | getTypeScriptSupport(pkg, filelist), 126 | getModuleTypes(pkg, filelist), 127 | getStyleTypes(pkg, filelist), 128 | ]); 129 | 130 | return { 131 | filelist, 132 | metadata: { 133 | ...changelog, 134 | ...ts, 135 | ...moduleTypes, 136 | ...styleTypes, 137 | }, 138 | }; 139 | } 140 | 141 | export function getPopularAlternativeNames(pkg: FinalPkg): string[] { 142 | const hasFewDownloads = 143 | pkg.downloadsLast30Days <= 
config.alternativeNamesNpmDownloadsThreshold && 144 | pkg.jsDelivrHits <= config.alternativeNamesJsDelivrHitsThreshold; 145 | 146 | const addPopularAlternativeNames = 147 | pkg.popular || 148 | (!pkg.isDeprecated && !pkg.isSecurityHeld && !hasFewDownloads); 149 | 150 | return addPopularAlternativeNames ? pkg._searchInternal.alternativeNames : []; 151 | } 152 | -------------------------------------------------------------------------------- /src/typescript/index.test.ts: -------------------------------------------------------------------------------- 1 | import * as api from './index'; 2 | 3 | jest.setTimeout(15000); 4 | 5 | describe('loadTypesIndex()', () => { 6 | it('should download and cache all @types', async () => { 7 | expect(api.typesCache).not.toHaveProperty('algoliasearch'); 8 | expect(api.isDefinitelyTyped({ name: 'algoliasearch' })).toBeUndefined(); 9 | 10 | await api.loadTypesIndex(); 11 | expect(api.typesCache).toHaveProperty('algoliasearch'); 12 | expect(api.typesCache).not.toHaveProperty('algoliasearch/lite'); 13 | 14 | expect(api.typesCache.algoliasearch).toBe('algoliasearch'); 15 | expect(api.typesCache['algoliasearch/lite']).toBeUndefined(); 16 | expect(api.typesCache.doesnotexist).toBeUndefined(); 17 | 18 | expect(api.isDefinitelyTyped({ name: 'algoliasearch' })).toBe( 19 | 'algoliasearch' 20 | ); 21 | }); 22 | }); 23 | 24 | describe('getTypeScriptSupport()', () => { 25 | it('If types are already calculated - return early', () => { 26 | const typesSupport = api.getTypeScriptSupport( 27 | { 28 | name: 'Has Types', 29 | types: { ts: 'included' }, 30 | version: '1.0', 31 | }, 32 | [] 33 | ); 34 | 35 | expect(typesSupport).toEqual({ types: { ts: 'included' } }); 36 | }); 37 | 38 | it('Handles not having any possible TS types', () => { 39 | const typesSupport = api.getTypeScriptSupport( 40 | { 41 | name: 'my-lib', 42 | types: { ts: false }, 43 | version: '1.0', 44 | }, 45 | [] 46 | ); 47 | expect(typesSupport).toEqual({ types: { ts: false } }); 48 | 
}); 49 | 50 | describe('Definitely Typed', () => { 51 | it('Checks for @types/[name]', () => { 52 | const atTypesSupport = api.getTypeScriptSupport( 53 | { 54 | name: 'lodash.valuesin', 55 | types: { ts: false }, 56 | version: '1.0', 57 | }, 58 | [] 59 | ); 60 | expect(atTypesSupport).toEqual({ 61 | types: { 62 | ts: 'definitely-typed', 63 | definitelyTyped: '@types/lodash.valuesin', 64 | }, 65 | }); 66 | }); 67 | 68 | it('Checks for @types/[scope__name]', () => { 69 | const atTypesSupport = api.getTypeScriptSupport( 70 | { 71 | name: '@mapbox/geojson-area', 72 | types: { ts: false }, 73 | version: '1.0', 74 | }, 75 | [] 76 | ); 77 | expect(atTypesSupport).toEqual({ 78 | types: { 79 | ts: 'definitely-typed', 80 | definitelyTyped: '@types/mapbox__geojson-area', 81 | }, 82 | }); 83 | 84 | const atTypesSupport2 = api.getTypeScriptSupport( 85 | { 86 | name: '@reach/router', 87 | types: { ts: false }, 88 | version: '1.0', 89 | }, 90 | [] 91 | ); 92 | expect(atTypesSupport2).toEqual({ 93 | types: { 94 | ts: 'definitely-typed', 95 | definitelyTyped: '@types/reach__router', 96 | }, 97 | }); 98 | }); 99 | }); 100 | 101 | describe('FilesList', () => { 102 | it('should match a correct filesList', () => { 103 | const atTypesSupport = api.getTypeScriptSupport( 104 | { 105 | name: 'doesnotexist', 106 | types: { ts: false }, 107 | version: '1.0', 108 | }, 109 | [ 110 | { name: 'index.js', hash: '', time: '', size: 0 }, 111 | { name: 'index.d.ts', hash: '', time: '', size: 0 }, 112 | ] 113 | ); 114 | expect(atTypesSupport).toEqual({ 115 | types: { 116 | ts: 'included', 117 | }, 118 | }); 119 | }); 120 | 121 | it('should not match an incorrect filesList', () => { 122 | const atTypesSupport = api.getTypeScriptSupport( 123 | { 124 | name: 'doesnotexist', 125 | types: { ts: false }, 126 | version: '1.0', 127 | }, 128 | [ 129 | { name: 'index.js', hash: '', time: '', size: 0 }, 130 | { name: 'index.ts', hash: '', time: '', size: 0 }, 131 | { name: 'index.md', hash: '', time: '', size: 
0 }, 132 | ] 133 | ); 134 | expect(atTypesSupport).toEqual({ 135 | types: { 136 | ts: false, 137 | }, 138 | }); 139 | }); 140 | }); 141 | }); 142 | -------------------------------------------------------------------------------- /src/typescript/index.ts: -------------------------------------------------------------------------------- 1 | import type { RawPkg } from '../@types/pkg'; 2 | import { config } from '../config'; 3 | import type { File } from '../jsDelivr'; 4 | import { datadog } from '../utils/datadog'; 5 | import { log } from '../utils/log'; 6 | import { request } from '../utils/request'; 7 | 8 | export const typesCache: Record = Object.create(null); 9 | 10 | type TypesEntry = { 11 | p: string | null; // package repo 12 | l: string | null; // package name 13 | t: string; // @types package name 14 | }; 15 | 16 | /** 17 | * Microsoft build a index.json with all @types/* on each publication. 18 | * - https://github.com/microsoft/types-publisher/blob/master/src/create-search-index.ts. 19 | */ 20 | export async function loadTypesIndex(): Promise { 21 | const start = Date.now(); 22 | 23 | const { body: data } = await request( 24 | config.typescriptTypesIndex, 25 | { 26 | decompress: true, 27 | responseType: 'json', 28 | } 29 | ); 30 | 31 | log.info(`📦 Typescript preload, found ${data.length} @types`); 32 | 33 | data.forEach((entry) => { 34 | if (entry.l) { 35 | typesCache[entry.l] = entry.t; 36 | } 37 | }); 38 | 39 | datadog.timing('typescript.loadTypesIndex', Date.now() - start); 40 | } 41 | 42 | export function isDefinitelyTyped({ name }): string | undefined { 43 | return typesCache[name]; 44 | } 45 | 46 | /** 47 | * Basically either 48 | * - { types: { ts: false }} for no existing TypeScript support 49 | * - { types: { ts: "@types/module" }} - for definitely typed support 50 | * - { types: { ts: "included" }} - for types shipped with the module. 
51 | */ 52 | export function getTypeScriptSupport( 53 | pkg: Pick, 54 | filelist: File[] 55 | ): Pick { 56 | const start = Date.now(); 57 | 58 | try { 59 | // Already calculated in `formatPkg` 60 | if (pkg.types.ts === 'included') { 61 | return { types: pkg.types }; 62 | } 63 | 64 | for (const file of filelist) { 65 | if (!file.name.endsWith('.d.ts')) { 66 | continue; 67 | } 68 | 69 | datadog.increment('jsdelivr.getTSSupport.hit'); 70 | 71 | return { types: { ts: 'included' } }; 72 | } 73 | 74 | // The 2nd most likely is definitely typed 75 | const defTyped = isDefinitelyTyped({ name: pkg.name }); 76 | if (defTyped) { 77 | return { 78 | types: { 79 | ts: 'definitely-typed', 80 | definitelyTyped: `@types/${defTyped}`, 81 | }, 82 | }; 83 | } 84 | datadog.increment('jsdelivr.getTSSupport.miss'); 85 | 86 | return { types: { ts: false } }; 87 | } finally { 88 | datadog.timing('typescript.getSupport', Date.now() - start); 89 | } 90 | } 91 | 92 | /** 93 | * Check if packages have Typescript definitions. 
94 | */ 95 | export async function getTSSupport( 96 | pkgs: Array>, 97 | filelists: File[][] 98 | ): Promise>> { 99 | const start = Date.now(); 100 | 101 | const all = await Promise.all( 102 | pkgs.map((pkg, index) => { 103 | return getTypeScriptSupport(pkg, filelists[index] || []); 104 | }) 105 | ); 106 | 107 | datadog.timing('getTSSupport', Date.now() - start); 108 | return all; 109 | } 110 | -------------------------------------------------------------------------------- /src/utils/MetricCollector.ts: -------------------------------------------------------------------------------- 1 | import type { Agent } from 'elastic-apm-node'; 2 | import _ from 'lodash'; 3 | 4 | class MetricCollector { 5 | private client: Agent; 6 | private readonly events: { [k: string]: number }; 7 | private readonly timings: { [k: string]: number[] }; 8 | private timingsToClear: Set; 9 | 10 | constructor(client) { 11 | this.client = client; 12 | this.events = Object.create(null); 13 | this.timings = Object.create(null); 14 | this.timingsToClear = new Set(); 15 | } 16 | 17 | increment(event: string, count: number = 1): this { 18 | this.logEvent(event, count); 19 | return this; 20 | } 21 | 22 | gauge(name: string, value: number): this { 23 | if (this.timings[name] === undefined) { 24 | this.registerTiming(name); 25 | } 26 | 27 | this.timings[name] = [value]; 28 | return this; 29 | } 30 | 31 | logEvent(event: string, count: number = 1): this { 32 | if (this.events[event] === undefined) { 33 | this.registerEvent(event); 34 | } 35 | 36 | this.events[event] += count; 37 | return this; 38 | } 39 | 40 | timing(timing: string, duration: number): this { 41 | if (this.timings[timing] === undefined) { 42 | this.registerTiming(timing); 43 | } 44 | 45 | if (this.timingsToClear.has(timing)) { 46 | this.timingsToClear.delete(timing); 47 | this.timings[timing] = []; 48 | } 49 | 50 | this.timings[timing]!.push(duration); 51 | return this; 52 | } 53 | 54 | private registerEvent(event: string): void { 55 | 
this.events[event] = 0; 56 | 57 | // istanbul ignore if 58 | if (this.client.isStarted()) { 59 | this.client.registerMetric(`npmSearch.${event}`, () => { 60 | const value = this.events[event]; 61 | this.events[event] = 0; 62 | return value; 63 | }); 64 | } 65 | } 66 | 67 | private registerTiming(timing: string): void { 68 | this.timings[timing] = []; 69 | 70 | // istanbul ignore if 71 | if (this.client.isStarted()) { 72 | this.client.registerMetric(`npmSearch.${timing}`, () => { 73 | this.timingsToClear.add(timing); 74 | return _.sum(this.timings[timing]) / this.timings[timing]!.length; 75 | }); 76 | } 77 | } 78 | } 79 | 80 | export default MetricCollector; 81 | -------------------------------------------------------------------------------- /src/utils/datadog.ts: -------------------------------------------------------------------------------- 1 | import agent from 'elastic-apm-node'; 2 | 3 | import MetricCollector from './MetricCollector'; 4 | 5 | export const datadog = new MetricCollector(agent); 6 | -------------------------------------------------------------------------------- /src/utils/log.ts: -------------------------------------------------------------------------------- 1 | import bunyan from 'bunyan'; 2 | import bunyanDebugStream from 'bunyan-debug-stream'; 3 | 4 | const stream = bunyanDebugStream({ 5 | showDate: process.env.NODE_ENV !== 'production', 6 | showProcess: false, 7 | showLoggerName: false, 8 | showPid: false, 9 | showLevel: process.env.NODE_ENV === 'production', 10 | }); 11 | 12 | export const log = bunyan.createLogger({ 13 | name: 'npm-search', 14 | streams: [ 15 | { 16 | level: 'info', 17 | type: 'raw', 18 | stream, 19 | }, 20 | ], 21 | serializers: bunyanDebugStream.serializers, 22 | }); 23 | -------------------------------------------------------------------------------- /src/utils/request.ts: -------------------------------------------------------------------------------- 1 | import http from 'http'; 2 | import https from 'https'; 3 | 4 
| import type { OptionsOfJSONResponseBody } from 'got'; 5 | import got from 'got'; 6 | 7 | import { config } from '../config'; 8 | 9 | // eslint-disable-next-line @typescript-eslint/no-var-requires, import/no-commonjs 10 | const { version } = require('../../package.json'); 11 | 12 | export const USER_AGENT = `Algolia npm-search/${version} (https://github.com/algolia/npm-search)`; 13 | 14 | const options: http.AgentOptions = { 15 | keepAlive: true, 16 | timeout: 60000, 17 | maxFreeSockets: 2000, 18 | scheduling: 'fifo', 19 | }; 20 | 21 | // The agents will pool TCP connections 22 | export const httpAgent = new http.Agent(options); 23 | export const httpsAgent = new https.Agent(options); 24 | 25 | // eslint-disable-next-line @typescript-eslint/explicit-function-return-type 26 | export async function request( 27 | url: string, 28 | opts: OptionsOfJSONResponseBody 29 | ) { 30 | return await got(url, { 31 | timeout: config.defaultRequestTimeout, 32 | ...opts, 33 | headers: { 34 | ...(opts.headers || {}), 35 | 'user-agent': USER_AGENT, 36 | }, 37 | dnsCache: true, 38 | dnsLookupIpVersion: 'ipv4', 39 | agent: { 40 | http: httpAgent, 41 | https: httpsAgent, 42 | }, 43 | }); 44 | } 45 | -------------------------------------------------------------------------------- /src/utils/sentry.ts: -------------------------------------------------------------------------------- 1 | import agent from 'elastic-apm-node'; 2 | 3 | import { log } from './log'; 4 | 5 | export function report(err: any, extra: any = {}): void { 6 | const logErr = [504].includes(err.statusCode) 7 | ? { statusCode: err.statusCode } 8 | : err; 9 | 10 | const logXtr = [504].includes(extra.err?.statusCode) 11 | ? 
{ err: { statusCode: extra.err.statusCode } } 12 | : extra; 13 | 14 | log.error(logErr, logXtr); 15 | agent.captureError(err, { custom: extra }); 16 | } 17 | 18 | export async function drain(): Promise { 19 | return agent.flush(); 20 | } 21 | -------------------------------------------------------------------------------- /src/utils/time.ts: -------------------------------------------------------------------------------- 1 | export function offsetToTimestamp( 2 | offset: number, 3 | now: Date | number = Date.now() 4 | ): number { 5 | return round(now.valueOf() + offset).valueOf(); 6 | } 7 | 8 | export function round(date: Date | number): Date { 9 | const d = new Date(date); 10 | d.setUTCHours(0, 0, 0, 0); 11 | return d; 12 | } 13 | -------------------------------------------------------------------------------- /src/utils/wait.ts: -------------------------------------------------------------------------------- 1 | import { setTimeout } from 'node:timers/promises'; 2 | 3 | import { log } from './log'; 4 | 5 | export async function backoff( 6 | retry: number, 7 | pow: number, 8 | max: number 9 | ): Promise { 10 | // retry backoff 11 | const bo = Math.min(Math.pow(retry + 1, pow) * 1000, max); 12 | log.info('Retrying (', retry, '), waiting for', bo); 13 | await setTimeout(bo); 14 | } 15 | -------------------------------------------------------------------------------- /src/watch.ts: -------------------------------------------------------------------------------- 1 | import chalk from 'chalk'; 2 | import type { DatabaseChangesResultItem } from 'nano'; 3 | 4 | import type { StateManager } from './StateManager'; 5 | import type { AlgoliaStore } from './algolia'; 6 | import { config } from './config'; 7 | import { MainWatchIndexer } from './indexers/MainWatchIndexer'; 8 | import { OneTimeBackgroundIndexer } from './indexers/OneTimeBackgroundIndexer'; 9 | import { PeriodicBackgroundIndexer } from './indexers/PeriodicBackgroundIndexer'; 10 | import * as npm from './npm'; 11 
| import { ChangesReader } from './npm/ChangesReader'; 12 | import { datadog } from './utils/datadog'; 13 | import { log } from './utils/log'; 14 | import * as sentry from './utils/sentry'; 15 | import { report } from './utils/sentry'; 16 | import { backoff } from './utils/wait'; 17 | 18 | export class Watch { 19 | stateManager: StateManager; 20 | algoliaStore: AlgoliaStore; 21 | // Cached npmInfo.seq 22 | totalSequence: number = 0; 23 | 24 | changesReader: ChangesReader | undefined; 25 | oneTimeIndexer: OneTimeBackgroundIndexer | undefined; 26 | periodicDataIndexer: PeriodicBackgroundIndexer | undefined; 27 | mainWatchIndexer: MainWatchIndexer | undefined; 28 | 29 | constructor(stateManager: StateManager, algoliaStore: AlgoliaStore) { 30 | this.stateManager = stateManager; 31 | this.algoliaStore = algoliaStore; 32 | } 33 | 34 | /** 35 | * Run watch. 36 | * 37 | * --- Watch ? 38 | * Watch is "Long Polled. This mode is not paginated and the event system in CouchDB send 39 | * events as they arrive, which is super cool and reactive. 40 | * One gotcha those events arrive at the same rate whether you are watching the last seq or not. 41 | * 42 | * Example: 43 | * listener A - up to date 44 | * listener B - few sequences behind. 45 | * 46 | * Package C is updated. 47 | * 48 | * Listener A receive update C 49 | * listener B receive update N. 50 | * 51 | * Listener A is up to date again 52 | * listener B is still few sequences behind and will not receive any other event 53 | * until an other package is updated. 54 | * It will never be up to date because he receive event at the same pace 55 | * as they arrive in listener A, even if it's not the same package. 
56 | */ 57 | async run(): Promise { 58 | log.info('-----'); 59 | log.info('🚀 Watch: starting'); 60 | log.info('-----'); 61 | 62 | await this.stateManager.save({ 63 | stage: 'watch', 64 | }); 65 | 66 | setInterval(() => { 67 | npm.registry 68 | .request('') 69 | .then((info) => { 70 | this.totalSequence = Number(info.update_seq); 71 | }) 72 | .catch(() => {}); 73 | }, 5000).unref(); 74 | 75 | this.oneTimeIndexer = new OneTimeBackgroundIndexer( 76 | this.algoliaStore, 77 | this.algoliaStore.mainIndex 78 | ); 79 | 80 | this.periodicDataIndexer = new PeriodicBackgroundIndexer( 81 | this.algoliaStore, 82 | this.algoliaStore.mainIndex, 83 | this.algoliaStore.mainNotFoundIndex 84 | ); 85 | 86 | this.mainWatchIndexer = new MainWatchIndexer(this.algoliaStore); 87 | 88 | this.oneTimeIndexer.run(); 89 | this.periodicDataIndexer.run(); 90 | this.mainWatchIndexer.run(); 91 | 92 | await this.launchChangeReader(); 93 | } 94 | 95 | async stop(): Promise { 96 | log.info('Stopping Watch...'); 97 | 98 | try { 99 | this.changesReader?.stop?.(); 100 | await this.oneTimeIndexer?.stop?.(); 101 | await this.periodicDataIndexer?.stop?.(); 102 | await this.mainWatchIndexer?.stop?.(); 103 | } catch (err) { 104 | sentry.report(err); 105 | } 106 | 107 | log.info('Stopped Watch gracefully'); 108 | } 109 | 110 | async launchChangeReader(): Promise { 111 | const { seq: since } = await this.stateManager.get(); 112 | 113 | log.info(`listening from ${since}...`); 114 | 115 | const reader = new ChangesReader({ since: String(since) }); 116 | 117 | reader 118 | .on('batch', (batch: DatabaseChangesResultItem[]) => { 119 | const changes = Array.from( 120 | batch 121 | .filter((change) => change.id) 122 | .reduce((acc, change) => { 123 | return acc.set(change.id, change); 124 | }, new Map()) 125 | .values() 126 | ); 127 | 128 | if (!changes.length) { 129 | return; 130 | } 131 | 132 | const storeChanges = async (retry = 0): Promise => { 133 | try { 134 | await this.algoliaStore.mainQueueIndex.saveObjects( 
135 | changes.map((change) => ({ 136 | seq: change.seq, 137 | name: change.id, 138 | objectID: change.id, 139 | retries: 0, 140 | change, 141 | })) 142 | ); 143 | } catch (err) { 144 | const newRetry = retry + 1; 145 | log.error('Error adding a change to the queue.', { err }); 146 | 147 | await backoff( 148 | newRetry, 149 | config.retryBackoffPow, 150 | config.retryBackoffMax 151 | ); 152 | 153 | return storeChanges(newRetry); 154 | } 155 | }; 156 | 157 | // We need to move one at a time here, so pause until the change is safely stored. 158 | reader.pause(); 159 | 160 | storeChanges().then(() => { 161 | const seq = changes.at(-1).seq; 162 | 163 | reader.resume(); 164 | this.logProgress(seq).catch(() => {}); 165 | 166 | this.stateManager.save({ seq }).catch((err) => { 167 | report(new Error('Error storing watch progress'), { err }); 168 | }); 169 | }); 170 | }) 171 | .on('error', (err) => { 172 | sentry.report(err); 173 | }) 174 | .run(); 175 | 176 | this.changesReader = reader; 177 | } 178 | 179 | /** 180 | * Log our process through watch. 
181 | * 182 | */ 183 | async logProgress(seq: number): Promise { 184 | const queueLength = await this.mainWatchIndexer!.fetchQueueLength(); 185 | 186 | datadog.gauge('sequence.total', this.totalSequence); 187 | datadog.gauge('sequence.current', seq); 188 | datadog.gauge('job.idleCount', queueLength); 189 | 190 | log.info( 191 | chalk.dim.italic 192 | .white`[progress] Synced %d/%d changes (%s%) (%s remaining) (~%s in queue)`, 193 | seq, 194 | this.totalSequence, 195 | ((Math.max(seq, 1) / this.totalSequence) * 100).toFixed(2), 196 | this.totalSequence - seq, 197 | queueLength 198 | ); 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig.json to read more about this file */ 4 | /* Basic Options */ 5 | // "incremental": true, /* Enable incremental compilation */ 6 | "target": "ESNext" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', 'ES2021', or 'ESNEXT'. */, 7 | "module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */, 8 | "lib": [ 9 | "ES6", 10 | "es2015", 11 | "es2017", 12 | "es2019" 13 | ] /* Specify library files to be included in the compilation. */, 14 | "allowJs": true /* Allow javascript files to be compiled. */, 15 | "checkJs": false /* Report errors in .js files. */, 16 | // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', 'react', 'react-jsx' or 'react-jsxdev'. */ 17 | "declaration": true /* Generates corresponding '.d.ts' file. */, 18 | "declarationMap": true /* Generates a sourcemap for each corresponding '.d.ts' file. */, 19 | "sourceMap": true /* Generates corresponding '.map' file. */, 20 | // "outFile": "./", /* Concatenate and emit output to single file. 
*/
    "outDir": "./dist" /* Redirect output structure to the directory. */,
    "rootDir": "./" /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */,
    // "composite": true, /* Enable project compilation */
    // "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */
    "removeComments": true /* Do not emit comments to output. */,
    // "noEmit": true, /* Do not emit outputs. */
    // "importHelpers": true, /* Import emit helpers from 'tslib'. */
    "downlevelIteration": true /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */,
    // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */
    /* Strict Type-Checking Options */
    "strict": true /* Enable all strict type-checking options. */,
    "noImplicitAny": false /* Raise error on expressions and declarations with an implied 'any' type. */,
    "strictNullChecks": true /* Enable strict null checks. */,
    "strictFunctionTypes": true /* Enable strict checking of function types. */,
    "strictBindCallApply": true /* Enable strict 'bind', 'call', and 'apply' methods on functions. */,
    "strictPropertyInitialization": true /* Enable strict checking of property initialization in classes. */,
    "noImplicitThis": true /* Raise error on 'this' expressions with an implied 'any' type. */,
    "alwaysStrict": true /* Parse in strict mode and emit "use strict" for each source file. */,
    /* Additional Checks */
    "noUnusedLocals": false /* Report errors on unused locals. */,
    "noUnusedParameters": false /* Report errors on unused parameters. */,
    "noImplicitReturns": true /* Report error when not all code paths in function return a value. */,
    "noFallthroughCasesInSwitch": true /* Report errors for fallthrough cases in switch statement. */,
    "noUncheckedIndexedAccess": true /* Include 'undefined' in index signature results */,
    "noImplicitOverride": true /* Ensure overriding members in derived classes are marked with an 'override' modifier. */,
    // "noPropertyAccessFromIndexSignature": true, /* Require undeclared properties from index signatures to use element accesses. */
    /* Module Resolution Options */
    "moduleResolution": "node" /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */,
    // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */
    // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
    // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */
    // "typeRoots": [], /* List of folders to include type definitions from. */
    // "types": [], /* Type declaration files to be included in compilation. */
    "allowSyntheticDefaultImports": true /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */,
    "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */,
    // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */
    // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
    /* Source Map Options */
    // "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */
    // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
    // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */
    // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */
    /* Experimental Options */
    // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */
    // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */
    /* Advanced Options */
    "skipLibCheck": true /* Skip type checking of declaration files. */,
    "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */,
    "resolveJsonModule": true
  },
  // NOTE(review): "package.json" is presumably listed so resolveJsonModule
  // covers the version read in src/utils/request.ts — verify before changing.
  "include": ["src/**/*.ts", "package.json"],
  "exclude": ["node_modules"]
}
--------------------------------------------------------------------------------