├── .dockerignore
├── .env.example
├── .eslintignore
├── .eslintrc.js
├── .github
└── workflows
│ ├── api-control.yml
│ ├── publish.yml
│ ├── renovate.yml
│ └── validate.yml
├── .gitignore
├── .husky
├── .gitignore
└── pre-commit
├── .nvmrc
├── .prettierrc
├── .renovaterc.json
├── .yarn
├── plugins
│ └── @yarnpkg
│ │ └── plugin-workspace-tools.cjs
└── releases
│ └── yarn-3.2.2.cjs
├── .yarnrc.yml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── Procfile
├── README.md
├── algolia.png
├── elastic-apm-node.js
├── jest.config.js
├── jsdelivr.png
├── package.json
├── release.config.js
├── scripts
├── publish-check.mjs
├── publish-docker
├── publish-github
└── test-api-control
├── src
├── @types
│ ├── nice-package.ts
│ └── pkg.ts
├── StateManager.ts
├── __tests__
│ ├── StateManager.test.ts
│ ├── __snapshots__
│ │ └── formatPkg.test.ts.snap
│ ├── api-control
│ │ └── npm.test.ts
│ ├── bootstrap.test.ts
│ ├── changelog.test.ts
│ ├── config.test.ts
│ ├── formatPkg.test.ts
│ ├── preact-simplified.ts
│ ├── rawPackages.ts
│ └── saveDocs.test.ts
├── algolia
│ └── index.ts
├── api.ts
├── bootstrap.ts
├── changelog.ts
├── config.ts
├── errors.ts
├── formatPkg.ts
├── index.ts
├── indexers
│ ├── Indexer.ts
│ ├── MainBootstrapIndexer.ts
│ ├── MainIndexer.ts
│ ├── MainWatchIndexer.ts
│ ├── OneTimeBackgroundIndexer.ts
│ └── PeriodicBackgroundIndexer.ts
├── jsDelivr
│ ├── __test__
│ │ ├── __snapshots__
│ │ │ └── index.test.ts.snap
│ │ ├── index.test.ts
│ │ └── pkgTypes.test.ts
│ ├── index.ts
│ └── pkgTypes.ts
├── npm
│ ├── ChangesReader.ts
│ ├── Prefetcher.ts
│ ├── __tests__
│ │ └── index.test.ts
│ ├── index.ts
│ └── types.ts
├── saveDocs.ts
├── typescript
│ ├── index.test.ts
│ └── index.ts
├── utils
│ ├── MetricCollector.ts
│ ├── datadog.ts
│ ├── log.ts
│ ├── request.ts
│ ├── sentry.ts
│ ├── time.ts
│ └── wait.ts
└── watch.ts
├── tsconfig.json
└── yarn.lock
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | **/node_modules
3 |
4 | # Useless and heavy folders
5 | **/dist
6 | coverage/
7 | junit/
8 |
9 | # Logs
10 | **/*.log
11 | **/.env*
12 |
13 | # Other useless files in the image
14 | cypress/
15 | .git/
16 | .github/
17 | .githooks/
18 | .husky/
19 | .nodemon.json
20 | .editorconfig
21 | .gitattributes
22 | .coveralls.yml
23 | .prettierignore
24 | .prettierrc.js
25 | .eslintrc.js
26 | .nvmrc
27 | .npmrc
28 | .eslintignore
29 | .eslinrcjs
30 | .tern-project
31 | cypress.json
32 | cloudbuild.yaml
33 | docker-compose.yml
34 | MAINTAINERS.md
35 | README.md
36 | CHANGELOG.md
37 | CONTRIBUTING.md
38 | **/*.test.ts
39 | **/*.test.tsx
40 | **/*.test.js
41 | **/*.stories.tsx
42 | **/*.spec.ts
43 | **/*.spec.js
44 | **/*.perf.ts
45 | package-lock.json
46 | renovate.json
47 | **/jest*
48 | **/.DS_Store
49 | .vscode
50 | **/.storybook/
51 | **/__fixtures__/
52 | **/__snapshots__/
53 | **/__mocks__/
54 | **/__mock__/
55 | **/__tests__/
56 | **/tsconfig.tsbuildinfo
57 | Procfile
58 | release.config.js
59 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | appId=
2 | apiKey=
3 | indexName=npm-search
4 | bootstrapIndexName=npm-search-bootstrap
5 | DOGSTATSD_HOST="localhost"
6 | SENTRY_DSN=
7 |
--------------------------------------------------------------------------------
/.eslintignore:
--------------------------------------------------------------------------------
1 | lib/
2 | dist/
3 | coverage/
4 |
--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
/* eslint-disable import/no-commonjs */

/**
 * ESLint configuration: Algolia's shared presets, with TypeScript-specific
 * overrides applied to .ts files only.
 *
 * @type {import('eslint').Linter.Config}
 */
const config = {
  extends: ['algolia', 'algolia/jest'],
  rules: {
    'no-continue': 'off',
    'valid-jsdoc': 'off',
    'require-await': 'off',
  },
  overrides: [
    {
      files: ['**/*.ts'],
      extends: ['algolia/typescript'],
      parser: '@typescript-eslint/parser',
      parserOptions: {
        sourceType: 'module',
        // Pointing at the tsconfig enables type-aware lint rules.
        project: './tsconfig.json',
      },
      rules: {
        'consistent-return': 'off',
        'no-dupe-class-members': 'off',
        // Require the .js extension on relative imports (what the compiled
        // ESM output needs at runtime) while forbidding an explicit .ts one.
        'import/extensions': [
          'error',
          {
            ignorePackages: true,
            pattern: {
              js: 'always',
              ts: 'never',
            },
          },
        ],
      },
    },
  ],
};

module.exports = config;
--------------------------------------------------------------------------------
/.github/workflows/api-control.yml:
--------------------------------------------------------------------------------
1 | name: API CONTROL
2 | on:
3 | schedule:
4 | # Everyday, at 10am
5 | - cron: '0 10 * * *'
6 |
7 | jobs:
8 | api-control:
9 | name: 🛂 API Control
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: 📁 Checkout code
13 | uses: actions/checkout@v4
14 |
15 | - name: ⚙️ Setup node
16 | uses: actions/setup-node@v3
17 | with:
18 | node-version: '18.18'
19 | cache: 'yarn'
20 |
21 | - name: 📦 Install dependencies
22 | run: yarn install --frozen-lockfile
23 |
24 | - name: 🛂 API Control
25 | run: GITHUB_RUN_ID="${{ github.run_id }}" yarn test:api-control
26 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: PUBLISH
2 | run-name: 🚀 Commit on ${{ github.ref_name }}
3 |
4 | on:
5 | push:
6 | branches:
7 | - master
8 |
9 | jobs:
10 | validate:
11 | name: ↪️ Validate
12 | uses: ./.github/workflows/validate.yml
13 | publish:
14 | name: 📦 Publish
15 | needs: validate
16 | runs-on: ubuntu-latest
17 | permissions:
18 | actions: write # To cancel the run
19 | contents: write # To add a new tag and push a new release
20 | packages: write # To add a new Docker package
21 | env:
22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
23 | RUN_ID: ${{ github.run_id }}
24 |
25 | steps:
26 | - name: 📁 Checkout code
27 | uses: actions/checkout@v4
28 |
29 | - name: ⚙️ Setup node
30 | uses: actions/setup-node@v3
31 | with:
32 | node-version: '18.18'
33 | cache: 'yarn'
34 |
35 | - name: 📦 Install dependencies
36 | run: yarn install --frozen-lockfile
37 |
38 | - name: ❓ Check if a new version needs to be published
39 | id: publish-check
40 | # Note: We can't do:
41 | # echo "SHOULD_PUBLISH=$(yarn publish:check)" >> "$GITHUB_OUTPUT"
42 | # Because the echo command will always be considered a success, even if
43 | # the $(yarn publish:check) fails. This is why we need an intermediate
44 | # variable.
45 | run: >
46 | SHOULD_PUBLISH="$(yarn publish:check)"
47 | && echo "SHOULD_PUBLISH=$SHOULD_PUBLISH" >> "$GITHUB_OUTPUT"
48 |
49 | - name: ⏹️ Cancel run
50 | if: ${{ steps.publish-check.outputs.SHOULD_PUBLISH == 'no' }}
51 | run: gh run cancel "$RUN_ID"
52 |
53 | - name: 🆙 Bump version, tag commit, publish GitHub Release
54 | if: ${{ steps.publish-check.outputs.SHOULD_PUBLISH == 'yes' }}
55 | run: yarn publish:github
56 |
57 | - name: 🐋 Publish Docker image
58 | if: ${{ steps.publish-check.outputs.SHOULD_PUBLISH == 'yes' }}
59 | run: yarn publish:docker
60 |
--------------------------------------------------------------------------------
/.github/workflows/renovate.yml:
--------------------------------------------------------------------------------
1 | name: RENOVATE
2 | on:
3 | schedule:
4 | - cron: '0 14 * * 4'
5 | workflow_dispatch:
6 |
7 | jobs:
8 | test:
9 | runs-on: ubuntu-latest
10 |
11 | steps:
12 | - name: Checkout Repo
13 | uses: actions/checkout@v3
14 |
15 | - name: Renovate Automatic Branch
16 | uses: bodinsamuel/renovate-automatic-branch@v1
17 | with:
18 | github-token: ${{ secrets.GITHUB_TOKEN }}
19 | repo-owner: algolia
20 | repo-name: npm-search
21 | branch-base: master
22 |
--------------------------------------------------------------------------------
/.github/workflows/validate.yml:
--------------------------------------------------------------------------------
1 | name: VALIDATE
2 | run-name: 🤖 Validating code on ${{ github.ref_name }}
3 |
4 | on:
5 | pull_request:
6 | workflow_call:
7 |
8 | jobs:
9 | validate:
10 | name: 🤖 Validate
11 | runs-on: ubuntu-latest
12 | steps:
13 | - name: 📁 Checkout code
14 | uses: actions/checkout@v4
15 |
16 | - name: ⚙️ Setup node
17 | uses: actions/setup-node@v3
18 | with:
19 | node-version: '18.18'
20 | cache: 'yarn'
21 |
22 | - name: 📦 Install dependencies
23 | run: yarn install --frozen-lockfile
24 |
25 | - name: 📝 Lint
26 | run: yarn lint
27 |
28 | - name: 🏗️ Build
29 | run: yarn build
30 |
31 | - name: 🚦 Test
32 | run: yarn test
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | coverage/
2 | node_modules/
3 | lib/
4 | .env
5 | junit/
6 | dist/
7 |
8 | *.log
9 |
10 | # https://yarnpkg.com/getting-started/qa#which-files-should-be-gitignored
11 | .yarn/*
12 | !.yarn/releases
13 | !.yarn/plugins
14 |
--------------------------------------------------------------------------------
/.husky/.gitignore:
--------------------------------------------------------------------------------
1 | _
2 |
--------------------------------------------------------------------------------
/.husky/pre-commit:
--------------------------------------------------------------------------------
#!/bin/sh
# Husky pre-commit hook: lint and run the test suite before every commit.
. "$(dirname "$0")/_/husky.sh"

# NOTE(review): the repo uses yarn everywhere else; these invoke npm — confirm
# this is intentional (both resolve the same package.json scripts).
npm run lint
npm run test
--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
1 | 18.18.0
2 |
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "singleQuote": true,
3 | "trailingComma": "es5"
4 | }
--------------------------------------------------------------------------------
/.renovaterc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": [
3 | "algolia"
4 | ],
5 | "baseBranches": [
6 | "chore/renovateBaseBranch"
7 | ]
8 | }
9 |
--------------------------------------------------------------------------------
/.yarnrc.yml:
--------------------------------------------------------------------------------
1 | nodeLinker: node-modules
2 |
3 | plugins:
4 | - path: .yarn/plugins/@yarnpkg/plugin-workspace-tools.cjs
5 | spec: "@yarnpkg/plugin-workspace-tools"
6 |
7 | yarnPath: .yarn/releases/yarn-3.2.2.cjs
8 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | ## Dev
4 |
5 | ```sh
6 | cp .env.example .env
7 | # Fill appId and apiKey
8 |
9 | yarn
10 | yarn build:hot
11 | yarn dev
12 | ```
13 |
14 | ## Tests & Lint
15 |
16 | ```sh
17 | yarn test
18 | yarn lint
19 | ```
20 |
21 | ## Env variables
22 |
23 | Everything in [src/config.ts](./src/config.ts) can be overridden via Env vars.
24 | You may want to override at least in your `.env`:
25 |
26 | - `apiKey`: [Algolia](https://www.algolia.com/) apiKey - **required**
27 | - `appId`: [Algolia](https://www.algolia.com/) appId - _default `OFCNCOG2CU`_
28 | - `indexName`: [Algolia](https://www.algolia.com/) indexName - _default `npm-search`_
29 | - `DOGSTATSD_HOST`: Metrics reporting - _default `localhost`_
30 | - `SENTRY_DSN`: Error reporting - _default `empty`_
31 |
32 | ## Releasing New Version
33 |
34 | > This step is done by the CI
35 |
36 | ```sh
37 | GH_TOKEN="token" yarn semantic-release --ci=false
38 | ```
39 |
40 | ## Releasing Docker
41 |
42 | > This step is done by the CI
43 |
44 | ```sh
45 | yarn docker:build
46 | yarn docker:release
47 | ```
48 |
49 | ## Deploying new version
50 |
51 | > Showing for GCP, but the image can be used anywhere
52 |
53 | - Go to "Compute Engine > VM Instances > `name_of_the_vm`
54 | - Edit
55 | - Change container image with new version
56 | - Save
57 |
58 | ## Deploying first time
59 |
60 | > You need to replace the values prefixed with `PUT_`
61 |
62 | ```sh
63 | gcloud beta compute \
64 | --project=npm-search-2 instances create-with-container npm-search-3 \
65 | --zone=us-central1-a \
66 | --machine-type=e2-medium \
67 | --subnet=default \
68 | --network-tier=STANDARD \
69 | --metadata=google-logging-enabled=true \
70 | --maintenance-policy=MIGRATE \
71 |   --service-account=PUT_YOUR_SERVICE_ACCOUNT \
72 | --scopes=https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring.write,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append \
73 | --image=cos-stable-89-16108-470-1 \
74 | --image-project=cos-cloud \
75 | --boot-disk-size=10GB \
76 | --boot-disk-type=pd-balanced \
77 | --boot-disk-device-name=npm-search-3 \
78 | --no-shielded-secure-boot \
79 | --shielded-vtpm \
80 | --shielded-integrity-monitoring \
81 | --container-image=docker.io/algolia/npm-search:PUT_VERSION \
82 | --container-restart-policy=always \
83 | --container-command=node \
84 | --container-arg=--async-stack-traces \
85 | --container-arg=--max-semi-space-size=32 \
86 | --container-arg=--max-old-space-size=3000 \
87 | --container-arg=dist/index.js \
88 | --container-env=indexName=npm-search,bootstrapIndexName=npm-search-bootstrap,bootstrapConcurrency=40,apiKey=PUT_ALGOLIA_API_KEY,UV_THREADPOOL_SIZE=128,SENTRY_DSN=PUT_SENTRY_URL,DOGSTATSD_HOST=datadog \
89 | --labels=container-vm=cos-stable-89-16108-470-1 \
90 | --reservation-affinity=any
91 | ```
92 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# ---- Base ----
# Shared base so the build and the final stages use the exact same Node version.
FROM node:18.18.0-alpine AS base

# ------------------
# package.json cache
# ------------------
FROM apteno/alpine-jq:2022-03-27 AS deps

# To prevent cache invalidation from changes in fields other than dependencies:
# strip tooling-only fields (jest, prettier, eslint, ...) from package.json so
# the install layer below is only rebuilt when dependencies actually change.
COPY package.json /tmp
RUN jq 'walk(if type == "object" then with_entries(select(.key | test("^jest|prettier|eslint|semantic|dotenv|nodemon|renovate") | not)) else . end) | { name, dependencies, devDependencies, packageManager }' < /tmp/package.json > /tmp/deps.json

# ------------------
# New base image
# ------------------
FROM base as tmp

ENV IN_DOCKER true
ENV PLAYWRIGHT_BROWSERS_PATH="/ms-playwright"
ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD="true"

# Setup the app WORKDIR
WORKDIR /app/tmp

# Copy and install dependencies separately from the app's code
# to leverage Docker's cache when no dependency has changed
COPY --from=deps /tmp/deps.json ./package.json
COPY yarn.lock .yarnrc.yml ./
COPY .yarn .yarn

# Install dependencies for native deps
RUN apk add --no-cache bash python3

# Install dev dependencies
RUN true \
  # Use local version instead of letting yarn auto upgrade itself
  && yarn set version $(ls -d $PWD/.yarn/releases/*) \
  && yarn install

# This step will invalidate the cache on any source-code change
COPY . ./
RUN ls -lah /app/tmp

# Builds the code and reinstall node_modules in prod mode
RUN true \
  && yarn build \
  # Finally remove all dev packages
  && yarn workspaces focus --all --production \
  && rm -rf src/ \
  && rm -rf .yarn/

# ---- Final ----
# Resulting new, minimal image
# This image must have the minimum amount of layers
FROM node:18.18.0-alpine as final

ENV NODE_ENV production

# Do not use root to run the app
USER node

WORKDIR /app

COPY --from=tmp --chown=node:node /app/tmp /app

EXPOSE 8000

# NOTE(review): package.json "start" runs dist/index.js while this image runs
# dist/src/index.js — confirm which path matches the tsc output layout.
CMD [ "node", "dist/src/index.js" ]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016-present Algolia, Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | worker: yarn start
2 | worker_new: yarn start_new
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # npm-search
2 |
3 | npm ↔️ Algolia replication tool.
4 | Maintained by Algolia and jsDelivr.
5 |
6 |
7 |
8 |
9 | &
10 |
11 |
12 |
13 |
14 | [//]: # ([](https://circleci.com/gh/algolia/npm-search/tree/master) )
15 |
16 | [//]: # (
)
17 |
18 | [//]: # ()
19 |
20 | [//]: # (---)
21 |
22 | This is a failure resilient npm registry to Algolia index replication process.
23 | It will replicate all npm packages to an Algolia index and keep it up to date.
24 | The state of the replication is saved in Algolia index settings.
25 |
26 | The replication should always be running. **Only one instance per Algolia index must run at the same time**.
27 | If the process fails, restart it and the replication process will continue at the last point it remembers.
28 |
29 |
30 |
31 |
32 | - [🗿 npm-search ⛷ 🐌 🛰](#-npm-search---)
33 | - [Algolia Index](#algolia-index)
34 | - [Using the public index](#using-the-public-index)
35 | - [Schema](#schema)
36 | - [Ranking](#ranking)
37 | - [Textual relevance](#textual-relevance)
38 | - [Searchable Attributes](#searchable-attributes)
39 | - [Prefix Search](#prefix-search)
40 | - [Typo-tolerance](#typo-tolerance)
41 | - [Exact Boosting](#exact-boosting)
42 | - [Custom/Business relevance](#custombusiness-relevance)
43 | - [Number of downloads](#number-of-downloads)
44 | - [Popular packages](#popular-packages)
45 | - [Usage](#usage)
46 | - [Production](#production)
47 | - [Restart](#restart)
48 | - [How does it work?](#how-does-it-work)
49 | - [Contributing](#contributing)
50 |
51 |
52 |
53 | ## Algolia Index
54 |
55 | ### Using the public index
56 |
57 | The Algolia index is currently used, for free, by a few selected projects (e.g: [yarnpkg.com](https://yarnpkg.com), [codesandbox.io](https://codesandbox.io), [jsdelivr.com](https://www.jsdelivr.com/), etc...).
58 |
59 | If you want to include this index to your project please create a support request here: [Algolia Support](https://support.algolia.com/hc/en-us/requests/new).
60 |
61 | This product is an open source product for the community and not supported by Algolia.
62 |
63 | To be eligible your project must meet these requirements:
64 |
65 | - Publicly available: The project must be publicly usable and, if applicable, include documentation or instructions on how the community can use it.
66 | - Non-commercial: The project cannot be used to promote a product or service; it has to provide something of value to the community at no cost. Applications for non-commercial projects backed by commercial entities will be reviewed on a case-by-case basis.
67 |
68 |
69 | You can also use the code or the [public docker image](https://hub.docker.com/r/algolia/npm-search) to run your own (as of September 2021 it will create ~3M records x4).
70 |
71 | ### Schema
72 |
73 | For every single NPM package, we create a record in the Algolia index. The resulting records have the following schema:
74 |
75 | ```json5
76 | {
77 | name: 'babel-core',
78 | downloadsLast30Days: 10978749,
79 | downloadsRatio: 0.08310651682685861,
80 | humanDownloadsLast30Days: '11m',
81 | jsDelivrHits: 11684192,
82 | popular: true,
83 | version: '6.26.0',
84 | versions: {
85 | // [...]
86 | '7.0.0-beta.3': '2017-10-15T13:12:35.166Z',
87 | },
88 | tags: {
89 | latest: '6.26.0',
90 | old: '5.8.38',
91 | next: '7.0.0-beta.3',
92 | },
93 | description: 'Babel compiler core.',
94 | dependencies: {
95 | 'babel-code-frame': '^6.26.0',
96 | // [...]
97 | },
98 | devDependencies: {
99 | 'babel-helper-fixtures': '^6.26.0',
100 | // [...]
101 | },
102 | repository: {
103 | url: 'https://github.com/babel/babel/tree/master/packages/babel-core',
104 | host: 'github.com',
105 | user: 'babel',
106 | project: 'babel',
107 | path: '/tree/master/packages/babel-core',
108 | branch: 'master',
109 | },
110 | readme: '# babel-core\n\n> Babel compiler core.\n\n\n [... truncated at 200kb]',
111 | owner: {
112 | // either GitHub owner or npm owner
113 | name: 'babel',
114 | avatar: 'https://github.com/babel.png',
115 | link: 'https://github.com/babel',
116 | },
117 | deprecated: 'Deprecated', // This field will be removed, please use `isDeprecated` instead
118 | isDeprecated: true,
119 | deprecatedReason: 'Deprecated',
120 | isSecurityHeld: false, // See https://github.com/npm/security-holder
121 | badPackage: false,
122 | homepage: 'https://babeljs.io/',
123 | license: 'MIT',
124 | keywords: [
125 | '6to5',
126 | 'babel',
127 | 'classes',
128 | 'const',
129 | 'es6',
130 | 'harmony',
131 | 'let',
132 | 'modules',
133 | 'transpile',
134 | 'transpiler',
135 | 'var',
136 | 'babel-core',
137 | 'compiler',
138 | ],
139 | created: 1424009748555,
140 | modified: 1508833762239,
141 | lastPublisher: {
142 | name: 'hzoo',
143 | email: 'hi@henryzoo.com',
144 | avatar: 'https://gravatar.com/avatar/851fb4fa7ca479bce1ae0cdf80d6e042',
145 | link: 'https://www.npmjs.com/~hzoo',
146 | },
147 | owners: [
148 | {
149 | email: 'me@thejameskyle.com',
150 | name: 'thejameskyle',
151 | avatar: 'https://gravatar.com/avatar/8a00efb48d632ae449794c094f7d5c38',
152 | link: 'https://www.npmjs.com/~thejameskyle',
153 | },
154 | // [...]
155 | ],
156 | lastCrawl: '2017-10-24T08:29:24.672Z',
157 | dependents: 3321,
158 | types: {
159 | ts: 'definitely-typed', // definitely-typed | included | false
160 | definitelyTyped: '@types/babel__core',
161 | },
162 | moduleTypes: ['unknown'], // esm | cjs | none | unknown
163 | styleTypes: ['none'], // file extensions like css, less, scss or none if no style files present
164 | humanDependents: '3.3k',
165 | changelogFilename: null, // if babel-core had a changelog, it would be the raw GitHub url here
166 | objectID: 'babel-core',
167 | // the following fields are considered internal and may change at any time
168 | _downloadsMagnitude: 8,
169 | _jsDelivrPopularity: 5,
170 | _popularName: 'babel-core',
171 | _searchInternal: {
172 | alternativeNames: [
173 | // alternative versions of this name, to show up on confused searches
174 | ],
175 | },
176 | }
177 | ```
178 |
179 | ### Ranking
180 |
181 | If you want to learn more about how Algolia's ranking algorithm is working, you can read [this blog post](https://blog.algolia.com/search-ranking-algorithm-unveiled/).
182 |
183 | #### Textual relevance
184 |
185 | ##### Searchable Attributes
186 |
187 | We're restricting the search to use a subset of the attributes only:
188 |
189 | - `_popularName`
190 | - `name`
191 | - `description`
192 | - `keywords`
193 | - `owner.name`
194 | - `owners.name`
195 |
196 | ##### Prefix Search
197 |
198 | Algolia provides default prefix search capabilities (matching words with only the beginning). This is disabled for the `owner.name` and `owners.name` attributes.
199 |
200 | ##### Typo-tolerance
201 |
202 | Algolia provides default typo-tolerance.
203 |
204 | ##### Exact Boosting
205 |
206 | Using the `optionalFacetFilters` feature of Algolia, we're boosting exact matches on the name of a package to always be on top of the results.
207 |
208 | #### Custom/Business relevance
209 |
210 | ##### Number of downloads
211 |
212 | For each package, we use the number of downloads in the last 30 days as Algolia's `customRanking` setting. This will be used to sort the results having the same textual-relevance against each others.
213 |
214 | For instance, search for `babel` with match both `babel-core` and `babel-messages`. From a textual-relevance point of view, those 2 packages are exactly matching in the same way. In such case, Algolia will rely on the `customRanking` setting and therefore put the package with the highest number of downloads in the past 30 days first.
215 |
216 | ##### Popular packages
217 |
218 | Some packages will be considered as popular if they have been downloaded "more" than others. We currently consider a package popular if it either:
219 | - has more than `0.005%` of the total number of npm downloads,
220 | - is in the top thousand of packages at [jsDelivr](https://github.com/jsdelivr/data.jsdelivr.com).
221 |
222 | This `popular` flag is also used to boost some records over non-popular ones.
223 |
224 | ## Usage
225 |
226 | ### Production
227 |
228 | ```sh
229 | yarn
230 | apiKey=... yarn start
231 | ```
232 |
233 | ### Restart
234 | To restart from a particular point (or from the beginning):
235 |
236 | ```sh
237 | seq=0 apiKey=... yarn start
238 | ```
239 |
240 | This is useful when you want to completely resync the npm registry because:
241 |
242 | - you changed the way you format packages
243 | - you added more metadata (like GitHub stars)
244 | - you are in an unsure state and you just want to restart everything
245 |
246 | `seq` represents a [change sequence](http://docs.couchdb.org/en/2.0.0/json-structure.html#changes-information-for-a-database)
247 | in CouchDB lingo.
248 |
249 | ## How does it work?
250 |
251 | Our goal with this project is to:
252 |
253 | - be able to quickly do a complete rebuild
254 | - be resilient to failures
255 | - clean the package data
256 |
257 | When the process starts with `seq=0`:
258 |
259 | - save the [current sequence](https://replicate.npmjs.com/) of the npm registry in the state (Algolia settings)
260 | - bootstrap the initial index content by using [/\_all_docs](http://docs.couchdb.org/en/2.0.0/api/database/bulk-api.html)
261 | - replicate registry changes since the current sequence
262 | - watch for registry changes continuously and replicate them
263 |
264 | Replicate and watch are separated because:
265 |
266 | 1. In replicate we want to replicate a batch of documents in a fast way
267 | 2. In watch we want new changes as fast as possible, one by one. If watch was
268 | asking for batches of 100, new packages would be added too late to the index
269 |
270 | ## Contributing
271 |
272 | See [CONTRIBUTING.md](./CONTRIBUTING.md)
273 |
--------------------------------------------------------------------------------
/algolia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algolia/npm-search/1b9ef2e27dce872a377466f4a935fd2525e6e687/algolia.png
--------------------------------------------------------------------------------
/elastic-apm-node.js:
--------------------------------------------------------------------------------
/* eslint-disable import/no-commonjs */

// Configuration for the Elastic APM Node.js agent.
module.exports = {
  active: true,
  serviceName: 'jsdelivr-npm-search',
  // Report the app's package.json version so traces can be tied to a deploy.
  serviceVersion: require('./package.json').version,
  // Only log fatal agent-internal errors.
  logLevel: 'fatal',
  // Do not fetch remote agent configuration from the APM server.
  centralConfig: false,
  captureExceptions: false,
  captureErrorLogStackTraces: 'always',
  // Noise endpoints that should not produce transactions.
  ignoreUrls: [
    '/favicon.ico',
    '/heartbeat',
    '/amp_preconnect_polyfill_404_or_other_error_expected._Do_not_worry_about_it',
  ],
  errorOnAbortedRequests: false,
  // Sample 100% of transactions.
  transactionSampleRate: 1,
};
--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------
// eslint-disable-next-line import/no-commonjs
module.exports = {
  // Compile TS/JS test files on the fly with ts-jest.
  transform: {
    '^.+\\.[jt]sx?$': [
      'ts-jest',
      {
        // Skip type-checking in tests; `yarn build` (tsc) covers it.
        diagnostics: false,
        tsconfig: `tsconfig.json`,
      },
    ],
  },
  testMatch: ['/src/**/*.test.[jt]s'],
  // By default, ignore the slow and flaky tests testing external APIs. Those
  // will be run specifically with `yarn run test:api-control`
  testPathIgnorePatterns: ['api-control'],

  testEnvironment: 'node',
  modulePaths: ['src'],

  snapshotFormat: {
    escapeString: true,
    printBasicPrototype: true,
  },
};
--------------------------------------------------------------------------------
/jsdelivr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algolia/npm-search/1b9ef2e27dce872a377466f4a935fd2525e6e687/jsdelivr.png
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "npm-search",
3 | "version": "1.8.4",
4 | "private": true,
5 | "author": {
6 | "name": "Algolia, Inc.",
7 | "url": "https://www.algolia.com"
8 | },
9 | "scripts": {
10 | "build:hot": "tsc -w --preserveWatchOutput",
11 | "build": "tsc -b",
12 | "clean": "rm -rf dist/",
13 | "dev": "node -r ts-node/register/transpile-only -r dotenv/config --max-old-space-size=1500 src/index.ts",
14 | "lint": "eslint --ext=jsx,ts,tsx,js .",
15 | "prepare": "husky install",
16 | "start": "UV_THREADPOOL_SIZE=64 node --max-old-space-size=1500 dist/index.js",
17 | "start_new": "indexName=npm-search-new bootstrapIndexName=npm-search-new.tmp UV_THREADPOOL_SIZE=64 node --max-old-space-size=1500 dist/index.js",
18 | "test:watch": "jest --watchAll --no-watchman",
19 | "test:api-control": "./scripts/test-api-control",
20 | "test": "jest --forceExit",
21 | "publish:check": "node ./scripts/publish-check.mjs",
22 | "publish:github": "./scripts/publish-github",
23 | "publish:docker": "./scripts/publish-docker"
24 | },
25 | "license": "MIT",
26 | "dependencies": {
27 | "@algolia/requester-node-http": "4.14.2",
28 | "@types/bluebird": "^3.5.39",
29 | "algoliasearch": "4.14.2",
30 | "bluebird": "^3.7.2",
31 | "bunyan": "1.8.15",
32 | "bunyan-debug-stream": "2.0.1",
33 | "chalk": "4.1.2",
34 | "dtrace-provider": "0.8.8",
35 | "elastic-apm-node": "^3.49.1",
36 | "escape-html": "1.0.3",
37 | "got": "11.8.5",
38 | "gravatar-url": "3.1.0",
39 | "hosted-git-info": "2.7.1",
40 | "lodash": "4.17.21",
41 | "ms": "2.1.3",
42 | "nano": "^10.1.2",
43 | "nice-package": "3.1.2",
44 | "numeral": "2.0.6",
45 | "object-sizeof": "2.6.3",
46 | "p-queue": "6.6.2",
47 | "promise-rat-race": "1.5.1",
48 | "throttled-queue": "^2.1.4",
49 | "traverse": "0.6.7",
50 | "truncate-utf8-bytes": "1.0.2"
51 | },
52 | "devDependencies": {
53 | "@semantic-release/changelog": "6.0.3",
54 | "@semantic-release/git": "10.0.1",
55 | "@types/escape-html": "1.0.3",
56 | "@types/hosted-git-info": "3.0.4",
57 | "@types/jest": "28.1.8",
58 | "@types/lodash": "4.14.184",
59 | "@types/ms": "0.7.33",
60 | "@types/numeral": "2.0.2",
61 | "@types/traverse": "0.6.34",
62 | "@types/truncate-utf8-bytes": "1.0.0",
63 | "@typescript-eslint/eslint-plugin": "6.9.0",
64 | "@typescript-eslint/parser": "6.9.0",
65 | "dotenv": "16.0.1",
66 | "eslint": "8.22.0",
67 | "eslint-config-algolia": "22.0.0",
68 | "eslint-config-prettier": "8.5.0",
69 | "eslint-plugin-eslint-comments": "3.2.0",
70 | "eslint-plugin-import": "2.26.0",
71 | "eslint-plugin-jasmine": "4.1.3",
72 | "eslint-plugin-jest": "26.8.2",
73 | "eslint-plugin-jsdoc": "46.8.2",
74 | "eslint-plugin-prettier": "5.0.1",
75 | "eslint-plugin-react": "7.30.1",
76 | "husky": "8.0.1",
77 | "jest": "29.7.0",
78 | "lint-staged": "13.0.3",
79 | "pre-commit": "1.2.2",
80 | "prettier": "3.0.3",
81 | "renovate-config-algolia": "2.1.10",
82 | "semantic-release": "22.0.5",
83 | "ts-jest": "29.1.1",
84 | "ts-node": "10.9.1",
85 | "typescript": "5.2.2",
86 | "validator": "13.7.0"
87 | },
88 | "engines": {
89 | "node": "^18.0.0",
90 | "yarn": "^1.22.0"
91 | },
92 | "repository": {
93 | "type": "git",
94 | "url": "https://github.com/algolia/npm-search.git"
95 | },
96 | "homepage": "https://github.com/algolia/npm-search",
97 | "packageManager": "yarn@3.2.2"
98 | }
99 |
--------------------------------------------------------------------------------
/release.config.js:
--------------------------------------------------------------------------------
/* eslint-disable import/no-commonjs */
/* eslint-disable no-template-curly-in-string */
/**
 * We use semantic-release to automate the publishing of new versions based on
 * the commit history: whenever a commit is pushed to the master branch, it
 * checks if any commit had a BREAKING CHANGE / feat() / fix() message, and
 * publishes (or not) a new major/minor/patch version accordingly.
 *
 * See: https://github.com/semantic-release/semantic-release.
 *
 * Semantic-release executes steps in order (from verifyConditions to
 * success/fail). For each step, it executes the matching code in each plugin
 * (if such exists). If any step fails, the whole process stops.
 *
 * As we are using a mix of core and community plugins, as well as slightly
 * diverging from the default use-case, we explicitly define the order of
 * plugins in each step instead of relying on the default order.
 *
 * The current configuration will:
 * - Check if a new version needs to be published (and stop if not)
 * - Update the version number in package.json accordingly
 * - Update the CHANGELOG.md with the changes
 * - Create a new commit, and tag it with the version number
 * - Publish the code source to GitHub Releases (not very useful).
 *
 * Specifically, it does not:
 * - Publish the code to npm (this is not an npm module)
 * - Publish the Docker image (yarn publish:docker takes care of that).
 **/
module.exports = {
  branches: 'master',
  plugins: [
    // Those 4 plugins are part of the core of semantic-release
    '@semantic-release/commit-analyzer',
    '@semantic-release/release-notes-generator',
    '@semantic-release/npm',
    '@semantic-release/github',
    // Those 2 are additional plugins
    '@semantic-release/changelog',
    '@semantic-release/git',
  ],
  // Below are the various steps
  // Source: https://semantic-release.gitbook.io/semantic-release/usage/plugins
  // We explicitly define them because it allows us to:
  // - remove steps that we don't need (for example verifying npm credentials as
  //   we don't publish on npm)
  // - put steps in order (for example updating the changelog file before
  //   committing it)
  verifyConditions: ['@semantic-release/github', '@semantic-release/git'],
  analyzeCommits: ['@semantic-release/commit-analyzer'],
  verifyRelease: [],
  generateNotes: ['@semantic-release/release-notes-generator'],
  prepare: [
    '@semantic-release/changelog',
    // npm plugin only bumps the version in package.json here; the actual
    // npm publish never happens (see the "does not" list above)
    '@semantic-release/npm',
    {
      path: '@semantic-release/git',
      assets: ['package.json', 'CHANGELOG.md'],
      // [skip ci] prevents the release commit from triggering another CI run
      message:
        'chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}',
    },
  ],
  publish: ['@semantic-release/github'],
  addChannel: [],
  success: [],
  fail: [],
};
68 |
--------------------------------------------------------------------------------
/scripts/publish-check.mjs:
--------------------------------------------------------------------------------
/* eslint-disable no-console */

import { Writable } from 'node:stream';

import semanticRelease from 'semantic-release';

// No-op sink: anything written to it is discarded, so the semantic-release
// run below produces no visible output.
const devNull = new Writable({
  write(_chunk, _encoding, callback) {
    setImmediate(callback);
  },
});

// Run semantic-release in dry-run mode with only the commit-analyzer step,
// to determine whether the commit history warrants a new release.
const result = await semanticRelease(
  {
    dryRun: true,
    plugins: ['@semantic-release/commit-analyzer'],
    verifyConditions: [],
    analyzeCommits: ['@semantic-release/commit-analyzer'],
    verifyRelease: [],
    generateNotes: [],
    prepare: [],
    publish: [],
    addChannel: [],
    success: [],
    fail: [],
  },
  // Silence the run by redirecting both output streams to the sink
  {
    stdout: devNull,
    stderr: devNull,
  }
);

// Print "yes" when a new release should be published, "no" otherwise.
// The publishing workflow consumes this output to conditionally either cancel
// the run or actually publish to Docker/GitHub, so it must only ever be one
// of those two words.
console.info(result?.nextRelease?.version ? 'yes' : 'no');
41 |
--------------------------------------------------------------------------------
/scripts/publish-docker:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Publish the project on GitHub Packages
3 | # See: https://github.com/algolia/npm-search/pkgs/container/npm-search
4 | #
5 | # This script will be automatically run from GitHub Actions on each commits on
6 | # the main branch that warrants a release (ie. feat() and fix() commits).
7 | #
8 | # You can also run the script locally, but you'll need a GITHUB_TOKEN with the
9 | # write:packages scope.
10 | # See: https://github.com/settings/tokens
11 | set -e
12 |
13 | # Get version from package.json
14 | version=$(node -e "console.log(require('./package.json').version)")
15 | echo "Publishing: $version"
16 | echo ""
17 |
18 | # Build the image
19 | docker build \
20 | --platform linux/amd64 \
21 | --label "org.opencontainers.image.source=https://github.com/algolia/npm-search" \
22 | --tag "ghcr.io/algolia/npm-search" \
23 | --tag "ghcr.io/algolia/npm-search:${version}" \
24 | .
25 |
26 | # Login to ghcr.io
27 | echo "${GITHUB_TOKEN}" |
28 | docker login ghcr.io \
29 | --username $ \
30 | --password-stdin
31 |
32 | # Push the image
33 | docker push "ghcr.io/algolia/npm-search"
34 | docker push "ghcr.io/algolia/npm-search:${version}"
35 |
36 | # Output
37 | echo "Version $version published"
38 | echo "https://github.com/algolia/npm-search/pkgs/container/npm-search"
39 |
--------------------------------------------------------------------------------
/scripts/publish-github:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Publish a new version on GitHub, including:
3 | # - Update package.json and CHANGELOG.md with new version and changes
4 | # - Tag the commit with the version number
5 | # - Release the source code on GitHub Releases (https://github.com/algolia/npm-search/releases)
6 | #
7 | # This script doesn't do anything if there is no new version to publish
8 | set -e
9 |
10 | yarn run semantic-release
11 |
--------------------------------------------------------------------------------
/scripts/test-api-control:
--------------------------------------------------------------------------------
#!/bin/sh
# API Control tests assert that the external third party APIs we are using
# return data in an expected format. They query the relevant API with real (not
# mocked) HTTP calls.
#
# As those tests are slow, and have a higher probability of flakiness because of
# network issues or timeouts, we don't want to run them on each pre-commit hook
# or CI commit. They can instead be run manually, or periodically from the CI.
#
# When it fails on the CI, it will generate a GitHub issue with the failure details
# as well as a link to the run.
#
# Note: no `set -e` on purpose — the CI path below must capture jest's exit
# code and keep going on failure to file the issue.

# Running locally, with colors and live output
if [ "$GITHUB_RUN_ID" = "" ]; then
  jest \
    ./src/__tests__/api-control \
    --forceExit \
    --testPathIgnorePatterns=''
  exit $?
fi

# Running on CI, creating an issue on failure
echo "Wait while we run the tests"
# Capture all output (stdout + stderr) so it can be embedded in the issue body
output=$(jest \
  ./src/__tests__/api-control \
  --forceExit \
  --testPathIgnorePatterns='' 2>&1)
# Must be read immediately after the command substitution
exitCode=$?
echo "$output"

# Stop on success
if [ "$exitCode" = "0" ]; then
  exit 0
fi

# Create the issue on failure
gh issue create \
  --title "API Control failed" \
  --body "\
One of the external APIs we depend on failed to return coherent data in our periodic test.
Maybe it's a temporary issue, maybe they changed their format.

https://github.com/algolia/npm-search/actions/runs/$GITHUB_RUN_ID

\`\`\`
$output
\`\`\`"

# Still mark the job as failed
exit 1
51 |
--------------------------------------------------------------------------------
/src/@types/nice-package.ts:
--------------------------------------------------------------------------------
1 | import type {
2 | GetPackage,
3 | GetUser,
4 | GetVersion,
5 | PackageRepo,
6 | } from '../npm/types';
7 |
/**
 * Shape of a package document after normalization by the `nice-package`
 * dependency, built from the raw registry document (see ../npm/types).
 *
 * NOTE(review): several generic type parameters (on `Record`, `Array`,
 * `Partial`, …) appear to have been stripped from this file — confirm
 * against the repository history before relying on these declarations.
 */
export interface NicePackageType {
  _hasShrinkwrap?: false;
  bin?: Record;
  browser?: string;
  bundlesize?: Array>;
  created: string;
  dependencies?: Record;
  deprecated?: boolean | string;
  description: string;
  devDependencies?: Record;
  gitHead?: string;
  homepage?: string;
  keywords: string[];
  lastPublisher?: GetUser;
  license?: string | { type: string };
  licenseText?: string;
  main?: string[] | string;
  modified: string;
  module?: string;
  exports?: GetVersion['exports'];
  name: string;
  // Extra fields carried over from the raw document (note the CouchDB-style
  // _id/_rev keys) — TODO confirm exact provenance.
  other: {
    _id?: string;
    _rev: string;
    'dist-tags': Record;
    author?: GetUser;
    time?: GetPackage['time'];
  };
  owners?: GetUser[];
  readme?: string;
  repository?: Array> | Partial | string;
  scripts: Record;
  schematics?: string;
  starsCount?: number;
  style?: string;
  type?: 'commonjs' | 'module';
  types?: string;
  typings?: string;
  unpkg?: string;
  version?: string;
  versions?: Array<{
    date: string;
    number: string;
  }>;
}
53 |
--------------------------------------------------------------------------------
/src/@types/pkg.ts:
--------------------------------------------------------------------------------
1 | import type { GetUser } from '../npm/types';
2 |
/** A package owner/maintainer as exposed on the indexed record. */
export interface Owner {
  name: string;
  email?: string;
  avatar?: string; // avatar image URL
  link?: string; // profile page URL
}
9 |
/** Normalized source-repository location (host + user/project + sub-path). */
export interface Repo {
  url: string;
  host: string; // e.g. github.com, gitlab.com, bitbucket.org
  user: string;
  project: string;
  path: string; // sub-directory inside the repo ('' for the root)
  head?: string;
  branch?: string;
}
19 |
/** GitHub-specific repository coordinates with a resolved head ref. */
export interface GithubRepo {
  user: string;
  project: string;
  path: string; // sub-directory inside the repo ('' for the root)
  head: string;
}
26 |
/**
 * TypeScript support status of a package: typed via DefinitelyTyped (with the
 * corresponding @types package name), or included / absent / possibly present.
 */
export type TsType =
  | {
      ts: 'definitely-typed';
      definitelyTyped: string;
    }
  | {
      ts: 'included' | false | { possible: true };
    };

/** Module system(s) detected for a package. */
export type ModuleType = 'cjs' | 'esm' | 'none' | 'unknown';

/** Style flavor detected for a package ('none' when there is none). */
export type StyleType = string | 'none';

// NOTE(review): the `Record` below is missing its type parameters — they look
// stripped; confirm against the repository history.
export type ComputedMeta = {
  computedKeywords: string[];
  computedMetadata: Record;
};
44 |
/**
 * The full package record built by the crawler and stored in Algolia.
 *
 * NOTE(review): several `Record` fields below are missing their type
 * parameters — they look stripped; confirm against the repository history.
 */
export interface RawPkg {
  // Algolia object id — the package name (see `name`)
  objectID: string;
  rev: string;
  name: string;
  downloadsLast30Days: number;
  downloadsRatio: number;
  humanDownloadsLast30Days: string; // human-readable form, e.g. "2.9m"
  jsDelivrHits: number;
  popular: boolean;
  version: string;
  versions: Record;
  tags: Record;
  description: string | null;
  dependencies: Record;
  devDependencies: Record;
  originalAuthor?: GetUser;
  repository: Repo | null;
  githubRepo: GithubRepo | null;
  gitHead: string | null;
  readme: string;
  owner: Owner | null;
  deprecated: boolean | string;
  isDeprecated: boolean;
  deprecatedReason: string | null;
  isSecurityHeld: boolean;
  homepage: string | null;
  license: string | null;
  keywords: string[];
  computedKeywords: ComputedMeta['computedKeywords'];
  computedMetadata: ComputedMeta['computedMetadata'];
  created: number; // timestamp (ms)
  modified: number; // timestamp (ms)
  lastPublisher: Owner | null;
  owners: Owner[];
  bin: Record;
  dependents: number;
  types: TsType;
  moduleTypes: ModuleType[];
  styleTypes: StyleType[];
  humanDependents: string;
  changelogFilename: string | null; // resolved changelog URL, when found
  lastCrawl: string;
  _revision: number;
  // Internal fields used for search (alternative-name matching)
  _searchInternal: {
    alternativeNames: string[];
    popularAlternativeNames: string[];
  };
}
93 |
/**
 * RawPkg plus optional internal (underscore-prefixed) enrichment fields —
 * by their names, popularity/ranking signals and update-scheduling
 * timestamps; confirm exact semantics against the indexers.
 */
export type FinalPkg = RawPkg & {
  _oneTimeDataToUpdateAt?: number;
  _periodicDataUpdatedAt?: number;
  _jsDelivrPopularity?: number;
  _downloadsMagnitude?: number;
  _popularName?: string;
};
101 |
--------------------------------------------------------------------------------
/src/StateManager.ts:
--------------------------------------------------------------------------------
1 | import type { SearchIndex } from 'algoliasearch';
2 |
3 | import { config } from './config';
4 | import { datadog } from './utils/datadog';
5 |
/** Crawler state persisted in the Algolia index settings (`userData`). */
export type State = {
  // Last processed changes sequence — TODO confirm exact semantics
  seq: number | undefined;
  bootstrapDone: boolean;
  // Timestamp (ms) of the last completed bootstrap, or null if never done
  bootstrapLastDone: number | null;
  // Last package id handled during bootstrap, or null
  bootstrapLastId: string | null;
  stage: 'bootstrap' | 'watch';
};

// Initial state: a truthy config.seq is coerced to a number; a falsy one
// (e.g. undefined) is kept as-is.
const defaultState: State = {
  seq: config.seq ? Number(config.seq) : config.seq,
  bootstrapDone: false,
  bootstrapLastDone: null,
  bootstrapLastId: null,
  stage: 'bootstrap',
};
21 |
/**
 * Reads and persists the crawler `State` in the Algolia index settings
 * (`userData`), with an in-memory cache to avoid refetching settings.
 *
 * NOTE(review): the `Promise`/`Partial` annotations in this class are missing
 * their type parameters — they look stripped; confirm against history.
 */
export class StateManager {
  algoliaIndex;
  // In-memory copy of the state; starts from the defaults
  currentState: State = { ...defaultState };
  // Whether currentState reflects what is actually stored in Algolia
  refreshed: boolean = false;

  constructor(algoliaIndex: SearchIndex) {
    this.algoliaIndex = algoliaIndex;
  }

  /**
   * Returns the current state, forcing `seq` from config when one is
   * provided, and resetting to the defaults when no state is stored yet.
   */
  async check(): Promise {
    const state = await this.get();

    if (config.seq !== undefined) {
      return this.set({ ...state, seq: Number(config.seq) });
    }

    if (state === undefined) {
      return this.reset();
    }

    return state;
  }

  /** Fetches the state from the index settings, using the cache when fresh. */
  async get(): Promise {
    if (this.currentState && this.refreshed) {
      return this.currentState;
    }

    const start = Date.now();
    const { userData } = await this.algoliaIndex.getSettings();
    datadog.timing('stateManager.get', Date.now() - start);

    this.currentState = userData;
    this.refreshed = true;
    return userData;
  }

  /** Persists the full state to the index settings and caches it. */
  async set(state: State): Promise {
    this.currentState = state;

    const start = Date.now();
    await this.algoliaIndex.setSettings({
      userData: state,
    });
    datadog.timing('stateManager.set', Date.now() - start);

    return state;
  }

  /** Resets the stored state back to the defaults. */
  async reset(): Promise {
    return await this.set(defaultState);
  }

  /** Merges a partial state on top of the current one and persists it. */
  async save(partial: Partial): Promise {
    const current = await this.get();

    return await this.set({
      ...current,
      ...partial,
    });
  }
}
84 |
--------------------------------------------------------------------------------
/src/__tests__/StateManager.test.ts:
--------------------------------------------------------------------------------
1 | import { StateManager } from '../StateManager';
2 |
3 | describe('stateManager', () => {
4 | describe('get()', () => {
5 | it('should get userData from algolia', async () => {
6 | const mock = {
7 | getSettings: jest.fn(() => {
8 | return {
9 | userData: 'foobar',
10 | };
11 | }),
12 | } as any;
13 | const stateManager = new StateManager(mock);
14 | const userData = await stateManager.get();
15 |
16 | expect(mock.getSettings).toHaveBeenCalled();
17 | expect(userData).toBe('foobar');
18 | });
19 | });
20 |
21 | describe('set()', () => {
22 | it('should set userData to algolia', async () => {
23 | const mock = {
24 | setSettings: jest.fn(),
25 | } as any;
26 | const stateManager = new StateManager(mock);
27 | await stateManager.set({
28 | seq: 1,
29 | bootstrapDone: false,
30 | bootstrapLastDone: 1635196220508,
31 | bootstrapLastId: '',
32 | stage: 'bootstrap',
33 | });
34 |
35 | expect(mock.setSettings).toHaveBeenCalledWith({
36 | userData: {
37 | seq: 1,
38 | bootstrapDone: false,
39 | bootstrapLastDone: 1635196220508,
40 | bootstrapLastId: '',
41 | stage: 'bootstrap',
42 | },
43 | });
44 | });
45 | });
46 |
47 | describe('reset()', () => {
48 | it('should reset userData', async () => {
49 | const mock = {
50 | setSettings: jest.fn(),
51 | } as any;
52 | const stateManager = new StateManager(mock);
53 | await stateManager.reset();
54 |
55 | expect(mock.setSettings).toHaveBeenCalled();
56 | });
57 | });
58 |
59 | describe('save()', () => {
60 | it('should save userData to algolia', async () => {
61 | const mock = {
62 | getSettings: jest.fn(() => {
63 | return {
64 | userData: { bar: 'foo' },
65 | };
66 | }),
67 | setSettings: jest.fn(),
68 | } as any;
69 | const stateManager = new StateManager(mock);
70 | await stateManager.save({ foo: 'bar' } as any);
71 |
72 | expect(mock.getSettings).toHaveBeenCalled();
73 | expect(mock.setSettings).toHaveBeenCalledWith({
74 | userData: {
75 | bar: 'foo',
76 | foo: 'bar',
77 | },
78 | });
79 | });
80 | });
81 | });
82 |
--------------------------------------------------------------------------------
/src/__tests__/api-control/npm.test.ts:
--------------------------------------------------------------------------------
1 | import * as api from '../../npm/index';
2 |
3 | jest.setTimeout(15000);
4 |
5 | describe('findAll()', () => {
6 | it('contains the correct keys', async () => {
7 | const all = await api.findAll({ limit: 2, startkey: '0' });
8 |
9 | expect(all).toEqual(
10 | expect.objectContaining({
11 | offset: expect.any(Number),
12 | total_rows: expect.any(Number),
13 | })
14 | );
15 |
16 | expect(all.rows).toHaveLength(2);
17 |
18 | expect(all.rows[0]).toEqual(
19 | expect.objectContaining({
20 | id: '0',
21 | key: '0',
22 | value: { rev: '11-61bb2c49ce3202a3e0ab9a65646b4b4d' },
23 | })
24 | );
25 | });
26 | });
27 |
28 | describe('getDocFromRegistry()', () => {
29 | it('retrieves a single doc', async () => {
30 | const doc = await api.getDocFromRegistry('jsdelivr');
31 |
32 | expect(doc.name).toBe('jsdelivr');
33 | expect(Object.keys(doc.versions)).toHaveLength(2);
34 | });
35 | });
36 |
37 | describe('getInfo()', () => {
38 | let registryInfo;
39 | beforeAll(async () => {
40 | registryInfo = await api.getInfo();
41 | });
42 |
43 | it('contains the correct keys', () => {
44 | expect(registryInfo).toEqual(
45 | expect.objectContaining({
46 | nbDocs: expect.any(Number),
47 | seq: expect.any(Number),
48 | })
49 | );
50 | });
51 | });
52 |
--------------------------------------------------------------------------------
/src/__tests__/bootstrap.test.ts:
--------------------------------------------------------------------------------
1 | import type { State } from '../StateManager';
2 | import { StateManager } from '../StateManager';
3 | import type { AlgoliaStore } from '../algolia';
4 | import { Bootstrap } from '../bootstrap';
5 |
/**
 * Minimal Algolia index stub exposing the methods touched during Bootstrap
 * setup; every method resolves immediately.
 *
 * NOTE(review): most `Promise` return-type annotations below are missing
 * their type parameters — they look stripped; confirm against history.
 */
function getAlgoliaMock(): any {
  return {
    setSettings: (): Promise => {
      return Promise.resolve();
    },
    saveSynonyms: (): Promise => {
      return Promise.resolve();
    },
    saveRules: (): Promise<{ taskID: string }> => {
      return Promise.resolve({ taskID: 'A' });
    },
    waitTask: (): Promise => {
      return Promise.resolve();
    },
  };
}
22 |
// Bootstrap.isDone() must mirror the `bootstrapDone` flag persisted in the
// index settings userData.
describe('isDone', () => {
  it('should return true', async () => {
    const mock = {
      ...getAlgoliaMock(),
      getSettings: jest.fn(() => {
        const state: State = {
          bootstrapDone: true,
          bootstrapLastDone: Date.now(),
          bootstrapLastId: '1',
          seq: 1,
          stage: 'watch',
        };
        return {
          userData: state,
        };
      }),
    } as any;
    const stateManager = new StateManager(mock);
    const bootstrap = new Bootstrap(stateManager, {
      mainIndex: mock,
    } as AlgoliaStore);

    expect(await bootstrap.isDone()).toBe(true);
  });

  it('should return false', async () => {
    // Identical setup except bootstrapDone is false.
    const mock = {
      ...getAlgoliaMock(),
      getSettings: jest.fn(() => {
        const state: State = {
          bootstrapDone: false,
          bootstrapLastDone: Date.now(),
          bootstrapLastId: '1',
          seq: 1,
          stage: 'watch',
        };
        return {
          userData: state,
        };
      }),
    } as any;
    const stateManager = new StateManager(mock);
    const bootstrap = new Bootstrap(stateManager, {
      mainIndex: mock,
    } as AlgoliaStore);

    expect(await bootstrap.isDone()).toBe(false);
  });
});
72 |
--------------------------------------------------------------------------------
/src/__tests__/changelog.test.ts:
--------------------------------------------------------------------------------
1 | import { baseUrlMap, getChangelog, getChangelogBackground } from '../changelog';
2 |
// Mock `got` so no real HTTP requests happen: URLs in the whitelist resolve
// with a minimal 200 response, everything else rejects.
jest.mock('got', () => {
  const gotSnapshotUrls = new Set([
    'https://gitlab.com/janslow/gitlab-fetch/raw/master/CHANGELOG.md',
    'https://raw.githubusercontent.com/algolia/algoliasearch-netlify/master/CHANGELOG.md',
    'https://bitbucket.org/atlassian/aui/raw/master/changelog.md',
    'https://raw.githubusercontent.com/expressjs/body-parser/master/HISTORY.md',
  ]);

  return Object.assign(
    // Mirrors got's callable module shape: calling it with a URL returns a
    // promise of a minimal response object.
    (url: string): Promise<{ url: string; redirectUrls: string[] }> => {
      return gotSnapshotUrls.has(url)
        ? Promise.resolve({ url, redirectUrls: [], statusCode: 200 })
        : Promise.reject(new Error(`got mock does not exist for ${url}`));
    },
    {
      // Stand-in for got.HTTPError — presumably so `instanceof` checks in the
      // code under test keep working; confirm why TypeError specifically.
      HTTPError: TypeError,
    }
  );
});
22 |
// baseUrlMap maps a repository host to a builder that turns a parsed repo
// into the host's raw-file base URL, keeping any sub-directory path
// (monorepo packages).
describe('should test baseUrlMap', () => {
  it('should work with paths', () => {
    const bitbucketRepo = {
      host: 'bitbucket.org',
      user: 'user',
      project: 'project',
      path: '/src/master/packages/project1',
      head: 'master',
      branch: 'master',
    };

    const gitlabRepo = {
      host: 'gitlab.com',
      path: '/tree/master/foo/bar',
      project: 'project',
      user: 'user',
    };

    const githubRepo = {
      host: 'github.com',
      user: 'babel',
      project: 'babel',
      path: '/tree/master/packages/babel-core',
      head: 'master',
    };

    // The host-specific "/src/<branch>" or "/tree/<branch>" prefix must be
    // rewritten to the raw-content form while the inner path is preserved.
    expect(baseUrlMap.get('bitbucket.org')!.buildUrl(bitbucketRepo)).toBe(
      'https://bitbucket.org/user/project/raw/master/packages/project1'
    );

    expect(baseUrlMap.get('gitlab.com')!.buildUrl(gitlabRepo)).toBe(
      'https://gitlab.com/user/project/raw/master/foo/bar'
    );

    expect(baseUrlMap.get('github.com')!.buildUrl(githubRepo)).toBe(
      'https://raw.githubusercontent.com/babel/babel/master/packages/babel-core'
    );
  });

  it('should work without paths', () => {
    const bitbucketRepo = {
      host: 'bitbucket.org',
      user: 'user',
      path: '',
      project: 'project',
      branch: 'master',
    };

    const gitlabRepo = {
      host: 'gitlab.com',
      project: 'project',
      path: '',
      user: 'user',
      branch: 'master',
    };

    const githubRepo = {
      host: 'github.com',
      user: 'babel',
      project: 'babel',
      path: '',
      branch: 'master',
    };

    // With an empty path, the base URL ends at the branch segment.
    expect(baseUrlMap.get('bitbucket.org')!.buildUrl(bitbucketRepo)).toBe(
      'https://bitbucket.org/user/project/raw/master'
    );

    expect(baseUrlMap.get('gitlab.com')!.buildUrl(gitlabRepo)).toBe(
      'https://gitlab.com/user/project/raw/master'
    );

    expect(baseUrlMap.get('github.com')!.buildUrl(githubRepo)).toBe(
      'https://raw.githubusercontent.com/babel/babel/master'
    );
  });
});
100 |
// Per-host changelog resolution against the mocked got: only whitelisted
// URLs "exist", so each test pins which candidate URL wins for its host.
describe('hosts', () => {
  it('should handle not found changelog for github', async () => {
    // visionmedia/debug has no whitelisted URL, so every probe rejects and
    // no changelog is found.
    const pkg = {
      name: 'foo',
      version: '0.0.0',
      repository: {
        url: '',
        host: 'github.com',
        user: 'visionmedia',
        project: 'debug',
        path: '',
        head: 'master',
        branch: 'master',
      },
    };

    const { changelogFilename } = await getChangelogBackground(pkg);
    expect(changelogFilename).toBeNull();
  });

  it('should get changelog for github', async () => {
    const pkg = {
      name: 'foo',
      version: '0.0.0',
      repository: {
        url: '',
        host: 'github.com',
        user: 'algolia',
        project: 'algoliasearch-netlify',
        path: '',
        head: 'master',
        branch: 'master',
      },
    };

    const { changelogFilename } = await getChangelogBackground(pkg);
    expect(changelogFilename).toBe(
      'https://raw.githubusercontent.com/algolia/algoliasearch-netlify/master/CHANGELOG.md'
    );
  });

  it('should get changelog for gitlab', async () => {
    const pkg = {
      name: 'foo',
      version: '0.0.0',
      repository: {
        url: '',
        host: 'gitlab.com',
        user: 'janslow',
        project: 'gitlab-fetch',
        path: '',
        head: 'master',
        branch: 'master',
      },
    };

    const { changelogFilename } = await getChangelogBackground(pkg);
    expect(changelogFilename).toBe(
      'https://gitlab.com/janslow/gitlab-fetch/raw/master/CHANGELOG.md'
    );
  });

  it('should get changelog for bitbucket', async () => {
    // Note: the whitelisted bitbucket file is lowercase "changelog.md".
    const pkg = {
      name: 'foo',
      version: '0.0.0',
      repository: {
        url: '',
        host: 'bitbucket.org',
        user: 'atlassian',
        project: 'aui',
        path: '',
        head: 'master',
        branch: 'master',
      },
    };

    const { changelogFilename } = await getChangelogBackground(pkg);
    expect(changelogFilename).toBe(
      'https://bitbucket.org/atlassian/aui/raw/master/changelog.md'
    );
  });
});
184 |
// When a jsDelivr file listing is supplied, getChangelog() should resolve the
// changelog straight from the listing (early return) as a cdn.jsdelivr.net
// URL, without probing the repository hosts.
describe('jsDelivr', () => {
  it('should early return when finding changelog', async () => {
    const { changelogFilename } = await getChangelog(
      {
        name: 'foo',
        version: '1.0.0',
        repository: {
          url: '',
          host: 'github.com',
          user: 'expressjs',
          project: 'body-parser',
          path: '',
          head: 'master',
          branch: 'master',
        },
      },
      [
        { name: '/package.json', hash: '', time: '1', size: 1 },
        { name: '/CHANGELOG.md', hash: '', time: '1', size: 1 },
      ]
    );
    expect(changelogFilename).toBe(
      'https://cdn.jsdelivr.net/npm/foo@1.0.0/CHANGELOG.md'
    );
  });

  it('should early return when finding changelog in nested file', async () => {
    // The changelog may live in a sub-directory of the published package.
    const { changelogFilename } = await getChangelog(
      {
        name: 'foo',
        version: '1.0.0',
        repository: {
          url: '',
          host: 'github.com',
          user: 'expressjs',
          project: 'body-parser',
          path: '',
          head: 'master',
          branch: 'master',
        },
      },
      [{ name: '/pkg/CHANGELOG.md', hash: '', time: '1', size: 1 }]
    );
    expect(changelogFilename).toBe(
      'https://cdn.jsdelivr.net/npm/foo@1.0.0/pkg/CHANGELOG.md'
    );
  });

  it('should not register a file looking like a changelog', async () => {
    // A .js file merely named "changelog" must not be treated as one.
    const { changelogFilename } = await getChangelog(
      {
        name: 'foo',
        version: '1.0.0',
        repository: {
          url: '',
          host: 'github.com',
          user: 'hello',
          project: 'foo',
          path: '',
          head: 'master',
          branch: 'master',
        },
      },
      [{ name: '/dist/changelog.js', hash: '', time: '1', size: 1 }]
    );
    expect(changelogFilename).toBeNull();
  });
});
253 |
// HISTORY.md is an accepted alternative changelog filename; its URL is in
// the got-mock whitelist at the top of this file.
describe('filename', () => {
  it('should work with HISTORY.md', async () => {
    const pkg = {
      name: 'foo',
      version: '0.0.0',
      repository: {
        url: '',
        host: 'github.com',
        user: 'expressjs',
        project: 'body-parser',
        path: '',
        head: 'master',
        branch: 'master',
      },
    };

    const { changelogFilename } = await getChangelogBackground(pkg);
    expect(changelogFilename).toBe(
      'https://raw.githubusercontent.com/expressjs/body-parser/master/HISTORY.md'
    );
  });
});
276 |
--------------------------------------------------------------------------------
/src/__tests__/config.test.ts:
--------------------------------------------------------------------------------
/* eslint-disable import/first */
// Must be set before `config` is imported below so the value is picked up at
// module load — hence the import/first disable.
process.env.apiKey = 'fake-api-key';

import { config } from '../config';

describe('config', () => {
  it('gets the correct keys from env variables', () => {
    // from mocked .env
    expect(config.apiKey).toBe('fake-api-key');
    // from config.js
    expect(config.maxObjSize).toBe(450000);
  });

  // objectIDs are restricted to URL-safe characters
  const objectIDRe = /^[A-Za-z0-9_-]+$/;

  it('sets correct objectIDs for query rules', () => {
    config.indexRules.forEach(({ objectID }) => {
      expect(objectID).toMatch(objectIDRe);
    });
  });

  it('sets correct objectIDs for synonyms', () => {
    config.indexSynonyms.forEach(({ objectID }) => {
      expect(objectID).toMatch(objectIDRe);
    });
  });
});
28 |
--------------------------------------------------------------------------------
/src/__tests__/saveDocs.test.ts:
--------------------------------------------------------------------------------
1 | import algoliasearch from 'algoliasearch';
2 |
3 | import { formatPkg } from '../formatPkg';
4 | import { hits } from '../jsDelivr';
5 | import { cacheTotalDownloads } from '../npm';
6 | import { saveDoc } from '../saveDocs';
7 |
8 | import preact from './preact-simplified';
9 |
// Network-ish work (index round-trips are mocked, but formatPkg is heavy).
jest.setTimeout(15000);

// Baseline record expected from indexing the `preact-simplified` fixture.
// Each test below spreads this object and overrides the scenario-specific
// fields; the commented-out keys are asserted per-test via objectContaining.
const FINAL_BASE = {
  _revision: expect.any(Number),
  // _downloadsMagnitude: 7,
  // _jsDelivrPopularity: 0,
  _searchInternal: {
    alternativeNames: ['preact', 'preact.js', 'preactjs'],
    // popularAlternativeNames: ['preact', 'preact.js', 'preactjs'],
  },
  bin: {},
  changelogFilename: null,
  computedKeywords: [],
  computedMetadata: {},
  created: 1441939293521,
  dependencies: {},
  dependents: 0,
  deprecated: false,
  deprecatedReason: null,
  description:
    'Fast 3kb React alternative with the same modern API. Components & Virtual DOM.',
  devDependencies: {
    '@types/chai': '^4.1.7',
    '@types/mocha': '^5.2.5',
    '@types/node': '^9.6.40',
    'babel-cli': '^6.24.1',
    'babel-core': '^6.24.1',
    'babel-eslint': '^8.2.6',
    'babel-loader': '^7.0.0',
    'babel-plugin-transform-object-rest-spread': '^6.23.0',
    'babel-plugin-transform-react-jsx': '^6.24.1',
    'babel-preset-env': '^1.6.1',
    bundlesize: '^0.17.0',
    chai: '^4.2.0',
    copyfiles: '^2.1.0',
    'core-js': '^2.6.0',
    coveralls: '^3.0.0',
    'cross-env': '^5.1.4',
    diff: '^3.0.0',
    eslint: '^4.18.2',
    'eslint-plugin-react': '^7.11.1',
    'flow-bin': '^0.89.0',
    'gzip-size-cli': '^2.0.0',
    'istanbul-instrumenter-loader': '^3.0.0',
    jscodeshift: '^0.5.0',
    karma: '^3.1.3',
    'karma-babel-preprocessor': '^7.0.0',
    'karma-chai-sinon': '^0.1.5',
    'karma-chrome-launcher': '^2.2.0',
    'karma-coverage': '^1.1.2',
    'karma-mocha': '^1.3.0',
    'karma-mocha-reporter': '^2.2.5',
    'karma-sauce-launcher': '^1.2.0',
    'karma-sinon': '^1.0.5',
    'karma-source-map-support': '^1.3.0',
    'karma-sourcemap-loader': '^0.3.6',
    'karma-webpack': '^3.0.5',
    mocha: '^5.0.4',
    'npm-run-all': '^4.1.5',
    puppeteer: '^1.11.0',
    rimraf: '^2.5.3',
    rollup: '^0.57.1',
    'rollup-plugin-babel': '^3.0.2',
    'rollup-plugin-memory': '^3.0.0',
    'rollup-plugin-node-resolve': '^3.4.0',
    sinon: '^4.4.2',
    'sinon-chai': '^3.3.0',
    typescript: '^3.0.1',
    'uglify-js': '^2.7.5',
    webpack: '^4.27.1',
  },
  downloadsLast30Days: 2874638,
  downloadsRatio: 0.0023,
  gitHead: 'master',
  githubRepo: {
    head: 'master',
    path: '',
    project: 'preact',
    user: 'developit',
  },
  homepage: null,
  humanDependents: '0',
  humanDownloadsLast30Days: '2.9m',
  isDeprecated: false,
  jsDelivrHits: 0,
  keywords: [
    'preact',
    'react',
    'virtual dom',
    'vdom',
    'components',
    'virtual',
    'dom',
  ],
  lastCrawl: '2021-07-11T12:31:18.112Z',
  lastPublisher: {
    avatar: 'https://gravatar.com/avatar/ad82ff1463f3e3b7b4a44c5f499912ae',
    email: 'npm.leah@hrmny.sh',
    link: 'https://www.npmjs.com/~harmony',
    name: 'harmony',
  },
  license: 'MIT',
  modified: 1564778088321,
  moduleTypes: ['esm'],
  name: 'preact',
  objectID: 'preact',
  originalAuthor: {
    email: 'jason@developit.ca',
    name: 'Jason Miller',
  },
  owner: {
    avatar: 'https://github.com/developit.png',
    link: 'https://github.com/developit',
    name: 'developit',
  },
  owners: [
    {
      avatar: 'https://gravatar.com/avatar/85ed8e6da2fbf39abeb4995189be324c',
      email: 'jason@developit.ca',
      link: 'https://www.npmjs.com/~developit',
      name: 'developit',
    },
    {
      avatar: 'https://gravatar.com/avatar/52401c37bc5c4d54a051c619767fdbf8',
      email: 'ulliftw@gmail.com',
      link: 'https://www.npmjs.com/~harmony',
      name: 'harmony',
    },
    {
      avatar: 'https://gravatar.com/avatar/308439e12701ef85245dc0632dd07c2a',
      email: 'luke@lukeed.com',
      link: 'https://www.npmjs.com/~lukeed',
      name: 'lukeed',
    },
    {
      avatar: 'https://gravatar.com/avatar/4ed639a3ea6219b80b58e2e81ff9ba47',
      email: 'marvin@marvinhagemeister.de',
      link: 'https://www.npmjs.com/~marvinhagemeister',
      name: 'marvinhagemeister',
    },
    {
      avatar: 'https://gravatar.com/avatar/83589d88ac76ddc2853562f9a817fe27',
      email: 'prateek89born@gmail.com',
      link: 'https://www.npmjs.com/~prateekbh',
      name: 'prateekbh',
    },
    {
      avatar: 'https://gravatar.com/avatar/88747cce15801e9e96bcb76895fcd7f9',
      email: 'hello@preactjs.com',
      link: 'https://www.npmjs.com/~preactjs',
      name: 'preactjs',
    },
    {
      avatar: 'https://gravatar.com/avatar/d279821c96bb49eeaef68b5456f42074',
      email: 'allamsetty.anup@gmail.com',
      link: 'https://www.npmjs.com/~reznord',
      name: 'reznord',
    },
  ],
  popular: false,
  readme: '',
  repository: {
    branch: 'master',
    head: undefined,
    host: 'github.com',
    path: '',
    project: 'preact',
    type: 'git',
    url: 'https://github.com/developit/preact',
    user: 'developit',
  },
  tags: {
    latest: '8.5.0',
    next: '10.0.0-rc.1',
  },
  types: {
    ts: 'included',
  },
  version: '8.5.0',
  versions: {
    '10.0.0-rc.1': '2019-08-02T20:34:45.123Z',
    '8.5.0': '2019-08-02T18:34:23.572Z',
  },
};
194 |
describe('saveDoc', () => {
  beforeAll(async () => {
    // Pre-seed the module-level caches saveDoc reads: a global npm downloads
    // total, plus per-package jsDelivr hit counts (preact marked popular,
    // reactjs not) so each scenario below is deterministic.
    cacheTotalDownloads.total = 1e15;
    hits.set('preact', { hits: 12345, popular: true });
    hits.set('reactjs', { hits: 1234, popular: false });
  });

  // Happy path: no pre-existing data in the side indices, so downloads fall
  // back to 0 and popularity comes purely from the seeded jsDelivr cache.
  it('should always produce the same records', async () => {
    const client = algoliasearch('e', '');
    const index = client.initIndex('a');
    const oneTimeDataIndex = client.initIndex('a');
    const periodicDataIndex = client.initIndex('a');
    jest.spyOn(index, 'saveObject').mockImplementationOnce(() => {
      return true as any;
    });

    const final = {
      ...FINAL_BASE,
    };
    const clean = expect.objectContaining({
      ...final,
      jsDelivrHits: 12345,
      lastCrawl: expect.any(String),
      downloadsLast30Days: 0,
      downloadsRatio: 0,
      humanDownloadsLast30Days: '0',
      modified: expect.any(Number),
      _searchInternal: expect.objectContaining({
        ...final._searchInternal,
        popularAlternativeNames: ['preact', 'preact.js', 'preactjs'],
      }),
      _jsDelivrPopularity: 2,
      popular: true,
    });

    await saveDoc({
      formatted: formatPkg(preact)!,
      index,
      oneTimeDataIndex,
      periodicDataIndex,
    });

    expect(index.saveObject).toHaveBeenCalledWith(clean);
  });

  // When the one-time/periodic side indices already hold data for the
  // package, saveDoc must reuse it instead of recomputing/zeroing it.
  it('should reuse existing changelog and downloads data', async () => {
    const client = algoliasearch('e', '');
    const index = client.initIndex('a');
    jest.spyOn(index, 'saveObject').mockImplementationOnce(() => {
      return true as any;
    });

    const oneTimeDataIndex = client.initIndex('b');
    jest.spyOn(oneTimeDataIndex, 'getObject').mockImplementationOnce(() => {
      return { changelogFilename: '/resolved-from-index.md' } as any;
    });

    const periodicDataIndex = client.initIndex('c');
    jest.spyOn(periodicDataIndex, 'getObject').mockImplementationOnce(() => {
      return { packageNpmDownloads: 2233, totalNpmDownloads: 1e10 } as any;
    });

    const final = {
      ...FINAL_BASE,
    };
    const clean = expect.objectContaining({
      ...final,
      jsDelivrHits: 12345,
      changelogFilename: '/resolved-from-index.md',
      lastCrawl: expect.any(String),
      downloadsLast30Days: 2233,
      downloadsRatio: expect.any(Number),
      humanDownloadsLast30Days: '2.2k',
      modified: expect.any(Number),
      _searchInternal: expect.objectContaining({
        ...final._searchInternal,
        popularAlternativeNames: ['preact', 'preact.js', 'preactjs'],
      }),
      _jsDelivrPopularity: 2,
      popular: true,
    });

    await saveDoc({
      formatted: formatPkg(preact)!,
      index,
      oneTimeDataIndex,
      periodicDataIndex,
    });

    expect(index.saveObject).toHaveBeenCalledWith(clean);
  });

  // 'reactjs' is seeded as non-popular above, so popularAlternativeNames
  // must stay empty even though alternative names exist.
  it('should not add popular alternative names for non-popular packages', async () => {
    const client = algoliasearch('e', '');
    const index = client.initIndex('a');
    const oneTimeDataIndex = client.initIndex('a');
    const periodicDataIndex = client.initIndex('a');
    jest.spyOn(index, 'saveObject').mockImplementationOnce(() => {
      return true as any;
    });

    const final = {
      ...FINAL_BASE,
      name: 'reactjs',
      objectID: 'reactjs',
      tags: {
        latest: '1.0.0',
      },
      version: '1.0.0',
      versions: {
        '1.0.0': '2019-08-02T18:34:23.572Z',
      },
    };
    const clean = expect.objectContaining({
      ...final,
      jsDelivrHits: 1234,
      lastCrawl: expect.any(String),
      downloadsLast30Days: 0,
      downloadsRatio: 0,
      humanDownloadsLast30Days: '0',
      modified: expect.any(Number),
      _searchInternal: expect.objectContaining({
        popularAlternativeNames: [],
      }),
    });

    await saveDoc({
      formatted: formatPkg({
        ...preact,
        name: 'reactjs',
        'dist-tags': { latest: '1.0.0' },
        versions: {
          '1.0.0': {
            ...preact.versions['8.5.0'],
            name: 'reactjs',
            version: '1.0.0',
          },
        },
        time: {
          ...preact.time,
          '1.0.0': '2019-08-02T18:34:23.572Z',
        },
      })!,
      index,
      periodicDataIndex,
      oneTimeDataIndex,
    });

    expect(index.saveObject).toHaveBeenCalledWith(clean);
  });

  // Packages pointing at npm's security-holder repo are flagged and skip
  // the extra enrichment (downloads stay 0, isSecurityHeld is set).
  it('should skip getting extra data for security held packages', async () => {
    const client = algoliasearch('e', '');
    const index = client.initIndex('a');
    const oneTimeDataIndex = client.initIndex('a');
    const periodicDataIndex = client.initIndex('a');
    jest.spyOn(index, 'saveObject').mockImplementationOnce(() => {
      return true as any;
    });

    const final = {
      ...FINAL_BASE,
      name: 'trello-enterprises',
      objectID: 'trello-enterprises',
      tags: {
        latest: '1000.1000.1000',
      },
      version: '1000.1000.1000',
      versions: {
        '1000.1000.1000': '2019-08-02T18:34:23.572Z',
      },
      repository: {
        branch: 'master',
        head: undefined,
        host: 'github.com',
        path: '',
        project: 'security-holder',
        type: 'git',
        url: 'https://github.com/npm/security-holder',
        user: 'npm',
      },
      githubRepo: {
        head: 'master',
        path: '',
        project: 'security-holder',
        user: 'npm',
      },
      downloadsLast30Days: 0,
      humanDownloadsLast30Days: '0',
      isSecurityHeld: true,
    };
    const clean = expect.objectContaining({
      ...final,
      owner: expect.any(Object),
      homepage: expect.any(String),
      lastCrawl: expect.any(String),
      downloadsRatio: expect.any(Number),
      modified: expect.any(Number),
      _searchInternal: expect.objectContaining({
        popularAlternativeNames: [],
      }),
    });

    await saveDoc({
      formatted: formatPkg({
        ...preact,
        name: 'trello-enterprises',
        'dist-tags': { latest: '1000.1000.1000' },
        versions: {
          '1000.1000.1000': {
            ...preact.versions['8.5.0'],
            name: 'trello-enterprises',
            version: '1000.1000.1000',
          },
        },
        time: {
          ...preact.time,
          '1000.1000.1000': '2019-08-02T18:34:23.572Z',
        },
        repository: {
          type: 'git',
          url: 'https://github.com/npm/security-holder',
        },
      })!,
      index,
      oneTimeDataIndex,
      periodicDataIndex,
    });

    expect(index.saveObject).toHaveBeenCalledWith(clean);
  });
});
427 |
--------------------------------------------------------------------------------
/src/algolia/index.ts:
--------------------------------------------------------------------------------
1 | import { createNodeHttpRequester } from '@algolia/requester-node-http';
2 | import type { SearchClient, SearchIndex } from 'algoliasearch';
3 | import algoliasearch from 'algoliasearch';
4 |
5 | import type { Config } from '../config';
6 | import { httpAgent, httpsAgent, USER_AGENT } from '../utils/request';
7 |
/**
 * All Algolia indices used by the pipeline, plus the shared client.
 * Main and bootstrap each get `.queue` / `.lost` / `.not-found` companions;
 * the two data indices hold per-package enrichment results.
 */
export interface AlgoliaStore {
  mainIndex: SearchIndex;
  mainQueueIndex: SearchIndex;
  mainLostIndex: SearchIndex;
  mainNotFoundIndex: SearchIndex;
  bootstrapIndex: SearchIndex;
  bootstrapQueueIndex: SearchIndex;
  bootstrapLostIndex: SearchIndex;
  bootstrapNotFoundIndex: SearchIndex;
  oneTimeDataIndex: SearchIndex;
  periodicDataIndex: SearchIndex;
  client: SearchClient;
}
21 |
// Shared Node requester so every Algolia client reuses the same keep-alive
// agents (configured in utils/request).
const requester = createNodeHttpRequester({
  agent: httpsAgent,
  httpAgent,
  httpsAgent,
});
27 |
28 | function createClient({
29 | appId,
30 | apiKey,
31 | indexName,
32 | }: {
33 | appId: string;
34 | apiKey: string;
35 | indexName: string;
36 | }): { index: SearchIndex; client: SearchClient } {
37 | const client = algoliasearch(appId, apiKey, {
38 | requester,
39 | });
40 | client.addAlgoliaAgent(USER_AGENT);
41 | return {
42 | index: client.initIndex(indexName),
43 | client,
44 | };
45 | }
46 |
47 | /**
48 | * Prepare algolia for indexing.
49 | */
50 | export async function prepare(config: Config): Promise {
51 | if (!config.apiKey) {
52 | throw new Error(
53 | 'npm-search: Please provide the `apiKey` env variable and restart'
54 | );
55 | }
56 |
57 | // Get main index and boostrap algolia client
58 | const { index: mainIndex, client } = createClient(config);
59 | const { index: mainQueueIndex } = createClient({
60 | appId: config.appId,
61 | apiKey: config.apiKey,
62 | indexName: `${config.indexName}.queue`,
63 | });
64 | const { index: mainLostIndex } = createClient({
65 | appId: config.appId,
66 | apiKey: config.apiKey,
67 | indexName: `${config.indexName}.lost`,
68 | });
69 | const { index: mainNotFoundIndex } = createClient({
70 | appId: config.appId,
71 | apiKey: config.apiKey,
72 | indexName: `${config.indexName}.not-found`,
73 | });
74 | const { index: bootstrapIndex } = createClient({
75 | appId: config.appId,
76 | apiKey: config.apiKey,
77 | indexName: config.bootstrapIndexName,
78 | });
79 | const { index: bootstrapQueueIndex } = createClient({
80 | appId: config.appId,
81 | apiKey: config.apiKey,
82 | indexName: `${config.bootstrapIndexName}.queue`,
83 | });
84 | const { index: bootstrapLostIndex } = createClient({
85 | appId: config.appId,
86 | apiKey: config.apiKey,
87 | indexName: `${config.bootstrapIndexName}.lost`,
88 | });
89 | const { index: bootstrapNotFoundIndex } = createClient({
90 | appId: config.appId,
91 | apiKey: config.apiKey,
92 | indexName: `${config.bootstrapIndexName}.not-found`,
93 | });
94 | const { index: oneTimeDataIndex } = createClient({
95 | appId: config.appId,
96 | apiKey: config.apiKey,
97 | indexName: `${config.indexName}.one-time-data`,
98 | });
99 | const { index: periodicDataIndex } = createClient({
100 | appId: config.appId,
101 | apiKey: config.apiKey,
102 | indexName: `${config.indexName}.periodic-data`,
103 | });
104 |
105 | // Ensure indices exists by calling an empty setSettings()
106 | await mainIndex.setSettings({}).wait();
107 | await mainQueueIndex
108 | .setSettings({
109 | attributesForFaceting: ['isProcessed', 'retries'],
110 | })
111 | .wait();
112 | await bootstrapIndex.setSettings({}).wait();
113 | await bootstrapQueueIndex
114 | .setSettings({
115 | attributesForFaceting: ['retries'],
116 | })
117 | .wait();
118 | await mainLostIndex.setSettings({}).wait();
119 | await mainNotFoundIndex.setSettings({}).wait();
120 | await bootstrapLostIndex.setSettings({}).wait();
121 | await bootstrapNotFoundIndex.setSettings({}).wait();
122 | await oneTimeDataIndex.setSettings({}).wait();
123 | await periodicDataIndex.setSettings({}).wait();
124 |
125 | return {
126 | client,
127 | mainIndex,
128 | mainQueueIndex,
129 | mainLostIndex,
130 | mainNotFoundIndex,
131 | bootstrapIndex,
132 | bootstrapQueueIndex,
133 | bootstrapLostIndex,
134 | bootstrapNotFoundIndex,
135 | oneTimeDataIndex,
136 | periodicDataIndex,
137 | };
138 | }
139 |
140 | export async function putDefaultSettings(
141 | index: SearchIndex,
142 | config: Config
143 | ): Promise {
144 | await index.setSettings(config.indexSettings);
145 |
146 | await index.saveSynonyms(config.indexSynonyms, {
147 | replaceExistingSynonyms: true,
148 | });
149 | const { taskID } = await index.saveRules(config.indexRules, {
150 | replaceExistingRules: true,
151 | });
152 |
153 | await index.waitTask(taskID);
154 | }
155 |
--------------------------------------------------------------------------------
/src/api.ts:
--------------------------------------------------------------------------------
1 | import http from 'http';
2 |
3 | // import { datadog } from './utils/datadog';
4 | import { log } from './utils/log';
5 |
6 | // Used for health check
7 | export function createAPI(): http.Server {
8 | const server = http.createServer((_req, res) => {
9 | // datadog.check('main', datadog.CHECKS.OK);
10 | res.writeHead(200, { 'Content-Type': 'application/json' });
11 | res.end(
12 | JSON.stringify({
13 | code: 200,
14 | })
15 | );
16 | });
17 |
18 | server.listen(8000, () => {
19 | log.info(`⛑ API started on port 8000`);
20 | });
21 | return server;
22 | }
23 |
--------------------------------------------------------------------------------
/src/bootstrap.ts:
--------------------------------------------------------------------------------
1 | import { EventEmitter } from 'events';
2 |
3 | import chalk from 'chalk';
4 |
5 | import type { StateManager } from './StateManager';
6 | import type { AlgoliaStore } from './algolia';
7 | import { putDefaultSettings } from './algolia';
8 | import { config } from './config';
9 | import { MainBootstrapIndexer } from './indexers/MainBootstrapIndexer';
10 | import { OneTimeBackgroundIndexer } from './indexers/OneTimeBackgroundIndexer';
11 | import { PeriodicBackgroundIndexer } from './indexers/PeriodicBackgroundIndexer';
12 | import * as npm from './npm';
13 | import { Prefetcher } from './npm/Prefetcher';
14 | import { datadog } from './utils/datadog';
15 | import { log } from './utils/log';
16 | import * as sentry from './utils/sentry';
17 |
18 | export class Bootstrap extends EventEmitter {
19 | stateManager: StateManager;
20 | algoliaStore: AlgoliaStore;
21 | prefetcher: Prefetcher | undefined;
22 | interval: NodeJS.Timer | undefined;
23 | oneTimeIndexer: OneTimeBackgroundIndexer | undefined;
24 | periodicDataIndexer: PeriodicBackgroundIndexer | undefined;
25 | mainBootstrapIndexer: MainBootstrapIndexer | undefined;
26 |
27 | constructor(stateManager: StateManager, algoliaStore: AlgoliaStore) {
28 | super();
29 | this.stateManager = stateManager;
30 | this.algoliaStore = algoliaStore;
31 | }
32 |
33 | override on(param: 'finished', cb: () => any): this;
34 | override on(param: string, cb: () => void): this {
35 | return super.on(param, cb);
36 | }
37 |
38 | async stop(): Promise {
39 | log.info('Stopping Bootstrap...');
40 |
41 | if (this.interval) {
42 | clearInterval(this.interval);
43 | }
44 |
45 | if (this.prefetcher) {
46 | this.prefetcher.stop();
47 | await this.oneTimeIndexer!.stop();
48 | await this.periodicDataIndexer!.stop();
49 | await this.mainBootstrapIndexer!.stop();
50 | }
51 |
52 | log.info('Stopped Bootstrap gracefully');
53 | }
54 |
55 | /**
56 | * Bootstrap is the mode that goes from 0 to all the packages in NPM
57 | * In other word it is reindexing everything from scratch.
58 | *
59 | * It is useful if:
60 | * - you are starting this project for the first time
61 | * - you messed up with your Algolia index
62 | * - you lagged too much behind.
63 | *
64 | * Watch mode should/can be reliably left running for weeks/months as CouchDB is made for that.
65 | */
66 | async run(): Promise {
67 | log.info('-----');
68 | log.info('⛷ Bootstrap: starting');
69 | const state = await this.stateManager.check();
70 |
71 | await this.stateManager.save({
72 | stage: 'bootstrap',
73 | });
74 |
75 | const { seq, nbDocs: totalDocs } = await npm.getInfo();
76 | if (!state.bootstrapLastId) {
77 | // Start from 0
78 | log.info('⛷ Bootstrap: starting from the first doc');
79 | // first time this launches, we need to remember the last seq our bootstrap can trust
80 | await this.stateManager.save({ seq });
81 | await putDefaultSettings(this.algoliaStore.bootstrapIndex, config);
82 | } else {
83 | log.info('⛷ Bootstrap: starting at doc %s', state.bootstrapLastId);
84 | }
85 |
86 | log.info('-----');
87 | log.info(chalk.yellowBright`Total packages: ${totalDocs}`);
88 | log.info('-----');
89 |
90 | this.prefetcher = new Prefetcher(
91 | this.stateManager,
92 | this.algoliaStore.bootstrapQueueIndex,
93 | {
94 | nextKey: state.bootstrapLastId,
95 | }
96 | );
97 |
98 | this.oneTimeIndexer = new OneTimeBackgroundIndexer(
99 | this.algoliaStore,
100 | this.algoliaStore.bootstrapIndex
101 | );
102 |
103 | this.periodicDataIndexer = new PeriodicBackgroundIndexer(
104 | this.algoliaStore,
105 | this.algoliaStore.bootstrapIndex,
106 | this.algoliaStore.bootstrapNotFoundIndex
107 | );
108 |
109 | this.mainBootstrapIndexer = new MainBootstrapIndexer(this.algoliaStore);
110 |
111 | this.prefetcher.run();
112 | this.oneTimeIndexer.run();
113 | this.periodicDataIndexer.run();
114 | this.mainBootstrapIndexer.run();
115 |
116 | let done = 0;
117 |
118 | this.interval = setInterval(async () => {
119 | this.logProgress(done).catch(() => {});
120 |
121 | try {
122 | if (
123 | this.prefetcher!.isFinished &&
124 | (await this.mainBootstrapIndexer!.isFinished())
125 | ) {
126 | clearInterval(this.interval!);
127 | await this.afterProcessing();
128 | return;
129 | }
130 | } catch (e) {
131 | sentry.report(e);
132 | }
133 |
134 | done = 0;
135 | }, config.prefetchWaitBetweenPage);
136 | }
137 |
138 | /**
139 | * Tell if we need to execute bootstrap or not.
140 | */
141 | async isDone(): Promise {
142 | const state = await this.stateManager.check();
143 |
144 | if (state.seq && state.seq > 0 && state.bootstrapDone) {
145 | await putDefaultSettings(this.algoliaStore.mainIndex, config);
146 | log.info('⛷ Bootstrap: already done, skipping');
147 |
148 | return true;
149 | }
150 |
151 | return false;
152 | }
153 |
154 | /**
155 | * Last step after everything has been processed.
156 | */
157 | private async afterProcessing(): Promise {
158 | await this.oneTimeIndexer!.stop();
159 | await this.periodicDataIndexer!.stop();
160 | await this.mainBootstrapIndexer!.stop();
161 |
162 | await this.stateManager.save({
163 | bootstrapDone: true,
164 | bootstrapLastDone: Date.now(),
165 | });
166 |
167 | await this.moveToProduction();
168 |
169 | log.info('-----');
170 | log.info('⛷ Bootstrap: done');
171 | log.info('-----');
172 |
173 | this.emit('finished');
174 | }
175 |
176 | /**
177 | * Move algolia index to prod.
178 | */
179 | private async moveToProduction(): Promise {
180 | log.info('🚚 starting move to production');
181 |
182 | const currentState = await this.stateManager.get();
183 | // Backup current prod index
184 | await this.algoliaStore.client
185 | .copyIndex(
186 | config.indexName,
187 | `${config.indexName}.bak-${new Date().toISOString()}`
188 | )
189 | .wait();
190 |
191 | // Replace prod with bootstrap
192 | await this.algoliaStore.client
193 | .copyIndex(config.bootstrapIndexName, config.indexName)
194 | .wait();
195 |
196 | // Remove bootstrap so we don't end up reusing a partial index
197 | await this.algoliaStore.bootstrapIndex.delete();
198 |
199 | await this.stateManager.save(currentState);
200 | }
201 |
202 | /**
203 | * Log approximate progress.
204 | */
205 | private async logProgress(nbDocs: number): Promise {
206 | const { nbDocs: totalDocs } = await npm.getInfo();
207 | const queueLength = await this.mainBootstrapIndexer!.fetchQueueLength();
208 | const offset = this.prefetcher!.offset;
209 |
210 | datadog.gauge('sequence.total', totalDocs);
211 | datadog.gauge('sequence.current', offset + nbDocs);
212 | datadog.gauge('job.idleCount', queueLength);
213 |
214 | log.info(
215 | chalk.dim.italic
216 | .white`[progress] %d/%d docs queued (%s%) (~%s in queue) (%s processing; %s buffer)`,
217 | offset + nbDocs,
218 | totalDocs,
219 | ((Math.max(offset + nbDocs, 1) / totalDocs) * 100).toFixed(2),
220 | queueLength,
221 | this.mainBootstrapIndexer!.running,
222 | this.mainBootstrapIndexer!.queued
223 | );
224 | }
225 | }
226 |
--------------------------------------------------------------------------------
/src/changelog.ts:
--------------------------------------------------------------------------------
1 | import path from 'path';
2 |
3 | import { HTTPError } from 'got';
4 | import ms from 'ms';
5 | import PQueue from 'p-queue';
6 | import race from 'promise-rat-race';
7 |
8 | import type { RawPkg, Repo } from './@types/pkg';
9 | import * as jsDelivr from './jsDelivr/index';
10 | import { datadog } from './utils/datadog';
11 | import { request } from './utils/request';
12 |
13 | type ChangelogResult = {
14 | changelogFilename: string | null;
15 | };
16 |
17 | type HostObject = {
18 | name: string;
19 | queue: PQueue;
20 | buildUrl: (
21 | opts: Pick
22 | ) => string;
23 | };
24 |
25 | export const baseUrlMap = new Map();
26 |
27 | baseUrlMap.set('github.com', {
28 | name: 'github',
29 | queue: new PQueue({ intervalCap: 20, interval: 1000 }),
30 | buildUrl: ({ user, project, path: pathName, branch }): string => {
31 | return `https://raw.githubusercontent.com/${user}/${project}/${
32 | pathName ? '' : branch
33 | }${pathName.replace('/tree/', '')}`;
34 | },
35 | });
36 |
37 | baseUrlMap.set('gitlab.com', {
38 | name: 'gitlab',
39 | queue: new PQueue({ intervalCap: 10, interval: 1000 }),
40 | buildUrl: ({ user, project, path: pathName, branch }): string => {
41 | return `https://gitlab.com/${user}/${project}${
42 | pathName ? pathName.replace('tree', 'raw') : `/raw/${branch}`
43 | }`;
44 | },
45 | });
46 |
47 | baseUrlMap.set('bitbucket.org', {
48 | name: 'bitbucket',
49 | queue: new PQueue({ intervalCap: 10, interval: 1000 }),
50 | buildUrl: ({ user, project, path: pathName, branch }): string => {
51 | return `https://bitbucket.org/${user}/${project}${
52 | pathName ? pathName.replace('src', 'raw') : `/raw/${branch}`
53 | }`;
54 | },
55 | });
56 |
// Candidate changelog file names probed on the git host, most common first.
const fileOptions = [
  'CHANGELOG.md',
  'ChangeLog.md',
  'changelog.md',
  'changelog.markdown',
  'CHANGELOG',
  'ChangeLog',
  'changelog',
  'CHANGES.md',
  'changes.md',
  'Changes.md',
  'CHANGES',
  'changes',
  'Changes',
  'HISTORY.md',
  'history.md',
  'HISTORY',
  'history',
  'RELEASES.md',
  'RELEASES',
];

// https://regex101.com/r/zU2gjr/1
// Case-insensitive match for changelog-like base names with an optional
// .md/.markdown extension.
// NOTE(review): the '.' before (md|markdown) is unescaped, so any single
// character is accepted there — confirm whether that is intentional.
const fileRegex =
  /^(((changelogs?)|changes|history|(releases?)))((.(md|markdown))?$)/i;
82 |
83 | async function handledGot(file: string): Promise {
84 | const result = await request(file, { method: 'HEAD' });
85 |
86 | if (
87 | // bitbucket returns 200 for private repos
88 | // github returns a 404
89 | // I am unsure what gitlab does
90 | result?.redirectUrls?.find((res) =>
91 | res.startsWith('https://bitbucket.org/account/signin')
92 | )
93 | ) {
94 | throw new Error('Redirect leads to login page');
95 | }
96 | if (result.statusCode !== 200) {
97 | throw new Error('not found');
98 | }
99 |
100 | return result.url;
101 | }
102 |
103 | async function raceFromPaths(
104 | host: HostObject,
105 | files: string[]
106 | ): Promise {
107 | const start = Date.now();
108 |
109 | try {
110 | const url = await race(
111 | files.map((file) => {
112 | return host.queue.add(() => {
113 | datadog.increment(`changelogs.requests.${host.name}`);
114 | return handledGot(file);
115 | });
116 | })
117 | );
118 |
119 | datadog.increment(`changelogs.success`);
120 | return { changelogFilename: url };
121 | } catch (e) {
122 | if (
123 | e instanceof HTTPError &&
124 | (e.response.statusCode === 429 || e.response.statusCode >= 500)
125 | ) {
126 | datadog.increment(`changelogs.throttle.${host.name}`);
127 |
128 | if (!host.queue.isPaused) {
129 | host.queue.pause();
130 | setTimeout(() => host.queue.start(), ms('1 minute')).unref();
131 | }
132 | }
133 |
134 | datadog.increment(`changelogs.failure`);
135 | return { changelogFilename: null };
136 | } finally {
137 | datadog.timing('changelogs.getChangelog', Date.now() - start);
138 | }
139 | }
140 |
141 | export async function getChangelog(
142 | pkg: Pick,
143 | filelist: jsDelivr.File[]
144 | ): Promise<{
145 | changelogFilename: string | null;
146 | }> {
147 | for (const file of filelist) {
148 | const name = path.basename(file.name);
149 | if (!fileRegex.test(name)) {
150 | continue;
151 | }
152 |
153 | datadog.increment('jsdelivr.getChangelog.hit');
154 |
155 | return { changelogFilename: jsDelivr.getFullURL(pkg, file) };
156 | }
157 |
158 | datadog.increment('jsdelivr.getChangelog.miss');
159 | return { changelogFilename: null };
160 | }
161 |
162 | export async function getChangelogBackground(
163 | pkg: Pick
164 | ): Promise {
165 | const { repository } = pkg;
166 |
167 | if (!repository?.host) {
168 | return { changelogFilename: null };
169 | }
170 |
171 | const host = repository.host || '';
172 | const knownHost = baseUrlMap.get(host);
173 |
174 | // No known git hosts
175 | if (!knownHost) {
176 | return { changelogFilename: null };
177 | }
178 |
179 | const baseUrl = knownHost.buildUrl(repository);
180 | const files = fileOptions.map((file) =>
181 | [baseUrl.replace(/\/$/, ''), file].join('/')
182 | );
183 |
184 | // Brute-force from git host
185 | return raceFromPaths(knownHost, [...files]);
186 | }
187 |
--------------------------------------------------------------------------------
/src/config.ts:
--------------------------------------------------------------------------------
1 | import type { Settings, Synonym, Rule } from '@algolia/client-search';
2 | import ms from 'ms';
3 |
// Canonical Algolia settings pushed to the main/bootstrap indices by
// putDefaultSettings().
const indexSettings: Settings = {
  searchableAttributes: [
    'unordered(_popularName)',
    'name, description, keywords',
    '_searchInternal.popularAlternativeNames',
    'owner.name',
    'owners.name',
  ],
  attributesForFaceting: [
    'filterOnly(_searchInternal.popularAlternativeNames)' /* optionalFacetFilters to boost the name */,
    'filterOnly(bin)',
    'searchable(keywords)',
    'searchable(computedKeywords)',
    'searchable(owner.name)',
    '_oneTimeDataToUpdateAt',
    '_periodicDataUpdatedAt',
    'deprecated',
    'isDeprecated',
    'isSecurityHeld',
    'types.ts',
    'moduleTypes',
    'styleTypes',
    'popular',
  ],
  // Tie-breakers applied after the textual ranking criteria below.
  customRanking: [
    'desc(_downloadsMagnitude)',
    'desc(_jsDelivrPopularity)',
    'desc(dependents)',
    'desc(downloadsLast30Days)',
  ],
  disablePrefixOnAttributes: ['owner.name', 'owners.name'],
  disableExactOnAttributes: ['owner.name', 'owners.name'],
  exactOnSingleWordQuery: 'word',
  // Deprecated/security-held packages are pushed down before exact/custom.
  ranking: [
    'filters',
    'typo',
    'words',
    'attribute',
    'proximity',
    'asc(isSecurityHeld)',
    'asc(deprecated)',
    'asc(isDeprecated)',
    'asc(badPackage)',
    'desc(popular)',
    'exact',
    'custom',
  ],
  minProximity: 5,
  optionalWords: ['js', 'javascript'],
  separatorsToIndex: '_',
  replaceSynonymsInHighlight: false,
  maxValuesPerFacet: 1000,
  unretrievableAttributes: ['_oneTimeDataToUpdateAt', '_periodicDataUpdatedAt'],
};
58 |
// Two-way synonyms installed alongside the settings (see putDefaultSettings).
const indexSynonyms: Synonym[] = [
  {
    type: 'synonym',
    synonyms: ['_', 'underscore'],
    objectID: 'underscore',
  },
  {
    type: 'synonym',
    synonyms: ['a11y', 'accessibility', 'accessible'],
    objectID: 'a11y',
  },
  {
    type: 'synonym',
    synonyms: [
      'i18n',
      'internationalisation',
      'internationalization',
      'translation',
      'translate',
    ],
    objectID: 'i18n',
  },
  {
    type: 'synonym',
    synonyms: ['k8s', 'kubernetes'],
    objectID: 'k8s',
  },
];
87 |
// Query rules: boost exact popular-name matches, and support
// `author:`, `owner:` and `keyword:` filter syntax in the query string.
const indexRules: Rule[] = [
  {
    objectID: 'promote-exact',
    description: 'promote exact matches',
    condition: {
      pattern: '{facet:_searchInternal.popularAlternativeNames}',
      anchoring: 'is',
    },
    consequence: {
      params: {
        automaticOptionalFacetFilters: [
          {
            facet: '_searchInternal.popularAlternativeNames',
          },
        ],
      },
    },
  },
  {
    condition: {
      pattern: 'author\\: {facet:owner.name}',
      anchoring: 'contains',
    },
    consequence: {
      params: {
        automaticFacetFilters: ['owner.name'],
        query: {
          remove: ['author\\:', '{facet:owner.name}'],
        },
      },
    },
    description: 'filter on author: {owner.name}',
    objectID: 'author-filter',
  },
  {
    condition: {
      pattern: 'owner\\: {facet:owner.name}',
      anchoring: 'contains',
    },
    consequence: {
      params: {
        automaticFacetFilters: ['owner.name'],
        query: {
          remove: ['owner\\:', '{facet:owner.name}'],
        },
      },
    },
    description: 'filter on owner: {owner.name}',
    objectID: 'owner-filter',
  },
  {
    condition: {
      pattern: 'keyword\\: {facet:keywords}',
      anchoring: 'contains',
    },
    consequence: {
      params: {
        automaticFacetFilters: ['keywords'],
        query: {
          remove: ['keyword\\:', '{facet:keywords}'],
        },
      },
    },
    description: 'filter on keyword: {keywords}',
    objectID: 'keyword-filter',
  },
];
155 |
export const config = {
  // npm registry endpoints: CouchDB replication feed, downloads API,
  // and the regular public registry.
  npmRegistryEndpoint: 'https://replicate.npmjs.com',
  npmRegistryDBName: 'registry',
  npmDownloadsEndpoint: 'https://api.npmjs.org/downloads',
  npmRootEndpoint: 'https://registry.npmjs.org',
  // jsDelivr endpoints for monthly CDN hit counts and package metadata.
  jsDelivrHitsEndpoint:
    'https://data.jsdelivr.com/v1/stats/packages/all?period=month&type=npm',
  jsDelivrPackageEndpoint: 'https://data.jsdelivr.com/v1/package/npm',
  typescriptTypesIndex: 'https://cdn.jsdelivr.net/npm/all-the-package-types',
  // Records above this size (bytes, as measured by object-sizeof) get
  // truncated before indexing — see truncatePackage in formatPkg.ts.
  maxObjSize: 450000,
  // Download-share ratio used for the "popular" flag — confirm exact
  // semantics at the call site.
  popularDownloadsRatio: 0.005,
  appId: 'OFCNCOG2CU',
  // Empty by default; meant to be injected through the env override below.
  apiKey: '',
  indexName: 'npm-search',
  bootstrapIndexName: 'npm-search-bootstrap',
  bootstrapConcurrency: 25,
  // A full bootstrap is redone after this period.
  timeToRedoBootstrap: ms('30 days'),
  // Registry sequence override; undefined presumably means "resume from
  // saved state" — confirm against StateManager usage.
  seq: undefined,
  indexSettings,
  indexSynonyms,
  indexRules,
  prefetchWaitBetweenPage: 5000,
  // Retry/backoff policy for failed jobs.
  retryMax: 4,
  retrySkipped: ms('1 minute'),
  retryBackoffPow: 3,
  retryBackoffMax: ms('1 minute'),
  refreshPeriod: ms('2 minutes'),
  // Popularity thresholds above which alternative names become searchable.
  alternativeNamesNpmDownloadsThreshold: 5000,
  alternativeNamesJsDelivrHitsThreshold: 10000,

  // http
  defaultRequestTimeout: ms('30 seconds'),

  // Watch
  watchMaxPrefetch: 10,
  watchMinUnpause: 5,
};
193 |
194 | export type Config = typeof config;
195 |
196 | Object.entries(process.env).forEach(([key, value]) => {
197 | if (key in config) {
198 | config[key] =
199 | typeof config[key] === 'number' ? parseInt(value!, 10) : value;
200 | }
201 | });
202 |
--------------------------------------------------------------------------------
/src/errors.ts:
--------------------------------------------------------------------------------
/* eslint-disable max-classes-per-file */
// Presumably raised when a registry document was deleted — confirm at
// the throw sites in src/npm.
export class DeletedError extends Error {}
// Raised when a package cannot be fetched from the npm registry; callers
// (e.g. MainBootstrapIndexer.taskExecutor) catch it to skip the package.
export class PackageNotFoundError extends Error {}
4 |
--------------------------------------------------------------------------------
/src/formatPkg.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable complexity */
2 | import escape from 'escape-html';
3 | import gravatarUrl from 'gravatar-url';
4 | import hostedGitInfo from 'hosted-git-info';
5 | import NicePackage from 'nice-package';
6 | import numeral from 'numeral';
7 | import sizeof from 'object-sizeof';
8 | import traverse from 'traverse';
9 | import truncate from 'truncate-utf8-bytes';
10 |
11 | import type { NicePackageType } from './@types/nice-package';
12 | import type {
13 | ComputedMeta,
14 | GithubRepo,
15 | ModuleType,
16 | StyleType,
17 | Owner,
18 | RawPkg,
19 | Repo,
20 | } from './@types/pkg';
21 | import { config } from './config';
22 | import type { GetPackage, GetUser, PackageRepo } from './npm/types';
23 | import { datadog } from './utils/datadog';
24 |
// Fallback avatar used when a user has no valid email to hash.
const defaultGravatar = 'https://www.gravatar.com/avatar/';

// Result of one registry-subset rule: `include` says whether the package
// belongs to the named subset, `metadata` carries extra computed fields.
type Subset = {
  name: string;
  include: boolean;
  metadata?: { schematics: string };
};
32 |
// Rules that tag a package as belonging to a well-known registry subset.
// Each rule inspects the cleaned package and returns a Subset whose
// `include` flag says whether the package matches.
const registrySubsetRules: Array<(pkg: NicePackageType) => Subset> = [
  // Official (@babel/plugin-*) and community (babel-plugin-*) plugins.
  ({ name }): Subset => ({
    name: 'babel-plugin',
    include:
      name.startsWith('@babel/plugin') || name.startsWith('babel-plugin-'),
  }),

  // Vue CLI plugins: official, unscoped, or scoped community packages.
  ({ name }): Subset => ({
    name: 'vue-cli-plugin',
    include: /^(@vue\/|vue-|@[\w-]+\/vue-)cli-plugin-/.test(name),
  }),

  // Yeoman generators must follow the generator-* name AND carry the
  // dedicated keyword.
  ({ name, keywords = [] }): Subset => ({
    name: 'yeoman-generator',
    include:
      name.startsWith('generator-') && keywords.includes('yeoman-generator'),
  }),

  // Angular CLI schematics declare a `schematics` entry point, which is
  // also exposed as metadata.
  ({ schematics = '' }): Subset => ({
    name: 'angular-cli-schematic',
    include: schematics.length > 0,
    metadata: { schematics },
  }),

  // Webpack scaffolds.
  ({ name }): Subset => ({
    name: 'webpack-scaffold',
    include: name.startsWith('webpack-scaffold-'),
  }),
];
62 |
/**
 * Convert a raw npm registry document into the Algolia record shape.
 *
 * Returns undefined when the package must be skipped: no name after
 * cleaning, not attributable to any repo/author/publisher, or still too
 * big after truncation (truncatePackage returns undefined in that case).
 */
export function formatPkg(pkg: GetPackage): RawPkg | undefined {
  const start = Date.now();
  // Be careful NicePackage modify the Object ref
  const cleaned: NicePackageType | undefined = new NicePackage(pkg);
  if (!cleaned?.name) {
    return;
  }

  if (Array.isArray(cleaned.main)) {
    // https://github.com/angular-ui/bootstrap-bower/issues/52
    cleaned.main = cleaned.main[0];
  }

  const lastPublisher = cleaned.lastPublisher
    ? formatUser(cleaned.lastPublisher)
    : null;
  const author = getAuthor(cleaned);
  const license = getLicense(cleaned);

  const version = cleaned.version ? cleaned.version : '0.0.0';
  const versions = getVersions(cleaned, pkg);

  // Normalize the repository field, which may be a string, an object, or
  // an array of either; keep it only if it has both a type and a URL.
  let githubRepo: GithubRepo | null = null;
  let defaultRepository: PackageRepo | undefined;

  if (cleaned.repository) {
    let tmp = cleaned.repository;
    if (Array.isArray(tmp) && tmp.length) {
      tmp = tmp[0] as PackageRepo;
    }

    if (typeof tmp === 'string') {
      defaultRepository = { type: 'git', url: tmp };
    } else if (Object.keys(tmp).length > 0) {
      defaultRepository = tmp as PackageRepo;
    }

    // At this point, we are not even sure the source is correct
    if (
      defaultRepository &&
      (!defaultRepository.type || !defaultRepository.url)
    ) {
      defaultRepository = undefined;
    }

    if (defaultRepository) {
      githubRepo = getGitHubRepoInfo({
        repository: defaultRepository,
        gitHead: cleaned.gitHead,
      });
    }
  }

  if (!githubRepo && !lastPublisher && !author) {
    return; // ignore this package, we cannot link it to anyone
  }

  const repoInfo = getRepositoryInfo(defaultRepository);
  // If defaultRepository is undefined or it does not have an URL
  // we don't include it.
  const repository: Repo | null =
    defaultRepository?.url && repoInfo
      ? {
          ...defaultRepository, // Default info: type, url
          ...repoInfo, // Extra info: host, project, user...
          head: cleaned.gitHead,
          branch: cleaned.gitHead || 'master',
        }
      : null;

  const types = getTypes(cleaned);

  const owner = getOwner({ repository, lastPublisher, author }); // always favor the repository owner
  const { computedKeywords, computedMetadata } = getComputedData(cleaned);
  const keywords = getKeywords(cleaned);

  const dependencies = cleaned.dependencies || {};
  const devDependencies = cleaned.devDependencies || {};
  const alternativeNames = getAlternativeNames(cleaned.name);
  const moduleTypes = getModuleTypes(cleaned);
  const styleTypes = getStyleTypes(cleaned);

  const tags = pkg['dist-tags'];
  // `deprecated` can be a string reason, true, or false/undefined.
  const isDeprecated =
    cleaned.deprecated !== undefined && cleaned.deprecated !== false;
  // npm parks removed malicious packages under npm/security-holder.
  const isSecurityHeld =
    repository?.host === 'github.com' &&
    repository?.user === 'npm' &&
    repository?.project === 'security-holder';

  const rawPkg: RawPkg = {
    objectID: cleaned.name,
    rev: cleaned.other._rev,
    name: cleaned.name,
    downloadsLast30Days: 0,
    downloadsRatio: 0,
    humanDownloadsLast30Days: numeral(0).format('0.[0]a'),
    jsDelivrHits: 0,
    popular: false,
    version,
    versions,
    tags,
    description: cleaned.description ? cleaned.description : null,
    dependencies,
    devDependencies,
    originalAuthor: cleaned.other.author,
    repository,
    githubRepo,
    gitHead: githubRepo ? githubRepo.head : null, // remove this when we update to the new schema frontend
    readme: pkg.readme,
    owner,
    deprecated: isDeprecated ? cleaned.deprecated! : false,
    isDeprecated,
    deprecatedReason: isDeprecated ? String(cleaned.deprecated) : null,
    isSecurityHeld,
    homepage: getHomePage(cleaned),
    license,
    keywords,
    computedKeywords,
    computedMetadata,
    created: Date.parse(cleaned.created),
    modified: Date.parse(cleaned.modified),
    lastPublisher,
    owners: (cleaned.owners || []).map(formatUser),
    bin: cleaned.bin || {},
    humanDependents: '0',
    dependents: 0,
    types,
    moduleTypes,
    styleTypes,
    changelogFilename: null,
    lastCrawl: new Date().toISOString(),
    _revision: Date.now(),
    _searchInternal: {
      alternativeNames,
      popularAlternativeNames: [],
    },
  };

  // Shrink the record under the Algolia size limit, then HTML-escape all
  // string leaves except the readme.
  const truncated = truncatePackage(rawPkg);

  const escaped = traverse(truncated).forEach(maybeEscape);

  datadog.timing('formatPkg', Date.now() - start);
  return escaped;
}
209 |
210 | function checkSize(pkg: RawPkg): {
211 | size: number;
212 | diff: number;
213 | isTooBig: boolean;
214 | } {
215 | const size = sizeof(pkg);
216 | const diff = size - config.maxObjSize;
217 |
218 | return {
219 | size,
220 | diff,
221 | isTooBig: diff > 0,
222 | };
223 | }
224 |
/**
 * Shrink a record below config.maxObjSize in up to two passes:
 * 1. truncate the readme to exactly fit,
 * 2. if still too big, drop the readme and keep only the current
 *    version, the latest tag and a single owner.
 * Returns undefined when the record is still too big after both passes.
 */
function truncatePackage(pkg: RawPkg): RawPkg | undefined {
  const smallerPkg = { ...pkg };

  // Pass 1: cut the readme so the record fits exactly.
  {
    const { diff, isTooBig } = checkSize(smallerPkg);
    if (isTooBig && pkg.readme) {
      const postfix = ' **TRUNCATED**';
      // sizeof is * 2 what truncate expects
      const maxReadmeLength = (sizeof(pkg.readme) - diff - sizeof(postfix)) / 2;

      smallerPkg.readme = truncate(pkg.readme, maxReadmeLength) + postfix;
    }
  }

  // Pass 2: drop non-essential data entirely.
  {
    const { isTooBig } = checkSize(smallerPkg);
    if (isTooBig) {
      smallerPkg.readme =
        '** TRUNCATED ** this package was too big, so non-essential information was removed';
      smallerPkg.versions = pkg.versions[pkg.version]
        ? {
            [pkg.version]: pkg.versions[pkg.version]!,
          }
        : {};
      smallerPkg.tags = pkg?.tags?.latest
        ? {
            latest: pkg.tags.latest,
          }
        : {};
      smallerPkg.owners = smallerPkg.owner ? [smallerPkg.owner] : [];
    }
  }

  // This modify the type without warning,
  // {
  //   const { isTooBig } = checkSize(smallerPkg);
  //   if (isTooBig) {
  //     smallerPkg = {
  //       name: smallerPkg.name,
  //       readme: smallerPkg.readme,
  //     };
  //   }
  // }

  // Give up: the record cannot be made to fit.
  {
    const { isTooBig } = checkSize(smallerPkg);
    if (isTooBig) {
      return;
    }
  }

  return smallerPkg;
}
278 |
279 | function maybeEscape(this: any, node: any): void {
280 | if (this.isLeaf && typeof node === 'string') {
281 | if (this.key === 'readme') {
282 | this.update(node);
283 | } else {
284 | this.update(escape(node));
285 | }
286 | }
287 | }
288 |
289 | function getAuthor(cleaned: NicePackageType): Owner | null {
290 | if (cleaned.other.author && typeof cleaned.other.author === 'object') {
291 | return formatUser(cleaned.other.author);
292 | }
293 | if (Array.isArray(cleaned.owners) && typeof cleaned.owners[0] === 'object') {
294 | return formatUser(cleaned.owners[0]);
295 | }
296 | return null;
297 | }
298 |
299 | function getLicense(cleaned: NicePackageType): string | null {
300 | if (!cleaned.license) {
301 | return null;
302 | }
303 | if (
304 | typeof cleaned.license === 'object' &&
305 | typeof cleaned.license.type === 'string'
306 | ) {
307 | return cleaned.license.type;
308 | }
309 |
310 | if (typeof cleaned.license === 'string') {
311 | return cleaned.license;
312 | }
313 | return null;
314 | }
315 |
316 | function getOwner({
317 | repository,
318 | lastPublisher,
319 | author,
320 | }: {
321 | repository: RawPkg['repository'] | null;
322 | lastPublisher: RawPkg['lastPublisher'] | null;
323 | author: NicePackageType['other']['author'] | null;
324 | }): Owner | null {
325 | if (repository?.user) {
326 | const { user } = repository;
327 |
328 | if (repository.host === 'github.com') {
329 | return {
330 | name: user,
331 | avatar: `https://github.com/${user}.png`,
332 | link: `https://github.com/${user}`,
333 | };
334 | }
335 |
336 | if (repository.host === 'gitlab.com') {
337 | return {
338 | name: user,
339 | avatar: lastPublisher?.avatar,
340 | link: `https://gitlab.com/${user}`,
341 | };
342 | }
343 |
344 | if (repository.host === 'bitbucket.org') {
345 | return {
346 | name: user,
347 | avatar: `https://bitbucket.org/account/${user}/avatar`,
348 | link: `https://bitbucket.org/${user}`,
349 | };
350 | }
351 | }
352 |
353 | if (lastPublisher) {
354 | return lastPublisher;
355 | }
356 |
357 | return author || null;
358 | }
359 |
360 | function getGravatar(user: GetUser): string {
361 | if (
362 | !user.email ||
363 | typeof user.email !== 'string' ||
364 | user.email.indexOf('@') === -1
365 | ) {
366 | return defaultGravatar;
367 | }
368 |
369 | return gravatarUrl(user.email);
370 | }
371 |
372 | export function getVersions(
373 | cleaned: Pick,
374 | rawPkg: Pick
375 | ): Record {
376 | if (cleaned?.other?.time) {
377 | const realVersions = Object.keys(rawPkg.versions);
378 |
379 | return Object.fromEntries(
380 | Object.entries(cleaned.other.time).filter(([key]) =>
381 | realVersions.includes(key)
382 | )
383 | );
384 | }
385 | return {};
386 | }
387 |
388 | function getComputedData(cleaned: NicePackageType): ComputedMeta {
389 | const res: ComputedMeta = { computedKeywords: [], computedMetadata: {} };
390 | registrySubsetRules.forEach((matcher) => {
391 | const { include, metadata, name } = matcher(cleaned);
392 | if (!include) {
393 | return;
394 | }
395 | res.computedKeywords.push(name);
396 | res.computedMetadata = {
397 | ...res.computedMetadata,
398 | ...metadata,
399 | };
400 | });
401 | return res;
402 | }
403 |
404 | function getKeywords(cleaned: NicePackageType): string[] {
405 | if (cleaned.keywords) {
406 | if (Array.isArray(cleaned.keywords)) {
407 | return [...cleaned.keywords];
408 | }
409 | if (typeof cleaned.keywords === 'string') {
410 | return [cleaned.keywords];
411 | }
412 | }
413 | return [];
414 | }
415 |
416 | function getGitHubRepoInfo({
417 | repository,
418 | gitHead = 'master',
419 | }: {
420 | repository: PackageRepo;
421 | gitHead?: string;
422 | }): GithubRepo | null {
423 | const result = repository.url.match(
424 | /^https:\/\/(?:www\.)?github.com\/([^/]+)\/([^/]+)(\/.+)?$/
425 | );
426 |
427 | if (!result) {
428 | return null;
429 | }
430 |
431 | if (result.length < 3) {
432 | return null;
433 | }
434 |
435 | const head = gitHead;
436 | const [, user, project, path = ''] = result;
437 |
438 | return {
439 | user: user!,
440 | project: project!,
441 | path,
442 | head,
443 | };
444 | }
445 |
446 | function getHomePage(pkg: NicePackageType): string | null {
447 | if (
448 | pkg.homepage &&
449 | typeof pkg.homepage === 'string' && // if there's a homepage
450 | (!pkg.repository || // and there's no repo,
451 | typeof pkg.repository !== 'string' || // or repo is not a string
452 | pkg.homepage.indexOf(pkg.repository) < 0) // or repo is different than homepage
453 | ) {
454 | return pkg.homepage; // then we consider it a valuable homepage
455 | }
456 |
457 | return null;
458 | }
459 |
460 | /**
461 | * Get info from urls like this: (has multiple packages in one repo, like babel does)
462 | * https://github.com/babel/babel/tree/master/packages/babel
463 | * https://gitlab.com/user/repo/tree/master/packages/project1
464 | * https://bitbucket.org/user/repo/src/ae8df4cd0e809a789e3f96fd114075191c0d5c8b/packages/project1/.
465 | *
466 | * This function is like getGitHubRepoInfo (above), but support github, gitlab and bitbucket.
467 | */
468 | function getRepositoryInfoFromHttpUrl(repository: string): Repo | null {
469 | const result = repository.match(
470 | /^https?:\/\/(?:www\.)?((?:github|gitlab|bitbucket)).((?:com|org))\/([^/]+)\/([^/]+)(\/.+)?$/
471 | );
472 |
473 | if (!result || result.length < 6) {
474 | return null;
475 | }
476 |
477 | const [, domain, domainTld, user, project, path = ''] = result;
478 |
479 | return {
480 | url: repository,
481 | host: `${domain}.${domainTld}`,
482 | user: user!,
483 | project: project!,
484 | path,
485 | };
486 | }
487 |
/**
 * Resolve a repository field (string or object form) into a normalized
 * Repo, using hosted-git-info first and a manual URL parser as fallback.
 * Returns null when there is no usable URL.
 */
export function getRepositoryInfo(
  repository: GetPackage['repository'] | string
): Repo | null {
  if (!repository) {
    return null;
  }

  const url = typeof repository === 'string' ? repository : repository.url;
  // The object form may carry a monorepo sub-directory.
  const path = typeof repository === 'string' ? '' : repository.directory || '';

  if (!url) {
    return null;
  }

  /**
   * Get information using hosted-git-info.
   */
  try {
    const repositoryInfo = hostedGitInfo.fromUrl(url);

    if (repositoryInfo) {
      const { project, user, domain } = repositoryInfo;
      return {
        url,
        project,
        user,
        host: domain,
        // Strip leading "./" or "/" noise from the directory.
        path: path.replace(/^[./]+/, ''),
      };
    }
  } catch {
    // Ignore.
  }

  /**
   * Unfortunately, hosted-git-info can't handle URL like this: (has path)
   * https://github.com/babel/babel/tree/master/packages/babel-core
   * so we need to do it.
   */
  const repositoryInfoFromUrl = getRepositoryInfoFromHttpUrl(url);
  if (!repositoryInfoFromUrl) {
    return null;
  }
  // Prefer the explicit directory over the path parsed from the URL.
  return {
    ...repositoryInfoFromUrl,
    path: path.replace(/^[./]+/, '') || repositoryInfoFromUrl.path,
  };
}
536 |
537 | function formatUser(user: GetUser): Owner {
538 | return {
539 | ...user,
540 | avatar: getGravatar(user),
541 | link: `https://www.npmjs.com/~${encodeURIComponent(user.name)}`,
542 | };
543 | }
544 |
545 | function getTypes(pkg: NicePackageType): RawPkg['types'] {
546 | // The cheap and simple (+ recommended by TS) way
547 | // of adding a types section to your package.json
548 | if (pkg.types) {
549 | return { ts: 'included' };
550 | }
551 |
552 | // Older, but still works way of defining your types
553 | if (pkg.typings) {
554 | return { ts: 'included' };
555 | }
556 |
557 | return {
558 | ts: { possible: true },
559 | };
560 | }
561 |
562 | function getAlternativeNames(name: string): string[] {
563 | const alternativeNames = new Set();
564 |
565 | const concatenatedName = name.replace(/[-/@_.]+/g, '');
566 | alternativeNames.add(concatenatedName);
567 |
568 | const splitName = name.replace(/[-/@_.]+/g, ' ');
569 | alternativeNames.add(splitName);
570 |
571 | const isDotJs = name.endsWith('.js');
572 | const isJsSuffix = name.match(/\.?js$/);
573 |
574 | if (isDotJs) {
575 | alternativeNames.add(name.substring(0, name.length - 3));
576 | } else if (isJsSuffix) {
577 | alternativeNames.add(name.substring(0, name.length - 2));
578 | } else {
579 | alternativeNames.add(`${name}.js`);
580 | alternativeNames.add(`${name}js`);
581 | }
582 |
583 | alternativeNames.add(name);
584 |
585 | return Array.from(alternativeNames);
586 | }
587 |
588 | export function getMains(pkg: Pick): string[] {
589 | if (Array.isArray(pkg.main)) {
590 | // we can not deal with non-string mains for now
591 | return pkg.main.filter((main) => typeof main === 'string');
592 | }
593 | if (typeof pkg.main === 'string') {
594 | return [pkg.main];
595 | }
596 | if (typeof pkg.main === 'undefined') {
597 | return ['index.js'];
598 | }
599 | // we can not deal with non-array ||non-string mains for now
600 | return [];
601 | }
602 |
603 | export function getExportKeys(
604 | exp: NicePackageType['exports'] | string
605 | ): string[] {
606 | if (typeof exp !== 'object' || exp === null) {
607 | return [];
608 | }
609 | const keys = Object.keys(exp);
610 | const nestedKeys = keys.flatMap((key) => getExportKeys(exp[key]));
611 | return [...keys, ...nestedKeys];
612 | }
613 |
614 | const typeToModuleTypeMapping: Record<
615 | Required['type'],
616 | ModuleType
617 | > = {
618 | commonjs: 'cjs',
619 | module: 'esm',
620 | };
621 |
622 | function getModuleTypes(pkg: NicePackageType): ModuleType[] {
623 | const moduleTypes: Set = new Set();
624 |
625 | // type is declared
626 | if (pkg.type) {
627 | moduleTypes.add(typeToModuleTypeMapping[pkg.type]);
628 | }
629 |
630 | // get all explicit exports (supporting cjs in esm or other way round)
631 | // reference: https://nodejs.org/api/packages.html
632 | const exportKeys = getExportKeys(pkg.exports);
633 | if (exportKeys.includes('import')) {
634 | moduleTypes.add('esm');
635 | }
636 | if (exportKeys.includes('require')) {
637 | moduleTypes.add('cjs');
638 | }
639 |
640 | // module (non-standard) is declared
641 | if (typeof pkg.module === 'string') {
642 | moduleTypes.add('esm');
643 | }
644 |
645 | // check the extension of each of the "main" values
646 | getMains(pkg).forEach((main) => {
647 | if (main.endsWith('.mjs')) {
648 | moduleTypes.add('esm');
649 | }
650 | if (main.endsWith('.cjs')) {
651 | moduleTypes.add('cjs');
652 | }
653 | });
654 |
655 | // add a default value to make filtering possible
656 | if (moduleTypes.size === 0) {
657 | moduleTypes.add('unknown');
658 | }
659 |
660 | return [...moduleTypes];
661 | }
662 |
663 | function getStyleTypes(pkg: NicePackageType): StyleType[] {
664 | // style not declared - we will detect it later based on file list
665 | if (typeof pkg.style !== 'string') {
666 | return [];
667 | }
668 |
669 | const ext = pkg.style.split('.').pop();
670 |
671 | return ext ? [ext.toLowerCase()] : [];
672 | }
673 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable no-process-exit */
2 |
3 | import 'elastic-apm-node/start';
4 |
5 | import type http from 'http';
6 |
7 | import ms from 'ms';
8 |
9 | import { version } from '../package.json';
10 |
11 | import { StateManager } from './StateManager';
12 | import * as algolia from './algolia/index';
13 | import { createAPI } from './api';
14 | import { Bootstrap } from './bootstrap';
15 | import { config } from './config';
16 | import * as jsDelivr from './jsDelivr/index';
17 | import * as npm from './npm/index';
18 | import * as typescript from './typescript/index';
19 | import { datadog } from './utils/datadog';
20 | import { log } from './utils/log';
21 | import * as sentry from './utils/sentry';
22 | import { Watch } from './watch';
23 |
// The process self-terminates on this cadence (see Main.run) so caches
// reset and the bootstrap can re-trigger if needed.
const KILL_PROCESS_EVERY_MS = ms('4 hours');
25 |
26 | class Main {
27 | bootstrap: Bootstrap | undefined;
28 | watch: Watch | undefined;
29 | healthApi: http.Server | undefined;
30 |
31 | async preload(): Promise {
32 | await Promise.all([
33 | jsDelivr.loadHits(),
34 | npm.loadTotalDownloads(),
35 | typescript.loadTypesIndex(),
36 | ]);
37 | }
38 |
39 | async run(): Promise {
40 | log.info('🗿 npm ↔️ Algolia replication starts ⛷ 🐌 🛰', { version });
41 | let start = Date.now();
42 |
43 | // We schedule to kill the process:
44 | // - reset cache
45 | // - maybe retrigger bootstrap
46 | setTimeout(() => {
47 | log.info('👋 Scheduled process cleaning');
48 | close();
49 | }, KILL_PROCESS_EVERY_MS).unref();
50 |
51 | this.healthApi = createAPI();
52 |
53 | // first we make sure the bootstrap index has the correct settings
54 | start = Date.now();
55 |
56 | log.info('💪 Setting up Algolia', config.appId, [
57 | config.bootstrapIndexName,
58 | config.indexName,
59 | ]);
60 | const algoliaStore = await algolia.prepare(config);
61 | datadog.timing('main.init_algolia', Date.now() - start);
62 |
63 | // Create State Manager that holds progression of indexing
64 | const stateManager = new StateManager(algoliaStore.mainIndex);
65 |
66 | const scheduleRefresh = (delay = ms('1 hour')): void => {
67 | setTimeout(() => {
68 | this.preload()
69 | .then(() => {
70 | scheduleRefresh();
71 | })
72 | .catch(() => {
73 | scheduleRefresh(ms('1 minute'));
74 | });
75 | }, delay).unref();
76 | };
77 |
78 | // Preload some useful data
79 | await this.preload();
80 | scheduleRefresh();
81 |
82 | this.bootstrap = new Bootstrap(stateManager, algoliaStore);
83 | this.watch = new Watch(stateManager, algoliaStore);
84 |
85 | if (!(await this.bootstrap.isDone())) {
86 | this.bootstrap.on('finished', async () => {
87 | await this.watch!.run();
88 | });
89 |
90 | // then we run the bootstrap
91 | // after a bootstrap is done, it's moved to main (with settings)
92 | // if it was already finished, we will set the settings on the main index
93 | await this.bootstrap.run();
94 | } else {
95 | await this.watch.run();
96 | }
97 | }
98 |
99 | async stop(): Promise {
100 | if (this.bootstrap) {
101 | await this.bootstrap.stop();
102 | }
103 | if (this.watch) {
104 | await this.watch.stop();
105 | }
106 | if (this.healthApi) {
107 | await new Promise((resolve) => {
108 | this.healthApi!.close(resolve);
109 | });
110 | }
111 | log.info('Stopped Main gracefully');
112 | }
113 | }
114 |
115 | const main = new Main();
116 |
117 | process.on('unhandledRejection', (err) => {
118 | sentry.report(new Error('unhandledRejection'), { err });
119 | close();
120 | });
121 | process.on('uncaughtException', (err) => {
122 | sentry.report(new Error('uncauthexception'), { err });
123 | });
124 |
125 | (async (): Promise => {
126 | try {
127 | await main.run();
128 | } catch (err) {
129 | sentry.report(new Error('Error during run'), { err });
130 | close();
131 | }
132 | })();
133 |
134 | async function close(): Promise {
135 | log.info('Close was requested');
136 | setTimeout(() => {
137 | // grace period in case a lot of jobs are pending
138 | process.exit(1);
139 | }, 90000).unref();
140 |
141 | // datadog.close();
142 | await sentry.drain();
143 | await main.stop();
144 |
145 | process.nextTick(() => {
146 | process.exit(0);
147 | });
148 | }
149 |
// Graceful shutdown on Ctrl-C or an orchestrator stop signal.
process.once('SIGINT', async () => {
  await close();
});

process.once('SIGTERM', async () => {
  await close();
});
157 |
--------------------------------------------------------------------------------
/src/indexers/Indexer.ts:
--------------------------------------------------------------------------------
1 | import { setTimeout } from 'node:timers/promises';
2 |
3 | import type { SearchIndex } from 'algoliasearch';
4 | import chalk from 'chalk';
5 | import type { DebouncedFunc } from 'lodash';
6 | import _ from 'lodash';
7 | import ms from 'ms';
8 | import PQueue from 'p-queue';
9 |
10 | import type { AlgoliaStore } from '../algolia';
11 | import { log } from '../utils/log';
12 | import * as sentry from '../utils/sentry';
13 |
14 | export abstract class Indexer {
15 | protected mainIndex: SearchIndex;
16 | protected algoliaStore: AlgoliaStore;
17 |
18 | private recordQueue: PQueue;
19 | private recordsQueueConcurrency: number = 240;
20 |
21 | private taskQueue: PQueue;
22 | private taskQueueConcurrency: number = 120;
23 |
24 | private isRunning: boolean = false;
25 | private readonly throttledFetchFacets: DebouncedFunc<() => Promise>;
26 |
27 | protected abstract readonly facetField: string;
28 |
29 | get facetFilter(): string | undefined {
30 | return undefined;
31 | }
32 |
33 | get queued(): number {
34 | return this.taskQueue.size;
35 | }
36 |
37 | get running(): number {
38 | return this.taskQueue.pending;
39 | }
40 |
41 | constructor(algoliaStore: AlgoliaStore, mainIndex: SearchIndex) {
42 | this.mainIndex = mainIndex;
43 | this.algoliaStore = algoliaStore;
44 |
45 | this.throttledFetchFacets = _.throttle(
46 | () => this.fetchFacets().catch(() => []),
47 | ms('1 minute')
48 | );
49 |
50 | this.recordQueue = new PQueue({
51 | concurrency: this.recordsQueueConcurrency,
52 | });
53 |
54 | this.taskQueue = new PQueue({
55 | concurrency: this.taskQueueConcurrency,
56 | });
57 | }
58 |
59 | async fetchFacets(): Promise {
60 | const result = await this.mainIndex.search('', {
61 | filters: this.facetFilter,
62 | facets: [this.facetField],
63 | hitsPerPage: 0,
64 | maxValuesPerFacet: 1000,
65 | sortFacetValuesBy: 'alpha',
66 | });
67 |
68 | if (!result.facets) {
69 | log.error('Wrong results from Algolia');
70 | return [];
71 | }
72 |
73 | return Object.keys(result.facets[this.facetField] || {}).sort();
74 | }
75 |
76 | async *fetchRecords(): AsyncGenerator {
77 | const facets = await this.throttledFetchFacets();
78 |
79 | if (!facets?.length) {
80 | return [];
81 | }
82 |
83 | for (const facet of facets) {
84 | let cursor;
85 |
86 | while (this.isRunning) {
87 | // Using direct API call here because the client library doesn't allow
88 | // for asynchronous callbacks between pages.
89 | const response = await this.algoliaStore.client.customRequest({
90 | method: 'GET',
91 | path: `/1/indexes/${this.mainIndex.indexName}/browse`,
92 | data: {
93 | filters: `${this.facetFilter ? `${this.facetFilter} AND ` : ''}${
94 | this.facetField
95 | }:${facet}`,
96 | ...(cursor ? { cursor } : {}),
97 | },
98 | cacheable: false,
99 | });
100 |
101 | yield response.hits;
102 |
103 | if (!response.cursor) {
104 | break;
105 | }
106 |
107 | cursor = response.cursor;
108 | }
109 | }
110 | }
111 |
112 | async flush(): Promise {}
113 |
114 | async isFinished(): Promise {
115 | return (
116 | !this.recordQueue.size &&
117 | !this.recordQueue.pending &&
118 | !this.taskQueue.size &&
119 | !this.taskQueue.pending
120 | );
121 | }
122 |
123 | async queueTask(task: TTask): Promise {
124 | while (this.taskQueue.size > this.taskQueueConcurrency) {
125 | await setTimeout(ms('1 second'));
126 | }
127 |
128 | this.taskQueue.add(() => this.taskExecutor(task));
129 | }
130 |
131 | run(): void {
132 | this.isRunning = true;
133 |
134 | this.runInternal().catch((e) => {
135 | sentry.report(e);
136 | });
137 | }
138 |
139 | async runInternal(): Promise {
140 | try {
141 | for await (const records of this.fetchRecords()) {
142 | if (!this.isRunning) {
143 | return;
144 | }
145 |
146 | if (!records.length) {
147 | continue;
148 | }
149 |
150 | log.info(
151 | chalk.dim.italic
152 | .white`[${this.constructor.name}] %d new, %d in record queue, %d in task queue`,
153 | records.length,
154 | this.recordQueue.size,
155 | this.taskQueue.size
156 | );
157 |
158 | for (const record of records) {
159 | this.recordQueue.add(() => this.recordExecutor(record));
160 | }
161 |
162 | while (this.recordQueue.size > this.recordsQueueConcurrency) {
163 | await setTimeout(ms('1 second'));
164 | }
165 | }
166 | } catch (err) {
167 | sentry.report(new Error(`Error in ${this.constructor.name}`), { err });
168 | }
169 |
170 | await this.flush();
171 |
172 | // Minimum wait between loops.
173 | await setTimeout(ms('5 seconds'));
174 |
175 | // Finish processing all records before the next batch starts.
176 | while (
177 | this.recordQueue.size ||
178 | this.recordQueue.pending ||
179 | this.taskQueue.size ||
180 | this.taskQueue.pending
181 | ) {
182 | await setTimeout(ms('1 second'));
183 | }
184 |
185 | return this.runInternal();
186 | }
187 |
188 | async stop(force: boolean = false): Promise {
189 | this.isRunning = false;
190 |
191 | if (force) {
192 | this.recordQueue.clear();
193 | this.taskQueue.clear();
194 | }
195 |
196 | if (this.recordQueue.size || this.recordQueue.pending) {
197 | await this.recordQueue.onIdle();
198 | }
199 |
200 | if (this.recordQueue.size || this.taskQueue.pending) {
201 | await this.taskQueue.onIdle();
202 | }
203 | }
204 |
205 | abstract recordExecutor(record: TMainRecord): Promise;
206 |
207 | abstract taskExecutor(task: TTask): Promise;
208 | }
209 |
--------------------------------------------------------------------------------
/src/indexers/MainBootstrapIndexer.ts:
--------------------------------------------------------------------------------
1 | import type { AlgoliaStore } from '../algolia';
2 | import { PackageNotFoundError } from '../errors';
3 | import { formatPkg } from '../formatPkg';
4 | import * as npm from '../npm';
5 | import type { PrefetchedPkg } from '../npm/Prefetcher';
6 | import { type GetPackage } from '../npm/types';
7 | import { saveDoc } from '../saveDocs';
8 | import { datadog } from '../utils/datadog';
9 | import { log } from '../utils/log';
10 | import * as sentry from '../utils/sentry';
11 |
12 | import { MainIndexer } from './MainIndexer';
13 |
// One bootstrap-queue entry: the prefetched package plus its queue-index objectID and retry count.
type TaskType = { pkg: PrefetchedPkg; objectID: string; retries: number };
15 |
16 | export class MainBootstrapIndexer extends MainIndexer {
17 | protected facetField = 'retries';
18 |
19 | constructor(algoliaStore: AlgoliaStore) {
20 | super(algoliaStore, algoliaStore.bootstrapQueueIndex);
21 | }
22 |
23 | override async isFinished(): Promise {
24 | if (!(await super.isFinished())) {
25 | return false;
26 | }
27 |
28 | return (await this.fetchQueueLength()) === 0;
29 | }
30 |
31 | async markAsProcessed(objectID): Promise {
32 | await this.mainIndex
33 | .deleteObject(objectID)
34 | .wait()
35 | .catch(() => {});
36 | }
37 |
38 | async recordExecutor(record: TaskType): Promise {
39 | await this.queueTask(record);
40 | }
41 |
42 | async taskExecutor({ pkg, objectID, retries }): Promise {
43 | log.info(`Start:`, pkg.id, retries);
44 | const start = Date.now();
45 |
46 | try {
47 | datadog.increment('packages');
48 |
49 | let res: GetPackage;
50 |
51 | try {
52 | res = await npm.getDocFromRegistry(pkg.id);
53 | } catch (error) {
54 | if (error instanceof PackageNotFoundError) {
55 | log.warn('Package not found in the registry', error);
56 | } else {
57 | log.error('Got an error', error);
58 | }
59 |
60 | await this.markAsProcessed(objectID);
61 | return;
62 | }
63 |
64 | const formatted = formatPkg(res);
65 |
66 | if (!formatted) {
67 | log.error('Empty formatted output', pkg);
68 | await this.markAsProcessed(objectID);
69 | return;
70 | }
71 |
72 | await saveDoc({
73 | formatted,
74 | index: this.algoliaStore.bootstrapIndex,
75 | oneTimeDataIndex: this.algoliaStore.oneTimeDataIndex,
76 | periodicDataIndex: this.algoliaStore.periodicDataIndex,
77 | });
78 |
79 | await this.markAsProcessed(objectID);
80 | log.info(`Done:`, pkg.id, retries);
81 | } catch (err: any) {
82 | log.info(`Failed:`, pkg.id, retries, err.statusCode);
83 |
84 | if (err.statusCode === 404) {
85 | // Store in not-found index
86 | datadog.increment('job.notFound');
87 |
88 | await this.algoliaStore.bootstrapNotFoundIndex
89 | .saveObject({
90 | name: pkg.id,
91 | objectID: pkg.id,
92 | err: err instanceof Error ? err.toString() : err,
93 | date: new Date().toISOString(),
94 | movedBy: 'bootstrap',
95 | })
96 | .catch(() => {});
97 |
98 | await this.markAsProcessed(objectID);
99 | return;
100 | }
101 |
102 | sentry.report(new Error('Error during job'), {
103 | statusCode: err.statusCode,
104 | err,
105 | });
106 |
107 | datadog.increment('job.retries');
108 |
109 | await this.mainIndex
110 | .partialUpdateObject({
111 | objectID,
112 | retries: retries + 1,
113 | })
114 | .wait()
115 | .catch(() => {});
116 | } finally {
117 | datadog.timing('loop', Date.now() - start);
118 | }
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/src/indexers/MainIndexer.ts:
--------------------------------------------------------------------------------
1 | import { Indexer } from './Indexer';
2 |
3 | export abstract class MainIndexer extends Indexer {
4 | async fetchQueueLength(): Promise {
5 | const { nbHits } = await this.mainIndex.search('', {
6 | filters: this.facetFilter,
7 | });
8 |
9 | return nbHits;
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/src/indexers/MainWatchIndexer.ts:
--------------------------------------------------------------------------------
1 | import ms from 'ms';
2 | import type { DatabaseChangesResultItem } from 'nano';
3 |
4 | import type { AlgoliaStore } from '../algolia';
5 | import { PackageNotFoundError } from '../errors';
6 | import { formatPkg } from '../formatPkg';
7 | import * as npm from '../npm';
8 | import type { GetPackage } from '../npm/types';
9 | import { saveDoc } from '../saveDocs';
10 | import { datadog } from '../utils/datadog';
11 | import { log } from '../utils/log';
12 | import * as sentry from '../utils/sentry';
13 |
14 | import { MainIndexer } from './MainIndexer';
15 |
// One watch-queue entry: a change row from the npm replicate feed plus queue bookkeeping.
type TaskType = {
  seq: number; // Sequence number of the change in the feed.
  name: string;
  objectID: string;
  retries: number;
  change: DatabaseChangesResultItem;
};
23 |
24 | export class MainWatchIndexer extends MainIndexer {
25 | protected facetField = 'retries';
26 | protected cleanupInterval: NodeJS.Timer | undefined;
27 |
28 | override get facetFilter(): string {
29 | return 'NOT isProcessed:1';
30 | }
31 |
32 | constructor(algoliaStore: AlgoliaStore) {
33 | super(algoliaStore, algoliaStore.mainQueueIndex);
34 | }
35 |
36 | async markAsProcessed(objectID, seq): Promise {
37 | await this.mainIndex
38 | .partialUpdateObject({
39 | objectID,
40 | isProcessed: 1,
41 | seq: { _operation: 'IncrementFrom', value: seq },
42 | })
43 | .wait()
44 | .catch(() => {});
45 | }
46 |
47 | async recordExecutor(record: TaskType): Promise {
48 | await this.queueTask(record);
49 | }
50 |
51 | override run(): void {
52 | this.cleanupInterval = setInterval(() => {
53 | this.mainIndex
54 | .deleteBy({
55 | filters: 'isProcessed:1',
56 | })
57 | .catch((e) => sentry.report(e));
58 | }, ms('1 minute'));
59 |
60 | super.run();
61 | }
62 |
63 | override async stop(force: boolean = false): Promise {
64 | clearInterval(this.cleanupInterval);
65 | return super.stop(force);
66 | }
67 |
68 | async taskExecutor({
69 | seq,
70 | objectID,
71 | retries,
72 | change,
73 | }: TaskType): Promise {
74 | log.info(`Start:`, change.id, retries);
75 | const start = Date.now();
76 |
77 | try {
78 | datadog.increment('packages');
79 |
80 | if (change.deleted) {
81 | await this.algoliaStore.mainIndex.deleteObject(change.id);
82 | } else {
83 | if (change.changes.length <= 0) {
84 | log.error('Document without change');
85 | await this.markAsProcessed(objectID, seq);
86 | return;
87 | }
88 |
89 | let res: GetPackage;
90 |
91 | try {
92 | res = await npm.getDocFromRegistry(change.id);
93 | } catch (error) {
94 | if (error instanceof PackageNotFoundError) {
95 | log.warn('Package not found in the registry', error);
96 | } else {
97 | log.error('Got an error', error);
98 | }
99 |
100 | await this.markAsProcessed(objectID, seq);
101 | return;
102 | }
103 |
104 | const formatted = formatPkg(res);
105 |
106 | if (!formatted) {
107 | await this.markAsProcessed(objectID, seq);
108 | return;
109 | }
110 |
111 | await saveDoc({
112 | formatted,
113 | index: this.algoliaStore.mainIndex,
114 | oneTimeDataIndex: this.algoliaStore.oneTimeDataIndex,
115 | periodicDataIndex: this.algoliaStore.periodicDataIndex,
116 | });
117 | }
118 |
119 | await this.markAsProcessed(objectID, seq);
120 | log.info(`Done:`, change.id, retries);
121 | } catch (err: any) {
122 | log.info(`Failed:`, change.id, retries, err.statusCode);
123 |
124 | if (err.statusCode === 404) {
125 | // Store in not-found index
126 | datadog.increment('job.notFound');
127 |
128 | await this.algoliaStore.mainNotFoundIndex
129 | .saveObject({
130 | name: change.id,
131 | objectID: change.id,
132 | err: err instanceof Error ? err.toString() : err,
133 | date: new Date().toISOString(),
134 | movedBy: 'watch',
135 | })
136 | .catch(() => {});
137 |
138 | await this.markAsProcessed(objectID, seq);
139 | return;
140 | }
141 |
142 | sentry.report(new Error('Error during job'), {
143 | statusCode: err.statusCode,
144 | err,
145 | });
146 |
147 | datadog.increment('job.retries');
148 |
149 | await this.mainIndex
150 | .partialUpdateObject({
151 | objectID,
152 | retries: retries + 1,
153 | })
154 | .wait()
155 | .catch(() => {});
156 | } finally {
157 | datadog.timing('loop', Date.now() - start);
158 | }
159 | }
160 | }
161 |
--------------------------------------------------------------------------------
/src/indexers/OneTimeBackgroundIndexer.ts:
--------------------------------------------------------------------------------
1 | import ms from 'ms';
2 |
3 | import type { FinalPkg } from '../@types/pkg';
4 | import { getChangelogBackground } from '../changelog';
5 | import { getFileListMetadata } from '../saveDocs';
6 | import { datadog } from '../utils/datadog';
7 | import * as sentry from '../utils/sentry';
8 | import { offsetToTimestamp } from '../utils/time';
9 |
10 | import { Indexer } from './Indexer';
11 |
// Record stored in the one-time data index; computed once per `${name}@${version}`.
export type OneTimeDataObject = {
  name: string;
  objectID: string; // `${name}@${version}` (see taskExecutor)
  updatedAt: string; // ISO timestamp of when the data was computed
  changelogFilename: string | null;
};
18 |
19 | export class OneTimeBackgroundIndexer extends Indexer {
20 | protected readonly facetField: string = '_oneTimeDataToUpdateAt';
21 |
22 | override get facetFilter(): string {
23 | const expired = offsetToTimestamp(0);
24 |
25 | // 0 === already processed
26 | // value in the future === errored and scheduled to retry later
27 | return `NOT ${this.facetField}:0 AND ${this.facetField} <= ${expired}`;
28 | }
29 |
30 | async patchObject(
31 | pkg: FinalPkg,
32 | patch: Partial,
33 | facetValue: number
34 | ): Promise {
35 | await this.mainIndex
36 | .partialUpdateObject(
37 | {
38 | objectID: pkg.objectID,
39 | ...patch,
40 | [this.facetField]: facetValue,
41 | _revision: { _operation: 'IncrementFrom', value: pkg._revision },
42 | },
43 | { createIfNotExists: false }
44 | )
45 | .wait();
46 | }
47 |
48 | async recordExecutor(pkg: FinalPkg): Promise {
49 | await this.queueTask(pkg);
50 | }
51 |
52 | override async stop(): Promise {
53 | return super.stop(true);
54 | }
55 |
56 | async taskExecutor(pkg: FinalPkg): Promise {
57 | try {
58 | const { metadata } = await getFileListMetadata(pkg);
59 | const { changelogFilename } = metadata.changelogFilename
60 | ? metadata
61 | : await getChangelogBackground(pkg);
62 |
63 | const data = {
64 | name: `${pkg.name}@${pkg.version}`,
65 | objectID: `${pkg.name}@${pkg.version}`,
66 | updatedAt: new Date().toISOString(),
67 | changelogFilename,
68 | };
69 |
70 | await Promise.all([
71 | this.algoliaStore.oneTimeDataIndex.saveObject(data),
72 | this.patchObject(
73 | pkg,
74 | {
75 | ...metadata,
76 | changelogFilename,
77 | },
78 | 0
79 | ),
80 | ]);
81 |
82 | datadog.increment('oneTimeDataIndex.success');
83 | } catch (err) {
84 | datadog.increment('oneTimeDataIndex.failure');
85 | sentry.report(new Error(`Error in ${this.constructor.name}`), { err });
86 |
87 | await this.patchObject(
88 | pkg,
89 | {},
90 | offsetToTimestamp(ms('1 week'), new Date(pkg[this.facetField]))
91 | ).catch(() => {});
92 | }
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/src/indexers/PeriodicBackgroundIndexer.ts:
--------------------------------------------------------------------------------
1 | import type { SearchIndex } from 'algoliasearch';
2 | import Bluebird from 'bluebird';
3 | import ms from 'ms';
4 |
5 | import type { FinalPkg } from '../@types/pkg';
6 | import type { AlgoliaStore } from '../algolia';
7 | import { PackageNotFoundError } from '../errors';
8 | import * as jsDelivr from '../jsDelivr';
9 | import type { DownloadsData } from '../npm';
10 | import { computeDownload, getDocFromRegistry, getDownloads } from '../npm';
11 | import { getPopularAlternativeNames } from '../saveDocs';
12 | import { datadog } from '../utils/datadog';
13 | import * as sentry from '../utils/sentry';
14 | import { offsetToTimestamp, round } from '../utils/time';
15 |
16 | import { Indexer } from './Indexer';
17 |
// Record stored in the periodic data index: npm download counts, refreshed periodically.
export type PeriodicDataObject = DownloadsData & {
  name: string;
  objectID: string;
  updatedAt: string; // ISO timestamp of the last refresh
};

// A batch of packages whose download stats are fetched together.
type Task = { pkg: FinalPkg[] };
25 |
26 | export class PeriodicBackgroundIndexer extends Indexer {
27 | protected readonly facetField: string = '_periodicDataUpdatedAt';
28 | private packagesPerBatch: number = 127;
29 | private unscopedPackages: FinalPkg[];
30 | private notFoundIndex: SearchIndex;
31 |
32 | override get facetFilter(): string {
33 | const expired = offsetToTimestamp(-ms('30 days'));
34 | return `${this.facetField} < ${expired}`;
35 | }
36 |
37 | constructor(
38 | algoliaStore: AlgoliaStore,
39 | mainIndex: SearchIndex,
40 | notFoundIndex: SearchIndex
41 | ) {
42 | super(algoliaStore, mainIndex);
43 |
44 | this.notFoundIndex = notFoundIndex;
45 | this.unscopedPackages = [];
46 | }
47 |
48 | override async flush(): Promise {
49 | while (this.unscopedPackages.length) {
50 | await this.queueTask({
51 | pkg: this.unscopedPackages.splice(0, this.packagesPerBatch),
52 | });
53 | }
54 |
55 | return super.flush();
56 | }
57 |
58 | async recordExecutor(pkg: FinalPkg): Promise {
59 | if (pkg.objectID.startsWith('@')) {
60 | await this.queueTask({ pkg: [pkg] });
61 | return;
62 | }
63 |
64 | if (!this.unscopedPackages.find((p) => p.name === pkg.name)) {
65 | this.unscopedPackages.push(pkg);
66 | }
67 |
68 | if (this.unscopedPackages.length >= this.packagesPerBatch) {
69 | await this.queueTask({
70 | pkg: this.unscopedPackages.splice(0, this.packagesPerBatch),
71 | });
72 | }
73 | }
74 |
75 | override async stop(): Promise {
76 | return super.stop(true);
77 | }
78 |
79 | async taskExecutor(task: Task): Promise {
80 | try {
81 | const downloads = await getDownloads(task.pkg);
82 | const oneWeekAgo = offsetToTimestamp(-ms('1 week'));
83 | const dataIndexObjects: PeriodicDataObject[] = [];
84 | const patches: Array> = [];
85 |
86 | await Bluebird.map(
87 | task.pkg,
88 | async (pkg) => {
89 | const data: PeriodicDataObject = {
90 | name: pkg.name,
91 | objectID: pkg.name,
92 | updatedAt: new Date().toISOString(),
93 | totalNpmDownloads: downloads[pkg.name]?.totalNpmDownloads,
94 | packageNpmDownloads: downloads[pkg.name]?.packageNpmDownloads,
95 | };
96 |
97 | dataIndexObjects.push(data);
98 |
99 | // The npm replicate API often incorrectly reports packages there were
100 | // actually deleted from the registry. If the downloads API has no
101 | // records for the package, and the package was published more than
102 | // a while ago, we check with the registry. If the registry says the
103 | // package does not exist, we delete it.
104 | if (
105 | data.packageNpmDownloads === undefined &&
106 | pkg.created < oneWeekAgo
107 | ) {
108 | try {
109 | await getDocFromRegistry(pkg.name);
110 | } catch (e) {
111 | if (e instanceof PackageNotFoundError) {
112 | datadog.increment('periodic.notFound');
113 |
114 | await this.notFoundIndex.saveObject({
115 | name: pkg.name,
116 | objectID: pkg.name,
117 | date: new Date().toISOString(),
118 | movedBy: 'periodicIndexer',
119 | });
120 |
121 | await this.algoliaStore.periodicDataIndex.deleteObject(
122 | pkg.name
123 | );
124 |
125 | await this.mainIndex.deleteObject(pkg.name).wait();
126 | return;
127 | }
128 | }
129 | }
130 |
131 | const npmDownloads = computeDownload(
132 | pkg,
133 | data.packageNpmDownloads,
134 | data.totalNpmDownloads
135 | );
136 |
137 | const jsDelivrHits = jsDelivr.getHit(pkg);
138 | const pkgPatch = {
139 | objectID: pkg.objectID,
140 | ...(npmDownloads || {}),
141 | ...jsDelivrHits,
142 | popular: npmDownloads?.popular || jsDelivrHits.popular,
143 | };
144 |
145 | patches.push({
146 | ...pkgPatch,
147 | _searchInternal: {
148 | ...pkg._searchInternal,
149 | popularAlternativeNames: getPopularAlternativeNames({
150 | ...pkg,
151 | ...pkgPatch,
152 | }),
153 | },
154 | [this.facetField]: round(new Date(data.updatedAt)).valueOf(),
155 | });
156 | },
157 | { concurrency: 20 }
158 | );
159 |
160 | await Promise.all([
161 | this.algoliaStore.periodicDataIndex.saveObjects(dataIndexObjects),
162 | this.mainIndex.partialUpdateObjects(patches).wait(),
163 | ]);
164 |
165 | datadog.increment('periodicDataIndex.success', task.pkg.length);
166 | } catch (err) {
167 | datadog.increment('periodicDataIndex.failure', task.pkg.length);
168 | sentry.report(new Error(`Error in ${this.constructor.name}`), { err });
169 |
170 | await this.mainIndex
171 | .partialUpdateObjects(
172 | task.pkg.map((pkg) => {
173 | return {
174 | objectID: pkg.objectID,
175 | [this.facetField]: offsetToTimestamp(
176 | ms('1 day'),
177 | new Date(pkg[this.facetField])
178 | ),
179 | };
180 | })
181 | )
182 | .wait()
183 | .catch(() => {});
184 | }
185 | }
186 | }
187 |
--------------------------------------------------------------------------------
/src/jsDelivr/__test__/__snapshots__/index.test.ts.snap:
--------------------------------------------------------------------------------
// Jest Snapshot v1, https://goo.gl/fbAQLP
// Auto-generated by Jest — regenerate with `jest -u`; do not edit by hand.

3 | exports[`files getFilesList() should get a flat list of files 1`] = `
4 | Array [
5 | Object {
6 | "hash": "+uxmYs/88pYWmLwFS3M54NGjE+hX6sBmwNOpzaW1LGk=",
7 | "name": "/bin/jest.js",
8 | "size": 343,
9 | "time": "1985-10-26T08:15:00.000Z",
10 | },
11 | Object {
12 | "hash": "MvOGr1Lc6r8wEe8GNmscm3Sx/QWEFE4Is1AZ5rQzFr8=",
13 | "name": "/build/jest.d.ts",
14 | "size": 291,
15 | "time": "1985-10-26T08:15:00.000Z",
16 | },
17 | Object {
18 | "hash": "BEQ5sRqArzHCh5sNbwjxHRQunhxkCD1HXcM9EdYAKPc=",
19 | "name": "/build/jest.d.ts.map",
20 | "size": 171,
21 | "time": "1985-10-26T08:15:00.000Z",
22 | },
23 | Object {
24 | "hash": "m5wVGuVr5Pq4z5L2vpeMVA3rbLV4kQ0MCPuo0newsmY=",
25 | "name": "/build/jest.js",
26 | "size": 1030,
27 | "time": "1985-10-26T08:15:00.000Z",
28 | },
29 | Object {
30 | "hash": "m/vOMvpK2FU19W9PYavnEExEToN7HHU1mb/f/ooU3eQ=",
31 | "name": "/LICENSE",
32 | "size": 1099,
33 | "time": "1985-10-26T08:15:00.000Z",
34 | },
35 | Object {
36 | "hash": "9hWvkPsgtCTc1w0lswu1AO+Q+S19Dppeg5bNklG/Khg=",
37 | "name": "/package.json",
38 | "size": 925,
39 | "time": "1985-10-26T08:15:00.000Z",
40 | },
41 | Object {
42 | "hash": "MPu0d2f8or6adBXZZLUNh6vL7Yeg34MmOBiupdclu10=",
43 | "name": "/README.md",
44 | "size": 551,
45 | "time": "1985-10-26T08:15:00.000Z",
46 | },
47 | Object {
48 | "hash": "CycshPBWVvIRZozw+b1pnAvKYC1Q7aPvcT8tS+HPepU=",
49 | "name": "/tsconfig.json",
50 | "size": 162,
51 | "time": "1985-10-26T08:15:00.000Z",
52 | },
53 | Object {
54 | "hash": "kt2uoTK/NmyQe2OUiNwpdwxV4RxgS2gW9rEgOtj+lZU=",
55 | "name": "/tsconfig.tsbuildinfo",
56 | "size": 220798,
57 | "time": "1985-10-26T08:15:00.000Z",
58 | },
59 | ]
60 | `;
61 |
--------------------------------------------------------------------------------
/src/jsDelivr/__test__/index.test.ts:
--------------------------------------------------------------------------------
1 | import * as api from '../index';
2 |
// Silence the project logger so test output stays clean.
jest.mock('../../utils/log', () => {
  return {
    log: {
      info: jest.fn(),
      warn: jest.fn(),
      error: jest.fn(),
    },
  };
});

// These tests hit the real jsDelivr API, so allow extra time.
jest.setTimeout(10000);

// eslint-disable-next-line jest/require-top-level-describe
beforeEach(() => {
  jest.resetAllMocks();
});
19 |
describe('hits', () => {
  describe('getHits()', () => {
    beforeAll(() => {
      // Seed the in-memory hits map with a single known entry.
      api.hits.clear();
      api.hits.set('jquery', { hits: 1234, popular: true });
    });

    it('should get one formatted hit', () => {
      expect(api.getHits([{ name: 'jquery' }])).toEqual([
        {
          jsDelivrHits: 1234,
          _jsDelivrPopularity: 1,
          popular: true,
          _popularName: 'jquery',
        },
      ]);
    });
    it('should get multiple formatted hits', () => {
      // Unknown packages fall back to zeroed stats without `_popularName`.
      expect(
        api.getHits([{ name: 'jquery' }, { name: 'thispackagedoesnotexist' }])
      ).toEqual([
        {
          jsDelivrHits: 1234,
          _jsDelivrPopularity: 1,
          popular: true,
          _popularName: 'jquery',
        },
        {
          jsDelivrHits: 0,
          _jsDelivrPopularity: 0,
          popular: false,
        },
      ]);
    });
  });

  describe('loadHits()', () => {
    // NOTE: downloads live data from the jsDelivr stats endpoint.
    beforeAll(async () => {
      await api.loadHits();
    });
    it('should download all packages hits', () => {
      expect(api.hits.size).toBeGreaterThan(30000); // 32509 (2022-11)
    });

    it('should get one hit', () => {
      expect(api.hits.get('jquery')?.hits).toBeGreaterThan(1000000000); // 1065750968 (2019-08)
    });

    it('should not get one hit', () => {
      expect(api.hits.get('thispackagedoesnotexist')?.hits).toBeUndefined();
    });
  });
});
73 |
describe('files', () => {
  describe('getFilesList()', () => {
    // NOTE: fetches the real file listing from the jsDelivr API.
    it('should get a flat list of files', async () => {
      const files = await api.getFilesList({
        name: 'jest',
        version: '24.8.0',
      });
      expect(files).toMatchSnapshot();
    });

    it('should not get a files list for fake package', async () => {
      // Unknown packages resolve to an empty list rather than throwing.
      const files = await api.getFilesList({
        name: 'thispackagedoesnotexist',
        version: '3.33.0',
      });
      expect(files).toEqual([]);
    });
  });
});
93 |
--------------------------------------------------------------------------------
/src/jsDelivr/__test__/pkgTypes.test.ts:
--------------------------------------------------------------------------------
1 | import type { File } from '../index';
2 | import * as api from '../pkgTypes';
3 |
// Minimal valid File fixture; each test case overrides `name`.
const BASE_FILE: File = {
  name: '0',
  hash: 'sha256:',
  size: 0,
  time: '1985-10-26T08:15:00.000Z',
};
10 |
describe('package module/style types', () => {
  describe('package style types', () => {
    it('should return correct style types for multiple packages', () => {
      // pkgs[i] pairs with filelists[i]; an undefined file list is tolerated.
      const styleTypes = api.getStyleTypesForAll(
        [
          { styleTypes: [] },
          { styleTypes: [] },
          { styleTypes: [] },
          { styleTypes: [] },
          { styleTypes: ['css'] },
          { styleTypes: [] },
        ],
        [
          [],
          [{ ...BASE_FILE, name: '/dist/style/style.min.css' }],
          [
            { ...BASE_FILE, name: '/src/style/style.less' },
            { ...BASE_FILE, name: '/dist/style/style.min.css' },
            { ...BASE_FILE, name: '/dist/js/lib.min.js' },
            { ...BASE_FILE, name: '/style.scss' },
          ],
          undefined as any,
          [{ ...BASE_FILE, name: '/src/style/style.less' }],
          [{ ...BASE_FILE, name: '/DIST/STYLE/STYLE.MIN.CSS' }],
        ]
      );
      expect(styleTypes).toEqual([
        { styleTypes: ['none'] },
        { styleTypes: ['css'] },
        { styleTypes: ['less', 'css', 'scss'] },
        { styleTypes: ['none'] },
        { styleTypes: ['css', 'less'] },
        { styleTypes: ['css'] },
      ]);
    });

    it('should ignore blacklisted paths', () => {
      // docs/tests and dot/underscore-prefixed segments must not count.
      const styleTypes = api.getStyleTypes({ styleTypes: [] }, [
        { ...BASE_FILE, name: '/dist/style/style.min.css' },
        { ...BASE_FILE, name: '/dist/style/_source.scss' },
        { ...BASE_FILE, name: '/docs/file.scss' },
        { ...BASE_FILE, name: '/test/file.scss' },
        { ...BASE_FILE, name: '/.hidden/file.scss' },
        { ...BASE_FILE, name: '/dist/.hidden.scss' },
        { ...BASE_FILE, name: '/dist/.hidden/style.scss' },
      ]);
      expect(styleTypes).toEqual({ styleTypes: ['css'] });
    });
  });

  describe('package module types', () => {
    it('should return correct module types for multiple packages', () => {
      const moduleTypes = api.getModuleTypesForAll(
        [
          { moduleTypes: ['unknown'] },
          { moduleTypes: ['unknown'] },
          { moduleTypes: ['unknown'] },
          { moduleTypes: ['unknown'] },
          { moduleTypes: ['unknown'] },
          { moduleTypes: ['esm'] },
          { moduleTypes: ['esm', 'cjs'] },
        ],
        [
          [],
          [{ ...BASE_FILE, name: '/dist/style/style.min.css' }],
          [{ ...BASE_FILE, name: '/dist/js/lib.min.js' }],
          [{ ...BASE_FILE, name: '/dist/js/lib.min.mjs' }],
          [{ ...BASE_FILE, name: '/dist/js/lib.min.cjs' }],
          [],
          undefined as any,
        ]
      );

      expect(moduleTypes).toEqual([
        { moduleTypes: ['none'] },
        { moduleTypes: ['none'] },
        { moduleTypes: ['unknown'] },
        { moduleTypes: ['unknown'] },
        { moduleTypes: ['unknown'] },
        { moduleTypes: ['esm'] },
        { moduleTypes: ['esm', 'cjs'] },
      ]);
    });

    it('should ignore blacklisted paths', () => {
      const moduleTypes = api.getModuleTypes({ moduleTypes: ['unknown'] }, [
        { ...BASE_FILE, name: '/dist/js/_hidden.mjs' },
        { ...BASE_FILE, name: '/dist/js/.hidden.mjs' },
        { ...BASE_FILE, name: '/docs/lib.js' },
        { ...BASE_FILE, name: '/test/lib.js' },
        { ...BASE_FILE, name: '/.hidden/lib.cjs' },
        { ...BASE_FILE, name: '/dist/.hidden/lib.js' },
      ]);
      expect(moduleTypes).toEqual({ moduleTypes: ['none'] });
    });
  });
});
108 |
--------------------------------------------------------------------------------
/src/jsDelivr/index.ts:
--------------------------------------------------------------------------------
1 | import { HTTPError } from 'got/dist/source';
2 |
3 | import type { RawPkg } from '../@types/pkg';
4 | import { config } from '../config';
5 | import { datadog } from '../utils/datadog';
6 | import { log } from '../utils/log';
7 | import { request } from '../utils/request';
8 | import * as sentry from '../utils/sentry';
9 |
10 | type Hit = { type: 'npm'; name: string; hits: number };
11 | export type File = { name: string; hash: string; time: string; size: number };
12 | export type GetHit = {
13 | popular: boolean;
14 | jsDelivrHits: number;
15 | _jsDelivrPopularity: number;
16 | _popularName?: string;
17 | };
18 | export const hits = new Map();
19 |
20 | /**
21 | * Load downloads hits.
22 | */
23 | export async function loadHits(): Promise {
24 | const start = Date.now();
25 | log.info('📦 Loading hits from jsDelivr');
26 |
27 | const res = await request(config.jsDelivrHitsEndpoint, {
28 | responseType: 'json',
29 | });
30 |
31 | if (!res.body.length) {
32 | throw new Error('Empty jsDelivr data');
33 | }
34 |
35 | hits.clear();
36 |
37 | res.body.forEach((pkg, index) => {
38 | hits.set(pkg.name, { hits: pkg.hits, popular: index < 1000 });
39 | });
40 |
41 | datadog.timing('jsdelivr.loadHits', Date.now() - start);
42 | }
43 |
44 | /**
45 | * Get download hits.
46 | */
47 | export function getHits(pkgs: Array>): GetHit[] {
48 | const start = Date.now();
49 | const all = pkgs.map(getHit);
50 |
51 | datadog.timing('jsdelivr.getHits', Date.now() - start);
52 | return all;
53 | }
54 |
55 | export function getHit(pkg: Pick): GetHit {
56 | const data = hits.get(pkg.name);
57 | const jsDelivrHits = data?.hits || 0;
58 | const popular = data?.popular || false;
59 |
60 | return {
61 | popular,
62 | jsDelivrHits,
63 | // anything below 1000 hits/month is likely to mean that
64 | // someone just made a few random requests so we count that as 0
65 | _jsDelivrPopularity: Math.max(jsDelivrHits.toString().length - 3, 0),
66 | // similar to npm popular but we consider the top 1k packages instead
67 | ...(popular && {
68 | _popularName: pkg.name,
69 | }),
70 | };
71 | }
72 |
73 | /**
74 | * Get one package files list.
75 | */
76 | export async function getFilesList(
77 | pkg: Pick
78 | ): Promise {
79 | const start = Date.now();
80 | if (!pkg.name || !pkg.version) {
81 | throw new Error(
82 | `Package name should contain a version number: ${pkg.name}`
83 | );
84 | }
85 |
86 | let files: File[] = [];
87 | const url = `${config.jsDelivrPackageEndpoint}/${pkg.name}@${pkg.version}/flat`;
88 | try {
89 | const response = await request<{ default: string; files: File[] }>(url, {
90 | responseType: 'json',
91 | });
92 |
93 | if (Array.isArray(response.body.files)) {
94 | files = response.body.files;
95 | } else {
96 | sentry.report(new Error('JsDelivr network error'), {
97 | statusCode: response.statusCode,
98 | files: response.body.files,
99 | url,
100 | });
101 | }
102 | } catch (err: any) {
103 | if (
104 | !(
105 | err instanceof HTTPError && [403, 404].includes(err.response.statusCode)
106 | )
107 | ) {
108 | sentry.report(new Error('JsDelivr network error'), {
109 | statusCode: err?.response?.statusCode,
110 | err,
111 | url,
112 | });
113 | }
114 | }
115 |
116 | datadog.timing('jsdelivr.getFilesList', Date.now() - start);
117 | return files;
118 | }
119 |
120 | export function getFullURL(
121 | pkg: Pick,
122 | file: File
123 | ): string {
124 | return `https://cdn.jsdelivr.net/npm/${pkg.name}@${pkg.version}${file.name}`;
125 | }
126 |
--------------------------------------------------------------------------------
/src/jsDelivr/pkgTypes.ts:
--------------------------------------------------------------------------------
1 | import type { RawPkg, StyleType } from '../@types/pkg';
2 | import { datadog } from '../utils/datadog';
3 |
4 | import type { File } from './index';
5 |
// Extensions that mark a package as shipping styles, and the matcher built from them.
const styleFileExtensions = ['css', 'less', 'scss'];
const styleFilePattern = createFilePattern(styleFileExtensions);

// Extensions that mark a package as shipping JavaScript, and the matcher built from them.
const jsFileExtensions = ['js', 'mjs', 'cjs'];
const jsFilePattern = createFilePattern(jsFileExtensions);
11 |
12 | function createFilePattern(extensions: string[]): RegExp {
13 | const extPattern = extensions.join('|');
14 |
15 | // https://regex101.com/r/X5jQfH/2
16 | return new RegExp(
17 | `^(?:(?!\\/(docs?|documentation|examples?|samples?|demos?|tests?)\\/)(?!\\/[._]).)+\\.(${extPattern})$`,
18 | 'i'
19 | );
20 | }
21 |
22 | export function getStyleTypes(
23 | pkg: Pick,
24 | filelist: File[]
25 | ): Pick {
26 | const start = Date.now();
27 |
28 | try {
29 | const styleTypes = new Set(pkg.styleTypes);
30 |
31 | for (const file of filelist) {
32 | if (!styleFilePattern.test(file.name)) {
33 | continue;
34 | }
35 |
36 | const type = file.name.split('.').pop();
37 |
38 | if (type) {
39 | styleTypes.add(type.toLowerCase());
40 | }
41 | }
42 |
43 | if (styleTypes.size === 0) {
44 | styleTypes.add('none');
45 | }
46 |
47 | return { styleTypes: [...styleTypes] };
48 | } finally {
49 | datadog.timing('pkgTypes.getStyleTypes', Date.now() - start);
50 | }
51 | }
52 |
53 | export function getStyleTypesForAll(
54 | pkgs: Array>,
55 | filelists: File[][]
56 | ): Array> {
57 | const start = Date.now();
58 |
59 | const all = pkgs.map((pkg, index) => {
60 | return getStyleTypes(pkg, filelists[index] || []);
61 | });
62 |
63 | datadog.timing('pkgTypes.getStyleTypesForAll', Date.now() - start);
64 | return all;
65 | }
66 |
67 | export function getModuleTypes(
68 | pkg: Pick,
69 | filelist: File[]
70 | ): Pick {
71 | const start = Date.now();
72 |
73 | try {
74 | // Module type(s) already detected - it can't be none at that point
75 | if (!pkg.moduleTypes.includes('unknown')) {
76 | return { moduleTypes: pkg.moduleTypes };
77 | }
78 |
79 | for (const file of filelist) {
80 | // JS file found - it can't be non anymore
81 | if (jsFilePattern.test(file.name)) {
82 | return { moduleTypes: pkg.moduleTypes };
83 | }
84 | }
85 |
86 | return { moduleTypes: ['none'] };
87 | } finally {
88 | datadog.timing('pkgTypes.getModuleTypes', Date.now() - start);
89 | }
90 | }
91 |
92 | export function getModuleTypesForAll(
93 | pkgs: Array>,
94 | filelists: File[][]
95 | ): Array> {
96 | const start = Date.now();
97 |
98 | const all = pkgs.map((pkg, index) => {
99 | return getModuleTypes(pkg, filelists[index] || []);
100 | });
101 |
102 | datadog.timing('pkgTypes.getModuleTypesForAll', Date.now() - start);
103 | return all;
104 | }
105 |
--------------------------------------------------------------------------------
/src/npm/ChangesReader.ts:
--------------------------------------------------------------------------------
1 | import { EventEmitter } from 'events';
2 | import { setTimeout } from 'node:timers/promises';
3 |
4 | import ms from 'ms';
5 | import type { DatabaseChangesResponse } from 'nano';
6 |
7 | import { config } from '../config';
8 | import { request } from '../utils/request';
9 | import * as sentry from '../utils/sentry';
10 | import { backoff } from '../utils/wait';
11 |
// Options for ChangesReader: `since` is the `_changes` sequence token to resume from.
type ChangesReaderOptions = {
  since: string;
};
15 |
16 | export class ChangesReader extends EventEmitter {
17 | protected running: boolean = false;
18 | protected paused: boolean = false;
19 | protected since: string;
20 |
21 | constructor({ since }: ChangesReaderOptions) {
22 | super();
23 |
24 | this.since = since;
25 | }
26 |
27 | pause(): void {
28 | this.paused = true;
29 | }
30 |
31 | resume(): void {
32 | this.paused = false;
33 | }
34 |
35 | run(): void {
36 | this.running = true;
37 |
38 | this.runInternal().catch((e) => {
39 | sentry.report(e);
40 | });
41 | }
42 |
43 | async runInternal(): Promise {
44 | let retry = 0;
45 |
46 | while (this.running) {
47 | try {
48 | const { body } = await request(
49 | `${config.npmRegistryEndpoint}/_changes`,
50 | {
51 | timeout: ms('60 seconds'), // Hard timeout after which the client aborts.
52 | headers: {
53 | 'npm-replication-opt-in': 'true', // See https://github.com/orgs/community/discussions/152515
54 | },
55 | searchParams: {
56 | since: this.since,
57 | limit: 10,
58 | },
59 | responseType: 'json',
60 | }
61 | );
62 |
63 | retry = 0;
64 |
65 | if (body.last_seq) {
66 | this.since = body.last_seq;
67 | }
68 |
69 | if (body.results) {
70 | for (const result of body.results) {
71 | this.emit('change', result);
72 | }
73 |
74 | this.emit('batch', body.results);
75 | }
76 |
77 | // If there are no results, retry in 30 seconds.
78 | if (!body.results?.length) {
79 | await setTimeout(ms('30 seconds'));
80 | }
81 | } catch (e) {
82 | this.emit('error', e);
83 | await backoff(++retry, config.retryBackoffPow, config.retryBackoffMax);
84 | }
85 |
86 | while (this.running && this.paused) {
87 | await setTimeout(100);
88 | }
89 | }
90 | }
91 |
92 | stop(): void {
93 | this.running = false;
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/src/npm/Prefetcher.ts:
--------------------------------------------------------------------------------
1 | import { setTimeout } from 'node:timers/promises';
2 |
3 | import type { SearchIndex } from 'algoliasearch';
4 | import ms from 'ms';
5 | import type { DocumentListParams, DocumentResponseRow } from 'nano';
6 |
7 | import type { StateManager } from '../StateManager';
8 | import { config } from '../config';
9 | import { log } from '../utils/log';
10 | import * as sentry from '../utils/sentry';
11 |
12 | import type { GetPackage } from './types';
13 |
14 | import * as npm from './index';
15 |
// A row from the registry's `_all_docs` listing, plus the offset of the page
// it was fetched at.
export type PrefetchedPkg = Pick<
  DocumentResponseRow,
  'id' | 'value'
> & { offset: number };

/**
 * Streams all package ids from the npm registry page by page and pushes each
 * page into the Algolia bootstrap queue index. The last queued id is
 * persisted through StateManager so an interrupted bootstrap can resume.
 */
export class Prefetcher {
  private stateManager: StateManager;
  private queueIndex: SearchIndex;

  // Page size for each `_all_docs` request.
  #limit: number = config.bootstrapConcurrency;
  // NOTE(review): never written anywhere in this class, so it is always
  // empty — looks like a leftover; confirm before removing.
  #ready: PrefetchedPkg[] = [];

  // `startkey` to resume from; null means start from the beginning.
  #nextKey: string | null = null;
  #running: boolean = false;
  // Offset (in documents) of the last fetched page, as reported by CouchDB.
  #offset: number = 0;
  #finished: boolean = false;

  constructor(
    stateManager: StateManager,
    queueIndex: SearchIndex,
    opts: { nextKey: string | null }
  ) {
    this.stateManager = stateManager;
    this.queueIndex = queueIndex;
    this.#nextKey = opts.nextKey;
  }

  // Stop fetching; the background loop exits at its next iteration.
  stop(): void {
    this.#running = false;
  }

  // Approximate number of documents queued so far (since #ready stays empty,
  // this reduces to #offset + #limit in practice).
  get offset(): number {
    return this.#offset + this.#limit - this.#ready.length;
  }

  get isFinished(): boolean {
    return this.#finished;
  }

  // Start fetching pages in the background; loop failures go to Sentry.
  run(): void {
    this.#running = true;

    this.runInternal().catch((e) => {
      sentry.report(e);
    });
  }

  async runInternal(): Promise {
    // One page per second until stopped or the listing is exhausted.
    while (this.#running) {
      await this.queueOnePage();
      await setTimeout(ms('1 second'));
    }
  }

  private async queueOnePage(): Promise {
    const options: Partial = {
      limit: this.#limit,
    };

    if (this.#nextKey) {
      options.startkey = this.#nextKey;
    }

    try {
      const { rows: packages, offset } = await npm.findAll(options);

      // An empty page means we walked past the last document.
      if (packages.length <= 0) {
        this.#finished = true;
        this.#running = false;
        this.#offset = offset;
        log.info('[pf] done');
        return;
      }

      // Skip the first item as we already processed it on the previous page.
      if (this.#nextKey && packages.at(0)?.id === this.#nextKey) {
        packages.shift();
      }

      // Enqueue the page; objectID doubles as the package name so retries
      // overwrite rather than duplicate.
      await this.queueIndex.saveObjects(
        packages.map((pkg) => ({
          name: pkg.id,
          objectID: pkg.id,
          retries: 0,
          pkg,
        }))
      );

      // Persist progress, but never move the saved cursor backwards
      // (ids come back in ascending order).
      const lastId = (await this.stateManager.get()).bootstrapLastId;
      const pkg = packages.at(-1);

      if (pkg && (!lastId || lastId < pkg.id)) {
        await this.stateManager.save({
          bootstrapLastId: pkg.id,
        });
      }

      this.#offset = offset;
      this.#nextKey = packages[packages.length - 1]!.id;
    } catch (err: any) {
      sentry.report(err);

      // Registry rate limit: back off before the next page.
      if (err.statusCode === 429) {
        log.info('[pf] waiting');
        await setTimeout(ms('2 minutes'));
      }
    }
  }
}
125 |
--------------------------------------------------------------------------------
/src/npm/__tests__/index.test.ts:
--------------------------------------------------------------------------------
1 | import { PackageNotFoundError } from '../../errors';
2 | import type { DownloadsData } from '../index';
3 | import * as api from '../index';
4 | import { computeDownload } from '../index';
5 |
// Integration tests: these hit the live npm registry and downloads APIs,
// hence the generous timeout.
jest.setTimeout(15000);

describe('getDocFromRegistry()', () => {
  it('retrieves a single doc', async () => {
    const doc = await api.getDocFromRegistry('jsdelivr');

    expect(doc.name).toBe('jsdelivr');
    expect(Object.keys(doc.versions).length).toBeGreaterThanOrEqual(2);
  });

  it('throws PackageNotFoundError for non-existent packages', async () => {
    await expect(api.getDocFromRegistry('jsdelivrxxxx')).rejects.toBeInstanceOf(
      PackageNotFoundError
    );
  });

  it('throws PackageNotFoundError for packages without versions', async () => {
    await expect(
      api.getDocFromRegistry('ebay-app-meta')
    ).rejects.toBeInstanceOf(PackageNotFoundError);
  });
});

describe('getDependents()', () => {
  let dependents;
  beforeAll(async () => {
    dependents = await api.getDependents([
      { name: 'jest' },
      { name: '@angular/core' },
      { name: 'holmes.js' },
    ]);
  });

  it('contains the correct keys', () => {
    expect(dependents).toEqual(
      expect.arrayContaining([
        expect.objectContaining({
          dependents: expect.any(Number),
          humanDependents: expect.any(String),
        }),
        expect.objectContaining({
          dependents: expect.any(Number),
          humanDependents: expect.any(String),
        }),
      ])
    );
  });

  // Dependents are hard-coded to 0 upstream (npm/registry#361).
  it('has the right fake value', () => {
    const [jest, angular, holmes] = dependents.map((pkg) => pkg.dependents);
    expect(jest).toBe(0);
    expect(angular).toBe(0);
    expect(holmes).toBe(0);
  });
});

describe('fetchDownload()', () => {
  it('should download one package and return correct response', async () => {
    const dl = await api.fetchDownload('jest');
    expect(dl).toHaveProperty('jest');
    expect(dl.jest).toEqual({
      packageNpmDownloads: expect.any(Number),
    });
  });

  it('should download one scoped package and return correct response', async () => {
    const dl = await api.fetchDownload('@angular/core');
    expect(dl).toHaveProperty('@angular/core');
    expect(dl['@angular/core']).toEqual({
      packageNpmDownloads: expect.any(Number),
    });
  });

  it('should download 2 packages and return correct response', async () => {
    const dl = await api.fetchDownload('jest,holmes.js');
    expect(dl).toHaveProperty('jest');
    // Array form so the dot in 'holmes.js' is not treated as a path.
    expect(dl).toHaveProperty(['holmes.js']);
  });
});

describe('getDownloads()', () => {
  let downloads: Awaited>;

  beforeAll(async () => {
    await api.loadTotalDownloads();

    downloads = await api.getDownloads([
      { name: 'jest' },
      { name: 'holmes.js' },
    ]);

    // Scoped packages must be requested one at a time; merge separately.
    downloads = {
      ...downloads,
      ...(await api.getDownloads([{ name: '@angular/core' }])),
    };
  });

  it('contains the correct keys', () => {
    expect(downloads).toEqual({
      jest: expect.objectContaining({
        packageNpmDownloads: expect.any(Number),
        totalNpmDownloads: expect.any(Number),
      }),
      'holmes.js': expect.objectContaining({
        packageNpmDownloads: expect.any(Number),
        totalNpmDownloads: expect.any(Number),
      }),
      '@angular/core': expect.objectContaining({
        packageNpmDownloads: expect.any(Number),
        totalNpmDownloads: expect.any(Number),
      }),
    });
  });

  // NOTE(review): the two tests below destructure by object iteration order,
  // assuming insertion order jest, holmes.js, @angular/core — confirm this
  // holds if the beforeAll merge changes.
  it('has the right approximate value for downloadsLast30Days', () => {
    const [jest, holmes, angular] = Object.values(downloads).map((pkg) =>
      pkg.packageNpmDownloads!.toString()
    );

    expect(jest!.length).toBeGreaterThanOrEqual(6);
    expect(jest!.length).toBeLessThanOrEqual(9);

    expect(angular!.length).toBeGreaterThanOrEqual(6);
    expect(angular!.length).toBeLessThanOrEqual(8);

    expect(holmes!.length).toBeGreaterThanOrEqual(2);
    expect(holmes!.length).toBeLessThanOrEqual(4);
  });

  it('has the right approximate value for downloadsMagnitude', () => {
    const [jest, holmes, angular] = Object.entries(
      downloads
    ).map(
      ([name, pkg]) =>
        computeDownload(
          { name },
          pkg.packageNpmDownloads,
          pkg.totalNpmDownloads
        )?._downloadsMagnitude
    );

    expect(jest).toBeGreaterThanOrEqual(6);
    expect(jest).toBeLessThanOrEqual(9);

    expect(angular).toBeGreaterThanOrEqual(6);
    expect(angular).toBeLessThanOrEqual(8);

    expect(holmes).toBeGreaterThanOrEqual(2);
    expect(holmes).toBeLessThanOrEqual(4);
  });

  it('validates package batching', async () => {
    await expect(
      api.getDownloads([{ name: '@scope/p-1' }, { name: '@scope/p-2' }])
    ).rejects.toThrow('one at a time');
  });

  it('returns undefined for non-existent packages without failing the valid ones', async () => {
    const result = await api.getDownloads([
      { name: 'jsdelivr' },
      { name: 'jsdelivrxxxx' },
    ]);

    expect(result.jsdelivr!.packageNpmDownloads).toBeGreaterThan(0);
    expect(result.jsdelivrxxxx!.packageNpmDownloads).toBeUndefined();
  });
});
173 |
--------------------------------------------------------------------------------
/src/npm/index.ts:
--------------------------------------------------------------------------------
1 | import { HTTPError } from 'got';
2 | import _ from 'lodash';
3 | import ms from 'ms';
4 | import type { DocumentListParams, DocumentListResponse } from 'nano';
5 | import nano from 'nano';
6 | import numeral from 'numeral';
7 | import PQueue from 'p-queue';
8 |
9 | import type { RawPkg } from '../@types/pkg';
10 | import { config } from '../config';
11 | import { PackageNotFoundError } from '../errors';
12 | import { datadog } from '../utils/datadog';
13 | import { log } from '../utils/log';
14 | import { httpsAgent, request, USER_AGENT } from '../utils/request';
15 |
16 | import type { GetInfo, GetPackage, PackageDownload } from './types';
17 |
// Dependents info merged into each Algolia record.
type GetDependent = { dependents: number; humanDependents: string };
// Download-derived attributes merged into each Algolia record.
type GetDownload = {
  downloadsLast30Days: number;
  humanDownloadsLast30Days: string;
  downloadsRatio: number;
  popular: boolean;
  _downloadsMagnitude: number;
  _popularName?: string;
};
// Raw download counters as returned by getDownloads().
export type DownloadsData = {
  totalNpmDownloads?: number;
  packageNpmDownloads?: number;
};
// In-memory cache of the registry-wide download total; populated by
// loadTotalDownloads(), `date` records when it was last refreshed.
export const cacheTotalDownloads: { total?: number; date?: number } = {
  total: undefined,
  date: undefined,
};
35 |
// CouchDB client pointed at the npm replication endpoint.
export const registry = nano({
  url: config.npmRegistryEndpoint,
  requestDefaults: {
    agent: httpsAgent,
    timeout: 30000,
    headers: {
      'user-agent': USER_AGENT,
      'Accept-Encoding': 'deflate, gzip',
      'content-type': 'application/json',
      accept: 'application/json',
      'npm-replication-opt-in': 'true', // See https://github.com/orgs/community/discussions/152515
    },
  },
});

export const db = registry.use(config.npmRegistryDBName);
// Client-side rate limiting: at most 6 requests per second per upstream API.
const registryQueue = new PQueue({ intervalCap: 6, interval: 1000 });
const downloadsQueue = new PQueue({ intervalCap: 6, interval: 1000 });
54 |
55 | /**
56 | * Find all packages in registry.
57 | */
58 | async function findAll(
59 | options: Partial
60 | ): Promise> {
61 | const start = Date.now();
62 |
63 | const results = await db.list({
64 | ...options,
65 | });
66 |
67 | datadog.timing('db.allDocs', Date.now() - start);
68 |
69 | return results;
70 | }
71 |
/**
 * Fetch a single package document from the npm registry, rate-limited
 * through `registryQueue`.
 *
 * @param name - Package name (scoped names allowed).
 * @throws PackageNotFoundError on HTTP 404, or when the package has no
 *   versions left (i.e. it was unpublished).
 */
async function getDocFromRegistry(name: string): Promise {
  const start = Date.now();

  try {
    const doc = await registryQueue.add(() =>
      request(`${config.npmRootEndpoint}/${name}`, {
        responseType: 'json',
      })
    );

    // Package without versions means it was unpublished.
    // Treat it the same as if it was not found at all.
    if (_.isEmpty(doc.body.versions)) {
      throw new PackageNotFoundError();
    }

    return doc.body;
  } catch (e) {
    // Map registry 404s to the domain error; everything else bubbles up.
    if (e instanceof HTTPError && e.response.statusCode === 404) {
      throw new PackageNotFoundError();
    }

    throw e;
  } finally {
    datadog.timing('npm.getDocRegistry.one', Date.now() - start);
  }
}
99 |
100 | /**
101 | * Get info about registry.
102 | */
103 | async function getInfo(): Promise<{ nbDocs: number; seq: number }> {
104 | const start = Date.now();
105 |
106 | const {
107 | body: { doc_count: nbDocs, update_seq: seq },
108 | } = await request(
109 | `${config.npmRegistryEndpoint}/${config.npmRegistryDBName}/`,
110 | {
111 | headers: {
112 | 'npm-replication-opt-in': 'true', // See https://github.com/orgs/community/discussions/152515
113 | },
114 | responseType: 'json',
115 | }
116 | );
117 |
118 | datadog.timing('npm.info', Date.now() - start);
119 |
120 | return {
121 | nbDocs,
122 | seq,
123 | };
124 | }
125 |
/**
 * Resolve dependents info for a list of packages.
 *
 * Always resolves to zeroed counts for now, pending
 * https://github.com/npm/registry/issues/361.
 *
 * @param pkgs - Package list.
 */
function getDependents(
  pkgs: Array>
): Promise {
  return Promise.all(pkgs.map((pkg) => getDependent(pkg)));
}
137 |
138 | function getDependent(_pkg: Pick): GetDependent {
139 | return { dependents: 0, humanDependents: '0' };
140 | }
141 |
142 | async function loadTotalDownloads(): Promise {
143 | const start = Date.now();
144 |
145 | const {
146 | body: { downloads: totalNpmDownloadsPerDay },
147 | } = await request<{ downloads: Array<{ downloads: number }> }>(
148 | `${config.npmDownloadsEndpoint}/range/last-month`,
149 | {
150 | responseType: 'json',
151 | }
152 | );
153 |
154 | const total = totalNpmDownloadsPerDay.reduce(
155 | (agg, { downloads: dayDownloads }) => agg + dayDownloads,
156 | 0
157 | );
158 |
159 | cacheTotalDownloads.date = start;
160 | cacheTotalDownloads.total = total;
161 |
162 | datadog.timing('npm.loadTotalDownloads', Date.now() - start);
163 | }
164 |
/**
 * Get total npm downloads.
 *
 * Reads the in-memory cache populated by `loadTotalDownloads()`; resolves to
 * `undefined` if that loader has not completed yet.
 */
async function getTotalDownloads(): Promise {
  return cacheTotalDownloads.total;
}
171 |
/**
 * Get download stats for a list of packages.
 *
 * @param pkgNames - Comma-separated, URL-encoded package names as accepted by
 *   the npm downloads "point" API.
 * @param retry - Internal recursion counter, capped at `config.retryMax`.
 * @returns A map of package name to download counters; `{}` for 404s or
 *   non-200 responses. Other errors are rethrown after logging.
 */
async function fetchDownload(
  pkgNames: string,
  retry: number = 0
): Promise> {
  const start = Date.now();

  try {
    const response = await downloadsQueue.add(() => {
      datadog.increment('npm.downloads.requests');

      return request>(
        `${config.npmDownloadsEndpoint}/point/last-month/${pkgNames}`,
        {
          responseType: 'json',
        }
      );
    });

    if (response.statusCode !== 200 || !response.body) {
      return {};
    }

    // Single package
    if (response.body.downloads) {
      return {
        [response.body.package as string]: {
          packageNpmDownloads: response.body?.downloads as number,
        },
      };
    }

    // Bulk response: one entry per package; unknown packages come back null.
    return _.mapValues(response.body, (record) => {
      return {
        packageNpmDownloads:
          (typeof record === 'object' && record?.downloads) || undefined,
      };
    });
  } catch (error) {
    // Throttled or upstream failure: pause the shared queue for a minute
    // (once), then retry this call up to the configured maximum.
    if (
      error instanceof HTTPError &&
      (error.response.statusCode === 429 || error.response.statusCode >= 500)
    ) {
      datadog.increment(`npm.downloads.throttle`);

      if (!downloadsQueue.isPaused) {
        downloadsQueue.pause();
        setTimeout(() => downloadsQueue.start(), ms('1 minute')).unref();
      }

      if (retry < config.retryMax) {
        return fetchDownload(pkgNames, retry + 1);
      }
    }

    // Unknown package(s): treat as "no data" rather than an error.
    if (error instanceof HTTPError && error.response.statusCode === 404) {
      return {};
    }

    datadog.increment(`npm.downloads.failure`);
    log.warn(`An error occurred when getting download of ${pkgNames} ${error}`);
    throw error;
  } finally {
    datadog.timing('npm.fetchDownload', Date.now() - start);
  }
}
240 |
241 | export function computeDownload(
242 | pkg: Pick,
243 | downloadsLast30Days: number | undefined,
244 | totalNpmDownloads: number | undefined
245 | ): GetDownload | null {
246 | if (!downloadsLast30Days || !totalNpmDownloads) {
247 | return null;
248 | }
249 |
250 | const downloadsRatio = Number(
251 | ((downloadsLast30Days / totalNpmDownloads) * 100).toFixed(4)
252 | );
253 | const popular = downloadsRatio > config.popularDownloadsRatio;
254 | const downloadsMagnitude = downloadsLast30Days
255 | ? downloadsLast30Days.toString().length
256 | : 0;
257 |
258 | return {
259 | downloadsLast30Days,
260 | humanDownloadsLast30Days: numeral(downloadsLast30Days).format('0.[0]a'),
261 | downloadsRatio,
262 | popular,
263 | _downloadsMagnitude: downloadsMagnitude,
264 | // if the package is popular, we copy its name to a dedicated attribute
265 | // which will make popular records' `name` matches to be ranked higher than other matches
266 | // see the `searchableAttributes` index setting
267 | ...(popular && {
268 | _popularName: pkg.name,
269 | }),
270 | };
271 | }
272 |
/**
 * Get downloads for all packages passed in arguments.
 *
 * @param pkgs - Packages to look up. Scoped packages must be requested one
 *   at a time (the bulk downloads API does not support them).
 * @returns Map of package name to its download counters plus the cached
 *   registry-wide total.
 */
async function getDownloads(
  pkgs: Array>
): Promise> {
  const start = Date.now();

  if (pkgs.length > 1 && pkgs.some((pkg) => pkg.name.startsWith('@'))) {
    throw new Error(
      `Scoped packages can only be requested separately, one at a time.`
    );
  }

  const encodedPackageNames = pkgs
    .map((pkg) => pkg.name)
    .map((name) => encodeURIComponent(name));

  if (encodedPackageNames.length > 1) {
    // why do we do this? see https://github.com/npm/registry/issues/104
    encodedPackageNames.unshift('');
  }

  const totalNpmDownloads = await getTotalDownloads();
  const packageNpmDownloads = await fetchDownload(
    encodedPackageNames.join(',')
  );

  datadog.timing('npm.getDownloads', Date.now() - start);

  // pickBy drops entries with a falsy key (e.g. the empty-string artifact),
  // then the cached total is attached to every remaining record.
  return _.mapValues(
    _.pickBy(packageNpmDownloads, (value, key) => key),
    (pkg) => {
      return { ...pkg, totalNpmDownloads };
    }
  );
}
310 |
311 | export {
312 | findAll,
313 | loadTotalDownloads,
314 | getInfo,
315 | getDocFromRegistry,
316 | getDependents,
317 | getDependent,
318 | fetchDownload,
319 | getDownloads,
320 | };
321 |
--------------------------------------------------------------------------------
/src/npm/types.ts:
--------------------------------------------------------------------------------
1 | import type { DocumentLookupFailure } from 'nano';
2 |
// One row of the npm downloads "point" API response for a single package.
export interface PackageDownload {
  downloads: number;
  package: string;
  // start: string;
  // end: string;
}

// Registry-level info returned by the CouchDB database endpoint.
export interface GetInfo {
  doc_count: number;
  update_seq: number;
}

export interface GetUser {
  name: string;
  email?: string;
}

// A single published version inside a registry package document.
export interface GetVersion {
  _from?: string;
  _id?: string;
  _npmUser?: GetUser;
  _npmVersion?: string;
  _nodeVersion?: string;
  _npmOperationalInternal?: Record;
  _shasum?: string;
  _resolved?: string;
  author?: GetUser;
  description?: string;
  dist?: {
    shasum: string;
    tarball: string;
    integrity?: string;
    [key: string]: any | undefined;
  };
  config?: {
    access?: 'public';
  };
  license?: string;

  type?: 'commonjs' | 'module';
  module?: string;
  main?: string;
  exports?: PackageExports;

  repository?: PackageRepo;
  maintainers?: GetUser[];
  name: string;
  scripts?: Record;
  version: string;
  deprecated?: boolean | string;
  schematics?: string;
  types?: string;
  typings?: string;
  style?: string;
  dependencies?: Record;
  devDependencies?: Record;
  peerDependencies?: Record;
  optionalDependencies?: Record;
  gitHead?: string;
  bugs?: { url: string };
  homepage?: string;
  files?: string[];
  keywords?: string[];

  // package.json allows arbitrary extra fields.
  [key: string]: any;
}

export interface PackageRepo {
  type: string;
  url: string;
  directory?: string;
}

// Recursive shape of package.json "exports" maps.
export interface PackageExports {
  [key: string]: PackageExports | string;
}

// A full registry package document (all versions, metadata, timestamps).
export interface GetPackage {
  _id: string;
  _rev: string;
  'dist-tags': { [key: string]: string };
  license?: string;
  maintainers: GetUser[];
  name: string;
  description?: string;
  homepage?: string;
  bugs?: { url: string };
  readme: string;
  readmeFilename: string;
  time: {
    created: string;
    modified: string;
    // One entry per published version: version -> ISO timestamp.
    [key: string]: string;
  };
  author?: GetUser;
  users?: Record;
  versions: Record;
  keywords?: string[] | string;
  contributors?: Array<{ name: string }>;
  repository?: PackageRepo;
  schematics?: string;
  types?: string;
  typings?: string;

  [key: string]: any;
}

// Slimmed-down package document used where full version data is not needed.
export interface GetPackageLight {
  name: string;
  'dist-tags': Record;
  versions: Record>;
  modified: string;
}
116 |
117 | export function isFailure(change: any): change is DocumentLookupFailure {
118 | return change.error && !change.id;
119 | }
120 |
--------------------------------------------------------------------------------
/src/saveDocs.ts:
--------------------------------------------------------------------------------
1 | import type { SearchIndex } from 'algoliasearch';
2 |
3 | import type { FinalPkg, RawPkg } from './@types/pkg';
4 | import { getChangelog } from './changelog';
5 | import { config } from './config';
6 | import type { OneTimeDataObject } from './indexers/OneTimeBackgroundIndexer';
7 | import type { PeriodicDataObject } from './indexers/PeriodicBackgroundIndexer';
8 | import * as jsDelivr from './jsDelivr';
9 | import { getModuleTypes, getStyleTypes } from './jsDelivr/pkgTypes';
10 | import * as npm from './npm';
11 | import { computeDownload } from './npm';
12 | import { getTypeScriptSupport } from './typescript';
13 | import { datadog } from './utils/datadog';
14 | import { offsetToTimestamp, round } from './utils/time';
15 |
/**
 * Enrich one formatted package with metadata (downloads, file-derived info,
 * jsDelivr hits) and persist it to the main Algolia index.
 */
export async function saveDoc({
  formatted,
  index,
  oneTimeDataIndex,
  periodicDataIndex,
}: {
  formatted: RawPkg;
  index: SearchIndex;
  oneTimeDataIndex: SearchIndex;
  periodicDataIndex: SearchIndex;
}): Promise {
  const start = Date.now();
  const pkg = await addMetaData(formatted, oneTimeDataIndex, periodicDataIndex);

  // Time the Algolia write separately from the whole save.
  const start2 = Date.now();
  await index.saveObject(pkg);
  datadog.timing('saveDocs.saveObject.one', Date.now() - start2);

  datadog.timing('saveDocs.one', Date.now() - start);
}
36 |
/**
 * Merge all derived metadata into a raw package record:
 * - dependents + jsDelivr hit counts;
 * - file-list-derived metadata (changelog, TS support, module/style types);
 * - download stats cached in the periodic data index;
 * - flags telling the background indexers whether a re-index is needed.
 *
 * Security-held packages are returned untouched.
 */
async function addMetaData(
  pkg: RawPkg,
  oneTimeDataIndex: SearchIndex,
  periodicDataIndex: SearchIndex
): Promise {
  const start = Date.now();
  let periodicDataUpdatedAt = 0;
  let download;

  if (pkg.isSecurityHeld) {
    return pkg;
  }

  const [dependent, hit] = [npm.getDependent(pkg), jsDelivr.getHit(pkg)];
  const { filelist, metadata } = await getFileListMetadata(pkg);

  let hasAllOneTimeData = Boolean(metadata.changelogFilename);
  let needsOneTimeReindex = !hasAllOneTimeData || !filelist.length;

  // Fall back to previously computed one-time data for this exact version.
  if (!hasAllOneTimeData) {
    try {
      const data = await oneTimeDataIndex.getObject(
        `${pkg.name}@${pkg.version}`
      );

      datadog.increment('oneTimeDataIndex.hit');

      if (!metadata.changelogFilename) {
        metadata.changelogFilename = data.changelogFilename;
      }

      hasAllOneTimeData = true;
      // hasAllOneTimeData is true here, so this reduces to !filelist.length.
      needsOneTimeReindex = !hasAllOneTimeData || !filelist.length;
    } catch {
      datadog.increment('oneTimeDataIndex.miss');
    }
  }

  // Download stats are refreshed periodically and cached per package name;
  // a miss simply leaves `download` undefined.
  try {
    const data = await periodicDataIndex.getObject(
      pkg.name
    );

    datadog.increment('periodicDataIndex.hit');

    download = computeDownload(
      pkg,
      data.packageNpmDownloads,
      data.totalNpmDownloads
    );

    periodicDataUpdatedAt = round(new Date(data.updatedAt)).valueOf();
  } catch {
    datadog.increment('periodicDataIndex.miss');
  }

  // Spread order matters: later spreads override earlier ones.
  const final = {
    ...pkg,
    ...(download || {}),
    ...dependent,
    ...metadata,
    ...hit,
    popular: download?.popular || hit.popular,
    // 0 disables the one-time reindex; otherwise schedule it for "now".
    _oneTimeDataToUpdateAt: needsOneTimeReindex ? offsetToTimestamp(0) : 0,
    _periodicDataUpdatedAt: periodicDataUpdatedAt,
    _searchInternal: {
      ...pkg._searchInternal,
    },
  };

  // Depends on the merged `popular`/download values, so computed last.
  final._searchInternal.popularAlternativeNames =
    getPopularAlternativeNames(final);

  datadog.timing('saveDocs.addMetaData.one', Date.now() - start);
  return final;
}
113 |
/**
 * Fetch the package's published file list from jsDelivr once, then derive
 * every file-based metadata facet (changelog, TypeScript support, module
 * types, style types) from it in parallel.
 */
export async function getFileListMetadata(pkg: RawPkg): Promise<{
  filelist: Awaited>;
  metadata: Awaited> &
    Awaited> &
    Awaited> &
    Awaited>;
}> {
  const filelist = await jsDelivr.getFilesList(pkg);

  const [changelog, ts, moduleTypes, styleTypes] = await Promise.all([
    getChangelog(pkg, filelist),
    getTypeScriptSupport(pkg, filelist),
    getModuleTypes(pkg, filelist),
    getStyleTypes(pkg, filelist),
  ]);

  return {
    filelist,
    // Each facet contributes disjoint keys, so a flat merge is safe.
    metadata: {
      ...changelog,
      ...ts,
      ...moduleTypes,
      ...styleTypes,
    },
  };
}
140 |
141 | export function getPopularAlternativeNames(pkg: FinalPkg): string[] {
142 | const hasFewDownloads =
143 | pkg.downloadsLast30Days <= config.alternativeNamesNpmDownloadsThreshold &&
144 | pkg.jsDelivrHits <= config.alternativeNamesJsDelivrHitsThreshold;
145 |
146 | const addPopularAlternativeNames =
147 | pkg.popular ||
148 | (!pkg.isDeprecated && !pkg.isSecurityHeld && !hasFewDownloads);
149 |
150 | return addPopularAlternativeNames ? pkg._searchInternal.alternativeNames : [];
151 | }
152 |
--------------------------------------------------------------------------------
/src/typescript/index.test.ts:
--------------------------------------------------------------------------------
1 | import * as api from './index';
2 |
// Network-backed tests: loadTypesIndex() downloads the DefinitelyTyped index.
jest.setTimeout(15000);

describe('loadTypesIndex()', () => {
  // NOTE(review): later tests rely on the cache this test populates, so the
  // describe blocks are order-dependent within this file.
  it('should download and cache all @types', async () => {
    expect(api.typesCache).not.toHaveProperty('algoliasearch');
    expect(api.isDefinitelyTyped({ name: 'algoliasearch' })).toBeUndefined();

    await api.loadTypesIndex();
    expect(api.typesCache).toHaveProperty('algoliasearch');
    expect(api.typesCache).not.toHaveProperty('algoliasearch/lite');

    expect(api.typesCache.algoliasearch).toBe('algoliasearch');
    expect(api.typesCache['algoliasearch/lite']).toBeUndefined();
    expect(api.typesCache.doesnotexist).toBeUndefined();

    expect(api.isDefinitelyTyped({ name: 'algoliasearch' })).toBe(
      'algoliasearch'
    );
  });
});

describe('getTypeScriptSupport()', () => {
  it('If types are already calculated - return early', () => {
    const typesSupport = api.getTypeScriptSupport(
      {
        name: 'Has Types',
        types: { ts: 'included' },
        version: '1.0',
      },
      []
    );

    expect(typesSupport).toEqual({ types: { ts: 'included' } });
  });

  it('Handles not having any possible TS types', () => {
    const typesSupport = api.getTypeScriptSupport(
      {
        name: 'my-lib',
        types: { ts: false },
        version: '1.0',
      },
      []
    );
    expect(typesSupport).toEqual({ types: { ts: false } });
  });

  describe('Definitely Typed', () => {
    it('Checks for @types/[name]', () => {
      const atTypesSupport = api.getTypeScriptSupport(
        {
          name: 'lodash.valuesin',
          types: { ts: false },
          version: '1.0',
        },
        []
      );
      expect(atTypesSupport).toEqual({
        types: {
          ts: 'definitely-typed',
          definitelyTyped: '@types/lodash.valuesin',
        },
      });
    });

    // Scoped packages map to @types/scope__name on DefinitelyTyped.
    it('Checks for @types/[scope__name]', () => {
      const atTypesSupport = api.getTypeScriptSupport(
        {
          name: '@mapbox/geojson-area',
          types: { ts: false },
          version: '1.0',
        },
        []
      );
      expect(atTypesSupport).toEqual({
        types: {
          ts: 'definitely-typed',
          definitelyTyped: '@types/mapbox__geojson-area',
        },
      });

      const atTypesSupport2 = api.getTypeScriptSupport(
        {
          name: '@reach/router',
          types: { ts: false },
          version: '1.0',
        },
        []
      );
      expect(atTypesSupport2).toEqual({
        types: {
          ts: 'definitely-typed',
          definitelyTyped: '@types/reach__router',
        },
      });
    });
  });

  describe('FilesList', () => {
    // A shipped .d.ts file upgrades the package to 'included'.
    it('should match a correct filesList', () => {
      const atTypesSupport = api.getTypeScriptSupport(
        {
          name: 'doesnotexist',
          types: { ts: false },
          version: '1.0',
        },
        [
          { name: 'index.js', hash: '', time: '', size: 0 },
          { name: 'index.d.ts', hash: '', time: '', size: 0 },
        ]
      );
      expect(atTypesSupport).toEqual({
        types: {
          ts: 'included',
        },
      });
    });

    it('should not match an incorrect filesList', () => {
      const atTypesSupport = api.getTypeScriptSupport(
        {
          name: 'doesnotexist',
          types: { ts: false },
          version: '1.0',
        },
        [
          { name: 'index.js', hash: '', time: '', size: 0 },
          { name: 'index.ts', hash: '', time: '', size: 0 },
          { name: 'index.md', hash: '', time: '', size: 0 },
        ]
      );
      expect(atTypesSupport).toEqual({
        types: {
          ts: false,
        },
      });
    });
  });
});
142 |
--------------------------------------------------------------------------------
/src/typescript/index.ts:
--------------------------------------------------------------------------------
1 | import type { RawPkg } from '../@types/pkg';
2 | import { config } from '../config';
3 | import type { File } from '../jsDelivr';
4 | import { datadog } from '../utils/datadog';
5 | import { log } from '../utils/log';
6 | import { request } from '../utils/request';
7 |
// Map of npm package name -> its DefinitelyTyped (@types/*) package name.
// Populated once by loadTypesIndex(). Object.create(null) avoids prototype
// key collisions (e.g. a package literally named "constructor").
export const typesCache: Record = Object.create(null);

type TypesEntry = {
  p: string | null; // package repo
  l: string | null; // package name
  t: string; // @types package name
};
15 |
/**
 * Microsoft build a index.json with all @types/* on each publication.
 * - https://github.com/microsoft/types-publisher/blob/master/src/create-search-index.ts.
 *
 * Downloads that index and fills `typesCache` (npm name -> @types name).
 * Entries without a package name (`l`) are skipped.
 */
export async function loadTypesIndex(): Promise {
  const start = Date.now();

  const { body: data } = await request(
    config.typescriptTypesIndex,
    {
      decompress: true,
      responseType: 'json',
    }
  );

  log.info(`📦 Typescript preload, found ${data.length} @types`);

  data.forEach((entry) => {
    if (entry.l) {
      typesCache[entry.l] = entry.t;
    }
  });

  datadog.timing('typescript.loadTypesIndex', Date.now() - start);
}
41 |
42 | export function isDefinitelyTyped({ name }): string | undefined {
43 | return typesCache[name];
44 | }
45 |
46 | /**
47 | * Basically either
48 | * - { types: { ts: false }} for no existing TypeScript support
49 | * - { types: { ts: "@types/module" }} - for definitely typed support
50 | * - { types: { ts: "included" }} - for types shipped with the module.
51 | */
52 | export function getTypeScriptSupport(
53 | pkg: Pick,
54 | filelist: File[]
55 | ): Pick {
56 | const start = Date.now();
57 |
58 | try {
59 | // Already calculated in `formatPkg`
60 | if (pkg.types.ts === 'included') {
61 | return { types: pkg.types };
62 | }
63 |
64 | for (const file of filelist) {
65 | if (!file.name.endsWith('.d.ts')) {
66 | continue;
67 | }
68 |
69 | datadog.increment('jsdelivr.getTSSupport.hit');
70 |
71 | return { types: { ts: 'included' } };
72 | }
73 |
74 | // The 2nd most likely is definitely typed
75 | const defTyped = isDefinitelyTyped({ name: pkg.name });
76 | if (defTyped) {
77 | return {
78 | types: {
79 | ts: 'definitely-typed',
80 | definitelyTyped: `@types/${defTyped}`,
81 | },
82 | };
83 | }
84 | datadog.increment('jsdelivr.getTSSupport.miss');
85 |
86 | return { types: { ts: false } };
87 | } finally {
88 | datadog.timing('typescript.getSupport', Date.now() - start);
89 | }
90 | }
91 |
92 | /**
93 | * Check if packages have Typescript definitions.
94 | */
95 | export async function getTSSupport(
96 | pkgs: Array>,
97 | filelists: File[][]
98 | ): Promise>> {
99 | const start = Date.now();
100 |
101 | const all = await Promise.all(
102 | pkgs.map((pkg, index) => {
103 | return getTypeScriptSupport(pkg, filelists[index] || []);
104 | })
105 | );
106 |
107 | datadog.timing('getTSSupport', Date.now() - start);
108 | return all;
109 | }
110 |
--------------------------------------------------------------------------------
/src/utils/MetricCollector.ts:
--------------------------------------------------------------------------------
1 | import type { Agent } from 'elastic-apm-node';
2 | import _ from 'lodash';
3 |
4 | class MetricCollector {
5 | private client: Agent;
6 | private readonly events: { [k: string]: number };
7 | private readonly timings: { [k: string]: number[] };
8 | private timingsToClear: Set;
9 |
10 | constructor(client) {
11 | this.client = client;
12 | this.events = Object.create(null);
13 | this.timings = Object.create(null);
14 | this.timingsToClear = new Set();
15 | }
16 |
17 | increment(event: string, count: number = 1): this {
18 | this.logEvent(event, count);
19 | return this;
20 | }
21 |
22 | gauge(name: string, value: number): this {
23 | if (this.timings[name] === undefined) {
24 | this.registerTiming(name);
25 | }
26 |
27 | this.timings[name] = [value];
28 | return this;
29 | }
30 |
31 | logEvent(event: string, count: number = 1): this {
32 | if (this.events[event] === undefined) {
33 | this.registerEvent(event);
34 | }
35 |
36 | this.events[event] += count;
37 | return this;
38 | }
39 |
40 | timing(timing: string, duration: number): this {
41 | if (this.timings[timing] === undefined) {
42 | this.registerTiming(timing);
43 | }
44 |
45 | if (this.timingsToClear.has(timing)) {
46 | this.timingsToClear.delete(timing);
47 | this.timings[timing] = [];
48 | }
49 |
50 | this.timings[timing]!.push(duration);
51 | return this;
52 | }
53 |
54 | private registerEvent(event: string): void {
55 | this.events[event] = 0;
56 |
57 | // istanbul ignore if
58 | if (this.client.isStarted()) {
59 | this.client.registerMetric(`npmSearch.${event}`, () => {
60 | const value = this.events[event];
61 | this.events[event] = 0;
62 | return value;
63 | });
64 | }
65 | }
66 |
67 | private registerTiming(timing: string): void {
68 | this.timings[timing] = [];
69 |
70 | // istanbul ignore if
71 | if (this.client.isStarted()) {
72 | this.client.registerMetric(`npmSearch.${timing}`, () => {
73 | this.timingsToClear.add(timing);
74 | return _.sum(this.timings[timing]) / this.timings[timing]!.length;
75 | });
76 | }
77 | }
78 | }
79 |
80 | export default MetricCollector;
81 |
--------------------------------------------------------------------------------
/src/utils/datadog.ts:
--------------------------------------------------------------------------------
1 | import agent from 'elastic-apm-node';
2 |
3 | import MetricCollector from './MetricCollector';
4 |
// Shared metrics singleton. NOTE(review): despite the `datadog` name, metrics
// are reported through the Elastic APM agent — presumably a legacy name.
export const datadog = new MetricCollector(agent);
6 |
--------------------------------------------------------------------------------
/src/utils/log.ts:
--------------------------------------------------------------------------------
1 | import bunyan from 'bunyan';
2 | import bunyanDebugStream from 'bunyan-debug-stream';
3 |
4 | const stream = bunyanDebugStream({
5 | showDate: process.env.NODE_ENV !== 'production',
6 | showProcess: false,
7 | showLoggerName: false,
8 | showPid: false,
9 | showLevel: process.env.NODE_ENV === 'production',
10 | });
11 |
12 | export const log = bunyan.createLogger({
13 | name: 'npm-search',
14 | streams: [
15 | {
16 | level: 'info',
17 | type: 'raw',
18 | stream,
19 | },
20 | ],
21 | serializers: bunyanDebugStream.serializers,
22 | });
23 |
--------------------------------------------------------------------------------
/src/utils/request.ts:
--------------------------------------------------------------------------------
1 | import http from 'http';
2 | import https from 'https';
3 |
4 | import type { OptionsOfJSONResponseBody } from 'got';
5 | import got from 'got';
6 |
7 | import { config } from '../config';
8 |
// eslint-disable-next-line @typescript-eslint/no-var-requires, import/no-commonjs
const { version } = require('../../package.json');

// Sent with every outgoing request so upstream services can identify us.
export const USER_AGENT = `Algolia npm-search/${version} (https://github.com/algolia/npm-search)`;

// Shared socket options for both agents below.
const options: http.AgentOptions = {
  keepAlive: true,
  timeout: 60000, // close idle sockets after 60s
  maxFreeSockets: 2000,
  scheduling: 'fifo',
};

// The agents will pool TCP connections
export const httpAgent = new http.Agent(options);
export const httpsAgent = new https.Agent(options);
24 |
25 | // eslint-disable-next-line @typescript-eslint/explicit-function-return-type
26 | export async function request(
27 | url: string,
28 | opts: OptionsOfJSONResponseBody
29 | ) {
30 | return await got(url, {
31 | timeout: config.defaultRequestTimeout,
32 | ...opts,
33 | headers: {
34 | ...(opts.headers || {}),
35 | 'user-agent': USER_AGENT,
36 | },
37 | dnsCache: true,
38 | dnsLookupIpVersion: 'ipv4',
39 | agent: {
40 | http: httpAgent,
41 | https: httpsAgent,
42 | },
43 | });
44 | }
45 |
--------------------------------------------------------------------------------
/src/utils/sentry.ts:
--------------------------------------------------------------------------------
1 | import agent from 'elastic-apm-node';
2 |
3 | import { log } from './log';
4 |
5 | export function report(err: any, extra: any = {}): void {
6 | const logErr = [504].includes(err.statusCode)
7 | ? { statusCode: err.statusCode }
8 | : err;
9 |
10 | const logXtr = [504].includes(extra.err?.statusCode)
11 | ? { err: { statusCode: extra.err.statusCode } }
12 | : extra;
13 |
14 | log.error(logErr, logXtr);
15 | agent.captureError(err, { custom: extra });
16 | }
17 |
18 | export async function drain(): Promise {
19 | return agent.flush();
20 | }
21 |
--------------------------------------------------------------------------------
/src/utils/time.ts:
--------------------------------------------------------------------------------
1 | export function offsetToTimestamp(
2 | offset: number,
3 | now: Date | number = Date.now()
4 | ): number {
5 | return round(now.valueOf() + offset).valueOf();
6 | }
7 |
8 | export function round(date: Date | number): Date {
9 | const d = new Date(date);
10 | d.setUTCHours(0, 0, 0, 0);
11 | return d;
12 | }
13 |
--------------------------------------------------------------------------------
/src/utils/wait.ts:
--------------------------------------------------------------------------------
1 | import { setTimeout } from 'node:timers/promises';
2 |
3 | import { log } from './log';
4 |
5 | export async function backoff(
6 | retry: number,
7 | pow: number,
8 | max: number
9 | ): Promise {
10 | // retry backoff
11 | const bo = Math.min(Math.pow(retry + 1, pow) * 1000, max);
12 | log.info('Retrying (', retry, '), waiting for', bo);
13 | await setTimeout(bo);
14 | }
15 |
--------------------------------------------------------------------------------
/src/watch.ts:
--------------------------------------------------------------------------------
1 | import chalk from 'chalk';
2 | import type { DatabaseChangesResultItem } from 'nano';
3 |
4 | import type { StateManager } from './StateManager';
5 | import type { AlgoliaStore } from './algolia';
6 | import { config } from './config';
7 | import { MainWatchIndexer } from './indexers/MainWatchIndexer';
8 | import { OneTimeBackgroundIndexer } from './indexers/OneTimeBackgroundIndexer';
9 | import { PeriodicBackgroundIndexer } from './indexers/PeriodicBackgroundIndexer';
10 | import * as npm from './npm';
11 | import { ChangesReader } from './npm/ChangesReader';
12 | import { datadog } from './utils/datadog';
13 | import { log } from './utils/log';
14 | import * as sentry from './utils/sentry';
15 | import { report } from './utils/sentry';
16 | import { backoff } from './utils/wait';
17 |
18 | export class Watch {
19 | stateManager: StateManager;
20 | algoliaStore: AlgoliaStore;
21 | // Cached npmInfo.seq
22 | totalSequence: number = 0;
23 |
24 | changesReader: ChangesReader | undefined;
25 | oneTimeIndexer: OneTimeBackgroundIndexer | undefined;
26 | periodicDataIndexer: PeriodicBackgroundIndexer | undefined;
27 | mainWatchIndexer: MainWatchIndexer | undefined;
28 |
29 | constructor(stateManager: StateManager, algoliaStore: AlgoliaStore) {
30 | this.stateManager = stateManager;
31 | this.algoliaStore = algoliaStore;
32 | }
33 |
34 | /**
35 | * Run watch.
36 | *
37 | * --- Watch ?
38 | * Watch is "Long Polled. This mode is not paginated and the event system in CouchDB send
39 | * events as they arrive, which is super cool and reactive.
40 | * One gotcha those events arrive at the same rate whether you are watching the last seq or not.
41 | *
42 | * Example:
43 | * listener A - up to date
44 | * listener B - few sequences behind.
45 | *
46 | * Package C is updated.
47 | *
48 | * Listener A receive update C
49 | * listener B receive update N.
50 | *
51 | * Listener A is up to date again
52 | * listener B is still few sequences behind and will not receive any other event
53 | * until an other package is updated.
54 | * It will never be up to date because he receive event at the same pace
55 | * as they arrive in listener A, even if it's not the same package.
56 | */
57 | async run(): Promise {
58 | log.info('-----');
59 | log.info('🚀 Watch: starting');
60 | log.info('-----');
61 |
62 | await this.stateManager.save({
63 | stage: 'watch',
64 | });
65 |
66 | setInterval(() => {
67 | npm.registry
68 | .request('')
69 | .then((info) => {
70 | this.totalSequence = Number(info.update_seq);
71 | })
72 | .catch(() => {});
73 | }, 5000).unref();
74 |
75 | this.oneTimeIndexer = new OneTimeBackgroundIndexer(
76 | this.algoliaStore,
77 | this.algoliaStore.mainIndex
78 | );
79 |
80 | this.periodicDataIndexer = new PeriodicBackgroundIndexer(
81 | this.algoliaStore,
82 | this.algoliaStore.mainIndex,
83 | this.algoliaStore.mainNotFoundIndex
84 | );
85 |
86 | this.mainWatchIndexer = new MainWatchIndexer(this.algoliaStore);
87 |
88 | this.oneTimeIndexer.run();
89 | this.periodicDataIndexer.run();
90 | this.mainWatchIndexer.run();
91 |
92 | await this.launchChangeReader();
93 | }
94 |
95 | async stop(): Promise {
96 | log.info('Stopping Watch...');
97 |
98 | try {
99 | this.changesReader?.stop?.();
100 | await this.oneTimeIndexer?.stop?.();
101 | await this.periodicDataIndexer?.stop?.();
102 | await this.mainWatchIndexer?.stop?.();
103 | } catch (err) {
104 | sentry.report(err);
105 | }
106 |
107 | log.info('Stopped Watch gracefully');
108 | }
109 |
110 | async launchChangeReader(): Promise {
111 | const { seq: since } = await this.stateManager.get();
112 |
113 | log.info(`listening from ${since}...`);
114 |
115 | const reader = new ChangesReader({ since: String(since) });
116 |
117 | reader
118 | .on('batch', (batch: DatabaseChangesResultItem[]) => {
119 | const changes = Array.from(
120 | batch
121 | .filter((change) => change.id)
122 | .reduce((acc, change) => {
123 | return acc.set(change.id, change);
124 | }, new Map())
125 | .values()
126 | );
127 |
128 | if (!changes.length) {
129 | return;
130 | }
131 |
132 | const storeChanges = async (retry = 0): Promise => {
133 | try {
134 | await this.algoliaStore.mainQueueIndex.saveObjects(
135 | changes.map((change) => ({
136 | seq: change.seq,
137 | name: change.id,
138 | objectID: change.id,
139 | retries: 0,
140 | change,
141 | }))
142 | );
143 | } catch (err) {
144 | const newRetry = retry + 1;
145 | log.error('Error adding a change to the queue.', { err });
146 |
147 | await backoff(
148 | newRetry,
149 | config.retryBackoffPow,
150 | config.retryBackoffMax
151 | );
152 |
153 | return storeChanges(newRetry);
154 | }
155 | };
156 |
157 | // We need to move one at a time here, so pause until the change is safely stored.
158 | reader.pause();
159 |
160 | storeChanges().then(() => {
161 | const seq = changes.at(-1).seq;
162 |
163 | reader.resume();
164 | this.logProgress(seq).catch(() => {});
165 |
166 | this.stateManager.save({ seq }).catch((err) => {
167 | report(new Error('Error storing watch progress'), { err });
168 | });
169 | });
170 | })
171 | .on('error', (err) => {
172 | sentry.report(err);
173 | })
174 | .run();
175 |
176 | this.changesReader = reader;
177 | }
178 |
179 | /**
180 | * Log our process through watch.
181 | *
182 | */
183 | async logProgress(seq: number): Promise {
184 | const queueLength = await this.mainWatchIndexer!.fetchQueueLength();
185 |
186 | datadog.gauge('sequence.total', this.totalSequence);
187 | datadog.gauge('sequence.current', seq);
188 | datadog.gauge('job.idleCount', queueLength);
189 |
190 | log.info(
191 | chalk.dim.italic
192 | .white`[progress] Synced %d/%d changes (%s%) (%s remaining) (~%s in queue)`,
193 | seq,
194 | this.totalSequence,
195 | ((Math.max(seq, 1) / this.totalSequence) * 100).toFixed(2),
196 | this.totalSequence - seq,
197 | queueLength
198 | );
199 | }
200 | }
201 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | /* Visit https://aka.ms/tsconfig.json to read more about this file */
4 | /* Basic Options */
5 | // "incremental": true, /* Enable incremental compilation */
6 | "target": "ESNext" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', 'ES2021', or 'ESNEXT'. */,
7 | "module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */,
8 | "lib": [
9 | "ES6",
10 | "es2015",
11 | "es2017",
12 | "es2019"
13 | ] /* Specify library files to be included in the compilation. */,
14 | "allowJs": true /* Allow javascript files to be compiled. */,
15 | "checkJs": false /* Report errors in .js files. */,
16 | // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', 'react', 'react-jsx' or 'react-jsxdev'. */
17 | "declaration": true /* Generates corresponding '.d.ts' file. */,
18 | "declarationMap": true /* Generates a sourcemap for each corresponding '.d.ts' file. */,
19 | "sourceMap": true /* Generates corresponding '.map' file. */,
20 | // "outFile": "./", /* Concatenate and emit output to single file. */
21 | "outDir": "./dist" /* Redirect output structure to the directory. */,
22 | "rootDir": "./" /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */,
23 | // "composite": true, /* Enable project compilation */
24 | // "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */
25 | "removeComments": true /* Do not emit comments to output. */,
26 | // "noEmit": true, /* Do not emit outputs. */
27 | // "importHelpers": true, /* Import emit helpers from 'tslib'. */
28 | "downlevelIteration": true /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */,
29 | // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */
30 | /* Strict Type-Checking Options */
31 | "strict": true /* Enable all strict type-checking options. */,
32 | "noImplicitAny": false /* Raise error on expressions and declarations with an implied 'any' type. */,
33 | "strictNullChecks": true /* Enable strict null checks. */,
34 | "strictFunctionTypes": true /* Enable strict checking of function types. */,
35 | "strictBindCallApply": true /* Enable strict 'bind', 'call', and 'apply' methods on functions. */,
36 | "strictPropertyInitialization": true /* Enable strict checking of property initialization in classes. */,
37 | "noImplicitThis": true /* Raise error on 'this' expressions with an implied 'any' type. */,
38 | "alwaysStrict": true /* Parse in strict mode and emit "use strict" for each source file. */,
39 | /* Additional Checks */
40 | "noUnusedLocals": false /* Report errors on unused locals. */,
41 | "noUnusedParameters": false /* Report errors on unused parameters. */,
42 | "noImplicitReturns": true /* Report error when not all code paths in function return a value. */,
43 | "noFallthroughCasesInSwitch": true /* Report errors for fallthrough cases in switch statement. */,
44 | "noUncheckedIndexedAccess": true /* Include 'undefined' in index signature results */,
45 | "noImplicitOverride": true /* Ensure overriding members in derived classes are marked with an 'override' modifier. */,
46 | // "noPropertyAccessFromIndexSignature": true, /* Require undeclared properties from index signatures to use element accesses. */
47 | /* Module Resolution Options */
48 | "moduleResolution": "node" /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */,
49 | // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */
50 | // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
51 | // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */
52 | // "typeRoots": [], /* List of folders to include type definitions from. */
53 | // "types": [], /* Type declaration files to be included in compilation. */
54 | "allowSyntheticDefaultImports": true /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */,
55 | "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */,
56 | // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */
57 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
58 | /* Source Map Options */
59 | // "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */
60 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
61 | // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */
62 | // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */
63 | /* Experimental Options */
64 | // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */
65 | // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */
66 | /* Advanced Options */
67 | "skipLibCheck": true /* Skip type checking of declaration files. */,
68 | "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */,
69 | "resolveJsonModule": true
70 | },
71 | "include": ["src/**/*.ts", "package.json"],
72 | "exclude": ["node_modules"]
73 | }
74 |
--------------------------------------------------------------------------------