├── .editorconfig ├── .env.template ├── .github ├── actions │ └── setup-poetry-env │ │ └── action.yml └── workflows │ ├── main.yml │ └── publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── DockerfileCPU ├── LICENSE ├── Makefile ├── README.md ├── SETUP.md ├── data └── .gitignore ├── docker-compose.yml ├── frontend ├── .dockerignore ├── .eslintrc.json ├── .gitignore ├── Dockerfile ├── README.md ├── app │ ├── components │ │ ├── GitHubButton.tsx │ │ ├── GoogleAnalytics.tsx │ │ ├── Header.tsx │ │ ├── InfoBox.tsx │ │ ├── ScatterPlot.tsx │ │ ├── SearchResultsTable.tsx │ │ ├── SupportButton.tsx │ │ └── ToggleSwitch.tsx │ ├── favicon.ico │ ├── globals.css │ ├── layout.tsx │ ├── page.tsx │ └── utils │ │ └── search.ts ├── next.config.mjs ├── package-lock.json ├── package.json ├── postcss.config.mjs ├── public │ ├── kofi.png │ ├── next.svg │ ├── pypi-light.svg │ ├── pypi.svg │ └── vercel.svg ├── tailwind.config.ts └── tsconfig.json ├── package-lock.json ├── package.json ├── poetry.lock ├── pypi_bigquery.sql ├── pypi_scout ├── __init__.py ├── api │ ├── data_loader.py │ ├── main.py │ └── models.py ├── config.py ├── data │ ├── description_cleaner.py │ └── raw_data_reader.py ├── embeddings │ ├── embeddings_creator.py │ └── simple_vector_database.py ├── scripts │ ├── create_vector_embeddings.py │ ├── download_raw_dataset.py │ ├── process_raw_dataset.py │ ├── setup.py │ └── upload_processed_datasets.py └── utils │ ├── blob_io.py │ ├── logging.py │ └── score_calculator.py ├── pyproject.toml ├── requirements-cpu.txt ├── static ├── demo.gif ├── pypi-light.svg └── pypi.svg └── tests └── embeddings └── test_simple_vector_database.py /.editorconfig: -------------------------------------------------------------------------------- 1 | max_line_length = 120 2 | 3 | [*.json] 4 | indent_style = space 5 | indent_size = 4 6 | -------------------------------------------------------------------------------- /.env.template: -------------------------------------------------------------------------------- 1 | STORAGE_BACKEND=BLOB 2 | STORAGE_BACKEND_BLOB_ACCOUNT_NAME= 3 | STORAGE_BACKEND_BLOB_CONTAINER_NAME= 4 | STORAGE_BACKEND_BLOB_KEY= 5 | -------------------------------------------------------------------------------- /.github/actions/setup-poetry-env/action.yml: -------------------------------------------------------------------------------- 1 | name: "setup-poetry-env" 2 | description: "Composite action to setup the Python and poetry environment." 
3 | 4 | inputs: 5 | python-version: 6 | required: false 7 | description: "The python version to use" 8 | default: "3.11" 9 | 10 | runs: 11 | using: "composite" 12 | steps: 13 | - name: Set up python 14 | uses: actions/setup-python@v5 15 | with: 16 | python-version: ${{ inputs.python-version }} 17 | 18 | - name: Install Poetry 19 | uses: snok/install-poetry@v1 20 | with: 21 | virtualenvs-in-project: true 22 | 23 | - name: Load cached venv 24 | id: cached-poetry-dependencies 25 | uses: actions/cache@v4 26 | with: 27 | path: .venv 28 | key: venv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('poetry.lock') }} 29 | 30 | - name: Install dependencies 31 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 32 | run: poetry install --no-interaction 33 | shell: bash 34 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Main 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | types: [opened, synchronize, reopened, ready_for_review] 9 | 10 | jobs: 11 | quality: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Check out 15 | uses: actions/checkout@v4 16 | 17 | - uses: actions/cache@v4 18 | with: 19 | path: ~/.cache/pre-commit 20 | key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} 21 | 22 | - name: Set up the environment 23 | uses: ./.github/actions/setup-poetry-env 24 | 25 | - name: Run checks 26 | run: make check 27 | 28 | tox: 29 | runs-on: ubuntu-latest 30 | strategy: 31 | matrix: 32 | python-version: ["3.9", "3.10", "3.11", "3.12"] 33 | fail-fast: false 34 | steps: 35 | - name: Check out 36 | uses: actions/checkout@v4 37 | 38 | - name: Set up python 39 | uses: actions/setup-python@v5 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | 43 | - name: Install Poetry 44 | uses: snok/install-poetry@v1 45 | 46 | - name: Load cached venv 47 | uses: actions/cache@v4 48 | with: 49 | path: .tox 50 | key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('poetry.lock') }} 51 | 52 | - name: Install tox 53 | run: | 54 | python -m pip install --upgrade pip 55 | python -m pip install tox tox-gh-actions 56 | 57 | - name: Test with tox 58 | run: tox 59 | 60 | - name: Upload coverage reports to Codecov with GitHub Action on Python 3.11 61 | uses: codecov/codecov-action@v4 62 | if: ${{ matrix.python-version == '3.11' }} 63 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push Docker Images 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build-and-push-backend: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout repository 11 | uses: actions/checkout@v2 12 | 13 | - name: Set up Docker Buildx 14 | uses: docker/setup-buildx-action@v2 15 | 16 | - name: Login to Azure Container Registry 17 | uses: azure/docker-login@v1 18 | with: 19 | login-server: pypiscoutacr.azurecr.io 20 | username: ${{ secrets.ACR_USERNAME }} 21 | password: ${{ secrets.ACR_PASSWORD }} 22 | 23 | - name: Build and Push Backend Docker image 24 | uses: docker/build-push-action@v4 25 | with: 26 | context: . 
27 | file: ./DockerfileCPU 28 | platforms: linux/amd64 29 | push: true 30 | tags: pypiscoutacr.azurecr.io/pypi-scout-backend:latest 31 | 32 | build-and-push-frontend: 33 | runs-on: ubuntu-latest 34 | steps: 35 | - name: Checkout repository 36 | uses: actions/checkout@v2 37 | 38 | - name: Set up Docker Buildx 39 | uses: docker/setup-buildx-action@v2 40 | 41 | - name: Login to Azure Container Registry 42 | uses: azure/docker-login@v1 43 | with: 44 | login-server: pypiscoutacr.azurecr.io 45 | username: ${{ secrets.ACR_USERNAME }} 46 | password: ${{ secrets.ACR_PASSWORD }} 47 | 48 | - name: Build and Push Frontend Docker image 49 | uses: docker/build-push-action@v4 50 | with: 51 | context: ./frontend 52 | file: ./frontend/Dockerfile 53 | platforms: linux/amd64 54 | push: true 55 | tags: pypiscoutacr.azurecr.io/pypi-scout-frontend:latest 56 | build-args: | 57 | NEXT_PUBLIC_API_URL=https://pypiscout.com/api 58 | NEXT_PUBLIC_GA_TRACKING_ID=${{ secrets.NEXT_PUBLIC_GA_TRACKING_ID }} 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs/source 2 | 3 | # From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 
105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # Vscode config files 160 | .vscode/ 161 | 162 | # PyCharm 163 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 164 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 165 | # and can be added to the global gitignore or merged into this file. For a more nuclear 166 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 167 | #.idea/ 168 | 169 | .env 170 | .DS_Store 171 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: "v4.4.0" 4 | hooks: 5 | - id: check-case-conflict 6 | - id: check-merge-conflict 7 | - id: check-toml 8 | - id: check-yaml 9 | - id: end-of-file-fixer 10 | - id: trailing-whitespace 11 | 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | rev: "v0.1.6" 14 | hooks: 15 | - id: ruff 16 | args: [--exit-non-zero-on-fix] 17 | - id: ruff-format 18 | 19 | - repo: https://github.com/pre-commit/mirrors-prettier 20 | rev: "v3.0.3" 21 | hooks: 22 | - id: prettier 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | FROM python:3.10-slim-bookworm 4 | 5 | ENV POETRY_VERSION=1.6 \ 6 | POETRY_VIRTUALENVS_CREATE=false 7 | 8 | # Install poetry and clean up 9 | RUN pip install "poetry==$POETRY_VERSION" && \ 10 | rm -rf /root/.cache/pip 11 | 12 | # Set work directory 13 | WORKDIR /code 14 | 15 | # Copy only requirements to cache them in docker layer 16 | COPY poetry.lock pyproject.toml /code/ 17 | 18 | # Install project dependencies and clean up 19 | RUN poetry install --no-interaction --no-ansi --no-root --no-dev && \ 20 | rm -rf /root/.cache/pip 21 | 22 | # Copy Python code to the Docker image 23 | COPY pypi_scout /code/pypi_scout/ 24 | 25 | # Make empty data directory 26 | RUN mkdir -p /code/data 27 | 28 | ENV PYTHONPATH=/code 29 | 30 | # Use the script as the entrypoint 31 | CMD ["uvicorn", "pypi_scout.api.main:app", "--host", "0.0.0.0", "--port", "8000"] 32 | 
-------------------------------------------------------------------------------- /DockerfileCPU: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | # Use a slim Python image as the base 4 | FROM python:3.10-slim-bookworm 5 | 6 | # Set environment variables 7 | ENV PYTHONUNBUFFERED=1 8 | 9 | # Install system dependencies 10 | RUN apt-get update && apt-get install -y --no-install-recommends \ 11 | build-essential \ 12 | && apt-get clean && rm -rf /var/lib/apt/lists/* 13 | 14 | # Set working directory 15 | WORKDIR /code 16 | 17 | # Copy only requirements to cache them in docker layer 18 | COPY requirements-cpu.txt /code/requirements-cpu.txt 19 | 20 | # Install Python dependencies 21 | RUN pip install --no-cache-dir -r requirements-cpu.txt 22 | 23 | # Copy the rest of the application code 24 | COPY pypi_scout /code/pypi_scout/ 25 | 26 | # Make empty data directory 27 | RUN mkdir -p /code/data 28 | 29 | ENV PYTHONPATH=/code 30 | 31 | # Use the script as the entrypoint 32 | CMD ["uvicorn", "pypi_scout.api.main:app", "--host", "0.0.0.0", "--port", "8000"] 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install 2 | install: ## Install the poetry environment and install the pre-commit hooks 3 | @echo "🚀 Creating virtual environment using pyenv and poetry" 4 | @poetry install 5 | @ poetry run pre-commit install 6 | @poetry shell 7 | 8 | .PHONY: check 9 | check: ## Run code quality tools. 
10 | @echo "🚀 Checking Poetry lock file consistency with 'pyproject.toml': Running poetry check --lock" 11 | @poetry check --lock 12 | @echo "🚀 Linting code: Running pre-commit" 13 | @poetry run pre-commit run -a 14 | @echo "🚀 Checking for obsolete dependencies: Running deptry" 15 | @poetry run deptry . 16 | 17 | .PHONY: test 18 | test: ## Test the code with pytest 19 | @echo "🚀 Testing code: Running pytest" 20 | @poetry run pytest --cov --cov-config=pyproject.toml --cov-report=xml 21 | 22 | .PHONY: build 23 | build: ## Build wheel file using poetry 24 | @echo "🚀 Creating wheel file" 25 | @poetry build 26 | 27 | .PHONY: serve 28 | serve: ## Serve API with uvicorn in development mode 29 | @poetry run uvicorn pypi_scout.api.main:app --reload 30 | 31 | .PHONY: frontend 32 | frontend: ## Serve frontend in development mode 33 | @cd frontend; npm run dev 34 | 35 | .PHONY: help 36 | help: 37 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' 38 | 39 | .DEFAULT_GOAL := help 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | <!-- PyPI Scout Logo image (static/pypi.svg, static/pypi-light.svg) --> 3 | 4 | 5 | 6 | 7 | <!-- PyPI Scout Demo animation (static/demo.gif) --> 8 |
9 | 10 | ## What does this do? 11 | 12 | Finding the right Python package on [PyPI](https://pypi.org/) can be a bit difficult, since PyPI isn't really designed for discovering packages easily. For example, you can search for the word "plot" and get a list of hundreds of packages that contain the word "plot" in seemingly random order. 13 | 14 | Inspired by [this blog post](https://koaning.io/posts/search-boxes/) about finding arXiv articles using vector embeddings, I decided to build a small application that helps you find Python packages with a similar approach. For example, you can ask it "I want to make nice plots and visualizations", and it will provide you with a short list of packages that can help you with that. 15 | 16 | ## How does this work? 17 | 18 | The project works by collecting project summaries and descriptions for all packages on PyPI with more than 100 weekly downloads. These are then converted into vector representations using [Sentence Transformers](https://www.sbert.net/). When the user enters a query, it is converted into a vector representation, and the most similar package descriptions are fetched from the vector database. Additional weight is given to the number of weekly downloads before the results are presented to the user in a dashboard. 19 | 20 | ## Stack 21 | 22 | The project uses the following technologies: 23 | 24 | 1. **[FastAPI](https://fastapi.tiangolo.com/)** for the API backend 25 | 2. **[NextJS](https://nextjs.org/) and [TailwindCSS](https://tailwindcss.com/)** for the frontend 26 | 3. **[Sentence Transformers](https://www.sbert.net/)** for vector embeddings 27 | 28 | ## Getting Started 29 | 30 | ### Build and Setup 31 | 32 | #### 1. (Optional) **Create a `.env` file** 33 | 34 | By default, all data will be stored on your local machine. It is also possible to store the data for the API on Azure Blob storage, and 35 | have the API read from there. To do so, create a `.env` file: 36 | 37 | ```sh 38 | cp .env.template .env 39 | ``` 40 | 41 | and fill in the required fields. 42 | 43 | #### 2. **Run the Setup Script** 44 | 45 | The setup script will: 46 | 47 | - Download and process the PyPI dataset and store the results in the `data` directory. 48 | - Create vector embeddings for the PyPI dataset. 49 | - If the `STORAGE_BACKEND` environment variable is set to `BLOB`: Upload the datasets to blob storage. 50 | 51 | There are three ways to run the setup script, depending on whether you have an NVIDIA GPU and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) installed. Please run the setup script using the method that applies to you: 52 | 53 | - [Option 1: Using Poetry](SETUP.md#option-1-using-poetry) 54 | - [Option 2: Using Docker with NVIDIA GPU and NVIDIA Container Toolkit](SETUP.md#option-2-using-docker-with-nvidia-gpu-and-nvidia-container-toolkit) 55 | - [Option 3: Using Docker without NVIDIA GPU and NVIDIA Container Toolkit](SETUP.md#option-3-using-docker-without-nvidia-gpu-and-nvidia-container-toolkit) 56 | 57 | > [!NOTE] 58 | > The dataset contains approximately 100,000 packages on PyPI with more than 100 weekly downloads. To speed up local development, 59 | > you can reduce the number of packages that are processed locally by lowering the value of `FRAC_DATA_TO_INCLUDE` in `pypi_scout/config.py`. 60 | 61 | #### 3.
**Run the Application** 62 | 63 | Start the application using Docker Compose: 64 | 65 | ```sh 66 | docker-compose up 67 | ``` 68 | 69 | After a short while, your application will be live at [http://localhost:3000](http://localhost:3000). 70 | 71 | ## Data 72 | 73 | The dataset for this project is created using the [PyPI dataset on Google BigQuery](https://console.cloud.google.com/marketplace/product/gcp-public-data-pypi/pypi?project=regal-net-412415). The SQL query used can be found in [pypi_bigquery.sql](./pypi_bigquery.sql). The resulting dataset is available as a CSV file on [Google Drive](https://drive.google.com/file/d/1huR7-VD3AieBRCcQyRX9MWbPLMb_czjq/view?usp=sharing). 74 | -------------------------------------------------------------------------------- /SETUP.md: -------------------------------------------------------------------------------- 1 | # Running the Setup Script 2 | 3 | The setup script will: 4 | 5 | - Download and process the PyPI dataset and store the results in the `data` directory. 6 | - Create vector embeddings for the PyPI dataset. 7 | - If the `STORAGE_BACKEND` environment variable is set to `BLOB`: Upload the datasets to blob storage. 8 | 9 | There are three ways to run the setup script: 10 | 11 | ### Option 1: Using Poetry 12 | 13 | You can run the setup script using a virtual environment with Poetry. This method will automatically utilize your GPU for the vector embeddings if it is detected. 14 | 15 | 1. Install dependencies and set up the virtual environment: 16 | 17 | ```sh 18 | poetry install 19 | ``` 20 | 21 | 2. Run the setup script: 22 | 23 | ```sh 24 | poetry run python pypi_scout/scripts/setup.py 25 | ``` 26 | 27 | ### Option 2: Using Docker with NVIDIA GPU and NVIDIA Container Toolkit 28 | 29 | If you have an NVIDIA GPU and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) installed, follow these steps: 30 | 31 | 1. Build the Docker image: 32 | 33 | ```sh 34 | docker build -t pypi-scout . 35 | ``` 36 | 37 | 2. Run the setup script in a Docker container with GPU support: 38 | 39 | ```sh 40 | docker run --rm \ 41 | --gpus all \ 42 | --env-file .env \ 43 | -v $(pwd)/data:/code/data \ 44 | --entrypoint "/bin/bash" \ 45 | pypi-scout \ 46 | -c "python /code/pypi_scout/scripts/setup.py" 47 | ``` 48 | 49 | ### Option 3: Using Docker without NVIDIA GPU and NVIDIA Container Toolkit 50 | 51 | If you do not have an NVIDIA GPU or the NVIDIA Container Toolkit installed, follow these steps: 52 | 53 | 1. Build the Docker image: 54 | 55 | ```sh 56 | docker build -f DockerfileCPU -t pypi-scout . 57 | ``` 58 | 59 | 2. Run the setup script in a Docker container without GPU support: 60 | 61 | ```sh 62 | docker run --rm \ 63 | --env-file .env \ 64 | -v $(pwd)/data:/code/data \ 65 | --entrypoint "/bin/bash" \ 66 | pypi-scout \ 67 | -c "python /code/pypi_scout/scripts/setup.py" 68 | ``` 69 | 70 | ### Running the Application 71 | 72 | After setting up the dataset, start the application using Docker Compose: 73 | 74 | ```sh 75 | docker-compose up 76 | ``` 77 | 78 | After a short while, your application will be live at [http://localhost:3000](http://localhost:3000). 
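Once the containers are running, you can also sanity-check the backend directly. The snippet below is a sketch that assumes the defaults from `docker-compose.yml` and the frontend configuration (the API is exposed on port 8000 under `/api`, and the frontend posts a JSON body with `query` and `top_k` to the `/search` endpoint):

```sh
# Query the same search endpoint that the frontend uses (assumes the default local setup)
curl -X POST http://localhost:8000/api/search \
  -H "Content-Type: application/json" \
  -d '{"query": "a package that creates plots and beautiful visualizations", "top_k": 10}'
```

If this returns a JSON object with a list of `matches` (each with a `name`, `similarity`, `weekly_downloads`, and `summary`), the dataset and vector embeddings were set up correctly.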
79 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | 3 | services: 4 | backend: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | ports: 9 | - "8000:8000" 10 | volumes: 11 | - ./data:/code/data 12 | env_file: 13 | - .env 14 | 15 | frontend: 16 | build: 17 | context: ./frontend 18 | dockerfile: Dockerfile 19 | args: 20 | NEXT_PUBLIC_API_URL: http://localhost:8000/api 21 | ports: 22 | - "3000:3000" 23 | depends_on: 24 | - backend 25 | -------------------------------------------------------------------------------- /frontend/.dockerignore: -------------------------------------------------------------------------------- 1 | # .dockerignore 2 | node_modules 3 | .next 4 | .env 5 | .git 6 | -------------------------------------------------------------------------------- /frontend/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | .yarn/install-state.gz 8 | 9 | # testing 10 | /coverage 11 | 12 | # next.js 13 | /.next/ 14 | /out/ 15 | 16 | # production 17 | /build 18 | 19 | # misc 20 | .DS_Store 21 | *.pem 22 | 23 | # debug 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | 28 | # local env files 29 | .env*.local 30 | 31 | # vercel 32 | .vercel 33 | 34 | # typescript 35 | *.tsbuildinfo 36 | next-env.d.ts 37 | 38 | .next 39 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Node.js image as the base image 2 | FROM node:18-alpine 3 | 4 | # Set the working directory inside the container 5 | WORKDIR /app 6 | 7 | # Copy package.json and package-lock.json files to the container 8 | COPY package.json package-lock.json ./ 9 | 10 | # Install dependencies 11 | RUN npm install 12 | 13 | # Copy the rest of the application code to the container 14 | COPY . . 15 | 16 | # Add build arguments to environment 17 | ARG NEXT_PUBLIC_API_URL 18 | ARG NEXT_PUBLIC_GA_TRACKING_ID 19 | ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL} 20 | ENV NEXT_PUBLIC_GA_TRACKING_ID=${NEXT_PUBLIC_GA_TRACKING_ID} 21 | 22 | # Build the Next.js application 23 | RUN npm run build 24 | 25 | # Expose the port on which the application will run 26 | EXPOSE 3000 27 | 28 | # Start the Next.js application 29 | CMD ["npm", "run", "start"] 30 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app). 
2 | 3 | ## Getting Started 4 | 5 | First, run the development server: 6 | 7 | ```bash 8 | npm run dev 9 | ``` 10 | 11 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 12 | 13 | You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. 14 | 15 | This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font. 16 | 17 | ## Learn More 18 | 19 | To learn more about Next.js, take a look at the following resources: 20 | 21 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 22 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 23 | 24 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js/) - your feedback and contributions are welcome! 25 | -------------------------------------------------------------------------------- /frontend/app/components/GitHubButton.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | const GitHubButton: React.FC = () => { 4 | return ( 5 | 11 | 18 | 19 | 20 | GitHub 21 | 22 | ); 23 | }; 24 | 25 | export default GitHubButton; 26 | -------------------------------------------------------------------------------- /frontend/app/components/GoogleAnalytics.tsx: -------------------------------------------------------------------------------- 1 | // app/components/GoogleAnalytics.tsx 2 | "use client"; 3 | 4 | import { useEffect } from "react"; 5 | 6 | const GoogleAnalytics = () => { 7 | useEffect(() => { 8 | const trackingId = process.env.NEXT_PUBLIC_GA_TRACKING_ID; 9 | if (trackingId) { 10 | const script1 = document.createElement("script"); 11 | script1.async = true; 12 | script1.src = `https://www.googletagmanager.com/gtag/js?id=${trackingId}`; 13 | document.head.appendChild(script1); 14 | 15 | const script2 = document.createElement("script"); 16 | script2.innerHTML = ` 17 | window.dataLayer = window.dataLayer || []; 18 | function gtag(){dataLayer.push(arguments);} 19 | gtag('js', new Date()); 20 | gtag('config', '${trackingId}'); 21 | `; 22 | document.head.appendChild(script2); 23 | } 24 | }, []); 25 | 26 | return null; 27 | }; 28 | 29 | export default GoogleAnalytics; 30 | -------------------------------------------------------------------------------- /frontend/app/components/Header.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | import GitHubButton from "./GitHubButton"; 3 | import SupportButton from "./SupportButton"; 4 | import { FaBars, FaTimes } from "react-icons/fa"; 5 | 6 | const Header: React.FC = () => { 7 | const [isMenuOpen, setIsMenuOpen] = useState(false); 8 | 9 | const toggleMenu = () => { 10 | setIsMenuOpen(!isMenuOpen); 11 | }; 12 | 13 | return ( 14 |
15 |
16 | 17 | 18 |
19 |
20 | 26 |
27 | {isMenuOpen && ( 28 |
29 | 30 | 31 |
32 | )} 33 |
34 | ); 35 | }; 36 | 37 | export default Header; 38 | -------------------------------------------------------------------------------- /frontend/app/components/InfoBox.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | interface InfoBoxProps { 4 | infoBoxVisible: boolean; 5 | } 6 | 7 | const InfoBox: React.FC = ({ infoBoxVisible }) => { 8 | if (!infoBoxVisible) return null; 9 | 10 | return ( 11 |
12 |

13 | How does this work? 14 |

15 |

16 | This application allows you to search for Python packages on PyPI using 17 | natural language queries. For example, a query could be "a package 18 | that creates plots and beautiful visualizations". 19 |

20 |
21 |

22 | Once you click search, your query will be matched against the summary 23 | and the first part of the description of the ~100,000 most popular 24 | packages on PyPI, which includes all packages with at least ~100 25 | downloads per week. The results are then scored based on their 26 | similarity to the query and their number of weekly downloads, and the 27 | best results are displayed in the plot and table above.

29 |
30 | ); 31 | }; 32 | 33 | export default InfoBox; 34 | -------------------------------------------------------------------------------- /frontend/app/components/ScatterPlot.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Scatter } from "react-chartjs-2"; 3 | import { 4 | Chart, 5 | Tooltip, 6 | Legend, 7 | PointElement, 8 | LinearScale, 9 | Title, 10 | LogarithmicScale, 11 | CategoryScale, 12 | FontSpec, 13 | } from "chart.js"; 14 | 15 | Chart.register( 16 | Tooltip, 17 | Legend, 18 | PointElement, 19 | LinearScale, 20 | Title, 21 | LogarithmicScale, 22 | CategoryScale, 23 | ); 24 | 25 | interface Match { 26 | name: string; 27 | similarity: number; 28 | weekly_downloads: number; 29 | summary: string; 30 | } 31 | 32 | interface ScatterPlotProps { 33 | results: Match[]; 34 | } 35 | 36 | const getColor = ( 37 | similarity: number, 38 | downloads: number, 39 | minSim: number, 40 | maxSim: number, 41 | minLogDownloads: number, 42 | maxLogDownloads: number, 43 | ) => { 44 | const baseColor = [54, 162, 235]; // Blue 45 | const highlightColor = [255, 99, 132]; // Red 46 | 47 | const normalizedSimilarity = (similarity - minSim) / (maxSim - minSim); 48 | const normalizedDownloads = 49 | (Math.log10(downloads) - minLogDownloads) / 50 | (maxLogDownloads - minLogDownloads); 51 | 52 | const weight = Math.min( 53 | ((normalizedSimilarity + normalizedDownloads) / 2) * 1.5, 54 | 1, 55 | ); 56 | 57 | const color = baseColor.map((base, index) => 58 | Math.round(base + weight * (highlightColor[index] - base)), 59 | ); 60 | 61 | return `rgba(${color.join(",")}, 0.8)`; 62 | }; 63 | 64 | const getPointSize = ( 65 | similarity: number, 66 | downloads: number, 67 | minSim: number, 68 | maxSim: number, 69 | minLogDownloads: number, 70 | maxLogDownloads: number, 71 | ) => { 72 | const normalizedSimilarity = (similarity - minSim) / (maxSim - minSim); 73 | const normalizedDownloads = 74 | (Math.log10(downloads) - minLogDownloads) / 75 | (maxLogDownloads - minLogDownloads); 76 | 77 | const minSize = 2; 78 | const size = Math.min( 79 | (normalizedSimilarity + normalizedDownloads) * 10 + minSize, 80 | 25, 81 | ); 82 | return size; 83 | }; 84 | 85 | const ScatterPlot: React.FC = ({ results }) => { 86 | const similarities = results.map((result) => result.similarity); 87 | const downloads = results.map((result) => result.weekly_downloads); 88 | const logDownloads = downloads.map((download) => Math.log10(download)); 89 | 90 | const minSim = Math.min(...similarities); 91 | const maxSim = Math.max(...similarities); 92 | const minLogDownloads = Math.min(...logDownloads); 93 | const maxLogDownloads = Math.max(...logDownloads); 94 | 95 | const data = { 96 | datasets: [ 97 | { 98 | label: "Packages", 99 | data: results.map((result) => ({ 100 | x: result.similarity, 101 | y: result.weekly_downloads, 102 | name: result.name, 103 | summary: result.summary, 104 | link: `https://pypi.org/project/${result.name}/`, 105 | })), 106 | backgroundColor: results.map((result) => 107 | getColor( 108 | result.similarity, 109 | result.weekly_downloads, 110 | minSim, 111 | maxSim, 112 | minLogDownloads, 113 | maxLogDownloads, 114 | ), 115 | ), 116 | borderColor: results.map((result) => 117 | getColor( 118 | result.similarity, 119 | result.weekly_downloads, 120 | minSim, 121 | maxSim, 122 | minLogDownloads, 123 | maxLogDownloads, 124 | ), 125 | ), 126 | pointRadius: results.map((result) => 127 | getPointSize( 128 | result.similarity, 129 | result.weekly_downloads, 130 
| minSim, 131 | maxSim, 132 | minLogDownloads, 133 | maxLogDownloads, 134 | ), 135 | ), 136 | hoverBackgroundColor: results.map((result) => 137 | getColor( 138 | result.similarity, 139 | result.weekly_downloads, 140 | minSim, 141 | maxSim, 142 | minLogDownloads, 143 | maxLogDownloads, 144 | ), 145 | ), 146 | hoverBorderColor: results.map((result) => 147 | getColor( 148 | result.similarity, 149 | result.weekly_downloads, 150 | minSim, 151 | maxSim, 152 | minLogDownloads, 153 | maxLogDownloads, 154 | ), 155 | ), 156 | pointHoverRadius: 15, 157 | }, 158 | ], 159 | }; 160 | 161 | const options = { 162 | responsive: true, 163 | maintainAspectRatio: false, 164 | plugins: { 165 | tooltip: { 166 | callbacks: { 167 | title: (context: any) => { 168 | const dataPoint = context[0].raw; 169 | return dataPoint.name; 170 | }, 171 | beforeLabel: (context: any) => { 172 | const dataPoint = context.raw; 173 | return dataPoint.summary; 174 | }, 175 | label: () => "", 176 | afterLabel: (context: any) => { 177 | const dataPoint = context.raw; 178 | return `\nWeekly downloads: ${dataPoint.y.toLocaleString()}`; 179 | }, 180 | }, 181 | titleFont: { size: 16, weight: "bold" as FontSpec["weight"] }, 182 | bodyFont: { size: 14, weight: "normal" as FontSpec["weight"] }, 183 | footerFont: { size: 12, weight: "normal" as FontSpec["weight"] }, 184 | displayColors: false, 185 | backgroundColor: "rgba(0, 0, 0, 0.8)", 186 | padding: 10, 187 | bodySpacing: 4, 188 | titleAlign: "left" as const, 189 | bodyAlign: "left" as const, 190 | footerAlign: "left" as const, 191 | }, 192 | legend: { 193 | display: false, 194 | }, 195 | }, 196 | scales: { 197 | x: { 198 | title: { 199 | display: true, 200 | text: "Similarity", 201 | color: "#FFFFFF", 202 | font: { 203 | size: 24, 204 | }, 205 | }, 206 | ticks: { 207 | color: "#FFFFFF", 208 | display: false, 209 | }, 210 | grid: { 211 | display: false, 212 | }, 213 | }, 214 | y: { 215 | title: { 216 | display: true, 217 | text: "Weekly Downloads", 218 | color: "#FFFFFF", 219 | font: { 220 | size: 24, 221 | }, 222 | }, 223 | ticks: { 224 | callback: function (value: any) { 225 | return value.toLocaleString(); 226 | }, 227 | color: "#FFFFFF", 228 | maxTicksLimit: 5, 229 | }, 230 | type: "logarithmic" as const, 231 | }, 232 | }, 233 | onClick: (event: any, elements: any) => { 234 | if (elements.length > 0) { 235 | const elementIndex = elements[0].index; 236 | const datasetIndex = elements[0].datasetIndex; 237 | const link = data.datasets[datasetIndex].data[elementIndex].link; 238 | window.open(link, "_blank"); 239 | } 240 | }, 241 | onHover: (event: any, elements: any) => { 242 | event.native.target.style.cursor = elements[0] ? "pointer" : "default"; 243 | }, 244 | elements: { 245 | point: { 246 | hoverRadius: 15, 247 | }, 248 | }, 249 | }; 250 | 251 | const plugins = [ 252 | { 253 | id: "customLabels", 254 | afterDatasetsDraw: (chart: any) => { 255 | const ctx = chart.ctx; 256 | chart.data.datasets.forEach((dataset: any) => { 257 | dataset.data.forEach((dataPoint: any, index: number) => { 258 | const { x, y } = chart 259 | .getDatasetMeta(0) 260 | .data[index].tooltipPosition(); 261 | ctx.fillStyle = "white"; 262 | ctx.textAlign = "center"; 263 | ctx.fillText(dataPoint.name, x, y - 10); 264 | }); 265 | }); 266 | }, 267 | }, 268 | ]; 269 | 270 | return ( 271 |
272 |

273 | Click a package to go to PyPI 274 |

275 |
276 |
277 | 278 |
279 |
280 | ); 281 | }; 282 | 283 | export default ScatterPlot; 284 | -------------------------------------------------------------------------------- /frontend/app/components/SearchResultsTable.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { FaExternalLinkAlt } from "react-icons/fa"; // Import the icon 3 | 4 | interface Match { 5 | name: string; 6 | similarity: number; 7 | weekly_downloads: number; 8 | summary: string; 9 | } 10 | 11 | interface SearchResultsTableProps { 12 | results: Match[]; 13 | sortField: string; 14 | sortDirection: string; 15 | onSort: (field: string) => void; 16 | } 17 | 18 | const SearchResultsTable: React.FC = ({ 19 | results, 20 | sortField, 21 | sortDirection, 22 | onSort, 23 | }) => { 24 | const getSortIndicator = (field: string) => { 25 | return sortField === field ? (sortDirection === "asc" ? "▲" : "▼") : ""; 26 | }; 27 | 28 | const truncateText = (text: string, maxLength: number) => { 29 | return text.length > maxLength 30 | ? `${text.substring(0, maxLength)}...` 31 | : text; 32 | }; 33 | 34 | return ( 35 |
36 | 37 | 38 | 39 | 47 | 56 | 67 | 70 | 73 | 74 | 75 | 76 | {results.map((result, index) => ( 77 | 78 | 81 | 84 | 87 | 90 | 101 | 102 | ))} 103 | 104 |
onSort("name")} 42 | > 43 |
44 | Name {getSortIndicator("name")} 45 |
46 |
onSort("similarity")} 50 | > 51 |
52 | Similarity{" "} 53 | {getSortIndicator("similarity")} 54 |
55 |
onSort("weekly_downloads")} 59 | > 60 |
61 | Weekly Downloads{" "} 62 | 63 | {getSortIndicator("weekly_downloads")} 64 | 65 |
66 |
68 | Summary 69 | 71 | Link 72 |
79 | {truncateText(result.name, 20)} 80 | 82 | {result.similarity.toFixed(3)} 83 | 85 | {result.weekly_downloads.toLocaleString()} 86 | 88 | {result.summary} 89 | 91 | 97 | 98 | PyPI 99 | 100 |
105 |
106 | ); 107 | }; 108 | 109 | export default SearchResultsTable; 110 | -------------------------------------------------------------------------------- /frontend/app/components/SupportButton.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | const SupportButton: React.FC = () => { 4 | return ( 5 | 11 | Ko-fi logo 18 | Support 19 | 20 | ); 21 | }; 22 | 23 | export default SupportButton; 24 | -------------------------------------------------------------------------------- /frontend/app/components/ToggleSwitch.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | interface ToggleSwitchProps { 4 | option1: string; 5 | option2: string; 6 | selectedOption: string; 7 | onToggle: (option: string) => void; 8 | } 9 | 10 | const ToggleSwitch: React.FC = ({ 11 | option1, 12 | option2, 13 | selectedOption, 14 | onToggle, 15 | }) => { 16 | return ( 17 |
18 | 28 | 38 |
39 | ); 40 | }; 41 | 42 | export default ToggleSwitch; 43 | -------------------------------------------------------------------------------- /frontend/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpgmaas/pypi-scout/593a48a2512a14c350bae98b087cd861d94a0c6b/frontend/app/favicon.ico -------------------------------------------------------------------------------- /frontend/app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | --foreground-rgb: 0, 0, 0; 7 | --background-start-rgb: 214, 219, 220; 8 | --background-end-rgb: 255, 255, 255; 9 | --dark-bg-start-rgb: 8, 47, 73; /* Dark sky (bg-sky-950) */ 10 | --dark-bg-end-rgb: 8, 47, 73; /* Dark sky (bg-sky-950) */ 11 | --dark-foreground-rgb: 255, 255, 255; 12 | } 13 | 14 | @media (prefers-color-scheme: dark) { 15 | :root { 16 | --foreground-rgb: var(--dark-foreground-rgb); 17 | --background-start-rgb: var(--dark-bg-start-rgb); 18 | --background-end-rgb: var(--dark-bg-end-rgb); 19 | } 20 | } 21 | 22 | body { 23 | color: rgb(var(--foreground-rgb)); 24 | background: rgb(var(--background-start-rgb)); /* Solid background color */ 25 | } 26 | 27 | @layer utilities { 28 | .text-balance { 29 | text-wrap: balance; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /frontend/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import { Inter } from "next/font/google"; 3 | import "./globals.css"; 4 | import GoogleAnalytics from "./components/GoogleAnalytics"; 5 | 6 | const inter = Inter({ subsets: ["latin"] }); 7 | 8 | export const metadata: Metadata = { 9 | title: "PyPI Scout", 10 | description: "Find Python packages on PyPI with natural language queries", 11 | openGraph: { 12 | title: "PyPI Scout", 13 | description: "Find Python packages on PyPI with natural language queries", 14 | images: [ 15 | { 16 | url: "/pypi-light.svg", 17 | width: 600, 18 | height: 300, 19 | alt: "pypi-scout logo", 20 | }, 21 | ], 22 | }, 23 | }; 24 | 25 | export default function RootLayout({ 26 | children, 27 | }: Readonly<{ 28 | children: React.ReactNode; 29 | }>) { 30 | return ( 31 | 32 | 33 | 34 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | {children} 46 | 47 | 48 | ); 49 | } 50 | -------------------------------------------------------------------------------- /frontend/app/page.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { useState, useEffect, useRef } from "react"; 4 | import { handleSearch, sortResults } from "./utils/search"; 5 | import SearchResultsTable from "./components/SearchResultsTable"; 6 | import InfoBox from "./components/InfoBox"; 7 | import ScatterPlot from "./components/ScatterPlot"; 8 | import ToggleSwitch from "./components/ToggleSwitch"; 9 | import { ClipLoader } from "react-spinners"; 10 | import Header from "./components/Header"; 11 | 12 | interface Match { 13 | name: string; 14 | similarity: number; 15 | weekly_downloads: number; 16 | summary: string; 17 | } 18 | 19 | export default function Home() { 20 | const [text, setText] = useState(""); 21 | const [results, setResults] = useState([]); 22 | const [sortField, setSortField] = useState("similarity"); 23 | const [sortDirection, setSortDirection] = useState("desc"); 24 | const 
[loading, setLoading] = useState(false); 25 | const [error, setError] = useState(""); 26 | const [infoBoxVisible, setInfoBoxVisible] = useState(false); 27 | const [view, setView] = useState("Plot"); 28 | 29 | const resultsRef = useRef(null); 30 | 31 | // If the user is on a small screen, default to the table view instead of the plot. 32 | useEffect(() => { 33 | if (window.innerWidth < 768) { 34 | setView("Table"); 35 | } 36 | }, []); 37 | 38 | useEffect(() => { 39 | if (results.length > 0) { 40 | resultsRef.current?.scrollIntoView({ behavior: "smooth" }); 41 | } 42 | }, [results]); 43 | 44 | const handleSort = (field: string) => { 45 | const direction = 46 | sortField === field && sortDirection === "asc" ? "desc" : "asc"; 47 | setSortField(field); 48 | setSortDirection(direction); 49 | setResults(sortResults(results, field, direction)); 50 | }; 51 | 52 | const handleSearchAction = () => { 53 | handleSearch( 54 | text, 55 | sortField, 56 | sortDirection, 57 | setResults, 58 | setLoading, 59 | setError, 60 | ); 61 | }; 62 | 63 | const handleKeyDown = (e: React.KeyboardEvent) => { 64 | if (e.key === "Enter" && !e.shiftKey) { 65 | e.preventDefault(); 66 | handleSearchAction(); 67 | } 68 | }; 69 | 70 | return ( 71 |
72 |
73 |
74 |
75 | 76 | pypi-scout logo 83 | 84 |

85 | Find packages on PyPI with natural language queries 86 |

87 |
88 | 89 |
90 | 97 | 103 | {loading && ( 104 | 105 | )} 106 | {error &&

{error}

} 107 |
108 | 109 | {results.length > 0 && ( 110 |
111 | 117 |
118 | )} 119 | 120 |
121 | {" "} 122 | {/* Reference to this div */} 123 | {results.length > 0 && view === "Plot" && ( 124 |
125 |
126 | 127 |
128 |
129 | )} 130 | {results.length > 0 && view === "Table" && ( 131 |
132 |
133 | 139 |
140 |
141 | )} 142 |
143 | 144 |
145 | 151 |
152 | 153 | 154 |
155 |
156 | ); 157 | } 158 | -------------------------------------------------------------------------------- /frontend/app/utils/search.ts: -------------------------------------------------------------------------------- 1 | import axios from "axios"; 2 | 3 | interface Match { 4 | name: string; 5 | similarity: number; 6 | weekly_downloads: number; 7 | summary: string; 8 | } 9 | 10 | interface SearchResponse { 11 | matches: Match[]; 12 | warning?: boolean; 13 | warning_message?: string; 14 | } 15 | 16 | const apiUrl = process.env.NEXT_PUBLIC_API_URL; 17 | 18 | export const handleSearch = async ( 19 | query: string, 20 | sortField: string, 21 | sortDirection: string, 22 | setResults: React.Dispatch>, 23 | setLoading: React.Dispatch>, 24 | setError: React.Dispatch>, 25 | ) => { 26 | setLoading(true); 27 | setError(""); 28 | try { 29 | const response = await axios.post( 30 | `${apiUrl}/search`, 31 | { 32 | query: query, 33 | top_k: 40, 34 | }, 35 | { 36 | headers: { 37 | "Content-Type": "application/json", 38 | }, 39 | }, 40 | ); 41 | 42 | const { matches, warning, warning_message } = response.data; 43 | 44 | if (warning && warning_message) { 45 | console.warn("Warning from API:", warning_message); 46 | } 47 | 48 | setResults(sortResults(matches, sortField, sortDirection)); 49 | } catch (error) { 50 | if (axios.isAxiosError(error) && error.response?.status === 429) { 51 | setError("Rate limit reached. Please wait a minute and try again."); 52 | } else { 53 | setError("Error fetching search results."); 54 | } 55 | console.error("Error fetching search results:", error); 56 | } finally { 57 | setLoading(false); 58 | } 59 | }; 60 | 61 | export const sortResults = ( 62 | data: Match[], 63 | field: string, 64 | direction: string, 65 | ): Match[] => { 66 | return [...data].sort((a, b) => { 67 | // @ts-ignore 68 | if (a[field] < b[field]) return direction === "asc" ? -1 : 1; 69 | // @ts-ignore 70 | if (a[field] > b[field]) return direction === "asc" ? 
1 : -1; 71 | return 0; 72 | }); 73 | }; 74 | -------------------------------------------------------------------------------- /frontend/next.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | env: { 4 | NEXT_PUBLIC_API_URL: 5 | process.env.NEXT_PUBLIC_API_URL || "http://localhost:8000/api", 6 | }, 7 | }; 8 | 9 | export default nextConfig; 10 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "axios": "^1.7.2", 13 | "chart.js": "^4.4.3", 14 | "next": "14.2.4", 15 | "react": "^18", 16 | "react-chartjs-2": "^5.2.0", 17 | "react-dom": "^18", 18 | "react-icons": "^5.2.1", 19 | "react-spinners": "^0.13.8" 20 | }, 21 | "devDependencies": { 22 | "@types/node": "^20", 23 | "@types/react": "^18", 24 | "@types/react-dom": "^18", 25 | "eslint": "^8", 26 | "eslint-config-next": "14.2.4", 27 | "postcss": "^8", 28 | "tailwindcss": "^3.4.1", 29 | "typescript": "^5" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /frontend/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | tailwindcss: {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /frontend/public/kofi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpgmaas/pypi-scout/593a48a2512a14c350bae98b087cd861d94a0c6b/frontend/public/kofi.png -------------------------------------------------------------------------------- /frontend/public/next.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /frontend/public/pypi-light.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /frontend/public/pypi.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /frontend/public/vercel.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /frontend/tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from "tailwindcss"; 2 | 3 | const config: Config = { 4 | content: [ 5 | "./pages/**/*.{js,ts,jsx,tsx,mdx}", 6 | "./components/**/*.{js,ts,jsx,tsx,mdx}", 7 | "./app/**/*.{js,ts,jsx,tsx,mdx}", 8 | ], 9 | theme: { 10 | extend: { 11 | colors: { 12 | sky: { 13 | 50: "#d9f0ff", // Darkened from #f0f9ff 14 | 100: "#c3e4fe", // Darkened from #e0f2fe 15 | 200: "#a3d4fd", // Darkened from #bae6fd 16 | 300: "#5cbdfc", // Darkened from #7dd3fc 17 | 400: "#2aa3f8", // Darkened from #38bdf8 18 | 500: "#0b8edc", // Darkened 
from #0ea5e9 19 | 600: "#026baa", // Darkened from #0284c7 20 | 700: "#015a89", // Darkened from #0369a1 21 | 800: "#054b6e", // Darkened from #075985 22 | 900: "#083857", // Darkened from #0c4a6e 23 | 950: "#062338", // Darkened from #082f49 24 | }, 25 | orange: { 26 | 100: "#f8d5c7", 27 | 200: "#f1ac9a", 28 | 300: "#ea836d", 29 | 400: "#e35a40", 30 | 500: "#d77a61", 31 | 600: "#c45b3f", 32 | 700: "#b23a1b", 33 | 800: "#D18829", // Orange from logo 34 | }, 35 | }, 36 | backgroundImage: { 37 | "gradient-radial": "radial-gradient(var(--tw-gradient-stops))", 38 | "gradient-conic": 39 | "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))", 40 | }, 41 | }, 42 | }, 43 | plugins: [], 44 | }; 45 | 46 | export default config; 47 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "lib": ["dom", "dom.iterable", "esnext"], 4 | "allowJs": true, 5 | "skipLibCheck": true, 6 | "strict": true, 7 | "noEmit": true, 8 | "esModuleInterop": true, 9 | "module": "esnext", 10 | "moduleResolution": "bundler", 11 | "resolveJsonModule": true, 12 | "isolatedModules": true, 13 | "jsx": "preserve", 14 | "incremental": true, 15 | "plugins": [ 16 | { 17 | "name": "next" 18 | } 19 | ], 20 | "paths": { 21 | "@/*": ["./*"] 22 | } 23 | }, 24 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 25 | "exclude": ["node_modules"] 26 | } 27 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pypi-llm", 3 | "lockfileVersion": 3, 4 | "requires": true, 5 | "packages": { 6 | "": { 7 | "dependencies": { 8 | "chart.js": "^4.4.3", 9 | "react-chartjs-2": "^5.2.0" 10 | } 11 | }, 12 | "node_modules/@kurkle/color": { 13 | "version": "0.3.2", 14 | "resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.2.tgz", 15 | "integrity": "sha512-fuscdXJ9G1qb7W8VdHi+IwRqij3lBkosAm4ydQtEmbY58OzHXqQhvlxqEkoz0yssNVn38bcpRWgA9PP+OGoisw==" 16 | }, 17 | "node_modules/chart.js": { 18 | "version": "4.4.3", 19 | "resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.4.3.tgz", 20 | "integrity": "sha512-qK1gkGSRYcJzqrrzdR6a+I0vQ4/R+SoODXyAjscQ/4mzuNzySaMCd+hyVxitSY1+L2fjPD1Gbn+ibNqRmwQeLw==", 21 | "dependencies": { 22 | "@kurkle/color": "^0.3.0" 23 | }, 24 | "engines": { 25 | "pnpm": ">=8" 26 | } 27 | }, 28 | "node_modules/js-tokens": { 29 | "version": "4.0.0", 30 | "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", 31 | "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", 32 | "peer": true 33 | }, 34 | "node_modules/loose-envify": { 35 | "version": "1.4.0", 36 | "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", 37 | "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", 38 | "peer": true, 39 | "dependencies": { 40 | "js-tokens": "^3.0.0 || ^4.0.0" 41 | }, 42 | "bin": { 43 | "loose-envify": "cli.js" 44 | } 45 | }, 46 | "node_modules/react": { 47 | "version": "18.3.1", 48 | "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", 49 | "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", 50 | "peer": true, 51 | "dependencies": { 52 | "loose-envify": "^1.1.0" 
53 | }, 54 | "engines": { 55 | "node": ">=0.10.0" 56 | } 57 | }, 58 | "node_modules/react-chartjs-2": { 59 | "version": "5.2.0", 60 | "resolved": "https://registry.npmjs.org/react-chartjs-2/-/react-chartjs-2-5.2.0.tgz", 61 | "integrity": "sha512-98iN5aguJyVSxp5U3CblRLH67J8gkfyGNbiK3c+l1QI/G4irHMPQw44aEPmjVag+YKTyQ260NcF82GTQ3bdscA==", 62 | "peerDependencies": { 63 | "chart.js": "^4.1.1", 64 | "react": "^16.8.0 || ^17.0.0 || ^18.0.0" 65 | } 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "chart.js": "^4.4.3", 4 | "react-chartjs-2": "^5.2.0" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /pypi_bigquery.sql: -------------------------------------------------------------------------------- 1 | WITH recent_downloads AS ( 2 | SELECT 3 | LOWER(project) AS project_lower, 4 | project, 5 | COUNT(*) AS download_count 6 | FROM 7 | `bigquery-public-data.pypi.file_downloads` 8 | WHERE 9 | DATE(timestamp) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND CURRENT_DATE() 10 | GROUP BY 11 | LOWER(project), project 12 | HAVING 13 | COUNT(*) >= 100 14 | ), 15 | latest_metadata AS ( 16 | SELECT 17 | LOWER(name) AS name_lower, 18 | name, 19 | description, 20 | summary, 21 | version, 22 | upload_time, 23 | ROW_NUMBER() OVER (PARTITION BY LOWER(name) ORDER BY upload_time DESC) AS rn 24 | FROM 25 | `bigquery-public-data.pypi.distribution_metadata` 26 | ) 27 | SELECT 28 | lm.name AS name, 29 | lm.description AS description, 30 | lm.summary AS summary, 31 | lm.version AS latest_version, 32 | rd.download_count AS number_of_downloads 33 | FROM 34 | recent_downloads rd 35 | JOIN 36 | latest_metadata lm 37 | ON 38 | rd.project_lower = lm.name_lower 39 | WHERE 40 | lm.rn = 1 41 | ORDER BY 42 | rd.download_count DESC; 43 | -------------------------------------------------------------------------------- /pypi_scout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpgmaas/pypi-scout/593a48a2512a14c350bae98b087cd861d94a0c6b/pypi_scout/__init__.py -------------------------------------------------------------------------------- /pypi_scout/api/data_loader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Tuple 3 | 4 | import polars as pl 5 | 6 | from pypi_scout.config import Config, StorageBackend 7 | from pypi_scout.utils.blob_io import BlobIO 8 | 9 | 10 | class ApiDataLoader: 11 | def __init__(self, config: Config): 12 | self.config = config 13 | 14 | def load_dataset(self) -> Tuple[pl.DataFrame, pl.DataFrame]: 15 | if self.config.STORAGE_BACKEND == StorageBackend.LOCAL: 16 | df_packages, df_embeddings = self._load_local_dataset() 17 | elif self.config.STORAGE_BACKEND == StorageBackend.BLOB: 18 | df_packages, df_embeddings = self._load_blob_dataset() 19 | else: 20 | raise ValueError(f"Unexpected value found for STORAGE_BACKEND: {self.config.STORAGE_BACKEND}") # noqa: TRY003 21 | 22 | df_embeddings = self._drop_rows_from_embeddings_that_do_not_appear_in_packages(df_embeddings, df_packages) 23 | return df_packages, df_embeddings 24 | 25 | def _load_local_dataset(self) -> Tuple[pl.DataFrame, pl.DataFrame]: 26 | packages_dataset_path = self.config.DATA_DIR / self.config.DATASET_FOR_API_CSV_NAME 27 | embeddings_dataset_path = 
self.config.DATA_DIR / self.config.EMBEDDINGS_PARQUET_NAME 28 | 29 | logging.info(f"Reading packages dataset from `{packages_dataset_path}`...") 30 | df_packages = pl.read_csv(packages_dataset_path) 31 | self._log_packages_dataset_info(df_packages) 32 | 33 | logging.info(f"Reading embeddings from `{embeddings_dataset_path}`...") 34 | df_embeddings = pl.read_parquet(embeddings_dataset_path) 35 | self._log_embeddings_dataset_info(df_embeddings) 36 | 37 | return df_packages, df_embeddings 38 | 39 | def _load_blob_dataset(self) -> Tuple[pl.DataFrame, pl.DataFrame]: 40 | blob_io = BlobIO( 41 | self.config.STORAGE_BACKEND_BLOB_ACCOUNT_NAME, 42 | self.config.STORAGE_BACKEND_BLOB_CONTAINER_NAME, 43 | self.config.STORAGE_BACKEND_BLOB_KEY, 44 | ) 45 | 46 | logging.info( 47 | f"Downloading `{self.config.DATASET_FOR_API_CSV_NAME}` from container `{self.config.STORAGE_BACKEND_BLOB_CONTAINER_NAME}`..." 48 | ) 49 | df_packages = blob_io.download_csv_to_df(self.config.DATASET_FOR_API_CSV_NAME) 50 | self._log_packages_dataset_info(df_packages) 51 | 52 | logging.info( 53 | f"Downloading `{self.config.EMBEDDINGS_PARQUET_NAME}` from container `{self.config.STORAGE_BACKEND_BLOB_CONTAINER_NAME}`..." 54 | ) 55 | df_embeddings = blob_io.download_parquet_to_df(self.config.EMBEDDINGS_PARQUET_NAME) 56 | self._log_embeddings_dataset_info(df_embeddings) 57 | 58 | return df_packages, df_embeddings 59 | 60 | @staticmethod 61 | def _log_packages_dataset_info(df_packages: pl.DataFrame) -> None: 62 | logging.info(f"Finished loading the `packages` dataset. Number of rows in dataset: {len(df_packages):,}") 63 | logging.info(df_packages.describe()) 64 | 65 | @staticmethod 66 | def _log_embeddings_dataset_info(df_embeddings: pl.DataFrame) -> None: 67 | logging.info(f"Finished loading the `embeddings` dataset. Number of rows in dataset: {len(df_embeddings):,}") 68 | logging.info(df_embeddings.describe()) 69 | 70 | @staticmethod 71 | def _drop_rows_from_embeddings_that_do_not_appear_in_packages(df_embeddings, df_packages): 72 | # We only keep the packages in the vector dataset that also occur in the packages dataset. 73 | # In theory, this should never drop something. But still good to keep as a fail-safe to prevent issues in the API. 
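# (For clarity: a Polars `semi` join keeps only the rows of the left frame, here df_embeddings, that have a matching `name` in df_packages; no columns from df_packages are added.)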
74 | logging.info("Dropping packages in the `embeddings` dataset that do not occur in the `packages` dataset...") 75 | logging.info(f"Number of rows before dropping: {len(df_embeddings):,}...") 76 | df_embeddings = df_embeddings.join(df_packages, on="name", how="semi") 77 | logging.info(f"Number of rows after dropping: {len(df_embeddings):,}...") 78 | return df_embeddings 79 | -------------------------------------------------------------------------------- /pypi_scout/api/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dotenv import load_dotenv 4 | from fastapi import FastAPI, HTTPException 5 | from fastapi.middleware.cors import CORSMiddleware 6 | from sentence_transformers import SentenceTransformer 7 | from slowapi import Limiter, _rate_limit_exceeded_handler 8 | from slowapi.errors import RateLimitExceeded 9 | from slowapi.util import get_remote_address 10 | from starlette.requests import Request 11 | 12 | from pypi_scout.api.data_loader import ApiDataLoader 13 | from pypi_scout.api.models import QueryModel, SearchResponse 14 | from pypi_scout.config import Config 15 | from pypi_scout.embeddings.simple_vector_database import SimpleVectorDatabase 16 | from pypi_scout.utils.logging import setup_logging 17 | from pypi_scout.utils.score_calculator import calculate_score 18 | 19 | setup_logging() 20 | logging.info("Initializing backend...") 21 | 22 | limiter = Limiter(key_func=get_remote_address) 23 | app = FastAPI() 24 | app.state.limiter = limiter 25 | app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) 26 | 27 | load_dotenv() 28 | config = Config() 29 | 30 | app.add_middleware( 31 | CORSMiddleware, 32 | allow_origins=["*"], 33 | allow_credentials=True, 34 | allow_methods=["*"], 35 | allow_headers=["*"], 36 | ) 37 | 38 | data_loader = ApiDataLoader(config) 39 | df_packages, df_embeddings = data_loader.load_dataset() 40 | 41 | model = SentenceTransformer(config.EMBEDDINGS_MODEL_NAME) 42 | vector_database = SimpleVectorDatabase(embeddings_model=model, df_embeddings=df_embeddings) 43 | 44 | 45 | @app.post("/api/search", response_model=SearchResponse) 46 | @limiter.limit("6/minute") 47 | async def search(query: QueryModel, request: Request): 48 | """ 49 | Search for the packages whose summary and description have the highest similarity to the query. 50 | We take the top_k * 2 most similar packages, and then calculate weighted score based on the similarity and weekly downloads. 51 | The top_k packages with the highest score are returned. 52 | """ 53 | 54 | if query.top_k > 100: 55 | raise HTTPException(status_code=400, detail="top_k cannot be larger than 100.") 56 | 57 | logging.info(f"Searching for similar projects. Query: '{query.query}'") 58 | df_matches = vector_database.find_similar(query.query, top_k=int(query.top_k * 3)) 59 | df_matches = df_matches.join(df_packages, how="left", on="name") 60 | logging.info( 61 | f"Fetched the {len(df_matches)} most similar projects. Calculating the weighted scores and filtering..." 
62 | ) 63 | 64 | df_matches = calculate_score( 65 | df_matches, weight_similarity=config.WEIGHT_SIMILARITY, weight_weekly_downloads=config.WEIGHT_WEEKLY_DOWNLOADS 66 | ) 67 | df_matches = df_matches.sort("score", descending=True) 68 | 69 | if len(df_matches) > query.top_k: 70 | df_matches = df_matches.head(query.top_k) 71 | 72 | logging.info(f"Returning the {len(df_matches)} best matches.") 73 | df_matches = df_matches.select(["name", "similarity", "summary", "weekly_downloads"]) 74 | return SearchResponse(matches=df_matches.to_dicts()) 75 | -------------------------------------------------------------------------------- /pypi_scout/api/models.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class QueryModel(BaseModel): 5 | query: str 6 | top_k: int 7 | 8 | 9 | class Match(BaseModel): 10 | name: str 11 | summary: str 12 | similarity: float 13 | weekly_downloads: int 14 | 15 | 16 | class SearchResponse(BaseModel): 17 | matches: list[Match] 18 | warning: bool = False 19 | warning_message: str = None 20 | -------------------------------------------------------------------------------- /pypi_scout/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | from enum import Enum 4 | from pathlib import Path 5 | 6 | 7 | class StorageBackend(Enum): 8 | LOCAL = "LOCAL" 9 | BLOB = "BLOB" 10 | 11 | 12 | @dataclass 13 | class Config: 14 | # Name of the model used for generating vector embeddings from text. 15 | # See https://sbert.net/docs/sentence_transformer/pretrained_models.html for available models. 16 | EMBEDDINGS_MODEL_NAME = "all-mpnet-base-v2" 17 | 18 | # Boolean to overwrite raw data file if it already exists 19 | OVERWRITE: bool = True 20 | 21 | # Directory where dataset files are stored. 22 | DATA_DIR: Path = Path("data") 23 | 24 | # Filename for the raw dataset CSV. 25 | RAW_DATASET_CSV_NAME = "raw_dataset.csv" 26 | 27 | # Filename for the processed dataset CSV. 28 | PROCESSED_DATASET_CSV_NAME = "processed_dataset.csv" 29 | 30 | # Filename for the dataset that contains the minimal data that the API needs. 31 | # For example; it needs the name, weekly downloads, and the summary, but not the (cleaned) description. 32 | DATASET_FOR_API_CSV_NAME = "dataset_for_api.csv" 33 | 34 | # Filename for the dataset that contains the minimal data that the API needs. 35 | # For example; it needs the name, weekly downloads, and the summary, but not the (cleaned) description. 36 | EMBEDDINGS_PARQUET_NAME = "embeddings.parquet" 37 | 38 | # Google Drive file ID for downloading the raw dataset. 39 | GOOGLE_FILE_ID = "12AH8PwKvZqRhXBf9uS1qRZq1-k3gIhhG" 40 | 41 | # Fraction of the dataset to include in the vector database. This value determines the portion of top packages 42 | # (sorted by weekly downloads) to include. Increase this value to include a larger portion of the dataset, up to 1.0 (100%). 43 | # For reference, a value of 0.25 corresponds to including all PyPI packages with at least approximately 650 weekly downloads 44 | FRAC_DATA_TO_INCLUDE = 1 45 | 46 | # Weights for the combined score calculation. Higher WEIGHT_SIMILARITY prioritizes 47 | # relevance based on text similarity, while higher WEIGHT_WEEKLY_DOWNLOADS prioritizes 48 | # packages with more weekly downloads. 49 | WEIGHT_SIMILARITY = 0.5 50 | WEIGHT_WEEKLY_DOWNLOADS = 0.5 51 | 52 | # Storage backend configuration. 
Can be either StorageBackend.LOCAL or StorageBackend.BLOB. 53 | # If StorageBackend.BLOB, the processed dataset will be uploaded to Blob, and the backend API 54 | # will read the data from there, rather than from a local data directory. In order to use StorageBackend.BLOB, 55 | # the other `STORAGE_BACKEND_BLOB_` variables need to be set as environment variables. 56 | STORAGE_BACKEND: StorageBackend = StorageBackend.LOCAL 57 | STORAGE_BACKEND_BLOB_ACCOUNT_NAME: str | None = None 58 | STORAGE_BACKEND_BLOB_CONTAINER_NAME: str | None = None 59 | STORAGE_BACKEND_BLOB_KEY: str | None = None 60 | 61 | def __post_init__(self) -> None: 62 | if os.getenv("STORAGE_BACKEND") == "BLOB": 63 | self.STORAGE_BACKEND = StorageBackend.BLOB 64 | self.STORAGE_BACKEND_BLOB_ACCOUNT_NAME = os.getenv("STORAGE_BACKEND_BLOB_ACCOUNT_NAME") 65 | self.STORAGE_BACKEND_BLOB_CONTAINER_NAME = os.getenv("STORAGE_BACKEND_BLOB_CONTAINER_NAME") 66 | self.STORAGE_BACKEND_BLOB_KEY = os.getenv("STORAGE_BACKEND_BLOB_KEY") 67 | 68 | if not all( 69 | [ 70 | self.STORAGE_BACKEND_BLOB_ACCOUNT_NAME, 71 | self.STORAGE_BACKEND_BLOB_CONTAINER_NAME, 72 | self.STORAGE_BACKEND_BLOB_KEY, 73 | ] 74 | ): 75 | raise OSError("One or more BLOB storage environment variables are missing!") # noqa: TRY003 76 | -------------------------------------------------------------------------------- /pypi_scout/data/description_cleaner.py: -------------------------------------------------------------------------------- 1 | import re 2 | from dataclasses import dataclass 3 | 4 | import polars as pl 5 | from bs4 import BeautifulSoup 6 | 7 | CLEANING_FAILED = "cleaning failed!" 8 | 9 | 10 | @dataclass 11 | class DescriptionCleaner: 12 | """ 13 | A class that provides methods to clean PyPI package descriptions in a DataFrame column. 14 | """ 15 | 16 | def clean(self, df: pl.DataFrame, input_col: str, output_col: str) -> pl.DataFrame: 17 | """ 18 | Cleans the text in the specified DataFrame column and returns the modified DataFrame. 19 | 20 | Args: 21 | df (pl.DataFrame): The DataFrame containing the text column to be cleaned. 22 | input_col (str): The name of the input column containing the text to be cleaned. 23 | output_col (str): The name of the output column to store the cleaned text. 24 | 25 | Returns: 26 | pl.DataFrame: The modified DataFrame with the cleaned text. 27 | """ 28 | df = df.with_columns(pl.col(input_col).map_elements(self._clean_text, return_dtype=pl.String).alias(output_col)) 29 | return df 30 | 31 | def _clean_text(self, text: str) -> str: 32 | """ 33 | Cleans the given text by removing HTML tags, markdown image links, markdown badges, 34 | markdown links, URLs, special markdown characters, markdown headers, and extra whitespaces. 35 | 36 | Args: 37 | text (str): The text to be cleaned. 38 | 39 | Returns: 40 | str: The cleaned text. 
41 | """ 42 | try: 43 | text = self._remove_html_tags(text) 44 | text = self._remove_markdown_image_links(text) 45 | text = self._remove_markdown_badges(text) 46 | text = self._remove_markdown_links(text) 47 | text = self._remove_urls(text) 48 | text = self._remove_special_markdown_characters(text) 49 | text = self._remove_markdown_headers(text) 50 | text = self._remove_extra_whitespaces(text) 51 | except: # noqa: E722 52 | return CLEANING_FAILED 53 | 54 | return text 55 | 56 | @staticmethod 57 | def _remove_html_tags(text: str) -> str: 58 | soup = BeautifulSoup(text, "lxml") 59 | return soup.get_text(separator=" ") 60 | 61 | @staticmethod 62 | def _remove_markdown_image_links(text: str) -> str: 63 | return re.sub(r"!\[.*?\]\(.*?\)", "", text) 64 | 65 | @staticmethod 66 | def _remove_markdown_badges(text: str) -> str: 67 | return re.sub(r"\[!\[.*?\]\(.*?\)\]", "", text) 68 | 69 | @staticmethod 70 | def _remove_markdown_links(text: str) -> str: 71 | return re.sub(r"\[.*?\]\(.*?\)", "", text) 72 | 73 | @staticmethod 74 | def _remove_urls(text: str) -> str: 75 | return re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE) 76 | 77 | @staticmethod 78 | def _remove_special_markdown_characters(text: str) -> str: 79 | return re.sub(r"[#*=_`]", "", text) 80 | 81 | @staticmethod 82 | def _remove_markdown_headers(text: str) -> str: 83 | return re.sub(r"\n\s*#{1,6}\s*", " ", text) 84 | 85 | @staticmethod 86 | def _remove_extra_whitespaces(text: str) -> str: 87 | return " ".join(text.split()) 88 | -------------------------------------------------------------------------------- /pypi_scout/data/raw_data_reader.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | 4 | import polars as pl 5 | 6 | 7 | @dataclass 8 | class RawDataReader: 9 | """ 10 | A class for reading and processing data from a raw PyPI dataset. 11 | """ 12 | 13 | raw_dataset: Path 14 | 15 | def read(self): 16 | """ 17 | Reads the raw dataset, performs data processing operations, and returns the processed dataframe. 18 | The dataset should at least have the following columns: name, description, and number_of_downloads. 19 | 20 | Returns: 21 | DataFrame: The processed dataframe. 22 | """ 23 | df = pl.read_csv(self.raw_dataset) 24 | df = df.with_columns(weekly_downloads=pl.col("number_of_downloads").cast(pl.Int32)) 25 | df = df.drop("number_of_downloads") 26 | df = df.unique(subset="name") 27 | df = df.filter(~(pl.col("description").is_null() & pl.col("summary").is_null())) 28 | df = df.sort("weekly_downloads", descending=True) 29 | df = df.with_columns( 30 | summary=pl.col("summary").fill_null(""), 31 | description=pl.col("description").fill_null(""), 32 | ) 33 | return df 34 | -------------------------------------------------------------------------------- /pypi_scout/embeddings/embeddings_creator.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import polars as pl 4 | from sentence_transformers import SentenceTransformer 5 | from tqdm import tqdm 6 | 7 | 8 | class VectorEmbeddingCreator: 9 | def __init__( 10 | self, 11 | embeddings_model: SentenceTransformer, 12 | embedding_column_name: str = "embeddings", 13 | batch_size: int = 128, 14 | ): 15 | """ 16 | Initializes the VectorEmbeddingCreator with a SentenceTransformer model, embedding column name, and batch size. 
17 | 18 | Args: 19 | embeddings_model (SentenceTransformer): The SentenceTransformer model to generate embeddings. 20 | embedding_column_name (str, optional): The name of the column to store embeddings. Defaults to 'embeddings'. 21 | batch_size (int, optional): The size of batches to process at a time. Defaults to 128. 22 | """ 23 | self.model = embeddings_model 24 | self.embedding_column_name = embedding_column_name 25 | self.batch_size = batch_size 26 | 27 | def add_embeddings(self, df: pl.DataFrame, text_column: str) -> pl.DataFrame: 28 | """ 29 | Adds embeddings to the DataFrame based on the specified text column. 30 | 31 | Args: 32 | df (pl.DataFrame): The Polars DataFrame to which embeddings will be added. 33 | text_column (str): The column name containing text to generate embeddings for. 34 | 35 | Returns: 36 | pl.DataFrame: The DataFrame with an additional column containing embeddings. 37 | """ 38 | logging.info("Splitting DataFrame into batches...") 39 | df_chunks = self._split_dataframe_in_batches(df, batch_size=self.batch_size) 40 | all_embeddings = [] 41 | 42 | logging.info("Generating embeddings...") 43 | for chunk in tqdm(df_chunks, desc="Generating embeddings", unit="batch"): 44 | embeddings = self._generate_embeddings(chunk, text_column) 45 | all_embeddings.extend(embeddings) 46 | 47 | df = df.with_columns(pl.Series(self.embedding_column_name, all_embeddings)) 48 | return df 49 | 50 | def _generate_embeddings(self, chunk: pl.DataFrame, text_column: str) -> list: 51 | embeddings = self.model.encode(list(chunk[text_column]), show_progress_bar=False) 52 | return embeddings 53 | 54 | @staticmethod 55 | def _split_dataframe_in_batches(df: pl.DataFrame, batch_size: int) -> list: 56 | """ 57 | Splits a Polars DataFrame into batches. 58 | """ 59 | n_chunks = (df.height + batch_size - 1) // batch_size 60 | return [df.slice(i * batch_size, batch_size) for i in range(n_chunks)] 61 | -------------------------------------------------------------------------------- /pypi_scout/embeddings/simple_vector_database.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import polars as pl 3 | from sentence_transformers import SentenceTransformer 4 | from sklearn.metrics.pairwise import cosine_similarity 5 | 6 | 7 | class SimpleVectorDatabase: 8 | def __init__( 9 | self, 10 | embeddings_model: SentenceTransformer, 11 | df_embeddings: pl.DataFrame, 12 | embedding_column: str = "embeddings", 13 | processed_column: str = "embeddings_array", 14 | ): 15 | """ 16 | Initializes the SimpleVectorDatabase with a SentenceTransformer model and a DataFrame containing embeddings. 17 | 18 | Args: 19 | embeddings_model (SentenceTransformer): The SentenceTransformer model to generate embeddings. 20 | df_embeddings (pl.DataFrame): The Polars DataFrame containing the initial embeddings. 21 | embedding_column (str, optional): The name of the column containing the original embeddings. Defaults to 'embeddings'. 22 | """ 23 | self.embeddings_model = embeddings_model 24 | self.df_embeddings = df_embeddings 25 | self.embedding_column = embedding_column 26 | self.embeddings_matrix = self._create_embeddings_matrix() 27 | 28 | def find_similar(self, query: str, top_k: int = 25) -> pl.DataFrame: 29 | """ 30 | Finds the top_k most similar vectors in the database for a given query. 31 | 32 | Args: 33 | query (str): The query string to find similar vectors for. 34 | top_k (int, optional): The number of similar vectors to retrieve. Defaults to 25. 
35 | 36 | Returns: 37 | pl.DataFrame: A Polars DataFrame containing the most similar vectors and their similarity scores. 38 | """ 39 | query_embedding = self.embeddings_model.encode(query, show_progress_bar=False) 40 | 41 | similarities = cosine_similarity([query_embedding], self.embeddings_matrix)[0] 42 | 43 | top_k_indices = np.argsort(similarities)[::-1][:top_k] 44 | top_k_scores = similarities[top_k_indices] 45 | df_best_matches = self.df_embeddings[top_k_indices] 46 | 47 | df_best_matches = df_best_matches.with_columns(pl.Series("similarity", top_k_scores)) 48 | df_best_matches = df_best_matches.drop(self.embedding_column) 49 | 50 | return df_best_matches 51 | 52 | def _create_embeddings_matrix(self) -> np.ndarray: 53 | return np.stack( 54 | self.df_embeddings[self.embedding_column].apply(lambda x: np.array(x, dtype=np.float32)).to_numpy() 55 | ) 56 | -------------------------------------------------------------------------------- /pypi_scout/scripts/create_vector_embeddings.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | import polars as pl 5 | from dotenv import load_dotenv 6 | from sentence_transformers import SentenceTransformer 7 | 8 | from pypi_scout.config import Config 9 | from pypi_scout.embeddings.embeddings_creator import VectorEmbeddingCreator 10 | from pypi_scout.utils.logging import setup_logging 11 | 12 | 13 | def read_processed_dataset(path_to_processed_dataset: Path): 14 | logging.info("📂 Reading the processed dataset...") 15 | df = pl.read_csv(path_to_processed_dataset) 16 | logging.info(f"📊 Number of rows in the processed dataset: {len(df):,}") 17 | return df 18 | 19 | 20 | def write_parquet(df: pl.DataFrame, processed_dataset_path: Path): 21 | logging.info(f"Storing dataset in {processed_dataset_path}...") 22 | df.write_parquet(processed_dataset_path) 23 | logging.info("✅ Done!") 24 | 25 | 26 | def create_vector_embeddings(): 27 | setup_logging() 28 | load_dotenv() 29 | 30 | config = Config() 31 | df = read_processed_dataset(config.DATA_DIR / config.PROCESSED_DATASET_CSV_NAME) 32 | df = df.with_columns( 33 | summary_and_description_cleaned=pl.concat_str(pl.col("summary"), pl.lit(" - "), pl.col("description_cleaned")) 34 | ) 35 | df = VectorEmbeddingCreator(embeddings_model=SentenceTransformer(config.EMBEDDINGS_MODEL_NAME)).add_embeddings( 36 | df, text_column="summary_and_description_cleaned" 37 | ) 38 | 39 | df = df.select("name", "embeddings").unique(subset="name") 40 | write_parquet(df, config.DATA_DIR / config.EMBEDDINGS_PARQUET_NAME) 41 | 42 | 43 | if __name__ == "__main__": 44 | setup_logging() 45 | create_vector_embeddings() 46 | -------------------------------------------------------------------------------- /pypi_scout/scripts/download_raw_dataset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import gdown 4 | from dotenv import load_dotenv 5 | 6 | from pypi_scout.config import Config 7 | from pypi_scout.utils.logging import setup_logging 8 | 9 | 10 | def download_raw_dataset(): 11 | """ 12 | Downloads the dataset from a Google Drive link using the gdown library. 13 | """ 14 | load_dotenv() 15 | config = Config() 16 | 17 | target_path = config.DATA_DIR / config.RAW_DATASET_CSV_NAME 18 | if target_path.exists(): 19 | if not config.OVERWRITE: 20 | logging.info(f"🔹 Raw dataset {target_path} from Google Drive already exists! 
Skipping download.") 21 | return 22 | else: 23 | logging.info( 24 | f"⤵️ Raw dataset {target_path} from Google Drive exists, but config.OVERWRITE is `true`. Overwriting..." 25 | ) 26 | 27 | logging.info(f"⬇️ Downloading raw dataset from Google Drive to {target_path}...") 28 | url = f"https://drive.google.com/uc?id={config.GOOGLE_FILE_ID}" 29 | gdown.download(url, str(target_path), quiet=False) 30 | logging.info("✅ Done!") 31 | 32 | 33 | if __name__ == "__main__": 34 | setup_logging() 35 | download_raw_dataset() 36 | -------------------------------------------------------------------------------- /pypi_scout/scripts/process_raw_dataset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import polars as pl 4 | from dotenv import load_dotenv 5 | 6 | from pypi_scout.config import Config 7 | from pypi_scout.data.description_cleaner import CLEANING_FAILED, DescriptionCleaner 8 | from pypi_scout.data.raw_data_reader import RawDataReader 9 | from pypi_scout.utils.logging import setup_logging 10 | 11 | 12 | def read_raw_dataset(path_to_raw_dataset): 13 | logging.info("📂 Reading the raw dataset...") 14 | df = RawDataReader(path_to_raw_dataset).read() 15 | logging.info(f"📊 Number of rows in the raw dataset: {len(df):,}") 16 | logging.info(f"The highest weekly downloads in the raw dataset: {df['weekly_downloads'].max():,}") 17 | logging.info(f"The lowest weekly downloads in the raw dataset: {df['weekly_downloads'].min():,}") 18 | return df 19 | 20 | 21 | def filter_top_packages(df, frac_data_to_include): 22 | logging.info( 23 | f"Using only the packages with weekly_downloads in the top {frac_data_to_include * 100}% of the dataset because config.FRAC_DATA_TO_INCLUDE is set to {frac_data_to_include}!" 24 | ) 25 | logging.info( 26 | "This means packages with low download counts are excluded from the results in the dashboard. To include more data, set config.FRAC_DATA_TO_INCLUDE to a higher value." 
27 | ) 28 | df = df.sort("weekly_downloads", descending=True) 29 | df = df.head(round(frac_data_to_include * len(df))) 30 | 31 | logging.info(f"📊 Number of rows after filtering: {len(df):,}") 32 | logging.info(f"The highest weekly downloads in the filtered dataset: {df['weekly_downloads'].max():,}") 33 | logging.info(f"The lowest weekly downloads in the filtered dataset: {df['weekly_downloads'].min():,}") 34 | return df 35 | 36 | 37 | def clean_descriptions(df): 38 | logging.info("🧹 Cleaning the descriptions...") 39 | df = DescriptionCleaner().clean(df, "description", "description_cleaned") 40 | df = df.filter(~pl.col("description_cleaned").is_null()) 41 | df = df.filter(pl.col("description_cleaned") != CLEANING_FAILED) 42 | return df 43 | 44 | 45 | def write_csv(df, processed_dataset_path): 46 | logging.info(f"Storing dataset in {processed_dataset_path}...") 47 | df.write_csv(processed_dataset_path) 48 | logging.info("✅ Done!") 49 | 50 | 51 | def process_raw_dataset(): 52 | load_dotenv() 53 | config = Config() 54 | df = read_raw_dataset(config.DATA_DIR / config.RAW_DATASET_CSV_NAME) 55 | if config.FRAC_DATA_TO_INCLUDE < 1.0: 56 | df = filter_top_packages(df, config.FRAC_DATA_TO_INCLUDE) 57 | df = clean_descriptions(df) 58 | 59 | write_csv(df, config.DATA_DIR / config.PROCESSED_DATASET_CSV_NAME) 60 | write_csv(df.select(["name", "summary", "weekly_downloads"]), config.DATA_DIR / config.DATASET_FOR_API_CSV_NAME) 61 | 62 | 63 | if __name__ == "__main__": 64 | setup_logging() 65 | process_raw_dataset() 66 | -------------------------------------------------------------------------------- /pypi_scout/scripts/setup.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pypi_scout.scripts.create_vector_embeddings import create_vector_embeddings 4 | from pypi_scout.scripts.download_raw_dataset import download_raw_dataset 5 | from pypi_scout.scripts.process_raw_dataset import process_raw_dataset 6 | from pypi_scout.scripts.upload_processed_datasets import upload_processed_datasets 7 | from pypi_scout.utils.logging import setup_logging 8 | 9 | 10 | def main(): 11 | setup_logging() 12 | 13 | logging.info("\n\nDOWNLOADING RAW DATASET -------------\n") 14 | download_raw_dataset() 15 | 16 | logging.info("\n\nPROCESSING RAW DATASET -------------\n") 17 | process_raw_dataset() 18 | 19 | logging.info("\n\nCREATING VECTOR EMBEDDINGS -------------\n") 20 | create_vector_embeddings() 21 | 22 | logging.info("\n\nUPLOADING PROCESSED DATASETS -------------\n") 23 | upload_processed_datasets() 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /pypi_scout/scripts/upload_processed_datasets.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dotenv import load_dotenv 4 | 5 | from pypi_scout.config import Config, StorageBackend 6 | from pypi_scout.utils.blob_io import BlobIO 7 | from pypi_scout.utils.logging import setup_logging 8 | 9 | 10 | def upload_processed_datasets(): 11 | load_dotenv() 12 | config = Config() 13 | 14 | if config.STORAGE_BACKEND != StorageBackend.BLOB: 15 | logging.info( 16 | "Not using BLOB backend. Skipping upload. 
To enable, configure the `STORAGE_BACKEND_` variables in config" 17 | ) 18 | return 19 | 20 | file_names = [config.PROCESSED_DATASET_CSV_NAME, config.DATASET_FOR_API_CSV_NAME, config.EMBEDDINGS_PARQUET_NAME] 21 | 22 | blob_io = BlobIO( 23 | config.STORAGE_BACKEND_BLOB_ACCOUNT_NAME, 24 | config.STORAGE_BACKEND_BLOB_CONTAINER_NAME, 25 | config.STORAGE_BACKEND_BLOB_KEY, 26 | ) 27 | 28 | for file_name in file_names: 29 | logging.info(f"💫 Uploading {file_name} to blob container `{config.STORAGE_BACKEND_BLOB_CONTAINER_NAME}`...") 30 | blob_io.upload_local_file(config.DATA_DIR / file_name, file_name) 31 | 32 | logging.info("✅ Done!") 33 | 34 | 35 | if __name__ == "__main__": 36 | setup_logging() 37 | upload_processed_datasets() 38 | -------------------------------------------------------------------------------- /pypi_scout/utils/blob_io.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from enum import Enum 3 | 4 | import polars as pl 5 | from azure.storage.blob import BlobServiceClient 6 | 7 | 8 | class Format(Enum): 9 | CSV = "csv" 10 | PARQUET = "parquet" 11 | 12 | 13 | class BlobIO: 14 | def __init__(self, account_name: str, container_name: str, account_key: str): 15 | self.account_name = account_name 16 | self.container_name = container_name 17 | self.account_key = account_key 18 | self.service_client = BlobServiceClient( 19 | account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key 20 | ) 21 | self.container_client = self.service_client.get_container_client(container_name) 22 | 23 | def upload_local_file(self, local_file_path: str, blob_name: str) -> None: 24 | with open(local_file_path, "rb") as data: 25 | blob_client = self.container_client.get_blob_client(blob_name) 26 | blob_client.upload_blob(data, overwrite=True) 27 | 28 | def download_csv_to_df(self, blob_name: str): 29 | return self._download_as_df(blob_name, Format.CSV) 30 | 31 | def download_parquet_to_df(self, blob_name: str): 32 | return self._download_as_df(blob_name, Format.PARQUET) 33 | 34 | def _download_as_df(self, blob_name: str, format: Format) -> pl.DataFrame: # noqa: A002 35 | """ 36 | //TODO: Improve by not reading into a file first. 
37 | """ 38 | blob_client = self.container_client.get_blob_client(blob_name) 39 | download_stream = blob_client.download_blob() 40 | 41 | with tempfile.NamedTemporaryFile(delete=True) as temp_file: 42 | temp_file.write(download_stream.readall()) 43 | temp_file.flush() 44 | 45 | if format == Format.CSV: 46 | return pl.read_csv(temp_file.name) 47 | 48 | if format == Format.PARQUET: 49 | return pl.read_parquet(temp_file.name) 50 | 51 | def exists(self, blob_name): 52 | blob_client = self.container_client.get_blob_client(blob_name) 53 | return blob_client.exists() 54 | -------------------------------------------------------------------------------- /pypi_scout/utils/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def setup_logging() -> None: 5 | logging.getLogger("azure").setLevel(logging.WARNING) 6 | 7 | logging.basicConfig( 8 | level=logging.INFO, 9 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 10 | handlers=[logging.StreamHandler()], 11 | ) 12 | -------------------------------------------------------------------------------- /pypi_scout/utils/score_calculator.py: -------------------------------------------------------------------------------- 1 | import polars as pl 2 | 3 | 4 | def calculate_score( 5 | df: pl.DataFrame, weight_similarity: float = 0.5, weight_weekly_downloads: float = 0.5 6 | ) -> pl.DataFrame: 7 | """ 8 | Calculate a combined score for packages based on similarity and weekly downloads. 9 | 10 | This function normalizes the 'similarity' and 'weekly_downloads' columns to a [0, 1] scale, 11 | and computes a combined score using the provided weights for similarity and weekly downloads. 12 | The combined score helps in recommending packages that are both popular and relevant based on similarity. 13 | 14 | Args: 15 | df (pl.DataFrame): DataFrame containing 'similarity' and 'weekly_downloads' columns. 16 | weight_similarity (float): Weight for the similarity score in the combined score calculation. Default is 0.5. 17 | weight_weekly_downloads (float): Weight for the weekly downloads score in the combined score calculation. Default is 0.5. 18 | 19 | Returns: 20 | pl.DataFrame: DataFrame with the combined score and sorted by this score in descending order. 21 | """ 22 | df = df.with_columns( 23 | log_weekly_downloads=pl.col("weekly_downloads").log1p() # log1p is log(1 + x) 24 | ) 25 | 26 | df = df.with_columns( 27 | normalized_similarity=(pl.col("similarity") - pl.col("similarity").min()) 28 | / (pl.col("similarity").max() - pl.col("similarity").min()), 29 | normalized_log_weekly_downloads=(pl.col("log_weekly_downloads") - pl.col("log_weekly_downloads").min()) 30 | / (pl.col("log_weekly_downloads").max() - pl.col("log_weekly_downloads").min()), 31 | ) 32 | 33 | df = df.with_columns( 34 | score=weight_similarity * pl.col("normalized_similarity") 35 | + weight_weekly_downloads * pl.col("normalized_log_weekly_downloads") 36 | ) 37 | 38 | df = df.sort("score", descending=True) 39 | return df 40 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pypi_scout" 3 | version = "0.0.1" 4 | description = "PyPI Scout helps you find PyPI packages through natural language prompts with the help of Large Language Models (LLMs)." 
5 | authors = ["Florian Maas "] 6 | repository = "https://github.com/fpgmaas/pypi-scout" 7 | documentation = "https://fpgmaas.github.io/pypi-scout/" 8 | readme = "README.md" 9 | packages = [ 10 | {include = "pypi_scout"} 11 | ] 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.9,<4.0" 15 | beautifulsoup4 = "^4.12.3" 16 | polars = "^0.20.31" 17 | sentence-transformers = "^3.0.1" 18 | lxml = "^5.2.2" 19 | python-dotenv = "^1.0.1" 20 | tqdm = "^4.66.4" 21 | fastapi = "^0.111.0" 22 | pydantic = "^2.7.4" 23 | uvicorn = "^0.30.1" 24 | gdown = "^5.2.0" 25 | azure-storage-blob = "^12.20.0" 26 | slowapi = "^0.1.9" 27 | starlette = "^0.37.2" 28 | numpy = "^2.0.0" 29 | scikit-learn = "^1.5.0" 30 | 31 | [tool.poetry.group.dev.dependencies] 32 | pytest = "^7.2.0" 33 | pytest-cov = "^4.0.0" 34 | pytest-mock = "^3.14.0" 35 | deptry = "^0.12.0" 36 | pre-commit = "^3.4.0" 37 | tox = "^4.11.1" 38 | 39 | [build-system] 40 | requires = ["poetry-core>=1.0.0"] 41 | build-backend = "poetry.core.masonry.api" 42 | 43 | [tool.pytest.ini_options] 44 | testpaths = ["tests"] 45 | 46 | [tool.ruff] 47 | target-version = "py37" 48 | line-length = 120 49 | fix = true 50 | select = [ 51 | # flake8-2020 52 | "YTT", 53 | # flake8-bandit 54 | "S", 55 | # flake8-bugbear 56 | "B", 57 | # flake8-builtins 58 | "A", 59 | # flake8-comprehensions 60 | "C4", 61 | # flake8-debugger 62 | "T10", 63 | # flake8-simplify 64 | "SIM", 65 | # isort 66 | "I", 67 | # mccabe 68 | "C90", 69 | # pycodestyle 70 | "E", "W", 71 | # pyflakes 72 | "F", 73 | # pygrep-hooks 74 | "PGH", 75 | # pyupgrade 76 | "UP", 77 | # ruff 78 | "RUF", 79 | # tryceratops 80 | "TRY", 81 | ] 82 | ignore = [ 83 | # LineTooLong 84 | "E501", 85 | # DoNotAssignLambda 86 | "E731", 87 | ] 88 | 89 | [tool.ruff.format] 90 | preview = true 91 | 92 | [tool.coverage.report] 93 | skip_empty = true 94 | 95 | [tool.coverage.run] 96 | branch = true 97 | source = ["pypi_scout"] 98 | 99 | 100 | [tool.ruff.per-file-ignores] 101 | "tests/*" = ["S101"] 102 | 103 | [tool.deptry] 104 | extend_exclude = [ 105 | "frontend" 106 | ] 107 | 108 | [tool.deptry.per_rule_ignores] 109 | DEP002 = ["lxml", "uvicorn"] 110 | -------------------------------------------------------------------------------- /requirements-cpu.txt: -------------------------------------------------------------------------------- 1 | # This file is used in DockerfileCPU. It installs torch without GPU support and without the NVIDIA package. 2 | # This disables GPU support in the container, but reduces the size drastically (multiple GB's.) 
3 | beautifulsoup4==4.12.3 4 | polars==0.20.31 5 | sentence-transformers==3.0.1 6 | lxml==5.2.2 7 | python-dotenv==1.0.1 8 | tqdm==4.66.4 9 | fastapi==0.111.0 10 | pydantic==2.7.4 11 | uvicorn==0.30.1 12 | gdown==5.2.0 13 | torch==2.0.1 14 | numpy==1.24.4 15 | azure-storage-blob==12.20.0 16 | slowapi==0.1.9 17 | starlette==0.37.2 18 | scikit-learn==1.5.0 19 | --index-url=https://download.pytorch.org/whl/cpu 20 | --extra-index-url=https://pypi.org/simple 21 | -------------------------------------------------------------------------------- /static/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpgmaas/pypi-scout/593a48a2512a14c350bae98b087cd861d94a0c6b/static/demo.gif -------------------------------------------------------------------------------- /static/pypi-light.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /static/pypi.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/embeddings/test_simple_vector_database.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | import numpy as np 4 | import polars as pl 5 | import pytest 6 | 7 | from pypi_scout.embeddings.simple_vector_database import SimpleVectorDatabase 8 | 9 | 10 | @pytest.fixture 11 | def mock_model(): 12 | # Mock the SentenceTransformer model 13 | mock_model = MagicMock() 14 | # Mock the encode method to return a fixed vector 15 | mock_model.encode.return_value = np.array([0.5, 0.5, 0.5]) 16 | return mock_model 17 | 18 | 19 | @pytest.fixture 20 | def df_embeddings(): 21 | return pl.DataFrame( 22 | { 23 | "id": [1, 2, 3], 24 | "text": ["Hello world", "Hi there", "Greetings"], 25 | "embeddings": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], 26 | } 27 | ) 28 | 29 | 30 | @pytest.fixture 31 | def vector_db(mock_model, df_embeddings): 32 | return SimpleVectorDatabase(embeddings_model=mock_model, df_embeddings=df_embeddings) 33 | 34 | 35 | def test_embeddings_matrix_creation(vector_db): 36 | expected_matrix = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], dtype=np.float32) 37 | 38 | np.testing.assert_allclose(vector_db.embeddings_matrix, expected_matrix, rtol=1e-6, atol=1e-8) 39 | 40 | 41 | def test_find_similar(vector_db): 42 | query = "Hello" 43 | result = vector_db.find_similar(query, top_k=2) 44 | 45 | assert result.shape[0] == 2 46 | 47 | assert result["similarity"].min() >= 0 48 | assert result["similarity"].max() <= 1 49 | 50 | expected_columns = ["id", "text", "similarity"] 51 | assert set(result.columns) == set(expected_columns) 52 | --------------------------------------------------------------------------------
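For reference, the search endpoint defined in pypi_scout/api/main.py can be exercised without the frontend. The snippet below is a minimal, hypothetical client sketch rather than part of the repository: it assumes a backend running locally and reachable at http://localhost:8000/api (the default NEXT_PUBLIC_API_URL in frontend/next.config.mjs) and uses only the Python standard library. Note that the API rejects top_k values above 100 and rate-limits clients to 6 requests per minute.

import json
import urllib.request

# Hypothetical query; adjust the text and top_k as needed (top_k must be <= 100).
payload = json.dumps({"query": "plotting library for dataframes", "top_k": 10}).encode("utf-8")

request = urllib.request.Request(
    "http://localhost:8000/api/search",  # assumed local deployment, matching next.config.mjs
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)

with urllib.request.urlopen(request) as response:
    body = json.loads(response.read())

# Each match has `name`, `summary`, `similarity`, and `weekly_downloads`,
# as defined in pypi_scout/api/models.py.
for match in body["matches"]:
    print(f"{match['name']:<30} similarity={match['similarity']:.3f} downloads={match['weekly_downloads']:,}")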