├── .dockerignore ├── .github ├── badges │ └── coverage.json └── workflows │ └── pipeline.yml ├── .gitignore ├── .python-version ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── pyproject.toml ├── scripts ├── __init__.py ├── __main__.py └── _helpers.py ├── tests ├── __init__.py ├── _fixtures.py ├── conftest.py ├── test_dict_views.py ├── test_misc.py ├── test_spell.py ├── test_spell_views.py └── test_various_views.py ├── uv.lock └── whole_app ├── __init__.py ├── __main__.py ├── auth.py ├── dictionaries ├── __init__.py ├── dummy.py ├── file.py └── protocol.py ├── misc_helpers.py ├── models.py ├── settings.py ├── spell.py └── views.py /.dockerignore: -------------------------------------------------------------------------------- 1 | scripts/ 2 | __pycache__/ 3 | .mypy_cache/ 4 | .pytest_cache/ 5 | .coverage 6 | .DS_Store 7 | .git 8 | .github 9 | .venv 10 | Dockerfile 11 | Makefile 12 | README.md 13 | -------------------------------------------------------------------------------- /.github/badges/coverage.json: -------------------------------------------------------------------------------- 1 | {"schemaVersion": 1, "label": "coverage", "message": "100%", "color": "#2A9D8F"} -------------------------------------------------------------------------------- /.github/workflows/pipeline.yml: -------------------------------------------------------------------------------- 1 | name: Main pipeline 2 | on: 3 | push: 4 | branches: 5 | - "main" 6 | - "feature/**" 7 | - "hotfix/**" 8 | - "bugfix/**" 9 | tags: 10 | - v* 11 | 12 | jobs: 13 | update-readme: 14 | runs-on: ubuntu-latest 15 | if: github.ref_type == 'branch' 16 | steps: 17 | - uses: actions/checkout@v3 18 | with: 19 | fetch-depth: 0 20 | - uses: actions/cache@v3 21 | with: 22 | path: | 23 | .venv 24 | ~/.cache/uv 25 | key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }} 26 | - run: | 27 | sudo apt-get update -y 28 | sudo apt-get install -y enchant-2 hunspell-ru hunspell-es hunspell-de-de hunspell-fr hunspell-pt-pt 29 | pip install uv 30 | uv sync --group dev 31 | uv run make update-readme 32 | - uses: stefanzweifel/git-auto-commit-action@v5 33 | with: 34 | commit_message: "docs: auto-update README" 35 | file_pattern: README.md 36 | 37 | py-lint-and-test: 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v3 41 | - run: | 42 | make build 43 | make lint-in-docker 44 | make test-in-docker 45 | 46 | docker-lint: 47 | runs-on: ubuntu-latest 48 | steps: 49 | - uses: actions/checkout@v3 50 | - uses: hadolint/hadolint-action@v2.0.0 51 | with: 52 | failure-threshold: error 53 | 54 | coverage: 55 | runs-on: ubuntu-latest 56 | permissions: 57 | contents: write 58 | pages: write 59 | id-token: write 60 | steps: 61 | - uses: actions/checkout@v3 62 | - uses: actions/cache@v3 63 | with: 64 | path: | 65 | .venv 66 | ~/.cache/uv 67 | key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }} 68 | - run: | 69 | sudo apt-get update -y 70 | sudo apt-get install -y enchant-2 hunspell-ru hunspell-es hunspell-de-de hunspell-fr hunspell-pt-pt curl 71 | pip install uv 72 | uv sync --group dev 73 | - run: uv run pytest -n3 . 
--cov-report=xml --cov-report=html 74 | - run: uv run python -m scripts build-coverage-badge 75 | - uses: stefanzweifel/git-auto-commit-action@v5 76 | with: 77 | commit_message: "docs: update coverage badge" 78 | file_pattern: .github/badges/coverage.json 79 | - run: mv htmlcov coverage 80 | - uses: actions/upload-pages-artifact@v3 81 | with: 82 | path: coverage 83 | 84 | deploy-coverage: 85 | needs: coverage 86 | runs-on: ubuntu-latest 87 | permissions: 88 | pages: write 89 | id-token: write 90 | environment: 91 | name: github-pages 92 | url: ${{ steps.deployment.outputs.page_url }} 93 | steps: 94 | - id: deployment 95 | uses: actions/deploy-pages@v4 96 | 97 | # build stage with auto-versioning based on git tags like vX.Y.Z (example: v3.1.2) 98 | build-and-publish: 99 | needs: [py-lint-and-test, docker-lint, coverage] 100 | runs-on: ubuntu-latest 101 | if: startsWith(github.ref, 'refs/tags/v') 102 | steps: 103 | - uses: actions/checkout@v3 104 | - uses: actions/setup-python@v5 105 | with: 106 | python-version: "3.11" 107 | - run: | 108 | pip install uv 109 | uv version $(python -c "import os; print(os.getenv('GITHUB_REF').lstrip('/').replace('refs/tags/v', ''));") 110 | python -c "import re, pathlib; _p = pathlib.Path('README.md'); _p.write_text(re.sub(r'\#\# Development.*', r'', _p.read_text(), flags=re.I | re.S).strip())" 111 | - uses: docker/setup-buildx-action@v2 112 | - uses: docker/login-action@v2 113 | with: 114 | username: ${{ secrets.DOCKER_HUB_USERNAME }} 115 | password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} 116 | - name: Add tags for image 117 | uses: docker/metadata-action@v4 118 | id: meta 119 | with: 120 | images: ${{ secrets.DOCKER_HUB_USERNAME }}/spellcheck-microservice 121 | tags: | 122 | type=semver,pattern={{version}} 123 | type=raw,value=latest 124 | - name: Build and push 125 | uses: docker/build-push-action@v3 126 | with: 127 | context: . 
128 |           platforms: linux/amd64
129 |           push: ${{ github.event_name != 'pull_request' }}
130 |           tags: ${{ steps.meta.outputs.tags }}
131 |           labels: ${{ steps.meta.outputs.labels }}
132 | 
133 |   update-dockerhub-readme:
134 |     needs: [build-and-publish]
135 |     runs-on: ubuntu-latest
136 |     container:
137 |       image: python:3.11-slim
138 |     steps:
139 |       - uses: actions/checkout@v3
140 |       - uses: actions/cache@v3
141 |         with:
142 |           path: |
143 |             .venv
144 |             ~/.cache/uv
145 |           key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }}
146 |       - run: |
147 |           apt-get update -y
148 |           apt-get install -y make
149 |           pip install uv
150 |           uv sync --group dev
151 |           uv run make update-dockerhub-readme
152 |       - uses: peter-evans/dockerhub-description@v3
153 |         with:
154 |           username: ${{ secrets.DOCKER_HUB_USERNAME }}
155 |           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
156 |           repository: ${{ secrets.DOCKER_HUB_USERNAME }}/spellcheck-microservice
157 |           readme-filepath: ./README.md
158 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | 
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 | 
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 | 
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 | 
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 | 
68 | # Scrapy stuff:
69 | .scrapy
70 | 
71 | # Sphinx documentation
72 | docs/_build/
73 | 
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 | 
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 | 
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 | 
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 | 
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 | 
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | .idea/ 161 | 162 | # More various 163 | .DS_Store 164 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG USERNAME=snippet-microservice-user 2 | ARG USER_UID=2000 3 | ARG USER_GID=$USER_UID 4 | ARG WORKDIR=/srv/www/ 5 | 6 | FROM pypy:3.11-slim AS builder 7 | ENV UV_COMPILE_BYTECODE=1 8 | ENV UV_LINK_MODE=copy 9 | ARG USERNAME 10 | ARG USER_UID 11 | ARG USER_GID 12 | ARG WORKDIR 13 | WORKDIR $WORKDIR 14 | RUN groupadd --gid $USER_GID $USERNAME 15 | RUN useradd --uid $USER_UID --gid $USER_GID -m $USERNAME 16 | RUN apt-get update -y 17 | # install rust 18 | RUN apt-get install -y curl 19 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 20 | ENV PATH="/root/.cargo/bin:${PATH}" 21 | # install prerequisites 22 | RUN apt-get install -y build-essential libssl-dev enchant-2 hunspell-ru hunspell-es hunspell-de-de hunspell-fr hunspell-pt-pt 23 | RUN pip install -U pip uv 24 | # Install the project's dependencies using the lockfile and settings 25 | RUN --mount=type=cache,target=/root/.cache/uv \ 26 | --mount=type=bind,source=uv.lock,target=uv.lock \ 27 | --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ 28 | uv sync --locked --no-install-project 29 | # massive cleanup 30 | RUN uv cache clean 31 | RUN pip uninstall -y uv pip setuptools 32 | RUN rustup self uninstall -y 33 | RUN apt-get remove -y build-essential libssl-dev gcc curl 34 | RUN apt-get clean autoclean 35 | RUN apt-get autoremove --yes 36 | RUN rm -rf /var/lib/{apt,dpkg,cache,log}/ 37 | RUN rm -rf /var/lib/apt/lists/* 38 | # make necessary dirs 39 | RUN mkdir /data/ 40 | RUN chmod 777 
/data/ 41 | 42 | FROM pypy:3.11-slim AS runtime 43 | ARG USERNAME 44 | ARG WORKDIR 45 | WORKDIR $WORKDIR 46 | COPY --from=builder / / 47 | COPY . $WORKDIR 48 | USER $USERNAME 49 | ENV PATH="$WORKDIR/.venv/bin:$PATH" 50 | ENV SPELLCHECK_ENABLE_CORS=false 51 | CMD ["python", "-m", "whole_app"] 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Denis Anikin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | run: 2 | SPELLCHECK_DICTIONARIES_PATH=/tmp/sm-dicts/ SPELLCHECK_API_KEY=debug granian --reload --interface asgi whole_app.views:SPELL_APP 3 | 4 | build: 5 | docker build -t spellcheck-microservice . 6 | 7 | prepare-buildx: 8 | docker buildx create --use --name newbuilder 9 | 10 | build-buildx: 11 | docker buildx build --platform linux/amd64,linux/arm64 -t spellcheck-microservice . 12 | 13 | exec: 14 | docker run -it spellcheck-microservice bash 15 | 16 | test: 17 | pytest . -n3 18 | 19 | test-in-docker: 20 | docker run -t spellcheck-microservice bash -c "COVERAGE_FILE=/tmp/junk.coverage pytest . -n3" 21 | 22 | lint: 23 | ruff check . --no-fix 24 | mypy . 25 | vulture whole_app --min-confidence 100 26 | 27 | lint-in-docker: 28 | docker run -t spellcheck-microservice bash -c "RUFF_CACHE_DIR=/tmp/ruff-cache ruff check . --no-fix && mypy . 
&& vulture whole_app --min-confidence 100" 29 | 30 | run-prod: 31 | docker run -p 10113:10113 -e SPELLCHECK_WORKERS=1 -t spellcheck-microservice:latest 32 | 33 | check-languages: 34 | python -c "import enchant; print(enchant.Broker().list_languages());" 35 | 36 | check-languages-docker: 37 | docker run -it spellcheck-microservice python -c "import enchant; print(enchant.Broker().list_languages());" 38 | 39 | update-readme: 40 | python -m scripts update-readme 41 | 42 | update-dockerhub-readme: 43 | python -m scripts update-dockerhub-readme 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spellcheck microservice 2 | 3 | [![GitHub tag (latest SemVer)](https://img.shields.io/github/v/tag/xfenix/spellcheck-microservice?label=version)](https://github.com/xfenix/spellcheck-microservice/releases) 4 | [![Docker Pulls](https://img.shields.io/docker/pulls/xfenix/spellcheck-microservice)](https://hub.docker.com/r/xfenix/spellcheck-microservice) 5 | [![Coverage](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/xfenix/spellcheck-microservice/main/.github/badges/coverage.json)](https://xfenix.github.io/spellcheck-microservice/) 6 | [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) 7 | Code style: black 8 | [![Imports: isort](https://img.shields.io/badge/imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://timothycrosley.github.io/isort/) 9 | Mypy checked 10 | 11 | This is a microservice designed to check the spelling of words. Based on [pyenchant](https://github.com/pyenchant/pyenchant). Exposes a REST API.
12 | Currently available languages are: ru_RU, en_US, es_ES, fr_FR, de_DE, pt_PT.
13 | It runs blazingly fast thanks to pyenchant at its core, LRU caching, and PyPy.
14 | It also supports a feature called «user dictionaries»: a user can add their own word exceptions to a personal dictionary.
15 | 
16 | ## Quickstart
17 | 
18 | - `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:latest`
19 | - check http://localhost:10113/docs/ for the full REST documentation
20 | - the main REST endpoint you will need is http://localhost:10113/api/check/ (it is available without authorization)
21 | 
22 | ## Configuration
23 | 
24 | ### Config options
25 | 
26 | You can change the configuration of the service via environment variables. Here is a list of them:
27 | * `SPELLCHECK_SENTRY_DSN` Sentry DSN for integration. An empty field disables the integration. Default value is empty string.
28 | * `SPELLCHECK_API_KEY` defines the API key, used mostly for the user dictionaries API. Please provide it if you want to enable the user dictionaries API. Default value is empty string.
29 | * `SPELLCHECK_ENABLE_CORS` enables CORS for all endpoints. In the docker container this option is disabled. Default value is `True`.
30 | * `SPELLCHECK_STRUCTURED_LOGGING` enables structured (json) logging. Default value is `True`.
31 | * `SPELLCHECK_WORKERS` defines the application server workers count. If you plan to use k8s and only scale with replica sets, you might want to reduce this value to `1`. Default value is `8`. Restrictions: `Gt(gt=0)`, `Lt(lt=301)`
32 | * `SPELLCHECK_SERVER_ADDRESS` binding address; the default value is suitable for docker. Default value is `0.0.0.0`.
33 | * `SPELLCHECK_PORT` binding port. Default value is `10113`. Restrictions: `Gt(gt=1023)`, `Lt(lt=65536)`
34 | * `SPELLCHECK_CACHE_SIZE` defines the LRU cache size for the misspelled word/suggestions cache. Any value less than `1` makes the cache size unlimited, so be careful with this option. Default value is `10000`.
35 | * `SPELLCHECK_API_PREFIX` defines the URL prefix for all APIs. Default value is `/api/`.
36 | * `SPELLCHECK_DOCS_URL` defines the documentation (swagger) URL prefix. Default value is `/docs/`.
37 | * `SPELLCHECK_MAX_SUGGESTIONS` defines the maximum number of suggestions returned for each word. 0 means unlimited. Default value is `0`. Restrictions: `Ge(ge=0)`
38 | * `SPELLCHECK_DICTIONARIES_PATH` defines the directory where user dictionaries are stored. This is an inner directory in the docker image; please map it to a volume as shown in the quickstart part of this readme. Default value is `/data`.
39 | * `SPELLCHECK_DICTIONARIES_STORAGE_PROVIDER` defines which engine will store user dictionaries. Default value is `StorageProviders.FILE`.
40 | * `SPELLCHECK_DICTIONARIES_DISABLED` switches off the user dictionaries API no matter what. Default value is `False`.
41 | * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum length of a username. Default value is `3`.
42 | * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum length of a username. Default value is `60`.
43 | * `SPELLCHECK_EXCLUSION_WORDS_STR` a string with a comma-separated list of words which will be ignored in each request to the /api/check endpoint. Example: `'foo, bar'`. Default value is empty string.
44 | 
45 | ### Deployment
46 | 
47 | Note: all docker & docker-compose variants use named volumes to store user dictionaries.
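
Once a container is up, you can exercise both main APIs end to end. Here is a minimal sketch using `httpx` (assumptions: the service runs locally with the default port `10113` and API prefix `/api/`, and `your-api-key` is a placeholder for whatever you put in `SPELLCHECK_API_KEY`):

```python
import httpx

BASE_URL = "http://localhost:10113/api"

# Spellcheck: available without authorization.
check_response = httpx.post(
    f"{BASE_URL}/check/",
    json={"text": "превет как дила", "language": "ru_RU"},
)
check_response.raise_for_status()
for one_correction in check_response.json()["corrections"]:
    # each correction carries the misspelled word, its positions and suggestions
    print(one_correction["word"], "->", one_correction["suggestions"])

# User dictionaries: requires SPELLCHECK_API_KEY to be set on the server side
# and passed in the Api-Key header (the header name itself is configurable).
dictionary_response = httpx.post(
    f"{BASE_URL}/dictionaries/",
    json={"user_name": "some_user", "exception_word": "превет"},
    headers={"Api-Key": "your-api-key"},
)
assert dictionary_response.status_code == 201
```
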
48 | 
49 | #### Plain docker
50 | 
51 | `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:latest`
52 | 
53 | #### Docker-compose
54 | 
55 | - Save this example configuration as `docker-compose.yml`:
56 | 
57 | ```yml
58 | version: "3.9"
59 | services:
60 |   spellcheck:
61 |     image: xfenix/spellcheck-microservice:latest
62 |     ports:
63 |       - "10113:10113"
64 |     volumes:
65 |       - spellcheck-dicts:/data/
66 | 
67 | volumes:
68 |   spellcheck-dicts:
69 | ```
70 | 
71 | - Then run `docker-compose up`
72 | 
73 | ## Changelog
74 | 
75 | You can find it here: https://github.com/xfenix/spellcheck-microservice/releases
76 | 
77 | ## Development
78 | 
79 | ### Quickstart
80 | 
81 | - Clone this repo
82 | - For macOS: `brew install enchant`
83 | - For Debian/Ubuntu: `apt-get install -y enchant-2 hunspell-ru`
84 | - `uv sync --group dev`
85 | - `source .venv/bin/activate`
86 | - Execute `make` to run the local development server
87 | - README is automatically updated in the CI pipeline on each commit
88 | 
89 | ### Notes
90 | 
91 | The default API key for local development is `debug` (you will need it to work with the user dictionaries API).
92 | 
93 | Please check [./Makefile](./Makefile) for more details.
94 | 
95 | ### Troubleshooting
96 | 
97 | For macOS on Apple Silicon, add `PYENCHANT_LIBRARY_PATH=/opt/homebrew/lib/libenchant-2.dylib` to your `.zprofile`.
98 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | 
2 | [project]
3 | name = "spellcheck-microservice"
4 | version = "4.0.0"
5 | description = "Microservice for spellchecking"
6 | authors = [{ name = "Denis Anikin", email = "ad@xfenix.ru" }]
7 | license = { text = "MIT" }
8 | requires-python = ">=3.10"
9 | dependencies = [
10 |     "granian",
11 |     "pyenchant",
12 |     "toml",
13 |     "cachebox",
14 |     "anyio>=4",
15 |     "sentry-sdk",
16 |     "pydantic-settings",
17 |     "fastapi",
18 |     "structlog",
19 |     "urlextract",
20 | ]
21 | 
22 | [dependency-groups]
23 | dev = [
24 |     "httpx",
25 |     "pytest",
26 |     "pytest-cov",
27 |     "pytest-xdist",
28 |     "mypy",
29 |     "requests",
30 |     "types-requests",
31 |     "Faker",
32 |     "vulture",
33 |     "types-toml",
34 |     "pytest-repeat",
35 |     "ruff",
36 | ]
37 | 
38 | [tool.ruff]
39 | fix = true
40 | unsafe-fixes = true
41 | line-length = 120
42 | 
43 | [tool.ruff.lint]
44 | select = ["ALL"]
45 | ignore = ["D1", "D203", "D213", "FA102", "COM812", "ISC001"]
46 | 
47 | [tool.ruff.format]
48 | docstring-code-format = true
49 | 
50 | [tool.ruff.lint.isort]
51 | no-lines-before = ["standard-library", "local-folder"]
52 | known-third-party = []
53 | known-local-folder = ["whole_app"]
54 | lines-after-imports = 2
55 | 
56 | [tool.ruff.lint.extend-per-file-ignores]
57 | "tests/*.py" = [
58 |     "ANN001",
59 |     "ANN002",
60 |     "ANN003",
61 |     "ANN401",
62 |     "S101",
63 |     "PLR2004",
64 |     "S311",
65 | ]
66 | "tests/_fixtures.py" = ["E501"]
67 | 
68 | [tool.mypy]
69 | plugins = "pydantic.mypy"
70 | strict = true
71 | ignore_missing_imports = true
72 | 
73 | [tool.vulture]
74 | exclude = ["whole_app/settings.py"]
75 | 
76 | [tool.pytest.ini_options]
77 | addopts = "--cov . --cov-report term-missing"
--cov-report term-missing" 78 | 79 | [tool.coverage.report] 80 | exclude_also = [ 81 | "if typing.TYPE_CHECKING", 82 | ] 83 | 84 | [tool.coverage.run] 85 | omit = ["scripts/*"] 86 | 87 | [tool.hatch.build.targets.wheel] 88 | packages = ["whole_app"] 89 | 90 | [build-system] 91 | requires = ["hatchling"] 92 | build-backend = "hatchling.build" 93 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xfenix/spellcheck-microservice/a12f35af9cf36ad85c6d9eaa1b6a7a284da7eea4/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import json 4 | import pathlib 5 | import re 6 | import sys 7 | import types 8 | import typing 9 | import xml.etree.ElementTree as ET 10 | 11 | from ._helpers import parse_last_git_tag, replace_tag_in_readme 12 | from whole_app.settings import SETTINGS 13 | 14 | 15 | PARENT_DIR: typing.Final = pathlib.Path(__file__).parent.parent 16 | README_PATH: typing.Final = PARENT_DIR / "README.md" 17 | COVERAGE_XML_PATH: typing.Final = pathlib.Path("coverage.xml") 18 | BADGE_JSON_PATH: typing.Final = pathlib.Path(".github/badges/coverage.json") 19 | LOW_BOUNDARY: typing.Final[float] = 60 20 | HIGH_BOUNDARY: typing.Final[float] = 80 21 | 22 | 23 | def _update_dockerhub_readme() -> None: 24 | new_content = re.sub( 25 | r"\#\# Development.*", 26 | r"", 27 | README_PATH.read_text(), 28 | flags=re.IGNORECASE | re.DOTALL, 29 | ).strip() 30 | new_content = replace_tag_in_readme(new_content, parse_last_git_tag()) 31 | README_PATH.write_text(new_content + "\n") 32 | 33 | 34 | def _update_readme() -> None: 35 | pack_of_readme_lines: list[str] = [] 36 | new_content: str = README_PATH.read_text() 37 | env_prefix_value: typing.Final = SETTINGS.model_config["env_prefix"] 38 | for one_field_name, field_properties in SETTINGS.model_fields.items(): 39 | if field_properties.description is None: 40 | print("-", one_field_name, "not be available in README") # noqa: T201 41 | continue 42 | default_value_beautified: str = ( 43 | "empty string" 44 | if isinstance(field_properties.default, str) and not field_properties.default 45 | else f"`{field_properties.default}`" 46 | ) 47 | one_row_parts = [ 48 | f"`{(env_prefix_value + one_field_name).upper()}`", 49 | field_properties.description + ".", 50 | f"Default value is {default_value_beautified}.", 51 | ] 52 | if field_properties.metadata: 53 | validators_buf: list[str] = [] 54 | for one_obj in field_properties.metadata: 55 | restriction_stringified: str = str(one_obj) 56 | if any(("BeforeValidator" in restriction_stringified, "StringConstraints" in restriction_stringified)): 57 | continue 58 | validators_buf.append(f"`{restriction_stringified}`") 59 | if validators_buf: 60 | one_row_parts.append(f"Restrictions: {', '.join(validators_buf)}") 61 | pack_of_readme_lines.append(" ".join(one_row_parts)) 62 | automatic_config_readme: str = "* " + "\n* ".join(pack_of_readme_lines) 63 | new_content = re.sub( 64 | r"(.*Here is a list of them\:).*?(\#\#\#\s.*)", 65 | r"\1\n" + automatic_config_readme + r"\n\n\2", 66 | new_content, 67 | flags=re.IGNORECASE | re.MULTILINE | re.DOTALL, 68 | ) 69 | new_content = replace_tag_in_readme(new_content, parse_last_git_tag()) 70 | README_PATH.write_text(new_content) 71 | 
72 | 73 | def _build_coverage_badge() -> None: 74 | xml_source_text: typing.Final[str] = COVERAGE_XML_PATH.read_text() 75 | root_element: typing.Final[ET.Element] = ET.fromstring(xml_source_text) # noqa: S314 76 | line_rate_text: typing.Final[str | None] = root_element.attrib.get("line-rate") 77 | if line_rate_text is None: 78 | missing_attr_message: typing.Final[str] = "Missing 'line-rate' attribute in coverage report" 79 | raise KeyError(missing_attr_message) 80 | coverage_percent: typing.Final[float] = float(line_rate_text) * 100.0 81 | message_text: typing.Final[str] = f"{coverage_percent:.0f}%" 82 | color_text: str 83 | if coverage_percent < LOW_BOUNDARY: 84 | color_text = "#E63946" 85 | elif coverage_percent < HIGH_BOUNDARY: 86 | color_text = "#FFB347" 87 | else: 88 | color_text = "#2A9D8F" 89 | badge_mapping: typing.Final[typing.Mapping[str, typing.Any]] = types.MappingProxyType( 90 | { 91 | "schemaVersion": 1, 92 | "label": "coverage", 93 | "message": message_text, 94 | "color": color_text, 95 | }, 96 | ) 97 | BADGE_JSON_PATH.write_text(json.dumps(dict(badge_mapping))) 98 | 99 | 100 | if __name__ == "__main__": 101 | sys.path.append(str(PARENT_DIR.resolve())) 102 | 103 | parser_obj: typing.Final = argparse.ArgumentParser() 104 | parser_obj.add_argument("action") 105 | arguments_list: argparse.Namespace = parser_obj.parse_args() 106 | match arguments_list.action: 107 | case "update-dockerhub-readme": 108 | _update_dockerhub_readme() 109 | case "update-readme": 110 | _update_readme() 111 | case "build-coverage-badge": 112 | _build_coverage_badge() 113 | case _: 114 | print("Unknown action") # noqa: T201 115 | -------------------------------------------------------------------------------- /scripts/_helpers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import shlex 4 | import subprocess 5 | import typing 6 | 7 | 8 | def parse_last_git_tag() -> str: 9 | environment_ref_name_raw: typing.Final[str | None] = os.getenv("GITHUB_REF_NAME") 10 | if environment_ref_name_raw is not None: 11 | environment_ref_name: str = environment_ref_name_raw.lstrip("v") 12 | if re.fullmatch(r"\d+\.\d+\.\d+", environment_ref_name): 13 | return environment_ref_name 14 | return "latest" 15 | 16 | git_tags_command: typing.Final[list[str]] = shlex.split( 17 | "git rev-list --tags --max-count=1", 18 | ) 19 | last_tag_hash: typing.Final[str] = subprocess.check_output(git_tags_command).strip().decode() # noqa: S603 20 | describe_command: typing.Final[list[str]] = shlex.split( 21 | f"git describe --tags {last_tag_hash}", 22 | ) 23 | return subprocess.check_output(describe_command).strip().decode().lstrip("v") # noqa: S603 24 | 25 | 26 | def replace_tag_in_readme(readme_text: str, new_tag: str) -> str: 27 | return re.sub( 28 | r"(xfenix/spellcheck-microservice\:)([\w\.-]+)", 29 | r"\g<1>" + new_tag, 30 | readme_text, 31 | flags=re.IGNORECASE | re.DOTALL, 32 | ) 33 | 34 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xfenix/spellcheck-microservice/a12f35af9cf36ad85c6d9eaa1b6a7a284da7eea4/tests/__init__.py -------------------------------------------------------------------------------- /tests/_fixtures.py: -------------------------------------------------------------------------------- 1 | """Just a bunch of hardcode. 
2 | 3 | We do not want to parse files for tests — it's just waste of time. 4 | """ 5 | 6 | import typing 7 | 8 | 9 | BAD_PAYLOAD: typing.Final[tuple[str, ...]] = ( 10 | """ 11 | Капиталисиическая экономика харпктеризуетсвя периодичкскими кризмсами. B 2008 году рначался экоеомический кризис, котооый некоторфе экономистоы считают тперминальны для индустриальной экрономики расщиренного вспроизводтва. Из-за того, что в послдние десятмилетия в мире абсолютно доминирует йинансовая сфера, кризив принял формау финансовьго.Причиноцй кризиса явщяется достмижение мироаой экономиой «пределоу роста» по доллгам, ресурспм и экологи . Проявляетая кризис в пмдении фондотдачи, то еать в снижен и нормы приьыли на вложенный капитал.Кризис насчался c пробщем на рынке мпотечного кредитованитя в США. Он притвёл к рецессри в мировой кономике, пмдению объёмв мировой тьрговли, беспирецедентнму за всю истоорию трудоыой статистрки росту беработицы и оазмыванию ак называеммого «среднерго класса» в рпзвитых стрснах Капиталисиическая экономика харпктеризуетсвя периодичкскими кризмсами. B 2008 году рначался экоеомический кризис, котооый некоторфе экономистоы считают тперминальны для индустриальной экрономики расщиренного вспроизводтва. Из-за того, что в послдние десятмилетия в мире абсолютно доминирует йинансовая сфера, кризив принял формау финансовьго.Причиноцй кризиса явщяется достмижение мироаой экономиой «пределоу роста» по доллгам, ресурспм и экологи . Проявляетая кризис в пмдении фондотдачи, то еать в снижен и нормы приьыли на вложенный капитал.Кризис насчался c пробщем на рынке мпотечного кредитованитя в США. Он притвёл к рецессри в мировой кономике, пмдению объёмв мировой тьрговли, беспирецедентнму за всю истоорию трудоыой статистрки росту беработицы и оазмыванию ак называеммого «среднерго класса» в рпзвитых стрснах Капиталисиическая экономика харпктеризуетсвя периодичкскими кризмсами. B 2008 году рначался экоеомический кризис, котооый некоторфе экономистоы считают тперминальны для индустриальной экрономики расщиренного вспроизводтва. Из-за того, что в послдние десятмилетия в мире абсолютно доминирует йинансовая сфера, кризив принял формау финансовьго.Причиноцй кризиса явщяется достмижение мироаой экономиой «пределоу роста» по доллгам, ресурспм и экологи . Проявляетая кризис в пмдении фондотдачи, то еать в снижен и нормы приьыли на вложенный капитал.Кризис насчался c пробщем на рынке мпотечного кредитованитя в США. Он притвёл к рецессри в мировой кономике, пмдению объёмв мировой тьрговли, беспирецедентнму за всю истоорию трудоыой статистрки росту беработицы и оазмыванию ак называеммого «среднерго класса» в рпзвитых 12 | """, 13 | """Апичатки — настаящая граза фсякага блохера. Это палнаценный ужос в текздах. Так жидь нельзйа""", 14 | ) 15 | 16 | COMMON_TEXT_MESSAGE: typing.Final[str] = ( 17 | "Коллеги из поддержки юридических лиц работают в чате по будням с 6:00 до 22:00 по Москве.\n" # noqa: RUF001 18 | "Напишите в рабочее время или позвоните 8(800)700-46-46 по будням с 6:00 до 22:00 суббота с 9:00 по 18:00.\n" # noqa: RUF001 19 | "{} \nЕсли хотите, я передам вопрос, и вам напишут в рабочее время." 
# noqa: RUF001 20 | ) 21 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import tempfile 3 | import typing 4 | 5 | import faker 6 | import pytest 7 | from fastapi.testclient import TestClient 8 | 9 | from whole_app import views 10 | from whole_app.settings import SETTINGS, StorageProviders 11 | 12 | 13 | @pytest.fixture(scope="session") 14 | def faker_obj() -> faker.Faker: 15 | return faker.Faker("ru_RU") 16 | 17 | 18 | @pytest.fixture(autouse=True) 19 | def patch_file_provider_for_temp( 20 | monkeypatch: typing.Any, 21 | ) -> typing.Generator[None, None, None]: 22 | """Patch settings, to rewrite dict path to temporary directory.""" 23 | with monkeypatch.context() as patcher, tempfile.TemporaryDirectory() as tmp_dir_name: 24 | yield patcher.setattr(SETTINGS, "dictionaries_path", pathlib.Path(tmp_dir_name)) 25 | 26 | 27 | # pylint: disable=redefined-outer-name 28 | @pytest.fixture 29 | def app_client( 30 | monkeypatch: pytest.MonkeyPatch, 31 | faker_obj: typing.Any, 32 | ) -> typing.Generator[TestClient, None, None]: 33 | """Fake client with patched fake storage. 34 | 35 | Also in a form of context manager it allow us to test startup events 36 | on every test. 37 | """ 38 | fake_api_key: typing.Final[str] = faker_obj.password() 39 | with TestClient(views.SPELL_APP) as local_client, monkeypatch.context() as patcher: 40 | patcher.setattr( 41 | SETTINGS, 42 | "dictionaries_storage_provider", 43 | StorageProviders.DUMMY, 44 | ) 45 | patcher.setattr(SETTINGS, "api_key", fake_api_key) 46 | local_client.headers.update({SETTINGS.api_key_header_name: fake_api_key}) 47 | yield local_client 48 | -------------------------------------------------------------------------------- /tests/test_dict_views.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import typing 3 | 4 | import pytest 5 | from fastapi.testclient import TestClient 6 | 7 | from whole_app import models, views 8 | from whole_app.settings import SETTINGS, StorageProviders 9 | 10 | 11 | DICT_ENDPOINT: typing.Final = f"{SETTINGS.api_prefix}/dictionaries/" 12 | 13 | 14 | class TestFileAndDummyBasedDicts: 15 | @pytest.fixture(params=[StorageProviders.DUMMY, StorageProviders.FILE]) 16 | def patch_various_providers( 17 | self: "TestFileAndDummyBasedDicts", 18 | monkeypatch: typing.Any, 19 | request: typing.Any, 20 | ) -> typing.Any: 21 | with monkeypatch.context() as patcher: 22 | yield patcher.setattr( 23 | SETTINGS, 24 | "dictionaries_storage_provider", 25 | request.param, 26 | ) 27 | 28 | @pytest.mark.repeat(3) 29 | def test_add_to_dict( 30 | self: "TestFileAndDummyBasedDicts", 31 | app_client: typing.Any, 32 | faker_obj: typing.Any, 33 | patch_various_providers: typing.Any, # noqa: ARG002 34 | ) -> None: 35 | fake_user_name: typing.Final = faker_obj.user_name() 36 | fake_exc_word: typing.Final = faker_obj.word() 37 | path_to_dict_file: typing.Final = SETTINGS.dictionaries_path.joinpath( # pylint: disable=no-member 38 | fake_user_name, 39 | ) 40 | server_response = app_client.post( 41 | DICT_ENDPOINT, 42 | json=models.UserDictionaryRequestWithWord( 43 | user_name=fake_user_name, 44 | exception_word=fake_exc_word, 45 | ).model_dump(), 46 | ) 47 | assert server_response.status_code == 201 48 | if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE: 49 | assert fake_exc_word in path_to_dict_file.read_text() 50 | 51 | 
@pytest.mark.repeat(3) 52 | def test_remove_from_user_dict( 53 | self: "TestFileAndDummyBasedDicts", 54 | app_client: typing.Any, 55 | faker_obj: typing.Any, 56 | patch_various_providers: typing.Any, # noqa: ARG002 57 | ) -> None: 58 | fake_exc_word: typing.Final = faker_obj.word() 59 | fake_user_name: typing.Final = faker_obj.user_name() 60 | path_to_dict_file: typing.Final = SETTINGS.dictionaries_path.joinpath( # pylint: disable=no-member 61 | fake_user_name, 62 | ) 63 | path_to_dict_file.touch() 64 | path_to_dict_file.write_text(fake_exc_word) 65 | if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE: 66 | assert fake_exc_word in path_to_dict_file.read_text() 67 | server_response = app_client.request( 68 | "DELETE", 69 | DICT_ENDPOINT, 70 | json=models.UserDictionaryRequestWithWord( 71 | user_name=fake_user_name, 72 | exception_word=fake_exc_word, 73 | ).model_dump(), 74 | ) 75 | assert server_response.status_code == 200 76 | if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE: 77 | assert fake_exc_word not in path_to_dict_file.read_text() 78 | 79 | def test_dummy_provider_init( 80 | self: "TestFileAndDummyBasedDicts", 81 | monkeypatch: typing.Any, 82 | app_client: typing.Any, 83 | faker_obj: typing.Any, 84 | ) -> None: 85 | monkeypatch.setattr( 86 | SETTINGS, 87 | "dictionaries_storage_provider", 88 | StorageProviders.DUMMY, 89 | ) 90 | server_response = app_client.post( 91 | DICT_ENDPOINT, 92 | json=models.UserDictionaryRequestWithWord( 93 | user_name=faker_obj.user_name(), 94 | exception_word=faker_obj.word(), 95 | ).model_dump(), 96 | ) 97 | assert server_response.status_code == 201 98 | 99 | 100 | class TestVarious: 101 | def test_disabled_dictionary_views( 102 | self: "TestVarious", 103 | monkeypatch: typing.Any, 104 | ) -> None: 105 | """Test views with dictionaries_disabled SETTINGS option.""" 106 | with monkeypatch.context() as patcher: 107 | patcher.setattr(SETTINGS, "dictionaries_disabled", True) 108 | importlib.reload(views) 109 | server_response = TestClient(views.SPELL_APP).post( 110 | DICT_ENDPOINT, 111 | json=models.UserDictionaryRequestWithWord( 112 | user_name="test", 113 | exception_word="test", 114 | ).model_dump(), 115 | ) 116 | assert server_response.status_code == 404 117 | # restore back api state to ensure other tests wont break 118 | importlib.reload(views) 119 | 120 | @pytest.mark.parametrize("api_key", [None, ""]) 121 | def test_empty_auth_key(self: "TestVarious", api_key: str) -> None: 122 | server_response = TestClient(views.SPELL_APP).post( 123 | DICT_ENDPOINT, 124 | json=models.UserDictionaryRequestWithWord( 125 | user_name="test", 126 | exception_word="test", 127 | ).model_dump(), 128 | headers={} if api_key is None else {SETTINGS.api_key_header_name: ""}, 129 | ) 130 | assert server_response.status_code == 403 131 | 132 | def test_wrong_api_key(self: "TestVarious") -> None: 133 | server_response = TestClient(views.SPELL_APP).post( 134 | DICT_ENDPOINT, 135 | json=models.UserDictionaryRequestWithWord( 136 | user_name="test", 137 | exception_word="test", 138 | ).model_dump(), 139 | headers={ 140 | SETTINGS.api_key_header_name: SETTINGS.api_key + "wrongTrashKekJunk --- 5000", 141 | }, 142 | ) 143 | assert server_response.status_code == 401 144 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | import runpy 4 | import typing 5 | 6 | from fastapi.testclient 
import TestClient 7 | from granian.constants import Interfaces 8 | 9 | from whole_app import views 10 | from whole_app.settings import SETTINGS, SettingsOfMicroservice 11 | 12 | 13 | if typing.TYPE_CHECKING: 14 | import faker 15 | 16 | 17 | def test_main_py(monkeypatch: typing.Any) -> None: 18 | captured_parameters: dict[str, typing.Any] = {} 19 | 20 | class FakeGranian: 21 | def __init__( 22 | self: "FakeGranian", 23 | target: str, 24 | *, 25 | address: str, 26 | port: int, 27 | workers: int, 28 | interface: Interfaces, 29 | ) -> None: 30 | captured_parameters.update( 31 | { 32 | "target": target, 33 | "address": address, 34 | "port": port, 35 | "workers": workers, 36 | "interface": interface, 37 | }, 38 | ) 39 | 40 | def serve(self: "FakeGranian") -> None: 41 | captured_parameters["served"] = True 42 | 43 | monkeypatch.setattr("granian.Granian", FakeGranian) 44 | runpy.run_module("whole_app.__main__", run_name="__main__") 45 | 46 | assert captured_parameters == { 47 | "target": "whole_app.views:SPELL_APP", 48 | "address": SETTINGS.server_address, 49 | "port": SETTINGS.port, 50 | "workers": SETTINGS.workers, 51 | "interface": Interfaces.ASGI, 52 | "served": True, 53 | } 54 | 55 | 56 | def test_incorrect_settings(monkeypatch: typing.Any) -> None: 57 | fake_settings: SettingsOfMicroservice = SettingsOfMicroservice() 58 | assert fake_settings.cache_size == 10_000 59 | 60 | os.environ["SPELLCHECK_CACHE_SIZE"] = "-666" 61 | 62 | monkeypatch.setattr( 63 | "pathlib.Path.read_text", 64 | lambda _: "version === fucked == up == totally == 666.13.13", 65 | ) 66 | fake_settings = SettingsOfMicroservice() 67 | assert fake_settings.cache_size == 0 68 | assert fake_settings.current_version == "" 69 | 70 | 71 | def test_sentry_integration(monkeypatch: typing.Any, faker_obj: "faker.Faker") -> None: 72 | with monkeypatch.context() as patcher: 73 | patcher.setattr(SETTINGS, "sentry_dsn", f"https://{faker_obj.pystr()}") 74 | patcher.setattr("sentry_sdk.init", lambda **_: None) 75 | importlib.reload(views) 76 | server_response = TestClient(views.SPELL_APP).get( 77 | f"{SETTINGS.api_prefix}/health/", 78 | ) 79 | assert server_response.status_code == 200 80 | # restore back api state to ensure other tests wont break 81 | importlib.reload(views) 82 | -------------------------------------------------------------------------------- /tests/test_spell.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | import pytest 4 | 5 | from tests._fixtures import COMMON_TEXT_MESSAGE 6 | from tests.test_spell_views import RU_LANG 7 | from whole_app import models 8 | from whole_app.settings import SETTINGS 9 | from whole_app.spell import SpellCheckService 10 | 11 | 12 | @pytest.mark.parametrize( 13 | ( 14 | "text_input", 15 | "expected_corrections", 16 | ), 17 | [ 18 | ( 19 | "Превед медвет", 20 | [ 21 | ("Превед", 0, 6, None), 22 | ("медвет", 7, 13, "медведь"), 23 | ], 24 | ), 25 | ( 26 | "превет как дила", 27 | [ 28 | ("превет", 0, 6, "привет"), 29 | ("дила", 11, 15, "дела"), 30 | ], 31 | ), 32 | ], 33 | ) 34 | def test_correct_spell( 35 | text_input: str, 36 | expected_corrections: list[tuple[str, int, int, str | None]], 37 | ) -> None: 38 | fake_engine: SpellCheckService = SpellCheckService() 39 | corrections = fake_engine.prepare( 40 | models.SpellCheckRequest(text=text_input, language=RU_LANG), 41 | ).run_check() 42 | assert len(corrections) == len(expected_corrections) 43 | for one_correction, (word, first_position, last_position, suggestion) in zip( 44 | 
corrections, 45 | expected_corrections, strict=False, 46 | ): 47 | assert one_correction.first_position == first_position 48 | assert one_correction.last_position == last_position 49 | assert one_correction.word == word 50 | assert text_input[first_position:last_position] == word 51 | if suggestion is None: 52 | assert one_correction.suggestions 53 | else: 54 | assert suggestion in one_correction.suggestions 55 | 56 | 57 | @pytest.mark.parametrize( 58 | "url", 59 | [ 60 | "www.rzb.ru", 61 | "https://rzb.ru", 62 | "https://www.rzb.ru", 63 | "rzb.ru/taCWpO", 64 | "www.rzb.ru/taCWpO", 65 | "https://rzb.ru/taCWpO", 66 | "https://www.rzb.ru/taCWpO", 67 | "https://www.asd.google.com/search?q=some+text¶m=3#dfsdf", 68 | "https://www.google.com", 69 | "http://google.com/?q=some+text¶m=3#dfsdf", 70 | "https://www.google.com/api/?", 71 | "https://www.google.com/api/login.php", 72 | "https://r-chat.raiffeisen.ru/admin/operator/", 73 | "https://r-chat.raiffeisen.ru/admin/operator/taCWpO", 74 | ], 75 | ) 76 | def test_urls_ignored( 77 | url: str, 78 | ) -> None: 79 | fake_engine: SpellCheckService = SpellCheckService() 80 | corrections = fake_engine.prepare( 81 | models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language="ru_RU", exclude_urls=True), 82 | ).run_check() 83 | assert not corrections 84 | 85 | 86 | @pytest.mark.parametrize( 87 | ("wannabe_user_input", "excluded_words"), 88 | [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", {"шячло", "попячтса"})], 89 | ) 90 | def test_default_excluded_words( 91 | wannabe_user_input: str, 92 | excluded_words: str, 93 | monkeypatch: typing.Any, 94 | ) -> None: 95 | with monkeypatch.context() as patcher: 96 | patcher.setattr(SETTINGS, "_exclusion_words_set", excluded_words) 97 | fake_engine: SpellCheckService = SpellCheckService() 98 | prepared = fake_engine.prepare( 99 | models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG, exclude_urls=False), 100 | ) 101 | 102 | corrections = prepared.run_check() 103 | assert corrections == [], f"{corrections=} --- {prepared._exclusion_words=}" # noqa: SLF001 104 | -------------------------------------------------------------------------------- /tests/test_spell_views.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name 2 | import random 3 | import typing 4 | 5 | import pytest 6 | from requests.models import Response as RequestsResponse 7 | 8 | from ._fixtures import BAD_PAYLOAD 9 | from whole_app import models 10 | from whole_app.settings import SETTINGS, StorageProviders 11 | 12 | 13 | if typing.TYPE_CHECKING: 14 | import faker 15 | from fastapi.testclient import TestClient 16 | 17 | 18 | RUSSIAN_LETTERS: typing.Final = "абвгдежзийклмнопрстуфхцчшщъыьэюяё" 19 | RU_LANG: typing.Final = "ru_RU" 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "wannabe_user_input", 24 | ["Привет как дела", "Пока, я ушёл", *BAD_PAYLOAD], 25 | ) 26 | def test_no_corrections(app_client: "TestClient", wannabe_user_input: str) -> None: 27 | """Dead simple test.""" 28 | server_response: typing.Final = app_client.post( 29 | f"{SETTINGS.api_prefix}/check/", 30 | json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(), 31 | ) 32 | assert server_response.status_code == 200 33 | 34 | 35 | @pytest.mark.repeat(5) 36 | def test_with_corrections_simple( 37 | app_client: "TestClient", 38 | faker_obj: "faker.Faker", 39 | ) -> None: 40 | """Not so dead simple test.""" 41 | generated_letter: typing.Final = random.choice(RUSSIAN_LETTERS) 42 | 
wannabe_user_input: typing.Final[str] = ( 43 | faker_obj.text() 44 | .lower() 45 | .replace( 46 | generated_letter, 47 | random.choice(RUSSIAN_LETTERS.replace(generated_letter, "")), 48 | ) 49 | ) 50 | server_response: typing.Final = app_client.post( 51 | f"{SETTINGS.api_prefix}/check/", 52 | json=models.SpellCheckRequest( 53 | text=wannabe_user_input, 54 | language=RU_LANG, 55 | user_name=faker_obj.user_name(), 56 | ).model_dump(), 57 | ) 58 | assert server_response.status_code == 200 59 | 60 | 61 | @pytest.mark.parametrize( 62 | ("wannabe_user_input", "tested_word"), 63 | [ 64 | (BAD_PAYLOAD[0], "Капиталисиическая"), 65 | (BAD_PAYLOAD[1], "блохера"), 66 | ], 67 | ) 68 | def test_with_exception_word_in_dictionary( 69 | monkeypatch: typing.Any, 70 | app_client: "TestClient", 71 | faker_obj: "faker.Faker", 72 | wannabe_user_input: str, 73 | tested_word: str, 74 | ) -> None: 75 | """Complex tests, where we add word to dictionary and tests that it really excluded from the output.""" 76 | # replace all symbols from wannabe_user_input except letters and numbers 77 | monkeypatch.setattr( 78 | SETTINGS, 79 | "dictionaries_storage_provider", 80 | StorageProviders.FILE, 81 | ) 82 | 83 | def run_request() -> typing.Any: 84 | return app_client.post( 85 | f"{SETTINGS.api_prefix}/check/", 86 | json=models.SpellCheckRequest( 87 | text=wannabe_user_input, 88 | language=RU_LANG, 89 | user_name=user_name, 90 | ).model_dump(), 91 | ) 92 | 93 | def parse_words(server_response: RequestsResponse) -> typing.Any: 94 | return [item["word"] for item in server_response.json()["corrections"]] 95 | 96 | user_name: typing.Final[str] = faker_obj.user_name() 97 | # run usual check request 98 | server_response: RequestsResponse = run_request() 99 | assert tested_word in parse_words(server_response) 100 | # add word to user dictionary 101 | app_client.post( 102 | f"{SETTINGS.api_prefix}/dictionaries/", 103 | json=models.UserDictionaryRequestWithWord( 104 | user_name=user_name, 105 | exception_word=tested_word, 106 | ).model_dump(), 107 | ) 108 | # and than check that excepted word not in the check output 109 | server_response = run_request() 110 | assert tested_word not in parse_words(server_response) 111 | -------------------------------------------------------------------------------- /tests/test_various_views.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | import toml 4 | 5 | from whole_app.settings import PATH_TO_PYPROJECT, SETTINGS 6 | 7 | 8 | if typing.TYPE_CHECKING: 9 | from fastapi.testclient import TestClient 10 | 11 | 12 | def test_healthcheck_api_good(app_client: "TestClient") -> typing.Any: 13 | server_response = app_client.get(f"{SETTINGS.api_prefix}/health/") 14 | assert server_response.status_code == 200 15 | assert server_response.json()["version"] == toml.loads(PATH_TO_PYPROJECT.read_text())["project"]["version"] 16 | -------------------------------------------------------------------------------- /whole_app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xfenix/spellcheck-microservice/a12f35af9cf36ad85c6d9eaa1b6a7a284da7eea4/whole_app/__init__.py -------------------------------------------------------------------------------- /whole_app/__main__.py: -------------------------------------------------------------------------------- 1 | """Application server here. 2 | 3 | This file meant only for basic workers wrappers and fastapi exposure. 
4 | For endpoints, look in views.py.
5 | """
6 | 
7 | import typing
8 | 
9 | from granian import Granian  # type: ignore[attr-defined]
10 | from granian.constants import Interfaces
11 | 
12 | from .settings import SETTINGS
13 | 
14 | 
15 | APPLICATION_TARGET: typing.Final[str] = "whole_app.views:SPELL_APP"
16 | 
17 | 
18 | def launch_server() -> None:
19 |     Granian(
20 |         APPLICATION_TARGET,
21 |         address=SETTINGS.server_address,
22 |         port=SETTINGS.port,
23 |         workers=SETTINGS.workers,
24 |         interface=Interfaces.ASGI,
25 |     ).serve()
26 | 
27 | 
28 | if __name__ == "__main__":
29 |     launch_server()
30 | 
--------------------------------------------------------------------------------
/whole_app/auth.py:
--------------------------------------------------------------------------------
1 | import typing
2 | 
3 | import fastapi
4 | from fastapi.security.api_key import APIKeyHeader
5 | 
6 | from .settings import SETTINGS
7 | 
8 | 
9 | async def auth_via_api_key(
10 |     user_provided_api_key: typing.Annotated[str, fastapi.Security(APIKeyHeader(name=SETTINGS.api_key_header_name))],
11 | ) -> str:
12 |     if user_provided_api_key != SETTINGS.api_key:
13 |         raise fastapi.HTTPException(
14 |             status_code=401,
15 |             detail="Could not validate api key",
16 |         )
17 |     return user_provided_api_key
18 | 
--------------------------------------------------------------------------------
/whole_app/dictionaries/__init__.py:
--------------------------------------------------------------------------------
1 | import typing
2 | 
3 | import structlog
4 | 
5 | from . import dummy as dummy_storage
6 | from . import file as file_storage
7 | from . import protocol
8 | from whole_app.settings import SETTINGS, StorageProviders
9 | 
10 | 
11 | LOGGER_OBJ: typing.Final = structlog.get_logger()
12 | 
13 | 
14 | def init_storage() -> None:
15 |     if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
16 |         file_storage.init_storage()
17 |     elif SETTINGS.dictionaries_storage_provider == StorageProviders.DUMMY:
18 |         LOGGER_OBJ.warning(
19 |             "Storage provider set to dummy mode. "
20 |             "Currently all user dictionary requests will be thrown away. We warn you.",
21 |         )
22 | 
23 | 
24 | def prepare_storage_engine() -> protocol.UserDictProtocol:
25 |     if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
26 |         return file_storage.FileProvider()
27 |     return dummy_storage.DummyProvider()
28 | 
--------------------------------------------------------------------------------
/whole_app/dictionaries/dummy.py:
--------------------------------------------------------------------------------
1 | """Dummy provider."""
2 | 
3 | 
4 | class DummyProvider:
5 |     """Dummy provider for user dictionaries.
6 | 
7 |     Useful in case you want to use the dictionaries API, but don't want to do
8 |     actual work. 
9 | """ 10 | 11 | def prepare(self: "DummyProvider", _: str) -> "DummyProvider": 12 | """Initialize class from user id.""" 13 | return self 14 | 15 | async def save_record(self: "DummyProvider", _: str) -> None: 16 | """Save record to user dictionary.""" 17 | 18 | async def remove_record(self: "DummyProvider", _: str) -> None: 19 | """Remove record from user dictionary.""" 20 | 21 | async def fetch_records(self: "DummyProvider") -> list[str]: 22 | """Fetch records from user dictionary.""" 23 | return [] 24 | -------------------------------------------------------------------------------- /whole_app/dictionaries/file.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from anyio import Path as AsyncPath 4 | 5 | from whole_app.settings import SETTINGS 6 | 7 | 8 | def init_storage() -> None: 9 | SETTINGS.dictionaries_path.mkdir( 10 | parents=True, 11 | exist_ok=True, 12 | ) 13 | 14 | 15 | class FileProvider: 16 | _user_dict_path: AsyncPath 17 | 18 | def prepare(self: "FileProvider", user_name: str) -> "FileProvider": 19 | self._user_dict_path = AsyncPath(SETTINGS.dictionaries_path / user_name) 20 | return self 21 | 22 | async def _store_lines(self: "FileProvider", lines: list[str]) -> None: 23 | await self._user_dict_path.write_text("\n".join(lines) + "\n") 24 | 25 | async def save_record(self: "FileProvider", exception_word: str) -> None: 26 | await self._user_dict_path.touch() 27 | clean_word: typing.Final = exception_word.strip().lower() 28 | file_content: typing.Final = await self.fetch_records() 29 | if clean_word not in file_content: 30 | file_content.append(clean_word) 31 | await self._store_lines(file_content) 32 | 33 | async def remove_record(self: "FileProvider", exception_word: str) -> None: 34 | file_content: typing.Final = await self.fetch_records() 35 | if exception_word in file_content: 36 | file_content.remove(exception_word) 37 | await self._store_lines(file_content) 38 | 39 | async def fetch_records(self: "FileProvider") -> list[str]: 40 | if await self._user_dict_path.exists(): 41 | return [one_line.strip() for one_line in (await self._user_dict_path.read_text()).split("\n") if one_line] 42 | return [] 43 | -------------------------------------------------------------------------------- /whole_app/dictionaries/protocol.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | 4 | class UserDictProtocol(typing.Protocol): 5 | """Default storage protocol/interface.""" 6 | 7 | def prepare(self: "UserDictProtocol", user_name: str) -> "UserDictProtocol": 8 | """Prepare class for user name.""" 9 | 10 | async def save_record(self: "UserDictProtocol", exception_word: str) -> None: 11 | """Save record to user dictionary.""" 12 | 13 | async def remove_record(self: "UserDictProtocol", exception_word: str) -> None: 14 | """Remove record from user dictionary.""" 15 | 16 | async def fetch_records(self: "UserDictProtocol") -> list[str]: 17 | """Fetch records from user dictionary.""" 18 | -------------------------------------------------------------------------------- /whole_app/misc_helpers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing 3 | 4 | import structlog 5 | 6 | from whole_app.settings import SETTINGS 7 | 8 | 9 | def init_logger() -> None: 10 | our_processors: typing.Final[typing.Any] = [ 11 | structlog.contextvars.merge_contextvars, 12 | structlog.processors.add_log_level, 13 | 
13 | structlog.processors.format_exc_info, 14 | structlog.processors.TimeStamper(fmt="iso", utc=True), 15 | ] 16 | if SETTINGS.structured_logging: 17 | our_processors.append(structlog.processors.JSONRenderer()) 18 | structlog.configure( 19 | cache_logger_on_first_use=True, 20 | wrapper_class=structlog.make_filtering_bound_logger(logging.INFO), 21 | processors=our_processors, 22 | ) 23 | -------------------------------------------------------------------------------- /whole_app/models.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-member 2 | """Models for input/output.""" 3 | 4 | import typing 5 | 6 | import pydantic 7 | 8 | from .settings import SETTINGS, AvailableLanguages, AvailableLanguagesType 9 | 10 | 11 | class OneCorrection(pydantic.BaseModel): 12 | first_position: int 13 | last_position: int 14 | word: str 15 | suggestions: set[str] 16 | 17 | 18 | class SpellCheckRequest(pydantic.BaseModel): 19 | text: str = pydantic.Field(..., examples=["Привед как дила"]) 20 | language: AvailableLanguagesType 21 | user_name: str | None = pydantic.Field( 22 | None, 23 | examples=["username"], 24 | pattern=SETTINGS.username_regex, 25 | min_length=SETTINGS.username_min_length, 26 | max_length=SETTINGS.username_max_length, 27 | ) 28 | exclude_urls: bool = True 29 | 30 | 31 | class SpellCheckResponse(pydantic.BaseModel): 32 | text: str 33 | language: str 34 | corrections: list[OneCorrection] 35 | 36 | 37 | class UserDictionaryRequest(pydantic.BaseModel): 38 | user_name: str = pydantic.Field( 39 | examples=["username"], 40 | pattern=SETTINGS.username_regex, 41 | min_length=SETTINGS.username_min_length, 42 | max_length=SETTINGS.username_max_length, 43 | ) 44 | 45 | 46 | class UserDictionaryRequestWithWord(UserDictionaryRequest): 47 | exception_word: str = pydantic.Field(..., examples=["привед"]) 48 | 49 | 50 | class HealthCheckResponse(pydantic.BaseModel): 51 | service_name: str = SETTINGS.service_name 52 | supported_languages: tuple[str, ...] = AvailableLanguages 53 | version: str = SETTINGS.current_version 54 | status: typing.Literal["ok", "notok"] = "ok" 55 | -------------------------------------------------------------------------------- /whole_app/settings.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import pathlib 3 | import typing 4 | 5 | import pydantic 6 | import structlog 7 | import toml 8 | import typing_extensions 9 | from pydantic import computed_field 10 | from pydantic_settings import BaseSettings 11 | 12 | 13 | LOGGER_OBJ: typing.Final = structlog.get_logger() 14 | PATH_TO_PYPROJECT: typing.Final = pathlib.Path(__file__).parent.parent / "pyproject.toml" 15 | AvailableLanguagesType = typing.Literal[ 16 | "ru_RU", 17 | "en_US", 18 | "es_ES", 19 | "fr_FR", 20 | "de_DE", 21 | "pt_PT", 22 | ] 23 | AvailableLanguages: tuple[str, ...] = typing.get_args(AvailableLanguagesType) 24 | 25 | 26 | def _warn_about_poor_lru_cache_size( 27 | possible_value: int, 28 | ) -> int: 29 | if int(possible_value) < 1: 30 | LOGGER_OBJ.warning( 31 | ("You set the cache size to less than 1. In this case, the cache size will be unlimited and may pollute your memory."), 32 | ) 33 | return 0 34 | return possible_value 35 | 36 | 37 | def _warn_about_empty_api_key( 38 | possible_value: str, 39 | ) -> str: 40 | if not possible_value: 41 | LOGGER_OBJ.warning("You set an empty API key. This is not recommended.")
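        # NOTE: this validator only warns; the empty value is kept as-is, and
        # auth_via_api_key compares user-provided keys against it verbatim.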
This is not recommended.") 42 | return possible_value 43 | 44 | 45 | def _parse_version_from_local_file( 46 | default_value: str, 47 | ) -> str: 48 | try: 49 | pyproject_obj: typing.Final[dict[str, dict[str, object]]] = toml.loads( 50 | PATH_TO_PYPROJECT.read_text(), 51 | ) 52 | return typing.cast("str", pyproject_obj["project"]["version"]) 53 | except (toml.TomlDecodeError, KeyError, FileNotFoundError) as exc: 54 | LOGGER_OBJ.warning("Cant parse version from pyproject. Trouble %s", exc) 55 | return default_value 56 | 57 | 58 | class StorageProviders(enum.Enum): 59 | FILE = "file" 60 | DUMMY = "dummy" 61 | 62 | 63 | class SettingsOfMicroservice(BaseSettings): 64 | app_title: str = "Spellcheck API" 65 | service_name: str = "spellcheck-microservice" 66 | sentry_dsn: typing.Annotated[ 67 | str, 68 | pydantic.Field( 69 | description="Sentry DSN for integration. Empty field disables integration", 70 | ), 71 | pydantic.StringConstraints( 72 | strip_whitespace=True, 73 | ), 74 | ] = "" 75 | api_key: typing.Annotated[ 76 | str, 77 | pydantic.BeforeValidator(_warn_about_empty_api_key), 78 | pydantic.Field( 79 | description=( 80 | "define api key for users dictionaries mostly. " 81 | "Please, provide, if you want to enable user dictionaries API" 82 | ), 83 | ), 84 | ] = "" 85 | api_key_header_name: typing.Annotated[ 86 | str, 87 | pydantic.StringConstraints( 88 | strip_whitespace=True, 89 | ), 90 | ] = "Api-Key" 91 | enable_cors: typing.Annotated[ 92 | bool, 93 | pydantic.Field( 94 | description="enable CORS for all endpoints. In docker container this option is disabled", 95 | ), 96 | ] = True 97 | structured_logging: typing.Annotated[ 98 | bool, 99 | pydantic.Field( 100 | description="enables structured (json) logging", 101 | ), 102 | ] = True 103 | workers: typing.Annotated[ 104 | int, 105 | pydantic.Field( 106 | gt=0, 107 | lt=301, 108 | description=( 109 | "define application server workers count. " 110 | "If you plan to use k8s and only scale with replica sets, you might want to reduce this value to `1`" 111 | ), 112 | ), 113 | ] = 8 114 | server_address: typing.Annotated[ 115 | str, 116 | pydantic.StringConstraints( 117 | strip_whitespace=True, 118 | ), 119 | pydantic.Field( 120 | description="binding address, default value suitable for docker", 121 | ), 122 | ] = "0.0.0.0" # noqa: S104 123 | port: typing.Annotated[ 124 | int, 125 | pydantic.Field( 126 | gt=1_023, 127 | lt=65_536, 128 | description="binding port", 129 | ), 130 | ] = 10_113 131 | cache_size: typing.Annotated[ 132 | int, 133 | pydantic.BeforeValidator(_warn_about_poor_lru_cache_size), 134 | pydantic.Field( 135 | description=( 136 | "define LRU cache size for misspelled word/suggestions cache. " 137 | "Any value less than `1` makes the cache size unlimited, so be careful with this option" 138 | ), 139 | ), 140 | ] = 10_000 141 | api_prefix: typing.Annotated[ 142 | str, 143 | pydantic.StringConstraints( 144 | strip_whitespace=True, 145 | ), 146 | pydantic.BeforeValidator( 147 | lambda possible_value: f"/{possible_value.strip('/')}", 148 | ), 149 | pydantic.Field(description="define all API's URL prefix"), 150 | ] = "/api/" 151 | docs_url: typing.Annotated[ 152 | str, 153 | pydantic.StringConstraints( 154 | strip_whitespace=True, 155 | ), 156 | pydantic.Field( 157 | description="define documentation (swagger) URL prefix", 158 | ), 159 | ] = "/docs/" 160 | max_suggestions: typing.Annotated[ 161 | int, 162 | pydantic.Field( 163 | ge=0, 164 | description="defines how many maximum suggestions for each word will be available. 
0 means unlimitied", 165 | ), 166 | ] = 0 167 | dictionaries_path: typing.Annotated[ 168 | pathlib.Path, 169 | pydantic.Field( 170 | description=( 171 | "define directory where user dicts is stored. " 172 | "This is inner directory in the docker image, please map it to volume as it " 173 | "shown in the quickstart part of this readme" 174 | ), 175 | ), 176 | ] = pathlib.Path("/data/") 177 | dictionaries_storage_provider: typing.Annotated[ 178 | StorageProviders, 179 | pydantic.Field( 180 | description="define wich engine will store user dictionaries", 181 | ), 182 | ] = StorageProviders.FILE 183 | dictionaries_disabled: typing.Annotated[ 184 | bool, 185 | pydantic.Field( 186 | description="switches off user dictionaries API no matter what", 187 | ), 188 | ] = False 189 | current_version: typing.Annotated[ 190 | str, 191 | pydantic.BeforeValidator(_parse_version_from_local_file), 192 | ] = "" 193 | username_min_length: typing.Annotated[ 194 | int, 195 | pydantic.Field( 196 | description="minimum length of username", 197 | ), 198 | ] = 3 199 | username_max_length: typing.Annotated[ 200 | int, 201 | pydantic.Field( 202 | description="maximum length of username", 203 | ), 204 | ] = 60 205 | username_regex: str = r"^[a-zA-Z0-9-_]*$" 206 | exclusion_words_str: typing.Annotated[ 207 | str, 208 | pydantic.Field( 209 | description="String with list of words which will be ignored in /api/check endpoint each request. " 210 | "Example: `'foo, bar'`" 211 | ), 212 | ] = "" 213 | _exclusion_words_set: typing.Annotated[ 214 | set[str], 215 | pydantic.Field( 216 | description="""set of words which will ignored by default(filled from exclusion_words_str). 217 | Example: `'["foo", "bar"]'` """, 218 | ), 219 | ] = set() 220 | 221 | @computed_field 222 | def exclusion_words_set(self) -> set[str]: 223 | return self._exclusion_words_set 224 | 225 | @pydantic.model_validator(mode="after") 226 | def _assemble_exclusion_words_set(self) -> typing_extensions.Self: 227 | self._exclusion_words_set = { 228 | one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word 229 | } 230 | return self 231 | 232 | class Config: 233 | env_prefix: str = "spellcheck_" 234 | 235 | 236 | SETTINGS: SettingsOfMicroservice = SettingsOfMicroservice() 237 | -------------------------------------------------------------------------------- /whole_app/spell.py: -------------------------------------------------------------------------------- 1 | import re 2 | import typing 3 | 4 | import cachebox 5 | import urlextract 6 | from enchant.checker import SpellChecker 7 | 8 | from . 
import models 9 | from .settings import SETTINGS 10 | 11 | 12 | _MISSPELED_CACHE: typing.Final[ 13 | cachebox.LRUCache[str, list[str]] | dict[str, list[str]] 14 | ] = ( 15 | cachebox.LRUCache[str, list[str]](SETTINGS.cache_size) 16 | if SETTINGS.cache_size > 0 17 | else typing.cast("dict[str, list[str]]", {}) 18 | ) 19 | 20 | SEPARATORS_TO_SPLIT_URL_BY_WORDS: typing.Final[re.Pattern[str]] = re.compile(r"\.|\:|\/\/|\/|\?|\&|\=|\+|\#|\-") 21 | 22 | 23 | class SpellCheckService: 24 | __slots__ = ("_exclusion_words", "_input_text", "_spellcheck_engine") 25 | _input_text: str 26 | _spellcheck_engine: SpellChecker 27 | _exclusion_words: list[str] 28 | _url_extractor: urlextract.URLExtract = urlextract.URLExtract() 29 | 30 | def prepare( 31 | self: "SpellCheckService", 32 | request_payload: models.SpellCheckRequest, 33 | exclusion_words: list[str] | None = None, 34 | ) -> "SpellCheckService": 35 | """Initialize machinery.""" 36 | self._input_text = request_payload.text 37 | self._exclusion_words = exclusion_words if exclusion_words else [] 38 | self._exclusion_words.extend(typing.cast("set[str]", SETTINGS.exclusion_words_set)) 39 | 40 | if request_payload.exclude_urls: 41 | for one_url in self._url_extractor.find_urls(self._input_text): 42 | self._exclusion_words.extend( 43 | {one_word.lower() for one_word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)} 44 | ) 45 | self._spellcheck_engine = SpellChecker(request_payload.language) 46 | return self 47 | 48 | @staticmethod 49 | def get_memorized_suggestions(word_spellcheck_result: SpellChecker) -> list[str]: 50 | misspelled_suggestions: list[str] 51 | if word_spellcheck_result.word in _MISSPELED_CACHE: 52 | misspelled_suggestions = _MISSPELED_CACHE[word_spellcheck_result.word] 53 | else: 54 | misspelled_suggestions = word_spellcheck_result.suggest() 55 | _MISSPELED_CACHE[word_spellcheck_result.word] = misspelled_suggestions 56 | return ( 57 | misspelled_suggestions[: SETTINGS.max_suggestions] 58 | if SETTINGS.max_suggestions > 0 59 | else misspelled_suggestions 60 | ) 61 | 62 | def run_check(self: "SpellCheckService") -> list[models.OneCorrection]: 63 | corrections_output: list[models.OneCorrection] = [] 64 | self._spellcheck_engine.set_text(self._input_text) 65 | for one_result in self._spellcheck_engine: 66 | if one_result.word.lower() in self._exclusion_words: 67 | continue 68 | corrections_output.append( 69 | models.OneCorrection( 70 | first_position=one_result.wordpos, 71 | last_position=one_result.wordpos + len(one_result.word), 72 | word=one_result.word, 73 | suggestions=self.get_memorized_suggestions(one_result), 74 | ), 75 | ) 76 | return corrections_output 77 | -------------------------------------------------------------------------------- /whole_app/views.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | import fastapi 4 | import structlog 5 | from anyio import to_thread 6 | 7 | from . 
import dictionaries, misc_helpers, models, spell 8 | from .auth import auth_via_api_key 9 | from .dictionaries.protocol import UserDictProtocol 10 | from .settings import SETTINGS 11 | 12 | 13 | LOGGER_OBJ: typing.Final = structlog.get_logger() 14 | SPELL_APP: typing.Final = fastapi.FastAPI( 15 | title=SETTINGS.app_title, 16 | version=SETTINGS.current_version, 17 | docs_url=SETTINGS.docs_url, 18 | openapi_url=f"{SETTINGS.api_prefix}/openapi.json", 19 | ) 20 | if SETTINGS.enable_cors: 21 | from fastapi.middleware.cors import CORSMiddleware 22 | 23 | SPELL_APP.add_middleware( 24 | CORSMiddleware, 25 | allow_origins=("*",), 26 | allow_credentials=True, 27 | allow_methods=["*"], 28 | allow_headers=["*"], 29 | ) 30 | if SETTINGS.sentry_dsn: 31 | import sentry_sdk 32 | from sentry_sdk.integrations.asgi import SentryAsgiMiddleware 33 | 34 | sentry_sdk.init(dsn=SETTINGS.sentry_dsn) 35 | SPELL_APP.add_middleware(SentryAsgiMiddleware) 36 | 37 | 38 | @SPELL_APP.on_event("startup") 39 | def startup() -> None: 40 | dictionaries.init_storage() 41 | misc_helpers.init_logger() 42 | LOGGER_OBJ.info("Current settings: %s", SETTINGS) 43 | 44 | 45 | @SPELL_APP.post(f"{SETTINGS.api_prefix}/check/", summary="Check spelling") 46 | async def spell_check_main_endpoint( 47 | request_payload: models.SpellCheckRequest, 48 | spell_service: typing.Annotated[ 49 | spell.SpellCheckService, 50 | fastapi.Depends(spell.SpellCheckService), 51 | ], 52 | storage_engine: typing.Annotated[ 53 | UserDictProtocol, 54 | fastapi.Depends(dictionaries.prepare_storage_engine), 55 | ], 56 | ) -> models.SpellCheckResponse: 57 | """Check spelling of the text for the given language.""" 58 | exclusion_words: list[str] = [] 59 | if request_payload.user_name and not SETTINGS.dictionaries_disabled: 60 | exclusion_words = await storage_engine.prepare( 61 | request_payload.user_name, 62 | ).fetch_records() 63 | return models.SpellCheckResponse( 64 | **request_payload.model_dump(), 65 | corrections=await to_thread.run_sync( 66 | spell_service.prepare(request_payload, exclusion_words).run_check, 67 | ), 68 | ) 69 | 70 | 71 | @SPELL_APP.get(f"{SETTINGS.api_prefix}/health/", summary="Regular healthcheck API") 72 | async def check_health_of_service() -> models.HealthCheckResponse: 73 | """Check health of service.""" 74 | return models.HealthCheckResponse() 75 | 76 | 77 | if not SETTINGS.dictionaries_disabled: 78 | 79 | @SPELL_APP.post( 80 | f"{SETTINGS.api_prefix}/dictionaries/", 81 | summary="Add word to user dictionary", 82 | status_code=201, 83 | ) 84 | async def save_word( 85 | request_model: models.UserDictionaryRequestWithWord, 86 | storage_engine: typing.Annotated[ 87 | UserDictProtocol, 88 | fastapi.Depends(dictionaries.prepare_storage_engine), 89 | ], 90 | _: typing.Annotated[str, fastapi.Depends(auth_via_api_key)], 91 | ) -> bool: 92 | """Save word to user dictionary.""" 93 | await storage_engine.prepare(request_model.user_name).save_record( 94 | request_model.exception_word, 95 | ) 96 | return True 97 | 98 | @SPELL_APP.delete( 99 | f"{SETTINGS.api_prefix}/dictionaries/", 100 | summary="Remove word from user dictionary", 101 | ) 102 | async def delete_word( 103 | request_model: models.UserDictionaryRequestWithWord, 104 | storage_engine: typing.Annotated[ 105 | UserDictProtocol, 106 | fastapi.Depends(dictionaries.prepare_storage_engine), 107 | ], 108 | _: typing.Annotated[str, fastapi.Depends(auth_via_api_key)], 109 | ) -> bool: 110 | """Remove word from user dictionary.""" 111 | await storage_engine.prepare(request_model.user_name).remove_record( 112 | request_model.exception_word, 113 | ) 114 | return True 115 | 
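# A minimal client sketch (illustrative only; httpx is not a project
# dependency, and the URL assumes the default port and API prefix):
#
#   import httpx
#   response = httpx.post(
#       "http://localhost:10113/api/check/",
#       json={"text": "Привед как дила", "language": "ru_RU"},
#   )
#   print(response.json()["corrections"])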
--------------------------------------------------------------------------------