├── .dockerignore
├── .github
│   ├── badges
│   │   └── coverage.json
│   └── workflows
│       └── pipeline.yml
├── .gitignore
├── .python-version
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── pyproject.toml
├── scripts
│   ├── __init__.py
│   ├── __main__.py
│   └── _helpers.py
├── tests
│   ├── __init__.py
│   ├── _fixtures.py
│   ├── conftest.py
│   ├── test_dict_views.py
│   ├── test_misc.py
│   ├── test_spell.py
│   ├── test_spell_views.py
│   └── test_various_views.py
├── uv.lock
└── whole_app
    ├── __init__.py
    ├── __main__.py
    ├── auth.py
    ├── dictionaries
    │   ├── __init__.py
    │   ├── dummy.py
    │   ├── file.py
    │   └── protocol.py
    ├── misc_helpers.py
    ├── models.py
    ├── settings.py
    ├── spell.py
    └── views.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | scripts/
2 | __pycache__/
3 | .mypy_cache/
4 | .pytest_cache/
5 | .coverage
6 | .DS_Store
7 | .git
8 | .github
9 | .venv
10 | Dockerfile
11 | Makefile
12 | README.md
13 |
--------------------------------------------------------------------------------
/.github/badges/coverage.json:
--------------------------------------------------------------------------------
1 | {"schemaVersion": 1, "label": "coverage", "message": "100%", "color": "#2A9D8F"}
--------------------------------------------------------------------------------
/.github/workflows/pipeline.yml:
--------------------------------------------------------------------------------
1 | name: Main pipeline
2 | on:
3 |   push:
4 |     branches:
5 |       - "main"
6 |       - "feature/**"
7 |       - "hotfix/**"
8 |       - "bugfix/**"
9 |     tags:
10 |       - v*
11 | 
12 | jobs:
13 |   update-readme:
14 |     runs-on: ubuntu-latest
15 |     if: github.ref_type == 'branch'
16 |     steps:
17 |       - uses: actions/checkout@v3
18 |         with:
19 |           fetch-depth: 0
20 |       - uses: actions/cache@v3
21 |         with:
22 |           path: |
23 |             .venv
24 |             ~/.cache/uv
25 |           key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }}
26 |       - run: |
27 |           sudo apt-get update -y
28 |           sudo apt-get install -y enchant-2 hunspell-ru hunspell-es hunspell-de-de hunspell-fr hunspell-pt-pt
29 |           pip install uv
30 |           uv sync --group dev
31 |           uv run make update-readme
32 |       - uses: stefanzweifel/git-auto-commit-action@v5
33 |         with:
34 |           commit_message: "docs: auto-update README"
35 |           file_pattern: README.md
36 | 
37 |   py-lint-and-test:
38 |     runs-on: ubuntu-latest
39 |     steps:
40 |       - uses: actions/checkout@v3
41 |       - run: |
42 |           make build
43 |           make lint-in-docker
44 |           make test-in-docker
45 | 
46 |   docker-lint:
47 |     runs-on: ubuntu-latest
48 |     steps:
49 |       - uses: actions/checkout@v3
50 |       - uses: hadolint/hadolint-action@v2.0.0
51 |         with:
52 |           failure-threshold: error
53 | 
54 |   coverage:
55 |     runs-on: ubuntu-latest
56 |     permissions:
57 |       contents: write
58 |       pages: write
59 |       id-token: write
60 |     steps:
61 |       - uses: actions/checkout@v3
62 |       - uses: actions/cache@v3
63 |         with:
64 |           path: |
65 |             .venv
66 |             ~/.cache/uv
67 |           key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }}
68 |       - run: |
69 |           sudo apt-get update -y
70 |           sudo apt-get install -y enchant-2 hunspell-ru hunspell-es hunspell-de-de hunspell-fr hunspell-pt-pt curl
71 |           pip install uv
72 |           uv sync --group dev
73 |       - run: uv run pytest -n3 . --cov-report=xml --cov-report=html
74 |       - run: uv run python -m scripts build-coverage-badge
75 |       - uses: stefanzweifel/git-auto-commit-action@v5
76 |         with:
77 |           commit_message: "docs: update coverage badge"
78 |           file_pattern: .github/badges/coverage.json
79 |       - run: mv htmlcov coverage
80 |       - uses: actions/upload-pages-artifact@v3
81 |         with:
82 |           path: coverage
83 | 
84 |   deploy-coverage:
85 |     needs: coverage
86 |     runs-on: ubuntu-latest
87 |     permissions:
88 |       pages: write
89 |       id-token: write
90 |     environment:
91 |       name: github-pages
92 |       url: ${{ steps.deployment.outputs.page_url }}
93 |     steps:
94 |       - id: deployment
95 |         uses: actions/deploy-pages@v4
96 | 
97 |   # build stage with auto-versioning based on git tags like vX.Y.Z (example: v3.1.2)
98 |   build-and-publish:
99 |     needs: [py-lint-and-test, docker-lint, coverage]
100 |     runs-on: ubuntu-latest
101 |     if: startsWith(github.ref, 'refs/tags/v')
102 |     steps:
103 |       - uses: actions/checkout@v3
104 |       - uses: actions/setup-python@v5
105 |         with:
106 |           python-version: "3.11"
107 |       - run: |
108 |           pip install uv
109 |           uv version $(python -c "import os; print(os.getenv('GITHUB_REF').lstrip('/').replace('refs/tags/v', ''));")
110 |           python -c "import re, pathlib; _p = pathlib.Path('README.md'); _p.write_text(re.sub(r'\#\# Development.*', r'', _p.read_text(), flags=re.I | re.S).strip())"
111 |       - uses: docker/setup-buildx-action@v2
112 |       - uses: docker/login-action@v2
113 |         with:
114 |           username: ${{ secrets.DOCKER_HUB_USERNAME }}
115 |           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
116 |       - name: Add tags for image
117 |         uses: docker/metadata-action@v4
118 |         id: meta
119 |         with:
120 |           images: ${{ secrets.DOCKER_HUB_USERNAME }}/spellcheck-microservice
121 |           tags: |
122 |             type=semver,pattern={{version}}
123 |             type=raw,value=latest
124 |       - name: Build and push
125 |         uses: docker/build-push-action@v3
126 |         with:
127 |           context: .
128 |           platforms: linux/amd64
129 |           push: ${{ github.event_name != 'pull_request' }}
130 |           tags: ${{ steps.meta.outputs.tags }}
131 |           labels: ${{ steps.meta.outputs.labels }}
132 | 
133 |   update-dockerhub-readme:
134 |     needs: [build-and-publish]
135 |     runs-on: ubuntu-latest
136 |     container:
137 |       image: python:3.11-slim
138 |     steps:
139 |       - uses: actions/checkout@v3
140 |       - uses: actions/cache@v3
141 |         with:
142 |           path: |
143 |             .venv
144 |             ~/.cache/uv
145 |           key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }}
146 |       - run: |
147 |           apt-get update -y
148 |           apt-get install -y make
149 |           pip install uv
150 |           uv sync --group dev
151 |           uv run make update-dockerhub-readme
152 |       - uses: peter-evans/dockerhub-description@v3
153 |         with:
154 |           username: ${{ secrets.DOCKER_HUB_USERNAME }}
155 |           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
156 |           repository: ${{ secrets.DOCKER_HUB_USERNAME }}/spellcheck-microservice
157 |           readme-filepath: ./README.md
158 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 |
162 | # Miscellaneous
163 | .DS_Store
164 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.11
2 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG USERNAME=snippet-microservice-user
2 | ARG USER_UID=2000
3 | ARG USER_GID=$USER_UID
4 | ARG WORKDIR=/srv/www/
5 |
6 | FROM pypy:3.11-slim AS builder
7 | ENV UV_COMPILE_BYTECODE=1
8 | ENV UV_LINK_MODE=copy
9 | ARG USERNAME
10 | ARG USER_UID
11 | ARG USER_GID
12 | ARG WORKDIR
13 | WORKDIR $WORKDIR
14 | RUN groupadd --gid $USER_GID $USERNAME
15 | RUN useradd --uid $USER_UID --gid $USER_GID -m $USERNAME
16 | RUN apt-get update -y
17 | # install rust
18 | RUN apt-get install -y curl
19 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
20 | ENV PATH="/root/.cargo/bin:${PATH}"
21 | # install prerequisites
22 | RUN apt-get install -y build-essential libssl-dev enchant-2 hunspell-ru hunspell-es hunspell-de-de hunspell-fr hunspell-pt-pt
23 | RUN pip install -U pip uv
24 | # Install the project's dependencies using the lockfile and settings
25 | RUN --mount=type=cache,target=/root/.cache/uv \
26 |     --mount=type=bind,source=uv.lock,target=uv.lock \
27 |     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
28 |     uv sync --locked --no-install-project
29 | # massive cleanup
30 | RUN uv cache clean
31 | RUN pip uninstall -y uv pip setuptools
32 | RUN rustup self uninstall -y
33 | RUN apt-get remove -y build-essential libssl-dev gcc curl
34 | RUN apt-get clean autoclean
35 | RUN apt-get autoremove --yes
36 | RUN rm -rf /var/lib/{apt,dpkg,cache,log}/
37 | RUN rm -rf /var/lib/apt/lists/*
38 | # make necessary dirs
39 | RUN mkdir /data/
40 | RUN chmod 777 /data/
41 |
42 | FROM pypy:3.11-slim AS runtime
43 | ARG USERNAME
44 | ARG WORKDIR
45 | WORKDIR $WORKDIR
46 | COPY --from=builder / /
47 | COPY . $WORKDIR
48 | USER $USERNAME
49 | ENV PATH="$WORKDIR/.venv/bin:$PATH"
50 | ENV SPELLCHECK_ENABLE_CORS=false
51 | CMD ["python", "-m", "whole_app"]
52 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Denis Anikin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | run:
2 | 	SPELLCHECK_DICTIONARIES_PATH=/tmp/sm-dicts/ SPELLCHECK_API_KEY=debug granian --reload --interface asgi whole_app.views:SPELL_APP
3 | 
4 | build:
5 | 	docker build -t spellcheck-microservice .
6 | 
7 | prepare-buildx:
8 | 	docker buildx create --use --name newbuilder
9 | 
10 | build-buildx:
11 | 	docker buildx build --platform linux/amd64,linux/arm64 -t spellcheck-microservice .
12 | 
13 | exec:
14 | 	docker run -it spellcheck-microservice bash
15 | 
16 | test:
17 | 	pytest . -n3
18 | 
19 | test-in-docker:
20 | 	docker run -t spellcheck-microservice bash -c "COVERAGE_FILE=/tmp/junk.coverage pytest . -n3"
21 | 
22 | lint:
23 | 	ruff check . --no-fix
24 | 	mypy .
25 | 	vulture whole_app --min-confidence 100
26 | 
27 | lint-in-docker:
28 | 	docker run -t spellcheck-microservice bash -c "RUFF_CACHE_DIR=/tmp/ruff-cache ruff check . --no-fix && mypy . && vulture whole_app --min-confidence 100"
29 | 
30 | run-prod:
31 | 	docker run -p 10113:10113 -e SPELLCHECK_WORKERS=1 -t spellcheck-microservice:latest
32 | 
33 | check-languages:
34 | 	python -c "import enchant; print(enchant.Broker().list_languages());"
35 | 
36 | check-languages-docker:
37 | 	docker run -it spellcheck-microservice python -c "import enchant; print(enchant.Broker().list_languages());"
38 | 
39 | update-readme:
40 | 	python -m scripts update-readme
41 | 
42 | update-dockerhub-readme:
43 | 	python -m scripts update-dockerhub-readme
44 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Spellcheck microservice
2 |
3 | [](https://github.com/xfenix/spellcheck-microservice/releases)
4 | [](https://hub.docker.com/r/xfenix/spellcheck-microservice)
5 | [](https://xfenix.github.io/spellcheck-microservice/)
6 | [](https://github.com/astral-sh/ruff)
7 |
8 | [](https://timothycrosley.github.io/isort/)
9 |
10 |
11 | This is a microservice designed to check the spelling of words. Based on [pyenchant](https://github.com/pyenchant/pyenchant). Exposes a REST API.
12 | Currently available languages are: ru_RU, en_US, es_ES, fr_FR, de_DE, pt_PT.
13 | It runs blazingly fast thanks to pyenchant at its core, LRU caching and pypy.
14 | It also supports a feature called «user dictionaries»: users can add their own word exceptions to a personal dictionary.
15 |
16 | ## Quickstart
17 |
18 | - `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:latest`
19 | - check http://localhost:10113/docs/ for the full REST documentation
20 | - the main REST endpoint you will need is http://localhost:10113/api/check/ (available without authorization); see the example below
21 |
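For example, once the container is up, a check request can be issued from Python. This is a minimal sketch using `httpx` (which the project itself lists in its dev dependencies); the request and response fields follow `SpellCheckRequest` and `SpellCheckResponse` from `whole_app/models.py`:

```python
import httpx

# POST a text for spellchecking; "language" must be one of the supported locales
response = httpx.post(
    "http://localhost:10113/api/check/",
    json={"text": "Привед как дила", "language": "ru_RU"},
)
response.raise_for_status()
for correction in response.json()["corrections"]:
    # every correction carries the word, its positions and a set of suggestions
    print(correction["word"], correction["first_position"], correction["suggestions"])
```
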
22 | ## Configuration
23 |
24 | ### Config options
25 |
26 | You can configure the service via environment variables. Here is a list of them:
27 | * `SPELLCHECK_SENTRY_DSN` Sentry DSN for integration. An empty field disables the integration. Default value is empty string.
28 | * `SPELLCHECK_API_KEY` defines the API key, used mostly for the user dictionaries API. Provide it if you want to enable the user dictionaries API. Default value is empty string.
29 | * `SPELLCHECK_ENABLE_CORS` enables CORS for all endpoints. In the docker container this option is disabled. Default value is `True`.
30 | * `SPELLCHECK_STRUCTURED_LOGGING` enables structured (json) logging. Default value is `True`.
31 | * `SPELLCHECK_WORKERS` defines the application server worker count. If you plan to use k8s and only scale with replica sets, you might want to reduce this value to `1`. Default value is `8`. Restrictions: `Gt(gt=0)`, `Lt(lt=301)`
32 | * `SPELLCHECK_SERVER_ADDRESS` binding address; the default value is suitable for docker. Default value is `0.0.0.0`.
33 | * `SPELLCHECK_PORT` binding port. Default value is `10113`. Restrictions: `Gt(gt=1023)`, `Lt(lt=65536)`
34 | * `SPELLCHECK_CACHE_SIZE` defines the LRU cache size for the misspelled word/suggestions cache. Any value less than `1` makes the cache size unlimited, so be careful with this option. Default value is `10000`.
35 | * `SPELLCHECK_API_PREFIX` defines the URL prefix for the entire API. Default value is `/api/`.
36 | * `SPELLCHECK_DOCS_URL` defines the documentation (swagger) URL prefix. Default value is `/docs/`.
37 | * `SPELLCHECK_MAX_SUGGESTIONS` defines the maximum number of suggestions for each word. 0 means unlimited. Default value is `0`. Restrictions: `Ge(ge=0)`
38 | * `SPELLCHECK_DICTIONARIES_PATH` defines the directory where user dictionaries are stored. This is an internal directory in the docker image; please map it to a volume as shown in the quickstart section of this readme. Default value is `/data`.
39 | * `SPELLCHECK_DICTIONARIES_STORAGE_PROVIDER` defines which engine will store user dictionaries. Default value is `StorageProviders.FILE`.
40 | * `SPELLCHECK_DICTIONARIES_DISABLED` switches off the user dictionaries API no matter what. Default value is `False`.
41 | * `SPELLCHECK_USERNAME_MIN_LENGTH` minimum username length. Default value is `3`.
42 | * `SPELLCHECK_USERNAME_MAX_LENGTH` maximum username length. Default value is `60`.
43 | * `SPELLCHECK_EXCLUSION_WORDS_STR` a string with a comma-separated list of words to ignore on each request to the /api/check endpoint. Example: `'foo, bar'`. Default value is empty string.
44 |
45 | ### Deployment
46 |
47 | Note: all docker & docker-compose variants use named volumes to store user dictionaries.
48 |
49 | #### Plain docker
50 |
51 | `docker run -p 10113:10113 -t --mount source=spellcheck-dicts,target=/data/ xfenix/spellcheck-microservice:latest`
52 |
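Any option from the config list above can be overridden here via environment variables, for example (a sketch with arbitrarily chosen values):

```bash
docker run -p 10113:10113 \
  -e SPELLCHECK_WORKERS=2 \
  -e SPELLCHECK_MAX_SUGGESTIONS=5 \
  --mount source=spellcheck-dicts,target=/data/ \
  xfenix/spellcheck-microservice:latest
```
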
53 | #### Docker-compose
54 |
55 | - Save this example configuration as `docker-compose.yml`:
56 |
57 | ```yml
58 | version: "3.9"
59 | services:
60 |   spellcheck:
61 |     image: xfenix/spellcheck-microservice:latest
62 |     ports:
63 |       - "10113:10113"
64 |     volumes:
65 |       - spellcheck-dicts:/data/
66 | 
67 | volumes:
68 |   spellcheck-dicts:
69 | ```
70 |
71 | - Then run `docker-compose up`
72 |
73 | ## Changelog
74 |
75 | You can find it here: https://github.com/xfenix/spellcheck-microservice/releases
76 |
77 | ## Development
78 |
79 | ### Quickstart
80 |
81 | - Clone this repo
82 | - For macOS: `brew install enchant`
83 | - For Debian/Ubuntu: `apt-get install -y enchant-2 hunspell-ru`
84 | - `uv sync --group dev`
85 | - `source .venv/bin/activate`
86 | - Execute the `make` command to run the local development server
87 | - README is automatically updated in the CI pipeline for each commit
88 |
89 | ### Notes
90 |
91 | The default api-key for local development is `debug` (you will need it to work with the user dictionaries API); see the example below.
92 |
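For instance, adding an exception word locally looks like this (a minimal sketch: the endpoint and payload follow `whole_app/models.py` and the tests; the API key travels in a header whose name is defined by `SETTINGS.api_key_header_name` in `whole_app/settings.py`, so the name used below is an assumption; adjust it to your settings):

```python
import httpx

API_KEY_HEADER_NAME = "Api-Key"  # assumption: the real name comes from SETTINGS.api_key_header_name

# add an exception word to the personal dictionary of user "username"
response = httpx.post(
    "http://localhost:10113/api/dictionaries/",
    json={"user_name": "username", "exception_word": "привед"},
    headers={API_KEY_HEADER_NAME: "debug"},  # the default local development api key
)
assert response.status_code == 201
```
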
93 | Please check [./Makefile](./Makefile) for more details
94 |
95 | ### Troubleshooting
96 |
97 | For macOS on Apple Silicon, add `PYENCHANT_LIBRARY_PATH=/opt/homebrew/lib/libenchant-2.dylib` to your `.zprofile`
98 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 |
2 | [project]
3 | name = "spellcheck-microservice"
4 | version = "4.0.0"
5 | description = "Microservice for spellchecking"
6 | authors = [{ name = "Denis Anikin", email = "ad@xfenix.ru" }]
7 | license = { text = "MIT" }
8 | requires-python = ">=3.10"
9 | dependencies = [
10 | "granian",
11 | "pyenchant",
12 | "toml",
13 | "cachebox",
14 | "anyio>=4",
15 | "sentry-sdk",
16 | "pydantic-settings",
17 | "fastapi",
18 | "structlog",
19 | "urlextract",
20 | ]
21 |
22 | [dependency-groups]
23 | dev = [
24 | "httpx",
25 | "pytest",
26 | "pytest-cov",
27 | "pytest-xdist",
28 | "mypy",
29 | "requests",
30 | "types-requests",
31 | "Faker",
32 | "vulture",
33 | "types-toml",
34 | "pytest-repeat",
35 | "ruff",
36 | ]
37 |
38 | [tool.ruff]
39 | fix = true
40 | unsafe-fixes = true
41 | line-length = 120
42 |
43 | [tool.ruff.lint]
44 | select = ["ALL"]
45 | ignore = ["D1", "D203", "D213", "FA102", "COM812", "ISC001"]
46 |
47 | [tool.ruff.format]
48 | docstring-code-format = true
49 |
50 | [tool.ruff.lint.isort]
51 | no-lines-before = ["standard-library", "local-folder"]
52 | known-third-party = []
53 | known-local-folder = ["whole_app"]
54 | lines-after-imports = 2
55 |
56 | [tool.ruff.lint.extend-per-file-ignores]
57 | "tests/*.py" = [
58 | "ANN001",
59 | "ANN002",
60 | "ANN003",
61 | "ANN401",
62 | "S101",
63 | "PLR2004",
64 | "S311",
65 | ]
66 | "tests/_fixtures.py" = ["E501"]
67 |
68 | [tool.mypy]
69 | plugins = "pydantic.mypy"
70 | strict = true
71 | ignore_missing_imports = true
72 |
73 | [tool.vulture]
74 | exclude = ["whole_app/settings.py"]
75 |
76 | [tool.pytest.ini_options]
77 | addopts = "--cov . --cov-report term-missing"
78 |
79 | [tool.coverage.report]
80 | exclude_also = [
81 | "if typing.TYPE_CHECKING",
82 | ]
83 |
84 | [tool.coverage.run]
85 | omit = ["scripts/*"]
86 |
87 | [tool.hatch.build.targets.wheel]
88 | packages = ["whole_app"]
89 |
90 | [build-system]
91 | requires = ["hatchling"]
92 | build-backend = "hatchling.build"
93 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xfenix/spellcheck-microservice/a12f35af9cf36ad85c6d9eaa1b6a7a284da7eea4/scripts/__init__.py
--------------------------------------------------------------------------------
/scripts/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import argparse
3 | import json
4 | import pathlib
5 | import re
6 | import sys
7 | import types
8 | import typing
9 | import xml.etree.ElementTree as ET
10 |
11 | from ._helpers import parse_last_git_tag, replace_tag_in_readme
12 | from whole_app.settings import SETTINGS
13 |
14 |
15 | PARENT_DIR: typing.Final = pathlib.Path(__file__).parent.parent
16 | README_PATH: typing.Final = PARENT_DIR / "README.md"
17 | COVERAGE_XML_PATH: typing.Final = pathlib.Path("coverage.xml")
18 | BADGE_JSON_PATH: typing.Final = pathlib.Path(".github/badges/coverage.json")
19 | LOW_BOUNDARY: typing.Final[float] = 60
20 | HIGH_BOUNDARY: typing.Final[float] = 80
21 |
22 |
23 | def _update_dockerhub_readme() -> None:
24 |     new_content = re.sub(
25 |         r"\#\# Development.*",
26 |         r"",
27 |         README_PATH.read_text(),
28 |         flags=re.IGNORECASE | re.DOTALL,
29 |     ).strip()
30 |     new_content = replace_tag_in_readme(new_content, parse_last_git_tag())
31 |     README_PATH.write_text(new_content + "\n")
32 | 
33 | 
34 | def _update_readme() -> None:
35 |     pack_of_readme_lines: list[str] = []
36 |     new_content: str = README_PATH.read_text()
37 |     env_prefix_value: typing.Final = SETTINGS.model_config["env_prefix"]
38 |     for one_field_name, field_properties in SETTINGS.model_fields.items():
39 |         if field_properties.description is None:
40 |             print("-", one_field_name, "will not be available in README")  # noqa: T201
41 |             continue
42 |         default_value_beautified: str = (
43 |             "empty string"
44 |             if isinstance(field_properties.default, str) and not field_properties.default
45 |             else f"`{field_properties.default}`"
46 |         )
47 |         one_row_parts = [
48 |             f"`{(env_prefix_value + one_field_name).upper()}`",
49 |             field_properties.description + ".",
50 |             f"Default value is {default_value_beautified}.",
51 |         ]
52 |         if field_properties.metadata:
53 |             validators_buf: list[str] = []
54 |             for one_obj in field_properties.metadata:
55 |                 restriction_stringified: str = str(one_obj)
56 |                 if any(("BeforeValidator" in restriction_stringified, "StringConstraints" in restriction_stringified)):
57 |                     continue
58 |                 validators_buf.append(f"`{restriction_stringified}`")
59 |             if validators_buf:
60 |                 one_row_parts.append(f"Restrictions: {', '.join(validators_buf)}")
61 |         pack_of_readme_lines.append(" ".join(one_row_parts))
62 |     automatic_config_readme: str = "* " + "\n* ".join(pack_of_readme_lines)
63 |     new_content = re.sub(
64 |         r"(.*Here is a list of them\:).*?(\#\#\#\s.*)",
65 |         r"\1\n" + automatic_config_readme + r"\n\n\2",
66 |         new_content,
67 |         flags=re.IGNORECASE | re.MULTILINE | re.DOTALL,
68 |     )
69 |     new_content = replace_tag_in_readme(new_content, parse_last_git_tag())
70 |     README_PATH.write_text(new_content)
71 | 
72 | 
73 | def _build_coverage_badge() -> None:
74 |     xml_source_text: typing.Final[str] = COVERAGE_XML_PATH.read_text()
75 |     root_element: typing.Final[ET.Element] = ET.fromstring(xml_source_text)  # noqa: S314
76 |     line_rate_text: typing.Final[str | None] = root_element.attrib.get("line-rate")
77 |     if line_rate_text is None:
78 |         missing_attr_message: typing.Final[str] = "Missing 'line-rate' attribute in coverage report"
79 |         raise KeyError(missing_attr_message)
80 |     coverage_percent: typing.Final[float] = float(line_rate_text) * 100.0
81 |     message_text: typing.Final[str] = f"{coverage_percent:.0f}%"
82 |     color_text: str
83 |     if coverage_percent < LOW_BOUNDARY:
84 |         color_text = "#E63946"
85 |     elif coverage_percent < HIGH_BOUNDARY:
86 |         color_text = "#FFB347"
87 |     else:
88 |         color_text = "#2A9D8F"
89 |     badge_mapping: typing.Final[typing.Mapping[str, typing.Any]] = types.MappingProxyType(
90 |         {
91 |             "schemaVersion": 1,
92 |             "label": "coverage",
93 |             "message": message_text,
94 |             "color": color_text,
95 |         },
96 |     )
97 |     BADGE_JSON_PATH.write_text(json.dumps(dict(badge_mapping)))
98 | 
99 | 
100 | if __name__ == "__main__":
101 |     sys.path.append(str(PARENT_DIR.resolve()))
102 | 
103 |     parser_obj: typing.Final = argparse.ArgumentParser()
104 |     parser_obj.add_argument("action")
105 |     arguments_list: argparse.Namespace = parser_obj.parse_args()
106 |     match arguments_list.action:
107 |         case "update-dockerhub-readme":
108 |             _update_dockerhub_readme()
109 |         case "update-readme":
110 |             _update_readme()
111 |         case "build-coverage-badge":
112 |             _build_coverage_badge()
113 |         case _:
114 |             print("Unknown action")  # noqa: T201
115 |
--------------------------------------------------------------------------------
/scripts/_helpers.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import shlex
4 | import subprocess
5 | import typing
6 |
7 |
8 | def parse_last_git_tag() -> str:
9 |     environment_ref_name_raw: typing.Final[str | None] = os.getenv("GITHUB_REF_NAME")
10 |     if environment_ref_name_raw is not None:
11 |         environment_ref_name: str = environment_ref_name_raw.lstrip("v")
12 |         if re.fullmatch(r"\d+\.\d+\.\d+", environment_ref_name):
13 |             return environment_ref_name
14 |         return "latest"
15 | 
16 |     git_tags_command: typing.Final[list[str]] = shlex.split(
17 |         "git rev-list --tags --max-count=1",
18 |     )
19 |     last_tag_hash: typing.Final[str] = subprocess.check_output(git_tags_command).strip().decode()  # noqa: S603
20 |     describe_command: typing.Final[list[str]] = shlex.split(
21 |         f"git describe --tags {last_tag_hash}",
22 |     )
23 |     return subprocess.check_output(describe_command).strip().decode().lstrip("v")  # noqa: S603
24 | 
25 | 
26 | def replace_tag_in_readme(readme_text: str, new_tag: str) -> str:
27 |     return re.sub(
28 |         r"(xfenix/spellcheck-microservice\:)([\w\.-]+)",
29 |         r"\g<1>" + new_tag,
30 |         readme_text,
31 |         flags=re.IGNORECASE | re.DOTALL,
32 |     )
33 |
34 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xfenix/spellcheck-microservice/a12f35af9cf36ad85c6d9eaa1b6a7a284da7eea4/tests/__init__.py
--------------------------------------------------------------------------------
/tests/_fixtures.py:
--------------------------------------------------------------------------------
1 | """Just a bunch of hardcode.
2 |
3 | We do not want to parse files for tests — it's just a waste of time.
4 | """
5 |
6 | import typing
7 |
8 |
9 | BAD_PAYLOAD: typing.Final[tuple[str, ...]] = (
10 | """
11 | Капиталисиическая экономика харпктеризуетсвя периодичкскими кризмсами. B 2008 году рначался экоеомический кризис, котооый некоторфе экономистоы считают тперминальны для индустриальной экрономики расщиренного вспроизводтва. Из-за того, что в послдние десятмилетия в мире абсолютно доминирует йинансовая сфера, кризив принял формау финансовьго.Причиноцй кризиса явщяется достмижение мироаой экономиой «пределоу роста» по доллгам, ресурспм и экологи . Проявляетая кризис в пмдении фондотдачи, то еать в снижен и нормы приьыли на вложенный капитал.Кризис насчался c пробщем на рынке мпотечного кредитованитя в США. Он притвёл к рецессри в мировой кономике, пмдению объёмв мировой тьрговли, беспирецедентнму за всю истоорию трудоыой статистрки росту беработицы и оазмыванию ак называеммого «среднерго класса» в рпзвитых стрснах Капиталисиическая экономика харпктеризуетсвя периодичкскими кризмсами. B 2008 году рначался экоеомический кризис, котооый некоторфе экономистоы считают тперминальны для индустриальной экрономики расщиренного вспроизводтва. Из-за того, что в послдние десятмилетия в мире абсолютно доминирует йинансовая сфера, кризив принял формау финансовьго.Причиноцй кризиса явщяется достмижение мироаой экономиой «пределоу роста» по доллгам, ресурспм и экологи . Проявляетая кризис в пмдении фондотдачи, то еать в снижен и нормы приьыли на вложенный капитал.Кризис насчался c пробщем на рынке мпотечного кредитованитя в США. Он притвёл к рецессри в мировой кономике, пмдению объёмв мировой тьрговли, беспирецедентнму за всю истоорию трудоыой статистрки росту беработицы и оазмыванию ак называеммого «среднерго класса» в рпзвитых стрснах Капиталисиическая экономика харпктеризуетсвя периодичкскими кризмсами. B 2008 году рначался экоеомический кризис, котооый некоторфе экономистоы считают тперминальны для индустриальной экрономики расщиренного вспроизводтва. Из-за того, что в послдние десятмилетия в мире абсолютно доминирует йинансовая сфера, кризив принял формау финансовьго.Причиноцй кризиса явщяется достмижение мироаой экономиой «пределоу роста» по доллгам, ресурспм и экологи . Проявляетая кризис в пмдении фондотдачи, то еать в снижен и нормы приьыли на вложенный капитал.Кризис насчался c пробщем на рынке мпотечного кредитованитя в США. Он притвёл к рецессри в мировой кономике, пмдению объёмв мировой тьрговли, беспирецедентнму за всю истоорию трудоыой статистрки росту беработицы и оазмыванию ак называеммого «среднерго класса» в рпзвитых
12 | """,
13 | """Апичатки — настаящая граза фсякага блохера. Это палнаценный ужос в текздах. Так жидь нельзйа""",
14 | )
15 |
16 | COMMON_TEXT_MESSAGE: typing.Final[str] = (
17 | "Коллеги из поддержки юридических лиц работают в чате по будням с 6:00 до 22:00 по Москве.\n" # noqa: RUF001
18 | "Напишите в рабочее время или позвоните 8(800)700-46-46 по будням с 6:00 до 22:00 суббота с 9:00 по 18:00.\n" # noqa: RUF001
19 | "{} \nЕсли хотите, я передам вопрос, и вам напишут в рабочее время." # noqa: RUF001
20 | )
21 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import tempfile
3 | import typing
4 |
5 | import faker
6 | import pytest
7 | from fastapi.testclient import TestClient
8 |
9 | from whole_app import views
10 | from whole_app.settings import SETTINGS, StorageProviders
11 |
12 |
13 | @pytest.fixture(scope="session")
14 | def faker_obj() -> faker.Faker:
15 |     return faker.Faker("ru_RU")
16 | 
17 | 
18 | @pytest.fixture(autouse=True)
19 | def patch_file_provider_for_temp(
20 |     monkeypatch: typing.Any,
21 | ) -> typing.Generator[None, None, None]:
22 |     """Patch settings to rewrite the dict path to a temporary directory."""
23 |     with monkeypatch.context() as patcher, tempfile.TemporaryDirectory() as tmp_dir_name:
24 |         yield patcher.setattr(SETTINGS, "dictionaries_path", pathlib.Path(tmp_dir_name))
25 | 
26 | 
27 | # pylint: disable=redefined-outer-name
28 | @pytest.fixture
29 | def app_client(
30 |     monkeypatch: pytest.MonkeyPatch,
31 |     faker_obj: typing.Any,
32 | ) -> typing.Generator[TestClient, None, None]:
33 |     """Fake client with patched fake storage.
34 | 
35 |     Also, used as a context manager, it allows us to test startup events
36 |     on every test.
37 |     """
38 |     fake_api_key: typing.Final[str] = faker_obj.password()
39 |     with TestClient(views.SPELL_APP) as local_client, monkeypatch.context() as patcher:
40 |         patcher.setattr(
41 |             SETTINGS,
42 |             "dictionaries_storage_provider",
43 |             StorageProviders.DUMMY,
44 |         )
45 |         patcher.setattr(SETTINGS, "api_key", fake_api_key)
46 |         local_client.headers.update({SETTINGS.api_key_header_name: fake_api_key})
47 |         yield local_client
48 |
--------------------------------------------------------------------------------
/tests/test_dict_views.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import typing
3 |
4 | import pytest
5 | from fastapi.testclient import TestClient
6 |
7 | from whole_app import models, views
8 | from whole_app.settings import SETTINGS, StorageProviders
9 |
10 |
11 | DICT_ENDPOINT: typing.Final = f"{SETTINGS.api_prefix}/dictionaries/"
12 |
13 |
14 | class TestFileAndDummyBasedDicts:
15 |     @pytest.fixture(params=[StorageProviders.DUMMY, StorageProviders.FILE])
16 |     def patch_various_providers(
17 |         self: "TestFileAndDummyBasedDicts",
18 |         monkeypatch: typing.Any,
19 |         request: typing.Any,
20 |     ) -> typing.Any:
21 |         with monkeypatch.context() as patcher:
22 |             yield patcher.setattr(
23 |                 SETTINGS,
24 |                 "dictionaries_storage_provider",
25 |                 request.param,
26 |             )
27 | 
28 |     @pytest.mark.repeat(3)
29 |     def test_add_to_dict(
30 |         self: "TestFileAndDummyBasedDicts",
31 |         app_client: typing.Any,
32 |         faker_obj: typing.Any,
33 |         patch_various_providers: typing.Any,  # noqa: ARG002
34 |     ) -> None:
35 |         fake_user_name: typing.Final = faker_obj.user_name()
36 |         fake_exc_word: typing.Final = faker_obj.word()
37 |         path_to_dict_file: typing.Final = SETTINGS.dictionaries_path.joinpath(  # pylint: disable=no-member
38 |             fake_user_name,
39 |         )
40 |         server_response = app_client.post(
41 |             DICT_ENDPOINT,
42 |             json=models.UserDictionaryRequestWithWord(
43 |                 user_name=fake_user_name,
44 |                 exception_word=fake_exc_word,
45 |             ).model_dump(),
46 |         )
47 |         assert server_response.status_code == 201
48 |         if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
49 |             assert fake_exc_word in path_to_dict_file.read_text()
50 | 
51 |     @pytest.mark.repeat(3)
52 |     def test_remove_from_user_dict(
53 |         self: "TestFileAndDummyBasedDicts",
54 |         app_client: typing.Any,
55 |         faker_obj: typing.Any,
56 |         patch_various_providers: typing.Any,  # noqa: ARG002
57 |     ) -> None:
58 |         fake_exc_word: typing.Final = faker_obj.word()
59 |         fake_user_name: typing.Final = faker_obj.user_name()
60 |         path_to_dict_file: typing.Final = SETTINGS.dictionaries_path.joinpath(  # pylint: disable=no-member
61 |             fake_user_name,
62 |         )
63 |         path_to_dict_file.touch()
64 |         path_to_dict_file.write_text(fake_exc_word)
65 |         if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
66 |             assert fake_exc_word in path_to_dict_file.read_text()
67 |         server_response = app_client.request(
68 |             "DELETE",
69 |             DICT_ENDPOINT,
70 |             json=models.UserDictionaryRequestWithWord(
71 |                 user_name=fake_user_name,
72 |                 exception_word=fake_exc_word,
73 |             ).model_dump(),
74 |         )
75 |         assert server_response.status_code == 200
76 |         if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
77 |             assert fake_exc_word not in path_to_dict_file.read_text()
78 | 
79 |     def test_dummy_provider_init(
80 |         self: "TestFileAndDummyBasedDicts",
81 |         monkeypatch: typing.Any,
82 |         app_client: typing.Any,
83 |         faker_obj: typing.Any,
84 |     ) -> None:
85 |         monkeypatch.setattr(
86 |             SETTINGS,
87 |             "dictionaries_storage_provider",
88 |             StorageProviders.DUMMY,
89 |         )
90 |         server_response = app_client.post(
91 |             DICT_ENDPOINT,
92 |             json=models.UserDictionaryRequestWithWord(
93 |                 user_name=faker_obj.user_name(),
94 |                 exception_word=faker_obj.word(),
95 |             ).model_dump(),
96 |         )
97 |         assert server_response.status_code == 201
98 | 
99 | 
100 | class TestVarious:
101 |     def test_disabled_dictionary_views(
102 |         self: "TestVarious",
103 |         monkeypatch: typing.Any,
104 |     ) -> None:
105 |         """Test views with the dictionaries_disabled SETTINGS option."""
106 |         with monkeypatch.context() as patcher:
107 |             patcher.setattr(SETTINGS, "dictionaries_disabled", True)
108 |             importlib.reload(views)
109 |             server_response = TestClient(views.SPELL_APP).post(
110 |                 DICT_ENDPOINT,
111 |                 json=models.UserDictionaryRequestWithWord(
112 |                     user_name="test",
113 |                     exception_word="test",
114 |                 ).model_dump(),
115 |             )
116 |             assert server_response.status_code == 404
117 |         # restore API state so other tests won't break
118 |         importlib.reload(views)
119 | 
120 |     @pytest.mark.parametrize("api_key", [None, ""])
121 |     def test_empty_auth_key(self: "TestVarious", api_key: str) -> None:
122 |         server_response = TestClient(views.SPELL_APP).post(
123 |             DICT_ENDPOINT,
124 |             json=models.UserDictionaryRequestWithWord(
125 |                 user_name="test",
126 |                 exception_word="test",
127 |             ).model_dump(),
128 |             headers={} if api_key is None else {SETTINGS.api_key_header_name: ""},
129 |         )
130 |         assert server_response.status_code == 403
131 | 
132 |     def test_wrong_api_key(self: "TestVarious") -> None:
133 |         server_response = TestClient(views.SPELL_APP).post(
134 |             DICT_ENDPOINT,
135 |             json=models.UserDictionaryRequestWithWord(
136 |                 user_name="test",
137 |                 exception_word="test",
138 |             ).model_dump(),
139 |             headers={
140 |                 SETTINGS.api_key_header_name: SETTINGS.api_key + "wrongTrashKekJunk --- 5000",
141 |             },
142 |         )
143 |         assert server_response.status_code == 401
144 |
--------------------------------------------------------------------------------
/tests/test_misc.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import os
3 | import runpy
4 | import typing
5 |
6 | from fastapi.testclient import TestClient
7 | from granian.constants import Interfaces
8 |
9 | from whole_app import views
10 | from whole_app.settings import SETTINGS, SettingsOfMicroservice
11 |
12 |
13 | if typing.TYPE_CHECKING:
14 | import faker
15 |
16 |
17 | def test_main_py(monkeypatch: typing.Any) -> None:
18 |     captured_parameters: dict[str, typing.Any] = {}
19 | 
20 |     class FakeGranian:
21 |         def __init__(
22 |             self: "FakeGranian",
23 |             target: str,
24 |             *,
25 |             address: str,
26 |             port: int,
27 |             workers: int,
28 |             interface: Interfaces,
29 |         ) -> None:
30 |             captured_parameters.update(
31 |                 {
32 |                     "target": target,
33 |                     "address": address,
34 |                     "port": port,
35 |                     "workers": workers,
36 |                     "interface": interface,
37 |                 },
38 |             )
39 | 
40 |         def serve(self: "FakeGranian") -> None:
41 |             captured_parameters["served"] = True
42 | 
43 |     monkeypatch.setattr("granian.Granian", FakeGranian)
44 |     runpy.run_module("whole_app.__main__", run_name="__main__")
45 | 
46 |     assert captured_parameters == {
47 |         "target": "whole_app.views:SPELL_APP",
48 |         "address": SETTINGS.server_address,
49 |         "port": SETTINGS.port,
50 |         "workers": SETTINGS.workers,
51 |         "interface": Interfaces.ASGI,
52 |         "served": True,
53 |     }
54 | 
55 | 
56 | def test_incorrect_settings(monkeypatch: typing.Any) -> None:
57 |     fake_settings: SettingsOfMicroservice = SettingsOfMicroservice()
58 |     assert fake_settings.cache_size == 10_000
59 | 
60 |     os.environ["SPELLCHECK_CACHE_SIZE"] = "-666"
61 | 
62 |     monkeypatch.setattr(
63 |         "pathlib.Path.read_text",
64 |         lambda _: "version === fucked == up == totally == 666.13.13",
65 |     )
66 |     fake_settings = SettingsOfMicroservice()
67 |     assert fake_settings.cache_size == 0
68 |     assert fake_settings.current_version == ""
69 | 
70 | 
71 | def test_sentry_integration(monkeypatch: typing.Any, faker_obj: "faker.Faker") -> None:
72 |     with monkeypatch.context() as patcher:
73 |         patcher.setattr(SETTINGS, "sentry_dsn", f"https://{faker_obj.pystr()}")
74 |         patcher.setattr("sentry_sdk.init", lambda **_: None)
75 |         importlib.reload(views)
76 |         server_response = TestClient(views.SPELL_APP).get(
77 |             f"{SETTINGS.api_prefix}/health/",
78 |         )
79 |         assert server_response.status_code == 200
80 |     # restore API state so other tests won't break
81 |     importlib.reload(views)
82 |
--------------------------------------------------------------------------------
/tests/test_spell.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | import pytest
4 |
5 | from tests._fixtures import COMMON_TEXT_MESSAGE
6 | from tests.test_spell_views import RU_LANG
7 | from whole_app import models
8 | from whole_app.settings import SETTINGS
9 | from whole_app.spell import SpellCheckService
10 |
11 |
12 | @pytest.mark.parametrize(
13 |     (
14 |         "text_input",
15 |         "expected_corrections",
16 |     ),
17 |     [
18 |         (
19 |             "Превед медвет",
20 |             [
21 |                 ("Превед", 0, 6, None),
22 |                 ("медвет", 7, 13, "медведь"),
23 |             ],
24 |         ),
25 |         (
26 |             "превет как дила",
27 |             [
28 |                 ("превет", 0, 6, "привет"),
29 |                 ("дила", 11, 15, "дела"),
30 |             ],
31 |         ),
32 |     ],
33 | )
34 | def test_correct_spell(
35 |     text_input: str,
36 |     expected_corrections: list[tuple[str, int, int, str | None]],
37 | ) -> None:
38 |     fake_engine: SpellCheckService = SpellCheckService()
39 |     corrections = fake_engine.prepare(
40 |         models.SpellCheckRequest(text=text_input, language=RU_LANG),
41 |     ).run_check()
42 |     assert len(corrections) == len(expected_corrections)
43 |     for one_correction, (word, first_position, last_position, suggestion) in zip(
44 |         corrections,
45 |         expected_corrections, strict=False,
46 |     ):
47 |         assert one_correction.first_position == first_position
48 |         assert one_correction.last_position == last_position
49 |         assert one_correction.word == word
50 |         assert text_input[first_position:last_position] == word
51 |         if suggestion is None:
52 |             assert one_correction.suggestions
53 |         else:
54 |             assert suggestion in one_correction.suggestions
55 | 
56 | 
57 | @pytest.mark.parametrize(
58 |     "url",
59 |     [
60 |         "www.rzb.ru",
61 |         "https://rzb.ru",
62 |         "https://www.rzb.ru",
63 |         "rzb.ru/taCWpO",
64 |         "www.rzb.ru/taCWpO",
65 |         "https://rzb.ru/taCWpO",
66 |         "https://www.rzb.ru/taCWpO",
67 |         "https://www.asd.google.com/search?q=some+text&param=3#dfsdf",
68 |         "https://www.google.com",
69 |         "http://google.com/?q=some+text&param=3#dfsdf",
70 |         "https://www.google.com/api/?",
71 |         "https://www.google.com/api/login.php",
72 |         "https://r-chat.raiffeisen.ru/admin/operator/",
73 |         "https://r-chat.raiffeisen.ru/admin/operator/taCWpO",
74 |     ],
75 | )
76 | def test_urls_ignored(
77 |     url: str,
78 | ) -> None:
79 |     fake_engine: SpellCheckService = SpellCheckService()
80 |     corrections = fake_engine.prepare(
81 |         models.SpellCheckRequest(text=COMMON_TEXT_MESSAGE.format(url), language="ru_RU", exclude_urls=True),
82 |     ).run_check()
83 |     assert not corrections
84 | 
85 | 
86 | @pytest.mark.parametrize(
87 |     ("wannabe_user_input", "excluded_words"),
88 |     [("ШЯЧЛО ПОПЯЧТСА ПОПЯЧТСА", {"шячло", "попячтса"})],
89 | )
90 | def test_default_excluded_words(
91 |     wannabe_user_input: str,
92 |     excluded_words: set[str],
93 |     monkeypatch: typing.Any,
94 | ) -> None:
95 |     with monkeypatch.context() as patcher:
96 |         patcher.setattr(SETTINGS, "_exclusion_words_set", excluded_words)
97 |         fake_engine: SpellCheckService = SpellCheckService()
98 |         prepared = fake_engine.prepare(
99 |             models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG, exclude_urls=False),
100 |         )
101 | 
102 |         corrections = prepared.run_check()
103 |         assert corrections == [], f"{corrections=} --- {prepared._exclusion_words=}"  # noqa: SLF001
104 |
--------------------------------------------------------------------------------
/tests/test_spell_views.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=redefined-outer-name
2 | import random
3 | import typing
4 |
5 | import pytest
6 | from requests.models import Response as RequestsResponse
7 |
8 | from ._fixtures import BAD_PAYLOAD
9 | from whole_app import models
10 | from whole_app.settings import SETTINGS, StorageProviders
11 |
12 |
13 | if typing.TYPE_CHECKING:
14 | import faker
15 | from fastapi.testclient import TestClient
16 |
17 |
18 | RUSSIAN_LETTERS: typing.Final = "абвгдежзийклмнопрстуфхцчшщъыьэюяё"
19 | RU_LANG: typing.Final = "ru_RU"
20 |
21 |
22 | @pytest.mark.parametrize(
23 |     "wannabe_user_input",
24 |     ["Привет как дела", "Пока, я ушёл", *BAD_PAYLOAD],
25 | )
26 | def test_no_corrections(app_client: "TestClient", wannabe_user_input: str) -> None:
27 |     """Dead simple test."""
28 |     server_response: typing.Final = app_client.post(
29 |         f"{SETTINGS.api_prefix}/check/",
30 |         json=models.SpellCheckRequest(text=wannabe_user_input, language=RU_LANG).model_dump(),
31 |     )
32 |     assert server_response.status_code == 200
33 | 
34 | 
35 | @pytest.mark.repeat(5)
36 | def test_with_corrections_simple(
37 |     app_client: "TestClient",
38 |     faker_obj: "faker.Faker",
39 | ) -> None:
40 |     """Not so dead simple test."""
41 |     generated_letter: typing.Final = random.choice(RUSSIAN_LETTERS)
42 |     wannabe_user_input: typing.Final[str] = (
43 |         faker_obj.text()
44 |         .lower()
45 |         .replace(
46 |             generated_letter,
47 |             random.choice(RUSSIAN_LETTERS.replace(generated_letter, "")),
48 |         )
49 |     )
50 |     server_response: typing.Final = app_client.post(
51 |         f"{SETTINGS.api_prefix}/check/",
52 |         json=models.SpellCheckRequest(
53 |             text=wannabe_user_input,
54 |             language=RU_LANG,
55 |             user_name=faker_obj.user_name(),
56 |         ).model_dump(),
57 |     )
58 |     assert server_response.status_code == 200
59 | 
60 | 
61 | @pytest.mark.parametrize(
62 |     ("wannabe_user_input", "tested_word"),
63 |     [
64 |         (BAD_PAYLOAD[0], "Капиталисиическая"),
65 |         (BAD_PAYLOAD[1], "блохера"),
66 |     ],
67 | )
68 | def test_with_exception_word_in_dictionary(
69 |     monkeypatch: typing.Any,
70 |     app_client: "TestClient",
71 |     faker_obj: "faker.Faker",
72 |     wannabe_user_input: str,
73 |     tested_word: str,
74 | ) -> None:
75 |     """Complex test, where we add a word to the dictionary and check that it is really excluded from the output."""
76 |     # force the file-based storage provider for this test
77 |     monkeypatch.setattr(
78 |         SETTINGS,
79 |         "dictionaries_storage_provider",
80 |         StorageProviders.FILE,
81 |     )
82 | 
83 |     def run_request() -> typing.Any:
84 |         return app_client.post(
85 |             f"{SETTINGS.api_prefix}/check/",
86 |             json=models.SpellCheckRequest(
87 |                 text=wannabe_user_input,
88 |                 language=RU_LANG,
89 |                 user_name=user_name,
90 |             ).model_dump(),
91 |         )
92 | 
93 |     def parse_words(server_response: RequestsResponse) -> typing.Any:
94 |         return [item["word"] for item in server_response.json()["corrections"]]
95 | 
96 |     user_name: typing.Final[str] = faker_obj.user_name()
97 |     # run a usual check request
98 |     server_response: RequestsResponse = run_request()
99 |     assert tested_word in parse_words(server_response)
100 |     # add the word to the user dictionary
101 |     app_client.post(
102 |         f"{SETTINGS.api_prefix}/dictionaries/",
103 |         json=models.UserDictionaryRequestWithWord(
104 |             user_name=user_name,
105 |             exception_word=tested_word,
106 |         ).model_dump(),
107 |     )
108 |     # and then check that the excepted word is not in the check output
109 |     server_response = run_request()
110 |     assert tested_word not in parse_words(server_response)
111 |
--------------------------------------------------------------------------------
/tests/test_various_views.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | import toml
4 |
5 | from whole_app.settings import PATH_TO_PYPROJECT, SETTINGS
6 |
7 |
8 | if typing.TYPE_CHECKING:
9 | from fastapi.testclient import TestClient
10 |
11 |
12 | def test_healthcheck_api_good(app_client: "TestClient") -> typing.Any:
13 |     server_response = app_client.get(f"{SETTINGS.api_prefix}/health/")
14 |     assert server_response.status_code == 200
15 |     assert server_response.json()["version"] == toml.loads(PATH_TO_PYPROJECT.read_text())["project"]["version"]
16 |
--------------------------------------------------------------------------------
/whole_app/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xfenix/spellcheck-microservice/a12f35af9cf36ad85c6d9eaa1b6a7a284da7eea4/whole_app/__init__.py
--------------------------------------------------------------------------------
/whole_app/__main__.py:
--------------------------------------------------------------------------------
1 | """Application server here.
2 |
3 | This file meant only for basic workers wrappers and fastapi exposure.
4 | For end-points look in views.py
5 | """
6 |
7 | import typing
8 |
9 | from granian import Granian # type: ignore[attr-defined]
10 | from granian.constants import Interfaces
11 |
12 | from .settings import SETTINGS
13 |
14 |
15 | APPLICATION_TARGET: typing.Final[str] = "whole_app.views:SPELL_APP"
16 |
17 |
18 | def launch_server() -> None:
19 |     Granian(
20 |         APPLICATION_TARGET,
21 |         address=SETTINGS.server_address,
22 |         port=SETTINGS.port,
23 |         workers=SETTINGS.workers,
24 |         interface=Interfaces.ASGI,
25 |     ).serve()
26 | 
27 | 
28 | if __name__ == "__main__":
29 |     launch_server()
30 |
--------------------------------------------------------------------------------
/whole_app/auth.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | import fastapi
4 | from fastapi.security.api_key import APIKeyHeader
5 |
6 | from .settings import SETTINGS
7 |
8 |
9 | async def auth_via_api_key(
10 |     user_provided_api_key: typing.Annotated[str, fastapi.Security(APIKeyHeader(name=SETTINGS.api_key_header_name))],
11 | ) -> str:
12 |     if user_provided_api_key != SETTINGS.api_key:
13 |         raise fastapi.HTTPException(
14 |             status_code=401,
15 |             detail="Could not validate api key",
16 |         )
17 |     return user_provided_api_key
18 |
--------------------------------------------------------------------------------
/whole_app/dictionaries/__init__.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | import structlog
4 |
5 | from . import dummy as dummy_storage
6 | from . import file as file_storage
7 | from . import protocol
8 | from whole_app.settings import SETTINGS, StorageProviders
9 |
10 |
11 | LOGGER_OBJ: typing.Final = structlog.get_logger()
12 |
13 |
14 | def init_storage() -> None:
15 |     if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
16 |         file_storage.init_storage()
17 |     elif SETTINGS.dictionaries_storage_provider == StorageProviders.DUMMY:
18 |         LOGGER_OBJ.warning(
19 |             "Storage provider is set to dummy mode. "
20 |             "All user dictionary requests will currently be thrown away. You have been warned.",
21 |         )
22 | 
23 | 
24 | def prepare_storage_engine() -> protocol.UserDictProtocol:
25 |     if SETTINGS.dictionaries_storage_provider == StorageProviders.FILE:
26 |         return file_storage.FileProvider()
27 |     return dummy_storage.DummyProvider()
28 |
--------------------------------------------------------------------------------
/whole_app/dictionaries/dummy.py:
--------------------------------------------------------------------------------
1 | """Dummy provider."""
2 |
3 |
4 | class DummyProvider:
5 |     """Dummy provider for user dictionaries.
6 | 
7 |     For cases where you want to use the dictionaries API but don't want
8 |     to do actual work.
9 |     """
10 | 
11 |     def prepare(self: "DummyProvider", _: str) -> "DummyProvider":
12 |         """Initialize class from user id."""
13 |         return self
14 | 
15 |     async def save_record(self: "DummyProvider", _: str) -> None:
16 |         """Save record to user dictionary."""
17 | 
18 |     async def remove_record(self: "DummyProvider", _: str) -> None:
19 |         """Remove record from user dictionary."""
20 | 
21 |     async def fetch_records(self: "DummyProvider") -> list[str]:
22 |         """Fetch records from user dictionary."""
23 |         return []
24 |
--------------------------------------------------------------------------------
/whole_app/dictionaries/file.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | from anyio import Path as AsyncPath
4 |
5 | from whole_app.settings import SETTINGS
6 |
7 |
8 | def init_storage() -> None:
9 |     SETTINGS.dictionaries_path.mkdir(
10 |         parents=True,
11 |         exist_ok=True,
12 |     )
13 | 
14 | 
15 | class FileProvider:
16 |     _user_dict_path: AsyncPath
17 | 
18 |     def prepare(self: "FileProvider", user_name: str) -> "FileProvider":
19 |         self._user_dict_path = AsyncPath(SETTINGS.dictionaries_path / user_name)
20 |         return self
21 | 
22 |     async def _store_lines(self: "FileProvider", lines: list[str]) -> None:
23 |         await self._user_dict_path.write_text("\n".join(lines) + "\n")
24 | 
25 |     async def save_record(self: "FileProvider", exception_word: str) -> None:
26 |         await self._user_dict_path.touch()
27 |         clean_word: typing.Final = exception_word.strip().lower()
28 |         file_content: typing.Final = await self.fetch_records()
29 |         if clean_word not in file_content:
30 |             file_content.append(clean_word)
31 |             await self._store_lines(file_content)
32 | 
33 |     async def remove_record(self: "FileProvider", exception_word: str) -> None:
34 |         file_content: typing.Final = await self.fetch_records()
35 |         if exception_word in file_content:
36 |             file_content.remove(exception_word)
37 |             await self._store_lines(file_content)
38 | 
39 |     async def fetch_records(self: "FileProvider") -> list[str]:
40 |         if await self._user_dict_path.exists():
41 |             return [one_line.strip() for one_line in (await self._user_dict_path.read_text()).split("\n") if one_line]
42 |         return []
43 |
--------------------------------------------------------------------------------
/whole_app/dictionaries/protocol.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 |
4 | class UserDictProtocol(typing.Protocol):
5 |     """Default storage protocol/interface."""
6 | 
7 |     def prepare(self: "UserDictProtocol", user_name: str) -> "UserDictProtocol":
8 |         """Prepare class for user name."""
9 | 
10 |     async def save_record(self: "UserDictProtocol", exception_word: str) -> None:
11 |         """Save record to user dictionary."""
12 | 
13 |     async def remove_record(self: "UserDictProtocol", exception_word: str) -> None:
14 |         """Remove record from user dictionary."""
15 | 
16 |     async def fetch_records(self: "UserDictProtocol") -> list[str]:
17 |         """Fetch records from user dictionary."""
18 |
--------------------------------------------------------------------------------
/whole_app/misc_helpers.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import typing
3 |
4 | import structlog
5 |
6 | from whole_app.settings import SETTINGS
7 |
8 |
9 | def init_logger() -> None:
10 |     our_processors: typing.Final[typing.Any] = [
11 |         structlog.contextvars.merge_contextvars,
12 |         structlog.processors.add_log_level,
13 |         structlog.processors.format_exc_info,
14 |         structlog.processors.TimeStamper(fmt="iso", utc=True),
15 |     ]
16 |     if SETTINGS.structured_logging:
17 |         our_processors.append(structlog.processors.JSONRenderer())
18 |     structlog.configure(
19 |         cache_logger_on_first_use=True,
20 |         wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
21 |         processors=our_processors,
22 |     )
23 |
--------------------------------------------------------------------------------
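
A sketch of what init_logger enables; with structured logging switched on, every event is rendered as a single JSON line (key order and timestamp format are illustrative, not guaranteed):

    import structlog

    from whole_app.misc_helpers import init_logger

    init_logger()
    structlog.get_logger().info("service_started", workers=8)
    # with SPELLCHECK_STRUCTURED_LOGGING=true, prints something like:
    # {"workers": 8, "event": "service_started", "level": "info", "timestamp": "..."}
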
/whole_app/models.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-member
2 | """Models for input/output."""
3 |
4 | import typing
5 |
6 | import pydantic
7 |
8 | from .settings import SETTINGS, AvailableLanguages, AvailableLanguagesType
9 |
10 |
11 | class OneCorrection(pydantic.BaseModel):
12 | first_position: int
13 | last_position: int
14 | word: str
15 | suggestions: set[str]
16 |
17 |
18 | class SpellCheckRequest(pydantic.BaseModel):
19 | text: str = pydantic.Field(..., examples=["Привед как дила"])
20 | language: AvailableLanguagesType
21 | user_name: str | None = pydantic.Field(
22 | None,
23 | examples=["username"],
24 | pattern=SETTINGS.username_regex,
25 | min_length=SETTINGS.username_min_length,
26 | max_length=SETTINGS.username_max_length,
27 | )
28 | exclude_urls: bool = True
29 |
30 |
31 | class SpellCheckResponse(pydantic.BaseModel):
32 | text: str
33 | language: str
34 | corrections: list[OneCorrection]
35 |
36 |
37 | class UserDictionaryRequest(pydantic.BaseModel):
38 | user_name: str = pydantic.Field(
39 | examples=["username"],
40 | pattern=SETTINGS.username_regex,
41 | min_length=SETTINGS.username_min_length,
42 | max_length=SETTINGS.username_max_length,
43 | )
44 |
45 |
46 | class UserDictionaryRequestWithWord(UserDictionaryRequest):
47 | exception_word: str = pydantic.Field(..., examples=["привед"])
48 |
49 |
50 | class HealthCheckResponse(pydantic.BaseModel):
51 | service_name: str = SETTINGS.service_name
52 | supported_languages: tuple[str, ...] = AvailableLanguages
53 | version: str = SETTINGS.current_version
54 | status: typing.Literal["ok", "notok"] = "ok"
55 |
--------------------------------------------------------------------------------
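
A sketch of the request model's validation in action (the username constraints come from SETTINGS):

    import pydantic

    from whole_app import models

    payload = models.SpellCheckRequest(text="Привед как дила", language="ru_RU", user_name="username")
    print(payload.exclude_urls)  # True by default

    try:  # a name violating username_regex is rejected
        models.SpellCheckRequest(text="hi", language="en_US", user_name="bad name!")
    except pydantic.ValidationError as exc:
        print(exc.error_count())  # 1
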
/whole_app/settings.py:
--------------------------------------------------------------------------------
1 | import enum
2 | import pathlib
3 | import typing
4 |
5 | import pydantic
6 | import structlog
7 | import toml
8 | import typing_extensions
9 | from pydantic import computed_field
10 | from pydantic_settings import BaseSettings
11 |
12 |
13 | LOGGER_OBJ: typing.Final = structlog.get_logger()
14 | PATH_TO_PYPROJECT: typing.Final = pathlib.Path(__file__).parent.parent / "pyproject.toml"
15 | AvailableLanguagesType = typing.Literal[
16 | "ru_RU",
17 | "en_US",
18 | "es_ES",
19 | "fr_FR",
20 | "de_DE",
21 | "pt_PT",
22 | ]
23 | AvailableLanguages: tuple[str, ...] = typing.get_args(AvailableLanguagesType)
24 |
25 |
26 | def _warn_about_poor_lru_cache_size(
27 | possible_value: int,
28 | ) -> int:
29 | if int(possible_value) < 1:
30 | LOGGER_OBJ.warning(
31 |             ("You set the cache size to less than 1. In this case, the cache size will be unlimited and may pollute your memory."),
32 | )
33 | return 0
34 | return possible_value
35 |
36 |
37 | def _warn_about_empty_api_key(
38 | possible_value: str,
39 | ) -> str:
40 | if not possible_value:
41 |         LOGGER_OBJ.warning("You set an empty API key. This is not recommended.")
42 | return possible_value
43 |
44 |
45 | def _parse_version_from_local_file(
46 | default_value: str,
47 | ) -> str:
48 | try:
49 | pyproject_obj: typing.Final[dict[str, dict[str, object]]] = toml.loads(
50 | PATH_TO_PYPROJECT.read_text(),
51 | )
52 | return typing.cast("str", pyproject_obj["project"]["version"])
53 | except (toml.TomlDecodeError, KeyError, FileNotFoundError) as exc:
54 |         LOGGER_OBJ.warning("Can't parse version from pyproject. Trouble: %s", exc)
55 | return default_value
56 |
57 |
58 | class StorageProviders(enum.Enum):
59 | FILE = "file"
60 | DUMMY = "dummy"
61 |
62 |
63 | class SettingsOfMicroservice(BaseSettings):
64 | app_title: str = "Spellcheck API"
65 | service_name: str = "spellcheck-microservice"
66 | sentry_dsn: typing.Annotated[
67 | str,
68 | pydantic.Field(
69 | description="Sentry DSN for integration. Empty field disables integration",
70 | ),
71 | pydantic.StringConstraints(
72 | strip_whitespace=True,
73 | ),
74 | ] = ""
75 | api_key: typing.Annotated[
76 | str,
77 | pydantic.BeforeValidator(_warn_about_empty_api_key),
78 | pydantic.Field(
79 | description=(
80 |                 "define the API key, mostly used for the user dictionaries API. "
81 |                 "Please provide it if you want to enable the user dictionaries API"
82 | ),
83 | ),
84 | ] = ""
85 | api_key_header_name: typing.Annotated[
86 | str,
87 | pydantic.StringConstraints(
88 | strip_whitespace=True,
89 | ),
90 | ] = "Api-Key"
91 | enable_cors: typing.Annotated[
92 | bool,
93 | pydantic.Field(
94 |             description="enable CORS for all endpoints. In the docker container this option is disabled",
95 | ),
96 | ] = True
97 | structured_logging: typing.Annotated[
98 | bool,
99 | pydantic.Field(
100 | description="enables structured (json) logging",
101 | ),
102 | ] = True
103 | workers: typing.Annotated[
104 | int,
105 | pydantic.Field(
106 | gt=0,
107 | lt=301,
108 | description=(
109 | "define application server workers count. "
110 | "If you plan to use k8s and only scale with replica sets, you might want to reduce this value to `1`"
111 | ),
112 | ),
113 | ] = 8
114 | server_address: typing.Annotated[
115 | str,
116 | pydantic.StringConstraints(
117 | strip_whitespace=True,
118 | ),
119 | pydantic.Field(
120 | description="binding address, default value suitable for docker",
121 | ),
122 | ] = "0.0.0.0" # noqa: S104
123 | port: typing.Annotated[
124 | int,
125 | pydantic.Field(
126 | gt=1_023,
127 | lt=65_536,
128 | description="binding port",
129 | ),
130 | ] = 10_113
131 | cache_size: typing.Annotated[
132 | int,
133 | pydantic.BeforeValidator(_warn_about_poor_lru_cache_size),
134 | pydantic.Field(
135 | description=(
136 | "define LRU cache size for misspelled word/suggestions cache. "
137 | "Any value less than `1` makes the cache size unlimited, so be careful with this option"
138 | ),
139 | ),
140 | ] = 10_000
141 | api_prefix: typing.Annotated[
142 | str,
143 | pydantic.StringConstraints(
144 | strip_whitespace=True,
145 | ),
146 | pydantic.BeforeValidator(
147 | lambda possible_value: f"/{possible_value.strip('/')}",
148 | ),
149 | pydantic.Field(description="define all API's URL prefix"),
150 | ] = "/api/"
151 | docs_url: typing.Annotated[
152 | str,
153 | pydantic.StringConstraints(
154 | strip_whitespace=True,
155 | ),
156 | pydantic.Field(
157 | description="define documentation (swagger) URL prefix",
158 | ),
159 | ] = "/docs/"
160 | max_suggestions: typing.Annotated[
161 | int,
162 | pydantic.Field(
163 | ge=0,
164 |             description="defines the maximum number of suggestions returned for each word. 0 means unlimited",
165 | ),
166 | ] = 0
167 | dictionaries_path: typing.Annotated[
168 | pathlib.Path,
169 | pydantic.Field(
170 | description=(
171 |                 "define the directory where user dictionaries are stored. "
172 |                 "This is an inner directory in the docker image; please map it to a volume as "
173 |                 "shown in the quickstart part of this readme"
174 | ),
175 | ),
176 | ] = pathlib.Path("/data/")
177 | dictionaries_storage_provider: typing.Annotated[
178 | StorageProviders,
179 | pydantic.Field(
180 |             description="define which engine will store user dictionaries",
181 | ),
182 | ] = StorageProviders.FILE
183 | dictionaries_disabled: typing.Annotated[
184 | bool,
185 | pydantic.Field(
186 |             description="switches off the user dictionaries API no matter what",
187 | ),
188 | ] = False
189 | current_version: typing.Annotated[
190 | str,
191 | pydantic.BeforeValidator(_parse_version_from_local_file),
192 | ] = ""
193 | username_min_length: typing.Annotated[
194 | int,
195 | pydantic.Field(
196 | description="minimum length of username",
197 | ),
198 | ] = 3
199 | username_max_length: typing.Annotated[
200 | int,
201 | pydantic.Field(
202 | description="maximum length of username",
203 | ),
204 | ] = 60
205 | username_regex: str = r"^[a-zA-Z0-9-_]*$"
206 | exclusion_words_str: typing.Annotated[
207 | str,
208 | pydantic.Field(
209 |             description="comma-separated string of words which will be ignored in each request to the /api/check endpoint. "
210 | "Example: `'foo, bar'`"
211 | ),
212 | ] = ""
213 | _exclusion_words_set: typing.Annotated[
214 | set[str],
215 | pydantic.Field(
216 |             description="""set of words which will be ignored by default (filled from exclusion_words_str).
217 | Example: `'["foo", "bar"]'` """,
218 | ),
219 | ] = set()
220 |
221 | @computed_field
222 | def exclusion_words_set(self) -> set[str]:
223 | return self._exclusion_words_set
224 |
225 | @pydantic.model_validator(mode="after")
226 | def _assemble_exclusion_words_set(self) -> typing_extensions.Self:
227 | self._exclusion_words_set = {
228 | one_word.strip().lower() for one_word in self.exclusion_words_str.split(",") if one_word
229 | }
230 | return self
231 |
232 | class Config:
233 | env_prefix: str = "spellcheck_"
234 |
235 |
236 | SETTINGS: SettingsOfMicroservice = SettingsOfMicroservice()
237 |
--------------------------------------------------------------------------------
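
Every field above can be overridden through environment variables with the spellcheck_ prefix (pydantic-settings matches them case-insensitively). A sketch:

    import os

    os.environ["SPELLCHECK_CACHE_SIZE"] = "500"
    os.environ["SPELLCHECK_EXCLUSION_WORDS_STR"] = "foo, Bar"

    from whole_app.settings import SettingsOfMicroservice

    fresh_settings = SettingsOfMicroservice()
    assert fresh_settings.cache_size == 500
    assert fresh_settings.exclusion_words_set == {"foo", "bar"}
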
/whole_app/spell.py:
--------------------------------------------------------------------------------
1 | import re
2 | import typing
3 |
4 | import cachebox
5 | import urlextract
6 | from enchant.checker import SpellChecker
7 |
8 | from . import models
9 | from .settings import SETTINGS
10 |
11 |
12 | _MISSPELED_CACHE: typing.Final[
13 | cachebox.LRUCache[str, list[str]] | dict[str, list[str]]
14 | ] = (
15 | cachebox.LRUCache[str, list[str]](SETTINGS.cache_size)
16 | if SETTINGS.cache_size > 0
17 | else typing.cast("dict[str, list[str]]", {})
18 | )
19 |
20 | SEPARATORS_TO_SPLIT_URL_BY_WORDS: typing.Final[re.Pattern[str]] = re.compile(r"\.|\:|\/\/|\/|\?|\&|\=|\+|\#|\-")
21 |
22 |
23 | class SpellCheckService:
24 | __slots__ = ("_exclusion_words", "_input_text", "_spellcheck_engine")
25 | _input_text: str
26 | _spellcheck_engine: SpellChecker
27 | _exclusion_words: list[str]
28 | _url_extractor: urlextract.URLExtract = urlextract.URLExtract()
29 |
30 | def prepare(
31 | self: "SpellCheckService",
32 | request_payload: models.SpellCheckRequest,
33 | exclusion_words: list[str] | None = None,
34 | ) -> "SpellCheckService":
35 | """Initialize machinery."""
36 | self._input_text = request_payload.text
37 |         self._exclusion_words = list(exclusion_words) if exclusion_words else []  # copy, to avoid mutating the caller's list
38 | self._exclusion_words.extend(typing.cast("set[str]", SETTINGS.exclusion_words_set))
39 |
40 | if request_payload.exclude_urls:
41 | for one_url in self._url_extractor.find_urls(self._input_text):
42 | self._exclusion_words.extend(
43 | {one_word.lower() for one_word in re.split(SEPARATORS_TO_SPLIT_URL_BY_WORDS, one_url)}
44 | )
45 | self._spellcheck_engine = SpellChecker(request_payload.language)
46 | return self
47 |
48 | @staticmethod
49 | def get_memorized_suggestions(word_spellcheck_result: SpellChecker) -> list[str]:
50 | misspelled_suggestions: list[str]
51 | if word_spellcheck_result.word in _MISSPELED_CACHE:
52 | misspelled_suggestions = _MISSPELED_CACHE[word_spellcheck_result.word]
53 | else:
54 | misspelled_suggestions = word_spellcheck_result.suggest()
55 | _MISSPELED_CACHE[word_spellcheck_result.word] = misspelled_suggestions
56 | return (
57 | misspelled_suggestions[: SETTINGS.max_suggestions]
58 | if SETTINGS.max_suggestions > 0
59 | else misspelled_suggestions
60 | )
61 |
62 | def run_check(self: "SpellCheckService") -> list[models.OneCorrection]:
63 | corrections_output: list[models.OneCorrection] = []
64 | self._spellcheck_engine.set_text(self._input_text)
65 | for one_result in self._spellcheck_engine:
66 | if one_result.word.lower() in self._exclusion_words:
67 | continue
68 | corrections_output.append(
69 | models.OneCorrection(
70 | first_position=one_result.wordpos,
71 | last_position=one_result.wordpos + len(one_result.word),
72 | word=one_result.word,
73 | suggestions=self.get_memorized_suggestions(one_result),
74 | ),
75 | )
76 | return corrections_output
77 |
--------------------------------------------------------------------------------
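
A minimal sketch of driving the service directly (requires the enchant system library plus the relevant hunspell dictionary):

    from whole_app import models
    from whole_app.spell import SpellCheckService

    request = models.SpellCheckRequest(text="Привед как дила", language="ru_RU")
    for one_correction in SpellCheckService().prepare(request).run_check():
        print(one_correction.word, sorted(one_correction.suggestions)[:3])
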
/whole_app/views.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | import fastapi
4 | import structlog
5 | from anyio import to_thread
6 |
7 | from . import dictionaries, misc_helpers, models, spell
8 | from .auth import auth_via_api_key
9 | from .dictionaries.protocol import UserDictProtocol
10 | from .settings import SETTINGS
11 |
12 |
13 | LOGGER_OBJ: typing.Final = structlog.get_logger()
14 | SPELL_APP: typing.Final = fastapi.FastAPI(
15 | title=SETTINGS.app_title,
16 | version=SETTINGS.current_version,
17 | docs_url=SETTINGS.docs_url,
18 | openapi_url=f"{SETTINGS.api_prefix}/openapi.json",
19 | )
20 | if SETTINGS.enable_cors:
21 | from fastapi.middleware.cors import CORSMiddleware
22 |
23 | SPELL_APP.add_middleware(
24 | CORSMiddleware,
25 | allow_origins=("*",),
26 | allow_credentials=True,
27 | allow_methods=["*"],
28 | allow_headers=["*"],
29 | )
30 | if SETTINGS.sentry_dsn:
31 | import sentry_sdk
32 | from sentry_sdk.integrations.asgi import SentryAsgiMiddleware
33 |
34 | sentry_sdk.init(dsn=SETTINGS.sentry_dsn)
35 | SPELL_APP.add_middleware(SentryAsgiMiddleware)
36 |
37 |
38 | @SPELL_APP.on_event("startup")
39 | def startup() -> None:
40 | dictionaries.init_storage()
41 | misc_helpers.init_logger()
42 | LOGGER_OBJ.info("Current settings: %s", SETTINGS)
43 |
44 |
45 | @SPELL_APP.post(f"{SETTINGS.api_prefix}/check/", summary="Check spelling")
46 | async def spell_check_main_endpoint(
47 | request_payload: models.SpellCheckRequest,
48 | spell_service: typing.Annotated[
49 | spell.SpellCheckService,
50 | fastapi.Depends(spell.SpellCheckService),
51 | ],
52 | storage_engine: typing.Annotated[
53 | UserDictProtocol,
54 | fastapi.Depends(dictionaries.prepare_storage_engine),
55 | ],
56 | ) -> models.SpellCheckResponse:
57 |     """Check spelling of text for the given language."""
58 | exclusion_words: list[str] = []
59 | if request_payload.user_name and not SETTINGS.dictionaries_disabled:
60 | exclusion_words = await storage_engine.prepare(
61 | request_payload.user_name,
62 | ).fetch_records()
63 | return models.SpellCheckResponse(
64 | **request_payload.model_dump(),
65 | corrections=await to_thread.run_sync(
66 | spell_service.prepare(request_payload, exclusion_words).run_check,
67 | ),
68 | )
69 |
70 |
71 | @SPELL_APP.get(f"{SETTINGS.api_prefix}/health/", summary="Regular healthcheck API")
72 | async def check_health_of_service() -> models.HealthCheckResponse:
73 | """Check health of service."""
74 | return models.HealthCheckResponse()
75 |
76 |
77 | if not SETTINGS.dictionaries_disabled:
78 |
79 | @SPELL_APP.post(
80 | f"{SETTINGS.api_prefix}/dictionaries/",
81 | summary="Add word to user dictionary",
82 | status_code=201,
83 | )
84 | async def save_word(
85 | request_model: models.UserDictionaryRequestWithWord,
86 | storage_engine: typing.Annotated[
87 | UserDictProtocol,
88 | fastapi.Depends(dictionaries.prepare_storage_engine),
89 | ],
90 | _: typing.Annotated[str, fastapi.Depends(auth_via_api_key)],
91 | ) -> bool:
92 | """Save word to user dictionary."""
93 | await storage_engine.prepare(request_model.user_name).save_record(
94 | request_model.exception_word,
95 | )
96 | return True
97 |
98 | @SPELL_APP.delete(
99 | f"{SETTINGS.api_prefix}/dictionaries/",
100 | summary="Remove word from user dictionary",
101 | )
102 | async def delete_word(
103 | request_model: models.UserDictionaryRequestWithWord,
104 | storage_engine: typing.Annotated[
105 | UserDictProtocol,
106 | fastapi.Depends(dictionaries.prepare_storage_engine),
107 | ],
108 | _: typing.Annotated[str, fastapi.Depends(auth_via_api_key)],
109 | ) -> bool:
110 |         """Remove word from user dictionary."""
111 | await storage_engine.prepare(request_model.user_name).remove_record(
112 | request_model.exception_word,
113 | )
114 | return True
115 |
--------------------------------------------------------------------------------
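
A sketch of exercising these endpoints in-process with FastAPI's TestClient (the paths follow SETTINGS.api_prefix, "/api" by default; enchant and the hunspell dictionaries must be installed):

    from fastapi.testclient import TestClient

    from whole_app.views import SPELL_APP

    client = TestClient(SPELL_APP)
    response = client.post("/api/check/", json={"text": "Привед как дила", "language": "ru_RU"})
    print(response.status_code)               # 200
    print(response.json()["corrections"])     # list of correction objects
    print(client.get("/api/health/").json())  # {"service_name": "spellcheck-microservice", ...}
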