├── tests
├── data
│ └── warning_list.txt
├── conftest.py
├── __init__.py
├── test_tools.py
└── check_warnings.py
├── docs
├── usage.rst
├── contribute.rst
├── _static
│ ├── custom.css
│ └── custom-icon.js
├── _template
│ └── pypackage-credit.html
├── index.rst
└── conf.py
├── codecov.yml
├── slangweb
├── py.typed
├── __init__.py
├── constants.py
├── tools.py
├── translator.py
└── cli.py
├── .readthedocs.yaml
├── .devcontainer
└── devcontainer.json
├── CITATION.cff
├── .copier-answers.yml
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ ├── PULL_REQUEST_TEMPLATE
│ │ └── pr_template.md
│ └── feature_request.md
└── workflows
│ ├── release.yaml
│ ├── pypackage_check.yaml
│ └── unit.yaml
├── AUTHORS.rst
├── LICENSE
├── .pre-commit-config.yaml
├── .gitignore
├── pyproject.toml
├── noxfile.py
├── CONTRIBUTING.rst
├── CODE_OF_CONDUCT.rst
└── README.md
/tests/data/warning_list.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | """Pytest session configuration."""
2 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """make test folder a package for coverage."""
2 |
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | Usage
2 | =====
3 |
4 | **Slang Web** usage documentation.
5 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | # disable the treemap comment and report in PRs
2 | comment: false
3 |
--------------------------------------------------------------------------------
/slangweb/py.typed:
--------------------------------------------------------------------------------
1 | # Marker file for PEP 561. The slangweb package uses inline types.
--------------------------------------------------------------------------------
/docs/contribute.rst:
--------------------------------------------------------------------------------
1 | Contribute
2 | ==========
3 |
4 | .. include:: ../CONTRIBUTING.rst
5 | :start-line: 3
6 |
--------------------------------------------------------------------------------
/docs/_static/custom.css:
--------------------------------------------------------------------------------
1 | /* add dollar sign in console code-block */
2 | div.highlight-console pre span.go::before {
3 | content: "$";
4 | margin-right: 10px;
5 | margin-left: 5px;
6 | }
7 |
--------------------------------------------------------------------------------
/docs/_template/pypackage-credit.html:
--------------------------------------------------------------------------------
1 |
2 | From
3 | @12rambau/pypackage
4 | 0.1.18 Copier project.
5 |
6 |
--------------------------------------------------------------------------------
/slangweb/__init__.py:
--------------------------------------------------------------------------------
1 | """The init file of the package."""
2 |
3 | __version__ = "0.0.0"
4 | __author__ = "Rodrigo Esteban Principe"
5 | __email__ = "fitoprincipe82@gmail.com"
6 |
7 | from .translator import Translator # noqa: F401
8 |
--------------------------------------------------------------------------------
/slangweb/constants.py:
--------------------------------------------------------------------------------
1 | """Constant variables."""
2 |
3 | DEFAULT_LANGUAGE = "en"
4 | ENCODING = "utf-8"
5 | SLANG_FOLDER = "slangweb"
6 | MODELS_LOOKUP_FILE = "models_lookup.json"
7 | MODELS_FOLDER = "models"
8 | LOOKUPS_FOLDER = "lookups"
9 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
2 |
3 | version: 2
4 |
5 | build:
6 | os: ubuntu-22.04
7 | tools:
8 | python: "3.10"
9 |
10 | sphinx:
11 | configuration: docs/conf.py
12 |
13 | python:
14 | install:
15 | - method: pip
16 | path: .
17 | extra_requirements:
18 | - doc
19 |
--------------------------------------------------------------------------------
/tests/test_tools.py:
--------------------------------------------------------------------------------
1 | """Test the tools module."""
2 |
3 | from slangweb import tools
4 |
5 |
def test_get_model_folder():
    """Check that a model name is converted to its "models--" folder name."""
    folder = tools.get_model_folder("Helsinki-NLP/opus-mt-en-ROMANCE")
    assert folder == "models--Helsinki-NLP--opus-mt-en-ROMANCE"
11 |
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Python 3",
3 | "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
4 | "features": {
5 | "ghcr.io/devcontainers-extra/features/nox:2": {},
6 | "ghcr.io/devcontainers-extra/features/pre-commit:2": {}
7 | },
8 | "postCreateCommand": "python -m pip install commitizen uv && pre-commit install"
9 | }
10 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: "1.2.0"
2 | message: "If you use this software, please cite it as below."
3 | authors:
4 | - family-names: "Principe"
5 | given-names: "Rodrigo Esteban"
6 | orcid: "https://orcid.org/0000-0000-0000-0000"
7 | title: "Slang Web"
8 | version: "0.0.0"
9 | doi: ""
10 | date-released: "2025-12-14"
11 | url: "https://github.com/fitoprincipe/slangweb"
12 |
--------------------------------------------------------------------------------
/.copier-answers.yml:
--------------------------------------------------------------------------------
1 | # Changes here will be overwritten by Copier
2 | _commit: 0.1.18
3 | _src_path: gh:12rambau/pypackage
4 | author_email: fitoprincipe82@gmail.com
5 | author_first_name: Rodrigo Esteban
6 | author_last_name: Principe
7 | author_orcid: 0000-0000-0000-0000
8 | creation_year: "2025"
9 | github_repo_name: slangweb
10 | github_user: fitoprincipe
11 | project_name: Slang Web
12 | project_slug: slangweb
13 | short_description: Simple Language Support for Web Development
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ""
5 | labels: ""
6 | assignees: ""
7 | ---
8 |
9 | **Describe the bug**
10 | A clear and concise description of what the bug is.
11 |
12 | **To Reproduce**
13 | Steps to reproduce the behavior:
14 |
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Screenshots**
21 | If applicable, add screenshots to help explain your problem.
22 |
23 | **Additional context**
24 | Add any other context about the problem here.
25 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/PULL_REQUEST_TEMPLATE/pr_template.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Pull request template
3 | about: Create a pull request
4 | title: ""
5 | labels: ""
6 | assignees: ""
7 | ---
8 |
9 | ## reference the related issue
10 |
11 | A PR should address a problem stated in the issue tracker. Please open an issue before starting a PR.
12 |
13 | ## description of the changes
14 |
15 | Describe the changes you propose
16 |
17 | ## mention
18 |
19 | @mentions of the person or team responsible for reviewing proposed changes
20 |
21 | ## comments
22 |
23 | any other comments we should pay attention to
24 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ""
5 | labels: ""
6 | assignees: ""
7 | ---
8 |
9 | **Is your feature request related to a problem? Please describe.**
10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
11 |
12 | **Describe the solution you'd like**
13 | A clear and concise description of what you want to happen.
14 |
15 | **Describe alternatives you've considered**
16 | A clear and concise description of any alternative solutions or features you've considered.
17 |
18 | **Additional context**
19 | Add any other context or screenshots about the feature request here.
20 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | :html_theme.sidebar_secondary.remove:
2 |
3 |
4 | Slang Web
5 | =========
6 |
7 | .. toctree::
8 | :hidden:
9 |
10 | usage
11 | contribute
12 |
13 | Documentation contents
14 | ----------------------
15 |
16 | The documentation contains 3 main sections:
17 |
18 | .. grid:: 1 2 3 3
19 |
20 | .. grid-item::
21 |
22 | .. card:: Usage
23 | :link: usage.html
24 |
25 | Usage and installation
26 |
27 | .. grid-item::
28 |
29 | .. card:: Contribute
30 | :link: contribute.html
31 |
32 | Help us improve the lib.
33 |
34 | .. grid-item::
35 |
36 | .. card:: API
37 | :link: autoapi/index.html
38 |
39 | Discover the lib API.
40 |
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | on:
4 | release:
5 | types: [created]
6 |
7 | env:
8 | PIP_ROOT_USER_ACTION: ignore
9 |
10 | jobs:
11 | tests:
12 | uses: ./.github/workflows/unit.yaml
13 |
14 | deploy:
15 | needs: [tests]
16 | runs-on: ubuntu-latest
17 | steps:
18 | - uses: actions/checkout@v5
19 | - uses: actions/setup-python@v5
20 | with:
21 | python-version: "3.11"
22 | - name: Install dependencies
23 | run: pip install twine build nox[uv]
24 | - name: update citation date
25 | run: nox -s release-date
26 | - name: Build and publish
27 | env:
28 | TWINE_USERNAME: __token__
29 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
30 | run: python -m build && twine upload dist/*
31 |
--------------------------------------------------------------------------------
/AUTHORS.rst:
--------------------------------------------------------------------------------
1 | Thanks goes to these wonderful people (`emoji key <https://allcontributors.org/docs/en/emoji-key>`_):
2 |
3 | .. raw:: html
4 |
5 |
18 |
19 | This project follows the `all-contributors <https://allcontributors.org>`_ specification.
20 |
21 | Contributions of any kind are welcome!
22 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Rodrigo Esteban Principe
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_install_hook_types: [pre-commit, commit-msg]
2 |
3 | repos:
4 | - repo: "https://github.com/commitizen-tools/commitizen"
5 | rev: "v2.18.0"
6 | hooks:
7 | - id: commitizen
8 | stages: [commit-msg]
9 |
10 | - repo: "https://github.com/kynan/nbstripout"
11 | rev: "0.5.0"
12 | hooks:
13 | - id: nbstripout
14 | stages: [pre-commit]
15 |
16 | - repo: "https://github.com/pycontribs/mirrors-prettier"
17 | rev: "v3.4.2"
18 | hooks:
19 | - id: prettier
20 | stages: [pre-commit]
21 | exclude: tests\/test_.+\.
22 |
23 | - repo: https://github.com/charliermarsh/ruff-pre-commit
24 | rev: "v0.7.0"
25 | hooks:
26 | - id: ruff
27 | stages: [pre-commit]
28 | - id: ruff-format
29 | stages: [pre-commit]
30 |
31 | - repo: https://github.com/sphinx-contrib/sphinx-lint
32 | rev: "v1.0.0"
33 | hooks:
34 | - id: sphinx-lint
35 | stages: [pre-commit]
36 |
37 | - repo: https://github.com/codespell-project/codespell
38 | rev: v2.2.4
39 | hooks:
40 | - id: codespell
41 | stages: [pre-commit]
42 | additional_dependencies:
43 | - tomli
44 |
45 | # Prevent committing inline conflict markers
46 | - repo: https://github.com/pre-commit/pre-commit-hooks
47 | rev: v4.3.0
48 | hooks:
49 | - id: check-merge-conflict
50 | stages: [pre-commit]
51 | args: [--assume-in-merge]
52 |
53 | # - repo: local
54 | # hooks:
55 | # - id: generate-lookups
56 | # name: Generate Lookups
57 | # entry: python tools/generate_lookups.py
58 | # language: system
59 | # pass_filenames: false
60 |
--------------------------------------------------------------------------------
/tests/check_warnings.py:
--------------------------------------------------------------------------------
1 | """Check the warnings from doc builds."""
2 |
3 | import sys
4 | from pathlib import Path
5 |
6 |
7 | def check_warnings(file: Path) -> int:
8 | """Check the list of warnings produced by the CI tests.
9 |
10 | Raises errors if there are unexpected ones and/or if some are missing.
11 |
12 | Args:
13 | file: the path to the generated warning.txt file from
14 | the CI build
15 |
16 | Returns:
17 | 0 if the warnings are all there
18 | 1 if some warning are not registered or unexpected
19 | """
20 | # print some log
21 | print("\n=== Sphinx Warnings test ===\n")
22 |
23 | # find the file where all the known warnings are stored
24 | warning_file = Path(__file__).parent / "data" / "warning_list.txt"
25 |
26 | test_warnings = file.read_text().strip().split("\n")
27 | ref_warnings = warning_file.read_text().strip().split("\n")
28 |
29 | print(
30 | f'Checking build warnings in file: "{file}" and comparing to expected '
31 | f'warnings defined in "{warning_file}"\n\n'
32 | )
33 |
34 | # find all the missing warnings
35 | missing_warnings = []
36 | for wa in ref_warnings:
37 | index = [i for i, twa in enumerate(test_warnings) if wa in twa]
38 | if len(index) == 0:
39 | missing_warnings += [wa]
40 | print(f"Warning was not raised: {wa}")
41 | else:
42 | test_warnings.pop(index[0])
43 |
44 | # the remaining one are unexpected
45 | for twa in test_warnings:
46 | print(f"Unexpected warning: {twa}")
47 |
48 | # delete the tmp warnings file
49 | file.unlink()
50 |
51 | return len(missing_warnings) != 0 or len(test_warnings) != 0
52 |
53 |
if __name__ == "__main__":
    # check the "warnings.txt" file of the current working directory and use
    # the outcome of the check as the process exit status
    sys.exit(check_warnings(Path.cwd() / "warnings.txt"))
60 |
--------------------------------------------------------------------------------
/.github/workflows/pypackage_check.yaml:
--------------------------------------------------------------------------------
1 | name: template update check
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | env:
7 | PIP_ROOT_USER_ACTION: ignore
8 |
9 | jobs:
10 | check_version:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v5
14 | - uses: actions/setup-python@v5
15 | with:
16 | python-version: "3.10"
17 | - name: install dependencies
18 | run: pip install requests
19 | - name: get latest pypackage release
20 | id: get_latest_release
21 | run: |
22 | RELEASE=$(curl -s https://api.github.com/repos/12rambau/pypackage/releases | jq -r '.[0].tag_name')
23 | echo "latest=$RELEASE" >> $GITHUB_OUTPUT
24 | echo "latest release: $RELEASE"
25 | - name: get current pypackage version
26 | id: get_current_version
27 | run: |
28 | RELEASE=$(yq -r "._commit" .copier-answers.yml)
29 | echo "current=$RELEASE" >> $GITHUB_OUTPUT
30 | echo "current release: $RELEASE"
31 | - name: open issue
32 | if: steps.get_current_version.outputs.current != steps.get_latest_release.outputs.latest
33 | uses: rishabhgupta/git-action-issue@v2
34 | with:
35 | token: ${{ secrets.GITHUB_TOKEN }}
36 | title: "Update template to ${{ steps.get_latest_release.outputs.latest }}"
37 | body: |
38 | The package is based on the ${{ steps.get_current_version.outputs.current }} version of [@12rambau/pypackage](https://github.com/12rambau/pypackage).
39 |
40 | The latest version of the template is ${{ steps.get_latest_release.outputs.latest }}.
41 |
42 | Please consider updating the template to the latest version to include all the latest developments.
43 |
44 | Run the following code in your project directory to update the template:
45 |
46 | ```
47 | copier update --trust --defaults --vcs-ref ${{ steps.get_latest_release.outputs.latest }}
48 | ```
49 |
50 | > **Note**
51 | > You may need to reinstall ``copier`` and ``jinja2-time`` if they are not available in your environment.
52 |
53 | After solving the merging issues you can push back the changes to your main branch.
54 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
"""Configuration file for the Sphinx documentation builder.

This file only contains a selection of the most common options. For a full
list see the documentation:
https://www.sphinx-doc.org/en/master/usage/configuration.html
"""

# -- Path setup ----------------------------------------------------------------
from datetime import datetime

# -- Project information -------------------------------------------------------
project = "Slang Web"
author = "Rodrigo Esteban Principe"
copyright = f"2025-{datetime.now().year}, {author}"
release = "0.0.0"

# -- General configuration -----------------------------------------------------
extensions = [
    "sphinx_copybutton",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    "sphinx_design",
    "autoapi.extension",
]
exclude_patterns = ["**.ipynb_checkpoints"]
templates_path = ["_template"]

# -- Options for HTML output ---------------------------------------------------
html_theme = "pydata_sphinx_theme"
html_static_path = ["_static"]
html_theme_options = {
    "logo": {
        "text": project,
    },
    "use_edit_page_button": True,
    "footer_end": ["theme-version", "pypackage-credit"],
    "icon_links": [
        {
            "name": "GitHub",
            "url": "https://github.com/fitoprincipe/slangweb",
            "icon": "fa-brands fa-github",
        },
        {
            "name": "Pypi",
            "url": "https://pypi.org/project/slangweb/",
            "icon": "fa-brands fa-python",
        },
        {
            # "fa-custom fa-conda" is not a built-in FontAwesome icon: it is
            # registered by _static/custom-icon.js (see html_js_files below)
            "name": "Conda",
            "url": "https://anaconda.org/conda-forge/slangweb",
            "icon": "fa-custom fa-conda",
            "type": "fontawesome",
        },
    ],
}
html_context = {
    "github_user": "fitoprincipe",
    "github_repo": "slangweb",
    # branch used to build the "edit this page" links; an empty value produces
    # broken URLs
    "github_version": "main",
    "doc_path": "docs",
}
html_css_files = ["custom.css"]
# load the script that registers the custom conda icon; deferred so that it
# also runs after a deferred FontAwesome script
html_js_files = [("custom-icon.js", {"defer": "defer"})]

# -- Options for autosummary/autodoc output ------------------------------------
autodoc_typehints = "description"
autoapi_dirs = ["../slangweb"]
autoapi_python_class_content = "init"
autoapi_member_order = "groupwise"

# -- Options for intersphinx output --------------------------------------------
intersphinx_mapping = {}
73 |
--------------------------------------------------------------------------------
/slangweb/tools.py:
--------------------------------------------------------------------------------
1 | """Utility functions for slangweb."""
2 |
3 | import ast
4 | import json
5 | import logging
6 | from pathlib import Path
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
def get_model_folder(model_name: str) -> str:
    """Get the name of the model folder for the given model name.

    Args:
        model_name: the model identifier; every "/" in it is replaced by "--".

    Returns:
        The folder name: "models--" followed by the converted identifier.
    """
    return f"models--{model_name.replace('/', '--')}"


def available_languages(models_lookup_file: Path, models_folder: Path) -> dict[str, str]:
    """Return the languages whose model folder exists in the models folder.

    The lookup file is a JSON object mapping a language code to an object with
    a "model" entry (the model name) and an optional "name" entry (the display
    name of the language). Entries without a model are skipped.

    Args:
        models_lookup_file: path to the JSON models lookup file.
        models_folder: folder in which the model folders are looked up.

    Returns:
        A dict mapping each available language code to its display name (the
        language code itself when the entry has no "name"). Empty when the
        lookup file does not exist, in which case an error is logged.
    """
    if not models_lookup_file.exists():
        logger.error(
            f"Models lookup file '{models_lookup_file}' does not exist. "
            "Create it by running 'slangweb generate-models-lookup-file'."
        )
        return {}
    with open(models_lookup_file, "r", encoding="utf-8") as f:
        models_lookup = json.load(f)

    # Build the mapping entry by entry so that a code is always paired with its
    # own display name, even when earlier entries have no model folder.
    languages = {}
    for language, data in models_lookup.items():
        model = data.get("model")
        if not model:
            continue
        if (models_folder / get_model_folder(model)).is_dir():
            languages[language] = data.get("name", language)
    return languages
37 |
38 |
def find_translator_usages(py_file: Path, translator_class: str = "SW") -> list[str]:
    """Find usages of the Translator class in the given Python file.

    The file is parsed, not executed. Only calls whose callee is the bare name
    ``translator_class`` are considered; calls made through an attribute
    (``obj.SW(...)``) and calls without positional arguments are ignored.

    Args:
        py_file (Path): Path to the Python file to analyze.
        translator_class (str): Name of the translator class to look for. Default is "SW".

    Returns:
        One entry per matching call, describing its first positional argument:
        the text of a string literal, the identifier of a plain name, or the
        ``ast.dump`` representation of any other expression.
    """
    with open(py_file, "r", encoding="utf-8") as f:
        tree = ast.parse(f.read(), filename=str(py_file))

    usages = []
    for node in ast.walk(tree):
        is_translator_call = (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id == translator_class
        )
        if not is_translator_call or not node.args:
            continue
        arg = node.args[0]
        # String literals are ast.Constant nodes; the ast.Str alias was
        # deprecated in Python 3.8 and removed in 3.12.
        if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
            usages.append(arg.value)
        elif isinstance(arg, ast.Name):
            usages.append(arg.id)
        else:
            usages.append(ast.dump(arg))
    return usages
64 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | .ruff_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 | docs/api/
75 |
76 | # PyBuilder
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | .python-version
88 |
89 | # pipenv
90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 | # install all needed dependencies.
94 | #Pipfile.lock
95 |
96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
97 | __pypackages__/
98 |
99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 |
103 | # SageMath parsed files
104 | *.sage.py
105 |
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 |
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 |
119 | # Rope project settings
120 | .ropeproject
121 |
122 | # mkdocs documentation
123 | /site
124 |
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 |
130 | # Pyre type checker
131 | .pyre/
132 |
133 | # system IDE
134 | .vscode/
135 |
136 | # image tmp file
137 | *Zone.Identifier
138 |
139 | # debugging notebooks
140 | test.ipynb
141 |
--------------------------------------------------------------------------------
/.github/workflows/unit.yaml:
--------------------------------------------------------------------------------
1 | name: Unit tests
2 |
3 | on:
4 | workflow_call:
5 | push:
6 | branches:
7 | - main
8 | pull_request:
9 |
10 | env:
11 | FORCE_COLOR: 1
12 | PIP_ROOT_USER_ACTION: ignore
13 |
14 | jobs:
15 | lint:
16 | runs-on: ubuntu-latest
17 | steps:
18 | - uses: actions/checkout@v5
19 | - uses: actions/setup-python@v5
20 | with:
21 | python-version: "3.11"
22 | - uses: pre-commit/action@v3.0.0
23 |
24 | mypy:
25 | runs-on: ubuntu-latest
26 | steps:
27 | - uses: actions/checkout@v5
28 | - uses: actions/setup-python@v5
29 | with:
30 | python-version: "3.11"
31 | - name: Install nox
32 | run: pip install nox[uv]
33 | - name: run mypy checks
34 | run: nox -s mypy
35 |
36 | docs:
37 | needs: [lint, mypy]
38 | runs-on: ubuntu-latest
39 | steps:
40 | - uses: actions/checkout@v5
41 | - uses: actions/setup-python@v5
42 | with:
43 | python-version: "3.11"
44 | - name: Install nox
45 | run: pip install nox[uv]
46 | - name: build static docs
47 | run: nox -s docs
48 |
49 | build:
50 | needs: [lint, mypy]
51 | strategy:
52 | fail-fast: true
53 | matrix:
54 | os: [ubuntu-latest]
55 | python-version: ["3.10", "3.11"]
56 | include:
57 | - os: macos-latest # macos test
58 | python-version: "3.11"
59 | - os: windows-latest # windows test
60 | python-version: "3.11"
61 | runs-on: ${{ matrix.os }}
62 | steps:
63 | - uses: actions/checkout@v5
64 | - name: Set up Python ${{ matrix.python-version }}
65 | uses: actions/setup-python@v5
66 | with:
67 | python-version: ${{ matrix.python-version }}
68 | - name: Install nox
69 | run: pip install nox[uv]
70 | - name: test with pytest
71 | run: nox -s ci-test
72 | - name: assess dead fixtures
73 | if: ${{ matrix.python-version == '3.10' }}
74 | shell: bash
75 | run: nox -s dead-fixtures
76 | - uses: actions/upload-artifact@v4
77 | if: ${{ matrix.python-version == '3.10' }}
78 | with:
79 | name: coverage
80 | path: coverage.xml
81 |
82 | coverage:
83 | needs: [build]
84 | runs-on: ubuntu-latest
85 | steps:
86 | - uses: actions/checkout@v5
87 | - uses: actions/download-artifact@v4
88 | with:
89 | name: coverage
90 | - name: codecov
91 | uses: codecov/codecov-action@v4
92 | with:
93 | token: ${{ secrets.CODECOV_TOKEN }}
94 | verbose: true
95 | fail_ci_if_error: true
96 |
--------------------------------------------------------------------------------
/docs/_static/custom-icon.js:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Set a custom icon for conda as it's not available in the fa built-in brands
3 | */
4 | FontAwesome.library.add(
5 | (faListOldStyle = {
6 | prefix: "fa-custom",
7 | iconName: "conda",
8 | icon: [
9 | 24, // viewBox width
10 | 24, // viewBox height
11 | [], // ligature
12 | "e001", // unicode codepoint - private use area
13 | "M12.045.033a12.181 12.182 0 00-1.361.078 17.512 17.513 0 011.813 1.433l.48.438-.465.45a15.047 15.048 0 00-1.126 1.205l-.178.215a8.527 8.527 0 01.86-.05 8.154 8.155 0 11-4.286 15.149 15.764 15.765 0 01-1.841.106h-.86a21.847 21.848 0 00.264 2.866 11.966 11.967 0 106.7-21.89zM8.17.678a12.181 12.182 0 00-2.624 1.275 15.506 15.507 0 011.813.43A18.551 18.552 0 018.17.678zM9.423.75a16.237 16.238 0 00-.995 1.998 16.15 16.152 0 011.605.66 6.98 6.98 0 01.43-.509c.234-.286.472-.559.716-.817A15.047 15.048 0 009.423.75zM4.68 2.949a14.969 14.97 0 000 2.336c.587-.065 1.196-.1 1.812-.107a16.617 16.617 0 01.48-1.748 16.48 16.481 0 00-2.292-.481zM3.62 3.5A11.938 11.938 0 001.762 5.88a17.004 17.004 0 011.877-.444A17.39 17.391 0 013.62 3.5zm4.406.287c-.143.437-.265.888-.38 1.347a8.255 8.255 0 011.67-.803c-.423-.2-.845-.38-1.29-.544zM6.3 6.216a14.051 14.052 0 00-1.555.108c.064.523.157 1.038.272 1.554a8.39 8.391 0 011.283-1.662zm-2.55.137a15.313 15.313 0 00-2.602.716h-.078v.079a17.104 17.105 0 001.267 2.544l.043.071.072-.049a16.309 16.31 0 011.734-1.083l.057-.035V8.54a16.867 16.868 0 01-.408-2.094v-.092zM.644 8.095l-.063.2A11.844 11.845 0 000 11.655v.209l.143-.152a17.706 17.707 0 011.584-1.447l.057-.043-.043-.064a16.18 16.18 0 01-1.025-1.87zm3.77 1.253l-.18.1c-.465.273-.93.573-1.375.889l-.065.05.05.064c.309.437.645.867.996 1.276l.137.165v-.208a8.176 8.176 0 01.364-2.15zM2.2 10.853l-.072.05a16.574 16.574 0 00-1.813 1.734l-.058.058.066.057a15.449 15.45 0 001.991 1.483l.072.05.043-.08a16.738 16.74 0 011.053-1.64v-.05l-.043-.05a16.99 16.99 0 01-1.19-1.54zm1.855 2.071l-.121.172a15.363 15.363 0 00-.917 1.433l-.043.072.071.043a16.61 16.61 0 001.562.766l.193.086-.086-.193a8.04 8.04 0 01-.66-2.172zm-3.976.48v.2a11.758 11.759 0 00.946 3.326l.078.186.072-.194a16.215 16.216 0 01.845-2l.057-.063-.064-.043a17.197 17.198 0 01-1.776-1.284zm2.543 1.805l-.035.08a15.764 15.765 0 00-.983 2.479v.08h.086a16.15 16.152 0 002.688.5l.072.007v-.086a17.562 17.563 0 01.164-2.056v-.065H4.55a16.266 16.266 0 
01-1.849-.896zm2.544 1.169v.114a17.254 17.255 0 00-.151 1.828v.078h.931c.287 0 .624.014.946 0h.209l-.166-.129a8.011 8.011 0 01-1.64-1.834zm-3.29 2.1l.115.172a11.988 11.988 0 002.502 2.737l.157.129v-.201a22.578 22.58 0 01-.2-2.336v-.071h-.072a16.23 16.23 0 01-2.3-.387z", // svg path (https://simpleicons.org/icons/anaconda.svg)
14 | ],
15 | }),
16 | );
17 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "slangweb"
7 | version = "0.0.0"
8 | description = "Simple Language Support for Web Development"
9 | keywords = [
10 | "web",
11 | "language",
12 | "Python"
13 | ]
14 | classifiers = [
15 | "Development Status :: 3 - Alpha",
16 | "Intended Audience :: Developers",
17 | "License :: OSI Approved :: MIT License",
18 | "Programming Language :: Python :: 3.10",
19 | "Programming Language :: Python :: 3.11",
20 | ]
21 | requires-python = ">=3.10"
22 | dependencies = [
23 | "deprecated>=1.2.14",
24 | "transformers",
25 | "torch",
26 | "sentencepiece",
27 | "sacremoses",
28 | "protobuf"
29 | ]
30 |
31 | [[project.authors]]
32 | name = "Rodrigo Esteban Principe"
33 | email = "fitoprincipe82@gmail.com"
34 |
35 | [project.license]
36 | text = "MIT"
37 |
38 | [project.readme]
39 | file = "README.md"
40 | content-type = "text/markdown"
41 |
42 | [project.scripts]
43 | slangweb = "slangweb.cli:main"
44 |
45 | [project.urls]
46 | Homepage = "https://github.com/fitoprincipe/slangweb"
47 |
48 | [project.optional-dependencies]
49 | test = [
50 | "pytest",
51 | "pytest-cov",
52 | "pytest-deadfixtures"
53 | ]
54 | doc = [
55 | "sphinx>=6.2.1",
56 | "pydata-sphinx-theme",
57 | "sphinx-copybutton",
58 | "sphinx-design",
59 | "sphinx-autoapi"
60 | ]
61 | flask = [
62 | "Flask>=2.0"
63 | ]
64 | dash = [
65 | "dash>=2.0"
66 | ]
67 |
68 | [tool.hatch.build.targets.wheel]
69 | only-include = ["slangweb"]
70 |
71 | [tool.hatch.envs.default]
72 | dependencies = [
73 | "pre-commit",
74 | "commitizen",
75 | "nox[uv]"
76 | ]
77 | post-install-commands = ["pre-commit install"]
78 |
79 | [tool.commitizen]
80 | tag_format = "v$major.$minor.$patch$prerelease"
81 | update_changelog_on_bump = false
82 | version = "0.0.0"
83 | version_files = [
84 | "pyproject.toml:version",
85 | "slangweb/__init__.py:__version__",
86 | "docs/conf.py:release",
87 | "CITATION.cff:version"
88 | ]
89 |
90 | [tool.pytest.ini_options]
91 | testpaths = "tests"
92 |
93 | [tool.ruff]
94 | line-length = 100
95 | ignore-init-module-imports = true
96 | fix = true
97 |
98 | [tool.ruff.lint]
99 | select = ["E", "F", "W", "I", "D", "RUF"]
100 | ignore = [
101 | "E501", # line too long | Black takes care of it
102 | "D212", # Multi-line docstring | We use D213
103 | "D101", # Missing docstring in public class | We use D106
104 | ]
105 |
106 | [tool.ruff.lint.flake8-quotes]
107 | docstring-quotes = "double"
108 |
109 | [tool.ruff.lint.pydocstyle]
110 | convention = "google"
111 |
112 | [tool.coverage.run]
113 | source = ["slangweb"]
114 |
115 | [tool.mypy]
116 | scripts_are_modules = true
117 | ignore_missing_imports = true
118 | install_types = true
119 | non_interactive = true
120 | warn_redundant_casts = true
121 |
--------------------------------------------------------------------------------
/noxfile.py:
--------------------------------------------------------------------------------
1 | """All the processes that can be run using nox.
2 |
3 | The nox sessions are built in isolated environments that are stored in .nox. To force a venv update, remove the .nox/xxx folder.
4 | """
5 |
6 | import datetime
7 | import fileinput
8 |
9 | import nox
10 |
11 | nox.options.sessions = ["lint", "test", "docs", "mypy"]
12 |
13 |
@nox.session(reuse_venv=True, venv_backend="uv")
def lint(session: nox.Session):
    """Run every pre-commit hook against all the files of the repository.

    Any extra command-line argument given to the session is forwarded to ``pre-commit run``.
    """
    session.install("pre-commit")
    command = ["pre-commit", "run", "--all-files", *session.posargs]
    session.run(*command)
19 |
20 |
@nox.session(reuse_venv=True, venv_backend="uv")
def test(session: nox.Session):
    """Run the selected tests with pytest and write an HTML coverage report.

    The test targets are the session positional arguments; the ``tests`` folder is used when none is given.
    """
    session.install("-e", ".[test]")
    targets = session.posargs if session.posargs else ["tests"]
    session.run("pytest", "--cov", "--cov-report=html", *targets)
27 |
28 |
@nox.session(reuse_venv=True, name="ci-test", venv_backend="uv")
def ci_test(session: nox.Session):
    """Run the whole test suite with pytest and write an XML coverage report."""
    session.install("-e", ".[test]")
    pytest_command = ("pytest", "--cov", "--cov-report=xml")
    session.run(*pytest_command)
34 |
35 |
@nox.session(reuse_venv=True, name="dead-fixtures", venv_backend="uv")
def dead_fixtures(session: nox.Session):
    """Run pytest with ``--dead-fixtures`` to report fixtures that are never used."""
    session.install("-e", ".[test]")
    pytest_command = ("pytest", "--dead-fixtures")
    session.run(*pytest_command)
41 |
42 |
@nox.session(reuse_venv=True, venv_backend="uv")
def docs(session: nox.Session):
    """Build the documentation with Sphinx, then run the warning check script.

    The Sphinx builder is taken from the last positional argument of the session
    (``html`` when none is given). The output goes to ``docs/_build/<builder>`` and
    the Sphinx warnings are written to ``warnings.txt``.
    """
    if session.posargs:
        build = session.posargs.pop()
    else:
        build = "html"
    session.install("-e", ".[doc]")
    dst = f"docs/_build/{build}"
    warn = "warnings.txt"
    session.run("sphinx-build", "-v", "-b", build, "docs", dst, "-w", warn)
    session.run("python", "tests/check_warnings.py")
51 |
52 |
@nox.session(name="mypy", reuse_venv=True, venv_backend="uv")
def mypy(session: nox.Session):
    """Type-check the library with mypy.

    The targets are the session positional arguments; the ``slangweb`` package is checked when none is given.
    """
    # "types-deprecated" has to be installed explicitly until
    # https://github.com/laurent-laporte-pro/deprecated/issues/63 is fixed
    session.install("mypy", "types-deprecated")
    targets = session.posargs if session.posargs else ["slangweb"]
    session.run("mypy", *targets)
61 |
62 |
@nox.session(reuse_venv=True, venv_backend="uv")
def stubgen(session: nox.Session):
    """Generate stub files for a package into ``stubs`` (needs human review before merge).

    The package is the first positional argument of the session, ``slangweb`` when none is given.
    """
    session.install("mypy")
    if session.posargs:
        package_name = session.posargs[0]
    else:
        package_name = "slangweb"
    session.run("stubgen", "-p", package_name, "-o", "stubs", "--include-private")
69 |
70 |
@nox.session(name="release-date", reuse_venv=True, venv_backend="uv")
def release_date(session: nox.Session):
    """Update the release date of the citation file.

    Rewrites ``CITATION.cff`` in place: the line starting with ``date-released:``
    is replaced with today's date (``YYYY-MM-DD``), every other line is copied unchanged.

    Args:
        session: The nox session. It is not used by the body.
    """
    current_date = datetime.datetime.now().strftime("%Y-%m-%d")

    # With inplace=True, fileinput redirects standard output into the file being
    # read, so each print() below writes one line of the new CITATION.cff.
    # The encoding is explicit so the rewrite does not depend on the locale.
    with fileinput.FileInput("CITATION.cff", inplace=True, encoding="utf-8") as file:
        for line in file:
            if line.startswith("date-released:"):
                print(f'date-released: "{current_date}"')
            else:
                print(line, end="")
82 |
--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
1 | Contribute
2 | ==========
3 |
4 | Thank you for your help improving **Slang Web**!
5 |
6 | **Slang Web** uses `nox <https://nox.thea.codes/>`__ to automate several development-related tasks.
7 | Currently, the project uses four automation processes (called sessions) in ``noxfile.py``:
8 |
9 | - ``mypy``: to perform a mypy check on the lib;
10 | - ``test``: to run the test with pytest;
11 | - ``docs``: to build the documentation in the ``build`` folder;
12 | - ``lint``: to run the pre-commits in an isolated environment
13 |
14 | Every nox session is run in its own virtual environment, and the dependencies are installed automatically.
15 |
16 | To run a specific nox automation process, use the following command:
17 |
18 | .. code-block:: console
19 |
20 | nox -s <session-name>
21 |
22 | For example: ``nox -s test`` or ``nox -s docs``.
23 |
24 | Workflow for contributing changes
25 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
26 |
27 | We follow a typical GitHub workflow of:
28 |
29 | - Create a personal fork of this repo
30 | - Create a branch
31 | - Open a pull request
32 | - Fix findings of various linters and checks
33 | - Work through code review
34 |
35 | See the following sections for more details.
36 |
37 | Clone the repository
38 | ^^^^^^^^^^^^^^^^^^^^
39 |
40 | First off, you'll need your own copy of **Slang Web** codebase. You can clone it for local development like so:
41 |
42 | Fork the repository so you have your own copy on GitHub. See the `GitHub forking guide <https://docs.github.com/en/get-started/quickstart/fork-a-repo>`__ for more information.
43 |
44 | Then, clone the repository locally so that you have a local copy to work on:
45 |
46 | .. code-block:: console
47 |
48 | git clone https://github.com/<your-username>/slangweb
49 | cd slangweb
50 |
51 | Then install the development version of the extension:
52 |
53 | .. code-block:: console
54 |
55 | pip install -e .[dev]
56 |
57 | This will install the **Slang Web** library, together with two additional tools:
58 | - `pre-commit <https://pre-commit.com/>`__ for automatically enforcing code standards and quality checks before commits.
59 | - `nox <https://nox.thea.codes/>`__, for automating common development tasks.
60 |
61 | Lastly, activate the pre-commit hooks by running:
62 |
63 | .. code-block:: console
64 |
65 | pre-commit install
66 |
67 | This will install the necessary dependencies to run pre-commit every time you make a commit with Git.
68 |
69 | Contribute to the codebase
70 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
71 |
72 | Any larger updates to the codebase should include tests and documentation. The tests are located in the ``tests`` folder, and the documentation is located in the ``docs`` folder.
73 |
74 | To run the tests locally, use the following command:
75 |
76 | .. code-block:: console
77 |
78 | nox -s test
79 |
80 | See :ref:`below <contributing-docs>` for more information on how to update the documentation.
81 |
82 | .. _contributing-docs:
83 |
84 | Contribute to the docs
85 | ^^^^^^^^^^^^^^^^^^^^^^
86 |
87 | The documentation is built using `Sphinx <https://www.sphinx-doc.org/>`__ and deployed to `Read the Docs <https://readthedocs.org/>`__.
88 |
89 | To build the documentation locally, use the following command:
90 |
91 | .. code-block:: console
92 |
93 | nox -s docs
94 |
95 | For each pull request, the documentation is built and deployed to make it easier to review the changes in the PR. To access the docs build from a PR, click on the "Read the Docs" preview in the CI/CD jobs.
96 |
97 | Release new version
98 | ^^^^^^^^^^^^^^^^^^^
99 |
100 | To release a new version, start by pushing a new bump from the local directory:
101 |
102 | .. code-block::
103 |
104 | cz bump
105 |
106 | The commitizen-tool will detect the semantic version name based on the existing commits messages.
107 |
108 | Then push to GitHub. In GitHub, create a new release using the same tag name, and the ``release.yaml`` job will publish it to PyPI.
109 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.rst:
--------------------------------------------------------------------------------
1 | Contributor Covenant Code of Conduct
2 | ====================================
3 |
4 | Our Pledge
5 | ----------
6 |
7 | We as members, contributors, and leaders pledge to make participation in our
8 | community a harassment-free experience for everyone, regardless of age, body
9 | size, visible or invisible disability, ethnicity, sex characteristics, gender
10 | identity and expression, level of experience, education, socio-economic status,
11 | nationality, personal appearance, race, religion, or sexual identity
12 | and orientation.
13 |
14 | We pledge to act and interact in ways that contribute to an open, welcoming,
15 | diverse, inclusive, and healthy community.
16 |
17 | Our Standards
18 | -------------
19 |
20 | Examples of behavior that contributes to a positive environment for our
21 | community include:
22 |
23 | * Demonstrating empathy and kindness toward other people
24 | * Being respectful of differing opinions, viewpoints, and experiences
25 | * Giving and gracefully accepting constructive feedback
26 | * Accepting responsibility and apologizing to those affected by our mistakes,
27 | and learning from the experience
28 | * Focusing on what is best not just for us as individuals, but for the
29 | overall community
30 |
31 | Examples of unacceptable behavior include:
32 |
33 | * The use of sexualized language or imagery, and sexual attention or
34 | advances of any kind
35 | * Trolling, insulting or derogatory comments, and personal or political attacks
36 | * Public or private harassment
37 | * Publishing others' private information, such as a physical or email
38 | address, without their explicit permission
39 | * Other conduct which could reasonably be considered inappropriate in a
40 | professional setting
41 |
42 | Enforcement Responsibilities
43 | ----------------------------
44 |
45 | Community leaders are responsible for clarifying and enforcing our standards of
46 | acceptable behavior and will take appropriate and fair corrective action in
47 | response to any behavior that they deem inappropriate, threatening, offensive,
48 | or harmful.
49 |
50 | Community leaders have the right and responsibility to remove, edit, or reject
51 | comments, commits, code, wiki edits, issues, and other contributions that are
52 | not aligned to this Code of Conduct, and will communicate reasons for moderation
53 | decisions when appropriate.
54 |
55 | Scope
56 | -----
57 |
58 | This Code of Conduct applies within all community spaces, and also applies when
59 | an individual is officially representing the community in public spaces.
60 | Examples of representing our community include using an official e-mail address,
61 | posting via an official social media account, or acting as an appointed
62 | representative at an online or offline event.
63 |
64 | Enforcement
65 | -----------
66 |
67 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
68 | reported to the FAO team responsible for enforcement at
69 | pierrick.rambaud49@gmail.com.
70 | All complaints will be reviewed and investigated promptly and fairly.
71 |
72 | All community leaders are obligated to respect the privacy and security of the
73 | reporter of any incident.
74 |
75 | Enforcement Guidelines
76 | ----------------------
77 |
78 | Community leaders will follow these Community Impact Guidelines in determining
79 | the consequences for any action they deem in violation of this Code of Conduct:
80 |
81 | Correction
82 | ^^^^^^^^^^
83 |
84 | **Community Impact**: Use of inappropriate language or other behavior deemed
85 | unprofessional or unwelcome in the community.
86 |
87 | **Consequence**: A private, written warning from community leaders, providing
88 | clarity around the nature of the violation and an explanation of why the
89 | behavior was inappropriate. A public apology may be requested.
90 |
91 | Warning
92 | ^^^^^^^
93 |
94 | **Community Impact**: A violation through a single incident or series
95 | of actions.
96 |
97 | **Consequence**: A warning with consequences for continued behavior. No
98 | interaction with the people involved, including unsolicited interaction with
99 | those enforcing the Code of Conduct, for a specified period of time. This
100 | includes avoiding interactions in community spaces as well as external channels
101 | like social media. Violating these terms may lead to a temporary or
102 | permanent ban.
103 |
104 | Temporary Ban
105 | ^^^^^^^^^^^^^
106 |
107 | **Community Impact**: A serious violation of community standards, including
108 | sustained inappropriate behavior.
109 |
110 | **Consequence**: A temporary ban from any sort of interaction or public
111 | communication with the community for a specified period of time. No public or
112 | private interaction with the people involved, including unsolicited interaction
113 | with those enforcing the Code of Conduct, is allowed during this period.
114 | Violating these terms may lead to a permanent ban.
115 |
116 | Permanent Ban
117 | ^^^^^^^^^^^^^
118 |
119 | **Community Impact**: Demonstrating a pattern of violation of community
120 | standards, including sustained inappropriate behavior, harassment of an
121 | individual, or aggression toward or disparagement of classes of individuals.
122 |
123 | **Consequence**: A permanent ban from any sort of public interaction within
124 | the community.
125 |
126 | Attribution
127 | -----------
128 |
129 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
130 | version 2.0, available at
131 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
132 |
133 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
134 | enforcement ladder](https://github.com/mozilla/diversity).
135 |
136 | [homepage]: https://www.contributor-covenant.org
137 |
138 | For answers to common questions about this code of conduct, see the FAQ at
139 | https://www.contributor-covenant.org/faq. Translations are available at
140 | https://www.contributor-covenant.org/translations.
141 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Simple LANGuage support for the Web (using AI)
2 |
3 | [](LICENSE)
4 | [](https://conventionalcommits.org)
5 | [](https://github.com/astral-sh/ruff)
6 | [](https://github.com/prettier/prettier)
7 | [](https://pre-commit.com/)
8 | [](https://pypi.org/project/slangweb/)
9 | [](https://github.com/fitoprincipe/slangweb/actions/workflows/unit.yaml)
10 | [](https://codecov.io/gh/fitoprincipe/slangweb)
11 | [](https://slangweb.readthedocs.io/en/latest/)
12 |
13 | ## Overview
14 |
15 | Use AI models from Hugging Face to translate your website.
16 |
17 | The system works with two different approaches:
18 |
19 | - [**Dynamic**](#2-dynamic): Translation on-the-fly. It's easy to integrate with any framework. Can be slow if the text is too long.
20 | - [**Static**](#1-static): Use a translation lookup file based on sentences. To use a key based approach would require an extra layer of complexity (maybe in the future). The lookup file must be created before deployment. This approach is harder (sometimes impossible) to integrate with any framework, for example, Flask + jinja2 templates. It's fast.
21 |
22 | At the moment, only ROMANCE languages are included by using the model [Helsinki-NLP/opus-mt-en-ROMANCE](https://huggingface.co/Helsinki-NLP/opus-mt-en-ROMANCE). This model can translate to the following languages:
23 |
24 | | Language | Code | Language | Code | Language | Code |
25 | | ---------------------------- | ----- | --------------------- | ----- | ---------- | ---- |
26 | | Spanish | es | Spanish (Uruguay) | es_uy | Neapolitan | nap |
27 | | Spanish (Argentina) | es_ar | Spanish (Venezuela) | es_ve | Sicilian | scn |
28 | | Spanish (Chile) | es_cl | Portuguese | pt | Venetian | vec |
29 | | Spanish (Colombia) | es_co | Portuguese (Brazil) | pt_br | Aragonese | an |
30 | | Spanish (Costa Rica) | es_cr | Portuguese (Portugal) | pt_pt | Arpitan | frp |
31 | | Spanish (Dominican Republic) | es_do | French | fr | Corsican | co |
32 | | Spanish (Ecuador) | es_ec | French (Belgium) | fr_be | Friulian | fur |
33 | | Spanish (El Salvador) | es_sv | French (Switzerland) | fr_ch | Ladin | lld |
34 | | Spanish (Guatemala) | es_gt | French (Canada) | fr_ca | Ladino | lad |
35 | | Spanish (Honduras) | es_hn | French (France) | fr_fr | Latin | la |
36 | | Spanish (Mexico) | es_mx | Italian | it | Ligurian | lij |
37 | | Spanish (Nicaragua) | es_ni | Italian (Italy) | it_it | Mirandese | mwl |
38 | | Spanish (Panama) | es_pa | Catalan | ca | Occitan | oc |
39 | | Spanish (Peru) | es_pe | Galician | gl | Romansh | rm |
40 | | Spanish (Puerto Rico) | es_pr | Romanian | ro | Sardinian | sc |
41 | | Spanish (Spain) | es_es | Lombard | lmo | Walloon | wa |
42 |
43 | This package creates a folder inside your repo to store a configuration file and other files for the models.
44 |
45 | ## Installation
46 |
47 | Simply install via pip:
48 |
49 | `pip install slangweb`
50 |
51 | ## Initialization
52 |
53 | Let's suppose you have the following folder structure:
54 |
55 | ```
56 | my_site/
57 | ├── app.py # main application entry
58 | ├── src/ # source package / modules
59 | │ ├── index.py # main site logic / translator usage example
60 | └── pages/ # HTML/templates/pages for the site
61 | └── a_page.html # example module representing a page
62 | ```
63 |
64 | Open a terminal, activate the environment in which you installed the package, and run:
65 |
66 | ```bash
67 | (.venv) C:\my_site>slangweb init
68 | ```
69 |
70 | This will create the [configuration file](#configuration-file) and the [models lookup file](#models-lookup).
71 |
72 | ## Configuration file
73 |
74 | The configuration file (json) has the following structure:
75 |
76 | ```json
77 | {
78 | "base_folder": "slangweb",
79 | "models_lookup_file": "models_lookup.json",
80 | "models_folder": "models",
81 | "lookups_folder": "lookups",
82 | "default_language": "en",
83 | "encoding": "utf-8",
84 | "source_folders": ["."],
85 | "supported_languages": ["es"],
86 | "translator_class": "SW"
87 | }
88 | ```
89 |
90 | - `base_folder`: is the main folder where all files will be stored (including the config file).
91 | - `models_lookup_file`: name of the models lookup file. This file will and must be placed inside `base_folder`.
92 | - `models_folder`: folder where the models will and must be stored. Also, must be inside `base_folder`.
93 | - `lookups_folder`: folder where the [translations lookup](#1-static) files will be stored.
94 | - `default_language`: The base language of the site. At the moment only **en**glish is supported.
95 | - `encoding`: Encoding for the lookup files. At the moment only `utf-8` is supported.
96 | - `source_folders`: Folders that contain the source python file where the slangweb translator class is implemented. Developers can modify this at will.
97 | - `supported_languages`: Languages that the site will support. There will be one [translation lookup](#1-static) file for each language.
98 | - `translator_class`: The class that will be used for static translations across the site. See the [Usage](#usage) section.
99 |
100 | ## Models lookup
101 |
102 | The `models_lookup.json` has the following structure:
103 |
104 | ```json
105 | {
106 | "es": {
107 | "model": "Helsinki-NLP/opus-mt-en-ROMANCE",
108 | "name": "Spanish"
109 | },
110 | ...
111 | }
112 | ```
113 |
114 | This file is created automatically. Other languages and models can be added if needed.
115 |
116 | ## Usage
117 |
118 | Once all the configuration was created and modified (if needed), you need to download the models using the CLI application:
119 |
120 | ```bash
121 | (.venv) C:\my_site>slangweb download-models
122 | ```
123 |
124 | This will download all the models needed for the languages included in the section `supported_languages` in the [configuration file](#configuration-file).
125 |
126 | Finally, you can start implementing it in your python files. There are two main ways of using this package: [statically](#1-static) and [dynamically](#2-dynamic)
127 |
128 | ### 1. Static
129 |
130 | For each language listed in the section `supported_languages` in the [configuration file](#configuration-file) a `translation lookup` file will be created inside the `lookups_folder`. The `translation lookup` file is a json containing all relations between the sentences in the original language and the translated version. For example (spanish):
131 |
132 | `es.json`
133 |
134 | ```json
135 | {
136 | "Hello World": "Hola Mundo",
137 | ...
138 | }
139 | ```
140 |
141 | The purpose of this approach is to avoid translating on-the-fly to gain loading speed.
142 |
143 | To use the static translation system you can call the instance, which is the same as calling the method `.get_translation`:
144 |
145 | ```python
146 | from slangweb import Translator
147 | SW = Translator()
148 | translation = SW("Translate this")
149 | same_translation = SW.get_translation("Translate this")
150 | ```
151 |
152 | Example using Dash:
153 |
154 | ```python
155 | from slangweb import Translator
156 |
157 | # Init Translator
158 | # the variable name must match the "translator_class" in the config file
159 | SW = Translator()
160 |
161 | def layout(lang: str = 'en'):
162 | SW.set_language(lang)
163 | return html.Div([
164 | html.H2(SW('This is a Test for the static translation system.')),
165 | html.H2(SW("Thanks for using SlangWeb!"))
166 | ])
167 | ```
168 |
169 | There are 2 ways to create the `translation lookup` files:
170 |
171 | 1. by running the website in `localhost` and accessing the pages.
172 | 2. by running the CLI:
173 |
174 | ```bash
175 | (.venv) C:\my_site>slangweb sync
176 | ```
177 |
178 | This will create the following file `C:\my_site\slangweb\lookups\es.json`
179 |
180 | ```json
181 | {
182 | "This is a Test for the static translation system.": "Esta es una prueba para el sistema de traducción estática.",
183 | "Thanks for using SlangWeb!": "¡Gracias por usar SlangWeb!"
184 | }
185 | ```
186 |
187 | ### 2. Dynamic
188 |
189 | In this case, the `translation lookup` file will not be created, and the translation will happen on-the-fly.
190 |
191 | In your code (using Dash):
192 |
193 | ```python
194 | from slangweb import Translator
195 |
196 | # Init Translator
197 | SW = Translator()
198 | t = SW.translate
199 |
200 | def layout(lang: str = 'en'):
201 | SW.set_language(lang)
202 | return html.Div([
203 | html.H2(t('This is a Test for the static translation system.')),
204 | html.H2(t("Thanks for using SlangWeb!"))
205 | ])
206 | ```
207 |
208 | ## Recommendations & caveats
209 |
210 | - Model downloads can be large; ensure enough disk space.
211 | - For production, prefer Static lookups where possible for performance.
212 | - Dynamic translation may add latency; consider caching translations.
213 | - If using private Hugging Face models, set the HF_TOKEN environment variable before running CLI/tools:
214 |
215 | ```powershell
216 | setx HF_TOKEN "your_token_here"
217 | ```
218 |
219 | ## Credits
220 |
221 | This package was created with [Copier](https://copier.readthedocs.io/en/latest/) and the [@12rambau/pypackage](https://github.com/12rambau/pypackage) 0.1.18 project template.
222 |
--------------------------------------------------------------------------------
/slangweb/translator.py:
--------------------------------------------------------------------------------
1 | """Translations in Dash applications.
2 |
3 | Only translates from English to other languages for now.
4 | """
5 |
6 | import json
7 | import os
8 | from logging import getLogger
9 | from pathlib import Path
10 |
11 | from transformers import MarianMTModel, MarianTokenizer
12 |
13 | from .constants import (
14 | DEFAULT_LANGUAGE,
15 | ENCODING,
16 | LOOKUPS_FOLDER,
17 | MODELS_FOLDER,
18 | MODELS_LOOKUP_FILE,
19 | SLANG_FOLDER,
20 | )
21 |
22 | logger = getLogger(__name__)
23 |
24 |
25 | def lang_from_path(path: str) -> str | None:
26 | """Extract the language code from the given URL path."""
27 | parts = (path or "").strip("/").split("/")
28 | return parts[0] if parts else DEFAULT_LANGUAGE
29 |
30 |
31 | class Translator:
32 | """A simple translator class to manage translations."""
33 |
def __init__(
    self,
    base_folder: str = SLANG_FOLDER,
    models_folder: str = MODELS_FOLDER,
    lookup_folder: str = LOOKUPS_FOLDER,
    models_lookup_file: str = MODELS_LOOKUP_FILE,
):
    """Set up the folders used by the translator; no file is read here.

    Two kinds of lookup files are involved:
        1. The models lookup file, which maps language codes to model names.
        2. The translation lookup files, one per language, which map source texts to translated texts.

    The models lookup file is not created here; when missing it must be created with the slangweb CLI.

    Every location is resolved against the current working directory.

    Args:
        base_folder (str): Base directory for slangweb data.
        models_folder (str): Directory (inside ``base_folder``) to store/load translation models.
        lookup_folder (str): Directory (inside ``base_folder``) to store/load translation lookups.
        models_lookup_file (str): Name of the models lookup file (inside ``base_folder``).
    """
    root = Path(os.getcwd()) / base_folder

    # resolved locations
    self.base_folder = root
    self.models_folder = root / models_folder
    self.lookup_folder = root / lookup_folder
    self.models_lookup_file = root / models_lookup_file

    # current language and lazily filled caches
    self.language: str | None = None
    self._models_lookup: dict | None = None
    self._translation_lookup_file: Path | None = None
    self._model = None
    self._tokenizer = None
67 |
68 | def set_language(self, language: str | None) -> None:
69 | """Set the current language for translation."""
70 | language = language.lower() if language else None
71 | if self.language != language:
72 | self.language = language
73 | # reset model and tokenizer
74 | self._model = None
75 | self._tokenizer = None
76 |
@property
def models_lookup(self) -> dict:
    """Return the models lookup, reading the models lookup file on first use.

    The parsed content is kept in ``self._models_lookup`` so the file is parsed
    only once; its existence is still checked on every access.

    Returns:
        dict: The parsed models lookup, or an empty dict when the file does not
        exist (an error is logged) or holds no data.
    """
    if not self.models_lookup_file.exists():
        logger.error(f"Models lookup file not found: {self.models_lookup_file}")
        return {}
    if self._models_lookup is None:
        with open(self.models_lookup_file, "r", encoding="utf-8") as f:
            self._models_lookup = json.load(f)
    # `or`, not the dict-merge operator `|`: merging copied the dict on every
    # access and raised TypeError when the file content was JSON `null`.
    return self._models_lookup or {}
88 |
89 | @property
90 | def model_name(self) -> str | None:
91 | """Get the model name for the current language."""
92 | model_name = self.models_lookup.get(self.language, {}).get("model")
93 | if not model_name:
94 | logger.warning(f"Language '{self.language}' not found in models lookup.")
95 | return model_name
96 |
def is_language_in_lookup(self) -> bool:
    """Tell whether the current language has an entry in the models lookup.

    Returns:
        ``False`` when no language is set or when it is the default language,
        otherwise whether the language is a key of the models lookup (an error
        is logged when it is not).
    """
    if self.language is None or self.language == DEFAULT_LANGUAGE:
        return False
    if self.language in self.models_lookup:
        return True
    logger.error(f"Language '{self.language}' not found in models lookup.")
    return False
105 |
106 | @property
107 | def model_filename(self) -> Path | None:
108 | """Get the model directory for the current language."""
109 | if self.model_name is None:
110 | return None
111 | model_fn = self.models_folder / f"models--{self.model_name.replace('/', '--')}"
112 | return model_fn
113 |
def is_model_available(self) -> bool:
    """Tell whether the model directory of the current language exists on disk.

    Returns:
        ``True`` only when a model directory is known and is an existing directory.
    """
    folder = self.model_filename
    return folder is not None and folder.is_dir()
120 |
@property
def translation_lookup_file(self) -> Path:
    """Return the path of the translation lookup file of the current language.

    The file is ``<lookup_folder>/<language>.json``. When it does not exist yet,
    it is created (with its parent folders) holding an empty JSON object.
    """
    lookup_path = self.lookup_folder / f"{self.language}.json"
    if lookup_path.exists():
        return lookup_path

    logger.info(f"Creating new lookup file: {lookup_path}")
    lookup_path.parent.mkdir(parents=True, exist_ok=True)
    with open(lookup_path, "w", encoding=ENCODING) as f:
        json.dump({}, f, indent=4, ensure_ascii=False)
    return lookup_path
131 |
@property
def translation_lookup(self) -> dict:
    """Return the parsed translation lookup file of the current language.

    The file is read again on every access.
    """
    with open(self.translation_lookup_file, "r", encoding=ENCODING) as f:
        return json.load(f)
138 |
def get_tokenizer(self) -> MarianTokenizer | None:
    """Return the tokenizer of the current language, loading it on first use.

    The tokenizer is loaded from ``models_folder`` with ``local_files_only=True``
    and cached on the instance.

    Returns:
        The tokenizer, or ``None`` when the model is not available or the
        language is not in the models lookup.
    """
    if self._tokenizer is None:
        if not (self.is_model_available() and self.is_language_in_lookup()):
            return None
        self._tokenizer = MarianTokenizer.from_pretrained(
            self.model_name, cache_dir=self.models_folder, local_files_only=True
        )
    return self._tokenizer
151 |
def get_model(self) -> MarianMTModel | None:
    """Return the translation model of the current language, loading it on first use.

    The model is loaded from ``models_folder`` with ``local_files_only=True``
    and cached on the instance.

    Returns:
        The model, or ``None`` when the model is not available or the language
        is not in the models lookup.
    """
    if self._model is None:
        if not (self.is_model_available() and self.is_language_in_lookup()):
            return None

        import torch

        # low_cpu_mem_usage is disabled to avoid the meta device
        load_options = {
            "cache_dir": self.models_folder,
            "local_files_only": True,
            "dtype": torch.float32,
            "low_cpu_mem_usage": False,
        }
        self._model = MarianMTModel.from_pretrained(self.model_name, **load_options)
    return self._model
171 |
def can_be_translated(self) -> bool:
    """Tell whether a translation can be produced for the current language.

    The checks run in this order and the first failing one is logged:
    a language is set, it is not the default language, the models lookup file
    exists, and the model of the language is available.

    Returns:
        ``True`` only when every check passes.
    """
    translatable = False
    if self.language is None:
        logger.warning("No language set. Make sure to set it using 'set_language' method.")
    elif self.language == DEFAULT_LANGUAGE:
        logger.info(f"Default language set ({self.language}), no translation needed.")
    elif not self.models_lookup_file.exists():
        logger.error(
            f"Models lookup file not found: {self.models_lookup_file}. Create using the CLI application."
        )
    elif not self.is_model_available():
        logger.error(
            f"Model for language '{self.language}' not available. Download it using the CLI application."
        )
    else:
        translatable = True
    return translatable
199 |
def translate(self, text: str) -> str:
    """Translate ``text`` to the current language by running the model directly.

    This is the main entry point for model-based translation, so the
    translation preconditions are checked here.

    Args:
        text (str): The text to translate.

    Returns:
        The translated text. The input ``text`` is returned unchanged when
        translation is not possible, when the tokenizer or model is missing,
        or when any exception is raised while translating.
    """
    if not self.can_be_translated():
        return text

    try:
        tokenizer = self.get_tokenizer()
        model = self.get_model()
        if tokenizer is None or model is None:
            logger.error("Tokenizer or model not available for translation.")
            return text

        source = text
        if self.model_name == "Helsinki-NLP/opus-mt-en-ROMANCE":
            # The multilingual ROMANCE model is told the target language by a
            # ">>lang<<" token prepended to the input.
            # NOTE(review): the token is built from the raw language code;
            # confirm that codes such as "es_mx" match the model's tokens.
            source = f">>{self.language}<< {text}"

        inputs = tokenizer(source, return_tensors="pt", padding=True)
        generated = model.generate(**inputs)
        decoded = [tokenizer.decode(item, skip_special_tokens=True) for item in generated]
        return decoded[0] if decoded else ""
    except Exception as e:
        logger.error(f"Error during translation: {e}")
        return text
231 |
232 | def get_translation_from_lookup(self, text: str) -> str | None:
233 | """Get translation from the lookup file.
234 |
235 | Since this is a main function, check related to the lookup file will be performed here.
236 |
237 | Args:
238 | text (str): The text to translate.
239 | """
240 | if not self.can_be_translated():
241 | return text
242 | return self.translation_lookup.get(text)
243 |
def save_translation(self, text: str, translated_text: str) -> None:
    """Store ``translated_text`` under ``text`` in the JSON lookup file.

    The whole lookup is read, updated in memory and written back.
    """
    lookup_path = self.translation_lookup_file

    with open(lookup_path, "r", encoding=ENCODING) as reader:
        entries = json.load(reader)

    entries[text] = translated_text

    with open(lookup_path, "w", encoding=ENCODING) as writer:
        json.dump(entries, writer, indent=4, ensure_ascii=False)
251 |
def get_translation(self, text: str) -> str:
    """Return the translation of ``text``, preferring the lookup over the model.

    When the lookup has no entry, the text is translated with the model and
    the result is saved to the lookup file before being returned.

    Args:
        text (str): The text to translate.
    """
    cached = self.get_translation_from_lookup(text)

    if cached is None:
        # Not in the lookup yet: translate once and persist the result.
        fresh = self.translate(text)
        self.save_translation(text, fresh)
        return fresh

    return text if cached == text else cached
266 |
def __call__(self, text: str) -> str:
    """Make the instance callable: log the request and delegate to ``get_translation``."""
    logger.debug(f"Translating text: {text}")
    translated = self.get_translation(text)
    return translated
271 |
--------------------------------------------------------------------------------
/slangweb/cli.py:
--------------------------------------------------------------------------------
1 | """CLI entry point for slangweb package."""
2 |
3 | import json
4 | import os
5 | import shutil
6 | import sys
7 | from pathlib import Path
8 |
9 | import click
10 | from transformers import MarianMTModel, MarianTokenizer
11 |
12 | from .constants import ENCODING, LOOKUPS_FOLDER, MODELS_FOLDER, MODELS_LOOKUP_FILE, SLANG_FOLDER
13 | from .tools import available_languages, find_translator_usages
14 | from .translator import Translator
15 |
16 |
# Root command group; the sub-commands below register themselves on it.
@click.group()
def cli():
    """Translation Dev Tools CLI."""
21 |
22 |
def _create_config_file(folder: str = SLANG_FOLDER, overwrite: bool = False):
    """Create ``config.json`` in ``folder`` (relative to the current working directory).

    The folder is created when missing. The generated ``source_folders`` entry
    lists ``"."`` followed by the visible sub-directories of the working
    directory, excluding ``folder`` itself, ``__pycache__``, ``docs`` and
    ``tests``.

    Args:
        folder (str): Folder where to create the config file.
        overwrite (bool): Whether to overwrite existing config file.
    """
    here = Path(os.getcwd())
    folder_path = here / folder
    folder_path.mkdir(parents=True, exist_ok=True)

    config_file = folder_path / "config.json"
    # Bail out before scanning the working directory when nothing will be written.
    if config_file.exists() and not overwrite:
        click.echo(
            f"Configuration file already exists at '{config_file}'. "
            f"Run 'slangweb create-config {folder} --overwrite' to overwrite."
        )
        return

    # exclude hidden folders, __pycache__, docs, tests, etc.
    exclude_folders = {folder, "__pycache__", "docs", "tests"}
    # Sorted so the generated file does not depend on directory listing order.
    discovered = sorted(
        entry.name
        for entry in here.iterdir()
        if entry.is_dir()
        and entry.name not in exclude_folders
        and not entry.name.startswith(".")
    )
    config = {
        "base_folder": folder,
        "models_lookup_file": MODELS_LOOKUP_FILE,
        "models_folder": MODELS_FOLDER,
        "lookups_folder": LOOKUPS_FOLDER,
        "default_language": "en",
        "encoding": ENCODING,
        "source_folders": [".", *discovered],
        "supported_languages": ["es"],
        "translator_class": "SW",
    }
    with open(config_file, "w", encoding=ENCODING) as f:
        json.dump(config, f, indent=4, ensure_ascii=False)
    click.echo(f"Configuration file created at '{config_file}'")
60 |
61 |
@cli.command()
@click.argument("folder", default=SLANG_FOLDER, type=str)
@click.option("--overwrite", is_flag=True, help="Overwrite existing config file if it exists.")
def create_config(folder, overwrite):
    """Create the config file in the specified folder.

    The configuration file contains the following structure:

    \b
    {
        "base_folder": "slangweb",
        "models_lookup_file": "models_lookup.json",
        "models_folder": "models",
        "lookups_folder": "lookups",
        "default_language": "en",
        "encoding": "utf-8",
        "source_folders": ["."],  # you can modify
        "supported_languages": ["es"],  # you can modify
        "translator_class": "SW"
    }
    """
    # The "\b" line in the docstring stops Click from re-wrapping the JSON
    # sample into a single paragraph in the --help output.
    _create_config_file(folder, overwrite)
83 |
84 |
def _read_config(folder: str = SLANG_FOLDER) -> dict:
    """Read ``config.json`` from ``folder`` and resolve its paths.

    Args:
        folder (str): Folder, relative to the current working directory, that
            holds the config file.

    Returns:
        dict: The configuration with defaults applied. ``base_folder`` is
        resolved against the working directory; ``models_lookup_file``,
        ``models_folder`` and ``lookups_folder`` are resolved against
        ``folder``. The remaining values are passed through as stored.

    Exits the process with status 1 (after printing a message to stderr) when
    the file is missing or does not contain a JSON object.
    """
    here = Path(os.getcwd())
    config_file = here / folder / "config.json"
    if not config_file.exists():
        click.echo(
            f"Config file '{config_file}' does not exist. Create it first by running 'slangweb create-config'.",
            err=True,
        )
        sys.exit(1)

    try:
        # Same encoding constant that is used when the file is written.
        with open(config_file, "r", encoding=ENCODING) as f:
            config = json.load(f)
    except json.JSONDecodeError as exc:
        click.echo(f"Config file '{config_file}' is not valid JSON: {exc}", err=True)
        sys.exit(1)

    if not isinstance(config, dict):
        click.echo(f"Config file '{config_file}' must contain a JSON object.", err=True)
        sys.exit(1)

    return {
        "base_folder": here / config.get("base_folder", SLANG_FOLDER),
        "models_lookup_file": here / folder / config.get("models_lookup_file", MODELS_LOOKUP_FILE),
        "models_folder": here / folder / config.get("models_folder", MODELS_FOLDER),
        "lookups_folder": here / folder / config.get("lookups_folder", LOOKUPS_FOLDER),
        "default_language": config.get("default_language", "en"),
        "encoding": config.get("encoding", "utf-8"),
        "source_folders": config.get("source_folders", ["."]),
        "supported_languages": config.get("supported_languages", ["es"]),
        "translator_class": config.get("translator_class", "SW"),
    }
108 |
109 |
def _create_models_lookup_file(output_file: Path, overwrite: bool = False):
    """Write the predefined models lookup (language code -> model and name) as JSON.

    Every predefined language is served by the same multilingual model.

    Args:
        output_file (Path): Destination of the lookup file; missing parent
            directories are created.
        overwrite (bool): Whether to replace an existing file. When False and
            the file exists, a notice is printed and nothing is written.
    """
    if output_file.exists() and not overwrite:
        click.echo(
            f"Models lookup file already exists at '{output_file}'. "
            "Use the '--overwrite' flag of 'slangweb create-models-lookup-file' to overwrite."
        )
        return

    romance_model = "Helsinki-NLP/opus-mt-en-ROMANCE"
    # (language code, display name); order is preserved in the written file.
    languages = (
        ("fr", "French"),
        ("fr_be", "French (Belgium)"),
        ("fr_ch", "French (Switzerland)"),
        ("fr_ca", "French (Canada)"),
        ("fr_fr", "French (France)"),
        ("wa", "Walloon"),
        ("frp", "Arpitan"),
        ("oc", "Occitan"),
        ("ca", "Catalan"),
        ("rm", "Romansh"),
        ("lld", "Ladin"),
        ("fur", "Friulian"),
        ("lij", "Ligurian"),
        ("lmo", "Lombard"),
        ("es", "Spanish"),
        ("es_ar", "Spanish (Argentina)"),
        ("es_cl", "Spanish (Chile)"),
        ("es_co", "Spanish (Colombia)"),
        ("es_cr", "Spanish (Costa Rica)"),
        ("es_do", "Spanish (Dominican Republic)"),
        ("es_ec", "Spanish (Ecuador)"),
        ("es_es", "Spanish (Spain)"),
        ("es_gt", "Spanish (Guatemala)"),
        ("es_hn", "Spanish (Honduras)"),
        ("es_mx", "Spanish (Mexico)"),
        ("es_ni", "Spanish (Nicaragua)"),
        ("es_pa", "Spanish (Panama)"),
        ("es_pe", "Spanish (Peru)"),
        ("es_pr", "Spanish (Puerto Rico)"),
        ("es_sv", "Spanish (El Salvador)"),
        ("es_uy", "Spanish (Uruguay)"),
        ("es_ve", "Spanish (Venezuela)"),
        ("pt", "Portuguese"),
        ("pt_br", "Portuguese (Brazil)"),
        ("pt_pt", "Portuguese (Portugal)"),
        ("gl", "Galician"),
        ("lad", "Ladino"),
        ("an", "Aragonese"),
        ("mwl", "Mirandese"),
        ("it", "Italian"),
        ("it_it", "Italian (Italy)"),
        ("co", "Corsican"),
        ("nap", "Neapolitan"),
        ("scn", "Sicilian"),
        ("vec", "Venetian"),
        ("sc", "Sardinian"),
        ("ro", "Romanian"),
        ("la", "Latin"),
    )
    content = {code: {"model": romance_model, "name": name} for code, name in languages}

    # The lookup file name may point into a sub-folder that does not exist yet.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, "w", encoding=ENCODING) as f:
        json.dump(content, f, indent=4, ensure_ascii=False)
    click.echo(f"Models lookup file created at '{output_file}'")
173 |
174 |
@cli.command()
@click.argument("folder", default=SLANG_FOLDER, type=str)
@click.option(
    "--overwrite", is_flag=True, help="Overwrite existing models lookup file if it exists."
)
def create_models_lookup_file(folder: str = SLANG_FOLDER, overwrite: bool = False):
    """Generate models lookup file.

    The location and name of the file will be taken from the config file if provided.
    """
    # The destination path comes from the config stored in ``folder``.
    lookup_path = _read_config(folder)["models_lookup_file"]
    _create_models_lookup_file(lookup_path, overwrite)
187 |
188 |
@cli.command()
@click.argument("folder", default=SLANG_FOLDER, type=str)
def init(folder: str = SLANG_FOLDER):
    """Initialize the slangweb project structure."""
    # Existing files are never overwritten: the config and models lookup are
    # only created when missing.
    _create_config_file(folder, overwrite=False)
    config = _read_config(folder)
    _create_models_lookup_file(config["models_lookup_file"], overwrite=False)
    # Create the folders resolved from the config (not the package defaults),
    # so a pre-existing config with custom folder names is honoured.
    for key in ("lookups_folder", "models_folder"):
        config[key].mkdir(parents=True, exist_ok=True)
    click.echo(f"Initialized slangweb project structure in folder '{folder}'.")
201 |
202 |
def _available_languages(folder: str = SLANG_FOLDER) -> dict[str, str]:
    """Return the languages that have a downloaded model, as a dict.

    The config in ``folder`` supplies the models lookup file and the models
    folder handed to ``available_languages``; its result is returned as is
    (presumably language code -> language name; verify against that helper).
    """
    config = _read_config(folder)
    lookup_file = config["models_lookup_file"]
    models_dir = config["models_folder"]
    return available_languages(lookup_file, models_dir)
207 |
208 |
@cli.command()
@click.option(
    "--folder",
    default=SLANG_FOLDER,
    required=False,
    help="Folder where the config file is located.",
)
def list_languages(folder):
    """List available languages with downloaded models."""
    found = _available_languages(folder)
    if found:
        click.echo("Available languages with downloaded models:")
        # One bullet per language: key followed by its value in parentheses.
        for code, label in found.items():
            click.echo(f"- {code} ({label})")
    else:
        click.echo("No languages with downloaded models found.")
225 |
226 |
def _download_model(language: str, config: dict):
    """Download the model and tokenizer registered for ``language``.

    Args:
        language (str): Language code to look up in the models lookup file.
        config (dict): Configuration providing ``models_lookup_file`` and
            ``models_folder`` (the download cache directory).

    Exits the process with status 1 when the language is not in the lookup or
    its entry does not name a model.
    """
    # Same encoding constant that is used when the lookup file is written.
    with open(config["models_lookup_file"], "r", encoding=ENCODING) as f:
        models_lookup = json.load(f)
    model_data = models_lookup.get(language)
    if not model_data:
        click.echo(f"Unsupported language code: {language}", err=True)
        sys.exit(1)
    model_name = model_data.get("model")
    if not model_name:
        # Without this guard, None would be handed to from_pretrained.
        click.echo(f"No model defined for language code: {language}", err=True)
        sys.exit(1)
    lang = model_data.get("name", language)
    click.echo(f"Downloading model '{model_name}' for language '{language} ({lang})'...")
    MarianMTModel.from_pretrained(model_name, cache_dir=config["models_folder"])
    MarianTokenizer.from_pretrained(model_name, cache_dir=config["models_folder"])
240 |
241 |
@cli.command()
@click.option(
    "--folder",
    default=SLANG_FOLDER,
    required=False,
    help="Folder where the config file is located.",
)
def download_models(folder):
    """Download the translation models (HuggingFace) for the supported languages.

    The languages are taken from 'supported_languages' in the config file; only
    those present in the models lookup file are downloaded.
    """
    config = _read_config(folder)
    supported_languages = config.get("supported_languages", [])
    lookup_file = Path(config["models_lookup_file"])
    if not lookup_file.exists():
        click.echo(
            f"Models lookup file '{lookup_file}' does not exist. "
            "Create it first by running 'slangweb create-models-lookup-file'.",
            err=True,
        )
        sys.exit(1)
    # Same encoding constant that is used when the lookup file is written.
    with open(lookup_file, "r", encoding=ENCODING) as f:
        models_lookup = json.load(f)

    # Report configured languages that cannot be downloaded instead of dropping them silently.
    unknown = [str(lang) for lang in supported_languages if lang not in models_lookup]
    if unknown:
        click.echo(
            f"No model registered for language(s): {', '.join(unknown)}. Skipping.", err=True
        )

    # Keep the lookup file's order, as before.
    languages = [lang for lang in models_lookup if lang in supported_languages]
    if not languages:
        click.echo("No models to download.")
        return
    click.echo(f"Languages to download: {', '.join(languages)}")
    for language in languages:
        _download_model(language, config)
259 |
260 |
def _sync(file: Path, language: str, config: dict) -> None:
    """Translate every translator usage found in one Python file.

    Args:
        file (Path): Python source file to scan; anything else is reported on
            stderr and skipped.
        language (str): Target language set on the translator.
        config (dict): Configuration supplying the translator folders, the
            models lookup file and the translator class name to search for.
    """
    problem = None
    if not file.exists():
        problem = f"File or folder '{file}' does not exist."
    elif not file.is_file():
        problem = f"Only Python files are supported. '{file}' is not a file."
    elif file.suffix != ".py":
        problem = f"Only Python files are supported. '{file}' is not a Python file."
    if problem is not None:
        click.echo(problem, err=True)
        return

    click.echo(f"Syncing translations in: {file}")
    translator = Translator(
        base_folder=config.get("base_folder", SLANG_FOLDER),
        models_folder=config.get("models_folder", MODELS_FOLDER),
        lookup_folder=config.get("lookups_folder", LOOKUPS_FOLDER),
        models_lookup_file=config.get("models_lookup_file", MODELS_LOOKUP_FILE),
    )
    texts = find_translator_usages(file, config.get("translator_class", "SW"))
    click.echo(f"Translations for language '{language}':")
    translator.set_language(language)
    if not translator.can_be_translated():
        return
    # Calling the translator returns the translation for each text found.
    for text in texts:
        click.echo(f"- {text} => {translator(text)}")
287 |
288 |
@cli.command()
@click.argument("file", default=None, required=False, type=str)
@click.option(
    "--folder",
    default=SLANG_FOLDER,
    required=False,
    help="Folder where the config file is located.",
)
def sync(file, folder):
    """Sync translations found in the given Python file.

    When FILE is omitted, every Python file directly inside the configured
    source folders is synced.
    """
    here = Path(os.getcwd())
    config = _read_config(folder)

    # Resolve and validate the explicit target once, before any language work.
    target = None
    if file is not None:
        target = here / file
        if not target.exists():
            click.echo(f"File or folder '{target}' does not exist.", err=True)
            sys.exit(1)
        if target.is_file() and target.suffix != ".py":
            click.echo(f"File '{target}' is not a Python file.", err=True)
            sys.exit(1)

    # Only languages that are both configured and have a downloaded model.
    supported_languages = config.get("supported_languages", [])
    languages = [lang for lang in _available_languages(folder) if lang in supported_languages]
    if not languages:
        click.echo(
            "No supported languages with downloaded models found. "
            "Run 'slangweb download-models' first."
        )
        return

    for lang in languages:
        if target is not None:
            _sync(target, lang, config)
            continue
        # Sync all Python files in the source folders
        for fold in config.get("source_folders", []):
            folder_path = here / fold
            if not folder_path.is_dir():
                click.echo(
                    f"Source folder '{folder_path}' does not exist or is not a directory.",
                    err=True,
                )
                continue
            # Sorted so files are processed in a stable order.
            for item in sorted(folder_path.glob("*.py")):
                _sync(item, lang, config)
327 |
328 |
@cli.command()
def create_flask_example():
    """Create a Flask example file.

    This command will create a folder called 'slangweb_flask_example' in the current working directory,
    containing a simple Flask application that demonstrates how to use the slangweb Translator class.
    """
    example_folder = Path(os.getcwd()) / "slangweb_flask_example"
    example_folder.mkdir(parents=True, exist_ok=True)

    # The example is copied from the "examples" folder next to this module.
    source = Path(__file__).parent / "examples" / "flask_example.py"
    destination = example_folder / "flask_example.py"
    shutil.copy(source, destination)
    click.echo(f"Flask example created at '{destination}'")
347 |
348 |
def main():
    """Run the slangweb command-line interface."""
    cli()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
356 |
--------------------------------------------------------------------------------