├── tests ├── data │ └── warning_list.txt ├── conftest.py ├── __init__.py ├── test_tools.py └── check_warnings.py ├── docs ├── usage.rst ├── contribute.rst ├── _static │ ├── custom.css │ └── custom-icon.js ├── _template │ └── pypackage-credit.html ├── index.rst └── conf.py ├── codecov.yml ├── slangweb ├── py.typed ├── __init__.py ├── constants.py ├── tools.py ├── translator.py └── cli.py ├── .readthedocs.yaml ├── .devcontainer └── devcontainer.json ├── CITATION.cff ├── .copier-answers.yml ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── PULL_REQUEST_TEMPLATE │ │ └── pr_template.md │ └── feature_request.md └── workflows │ ├── release.yaml │ ├── pypackage_check.yaml │ └── unit.yaml ├── AUTHORS.rst ├── LICENSE ├── .pre-commit-config.yaml ├── .gitignore ├── pyproject.toml ├── noxfile.py ├── CONTRIBUTING.rst ├── CODE_OF_CONDUCT.rst └── README.md /tests/data/warning_list.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Pytest session configuration.""" 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """make test folder a package for coverage.""" 2 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | **Slang Web** usage documentation. 
5 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | # disable the treemap comment and report in PRs 2 | comment: false 3 | -------------------------------------------------------------------------------- /slangweb/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. The mypy package uses inline types. -------------------------------------------------------------------------------- /docs/contribute.rst: -------------------------------------------------------------------------------- 1 | Contribute 2 | ========== 3 | 4 | .. include:: ../CONTRIBUTING.rst 5 | :start-line: 3 6 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* add dollar sign in console code-block */ 2 | div.highlight-console pre span.go::before { 3 | content: "$"; 4 | margin-right: 10px; 5 | margin-left: 5px; 6 | } 7 | -------------------------------------------------------------------------------- /docs/_template/pypackage-credit.html: -------------------------------------------------------------------------------- 1 |

2 | From 3 | @12rambau/pypackage 4 | 0.1.18 Copier project. 5 |

6 | -------------------------------------------------------------------------------- /slangweb/__init__.py: -------------------------------------------------------------------------------- 1 | """The init file of the package.""" 2 | 3 | __version__ = "0.0.0" 4 | __author__ = "Rodrigo Esteban Principe" 5 | __email__ = "fitoprincipe82@gmail.com" 6 | 7 | from .translator import Translator # noqa: F401 8 | -------------------------------------------------------------------------------- /slangweb/constants.py: -------------------------------------------------------------------------------- 1 | """Constant variables.""" 2 | 3 | DEFAULT_LANGUAGE = "en" 4 | ENCODING = "utf-8" 5 | SLANG_FOLDER = "slangweb" 6 | MODELS_LOOKUP_FILE = "models_lookup.json" 7 | MODELS_FOLDER = "models" 8 | LOOKUPS_FOLDER = "lookups" 9 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 2 | 3 | version: 2 4 | 5 | build: 6 | os: ubuntu-22.04 7 | tools: 8 | python: "3.10" 9 | 10 | sphinx: 11 | configuration: docs/conf.py 12 | 13 | python: 14 | install: 15 | - method: pip 16 | path: . 
17 | extra_requirements: 18 | - doc 19 | -------------------------------------------------------------------------------- /tests/test_tools.py: -------------------------------------------------------------------------------- 1 | """Test the tools module.""" 2 | 3 | from slangweb import tools 4 | 5 | 6 | def test_get_model_folder(): 7 | """Test get_model_folder function.""" 8 | model_name = "Helsinki-NLP/opus-mt-en-ROMANCE" 9 | expected_folder = "models--Helsinki-NLP--opus-mt-en-ROMANCE" 10 | assert tools.get_model_folder(model_name) == expected_folder 11 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python 3", 3 | "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", 4 | "features": { 5 | "ghcr.io/devcontainers-extra/features/nox:2": {}, 6 | "ghcr.io/devcontainers-extra/features/pre-commit:2": {} 7 | }, 8 | "postCreateCommand": "python -m pip install commitizen uv && pre-commit install" 9 | } 10 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: "1.2.0" 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Principe" 5 | given-names: "Rodrigo Esteban" 6 | orcid: "https://orcid.org/0000-0000-0000-0000" 7 | title: "Slang Web" 8 | version: "0.0.0" 9 | doi: "" 10 | date-released: "2025-12-14" 11 | url: "https://github.com/fitoprincipe/slangweb" 12 | -------------------------------------------------------------------------------- /.copier-answers.yml: -------------------------------------------------------------------------------- 1 | # Changes here will be overwritten by Copier 2 | _commit: 0.1.18 3 | _src_path: gh:12rambau/pypackage 4 | author_email: fitoprincipe82@gmail.com 5 | author_first_name: Rodrigo Esteban 6 | author_last_name: Principe 7 | author_orcid: 0000-0000-0000-0000 8 | creation_year: "2025" 9 | github_repo_name: slangweb 10 | github_user: fitoprincipe 11 | project_name: Slang Web 12 | project_slug: slangweb 13 | short_description: Simple Language Support for Web Development 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | 9 | **Describe the bug** 10 | A clear and concise description of what the bug is. 11 | 12 | **To Reproduce** 13 | Steps to reproduce the behavior: 14 | 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Screenshots** 21 | If applicable, add screenshots to help explain your problem. 22 | 23 | **Additional context** 24 | Add any other context about the problem here. 
25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/PULL_REQUEST_TEMPLATE/pr_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Pull request template 3 | about: Create a pull request 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | 9 | ## reference the related issue 10 | 11 | PR should answer problem stated in the issue tracker. please open one before starting a PR 12 | 13 | ## description of the changes 14 | 15 | Describe the changes you propose 16 | 17 | ## mention 18 | 19 | @mentions of the person or team responsible for reviewing proposed changes 20 | 21 | ## comments 22 | 23 | any other comments we should pay attention to 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | 9 | **Is your feature request related to a problem? Please describe.** 10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 11 | 12 | **Describe the solution you'd like** 13 | A clear and concise description of what you want to happen. 14 | 15 | **Describe alternatives you've considered** 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | **Additional context** 19 | Add any other context or screenshots about the feature request here. 20 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | :html_theme.sidebar_secondary.remove: 2 | 3 | 4 | Slang Web 5 | ========= 6 | 7 | .. 
toctree:: 8 | :hidden: 9 | 10 | usage 11 | contribute 12 | 13 | Documentation contents 14 | ---------------------- 15 | 16 | The documentation contains 3 main sections: 17 | 18 | .. grid:: 1 2 3 3 19 | 20 | .. grid-item:: 21 | 22 | .. card:: Usage 23 | :link: usage.html 24 | 25 | Usage and installation 26 | 27 | .. grid-item:: 28 | 29 | .. card:: Contribute 30 | :link: contribute.html 31 | 32 | Help us improve the lib. 33 | 34 | .. grid-item:: 35 | 36 | .. card:: API 37 | :link: autoapi/index.html 38 | 39 | Discover the lib API. 40 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | env: 8 | PIP_ROOT_USER_ACTION: ignore 9 | 10 | jobs: 11 | tests: 12 | uses: ./.github/workflows/unit.yaml 13 | 14 | deploy: 15 | needs: [tests] 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v5 19 | - uses: actions/setup-python@v5 20 | with: 21 | python-version: "3.11" 22 | - name: Install dependencies 23 | run: pip install twine build nox[uv] 24 | - name: update citation date 25 | run: nox -s release-date 26 | - name: Build and publish 27 | env: 28 | TWINE_USERNAME: __token__ 29 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 30 | run: python -m build && twine upload dist/* 31 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | Thanks goes to these wonderful people (`emoji key `_): 2 | 3 | .. raw:: html 4 | 5 | 6 | 7 | 8 | 15 | 16 | 17 |
9 | 10 | fitoprincipe
11 | Rodrigo Esteban Principe 12 |
13 | 💻 14 |
18 | 19 | This project follows the `all-contributors `_ specification. 20 | 21 | Contributions of any kind are welcome! 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Rodrigo Esteban Principe 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_install_hook_types: [pre-commit, commit-msg] 2 | 3 | repos: 4 | - repo: "https://github.com/commitizen-tools/commitizen" 5 | rev: "v2.18.0" 6 | hooks: 7 | - id: commitizen 8 | stages: [commit-msg] 9 | 10 | - repo: "https://github.com/kynan/nbstripout" 11 | rev: "0.5.0" 12 | hooks: 13 | - id: nbstripout 14 | stages: [pre-commit] 15 | 16 | - repo: "https://github.com/pycontribs/mirrors-prettier" 17 | rev: "v3.4.2" 18 | hooks: 19 | - id: prettier 20 | stages: [pre-commit] 21 | exclude: tests\/test_.+\. 22 | 23 | - repo: https://github.com/charliermarsh/ruff-pre-commit 24 | rev: "v0.7.0" 25 | hooks: 26 | - id: ruff 27 | stages: [pre-commit] 28 | - id: ruff-format 29 | stages: [pre-commit] 30 | 31 | - repo: https://github.com/sphinx-contrib/sphinx-lint 32 | rev: "v1.0.0" 33 | hooks: 34 | - id: sphinx-lint 35 | stages: [pre-commit] 36 | 37 | - repo: https://github.com/codespell-project/codespell 38 | rev: v2.2.4 39 | hooks: 40 | - id: codespell 41 | stages: [pre-commit] 42 | additional_dependencies: 43 | - tomli 44 | 45 | # Prevent committing inline conflict markers 46 | - repo: https://github.com/pre-commit/pre-commit-hooks 47 | rev: v4.3.0 48 | hooks: 49 | - id: check-merge-conflict 50 | stages: [pre-commit] 51 | args: [--assume-in-merge] 52 | 53 | # - repo: local 54 | # hooks: 55 | # - id: generate-lookups 56 | # name: Generate Lookups 57 | # entry: python tools/generate_lookups.py 58 | # language: system 59 | # pass_filenames: false 60 | -------------------------------------------------------------------------------- /tests/check_warnings.py: -------------------------------------------------------------------------------- 1 | """Check the warnings from doc builds.""" 2 | 3 | import sys 4 | from pathlib import Path 5 | 6 | 7 | def check_warnings(file: Path) -> int: 8 
| """Check the list of warnings produced by the CI tests. 9 | 10 | Raises errors if there are unexpected ones and/or if some are missing. 11 | 12 | Args: 13 | file: the path to the generated warning.txt file from 14 | the CI build 15 | 16 | Returns: 17 | 0 if the warnings are all there 18 | 1 if some warning are not registered or unexpected 19 | """ 20 | # print some log 21 | print("\n=== Sphinx Warnings test ===\n") 22 | 23 | # find the file where all the known warnings are stored 24 | warning_file = Path(__file__).parent / "data" / "warning_list.txt" 25 | 26 | test_warnings = file.read_text().strip().split("\n") 27 | ref_warnings = warning_file.read_text().strip().split("\n") 28 | 29 | print( 30 | f'Checking build warnings in file: "{file}" and comparing to expected ' 31 | f'warnings defined in "{warning_file}"\n\n' 32 | ) 33 | 34 | # find all the missing warnings 35 | missing_warnings = [] 36 | for wa in ref_warnings: 37 | index = [i for i, twa in enumerate(test_warnings) if wa in twa] 38 | if len(index) == 0: 39 | missing_warnings += [wa] 40 | print(f"Warning was not raised: {wa}") 41 | else: 42 | test_warnings.pop(index[0]) 43 | 44 | # the remaining one are unexpected 45 | for twa in test_warnings: 46 | print(f"Unexpected warning: {twa}") 47 | 48 | # delete the tmp warnings file 49 | file.unlink() 50 | 51 | return len(missing_warnings) != 0 or len(test_warnings) != 0 52 | 53 | 54 | if __name__ == "__main__": 55 | # cast the file to path and resolve to an absolute one 56 | file = Path.cwd() / "warnings.txt" 57 | 58 | # execute the test 59 | sys.exit(check_warnings(file)) 60 | -------------------------------------------------------------------------------- /.github/workflows/pypackage_check.yaml: -------------------------------------------------------------------------------- 1 | name: template update check 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | env: 7 | PIP_ROOT_USER_ACTION: ignore 8 | 9 | jobs: 10 | check_version: 11 | runs-on: ubuntu-latest 12 | steps: 13 
| - uses: actions/checkout@v5 14 | - uses: actions/setup-python@v5 15 | with: 16 | python-version: "3.10" 17 | - name: install dependencies 18 | run: pip install requests 19 | - name: get latest pypackage release 20 | id: get_latest_release 21 | run: | 22 | RELEASE=$(curl -s https://api.github.com/repos/12rambau/pypackage/releases | jq -r '.[0].tag_name') 23 | echo "latest=$RELEASE" >> $GITHUB_OUTPUT 24 | echo "latest release: $RELEASE" 25 | - name: get current pypackage version 26 | id: get_current_version 27 | run: | 28 | RELEASE=$(yq -r "._commit" .copier-answers.yml) 29 | echo "current=$RELEASE" >> $GITHUB_OUTPUT 30 | echo "current release: $RELEASE" 31 | - name: open issue 32 | if: steps.get_current_version.outputs.current != steps.get_latest_release.outputs.latest 33 | uses: rishabhgupta/git-action-issue@v2 34 | with: 35 | token: ${{ secrets.GITHUB_TOKEN }} 36 | title: "Update template to ${{ steps.get_latest_release.outputs.latest }}" 37 | body: | 38 | The package is based on the ${{ steps.get_current_version.outputs.current }} version of [@12rambau/pypackage](https://github.com/12rambau/pypackage). 39 | 40 | The latest version of the template is ${{ steps.get_latest_release.outputs.latest }}. 41 | 42 | Please consider updating the template to the latest version to include all the latest developments. 43 | 44 | Run the following code in your project directory to update the template: 45 | 46 | ``` 47 | copier update --trust --defaults --vcs-ref ${{ steps.get_latest_release.outputs.latest }} 48 | ``` 49 | 50 | > **Note** 51 | > You may need to reinstall ``copier`` and ``jinja2-time`` if they are not available in your environment. 52 | 53 | After solving the merging issues you can push back the changes to your main branch. 
54 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | """Configuration file for the Sphinx documentation builder. 2 | 3 | This file only contains a selection of the most common options. For a full 4 | list see the documentation: 5 | https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | """ 7 | 8 | # -- Path setup ---------------------------------------------------------------- 9 | from datetime import datetime 10 | 11 | # -- Project information ------------------------------------------------------- 12 | project = "Slang Web" 13 | author = "Rodrigo Esteban Principe" 14 | copyright = f"2025-{datetime.now().year}, {author}" 15 | release = "0.0.0" 16 | 17 | # -- General configuration ----------------------------------------------------- 18 | extensions = [ 19 | "sphinx_copybutton", 20 | "sphinx.ext.napoleon", 21 | "sphinx.ext.viewcode", 22 | "sphinx.ext.intersphinx", 23 | "sphinx_design", 24 | "autoapi.extension", 25 | ] 26 | exclude_patterns = ["**.ipynb_checkpoints"] 27 | templates_path = ["_template"] 28 | 29 | # -- Options for HTML output --------------------------------------------------- 30 | html_theme = "pydata_sphinx_theme" 31 | html_static_path = ["_static"] 32 | html_theme_options = { 33 | "logo": { 34 | "text": project, 35 | }, 36 | "use_edit_page_button": True, 37 | "footer_end": ["theme-version", "pypackage-credit"], 38 | "icon_links": [ 39 | { 40 | "name": "GitHub", 41 | "url": "https://github.com/fitoprincipe/slangweb", 42 | "icon": "fa-brands fa-github", 43 | }, 44 | { 45 | "name": "Pypi", 46 | "url": "https://pypi.org/project/slangweb/", 47 | "icon": "fa-brands fa-python", 48 | }, 49 | { 50 | "name": "Conda", 51 | "url": "https://anaconda.org/conda-forge/slangweb", 52 | "icon": "fa-custom fa-conda", 53 | "type": "fontawesome", 54 | }, 55 | ], 56 | } 57 | html_context = { 58 | "github_user": "fitoprincipe", 59 
"""Utility functions for slangweb."""

import ast
import json
import logging
from pathlib import Path

logger = logging.getLogger(__name__)


def get_model_folder(model_name: str) -> str:
    """Get the name of the model folder for the given model name.

    Args:
        model_name: the model identifier, e.g. ``"Helsinki-NLP/opus-mt-en-ROMANCE"``.

    Returns:
        The folder name: ``"models--"`` followed by the model name with every
        ``/`` replaced by ``--``.
    """
    return f"models--{model_name.replace('/', '--')}"


def available_languages(models_lookup_file: Path, models_folder: Path) -> dict[str, str]:
    """Return the languages whose model is registered and present on disk.

    Args:
        models_lookup_file: JSON file mapping a language code to a dict holding
            at least a ``"model"`` entry and optionally a ``"name"`` entry.
        models_folder: folder in which the model folders are looked up.

    Returns:
        A mapping from language code to its display name (the ``"name"`` entry,
        or the language code itself when there is none). Only languages whose
        model folder exists in ``models_folder`` are included. An empty dict is
        returned (and an error is logged) when the lookup file does not exist.
    """
    if not models_lookup_file.exists():
        logger.error(
            f"Models lookup file '{models_lookup_file}' does not exist. Create it by running 'slangweb generate-models-lookup-file'."
        )
        return {}
    with open(models_lookup_file, "r", encoding="utf-8") as f:
        models_lookup = json.load(f)

    languages: dict[str, str] = {}
    for language, data in models_lookup.items():
        model = data.get("model")
        if not model:
            continue
        # the code and its display name are stored together, so a language
        # whose model is missing cannot shift the names of the following ones
        if (models_folder / get_model_folder(model)).is_dir():
            languages[language] = data.get("name", language)
    return languages


def find_translator_usages(py_file: Path, translator_class: str = "SW") -> list[str]:
    """Find usages of the Translator class in the given Python file.

    Only direct calls by bare name (``SW(...)``) with at least one positional
    argument are reported; attribute calls such as ``module.SW(...)`` are not.

    Args:
        py_file (Path): Path to the Python file to analyze.
        translator_class (str): Name of the translator class to look for. Default is "SW".

    Returns:
        One entry per matching call, describing its first positional argument:
        the text of a string literal, the identifier of a plain variable, or
        the ``ast.dump`` of any other expression.
    """
    with open(py_file, "r", encoding="utf-8") as f:
        tree = ast.parse(f.read(), filename=str(py_file))
    usages = []
    for node in ast.walk(tree):
        is_translator_call = (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id == translator_class
        )
        if not is_translator_call or not node.args:
            continue
        arg = node.args[0]
        # ast.Str was removed in Python 3.12; string literals are ast.Constant
        if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
            usages.append(arg.value)
        elif isinstance(arg, ast.Name):
            usages.append(arg.id)
        else:
            usages.append(ast.dump(arg))
    return usages
.installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | .ruff_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | docs/api/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | # system IDE 134 | .vscode/ 135 | 136 | # image tmp file 137 | *Zone.Identifier 138 | 139 | # debugging notebooks 140 | test.ipynb 141 | -------------------------------------------------------------------------------- /.github/workflows/unit.yaml: -------------------------------------------------------------------------------- 1 | name: Unit tests 2 | 3 | on: 4 | workflow_call: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | 10 | env: 11 | FORCE_COLOR: 1 12 | PIP_ROOT_USER_ACTION: ignore 13 | 14 | jobs: 15 | lint: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v5 19 | - uses: actions/setup-python@v5 20 | with: 21 | python-version: "3.11" 22 | - uses: pre-commit/action@v3.0.0 23 | 24 | mypy: 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v5 28 | - uses: actions/setup-python@v5 29 | with: 30 | python-version: "3.11" 31 | - name: Install nox 32 | run: pip install nox[uv] 33 | - name: run mypy checks 34 | run: nox -s mypy 35 | 36 | docs: 37 | needs: [lint, mypy] 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v5 41 | - uses: actions/setup-python@v5 42 | with: 43 | python-version: "3.11" 44 | - name: Install nox 45 | run: pip install nox[uv] 46 | - name: build static docs 47 | run: nox -s docs 48 | 49 | build: 50 | needs: [lint, mypy] 51 | strategy: 52 | fail-fast: true 53 | matrix: 54 | os: 
[ubuntu-latest] 55 | python-version: ["3.10", "3.11"] 56 | include: 57 | - os: macos-latest # macos test 58 | python-version: "3.11" 59 | - os: windows-latest # windows test 60 | python-version: "3.11" 61 | runs-on: ${{ matrix.os }} 62 | steps: 63 | - uses: actions/checkout@v5 64 | - name: Set up Python ${{ matrix.python-version }} 65 | uses: actions/setup-python@v5 66 | with: 67 | python-version: ${{ matrix.python-version }} 68 | - name: Install nox 69 | run: pip install nox[uv] 70 | - name: test with pytest 71 | run: nox -s ci-test 72 | - name: assess dead fixtures 73 | if: ${{ matrix.python-version == '3.10' }} 74 | shell: bash 75 | run: nox -s dead-fixtures 76 | - uses: actions/upload-artifact@v4 77 | if: ${{ matrix.python-version == '3.10' }} 78 | with: 79 | name: coverage 80 | path: coverage.xml 81 | 82 | coverage: 83 | needs: [build] 84 | runs-on: ubuntu-latest 85 | steps: 86 | - uses: actions/checkout@v5 87 | - uses: actions/download-artifact@v4 88 | with: 89 | name: coverage 90 | - name: codecov 91 | uses: codecov/codecov-action@v4 92 | with: 93 | token: ${{ secrets.CODECOV_TOKEN }} 94 | verbose: true 95 | fail_ci_if_error: true 96 | -------------------------------------------------------------------------------- /docs/_static/custom-icon.js: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Set a custom icon for pypi as it's not available in the fa built-in brands 3 | */ 4 | FontAwesome.library.add( 5 | (faListOldStyle = { 6 | prefix: "fa-custom", 7 | iconName: "conda", 8 | icon: [ 9 | 24, // viewBox width 10 | 24, // viewBox height 11 | [], // ligature 12 | "e001", // unicode codepoint - private use area 13 | "M12.045.033a12.181 12.182 0 00-1.361.078 17.512 17.513 0 011.813 1.433l.48.438-.465.45a15.047 15.048 0 00-1.126 1.205l-.178.215a8.527 8.527 0 01.86-.05 8.154 8.155 0 11-4.286 15.149 15.764 15.765 0 01-1.841.106h-.86a21.847 
21.848 0 00.264 2.866 11.966 11.967 0 106.7-21.89zM8.17.678a12.181 12.182 0 00-2.624 1.275 15.506 15.507 0 011.813.43A18.551 18.552 0 018.17.678zM9.423.75a16.237 16.238 0 00-.995 1.998 16.15 16.152 0 011.605.66 6.98 6.98 0 01.43-.509c.234-.286.472-.559.716-.817A15.047 15.048 0 009.423.75zM4.68 2.949a14.969 14.97 0 000 2.336c.587-.065 1.196-.1 1.812-.107a16.617 16.617 0 01.48-1.748 16.48 16.481 0 00-2.292-.481zM3.62 3.5A11.938 11.938 0 001.762 5.88a17.004 17.004 0 011.877-.444A17.39 17.391 0 013.62 3.5zm4.406.287c-.143.437-.265.888-.38 1.347a8.255 8.255 0 011.67-.803c-.423-.2-.845-.38-1.29-.544zM6.3 6.216a14.051 14.052 0 00-1.555.108c.064.523.157 1.038.272 1.554a8.39 8.391 0 011.283-1.662zm-2.55.137a15.313 15.313 0 00-2.602.716h-.078v.079a17.104 17.105 0 001.267 2.544l.043.071.072-.049a16.309 16.31 0 011.734-1.083l.057-.035V8.54a16.867 16.868 0 01-.408-2.094v-.092zM.644 8.095l-.063.2A11.844 11.845 0 000 11.655v.209l.143-.152a17.706 17.707 0 011.584-1.447l.057-.043-.043-.064a16.18 16.18 0 01-1.025-1.87zm3.77 1.253l-.18.1c-.465.273-.93.573-1.375.889l-.065.05.05.064c.309.437.645.867.996 1.276l.137.165v-.208a8.176 8.176 0 01.364-2.15zM2.2 10.853l-.072.05a16.574 16.574 0 00-1.813 1.734l-.058.058.066.057a15.449 15.45 0 001.991 1.483l.072.05.043-.08a16.738 16.74 0 011.053-1.64v-.05l-.043-.05a16.99 16.99 0 01-1.19-1.54zm1.855 2.071l-.121.172a15.363 15.363 0 00-.917 1.433l-.043.072.071.043a16.61 16.61 0 001.562.766l.193.086-.086-.193a8.04 8.04 0 01-.66-2.172zm-3.976.48v.2a11.758 11.759 0 00.946 3.326l.078.186.072-.194a16.215 16.216 0 01.845-2l.057-.063-.064-.043a17.197 17.198 0 01-1.776-1.284zm2.543 1.805l-.035.08a15.764 15.765 0 00-.983 2.479v.08h.086a16.15 16.152 0 002.688.5l.072.007v-.086a17.562 17.563 0 01.164-2.056v-.065H4.55a16.266 16.266 0 01-1.849-.896zm2.544 1.169v.114a17.254 17.255 0 00-.151 1.828v.078h.931c.287 0 .624.014.946 0h.209l-.166-.129a8.011 8.011 0 01-1.64-1.834zm-3.29 2.1l.115.172a11.988 11.988 0 002.502 2.737l.157.129v-.201a22.578 22.58 0 
01-.2-2.336v-.071h-.072a16.23 16.23 0 01-2.3-.387z", // svg path (https://simpleicons.org/icons/anaconda.svg) 14 | ], 15 | }), 16 | ); 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "slangweb" 7 | version = "0.0.0" 8 | description = "Simple Language Support for Web Development" 9 | keywords = [ 10 | "web", 11 | "language", 12 | "Python" 13 | ] 14 | classifiers = [ 15 | "Development Status :: 3 - Alpha", 16 | "Intended Audience :: Developers", 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | ] 21 | requires-python = ">=3.10" 22 | dependencies = [ 23 | "deprecated>=1.2.14", 24 | "transformers", 25 | "torch", 26 | "sentencepiece", 27 | "sacremoses", 28 | "protobuf" 29 | ] 30 | 31 | [[project.authors]] 32 | name = "Rodrigo Esteban Principe" 33 | email = "fitoprincipe82@gmail.com" 34 | 35 | [project.license] 36 | text = "MIT" 37 | 38 | [project.readme] 39 | file = "README.md" 40 | content-type = "text/markdown" 41 | 42 | [project.scripts] 43 | slangweb = "slangweb.cli:main" 44 | 45 | [project.urls] 46 | Homepage = "https://github.com/fitoprincipe/slangweb" 47 | 48 | [project.optional-dependencies] 49 | test = [ 50 | "pytest", 51 | "pytest-cov", 52 | "pytest-deadfixtures" 53 | ] 54 | doc = [ 55 | "sphinx>=6.2.1", 56 | "pydata-sphinx-theme", 57 | "sphinx-copybutton", 58 | "sphinx-design", 59 | "sphinx-autoapi" 60 | ] 61 | flask = [ 62 | "Flask>=2.0" 63 | ] 64 | dash = [ 65 | "dash>=2.0" 66 | ] 67 | 68 | [tool.hatch.build.targets.wheel] 69 | only-include = ["slangweb"] 70 | 71 | [tool.hatch.envs.default] 72 | dependencies = [ 73 | "pre-commit", 74 | "commitizen", 75 | "nox[uv]" 76 | ] 77 | post-install-commands = ["pre-commit 
install"] 78 | 79 | [tool.commitizen] 80 | tag_format = "v$major.$minor.$patch$prerelease" 81 | update_changelog_on_bump = false 82 | version = "0.0.0" 83 | version_files = [ 84 | "pyproject.toml:version", 85 | "slangweb/__init__.py:__version__", 86 | "docs/conf.py:release", 87 | "CITATION.cff:version" 88 | ] 89 | 90 | [tool.pytest.ini_options] 91 | testpaths = "tests" 92 | 93 | [tool.ruff] 94 | line-length = 100 95 | ignore-init-module-imports = true 96 | fix = true 97 | 98 | [tool.ruff.lint] 99 | select = ["E", "F", "W", "I", "D", "RUF"] 100 | ignore = [ 101 | "E501", # line too long | Black take care of it 102 | "D212", # Multi-line docstring | We use D213 103 | "D101", # Missing docstring in public class | We use D106 104 | ] 105 | 106 | [tool.ruff.lint.flake8-quotes] 107 | docstring-quotes = "double" 108 | 109 | [tool.ruff.lint.pydocstyle] 110 | convention = "google" 111 | 112 | [tool.coverage.run] 113 | source = ["slangweb"] 114 | 115 | [tool.mypy] 116 | scripts_are_modules = true 117 | ignore_missing_imports = true 118 | install_types = true 119 | non_interactive = true 120 | warn_redundant_casts = true 121 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | """All the process that can be run using nox. 2 | 3 | The nox run are build in isolated environment that will be stored in .nox. to force the venv update, remove the .nox/xxx folder. 
@nox.session(reuse_venv=True, venv_backend="uv")
def lint(session: nox.Session):
    """Apply the pre-commits.

    Args:
        session: The nox session. Positional arguments given after ``--`` on the
            command line are forwarded to ``pre-commit run --all-files``.
    """
    session.install("pre-commit")
    session.run("pre-commit", "run", "--all-files", *session.posargs)


@nox.session(reuse_venv=True, venv_backend="uv")
def test(session: nox.Session):
    """Run the selected tests and report coverage in html.

    Args:
        session: The nox session. Positional arguments select the test files to run;
            the whole ``tests`` folder is used when none are given.
    """
    session.install("-e", ".[test]")
    test_files = session.posargs or ["tests"]
    session.run("pytest", "--cov", "--cov-report=html", *test_files)


@nox.session(reuse_venv=True, name="ci-test", venv_backend="uv")
def ci_test(session: nox.Session):
    """Run all the test and report coverage in xml.

    Args:
        session: The nox session.
    """
    session.install("-e", ".[test]")
    session.run("pytest", "--cov", "--cov-report=xml")


@nox.session(reuse_venv=True, name="dead-fixtures", venv_backend="uv")
def dead_fixtures(session: nox.Session):
    """Check for dead fixtures within the tests.

    Args:
        session: The nox session.
    """
    session.install("-e", ".[test]")
    session.run("pytest", "--dead-fixtures")


@nox.session(reuse_venv=True, venv_backend="uv")
def docs(session: nox.Session):
    """Build the documentation.

    Args:
        session: The nox session. The last positional argument, when present, is the
            sphinx builder name; ``html`` is used otherwise.
    """
    # Read the last positional argument instead of popping it, so the session's
    # posargs list is not mutated as a side effect of choosing the builder.
    build = session.posargs[-1] if session.posargs else "html"
    session.install("-e", ".[doc]")
    dst, warn = f"docs/_build/{build}", "warnings.txt"
    session.run("sphinx-build", "-v", "-b", build, "docs", dst, "-w", warn)
    # the sphinx warnings written to ``warnings.txt`` are then checked by the test helper
    session.run("python", "tests/check_warnings.py")


@nox.session(name="mypy", reuse_venv=True, venv_backend="uv")
def mypy(session: nox.Session):
    """Run a mypy check of the lib.

    Args:
        session: The nox session. Positional arguments select the targets to check;
            the ``slangweb`` package is used when none are given.
    """
    # waiting for a fix to https://github.com/laurent-laporte-pro/deprecated/issues/63
    # so we are forced to install "types-deprecated"
    session.install("mypy", "types-deprecated")
    test_files = session.posargs or ["slangweb"]
    session.run("mypy", *test_files)


@nox.session(reuse_venv=True, venv_backend="uv")
def stubgen(session: nox.Session):
    """Generate stub files for the lib but requires human attention before merge.

    Args:
        session: The nox session. The first positional argument, when present, is the
            package to generate stubs for; ``slangweb`` is used otherwise.
    """
    session.install("mypy")
    package = session.posargs or ["slangweb"]
    session.run("stubgen", "-p", package[0], "-o", "stubs", "--include-private")


@nox.session(name="release-date", reuse_venv=True, venv_backend="uv")
def release_date(session: nox.Session):
    """Update the release date of the citation file.

    Rewrites ``CITATION.cff`` in place, replacing the ``date-released:`` line with
    today's date and leaving every other line untouched.

    Args:
        session: The nox session (unused; nothing is installed or run).
    """
    current_date = datetime.datetime.now().strftime("%Y-%m-%d")

    # with ``inplace=True`` everything printed inside the loop is written back to the file
    with fileinput.FileInput("CITATION.cff", inplace=True) as file:
        for line in file:
            if line.startswith("date-released:"):
                print(f'date-released: "{current_date}"')
            else:
                # ``line`` already carries its newline
                print(line, end="")
23 | 24 | Workflow for contributing changes 25 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 26 | 27 | We follow a typical GitHub workflow of: 28 | 29 | - Create a personal fork of this repo 30 | - Create a branch 31 | - Open a pull request 32 | - Fix findings of various linters and checks 33 | - Work through code review 34 | 35 | See the following sections for more details. 36 | 37 | Clone the repository 38 | ^^^^^^^^^^^^^^^^^^^^ 39 | 40 | First off, you'll need your own copy of **Slang Web** codebase. You can clone it for local development like so: 41 | 42 | Fork the repository so you have your own copy on GitHub. See the `GitHub forking guide for more information `__. 43 | 44 | Then, clone the repository locally so that you have a local copy to work on: 45 | 46 | .. code-block:: console 47 | 48 | git clone https://github.com//slangweb 49 | cd slangweb 50 | 51 | Then install the development version of the extension: 52 | 53 | .. code-block:: console 54 | 55 | pip install -e .[dev] 56 | 57 | This will install the **Slang Web** library, together with two additional tools: 58 | - `pre-commit `__ for automatically enforcing code standards and quality checks before commits. 59 | - `nox `__, for automating common development tasks. 60 | 61 | Lastly, activate the pre-commit hooks by running: 62 | 63 | .. code-block:: console 64 | 65 | pre-commit install 66 | 67 | This will install the necessary dependencies to run pre-commit every time you make a commit with Git. 68 | 69 | Contribute to the codebase 70 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 71 | 72 | Any larger updates to the codebase should include tests and documentation. The tests are located in the ``tests`` folder, and the documentation is located in the ``docs`` folder. 73 | 74 | To run the tests locally, use the following command: 75 | 76 | .. code-block:: console 77 | 78 | nox -s test 79 | 80 | See :ref:`below ` for more information on how to update the documentation. 81 | 82 | .. 
_contributing-docs: 83 | 84 | Contribute to the docs 85 | ^^^^^^^^^^^^^^^^^^^^^^ 86 | 87 | The documentation is built using `Sphinx `__ and deployed to `Read the Docs `__. 88 | 89 | To build the documentation locally, use the following command: 90 | 91 | .. code-block:: console 92 | 93 | nox -s docs 94 | 95 | For each pull request, the documentation is built and deployed to make it easier to review the changes in the PR. To access the docs build from a PR, click on the "Read the Docs" preview in the CI/CD jobs. 96 | 97 | Release new version 98 | ^^^^^^^^^^^^^^^^^^^ 99 | 100 | To release a new version, start by pushing a new bump from the local directory: 101 | 102 | .. code-block:: 103 | 104 | cz bump 105 | 106 | The commitizen-tool will detect the semantic version name based on the existing commit messages. 107 | 108 | Then push to GitHub. On GitHub, draft a new release using the same tag name, and the ``release.yaml`` job will publish it to PyPI. 109 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | Contributor Covenant Code of Conduct 2 | ==================================== 3 | 4 | Our Pledge 5 | ---------- 6 | 7 | We as members, contributors, and leaders pledge to make participation in our 8 | community a harassment-free experience for everyone, regardless of age, body 9 | size, visible or invisible disability, ethnicity, sex characteristics, gender 10 | identity and expression, level of experience, education, socio-economic status, 11 | nationality, personal appearance, race, religion, or sexual identity 12 | and orientation. 13 | 14 | We pledge to act and interact in ways that contribute to an open, welcoming, 15 | diverse, inclusive, and healthy community. 
16 | 17 | Our Standards 18 | ------------- 19 | 20 | Examples of behavior that contributes to a positive environment for our 21 | community include: 22 | 23 | * Demonstrating empathy and kindness toward other people 24 | * Being respectful of differing opinions, viewpoints, and experiences 25 | * Giving and gracefully accepting constructive feedback 26 | * Accepting responsibility and apologizing to those affected by our mistakes, 27 | and learning from the experience 28 | * Focusing on what is best not just for us as individuals, but for the 29 | overall community 30 | 31 | Examples of unacceptable behavior include: 32 | 33 | * The use of sexualized language or imagery, and sexual attention or 34 | advances of any kind 35 | * Trolling, insulting or derogatory comments, and personal or political attacks 36 | * Public or private harassment 37 | * Publishing others' private information, such as a physical or email 38 | address, without their explicit permission 39 | * Other conduct which could reasonably be considered inappropriate in a 40 | professional setting 41 | 42 | Enforcement Responsibilities 43 | ---------------------------- 44 | 45 | Community leaders are responsible for clarifying and enforcing our standards of 46 | acceptable behavior and will take appropriate and fair corrective action in 47 | response to any behavior that they deem inappropriate, threatening, offensive, 48 | or harmful. 49 | 50 | Community leaders have the right and responsibility to remove, edit, or reject 51 | comments, commits, code, wiki edits, issues, and other contributions that are 52 | not aligned to this Code of Conduct, and will communicate reasons for moderation 53 | decisions when appropriate. 54 | 55 | Scope 56 | ----- 57 | 58 | This Code of Conduct applies within all community spaces, and also applies when 59 | an individual is officially representing the community in public spaces. 
60 | Examples of representing our community include using an official e-mail address, 61 | posting via an official social media account, or acting as an appointed 62 | representative at an online or offline event. 63 | 64 | Enforcement 65 | ----------- 66 | 67 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 68 | reported to the FAO team responsible for enforcement at 69 | pierrick.rambaud49@gmail.com. 70 | All complaints will be reviewed and investigated promptly and fairly. 71 | 72 | All community leaders are obligated to respect the privacy and security of the 73 | reporter of any incident. 74 | 75 | Enforcement Guidelines 76 | ---------------------- 77 | 78 | Community leaders will follow these Community Impact Guidelines in determining 79 | the consequences for any action they deem in violation of this Code of Conduct: 80 | 81 | Correction 82 | ^^^^^^^^^^ 83 | 84 | **Community Impact**: Use of inappropriate language or other behavior deemed 85 | unprofessional or unwelcome in the community. 86 | 87 | **Consequence**: A private, written warning from community leaders, providing 88 | clarity around the nature of the violation and an explanation of why the 89 | behavior was inappropriate. A public apology may be requested. 90 | 91 | Warning 92 | ^^^^^^^ 93 | 94 | **Community Impact**: A violation through a single incident or series 95 | of actions. 96 | 97 | **Consequence**: A warning with consequences for continued behavior. No 98 | interaction with the people involved, including unsolicited interaction with 99 | those enforcing the Code of Conduct, for a specified period of time. This 100 | includes avoiding interactions in community spaces as well as external channels 101 | like social media. Violating these terms may lead to a temporary or 102 | permanent ban. 103 | 104 | Temporary Ban 105 | ^^^^^^^^^^^^^ 106 | 107 | **Community Impact**: A serious violation of community standards, including 108 | sustained inappropriate behavior. 
109 | 110 | **Consequence**: A temporary ban from any sort of interaction or public 111 | communication with the community for a specified period of time. No public or 112 | private interaction with the people involved, including unsolicited interaction 113 | with those enforcing the Code of Conduct, is allowed during this period. 114 | Violating these terms may lead to a permanent ban. 115 | 116 | Permanent Ban 117 | ^^^^^^^^^^^^^ 118 | 119 | **Community Impact**: Demonstrating a pattern of violation of community 120 | standards, including sustained inappropriate behavior, harassment of an 121 | individual, or aggression toward or disparagement of classes of individuals. 122 | 123 | **Consequence**: A permanent ban from any sort of public interaction within 124 | the community. 125 | 126 | Attribution 127 | ----------- 128 | 129 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 130 | version 2.0, available at 131 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 132 | 133 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 134 | enforcement ladder](https://github.com/mozilla/diversity). 135 | 136 | [homepage]: https://www.contributor-covenant.org 137 | 138 | For answers to common questions about this code of conduct, see the FAQ at 139 | https://www.contributor-covenant.org/faq. Translations are available at 140 | https://www.contributor-covenant.org/translations. 
141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple LANGuage support for the Web (using AI) 2 | 3 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?logo=opensourceinitiative&logoColor=white)](LICENSE) 4 | [![Conventional Commits](https://img.shields.io/badge/Conventional%20Commits-1.0.0-yellow.svg?logo=git&logoColor=white)](https://conventionalcommits.org) 5 | [![ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) 6 | [![prettier](https://img.shields.io/badge/code_style-prettier-ff69b4.svg?logo=prettier&logoColor=white)](https://github.com/prettier/prettier) 7 | [![pre-commit](https://img.shields.io/badge/pre--commit-active-yellow?logo=pre-commit&logoColor=white)](https://pre-commit.com/) 8 | [![PyPI](https://img.shields.io/pypi/v/slangweb?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/slangweb/) 9 | [![build](https://img.shields.io/github/actions/workflow/status/fitoprincipe/slangweb/unit.yaml?logo=github&logoColor=white)](https://github.com/fitoprincipe/slangweb/actions/workflows/unit.yaml) 10 | [![coverage](https://img.shields.io/codecov/c/github/fitoprincipe/slangweb?logo=codecov&logoColor=white)](https://codecov.io/gh/fitoprincipe/slangweb) 11 | [![docs](https://img.shields.io/readthedocs/slangweb?logo=readthedocs&logoColor=white)](https://slangweb.readthedocs.io/en/latest/) 12 | 13 | ## Overview 14 | 15 | Use AI models from Hugging Face to translate your website. 16 | 17 | The system works with two different approaches: 18 | 19 | - [**Dynamic**](#2-dynamic): Translation on-the-fly. It's easy to integrate with any framework. Can be slow if the text is too long. 20 | - [**Static**](#1-static): Use a translation lookup file based on sentences. 
To use a key based approach would require an extra layer of complexity (maybe in the future). The lookup file must be created before deployment. This approach is harder (sometimes impossible) to integrate with any framework, for example, Flask + jinja2 templates. It's fast. 21 | 22 | At the moment, only ROMANCE languages are included by using the model [Helsinki-NLP/opus-mt-en-ROMANCE](https://huggingface.co/Helsinki-NLP/opus-mt-en-ROMANCE). This model can translate to the following languages: 23 | 24 | | Language | Code | Language | Code | Language | Code | 25 | | ---------------------------- | ----- | --------------------- | ----- | ---------- | ---- | -------- | --- | 26 | | Spanish | es | Spanish (Uruguay) | es_uy | Neapolitan | nap | 27 | | Spanish (Argentina) | es_ar | Spanish (Venezuela) | es_ve | Sicilian | scn | 28 | | Spanish (Chile) | es_cl | Portuguese | pt | Venetian | vec | 29 | | Spanish (Colombia) | es_co | Portuguese (Brazil) | pt_br | Aragonese | an | 30 | | Spanish (Costa Rica) | es_cr | Portuguese (Portugal) | pt_pt | Arpitan | frp | 31 | | Spanish (Dominican Republic) | es_do | French | fr | Corsican | co | Friulian | fur | 32 | | Spanish (Ecuador) | es_ec | French (Belgium) | fr_be | Ladin | lld | 33 | | Spanish (El Salvador) | es_sv | French (Switzerland) | fr_ch | Ladino | lad | 34 | | Spanish (Guatemala) | es_gt | French (Canada) | fr_ca | Latin | la | 35 | | Spanish (Honduras) | es_hn | French (France) | fr_fr | Ligurian | lij | 36 | | Spanish (Mexico) | es_mx | Italian | it | Mirandese | mwl | 37 | | Spanish (Nicaragua) | es_ni | Italian (Italy) | it_it | Occitan | oc | 38 | | Spanish (Panama) | es_pa | Catalan | ca | Romansh | rm | 39 | | Spanish (Peru) | es_pe | Galician | gl | Sardinian | sc | 40 | | Spanish (Puerto Rico) | es_pr | Romanian | ro | Walloon | wa | 41 | | Spanish (Spain) | es_es | Lombard | lmo | 42 | 43 | This package creates a folder inside your repo to store a configuration file and other files for the models. 
44 | 45 | ## Installation 46 | 47 | Simply install via pip: 48 | 49 | `pip install slangweb` 50 | 51 | ## Initialization 52 | 53 | Let's suppose you have the following folder structure: 54 | 55 | ``` 56 | my_site/ 57 | ├── app.py # main application entry 58 | ├── src/ # source package / modules 59 | │ ├── index.py # main site logic / translator usage example 60 | └── pages/ # HTML/templates/pages for the site 61 | └── a_page.html # example module representing a page 62 | ``` 63 | 64 | Open a terminal, activate the environment in which you installed the package, and run: 65 | 66 | ```bash 67 | (.venv) C:\my_site>slangweb init 68 | ``` 69 | 70 | This will create the [configuration file](#configuration-file) and the [models lookup file](#models-lookup). 71 | 72 | ## Configuration file 73 | 74 | The configuration file (json) has the following structure: 75 | 76 | ```json 77 | { 78 | "base_folder": "slangweb", 79 | "models_lookup_file": "models_lookup.json", 80 | "models_folder": "models", 81 | "lookups_folder": "lookups", 82 | "default_language": "en", 83 | "encoding": "utf-8", 84 | "source_folders": ["."], 85 | "supported_languages": ["es"], 86 | "translator_class": "SW" 87 | } 88 | ``` 89 | 90 | - `base_folder`: is the main folder where all files will be stored (including the config file). 91 | - `models_lookup_file`: name of the models lookup file. This file will and must be placed inside `base_folder`. 92 | - `models_folder`: folder where the models will and must be stored. Also, must be inside `base_folder`. 93 | - `lookups_folder`: folder where the [translations lookup](#1-static) files will be stored. 94 | - `default_language`: The base language of the site. At the moment only **en**glish is supported. 95 | - `encoding`: Encoding for the lookup files. At the moment only `utf-8` is supported. 96 | - `source_folders`: Folders that contain the source python file where the slangweb translator class is implemented. Developers can modify this at will. 
97 | - `supported_languages`: Languages that the site will support. There will be one [translation lookup](#translation-lookups) file for each language. 98 | - `translator_class`: The class that will be used for static translations across the site. See the [Usage](#usage) section. 99 | 100 | ## Models lookup 101 | 102 | The `models_lookup.json` has the following structure: 103 | 104 | ```json 105 | { 106 | "es": { 107 | "model": "Helsinki-NLP/opus-mt-en-ROMANCE", 108 | "name": "Spanish" 109 | }, 110 | ... 111 | } 112 | ``` 113 | 114 | This file is created automatically. Other languages and models can be added if needed. 115 | 116 | ## Usage 117 | 118 | Once all the configuration was created and modified (if needed), you need to download the models using the CLI application: 119 | 120 | ```bash 121 | (.venv) C:\my_site>slangweb download-models 122 | ``` 123 | 124 | This will download all the models needed for the languages included in the section `supported_languages` in the [configuration file](#configuration-file). 125 | 126 | Finally, you can start implementing it in your python files. There are two main ways of using this package: [statically](#1-static) and [dynamically](#2-dynamic) 127 | 128 | ### 1. Static 129 | 130 | For each language listed in the section `supported_languages` in the [configuration file](#configuration-file) a `translation lookup` file will be created inside the `lookups_folder`. The `translation lookup` file is a json containing all relations between the sentences in the original language and the translated version. For example (spanish): 131 | 132 | `es.json` 133 | 134 | ```json 135 | { 136 | "Hello World": "Hola Mundo", 137 | ... 138 | } 139 | ``` 140 | 141 | The purpose of this approach is to avoid translating on-the-fly to gain loading speed. 
142 | 143 | To use the static translation system you can call the instance, which is the same as calling the method `.get_translation`: 144 | 145 | ```python 146 | from slangweb import Translator 147 | SW = Translator() 148 | translation = SW("Translate this") 149 | same_translation = SW.get_translation("Translate this") 150 | ``` 151 | 152 | Example using Dash: 153 | 154 | ```python 155 | from slangweb import Translator 156 | 157 | # Init Translator 158 | # the variable name must match the "translator_class" in the config file 159 | SW = Translator() 160 | 161 | def layout(lang: str = 'en'): 162 | SW.set_language(lang) 163 | return html.Div([ 164 | html.H2(SW('This is Test for the static translation system.')), 165 | html.H2(SW("Thanks for using SlangWeb!")) 166 | ]) 167 | ``` 168 | 169 | There are 2 ways to create the `translation lookup` files: 170 | 171 | 1. by running the website in `localhost` and accessing the pages. 172 | 2. by running the CLI: 173 | 174 | ```bash 175 | (.venv) C:\my_site>slangweb sync 176 | ``` 177 | 178 | This will create the following file `C:\my_site\slangweb\lookups\es.json` 179 | 180 | ```json 181 | { 182 | "This is a Test for the static translation system.": "Esta es una prueba para el sistema de traducción estática.", 183 | "Thanks for using SlangWeb!": "¡Gracias por usar SlangWeb!" 184 | } 185 | ``` 186 | 187 | ### 2. Dynamic 188 | 189 | In this case, the `translation lookup` file will not be created, and the translation will happen on-the-fly. 
190 | 191 | In your code (using Dash): 192 | 193 | ```python 194 | from slangweb import Translator 195 | 196 | # Init Translator 197 | SW = Translator() 198 | t = SW.translate 199 | 200 | def layout(lang: str = 'en'): 201 | SW.set_language(lang) 202 | return html.Div([ 203 | html.H2(t('This is Test for the static translation system.')), 204 | html.H2(t("Thanks for using SlangWeb!")) 205 | ]) 206 | ``` 207 | 208 | ## Recommendations & caveats 209 | 210 | - Model downloads can be large; ensure enough disk space. 211 | - For production, prefer Static lookups where possible for performance. 212 | - Dynamic translation may add latency; consider caching translations. 213 | - If using private Hugging Face models, set the HF_TOKEN environment variable before running CLI/tools: 214 | 215 | ```powershell 216 | setx HF_TOKEN "your_token_here" 217 | ``` 218 | 219 | ## Credits 220 | 221 | This package was created with [Copier](https://copier.readthedocs.io/en/latest/) and the [@12rambau/pypackage](https://github.com/12rambau/pypackage) 0.1.18 project template. 222 | -------------------------------------------------------------------------------- /slangweb/translator.py: -------------------------------------------------------------------------------- 1 | """Translations in Dash applications. 2 | 3 | Only translates from English to other languages for now. 
def lang_from_path(path: str) -> str:
    """Extract the language code from the given URL path.

    The language code is the first segment of the path, e.g. ``"/es/about"``
    gives ``"es"``.

    Args:
        path (str): URL path. An empty string or ``None`` is accepted.

    Returns:
        The first segment of ``path``, or ``DEFAULT_LANGUAGE`` when the path is empty,
        ``None`` or made only of slashes.
    """
    # ``"".split("/")`` returns ``[""]`` (a non-empty list), so the fallback has to be
    # decided on the segment itself, not on the list of parts.
    first_segment = (path or "").strip("/").split("/")[0]
    return first_segment or DEFAULT_LANGUAGE


class Translator:
    """A simple translator class to manage translations.

    A translator works on one target language at a time (see ``set_language``). It can
    translate on-the-fly with a locally stored model (``translate``) or through a
    per-language JSON lookup file that is filled on demand (``get_translation``, also
    available by calling the instance).
    """

    def __init__(
        self,
        base_folder: str = SLANG_FOLDER,
        models_folder: str = MODELS_FOLDER,
        lookup_folder: str = LOOKUPS_FOLDER,
        models_lookup_file: str = MODELS_LOOKUP_FILE,
    ):
        """Initialize the Translator.

        There are 2 kind of lookup files:
        1. Models lookup file: maps language codes to model names.
        2. Translation lookup files: per-language files that map source texts to translated texts.

        The models lookup file is never created by this class; when it is missing it must
        be generated with the slangweb CLI (the README documents ``slangweb init`` for it).

        All locations are resolved against the current working directory at
        construction time.

        Args:
            base_folder (str): Base directory for slangweb data, relative to the current
                working directory.
            models_folder (str): Directory to store/load translation models, relative to
                ``base_folder``.
            lookup_folder (str): Directory to store/load translation lookups, relative to
                ``base_folder``.
            models_lookup_file (str): Name of the models lookup file, relative to
                ``base_folder``.
        """
        root = Path.cwd() / base_folder
        # target language; ``None`` until ``set_language`` is called
        self.language: str | None = None
        self.base_folder = root
        self.models_folder = root / models_folder
        self.lookup_folder = root / lookup_folder
        self.models_lookup_file = root / models_lookup_file
        # lazily loaded caches
        self._models_lookup: dict | None = None
        self._translation_lookup_file: Path | None = None
        self._model = None
        self._tokenizer = None

    def set_language(self, language: str | None) -> None:
        """Set the current language for translation.

        The language code is lower-cased. When it differs from the current one, the cached
        model and tokenizer are dropped so they get reloaded for the new language.

        Args:
            language (str | None): Language code; an empty value or ``None`` unsets it.
        """
        language = language.lower() if language else None
        if language == self.language:
            return
        self.language = language
        # reset model and tokenizer
        self._model = None
        self._tokenizer = None

    @property
    def models_lookup(self) -> dict:
        """Load the models configuration from the models file.

        The file is parsed once and cached on the instance.

        Returns:
            A shallow copy of the mapping read from the models lookup file, or an empty
            dict (after logging an error) when the file does not exist.
        """
        if not self.models_lookup_file.exists():
            logger.error(f"Models lookup file not found: {self.models_lookup_file}")
            return {}
        if self._models_lookup is None:
            with open(self.models_lookup_file, "r", encoding="utf-8") as f:
                self._models_lookup = json.load(f)
        # hand out a copy so callers cannot alter the cached mapping
        return dict(self._models_lookup)

    @property
    def model_name(self) -> str | None:
        """Get the model name for the current language.

        Returns:
            The ``"model"`` entry registered for the current language in the models
            lookup, or ``None`` (after logging a warning) when there is none.
        """
        model_name = self.models_lookup.get(self.language, {}).get("model")
        if not model_name:
            logger.warning(f"Language '{self.language}' not found in models lookup.")
        return model_name

    def is_language_in_lookup(self) -> bool:
        """Check if the current language is in the lookup file.

        Returns:
            ``False`` when no language is set or when it is the default language;
            otherwise whether the language is a key of the models lookup (an error is
            logged when it is not).
        """
        if self.language is None or self.language == DEFAULT_LANGUAGE:
            return False
        is_in_lookup = self.language in self.models_lookup
        if not is_in_lookup:
            logger.error(f"Language '{self.language}' not found in models lookup.")
        return is_in_lookup

    @property
    def model_filename(self) -> Path | None:
        """Get the model directory for the current language.

        Returns:
            ``models_folder / "models--<model name with '/' replaced by '--'>"``, or
            ``None`` when no model is registered for the current language.
        """
        name = self.model_name
        if name is None:
            return None
        return self.models_folder / f"models--{name.replace('/', '--')}"

    def is_model_available(self) -> bool:
        """Check if the model for the current language is available.

        Returns:
            ``True`` when the model directory of the current language exists on disk.
        """
        model_fn = self.model_filename
        return model_fn is not None and model_fn.is_dir()

    @property
    def translation_lookup_file(self) -> Path:
        """Get the translation lookup file for the current language.

        Side effect: when the file does not exist yet, it is created (together with its
        parent folders) containing an empty JSON object.

        Returns:
            Path of ``<lookup_folder>/<language>.json``.
        """
        fn = self.lookup_folder / f"{self.language}.json"
        if not fn.exists():
            logger.info(f"Creating new lookup file: {fn}")
            fn.parent.mkdir(parents=True, exist_ok=True)
            with open(fn, "w", encoding=ENCODING) as f:
                json.dump({}, f, indent=4, ensure_ascii=False)
        return fn

    @property
    def translation_lookup(self) -> dict:
        """Get the translation lookup for the current language.

        The file is read again on every access, so the returned dict always reflects
        what is on disk.

        Returns:
            Mapping of source texts to translated texts.
        """
        with open(self.translation_lookup_file, "r", encoding=ENCODING) as f:
            return json.load(f)

    def get_tokenizer(self) -> MarianTokenizer | None:
        """Get the tokenizer for the current language.

        The tokenizer is loaded from the local models folder only (no download) and is
        cached until the language changes.

        Returns:
            The tokenizer, or ``None`` when the model is not available locally or the
            language is not in the models lookup.
        """
        if self._tokenizer is not None:
            return self._tokenizer

        if not (self.is_model_available() and self.is_language_in_lookup()):
            return None

        self._tokenizer = MarianTokenizer.from_pretrained(
            self.model_name, cache_dir=self.models_folder, local_files_only=True
        )
        return self._tokenizer

    def get_model(self) -> MarianMTModel | None:
        """Get the translation model for the current language.

        The model is loaded from the local models folder only (no download) and is cached
        until the language changes.

        Returns:
            The model, or ``None`` when the model is not available locally or the
            language is not in the models lookup.
        """
        if self._model is not None:
            return self._model

        if not (self.is_model_available() and self.is_language_in_lookup()):
            return None

        # imported here so that torch is only loaded when a model is actually needed
        import torch

        # Disable low_cpu_mem_usage to avoid meta device
        self._model = MarianMTModel.from_pretrained(
            self.model_name,
            cache_dir=self.models_folder,
            local_files_only=True,
            dtype=torch.float32,
            low_cpu_mem_usage=False,
        )
        return self._model

    def can_be_translated(self) -> bool:
        """Check if the current language can be translated.

        Each failed precondition is logged.

        Returns:
            ``True`` only when a non-default language is set, the models lookup file
            exists and the model of the language is available locally.
        """
        # exit: no language set
        if self.language is None:
            logger.warning("No language set. Make sure to set it using 'set_language' method.")
            return False

        # exit: default language
        if self.language == DEFAULT_LANGUAGE:
            logger.info(f"Default language set ({self.language}), no translation needed.")
            return False

        # exit: model lookup file missing
        if not self.models_lookup_file.exists():
            logger.error(
                f"Models lookup file not found: {self.models_lookup_file}. Create using the CLI application."
            )
            return False

        # exit: model not available
        if not self.is_model_available():
            logger.error(
                f"Model for language '{self.language}' not available. Download it using the CLI application."
            )
            return False

        return True

    def translate(self, text: str) -> str:
        """Translate the given text to the current language, directly using the model.

        Since this is the main function, check related to translation using the model will be performed here.

        Args:
            text (str): The text to translate.

        Returns:
            The translated text. The original ``text`` is returned unchanged when the
            language cannot be translated, when the tokenizer or model cannot be loaded,
            or when any error is raised while translating (the error is logged).
        """
        if not self.can_be_translated():
            return text

        try:
            # translate using model
            tokenizer = self.get_tokenizer()
            model = self.get_model()
            if tokenizer is None or model is None:
                logger.error("Tokenizer or model not available for translation.")
                return text
            if self.model_name == "Helsinki-NLP/opus-mt-en-ROMANCE":
                # this model serves several target languages: the wanted one is selected
                # by prefixing the text with a ``>>lang<<`` token
                tgt_lang = f">>{self.language}<<"
                inputs = tokenizer(f"{tgt_lang} {text}", return_tensors="pt", padding=True)
            else:
                inputs = tokenizer(text, return_tensors="pt", padding=True)
            translated = model.generate(**inputs)
            tgt_text = [tokenizer.decode(t, skip_special_tokens=True) for t in translated]
            return tgt_text[0] if tgt_text else ""
        except Exception as e:
            # deliberate best effort: a failing model must not break the page
            logger.error(f"Error during translation: {e}")
            return text

    def get_translation_from_lookup(self, text: str) -> str | None:
        """Get translation from the lookup file.

        Since this is a main function, check related to the lookup file will be performed here.

        Args:
            text (str): The text to translate.

        Returns:
            ``text`` itself when the current language cannot be translated, the stored
            translation when there is one, ``None`` when the text is not in the lookup.
        """
        if not self.can_be_translated():
            return text
        return self.translation_lookup.get(text)

    def save_translation(self, text: str, translated_text: str) -> None:
        """Save the translated text to the lookup file.

        The whole lookup file of the current language is read, updated and written back.

        Args:
            text (str): The source text, used as key.
            translated_text (str): The translation to store.
        """
        lookup = self.translation_lookup
        lookup[text] = translated_text
        with open(self.translation_lookup_file, "w", encoding=ENCODING) as f:
            json.dump(lookup, f, indent=4, ensure_ascii=False)

    def get_translation(self, text: str) -> str:
        """Get translation from lookup or translate and save it to lookup.

        Args:
            text (str): The text to translate.

        Returns:
            The translation found in the lookup file or, when missing, the one produced
            by ``translate`` (which is then stored in the lookup file).
        """
        translation = self.get_translation_from_lookup(text)
        if translation == text:
            # untranslatable language, or a stored translation identical to the source
            return text
        if translation is None:  # not found in lookup
            translation = self.translate(text)
            # update lookup file
            # NOTE(review): ``translate`` returns the source text when it fails, so a
            # failed translation is stored as well and served from the lookup afterwards.
            self.save_translation(text, translation)
        return translation

    def __call__(self, text: str) -> str:
        """Translate the given text using the translator instance.

        Shortcut for ``get_translation``.

        Args:
            text (str): The text to translate.
        """
        logger.debug(f"Translating text: {text}")
        return self.get_translation(text)
35 | exclude_folders = {folder, "__pycache__", "docs", "tests"} 36 | for item in os.listdir(here): 37 | item_path = here / item 38 | if item_path.is_dir() and item not in exclude_folders and not item.startswith("."): 39 | source_folders.append(item) 40 | config = { 41 | "base_folder": folder, 42 | "models_lookup_file": MODELS_LOOKUP_FILE, 43 | "models_folder": MODELS_FOLDER, 44 | "lookups_folder": LOOKUPS_FOLDER, 45 | "default_language": "en", 46 | "encoding": ENCODING, 47 | "source_folders": source_folders, 48 | "supported_languages": ["es"], 49 | "translator_class": "SW", 50 | } 51 | config_file = folder_path / "config.json" 52 | if config_file.exists() and not overwrite: 53 | click.echo( 54 | f"Configuration file already exists at '{config_file}'. Use overwrite=True to overwrite." 55 | ) 56 | return 57 | with open(config_file, "w", encoding=ENCODING) as f: 58 | json.dump(config, f, indent=4, ensure_ascii=False) 59 | click.echo(f"Configuration file created at '{config_file}'") 60 | 61 | 62 | @cli.command() 63 | @click.argument("folder", default=SLANG_FOLDER, type=str) 64 | @click.option("--overwrite", is_flag=True, help="Overwrite existing config file if it exists.") 65 | def create_config(folder, overwrite): 66 | """Create the config file in the specified folder. 
67 | 68 | The configuration file contains the following structure: 69 | 70 | { 71 | "base_folder": "slangweb", 72 | "models_lookup_file": "models_lookup.json", 73 | "models_folder": "models", 74 | "lookups_folder": "lookups", 75 | "default_language": "en", 76 | "encoding": "utf-8", 77 | "source_folders": ["."], # you can modify 78 | "supported_languages": ["es"], # you can modify 79 | "translator_class": "SW" 80 | } 81 | """ 82 | _create_config_file(folder, overwrite) 83 | 84 | 85 | def _read_config(folder: str = SLANG_FOLDER) -> dict: 86 | """Read the config file from the specified folder.""" 87 | here = Path(os.getcwd()) 88 | config_file = here / folder / "config.json" 89 | if not config_file.exists(): 90 | click.echo( 91 | f"Config file '{config_file}' does not exist. Create it first by running 'slangweb create-config'.", 92 | err=True, 93 | ) 94 | sys.exit(1) 95 | with open(config_file, "r", encoding="utf-8") as f: 96 | config = json.load(f) 97 | return { 98 | "base_folder": here / config.get("base_folder", SLANG_FOLDER), 99 | "models_lookup_file": here / folder / config.get("models_lookup_file", MODELS_LOOKUP_FILE), 100 | "models_folder": here / folder / config.get("models_folder", MODELS_FOLDER), 101 | "lookups_folder": here / folder / config.get("lookups_folder", LOOKUPS_FOLDER), 102 | "default_language": config.get("default_language", "en"), 103 | "encoding": config.get("encoding", "utf-8"), 104 | "source_folders": config.get("source_folders", ["."]), 105 | "supported_languages": config.get("supported_languages", ["es"]), 106 | "translator_class": config.get("translator_class", "SW"), 107 | } 108 | 109 | 110 | def _create_models_lookup_file(output_file: Path, overwrite: bool = False): 111 | """Create a models lookup file with predefined content.""" 112 | content = { 113 | "fr": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "French"}, 114 | "fr_be": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "French (Belgium)"}, 115 | "fr_ch": {"model": 
"Helsinki-NLP/opus-mt-en-ROMANCE", "name": "French (Switzerland)"}, 116 | "fr_ca": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "French (Canada)"}, 117 | "fr_fr": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "French (France)"}, 118 | "wa": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Walloon"}, 119 | "frp": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Arpitan"}, 120 | "oc": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Occitan"}, 121 | "ca": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Catalan"}, 122 | "rm": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Romansh"}, 123 | "lld": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Ladin"}, 124 | "fur": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Friulian"}, 125 | "lij": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Ligurian"}, 126 | "lmo": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Lombard"}, 127 | "es": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish"}, 128 | "es_ar": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Argentina)"}, 129 | "es_cl": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Chile)"}, 130 | "es_co": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Colombia)"}, 131 | "es_cr": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Costa Rica)"}, 132 | "es_do": { 133 | "model": "Helsinki-NLP/opus-mt-en-ROMANCE", 134 | "name": "Spanish (Dominican Republic)", 135 | }, 136 | "es_ec": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Ecuador)"}, 137 | "es_es": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Spain)"}, 138 | "es_gt": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Guatemala)"}, 139 | "es_hn": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Honduras)"}, 140 | "es_mx": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Mexico)"}, 141 | "es_ni": {"model": 
"Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Nicaragua)"}, 142 | "es_pa": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Panama)"}, 143 | "es_pe": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Peru)"}, 144 | "es_pr": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Puerto Rico)"}, 145 | "es_sv": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (El Salvador)"}, 146 | "es_uy": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Uruguay)"}, 147 | "es_ve": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Spanish (Venezuela)"}, 148 | "pt": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Portuguese"}, 149 | "pt_br": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Portuguese (Brazil)"}, 150 | "pt_pt": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Portuguese (Portugal)"}, 151 | "gl": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Galician"}, 152 | "lad": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Ladino"}, 153 | "an": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Aragonese"}, 154 | "mwl": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Mirandese"}, 155 | "it": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Italian"}, 156 | "it_it": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Italian (Italy)"}, 157 | "co": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Corsican"}, 158 | "nap": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Neapolitan"}, 159 | "scn": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Sicilian"}, 160 | "vec": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Venetian"}, 161 | "sc": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Sardinian"}, 162 | "ro": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Romanian"}, 163 | "la": {"model": "Helsinki-NLP/opus-mt-en-ROMANCE", "name": "Latin"}, 164 | } 165 | if output_file.exists() and not overwrite: 166 | click.echo( 167 | f"Models 
lookup file already exists at '{output_file}'. Use overwrite=True to overwrite." 168 | ) 169 | return 170 | with open(output_file, "w", encoding=ENCODING) as f: 171 | json.dump(content, f, indent=4, ensure_ascii=False) 172 | click.echo(f"Models lookup file created at '{output_file}'") 173 | 174 | 175 | @cli.command() 176 | @click.argument("folder", default=SLANG_FOLDER, type=str) 177 | @click.option( 178 | "--overwrite", is_flag=True, help="Overwrite existing models lookup file if it exists." 179 | ) 180 | def create_models_lookup_file(folder: str = SLANG_FOLDER, overwrite: bool = False): 181 | """Generate models lookup file. 182 | 183 | The location and name of the file will be taken from the config file if provided. 184 | """ 185 | config = _read_config(folder) 186 | _create_models_lookup_file(config["models_lookup_file"], overwrite) 187 | 188 | 189 | @cli.command() 190 | @click.argument("folder", default=SLANG_FOLDER, type=str) 191 | def init(folder: str = SLANG_FOLDER): 192 | """Initialize the slangweb project structure.""" 193 | _create_config_file(folder, overwrite=False) 194 | config = _read_config(folder) 195 | _create_models_lookup_file(config["models_lookup_file"], overwrite=False) 196 | here = Path(os.getcwd()) 197 | folder_path = here / folder 198 | (folder_path / LOOKUPS_FOLDER).mkdir(parents=True, exist_ok=True) 199 | (folder_path / MODELS_FOLDER).mkdir(parents=True, exist_ok=True) 200 | click.echo(f"Initialized slangweb project structure in folder '{folder}'.") 201 | 202 | 203 | def _available_languages(folder: str = SLANG_FOLDER) -> dict[str, str]: 204 | """Return a list of available languages with downloaded models.""" 205 | config = _read_config(folder) 206 | return available_languages(config["models_lookup_file"], config["models_folder"]) 207 | 208 | 209 | @cli.command() 210 | @click.option( 211 | "--folder", 212 | default=SLANG_FOLDER, 213 | required=False, 214 | help="Folder where the config file is located.", 215 | ) 216 | def 
list_languages(folder): 217 | """List available languages with downloaded models.""" 218 | languages = _available_languages(folder) 219 | if not languages: 220 | click.echo("No languages with downloaded models found.") 221 | return 222 | click.echo("Available languages with downloaded models:") 223 | for lang, expanded in languages.items(): 224 | click.echo(f"- {lang} ({expanded})") 225 | 226 | 227 | def _download_model(language: str, config: dict): 228 | """Download a translation model by name (HuggingFace).""" 229 | with open(config["models_lookup_file"], "r", encoding="utf-8") as f: 230 | models_lookup = json.load(f) 231 | model_data = models_lookup.get(language) 232 | if not model_data: 233 | click.echo(f"Unsupported language code: {language}", err=True) 234 | sys.exit(1) 235 | model_name = model_data.get("model") 236 | lang = model_data.get("name", language) 237 | click.echo(f"Downloading model '{model_name}' for language '{language} ({lang})'...") 238 | MarianMTModel.from_pretrained(model_name, cache_dir=config["models_folder"]) 239 | MarianTokenizer.from_pretrained(model_name, cache_dir=config["models_folder"]) 240 | 241 | 242 | @cli.command() 243 | @click.option( 244 | "--folder", 245 | default=SLANG_FOLDER, 246 | required=False, 247 | help="Folder where the config file is located.", 248 | ) 249 | def download_models(folder): 250 | """Download a translation model by name (HuggingFace).""" 251 | config = _read_config(folder) 252 | supported_languages = config.get("supported_languages", []) 253 | with open(config["models_lookup_file"], "r", encoding="utf-8") as f: 254 | models_lookup = json.load(f) 255 | languages = [lang for lang in models_lookup.keys() if lang in supported_languages] 256 | print(languages) 257 | for language in languages: 258 | _download_model(language, config) 259 | 260 | 261 | def _sync(file: Path, language: str, config: dict) -> None: 262 | """Sync translations found in the given Python file.""" 263 | if not file.exists(): 264 | 
click.echo(f"File or folder '{file}' does not exist.", err=True) 265 | return None 266 | if not file.is_file(): 267 | click.echo(f"Only Python files are supported. '{file}' is not a file.", err=True) 268 | return None 269 | if file.suffix != ".py": 270 | click.echo(f"Only Python files are supported. '{file}' is not a Python file.", err=True) 271 | return None 272 | click.echo(f"Syncing translations in: {file}") 273 | SW = Translator( 274 | base_folder=config.get("base_folder", SLANG_FOLDER), 275 | models_folder=config.get("models_folder", MODELS_FOLDER), 276 | lookup_folder=config.get("lookups_folder", LOOKUPS_FOLDER), 277 | models_lookup_file=config.get("models_lookup_file", MODELS_LOOKUP_FILE), 278 | ) 279 | to_translate = find_translator_usages(file, config.get("translator_class", "SW")) 280 | click.echo(f"Translations for language '{language}':") 281 | SW.set_language(language) 282 | if SW.can_be_translated(): 283 | for text in to_translate: 284 | translation = SW(text) 285 | click.echo(f"- {text} => {translation}") 286 | return None 287 | 288 | 289 | @cli.command() 290 | @click.argument("file", default=None, required=False, type=str) 291 | @click.option( 292 | "--folder", 293 | default=SLANG_FOLDER, 294 | required=False, 295 | help="Folder where the config file is located.", 296 | ) 297 | def sync(file, folder): 298 | """Sync translations found in the given Python file.""" 299 | here = Path(os.getcwd()) 300 | config = _read_config(folder) 301 | languages = _available_languages(folder).keys() 302 | supported_languages = config.get("supported_languages", []) 303 | languages = [lang for lang in languages if lang in supported_languages] 304 | for lang in languages: 305 | if file is None: 306 | # Sync all Python files in the source folders 307 | for fold in config.get("source_folders", []): 308 | folder_path = here / fold 309 | print(folder_path) 310 | if not folder_path.exists() or not folder_path.is_dir(): 311 | click.echo( 312 | f"Source folder '{folder_path}' 
does not exist or is not a directory.", 313 | err=True, 314 | ) 315 | continue 316 | for item in folder_path.glob("*.py"): 317 | _sync(item, lang, config) 318 | else: 319 | file = here / file 320 | if not file.exists(): 321 | click.echo(f"File or folder '{file}' does not exist.", err=True) 322 | sys.exit(1) 323 | if file.is_file() and file.suffix != ".py": 324 | click.echo(f"File '{file}' is not a Python file.", err=True) 325 | sys.exit(1) 326 | _sync(file, lang, config) 327 | 328 | 329 | @cli.command() 330 | def create_flask_example(): 331 | """Create a Flask example file. 332 | 333 | This command will create a folder called 'slangweb_flask_example' in the current working directory, 334 | containing a simple Flask application that demonstrates how to use the slangweb Translator class. 335 | """ 336 | here = Path(os.getcwd()) 337 | example_folder = here / "slangweb_flask_example" 338 | example_folder.mkdir(parents=True, exist_ok=True) 339 | # copy the flask_example.py content 340 | # flask_example_path = example_folder / "flask_example.py" 341 | # with open(flask_example_path, 'w', encoding='utf-8') as f: 342 | # f.write(flask_example) 343 | shutil.copy( 344 | Path(__file__).parent / "examples" / "flask_example.py", example_folder / "flask_example.py" 345 | ) 346 | click.echo(f"Flask example created at '{example_folder / 'flask_example.py'}'") 347 | 348 | 349 | def main(): 350 | """Main entry point for the CLI.""" 351 | cli() 352 | 353 | 354 | if __name__ == "__main__": 355 | main() 356 | --------------------------------------------------------------------------------