├── .github └── workflows │ ├── publish.yml │ └── python_test.yml ├── .gitignore ├── .nerve.toml ├── .pre-commit-config.yaml ├── LICENSE ├── NOTICE.MD ├── README.md ├── feature_test ├── __init__.py ├── lingua_t.py └── spacy.py ├── pdm.lock ├── pyproject.toml ├── src └── fast_langdetect │ ├── LICENSE │ ├── __init__.py │ ├── infer.py │ └── resources │ ├── NOTICE.MD │ └── lid.176.ftz └── tests ├── __init__.py ├── conftest.py ├── test_chinese_path.py ├── test_detect.py └── test_real_detection.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: publish 2 | on: 3 | workflow_dispatch: 4 | push: 5 | tags: 6 | - pypi* 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | pypi-publish: 13 | name: upload release to PyPI 14 | runs-on: ubuntu-latest 15 | permissions: 16 | # IMPORTANT: this permission is mandatory for trusted publishing 17 | id-token: write 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - uses: pdm-project/setup-pdm@v3 22 | 23 | - name: Publish package distributions to PyPI 24 | run: pdm publish 25 | -------------------------------------------------------------------------------- /.github/workflows/python_test.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | - dev 9 | - 'dev-**' 10 | 11 | jobs: 12 | Testing: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | python-version: [ '3.9', '3.10', '3.11' ,'3.12', '3.13' ] 17 | os: [ ubuntu-latest, windows-latest ] # , ubuntu-latest, macos-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v3 21 | - name: Set up PDM 22 | uses: pdm-project/setup-pdm@v3 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - name: Install dependencies 27 | run: | 28 | pdm install --no-lock -G testing 29 | 30 | - name: Run Regular Tests 31 | run: | 32 | pdm run -v pytest tests 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm-project.org/#use-with-ide 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | .idea/ 163 | -------------------------------------------------------------------------------- /.nerve.toml: -------------------------------------------------------------------------------- 1 | # https://github.com/LlmKira/contributor/blob/main/.nerve.toml 2 | contributor = "68baa075-303d-47a8-80ca-b6a5a0fa8e3a" 3 | 4 | language = "English" 5 | issue_auto_label = true 6 | issue_title_format = true 7 | issue_body_format = false 8 | issue_close_with_report = true 9 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | - id: check-added-large-files 9 | 10 | # check python requirements 11 | - repo: https://github.com/pdm-project/pdm 12 | rev: 2.11.2 # a PDM release exposing the hook 13 | hooks: 14 | - id: pdm-lock-check 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 LLM Kira 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NOTICE.MD: -------------------------------------------------------------------------------- 1 | # NOTICE OF THIS PROJECT 2 | 3 | ## MIT License 4 | 5 | The MIT license applies to the files in: 6 | 7 | file: "fast_langdetect/ft_detect/infer.py" from https://github.com/zafercavdar/fasttext-langdetect -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fast-langdetect 🚀 2 | 3 | [![PyPI version](https://badge.fury.io/py/fast-langdetect.svg)](https://badge.fury.io/py/fast-langdetect) 4 | [![Downloads](https://pepy.tech/badge/fast-langdetect)](https://pepy.tech/project/fast-langdetect) 5 | [![Downloads](https://pepy.tech/badge/fast-langdetect/month)](https://pepy.tech/project/fast-langdetect/) 6 | 7 | ## Overview 8 | 9 | **`fast-langdetect`** is an ultra-fast and highly accurate language detection library based on FastText, a library developed by Facebook. Its incredible speed and accuracy make it 80x faster than conventional methods and deliver up to 95% accuracy. 10 | 11 | - Supported Python `3.9` to `3.13`. 12 | - Works offline in low memory mode 13 | - No `numpy` required (thanks to @dalf). 
14 | 15 | > ### Background 16 | > 17 | > This project builds upon [zafercavdar/fasttext-langdetect](https://github.com/zafercavdar/fasttext-langdetect#benchmark) with enhancements in packaging. 18 | > For more information about the underlying model, see the official FastText documentation: [Language Identification](https://fasttext.cc/docs/en/language-identification.html). 19 | 20 | > ### Possible memory usage 21 | > 22 | > *This library requires at least **200MB memory** in low-memory mode.* 23 | 24 | ## Installation 💻 25 | 26 | To install fast-langdetect, you can use either `pip` or `pdm`: 27 | 28 | ### Using pip 29 | 30 | ```bash 31 | pip install fast-langdetect 32 | ``` 33 | 34 | ### Using pdm 35 | 36 | ```bash 37 | pdm add fast-langdetect 38 | ``` 39 | 40 | ## Usage 🖥️ 41 | 42 | In scenarios **where accuracy is important**, you should not rely on the detection results of small models, use `low_memory=False` to download larger models! 43 | 44 | ### Prerequisites 45 | 46 | - If the sample is too long or too short, the accuracy will be reduced. 47 | - The model will be downloaded to system temporary directory by default. 
You can customize it by: 48 | - Setting `FTLANG_CACHE` environment variable 49 | - Using `LangDetectConfig(cache_dir="your/path")` 50 | 51 | ### Native API (Recommended) 52 | 53 | ```python 54 | from fast_langdetect import detect, detect_multilingual, LangDetector, LangDetectConfig, DetectError 55 | 56 | # Simple detection 57 | print(detect("Hello, world!")) 58 | # Output: {'lang': 'en', 'score': 0.12450417876243591} 59 | 60 | # Using large model for better accuracy 61 | print(detect("Hello, world!", low_memory=False)) 62 | # Output: {'lang': 'en', 'score': 0.98765432109876} 63 | 64 | # Custom configuration with fallback mechanism 65 | config = LangDetectConfig( 66 | cache_dir="/custom/cache/path", # Custom model cache directory 67 | allow_fallback=True # Enable fallback to small model if large model fails 68 | ) 69 | detector = LangDetector(config) 70 | 71 | try: 72 | result = detector.detect("Hello world", low_memory=False) 73 | print(result) # {'lang': 'en', 'score': 0.98} 74 | except DetectError as e: 75 | print(f"Detection failed: {e}") 76 | 77 | # How to deal with multiline text 78 | multiline_text = """ 79 | Hello, world! 80 | This is a multiline text. 81 | """ 82 | multiline_text = multiline_text.replace("\n", " ") 83 | print(detect(multiline_text)) 84 | # Output: {'lang': 'en', 'score': 0.8509423136711121} 85 | 86 | # Multi-language detection 87 | results = detect_multilingual( 88 | "Hello 世界 こんにちは", 89 | low_memory=False, # Use large model for better accuracy 90 | k=3 # Return top 3 languages 91 | ) 92 | print(results) 93 | # Output: [ 94 | # {'lang': 'ja', 'score': 0.4}, 95 | # {'lang': 'zh', 'score': 0.3}, 96 | # {'lang': 'en', 'score': 0.2} 97 | # ] 98 | ``` 99 | 100 | #### Fallbacks 101 | 102 | We provide a fallback mechanism: when `allow_fallback=True`, if the program fails to load the **large model** (`low_memory=False`), it will fall back to the offline **small model** to complete the prediction task. 
103 | 104 | ```python 105 | # Disable fallback - will raise error if large model fails to load 106 | # But fallback disabled when custom_model_path is not None, because its a custom model, we will directly use it. 107 | import tempfile 108 | config = LangDetectConfig( 109 | allow_fallback=False, 110 | custom_model_path=None, 111 | cache_dir=tempfile.gettempdir(), 112 | ) 113 | detector = LangDetector(config) 114 | 115 | try: 116 | result = detector.detect("Hello world", low_memory=False) 117 | except DetectError as e: 118 | print("Model loading failed and fallback is disabled") 119 | ``` 120 | 121 | ### Convenient `detect_language` Function 122 | 123 | ```python 124 | from fast_langdetect import detect_language 125 | 126 | # Single language detection 127 | print(detect_language("Hello, world!")) 128 | # Output: EN 129 | 130 | print(detect_language("Привет, мир!")) 131 | # Output: RU 132 | 133 | print(detect_language("你好,世界!")) 134 | # Output: ZH 135 | ``` 136 | 137 | ### Load Custom Models 138 | 139 | ```python 140 | # Load model from local file 141 | config = LangDetectConfig( 142 | custom_model_path="/path/to/your/model.bin", # Use local model file 143 | disable_verify=True # Skip MD5 verification 144 | ) 145 | detector = LangDetector(config) 146 | result = detector.detect("Hello world") 147 | ``` 148 | 149 | ### Splitting Text by Language 🌐 150 | 151 | For text splitting based on language, please refer to the [split-lang](https://github.com/DoodleBears/split-lang) 152 | repository. 153 | 154 | ## Benchmark 📊 155 | 156 | For detailed benchmark results, refer 157 | to [zafercavdar/fasttext-langdetect#benchmark](https://github.com/zafercavdar/fasttext-langdetect#benchmark). 158 | 159 | ## References 📚 160 | 161 | [1] A. Joulin, E. Grave, P. Bojanowski, T. 
Mikolov, Bag of Tricks for Efficient Text Classification 162 | 163 | ```bibtex 164 | @article{joulin2016bag, 165 | title={Bag of Tricks for Efficient Text Classification}, 166 | author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Mikolov, Tomas}, 167 | journal={arXiv preprint arXiv:1607.01759}, 168 | year={2016} 169 | } 170 | ``` 171 | 172 | [2] A. Joulin, E. Grave, P. Bojanowski, M. Douze, H. Jégou, T. Mikolov, FastText.zip: Compressing text classification 173 | models 174 | 175 | ```bibtex 176 | @article{joulin2016fasttext, 177 | title={FastText.zip: Compressing text classification models}, 178 | author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Douze, Matthijs and J{\'e}gou, H{\'e}rve and Mikolov, Tomas}, 179 | journal={arXiv preprint arXiv:1612.03651}, 180 | year={2016} 181 | } 182 | ``` 183 | -------------------------------------------------------------------------------- /feature_test/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2024/1/18 上午11:41 3 | # @Author : sudoskys 4 | from fast_langdetect import ( 5 | detect, 6 | detect_multilingual, 7 | detect_language, 8 | LangDetector, 9 | LangDetectConfig, 10 | ) 11 | 12 | # 测试繁体,简体,日文,英文,韩文,法文,德文,西班牙文 13 | print(detect_multilingual("Hello, world!你好世界!Привет, мир!", low_memory=False)) 14 | # [{'lang': 'ja', 'score': 0.32009604573249817}, {'lang': 'uk', 'score': 0.27781224250793457}, {'lang': 'zh', 'score': 0.17542070150375366}, {'lang': 'sr', 'score': 0.08751443773508072}, {'lang': 'bg', 'score': 0.05222449079155922}] 15 | print(detect("hello world")) 16 | print(detect("你好世界")) 17 | print(detect_language("Привет, мир!")) 18 | print(detect_language("你好世界")) 19 | print(detect_language("こんにちは世界")) 20 | print(detect_language("안녕하세요 세계")) 21 | print(detect_language("Bonjour le monde")) 22 | print(detect_language("Hallo Welt")) 23 | print(detect_language("Hola mundo")) 24 | print( 25 | 
detect_language( 26 | "這些機構主辦的課程,多以基本電腦使用為主,例如文書處理、中文輸入、互聯網應用等" 27 | ) 28 | ) 29 | 30 | # When offline, its raise error 31 | print( 32 | detect_multilingual( 33 | "Hello, world!你好世界!Привет, мир!", 34 | low_memory=False, 35 | config=LangDetectConfig(allow_fallback=True) 36 | ) 37 | ) 38 | 39 | config = LangDetectConfig(allow_fallback=False) 40 | detector = LangDetector(config) 41 | # 尝试使用大模型进行检测(应该会失败并回退到小模型) 42 | result = detector.detect("Hello world", low_memory=False) 43 | print(result) 44 | -------------------------------------------------------------------------------- /feature_test/lingua_t.py: -------------------------------------------------------------------------------- 1 | from lingua import LanguageDetectorBuilder 2 | 3 | from fast_langdetect import detect_language, detect_multilingual 4 | 5 | low_mem_detector = (LanguageDetectorBuilder 6 | .from_all_languages() 7 | .with_low_accuracy_mode() 8 | .with_preloaded_language_models() 9 | .build()) 10 | detector = (LanguageDetectorBuilder 11 | .from_all_languages() 12 | .with_preloaded_language_models() 13 | .build()) 14 | ja_sentence = "こんにちは世界" 15 | print(detect_language(ja_sentence)) 16 | print(low_mem_detector.detect_language_of(ja_sentence).iso_code_639_1.name) 17 | print("===") 18 | ko_sentence = "안녕하세요 세계" 19 | print(detect_language(ko_sentence)) 20 | print(low_mem_detector.detect_language_of(ko_sentence).iso_code_639_1.name) 21 | print("===") 22 | fr_sentence = "Bonjour le monde" 23 | print(detect_language(fr_sentence)) 24 | print(low_mem_detector.detect_language_of(fr_sentence).iso_code_639_1.name) 25 | print("===") 26 | de_sentence = "Hallo Welt" 27 | print(detect_language(de_sentence)) 28 | print(low_mem_detector.detect_language_of(de_sentence).iso_code_639_1.name) 29 | print("===") 30 | zh_sentence = "這些機構主辦的課程,多以基本電腦使用為主,例如文書處理、中文輸入、互聯網應用等" 31 | print(detect_language(zh_sentence)) 32 | print(low_mem_detector.detect_language_of(zh_sentence).iso_code_639_1.name) 33 | print("===") 34 | es_sentence = 
"Hola mundo" 35 | print(detect_language(es_sentence)) 36 | print(low_mem_detector.detect_language_of(es_sentence).iso_code_639_1.name) 37 | print("===") 38 | 39 | sentence = "こんにちは世界" 40 | for result in detector.detect_multiple_languages_of(sentence): 41 | print(result.language) 42 | print("===") 43 | sentence = """ 44 | こんにちは世界 45 | 안녕하세요 세계 46 | Hallo Welt 47 | 這些機構主辦的課程,多以基本電腦使用為主,例如文書處理、中文輸入、互聯網應用等 48 | Bonjour le monde 49 | """ 50 | langs = detect_multilingual(sentence.replace("\n", " "), low_memory=False) 51 | for lang in langs: 52 | print(lang) 53 | confidence_values = detector.compute_language_confidence_values(sentence) 54 | for confidence in confidence_values: 55 | if confidence.value > 0: 56 | print(f"{confidence.language.iso_code_639_1.name}: {confidence.value:.2f}") 57 | print("===") 58 | for result in low_mem_detector.detect_multiple_languages_of(sentence): 59 | print(result.language.iso_code_639_1.name) 60 | -------------------------------------------------------------------------------- /feature_test/spacy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2024/1/18 下午3:25 3 | # @Author : sudoskys 4 | # @File : spacy.py 5 | # @Software: PyCharm 6 | import spacy 7 | import spacy_fastlang # noqa: F401 8 | 9 | nlp = spacy.blank("xx") 10 | nlp.add_pipe("language_detector") 11 | 12 | # nlp = spacy.load("en_core_web_sm") 13 | # nlp.add_pipe("language_detector") 14 | doc = nlp('Life is like a box of chocolates. You never know what you are gonna get.') 15 | 16 | assert doc._.language == 'en' 17 | assert doc._.language_score >= 0.8 18 | -------------------------------------------------------------------------------- /pdm.lock: -------------------------------------------------------------------------------- 1 | # This file is @generated by PDM. 2 | # It is not intended for manual editing. 
3 | 4 | [metadata] 5 | groups = ["default", "dev"] 6 | strategy = ["inherit_metadata"] 7 | lock_version = "4.5.0" 8 | content_hash = "sha256:458ea4588e153e9471b5a168654371e729a75b77eda9b33e681c293fd689c2ce" 9 | 10 | [[metadata.targets]] 11 | requires_python = ">=3.9" 12 | 13 | [[package]] 14 | name = "certifi" 15 | version = "2024.12.14" 16 | requires_python = ">=3.6" 17 | summary = "Python package for providing Mozilla's CA Bundle." 18 | groups = ["default"] 19 | files = [ 20 | {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, 21 | {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, 22 | ] 23 | 24 | [[package]] 25 | name = "charset-normalizer" 26 | version = "3.4.1" 27 | requires_python = ">=3.7" 28 | summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 29 | groups = ["default"] 30 | files = [ 31 | {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, 32 | {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, 33 | {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037"}, 34 | {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f"}, 35 | {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a"}, 36 | {file = 
"charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a"}, 37 | {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247"}, 38 | {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408"}, 39 | {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb"}, 40 | {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d"}, 41 | {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807"}, 42 | {file = "charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f"}, 43 | {file = "charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f"}, 44 | {file = "charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125"}, 45 | {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1"}, 46 | {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3"}, 47 | {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd"}, 48 | {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00"}, 49 | {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12"}, 50 | {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77"}, 51 | {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146"}, 52 | {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd"}, 53 | {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6"}, 54 | {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8"}, 55 | {file = "charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b"}, 56 | {file = "charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76"}, 57 | {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"}, 58 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"}, 59 | {file = 
"charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"}, 60 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"}, 61 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"}, 62 | {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"}, 63 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"}, 64 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"}, 65 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"}, 66 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"}, 67 | {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"}, 68 | {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"}, 69 | {file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"}, 70 | {file = "charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda"}, 71 | {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313"}, 72 | {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9"}, 73 | {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b"}, 74 | {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11"}, 75 | {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f"}, 76 | {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd"}, 77 | {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2"}, 78 | {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886"}, 79 | {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601"}, 80 | {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd"}, 81 | {file = "charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407"}, 82 | {file = 
"charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971"}, 83 | {file = "charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41"}, 84 | {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f"}, 85 | {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2"}, 86 | {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770"}, 87 | {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4"}, 88 | {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537"}, 89 | {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496"}, 90 | {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78"}, 91 | {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7"}, 92 | {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6"}, 93 | {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294"}, 94 | {file = "charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5"}, 95 | {file = "charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765"}, 96 | {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, 97 | {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, 98 | ] 99 | 100 | [[package]] 101 | name = "colorama" 102 | version = "0.4.6" 103 | requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 104 | summary = "Cross-platform colored terminal text." 105 | groups = ["default", "dev"] 106 | marker = "sys_platform == \"win32\" or platform_system == \"Windows\"" 107 | files = [ 108 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 109 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 110 | ] 111 | 112 | [[package]] 113 | name = "colorlog" 114 | version = "6.9.0" 115 | requires_python = ">=3.6" 116 | summary = "Add colours to the output of Python's logging module." 
117 | groups = ["default"] 118 | dependencies = [ 119 | "colorama; sys_platform == \"win32\"", 120 | ] 121 | files = [ 122 | {file = "colorlog-6.9.0-py3-none-any.whl", hash = "sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff"}, 123 | {file = "colorlog-6.9.0.tar.gz", hash = "sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2"}, 124 | ] 125 | 126 | [[package]] 127 | name = "exceptiongroup" 128 | version = "1.2.2" 129 | requires_python = ">=3.7" 130 | summary = "Backport of PEP 654 (exception groups)" 131 | groups = ["dev"] 132 | marker = "python_version < \"3.11\"" 133 | files = [ 134 | {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, 135 | {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, 136 | ] 137 | 138 | [[package]] 139 | name = "fasttext-predict" 140 | version = "0.9.2.4" 141 | summary = "fasttext with wheels and no external dependency, but only the predict method (<1MB)" 142 | groups = ["default"] 143 | files = [ 144 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba432f33228928df5f2af6dfa50560cd77f9859914cffd652303fb02ba100456"}, 145 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a8e8f17eb894d450168d2590e23d809e845bd4fad5e39b5708dacb2fdb9b2c7"}, 146 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19565fdf0bb9427831cfc75fca736ab9d71ba7ce02e3ea951e5839beb66560b6"}, 147 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb6986815506e3261c0b3f6227dce49eeb4fd3422dab9cd37e2db2fb3691c68b"}, 148 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:229dfdf8943dd76231206c7c9179e3f99d45879e5b654626ee7b73b7fa495d53"}, 
149 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:397016ebfa9ec06d6dba09c29e295eea583ea3f45fa4592cc832b257dc84522e"}, 150 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:fc93f9f8f7e982eb635bc860688be04f355fab3d76a243037e26862646f50430"}, 151 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:f4be96ac0b01a3cda82be90e7f6afdafab98919995825c27babd2749a8319be9"}, 152 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f505f737f9493d22ee0c54af7c7eb7828624d5089a1e85072bdb1bd7d3f8f82e"}, 153 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9ce69f28862dd551d43e27aa0a8de924b6b34412bff998c23c3d4abd70813183"}, 154 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-win32.whl", hash = "sha256:864b6bb543275aee74360eee1d2cc23a440f09991e97efcdcf0b9a5af00f9aa9"}, 155 | {file = "fasttext_predict-0.9.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:7e72abe12c13fd12f8bb137b1f7561096fbd3bb24905a27d9e93a4921ee68dc6"}, 156 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:147996c86aa0928c7118f85d18b6a77c458db9ca236db26d44ee5ceaab0c0b6b"}, 157 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5342f7363709e22524a31750c21e4b735b6666749a167fc03cc3bbf18ea8eccd"}, 158 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cbecd3908909339316f61db38030ce43890c25bddb06c955191458af13ccfc5"}, 159 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9de4fcfb54bec35be6b0dffcdc5ace1a3a07f79ee3e8d33d13b82cc4116c5f2f"}, 160 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5af82e09227d993befc00271407b9d3c8aae81d34b35f96208223faf609f4b0c"}, 161 | {file = 
"fasttext_predict-0.9.2.4-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:337ee60179f32e8b0efa822e59316de15709c7684e7854021b4f6af82b7767ac"}, 162 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa9da0c52e65a45dbc87df67015ec1d2712f04de47733e197176550521feea87"}, 163 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:495efde8afb622266c0e4de41978a6db731a0a685e1db032e7d22937850c9b44"}, 164 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e5726ba34d79a143b69426e29905eb4d3f4ee8aee94927b3bea3dd566712986b"}, 165 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5ac2f35830705c61dd848314c4c077a393608c181725dc353a69361821aa69a8"}, 166 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-win32.whl", hash = "sha256:7b2f8a5cf5f2c451777dbb7ea4957c7919a57ce29a4157a0a381933c9ea6fa70"}, 167 | {file = "fasttext_predict-0.9.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:83a3c00fdb73a304bc529bc0ae0e225bc2cb956fcfb8e1c7a882b2a1aaa97e19"}, 168 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:dcf8661da4f515551523470a745df246121f7e19736fcf3f48f04287963e6279"}, 169 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:99dbfcc3f353da2639fd04fc574a65ff4195b018311f790583147cdc6eb122f4"}, 170 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:427e99ba963b2c744ed7233304037a83b7adece97de6f361cfd356aa43cb87f3"}, 171 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8b9480cc75a906571a8e5fc717b91b4783f1820aaa5ed36a304d689280de8602"}, 172 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11ef7af2a4431c76d2226e47334e86b9c4a78a98f6cb68b1ce9a1fc20e04c904"}, 173 | {file = 
"fasttext_predict-0.9.2.4-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:ecb0b854596ba847742597b35c2d0134fcf3a59214d09351d01535854078d56b"}, 174 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fbbcfefac10f625d95fc42f28d76cc5bf0c12875f147b5a79108a2669e64a2dc"}, 175 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:a8cb78a00c04b7eb7da18b4805f8557b36911dc4375c947d8938897d2e131841"}, 176 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:299ae56ad53e1381c65030143da7bcae12546fd32bc019215592ec1ee40fd19e"}, 177 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:091938062002fe30d214f6e493a3a1e6180d401212d37eea23c29f4b55f3f347"}, 178 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-win32.whl", hash = "sha256:981b8d9734623f8f9a8003970f765e14b1d91ee82c59c35e8eba6b76368fa95e"}, 179 | {file = "fasttext_predict-0.9.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:bd3c33971c241577b0767e55d97acfda790f77378f9d5ee7872b6ee4bd63130b"}, 180 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ddb85e62c95e4e02d417c782e3434ef65554df19e3522f5230f6be15a9373c05"}, 181 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:102129d45cf98dda871e83ae662f71d999b9ef6ff26bc842ffc1520a1f82930c"}, 182 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05ba6a0fbf8cb2141b8ca2bc461db97af8ac31a62341e4696a75048b9de39e10"}, 183 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c7a779215571296ecfcf86545cb30ec3f1c6f43cbcd69f83cc4f67049375ea1"}, 184 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddd2f03f3f206585543f5274b1dbc5f651bae141a1b14c9d5225c2a12e5075c2"}, 185 | {file = 
"fasttext_predict-0.9.2.4-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:748f9edc3222a1fb7a61331c4e06d3b7f2390ae493f91f09d372a00b81762a8d"}, 186 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1aee47a40757cd24272b34eaf9ceeea86577fd0761b0fd0e41599c6549abdf04"}, 187 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:6ff0f152391ee03ffc18495322100c01735224f7843533a7c4ff33c8853d7be1"}, 188 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4d92f5265318b41d6e68659fd459babbff692484e492c5013995b90a56b517c9"}, 189 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3a7720cce1b8689d88df76cac1425e84f9911c69a4e40a5309d7d3435e1bb97c"}, 190 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-win32.whl", hash = "sha256:d16acfced7871ed0cd55b476f0dbdddc7a5da1ffc9745a3c5674846cf1555886"}, 191 | {file = "fasttext_predict-0.9.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:96a23328729ce62a851f8953582e576ca075ee78d637df4a78a2b3609784849e"}, 192 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:b1357d0d9d8568db84668b57e7c6880b9c46f757e8954ad37634402d36f09dba"}, 193 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9604c464c5d86c7eba34b040080be7012e246ef512b819e428b7deb817290dae"}, 194 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6da186c2e4497cbfaba9c5424e58c7b72728b25d980829eb96daccd7cface1"}, 195 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:366ed2ca4f4170418f3585e92059cf17ee2c963bf179111c5b8ba48f06cd69d1"}, 196 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f1877edbb815a43e7d38cc7332202e759054cf0b5a4b7e34a743c0f5d6e7333"}, 197 | {file = 
"fasttext_predict-0.9.2.4-cp313-cp313t-manylinux_2_31_armv7l.whl", hash = "sha256:f63c31352ba6fc910290b0fe12733770acd8cfa0945fcb9cf3984d241abcfc9d"}, 198 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:898e14b03fbfb0a8d9a5185a0a00ff656772b3baa37cad122e06e8e4d6da3832"}, 199 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:a33bb5832a69fc54d18cadcf015677c1acb5ccc7f0125d261df2a89f8aff01f6"}, 200 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7fe9e98bd0701d598bf245eb2fbf592145cd03551684a2102a4b301294b9bd87"}, 201 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dcb8c5a74c1785f005fd83d445137437b79ac70a2dfbfe4bb1b09aa5643be545"}, 202 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-win32.whl", hash = "sha256:a85c7de3d4480faa12b930637fca9c23144d1520786fedf9ba8edd8642ed4aea"}, 203 | {file = "fasttext_predict-0.9.2.4-cp313-cp313t-win_amd64.whl", hash = "sha256:be0933fa4af7abae09c703d28f9e17c80e7069eb6f92100b21985b777f4ea275"}, 204 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8ff71f9905567271a760139978dec62f8c224f20c8c42a45addd4830fa3db977"}, 205 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:89401fa60533a9307bf26c312f3a47c58f9f8daf735532a03b0a88af391a6b7a"}, 206 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b8e51eef5ebb1905b3b10e0f19cec7f0259f9134cfde76e4c172ac5dff3d1f1"}, 207 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4d4bd0178d295ed898903fc8e1454682a44e9e3db8bc3e777c3e122f2c5d2a39"}, 208 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37717d593560d2d82911ba644dc0eb0c8d9b270b005d59bc278ae1465b77b50e"}, 209 | {file = 
"fasttext_predict-0.9.2.4-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:144decf434c79b80cacbb14007602ca0e563a951000dc7ca3308d022b1c6a56c"}, 210 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:abd5f77f491f83f9f2f374c38adb9432fae1e92db28fdd2cf5c0f3db48e1f805"}, 211 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:25f3f82b847a320ce595dc772f5e1054ef0a1aa02e7d39feb0ea6374dc83aa55"}, 212 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6390f898bbc83a85447338e1a68d1730d5a5ca68292ea3621718c3f4be39288f"}, 213 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:038bf374a9b9bd665fe58ef28a9b6a4703f8ba1de93bb747b974d7f78f023222"}, 214 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-win32.whl", hash = "sha256:639ab150585ceb3832912d9b623122735481cff676876040ca9b08312264634a"}, 215 | {file = "fasttext_predict-0.9.2.4-cp39-cp39-win_amd64.whl", hash = "sha256:91c84cfb18a3a617e785fc9aa3bd4c80ffbe20009beb8f9e63e362160cb71a08"}, 216 | {file = "fasttext_predict-0.9.2.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b11ba9414aa71754f798a102cf7d3df53307055b2b0f0b258a3f2d59c5a12cfa"}, 217 | {file = "fasttext_predict-0.9.2.4-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c89c769e3646bdb341487a68835239f35a4a0959cc1a8d8a7d215f40b22a230"}, 218 | {file = "fasttext_predict-0.9.2.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f3b9cd4a2cf4c4853323f57c5da6ecffca6aeb9b6d8751ee40fe611d6edf8dd"}, 219 | {file = "fasttext_predict-0.9.2.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1c92905396c74e5cb29ddbfa763b5addec1581b6e0eae4cbe82248dfe733557e"}, 220 | {file = "fasttext_predict-0.9.2.4.tar.gz", hash = "sha256:18a6fb0d74c7df9280db1f96cb75d990bfd004fa9d669493ea3dd3d54f84dbc7"}, 221 | ] 222 | 223 | [[package]] 224 | name = "idna" 225 | 
version = "3.10" 226 | requires_python = ">=3.6" 227 | summary = "Internationalized Domain Names in Applications (IDNA)" 228 | groups = ["default"] 229 | files = [ 230 | {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, 231 | {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, 232 | ] 233 | 234 | [[package]] 235 | name = "iniconfig" 236 | version = "2.0.0" 237 | requires_python = ">=3.7" 238 | summary = "brain-dead simple config-ini parsing" 239 | groups = ["dev"] 240 | files = [ 241 | {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, 242 | {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, 243 | ] 244 | 245 | [[package]] 246 | name = "packaging" 247 | version = "24.2" 248 | requires_python = ">=3.8" 249 | summary = "Core utilities for Python packages" 250 | groups = ["dev"] 251 | files = [ 252 | {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, 253 | {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, 254 | ] 255 | 256 | [[package]] 257 | name = "pluggy" 258 | version = "1.5.0" 259 | requires_python = ">=3.8" 260 | summary = "plugin and hook calling mechanisms for python" 261 | groups = ["dev"] 262 | files = [ 263 | {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, 264 | {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, 265 | ] 266 | 267 | [[package]] 268 | name = "pytest" 269 | version = "8.3.4" 270 | requires_python = ">=3.8" 271 | summary = "pytest: simple powerful testing with Python" 272 | groups = ["dev"] 273 | 
dependencies = [ 274 | "colorama; sys_platform == \"win32\"", 275 | "exceptiongroup>=1.0.0rc8; python_version < \"3.11\"", 276 | "iniconfig", 277 | "packaging", 278 | "pluggy<2,>=1.5", 279 | "tomli>=1; python_version < \"3.11\"", 280 | ] 281 | files = [ 282 | {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, 283 | {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, 284 | ] 285 | 286 | [[package]] 287 | name = "requests" 288 | version = "2.32.3" 289 | requires_python = ">=3.8" 290 | summary = "Python HTTP for Humans." 291 | groups = ["default"] 292 | dependencies = [ 293 | "certifi>=2017.4.17", 294 | "charset-normalizer<4,>=2", 295 | "idna<4,>=2.5", 296 | "urllib3<3,>=1.21.1", 297 | ] 298 | files = [ 299 | {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, 300 | {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, 301 | ] 302 | 303 | [[package]] 304 | name = "robust-downloader" 305 | version = "0.0.2" 306 | summary = "A Simple Robust Downloader written in Python" 307 | groups = ["default"] 308 | dependencies = [ 309 | "colorlog", 310 | "requests", 311 | "tqdm", 312 | ] 313 | files = [ 314 | {file = "robust-downloader-0.0.2.tar.gz", hash = "sha256:08c938b96e317abe6b037e34230a91bda9b5d613f009bca4a47664997c61de90"}, 315 | {file = "robust_downloader-0.0.2-py3-none-any.whl", hash = "sha256:8fe08bfb64d714fd1a048a7df6eb7b413eb4e624309a49db2c16fbb80a62869d"}, 316 | ] 317 | 318 | [[package]] 319 | name = "tomli" 320 | version = "2.2.1" 321 | requires_python = ">=3.8" 322 | summary = "A lil' TOML parser" 323 | groups = ["dev"] 324 | marker = "python_version < \"3.11\"" 325 | files = [ 326 | {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, 327 | {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, 328 | {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, 329 | {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, 330 | {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, 331 | {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, 332 | {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, 333 | {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, 334 | {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, 335 | {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, 336 | {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, 337 | {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, 338 | {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, 339 | {file = 
"tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, 340 | {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, 341 | {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, 342 | {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, 343 | {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, 344 | {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, 345 | {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, 346 | {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, 347 | {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, 348 | {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, 349 | {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, 350 | {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, 351 | {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, 352 | {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, 353 | {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, 354 | {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, 355 | {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, 356 | {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, 357 | {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, 358 | ] 359 | 360 | [[package]] 361 | name = "tqdm" 362 | version = "4.67.1" 363 | requires_python = ">=3.7" 364 | summary = "Fast, Extensible Progress Meter" 365 | groups = ["default"] 366 | dependencies = [ 367 | "colorama; platform_system == \"Windows\"", 368 | ] 369 | files = [ 370 | {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, 371 | {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, 372 | ] 373 | 374 | [[package]] 375 | name = "urllib3" 376 | version = "2.3.0" 377 | requires_python = ">=3.9" 378 | summary = "HTTP library with thread-safe connection pooling, file post, and more." 
379 | groups = ["default"] 380 | files = [ 381 | {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, 382 | {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, 383 | ] 384 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "fast-langdetect" 3 | version = "0.3.2" 4 | description = "Quickly detect text language and segment language" 5 | authors = [ 6 | { name = "sudoskys", email = "coldlando@hotmail.com" }, 7 | ] 8 | dependencies = [ 9 | "robust-downloader>=0.0.2", 10 | "requests>=2.32.3", 11 | "fasttext-predict>=0.9.2.4", 12 | ] 13 | requires-python = ">=3.9" 14 | readme = "README.md" 15 | license = { text = "MIT" } 16 | 17 | [build-system] 18 | requires = ["pdm-backend"] 19 | build-backend = "pdm.backend" 20 | 21 | [tool.pdm] 22 | distribution = true 23 | 24 | [tool.pdm.dev-dependencies] 25 | dev = [ 26 | "pytest>=7.4.4", 27 | ] 28 | -------------------------------------------------------------------------------- /src/fast_langdetect/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Zafer Çavdar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
def is_japanese(string):
    """Return True if *string* contains at least one kana character.

    Scans for code points in the Hiragana (U+3040-U+309F) and Katakana
    (U+30A0-U+30FF) Unicode blocks. CJK ideographs (kanji) are deliberately
    not counted because they are shared with Chinese.

    :param string: Text to inspect
    :return: True if any kana character is present, False otherwise
    """
    # Inclusive upper bound so U+30FF (KATAKANA DIGRAPH KOTO) is detected;
    # U+3040 itself is unassigned, so the inclusive lower bound is harmless.
    return any(0x3040 <= ord(ch) <= 0x30FF for ch in string)


def detect_language(sentence: str, *, low_memory: bool = True):
    """
    Detect language

    :param sentence: str sentence
    :param low_memory: bool (default: True) whether to use low memory mode
    :return: ZH, EN, JA, KO, FR, DE, ES, .... (two uppercase letters)
    """
    # Default to "" so a missing "lang" key cannot raise AttributeError.
    lang_code = detect(sentence, low_memory=low_memory).get("lang", "").upper()
    # fastText frequently labels Chinese text as Japanese; only accept a JA
    # result when the sentence actually contains kana.
    if lang_code == "JA" and not is_japanese(sentence):
        lang_code = "ZH"
    return lang_code
4 | """ 5 | 6 | import hashlib 7 | import logging 8 | import os 9 | import platform 10 | import re 11 | import shutil 12 | import tempfile 13 | from pathlib import Path 14 | from typing import Dict, List, Optional, Union, Any 15 | 16 | import fasttext 17 | from robust_downloader import download 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | # Use system temporary directory as default cache directory 22 | DEFAULT_CACHE_DIR = Path(tempfile.gettempdir()) / "fasttext-langdetect" 23 | CACHE_DIRECTORY = os.getenv("FTLANG_CACHE", str(DEFAULT_CACHE_DIR)) 24 | FASTTEXT_LARGE_MODEL_URL = ( 25 | "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin" 26 | ) 27 | FASTTEXT_LARGE_MODEL_NAME = "lid.176.bin" 28 | _LOCAL_SMALL_MODEL_PATH = Path(__file__).parent / "resources" / "lid.176.ftz" 29 | 30 | 31 | class DetectError(Exception): 32 | """Base exception for language detection errors.""" 33 | 34 | pass 35 | 36 | 37 | class ModelVerifier: 38 | """Model file verification utilities.""" 39 | 40 | @staticmethod 41 | def calculate_md5(file_path: Union[str, Path], chunk_size: int = 8192) -> str: 42 | """ 43 | Calculate MD5 hash of a file. 44 | 45 | :param file_path: Path to the file 46 | :param chunk_size: Size of chunks to read 47 | 48 | :return: MD5 hash string 49 | """ 50 | md5 = hashlib.md5() 51 | with open(file_path, "rb") as f: 52 | for chunk in iter(lambda: f.read(chunk_size), b""): 53 | md5.update(chunk) 54 | return md5.hexdigest() 55 | 56 | @staticmethod 57 | def verify(file_path: Union[str, Path], expected_md5: str) -> bool: 58 | """ 59 | Verify file integrity using MD5 hash. 
60 | 61 | :param file_path: Path to the file 62 | :param expected_md5: Expected MD5 hash 63 | 64 | :return: True if hash matches, False otherwise 65 | """ 66 | return ModelVerifier.calculate_md5(file_path) == expected_md5 67 | 68 | 69 | class ModelDownloader: 70 | """Model download handler.""" 71 | 72 | @staticmethod 73 | def download(url: str, save_path: Path, proxy: Optional[str] = None) -> None: 74 | """ 75 | Download model file if not exists. 76 | 77 | :param url: URL to download from 78 | :param save_path: Path to save the model 79 | :param proxy: Optional proxy URL 80 | 81 | :raises: 82 | DetectError: If download fails 83 | """ 84 | if save_path.exists(): 85 | logger.info(f"fast-langdetect: Model exists at {save_path}") 86 | return 87 | 88 | logger.info(f"fast-langdetect: Downloading model from {url}") 89 | try: 90 | download( 91 | url=url, 92 | folder=str(save_path.parent), 93 | filename=save_path.name, 94 | proxy=proxy, 95 | retry_max=2, 96 | sleep_max=5, 97 | timeout=7, 98 | ) 99 | except Exception as e: 100 | raise DetectError(f"fast-langdetect: Download failed: {e}") 101 | 102 | 103 | class ModelLoader: 104 | """Model loading and caching handler.""" 105 | 106 | def __init__(self): 107 | self._verifier = ModelVerifier() 108 | self._downloader = ModelDownloader() 109 | 110 | def load_local(self, model_path: Path, verify_hash: Optional[str] = None) -> Any: 111 | """Load model from local file.""" 112 | if verify_hash and model_path.exists(): 113 | if not self._verifier.verify(model_path, verify_hash): 114 | logger.warning( 115 | f"fast-langdetect: MD5 verification failed for {model_path}. " 116 | "This may affect prediction accuracy." 
117 | ) 118 | 119 | if not model_path.exists(): 120 | raise DetectError(f"Model file not found: {model_path}") 121 | 122 | if platform.system() == "Windows": 123 | return self._load_windows_compatible(model_path) 124 | return self._load_unix(model_path) 125 | 126 | def load_with_download(self, model_path: Path, proxy: Optional[str] = None) -> Any: 127 | """Internal method to load model with download if needed.""" 128 | if not model_path.exists(): 129 | self._downloader.download(FASTTEXT_LARGE_MODEL_URL, model_path, proxy) 130 | return self.load_local(model_path) 131 | 132 | def _load_windows_compatible(self, model_path: Path) -> Any: 133 | """ 134 | Handle Windows path compatibility issues when loading FastText models. 135 | 136 | Attempts multiple strategies in order: 137 | 1. Direct loading if path contains only safe characters 138 | 2. Loading via relative path if possible 139 | 3. Copying to temporary file as last resort 140 | 141 | :param model_path: Path to the model file 142 | :return: Loaded FastText model 143 | :raises DetectError: If all loading strategies fail 144 | """ 145 | model_path_str = str(model_path.resolve()) 146 | 147 | # Try to load model directly 148 | try: 149 | return fasttext.load_model(model_path_str) 150 | except Exception as e: 151 | logger.debug(f"fast-langdetect: Load model failed: {e}") 152 | 153 | # Try to load model using relative path 154 | try: 155 | cwd = Path.cwd() 156 | rel_path = os.path.relpath(model_path, cwd) 157 | return fasttext.load_model(rel_path) 158 | except Exception as e: 159 | logger.debug(f"fast-langdetect: Failed to load model using relative path: {e}") 160 | 161 | # Use temporary file as last resort 162 | logger.debug(f"fast-langdetect: Using temporary file to load model: {model_path}") 163 | tmp_path = None 164 | try: 165 | # Use NamedTemporaryFile to create a temporary file 166 | tmp_fd, tmp_path = tempfile.mkstemp(suffix='.bin') 167 | os.close(tmp_fd) # Close file descriptor 168 | 169 | # Copy model file to 
temporary location 170 | shutil.copy2(model_path, tmp_path) 171 | return fasttext.load_model(tmp_path) 172 | except Exception as e: 173 | raise DetectError(f"Failed to load model using temporary file: {e}") 174 | finally: 175 | # Clean up temporary file 176 | if tmp_path and os.path.exists(tmp_path): 177 | try: 178 | os.unlink(tmp_path) 179 | except (OSError, PermissionError) as e: 180 | logger.warning(f"fast-langdetect: Failed to delete temporary file {tmp_path}: {e}") 181 | # Plan to delete on next reboot on Windows 182 | if platform.system() == "Windows": 183 | try: 184 | import _winapi 185 | _winapi.MoveFileEx(tmp_path, None, _winapi.MOVEFILE_DELAY_UNTIL_REBOOT) 186 | except (ImportError, AttributeError, OSError) as we: 187 | logger.warning(f"fast-langdetect: Failed to schedule file deletion: {we}") 188 | 189 | def _load_unix(self, model_path: Path) -> Any: 190 | """Load model on Unix-like systems.""" 191 | try: 192 | return fasttext.load_model(str(model_path)) 193 | except Exception as e: 194 | raise DetectError(f"fast-langdetect: Failed to load model: {e}") 195 | 196 | 197 | class LangDetectConfig: 198 | """ 199 | Configuration for language detection. 200 | 201 | :param cache_dir: Directory for storing downloaded models 202 | :param custom_model_path: Path to custom model file (if using own model) 203 | :param proxy: HTTP proxy for downloads 204 | :param allow_fallback: Whether to fallback to small model 205 | :param disable_verify: Whether to disable MD5 verification 206 | :param normalize_input: Whether to normalize input text (e.g. 
lowercase for uppercase text) 207 | """ 208 | 209 | def __init__( 210 | self, 211 | cache_dir: Optional[str] = None, 212 | custom_model_path: Optional[str] = None, 213 | proxy: Optional[str] = None, 214 | allow_fallback: bool = True, 215 | disable_verify: bool = False, 216 | verify_hash: Optional[str] = None, 217 | normalize_input: bool = True, 218 | ): 219 | self.cache_dir = cache_dir or CACHE_DIRECTORY 220 | self.custom_model_path = custom_model_path 221 | self.proxy = proxy 222 | self.allow_fallback = allow_fallback 223 | # Only verify large model 224 | self.disable_verify = disable_verify 225 | self.verify_hash = verify_hash 226 | self.normalize_input = normalize_input 227 | if self.custom_model_path and not Path(self.custom_model_path).exists(): 228 | raise FileNotFoundError(f"fast-langdetect: Target model file not found: {self.custom_model_path}") 229 | 230 | 231 | class LangDetector: 232 | """Language detector using FastText models.""" 233 | VERIFY_FASTTEXT_LARGE_MODEL = "01810bc59c6a3d2b79c79e6336612f65" 234 | 235 | def __init__(self, config: Optional[LangDetectConfig] = None): 236 | """ 237 | Initialize language detector. 238 | 239 | :param config: Optional configuration for the detector 240 | """ 241 | self._models = {} 242 | self.config = config or LangDetectConfig() 243 | self._model_loader = ModelLoader() 244 | 245 | @staticmethod 246 | def _preprocess_text(text: str) -> str: 247 | """ 248 | Check text for newline characters and length. 249 | 250 | :param text: Input text 251 | :return: Processed text 252 | """ 253 | if len(text) > 100: 254 | logger.warning( 255 | "fast-langdetect: Text may be too long. " 256 | "Consider passing only a single sentence for accurate prediction." 257 | ) 258 | if "\n" in text: 259 | logger.warning( 260 | "fast-langdetect: Newline characters will be removed. " 261 | "Input should not contain newline characters. or FastText will raise an error." 
            )
            text = text.replace("\n", " ")
        return text

    @staticmethod
    def _normalize_text(text: str, should_normalize: bool = False) -> str:
        """
        Normalize text based on configuration.

        Currently, handles:
        - Lowercasing uppercase text to prevent misdetection as Japanese

        :param text: Input text
        :param should_normalize: Whether normalization should be applied
        :return: Normalized text
        """
        # Skip normalization entirely when it is disabled.
        if not should_normalize:
            return text

        # Lowercase text that is all-uppercase, or mostly uppercase
        # (>80% of its Latin letters) and longer than 5 characters.
        # https://github.com/LlmKira/fast-langdetect/issues/14
        if text.isupper() or (
            len(re.findall(r'[A-Z]', text)) > 0.8 * len(re.findall(r'[A-Za-z]', text))
            and len(text) > 5
        ):
            return text.lower()

        return text

    def _get_model(self, low_memory: bool = True) -> Any:
        """
        Get a cached model or load the appropriate one.

        :param low_memory: True selects the bundled small model;
            False selects the downloadable large model.
        :return: Loaded FastText model
        :raises DetectError: If loading fails and fallback is not allowed
        """
        cache_key = "low_memory" if low_memory else "high_memory"
        if model := self._models.get(cache_key):
            return model

        try:
            # NOTE(review): the branches below mutate self.config.verify_hash as
            # a side effect of loading — confirm this is intended for shared configs.
            if self.config.custom_model_path is not None:
                # Load Custom Model
                if self.config.disable_verify:
                    self.config.verify_hash = None
                model = self._model_loader.load_local(Path(self.config.custom_model_path))
            elif low_memory is True:
                self.config.verify_hash = None
                # Load Small Model (bundled with the package)
                model = self._model_loader.load_local(_LOCAL_SMALL_MODEL_PATH)
            else:
                if self.config.verify_hash is None and not self.config.disable_verify:
                    self.config.verify_hash = self.VERIFY_FASTTEXT_LARGE_MODEL
                # Download and Load Large Model
                model_path = Path(self.config.cache_dir) / FASTTEXT_LARGE_MODEL_NAME
                model = self._model_loader.load_with_download(
                    model_path,
                    self.config.proxy,
                )

            self._models[cache_key] = model
            return model
        except Exception as e:
            # A large-model failure may fall back to the small bundled model.
            if low_memory is not True and self.config.allow_fallback:
                logger.info("fast-langdetect: Falling back to low-memory model...")
                return self._get_model(low_memory=True)
            raise DetectError("Failed to load model") from e

    def detect(
        self, text: str, low_memory: bool = True
    ) -> Dict[str, Union[str, float]]:
        """
        Detect primary language of text.

        :param text: Input text
        :param low_memory: Whether to use memory-efficient model

        :return: Dictionary with language and confidence score

        :raises:
            DetectError: If detection fails
        """
        model = self._get_model(low_memory)
        text = self._preprocess_text(text)
        normalized_text = self._normalize_text(text, self.config.normalize_input)
        try:
            labels, scores = model.predict(normalized_text)
            return {
                # Labels look like "__label__en"; strip the prefix.
                "lang": labels[0].replace("__label__", ""),
                # FastText can report scores marginally above 1.0; clamp.
                "score": min(float(scores[0]), 1.0),
            }
        except Exception as e:
            logger.error(f"fast-langdetect: Language detection error: {e}")
            raise DetectError("Language detection failed") from e

    def detect_multilingual(
        self,
        text: str,
        low_memory: bool = False,
        k: int = 5,
        threshold: float = 0.0,
    ) -> List[Dict[str, Any]]:
        """
        Detect multiple possible languages in text.
362 | 363 | :param text: Input text 364 | :param low_memory: Whether to use memory-efficient model 365 | :param k: Number of top languages to return 366 | :param threshold: Minimum confidence threshold 367 | 368 | :return: List of dictionaries with languages and scores 369 | 370 | :raises: 371 | DetectError: If detection fails 372 | """ 373 | model = self._get_model(low_memory) 374 | text = self._preprocess_text(text) 375 | normalized_text = self._normalize_text(text, self.config.normalize_input) 376 | try: 377 | labels, scores = model.predict(normalized_text, k=k, threshold=threshold) 378 | results = [ 379 | { 380 | "lang": label.replace("__label__", ""), 381 | "score": min(float(score), 1.0), 382 | } 383 | for label, score in zip(labels, scores) 384 | ] 385 | return sorted(results, key=lambda x: x["score"], reverse=True) 386 | except Exception as e: 387 | logger.error(f"fast-langdetect: Multilingual detection error: {e}") 388 | raise DetectError("Multilingual detection failed.") 389 | 390 | 391 | # Global instance for simple usage 392 | _default_detector = LangDetector() 393 | 394 | 395 | def detect( 396 | text: str, 397 | *, 398 | low_memory: bool = True, 399 | model_download_proxy: Optional[str] = None, 400 | use_strict_mode: bool = False, 401 | config: Optional[LangDetectConfig] = None, 402 | ) -> Dict[str, Union[str, float]]: 403 | """ 404 | Simple interface for language detection. 405 | 406 | Too long or too short text will effect the accuracy of the prediction. 
407 | 408 | :param text: Input text without newline characters 409 | :param low_memory: Whether to use memory-efficient model 410 | :param model_download_proxy: [DEPRECATED] Optional proxy for model download 411 | :param use_strict_mode: [DEPRECATED] Disable fallback to small model 412 | :param config: Optional LangDetectConfig object for advanced configuration 413 | 414 | :return: Dictionary with language and confidence score 415 | """ 416 | # Provide config 417 | if config is not None: 418 | detector = LangDetector(config) 419 | return detector.detect(text, low_memory=low_memory) 420 | 421 | # Check if any custom parameters are provided 422 | has_custom_params = any([ 423 | model_download_proxy is not None, 424 | use_strict_mode, 425 | ]) 426 | if has_custom_params: 427 | # Show warning if using individual parameters 428 | logger.warning( 429 | "fast-langdetect: Using individual parameters is deprecated. " 430 | "Consider using LangDetectConfig for better configuration management. " 431 | "Will be removed in next major release. see https://github.com/LlmKira/fast-langdetect/pull/16" 432 | ) 433 | custom_config = LangDetectConfig( 434 | proxy=model_download_proxy, 435 | allow_fallback=not use_strict_mode, 436 | ) 437 | detector = LangDetector(custom_config) 438 | return detector.detect(text, low_memory=low_memory) 439 | 440 | # Use default detector 441 | return _default_detector.detect(text, low_memory=low_memory) 442 | 443 | 444 | def detect_multilingual( 445 | text: str, 446 | *, 447 | low_memory: bool = False, 448 | model_download_proxy: Optional[str] = None, 449 | k: int = 5, 450 | threshold: float = 0.0, 451 | use_strict_mode: bool = False, 452 | config: Optional[LangDetectConfig] = None, 453 | ) -> List[Dict[str, Any]]: 454 | """ 455 | Simple interface for multi-language detection. 456 | 457 | Too long or too short text will effect the accuracy of the prediction. 
458 | 459 | :param text: Input text without newline characters 460 | :param low_memory: Whether to use memory-efficient model 461 | :param k: Number of top languages to return 462 | :param threshold: Minimum confidence threshold 463 | :param model_download_proxy: [DEPRECATED] Optional proxy for model download 464 | :param use_strict_mode: [DEPRECATED] Disable fallback to small model 465 | :param config: Optional LangDetectConfig object for advanced configuration 466 | 467 | :return: List of dictionaries with languages and scores 468 | """ 469 | # Use provided config or create new config 470 | if config is not None: 471 | detector = LangDetector(config) 472 | return detector.detect_multilingual( 473 | text, low_memory=low_memory, k=k, threshold=threshold 474 | ) 475 | 476 | # Check if any custom parameters are provided 477 | has_custom_params = any([ 478 | model_download_proxy is not None, 479 | use_strict_mode, 480 | ]) 481 | if has_custom_params: 482 | # Show warning if using individual parameters 483 | logger.warning( 484 | "fast-langdetect: Using individual parameters is deprecated. " 485 | "Consider using LangDetectConfig for better configuration management. " 486 | "Will be removed in next major release. 
see https://github.com/LlmKira/fast-langdetect/pull/16" 487 | ) 488 | custom_config = LangDetectConfig( 489 | proxy=model_download_proxy, 490 | allow_fallback=not use_strict_mode, 491 | ) 492 | detector = LangDetector(custom_config) 493 | return detector.detect_multilingual( 494 | text, low_memory=low_memory, k=k, threshold=threshold 495 | ) 496 | 497 | # Use default detector 498 | return _default_detector.detect_multilingual( 499 | text, low_memory=low_memory, k=k, threshold=threshold 500 | ) 501 | -------------------------------------------------------------------------------- /src/fast_langdetect/resources/NOTICE.MD: -------------------------------------------------------------------------------- 1 | # License Notice 2 | 3 | ## Files `fast_langdetect/ft_detect/resources/lid.176.ftz` 4 | 5 | The models are distributed under 6 | the [Creative Commons Attribution-Share-Alike License 3.0](https://creativecommons.org/licenses/by-sa/3.0/). 7 | 8 | ## References 9 | 10 | https://fasttext.cc/docs/en/language-identification.html 11 | https://creativecommons.org/licenses/by-sa/3.0/ -------------------------------------------------------------------------------- /src/fast_langdetect/resources/lid.176.ftz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LlmKira/fast-langdetect/f4fc0321fa0aea7a1b9c9797cfcf800ef18cf62d/src/fast_langdetect/resources/lid.176.ftz -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | def pytest_configure(config): 4 | """注册自定义标记。""" 5 | config.addinivalue_line( 6 | "markers", 7 | "slow: Run in long progress" 8 | ) 9 | 
config.addinivalue_line( 10 | "markers", 11 | "real: Test with real model" 12 | ) -------------------------------------------------------------------------------- /tests/test_chinese_path.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import shutil 4 | import platform 5 | from pathlib import Path 6 | import pytest 7 | from fast_langdetect.infer import LangDetectConfig, LangDetector 8 | 9 | @pytest.mark.skipif(platform.system() != "Windows", reason="Windows path test") 10 | def test_model_loading_with_chinese_path(): 11 | # 创建包含中文字符的临时目录 12 | temp_dir = Path(tempfile.gettempdir()) / "测试_模型_路径" 13 | os.makedirs(temp_dir, exist_ok=True) 14 | 15 | try: 16 | # 使用项目中已有的模型文件 17 | # 查找项目根目录 18 | project_root = Path(__file__).parent.parent 19 | model_path = project_root / "src" / "fast_langdetect" / "resources" / "lid.176.ftz" 20 | 21 | if not model_path.exists(): 22 | pytest.skip(f"Model file does not exist: {model_path}") 23 | 24 | # 复制模型文件到中文路径 25 | chinese_model_path = temp_dir / "测试模型.ftz" 26 | shutil.copy2(model_path, chinese_model_path) 27 | 28 | # 正确使用自定义模型路径 29 | config = LangDetectConfig( 30 | custom_model_path=str(chinese_model_path), 31 | allow_fallback=False 32 | ) 33 | detector = LangDetector(config) 34 | result = detector.detect("This is a test") 35 | 36 | assert "lang" in result 37 | assert "score" in result 38 | finally: 39 | # 清理 40 | shutil.rmtree(temp_dir, ignore_errors=True) -------------------------------------------------------------------------------- /tests/test_detect.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2024/1/17 下午5:28 3 | 4 | def test_muti_detect(): 5 | from fast_langdetect import detect_multilingual,LangDetectConfig 6 | result = detect_multilingual( 7 | "hello world", 8 | low_memory=True, 9 | config=LangDetectConfig(allow_fallback=False) 10 | ) 11 | assert result[0].get("lang") == 
"en", "ft_detect error" 12 | return True 13 | 14 | 15 | def test_large(): 16 | from fast_langdetect import detect_multilingual, LangDetectConfig 17 | result = detect_multilingual("hello world", low_memory=True, config=LangDetectConfig(allow_fallback=False)) 18 | assert result[0].get("lang") == "en", "ft_detect error" 19 | result = detect_multilingual("你好世界", low_memory=False, config=LangDetectConfig(allow_fallback=False)) 20 | assert result[0].get("lang") == "zh", "ft_detect error" 21 | 22 | 23 | def test_detect(): 24 | from fast_langdetect import detect, LangDetectConfig 25 | assert detect("hello world", low_memory=False, config=LangDetectConfig(allow_fallback=False))["lang"] == "en", "ft_detect error" 26 | assert detect("你好世界", low_memory=True, config=LangDetectConfig(allow_fallback=False))["lang"] == "zh", "ft_detect error" 27 | assert detect("こんにちは世界", low_memory=False, config=LangDetectConfig(allow_fallback=False))["lang"] == "ja", "ft_detect error" 28 | assert detect("안녕하세요 세계", low_memory=True, config=LangDetectConfig(allow_fallback=False))["lang"] == "ko", "ft_detect error" 29 | assert detect("Bonjour le monde", low_memory=False, config=LangDetectConfig(allow_fallback=False))["lang"] == "fr", "ft_detect error" 30 | 31 | 32 | def test_detect_totally(): 33 | from fast_langdetect import detect_language 34 | assert detect_language("hello world") == "EN", "ft_detect error" 35 | assert detect_language("你好世界") == "ZH", "ft_detect error" 36 | assert detect_language("こんにちは世界") == "JA", "ft_detect error" 37 | assert detect_language("안녕하세요 세계") == "KO", "ft_detect error" 38 | assert detect_language("Bonjour le monde") == "FR", "ft_detect error" 39 | assert detect_language("Hallo Welt") == "DE", "ft_detect error" 40 | assert detect_language( 41 | "這些機構主辦的課程,多以基本電腦使用為主,例如文書處理、中文輸入、互聯網應用等" 42 | ) == "ZH", "ft_detect error" 43 | 44 | 45 | def test_failed_example(): 46 | from fast_langdetect import detect 47 | try: 48 | detect("hello world\nNEW LINE", low_memory=True) 49 | 
except Exception as e: 50 | assert isinstance(e, Exception), "ft_detect exception error" 51 | -------------------------------------------------------------------------------- /tests/test_real_detection.py: -------------------------------------------------------------------------------- 1 | """Real environment tests for language detection.""" 2 | 3 | import pytest 4 | from fast_langdetect import ( 5 | detect, 6 | detect_multilingual, 7 | LangDetector, 8 | LangDetectConfig, 9 | DetectError, 10 | ) 11 | 12 | # Test samples with known languages 13 | SAMPLES = [ 14 | ("Hello world", "en"), 15 | ("你好世界", "zh"), 16 | ("こんにちは世界", "ja"), 17 | ("Привет мир", "ru"), 18 | ("안녕하세요 세계", "ko"), 19 | ] 20 | 21 | # Mixed language samples 22 | MIXED_SAMPLES = [ 23 | "Hello 世界 こんにちは", # en-zh-ja 24 | "你好 world こんにちは", # zh-en-ja 25 | "Bonjour 世界 hello", # fr-zh-en 26 | ] 27 | 28 | 29 | @pytest.mark.real 30 | class TestRealDetection: 31 | """Test language detection with real FastText models.""" 32 | 33 | @pytest.mark.parametrize("text,expected", SAMPLES) 34 | def test_basic_detection(self, text, expected): 35 | """Test basic language detection for various languages.""" 36 | result = detect(text) 37 | print(result) 38 | assert result["lang"] == expected 39 | assert 0.1 <= result["score"] <= 1.0 40 | 41 | def test_multilingual_detection(self): 42 | """Test multilingual detection with mixed language text.""" 43 | for text in MIXED_SAMPLES: 44 | results = detect_multilingual(text, k=3) 45 | assert len(results) == 3 46 | # 验证结果是按置信度排序的 47 | assert all( 48 | results[i]["score"] >= results[i + 1]["score"] 49 | for i in range(len(results) - 1) 50 | ) 51 | 52 | def test_low_memory_mode(self): 53 | """Test detection works in low memory mode.""" 54 | for text, expected in SAMPLES: 55 | result = detect(text, low_memory=True) 56 | assert result["lang"] == expected 57 | 58 | def test_strict_mode(self): 59 | """Test detection in strict mode.""" 60 | result = detect(SAMPLES[0][0], 
use_strict_mode=True)
        assert result["lang"] == SAMPLES[0][1]

    def test_long_text(self):
        """Test detection with longer text."""
        long_text = " ".join([text for text, _ in SAMPLES])
        result = detect(long_text)
        assert "lang" in result
        assert "score" in result

    def test_very_short_text(self):
        """Test detection with very short text."""
        result = detect("Hi")
        assert "lang" in result
        assert "score" in result

    def test_custom_config(self):
        """Test detection with custom configuration."""
        config = LangDetectConfig(allow_fallback=False)
        detector = LangDetector(config)
        result = detector.detect(SAMPLES[0][0])
        assert result["lang"] == SAMPLES[0][1]

    def test_not_found_model(self):
        """A nonexistent custom model path must raise FileNotFoundError at config time."""
        # Build a config that points at a nonexistent model path

        with pytest.raises(FileNotFoundError):
            config = LangDetectConfig(
                cache_dir="/nonexistent/path",
                custom_model_path="invalid_path",
                allow_fallback=True,
            )
            detector = LangDetector(config)
            detector.detect("Hello world", low_memory=False)

    def test_not_found_model_with_fallback(self):
        """Test fallback to small model when large model fails to load."""
        config = LangDetectConfig(
            cache_dir="/nonexistent/path",
            allow_fallback=True,
        )
        detector = LangDetector(config)
        result = detector.detect("Hello world", low_memory=False)
        assert result["lang"] == "en"
        assert 0.1 <= result["score"] <= 1.0

@pytest.mark.real
@pytest.mark.slow
class TestEdgeCases:
    """Test language detection edge cases with real models."""

    def test_empty_string(self):
        """Test detection with empty string."""
        result = detect("")
        assert "lang" in result
        assert "score" in result

    def test_special_characters(self):
        """Test detection with special characters."""
        texts = [
            "Hello! @#$%^&*()",
            "你好！@#¥%……&*（）",
            "こんにちは！@#$%^&*()",
        ]
        for text in texts:
            result = detect(text)
            assert "lang" in result
            assert "score" in result

    def test_numbers_only(self):
        """Test detection with numbers only."""
        result = detect("12345")
        assert "lang" in result
        assert "score" in result

    def test_mixed_scripts(self):
        """Test detection with mixed scripts."""
        mixed_texts = [
            "Hello你好こんにちは",
            "12345 Hello 你好",
            "Hello! 你好! こんにちは!",
        ]
        for text in mixed_texts:
            results = detect_multilingual(text, k=3)
            assert len(results) == 3