├── .github └── workflows │ └── pythonpackage.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .python-version ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── conftest.py ├── experiments ├── baseline.yaml ├── networks │ └── relu_small.py └── tfidf_small.yaml ├── lidtk ├── __init__.py ├── analysis │ ├── __init__.py │ ├── example.pdf │ └── unicode_block.py ├── classifiers │ ├── __init__.py │ ├── char_distribution │ │ ├── __init__.py │ │ └── char_dist_metric_train_test.py │ ├── char_features.py │ ├── cld2_mod.py │ ├── config │ │ ├── cld2.yaml │ │ ├── google-cloud.yaml │ │ ├── langdetect.yaml │ │ ├── langid.yaml │ │ ├── textcat.yaml │ │ ├── tfidf_nn.yaml │ │ ├── tfidf_nn_big.yaml │ │ └── tfidf_nn_case_sensitive.yaml │ ├── google_mod.py │ ├── langdetect_mod.py │ ├── langid_mod.py │ ├── mlp.py │ ├── nn │ │ └── __init__.py │ ├── text_cat.py │ ├── tfidf_features.py │ └── tfidf_nn.py ├── cli.py ├── config.yaml ├── data │ ├── __init__.py │ ├── char_distribution.py │ ├── create_ml_dataset.py │ ├── download_documents.py │ ├── language_utils.py │ ├── pickle_to_txt.py │ └── wili.py ├── features.py ├── get_predictions.py ├── languages.csv ├── models │ ├── __init__.py │ ├── detectlanguage.csv │ ├── service_cld2.py │ └── service_detectlanguage.py └── utils.py ├── requirements ├── ci.in ├── ci.txt ├── dev.in ├── dev.txt ├── prod.in └── prod.txt ├── setup.cfg ├── setup.py ├── tests ├── test_char_dist.py ├── test_classifiers.py ├── test_download_documents.py ├── test_language_utils.py └── test_utils.py └── tox.ini /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/.github/workflows/pythonpackage.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/.gitignore -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/.isort.cfg -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.8.3 2 | 3.7.7 3 | 3.6.10 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/.travis.yml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/README.md -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/conftest.py -------------------------------------------------------------------------------- /experiments/baseline.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/experiments/baseline.yaml -------------------------------------------------------------------------------- /experiments/networks/relu_small.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/experiments/networks/relu_small.py -------------------------------------------------------------------------------- /experiments/tfidf_small.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/experiments/tfidf_small.yaml -------------------------------------------------------------------------------- /lidtk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/__init__.py -------------------------------------------------------------------------------- /lidtk/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/analysis/__init__.py -------------------------------------------------------------------------------- /lidtk/analysis/example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/analysis/example.pdf -------------------------------------------------------------------------------- /lidtk/analysis/unicode_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/analysis/unicode_block.py -------------------------------------------------------------------------------- /lidtk/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/__init__.py -------------------------------------------------------------------------------- /lidtk/classifiers/char_distribution/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lidtk/classifiers/char_distribution/char_dist_metric_train_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/char_distribution/char_dist_metric_train_test.py -------------------------------------------------------------------------------- /lidtk/classifiers/char_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/char_features.py -------------------------------------------------------------------------------- /lidtk/classifiers/cld2_mod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/cld2_mod.py -------------------------------------------------------------------------------- /lidtk/classifiers/config/cld2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/config/cld2.yaml -------------------------------------------------------------------------------- /lidtk/classifiers/config/google-cloud.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/config/google-cloud.yaml -------------------------------------------------------------------------------- /lidtk/classifiers/config/langdetect.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/config/langdetect.yaml -------------------------------------------------------------------------------- /lidtk/classifiers/config/langid.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/config/langid.yaml -------------------------------------------------------------------------------- /lidtk/classifiers/config/textcat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/config/textcat.yaml -------------------------------------------------------------------------------- /lidtk/classifiers/config/tfidf_nn.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/config/tfidf_nn.yaml -------------------------------------------------------------------------------- /lidtk/classifiers/config/tfidf_nn_big.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/config/tfidf_nn_big.yaml -------------------------------------------------------------------------------- /lidtk/classifiers/config/tfidf_nn_case_sensitive.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/config/tfidf_nn_case_sensitive.yaml -------------------------------------------------------------------------------- /lidtk/classifiers/google_mod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/google_mod.py -------------------------------------------------------------------------------- /lidtk/classifiers/langdetect_mod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/langdetect_mod.py -------------------------------------------------------------------------------- /lidtk/classifiers/langid_mod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/langid_mod.py -------------------------------------------------------------------------------- /lidtk/classifiers/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/mlp.py -------------------------------------------------------------------------------- /lidtk/classifiers/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/nn/__init__.py -------------------------------------------------------------------------------- /lidtk/classifiers/text_cat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/text_cat.py -------------------------------------------------------------------------------- /lidtk/classifiers/tfidf_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/tfidf_features.py -------------------------------------------------------------------------------- /lidtk/classifiers/tfidf_nn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/classifiers/tfidf_nn.py -------------------------------------------------------------------------------- /lidtk/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/cli.py -------------------------------------------------------------------------------- /lidtk/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/config.yaml -------------------------------------------------------------------------------- /lidtk/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lidtk/data/char_distribution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/data/char_distribution.py -------------------------------------------------------------------------------- /lidtk/data/create_ml_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/data/create_ml_dataset.py -------------------------------------------------------------------------------- /lidtk/data/download_documents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/data/download_documents.py -------------------------------------------------------------------------------- /lidtk/data/language_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/data/language_utils.py -------------------------------------------------------------------------------- /lidtk/data/pickle_to_txt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/data/pickle_to_txt.py -------------------------------------------------------------------------------- /lidtk/data/wili.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/data/wili.py -------------------------------------------------------------------------------- /lidtk/features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/features.py -------------------------------------------------------------------------------- /lidtk/get_predictions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/get_predictions.py -------------------------------------------------------------------------------- /lidtk/languages.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/languages.csv -------------------------------------------------------------------------------- /lidtk/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lidtk/models/detectlanguage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/models/detectlanguage.csv -------------------------------------------------------------------------------- /lidtk/models/service_cld2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/models/service_cld2.py -------------------------------------------------------------------------------- /lidtk/models/service_detectlanguage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/models/service_detectlanguage.py -------------------------------------------------------------------------------- /lidtk/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/lidtk/utils.py -------------------------------------------------------------------------------- /requirements/ci.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/requirements/ci.in -------------------------------------------------------------------------------- /requirements/ci.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/requirements/ci.txt -------------------------------------------------------------------------------- /requirements/dev.in: -------------------------------------------------------------------------------- 1 | pip-tools 2 | pydocstyle 3 | twine 4 | wheel 5 | -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/requirements/dev.txt -------------------------------------------------------------------------------- /requirements/prod.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/requirements/prod.in -------------------------------------------------------------------------------- /requirements/prod.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/requirements/prod.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/setup.py -------------------------------------------------------------------------------- /tests/test_char_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/tests/test_char_dist.py -------------------------------------------------------------------------------- /tests/test_classifiers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/tests/test_classifiers.py -------------------------------------------------------------------------------- /tests/test_download_documents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/tests/test_download_documents.py -------------------------------------------------------------------------------- /tests/test_language_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/tests/test_language_utils.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/lidtk/HEAD/tox.ini --------------------------------------------------------------------------------