├── LICENSE ├── README.md ├── contrib ├── README.md ├── correct_moses_tokenizer.py ├── hindi_to_kannada_transliterator.py └── indic_scraper_project_sample.ipynb ├── docs ├── Makefile ├── cmd.rst ├── code.rst ├── conf.py ├── index.rst ├── indicnlp.MD ├── indicnlp.cli.rst ├── indicnlp.morph.rst ├── indicnlp.normalize.rst ├── indicnlp.pdf ├── indicnlp.rst ├── indicnlp.script.rst ├── indicnlp.syllable.rst ├── indicnlp.tokenize.rst ├── indicnlp.transliterate.rst ├── make.bat └── modules.rst ├── indicnlp ├── __init__.py ├── cli │ ├── __init__.py │ └── cliparser.py ├── common.py ├── langinfo.py ├── loader.py ├── morph │ ├── __init__.py │ └── unsupervised_morph.py ├── normalize │ ├── __init__.py │ └── indic_normalize.py ├── script │ ├── __init__.py │ ├── english_script.py │ ├── indic_scripts.py │ └── phonetic_sim.py ├── syllable │ ├── __init__.py │ └── syllabifier.py ├── test │ ├── __init__.py │ └── unit │ │ └── __init__.py ├── tokenize │ ├── __init__.py │ ├── indic_detokenize.py │ ├── indic_tokenize.py │ └── sentence_tokenize.py ├── transliterate │ ├── __init__.py │ ├── acronym_transliterator.py │ ├── script_unifier.py │ ├── sinhala_transliterator.py │ └── unicode_transliterate.py └── version.txt ├── requirements.txt ├── setup.py └── test_data ├── morph └── mr.txt ├── normalize ├── bn.txt ├── en.txt ├── gu.txt ├── hi.txt ├── kK.txt ├── ma.txt ├── mr.txt ├── pa.txt ├── ta.txt ├── te.txt └── ur.txt ├── tokenize └── trivial.txt └── transliterate.ipynb /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/README.md -------------------------------------------------------------------------------- /contrib/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/contrib/README.md -------------------------------------------------------------------------------- /contrib/correct_moses_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/contrib/correct_moses_tokenizer.py -------------------------------------------------------------------------------- /contrib/hindi_to_kannada_transliterator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/contrib/hindi_to_kannada_transliterator.py -------------------------------------------------------------------------------- /contrib/indic_scraper_project_sample.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/contrib/indic_scraper_project_sample.ipynb -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/cmd.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/cmd.rst -------------------------------------------------------------------------------- /docs/code.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/code.rst -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/indicnlp.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.MD -------------------------------------------------------------------------------- /docs/indicnlp.cli.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.cli.rst -------------------------------------------------------------------------------- /docs/indicnlp.morph.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.morph.rst -------------------------------------------------------------------------------- /docs/indicnlp.normalize.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.normalize.rst -------------------------------------------------------------------------------- /docs/indicnlp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.pdf -------------------------------------------------------------------------------- /docs/indicnlp.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.rst -------------------------------------------------------------------------------- /docs/indicnlp.script.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.script.rst -------------------------------------------------------------------------------- /docs/indicnlp.syllable.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.syllable.rst -------------------------------------------------------------------------------- /docs/indicnlp.tokenize.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.tokenize.rst -------------------------------------------------------------------------------- /docs/indicnlp.transliterate.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/indicnlp.transliterate.rst -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/docs/modules.rst -------------------------------------------------------------------------------- /indicnlp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/__init__.py -------------------------------------------------------------------------------- /indicnlp/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indicnlp/cli/cliparser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/cli/cliparser.py -------------------------------------------------------------------------------- /indicnlp/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/common.py -------------------------------------------------------------------------------- /indicnlp/langinfo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/langinfo.py -------------------------------------------------------------------------------- /indicnlp/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/loader.py -------------------------------------------------------------------------------- /indicnlp/morph/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indicnlp/morph/unsupervised_morph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/morph/unsupervised_morph.py -------------------------------------------------------------------------------- /indicnlp/normalize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indicnlp/normalize/indic_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/normalize/indic_normalize.py -------------------------------------------------------------------------------- /indicnlp/script/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indicnlp/script/english_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/script/english_script.py -------------------------------------------------------------------------------- /indicnlp/script/indic_scripts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/script/indic_scripts.py -------------------------------------------------------------------------------- /indicnlp/script/phonetic_sim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/script/phonetic_sim.py -------------------------------------------------------------------------------- /indicnlp/syllable/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indicnlp/syllable/syllabifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/syllable/syllabifier.py -------------------------------------------------------------------------------- /indicnlp/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indicnlp/test/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indicnlp/tokenize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indicnlp/tokenize/indic_detokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/tokenize/indic_detokenize.py -------------------------------------------------------------------------------- /indicnlp/tokenize/indic_tokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/tokenize/indic_tokenize.py -------------------------------------------------------------------------------- /indicnlp/tokenize/sentence_tokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/tokenize/sentence_tokenize.py -------------------------------------------------------------------------------- /indicnlp/transliterate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indicnlp/transliterate/acronym_transliterator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/transliterate/acronym_transliterator.py -------------------------------------------------------------------------------- /indicnlp/transliterate/script_unifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/transliterate/script_unifier.py -------------------------------------------------------------------------------- /indicnlp/transliterate/sinhala_transliterator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/transliterate/sinhala_transliterator.py -------------------------------------------------------------------------------- /indicnlp/transliterate/unicode_transliterate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/indicnlp/transliterate/unicode_transliterate.py -------------------------------------------------------------------------------- /indicnlp/version.txt: -------------------------------------------------------------------------------- 1 | 0.92 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/setup.py -------------------------------------------------------------------------------- /test_data/morph/mr.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/morph/mr.txt -------------------------------------------------------------------------------- /test_data/normalize/bn.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/bn.txt -------------------------------------------------------------------------------- /test_data/normalize/en.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/en.txt -------------------------------------------------------------------------------- /test_data/normalize/gu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/gu.txt -------------------------------------------------------------------------------- /test_data/normalize/hi.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/hi.txt -------------------------------------------------------------------------------- /test_data/normalize/kK.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/kK.txt -------------------------------------------------------------------------------- /test_data/normalize/ma.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/ma.txt -------------------------------------------------------------------------------- /test_data/normalize/mr.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/mr.txt -------------------------------------------------------------------------------- /test_data/normalize/pa.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/pa.txt -------------------------------------------------------------------------------- /test_data/normalize/ta.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/ta.txt -------------------------------------------------------------------------------- /test_data/normalize/te.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/te.txt -------------------------------------------------------------------------------- /test_data/normalize/ur.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/normalize/ur.txt -------------------------------------------------------------------------------- /test_data/tokenize/trivial.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/tokenize/trivial.txt -------------------------------------------------------------------------------- /test_data/transliterate.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anoopkunchukuttan/indic_nlp_library/HEAD/test_data/transliterate.ipynb --------------------------------------------------------------------------------