├── .appveyor.yml ├── .github └── workflows │ └── test.yaml ├── CONTRIBUTORS.md ├── LICENSE ├── README.md ├── SECURITY.md ├── requirements.txt ├── sacremoses ├── __init__.py ├── __main__.py ├── chinese.py ├── cli.py ├── corpus.py ├── data │ ├── nonbreaking_prefixes │ │ ├── README.txt │ │ ├── nonbreaking_prefix.as │ │ ├── nonbreaking_prefix.bn │ │ ├── nonbreaking_prefix.ca │ │ ├── nonbreaking_prefix.cs │ │ ├── nonbreaking_prefix.de │ │ ├── nonbreaking_prefix.el │ │ ├── nonbreaking_prefix.en │ │ ├── nonbreaking_prefix.es │ │ ├── nonbreaking_prefix.et │ │ ├── nonbreaking_prefix.fi │ │ ├── nonbreaking_prefix.fr │ │ ├── nonbreaking_prefix.ga │ │ ├── nonbreaking_prefix.gu │ │ ├── nonbreaking_prefix.hi │ │ ├── nonbreaking_prefix.hu │ │ ├── nonbreaking_prefix.is │ │ ├── nonbreaking_prefix.it │ │ ├── nonbreaking_prefix.kn │ │ ├── nonbreaking_prefix.lt │ │ ├── nonbreaking_prefix.lv │ │ ├── nonbreaking_prefix.ml │ │ ├── nonbreaking_prefix.mni │ │ ├── nonbreaking_prefix.mr │ │ ├── nonbreaking_prefix.nl │ │ ├── nonbreaking_prefix.or │ │ ├── nonbreaking_prefix.pa │ │ ├── nonbreaking_prefix.pl │ │ ├── nonbreaking_prefix.pt │ │ ├── nonbreaking_prefix.ro │ │ ├── nonbreaking_prefix.ru │ │ ├── nonbreaking_prefix.sk │ │ ├── nonbreaking_prefix.sl │ │ ├── nonbreaking_prefix.sv │ │ ├── nonbreaking_prefix.ta │ │ ├── nonbreaking_prefix.tdt │ │ ├── nonbreaking_prefix.te │ │ ├── nonbreaking_prefix.yue │ │ └── nonbreaking_prefix.zh │ └── perluniprops │ │ ├── CJK.txt │ │ ├── CJKSymbols.txt │ │ ├── Close_Punctuation.txt │ │ ├── Currency_Symbol.txt │ │ ├── Han.txt │ │ ├── Hangul.txt │ │ ├── Hangul_Syllables.txt │ │ ├── Hiragana.txt │ │ ├── IsAlnum-unichars-au.txt │ │ ├── IsAlnum.txt │ │ ├── IsAlpha-unichars-au.txt │ │ ├── IsAlpha.txt │ │ ├── IsLower.txt │ │ ├── IsN.txt │ │ ├── IsPf.txt │ │ ├── IsPi.txt │ │ ├── IsSc.txt │ │ ├── IsSo.txt │ │ ├── IsUpper.txt │ │ ├── Katakana.txt │ │ ├── Line_Separator.txt │ │ ├── Lowercase_Letter.txt │ │ ├── Number.txt │ │ ├── Open_Punctuation.txt │ │ ├── Punctuation.txt │ │ ├── Separator.txt │ │ ├── Symbol.txt │ │ ├── Titlecase_Letter.txt │ │ └── Uppercase_Letter.txt ├── indic.py ├── normalize.py ├── sent_tokenize.py ├── subwords.py ├── test │ ├── test_corpus.py │ ├── test_no_redos_has_numeric_only.py │ ├── test_normalizer.py │ ├── test_tokenizer.py │ └── test_truecaser.py ├── tokenize.py ├── truecase.py └── util.py └── setup.py /.appveyor.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/.appveyor.yml -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/.github/workflows/test.yaml -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/CONTRIBUTORS.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/SECURITY.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/requirements.txt -------------------------------------------------------------------------------- /sacremoses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/__init__.py -------------------------------------------------------------------------------- /sacremoses/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/__main__.py -------------------------------------------------------------------------------- /sacremoses/chinese.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/chinese.py -------------------------------------------------------------------------------- /sacremoses/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/cli.py -------------------------------------------------------------------------------- /sacremoses/corpus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/corpus.py -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/README.txt -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.as: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.as -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.bn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.bn -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ca -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.cs -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.de: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.de -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.el: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.el -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.en: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.en -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.es: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.es -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.et: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.et -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.fi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.fi -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.fr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.fr -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ga: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ga -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.gu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.gu -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.hi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.hi -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.hu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.hu -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.is: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.is -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.it: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.it -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.kn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.kn -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.lt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.lt -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.lv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.lv -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ml -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.mni: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.mni -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.mr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.mr -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.nl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.nl -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.or: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.or -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pa -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pl -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pt -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ro -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ru: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ru -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sk -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sl -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sv -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ta -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.tdt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.tdt -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.te: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.te -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.yue: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.yue -------------------------------------------------------------------------------- /sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.zh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.zh -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/CJK.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/CJK.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/CJKSymbols.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/CJKSymbols.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Close_Punctuation.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Close_Punctuation.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Currency_Symbol.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Currency_Symbol.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Han.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Han.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Hangul.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Hangul.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Hangul_Syllables.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Hangul_Syllables.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Hiragana.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Hiragana.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsAlnum-unichars-au.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/IsAlnum-unichars-au.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsAlnum.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/IsAlnum.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsAlpha-unichars-au.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/IsAlpha-unichars-au.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsAlpha.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/IsAlpha.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsLower.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/IsLower.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsN.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/IsN.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsPf.txt: -------------------------------------------------------------------------------- 1 | »’”›⸃⸅⸊⸍⸝⸡ -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsPi.txt: -------------------------------------------------------------------------------- 1 | «‘‛“‟‹⸂⸄⸉⸌⸜⸠ -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsSc.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/IsSc.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsSo.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/IsSo.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/IsUpper.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/IsUpper.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Katakana.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Katakana.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Line_Separator.txt: -------------------------------------------------------------------------------- 1 | ---- 2 | -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Lowercase_Letter.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Lowercase_Letter.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Number.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Number.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Open_Punctuation.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Open_Punctuation.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Punctuation.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Punctuation.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Separator.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Separator.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Symbol.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Symbol.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Titlecase_Letter.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Titlecase_Letter.txt -------------------------------------------------------------------------------- /sacremoses/data/perluniprops/Uppercase_Letter.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/data/perluniprops/Uppercase_Letter.txt -------------------------------------------------------------------------------- /sacremoses/indic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/indic.py -------------------------------------------------------------------------------- /sacremoses/normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/normalize.py -------------------------------------------------------------------------------- /sacremoses/sent_tokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/sent_tokenize.py -------------------------------------------------------------------------------- /sacremoses/subwords.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/subwords.py -------------------------------------------------------------------------------- /sacremoses/test/test_corpus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/test/test_corpus.py -------------------------------------------------------------------------------- /sacremoses/test/test_no_redos_has_numeric_only.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/test/test_no_redos_has_numeric_only.py -------------------------------------------------------------------------------- /sacremoses/test/test_normalizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/test/test_normalizer.py -------------------------------------------------------------------------------- /sacremoses/test/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/test/test_tokenizer.py -------------------------------------------------------------------------------- /sacremoses/test/test_truecaser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/test/test_truecaser.py -------------------------------------------------------------------------------- /sacremoses/tokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/tokenize.py -------------------------------------------------------------------------------- /sacremoses/truecase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/truecase.py -------------------------------------------------------------------------------- /sacremoses/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/sacremoses/util.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hplt-project/sacremoses/HEAD/setup.py --------------------------------------------------------------------------------