├── .gitignore ├── MANIFEST.in ├── README.md ├── african-stopwords ├── LANGUAGE-TABLE.md ├── Masakhane_Stopword_Extraction_and_Validation.ipynb ├── README.md ├── extract.py ├── languages │ ├── README.md │ ├── af.txt │ ├── ha.txt │ ├── pcm.txt │ ├── rn.txt │ ├── rw.txt │ ├── so.txt │ ├── st.txt │ ├── sw.txt │ ├── yo.txt │ └── zu.txt └── stage2 │ ├── README.md │ ├── code.md │ ├── eval.md │ ├── sourcing.md │ └── stopwords-2.ipynb ├── masakhanePreprocessor ├── __init__.py ├── langs.py ├── languages.json ├── preprocess.py └── rules │ ├── README.md │ ├── fon.json │ ├── ig.json │ └── yo.json ├── requirements.txt ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/.gitignore -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include masakhanePreprocessor *.json -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/README.md -------------------------------------------------------------------------------- /african-stopwords/LANGUAGE-TABLE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/LANGUAGE-TABLE.md -------------------------------------------------------------------------------- /african-stopwords/Masakhane_Stopword_Extraction_and_Validation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/Masakhane_Stopword_Extraction_and_Validation.ipynb -------------------------------------------------------------------------------- /african-stopwords/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/README.md -------------------------------------------------------------------------------- /african-stopwords/extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/extract.py -------------------------------------------------------------------------------- /african-stopwords/languages/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/README.md -------------------------------------------------------------------------------- /african-stopwords/languages/af.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/af.txt -------------------------------------------------------------------------------- /african-stopwords/languages/ha.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/ha.txt -------------------------------------------------------------------------------- /african-stopwords/languages/pcm.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/pcm.txt -------------------------------------------------------------------------------- /african-stopwords/languages/rn.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/rn.txt -------------------------------------------------------------------------------- /african-stopwords/languages/rw.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/rw.txt -------------------------------------------------------------------------------- /african-stopwords/languages/so.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/so.txt -------------------------------------------------------------------------------- /african-stopwords/languages/st.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/st.txt -------------------------------------------------------------------------------- /african-stopwords/languages/sw.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/sw.txt -------------------------------------------------------------------------------- /african-stopwords/languages/yo.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/yo.txt -------------------------------------------------------------------------------- /african-stopwords/languages/zu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/languages/zu.txt -------------------------------------------------------------------------------- /african-stopwords/stage2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/stage2/README.md -------------------------------------------------------------------------------- /african-stopwords/stage2/code.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /african-stopwords/stage2/eval.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /african-stopwords/stage2/sourcing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/stage2/sourcing.md -------------------------------------------------------------------------------- /african-stopwords/stage2/stopwords-2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/african-stopwords/stage2/stopwords-2.ipynb -------------------------------------------------------------------------------- /masakhanePreprocessor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/masakhanePreprocessor/__init__.py -------------------------------------------------------------------------------- /masakhanePreprocessor/langs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/masakhanePreprocessor/langs.py -------------------------------------------------------------------------------- /masakhanePreprocessor/languages.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/masakhanePreprocessor/languages.json -------------------------------------------------------------------------------- /masakhanePreprocessor/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/masakhanePreprocessor/preprocess.py -------------------------------------------------------------------------------- /masakhanePreprocessor/rules/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/masakhanePreprocessor/rules/README.md -------------------------------------------------------------------------------- /masakhanePreprocessor/rules/fon.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/masakhanePreprocessor/rules/fon.json -------------------------------------------------------------------------------- /masakhanePreprocessor/rules/ig.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/masakhanePreprocessor/rules/ig.json -------------------------------------------------------------------------------- /masakhanePreprocessor/rules/yo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/masakhanePreprocessor/rules/yo.json -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | clean-text 2 | langcodes 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masakhane-io/masakhanePreprocessor/HEAD/setup.py --------------------------------------------------------------------------------