├── .gitignore ├── Hinglish-Logo.png ├── LICENSE ├── ML Baselines └── NB-SVM_and_LR-TF-IDF.ipynb ├── README.md ├── Transformers └── Transformers.ipynb ├── ULMFiT ├── SentencePieceTokenizer.ipynb └── ULMFIT.ipynb ├── data ├── cleanlab │ ├── errorsBERT.txt │ ├── errorsDistilBert.txt │ ├── errorsRoberta.txt │ └── errorsULMFiT.txt ├── interim │ ├── all_hinglish_data.txt │ ├── final_test.json │ ├── hinglish_sp.model │ ├── hinglish_sp.vocab │ ├── hinglish_unsup_high_confidence.json │ ├── hinglish_unsup_less_confidence.json │ ├── test.json │ ├── train-large.json │ ├── train.json │ └── valid.json ├── processed │ ├── TestDistilBert.csv │ ├── TestRoberta.csv │ ├── TestUlmfit.csv │ ├── ValidBert.csv │ ├── ValidDistilBert.csv │ ├── ValidRoberta.csv │ ├── ValidUlmfit.csv │ ├── bertOutput.csv │ ├── distilBertOutput.csv │ ├── robertaOutput.csv │ ├── test.json │ ├── train-large.json │ ├── train.json │ └── ulmfitOutput.csv └── raw │ ├── dev_3k_split_conll.txt │ ├── distilBertOutput.csv │ ├── test.txt │ ├── train.json │ ├── train.txt │ ├── trial.json │ ├── trial.txt │ └── tweets_hate_speech.tsv ├── hinglish.py ├── hinglishutils.py ├── misc ├── CleanlabDistilbert.ipynb ├── DetectOtherLanguages.ipynb ├── MajorityVoting.ipynb ├── NoisyLabels.ipynb ├── TweetMining.ipynb ├── data_tfms.ipynb └── download.ipynb ├── nb-stripout.py ├── requirements.txt ├── run_language_modeling.py ├── sweep.yaml └── train.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/.gitignore -------------------------------------------------------------------------------- /Hinglish-Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/Hinglish-Logo.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/LICENSE -------------------------------------------------------------------------------- /ML Baselines/NB-SVM_and_LR-TF-IDF.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/ML Baselines/NB-SVM_and_LR-TF-IDF.ipynb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/README.md -------------------------------------------------------------------------------- /Transformers/Transformers.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/Transformers/Transformers.ipynb -------------------------------------------------------------------------------- /ULMFiT/SentencePieceTokenizer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/ULMFiT/SentencePieceTokenizer.ipynb -------------------------------------------------------------------------------- /ULMFiT/ULMFIT.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/ULMFiT/ULMFIT.ipynb -------------------------------------------------------------------------------- /data/cleanlab/errorsBERT.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/cleanlab/errorsBERT.txt -------------------------------------------------------------------------------- /data/cleanlab/errorsDistilBert.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/cleanlab/errorsDistilBert.txt -------------------------------------------------------------------------------- /data/cleanlab/errorsRoberta.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/cleanlab/errorsRoberta.txt -------------------------------------------------------------------------------- /data/cleanlab/errorsULMFiT.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/cleanlab/errorsULMFiT.txt -------------------------------------------------------------------------------- /data/interim/all_hinglish_data.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/all_hinglish_data.txt -------------------------------------------------------------------------------- /data/interim/final_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/final_test.json -------------------------------------------------------------------------------- /data/interim/hinglish_sp.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/hinglish_sp.model -------------------------------------------------------------------------------- /data/interim/hinglish_sp.vocab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/hinglish_sp.vocab -------------------------------------------------------------------------------- /data/interim/hinglish_unsup_high_confidence.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/hinglish_unsup_high_confidence.json -------------------------------------------------------------------------------- /data/interim/hinglish_unsup_less_confidence.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/hinglish_unsup_less_confidence.json -------------------------------------------------------------------------------- /data/interim/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/test.json -------------------------------------------------------------------------------- /data/interim/train-large.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/train-large.json -------------------------------------------------------------------------------- /data/interim/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/train.json -------------------------------------------------------------------------------- /data/interim/valid.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/interim/valid.json -------------------------------------------------------------------------------- /data/processed/TestDistilBert.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/TestDistilBert.csv -------------------------------------------------------------------------------- /data/processed/TestRoberta.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/TestRoberta.csv -------------------------------------------------------------------------------- /data/processed/TestUlmfit.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/TestUlmfit.csv -------------------------------------------------------------------------------- /data/processed/ValidBert.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/ValidBert.csv -------------------------------------------------------------------------------- /data/processed/ValidDistilBert.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/ValidDistilBert.csv -------------------------------------------------------------------------------- /data/processed/ValidRoberta.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/ValidRoberta.csv -------------------------------------------------------------------------------- /data/processed/ValidUlmfit.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/ValidUlmfit.csv -------------------------------------------------------------------------------- /data/processed/bertOutput.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/bertOutput.csv -------------------------------------------------------------------------------- /data/processed/distilBertOutput.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/distilBertOutput.csv -------------------------------------------------------------------------------- /data/processed/robertaOutput.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/robertaOutput.csv -------------------------------------------------------------------------------- /data/processed/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/test.json -------------------------------------------------------------------------------- /data/processed/train-large.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/train-large.json -------------------------------------------------------------------------------- /data/processed/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/train.json -------------------------------------------------------------------------------- /data/processed/ulmfitOutput.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/processed/ulmfitOutput.csv -------------------------------------------------------------------------------- /data/raw/dev_3k_split_conll.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/raw/dev_3k_split_conll.txt -------------------------------------------------------------------------------- /data/raw/distilBertOutput.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/raw/distilBertOutput.csv -------------------------------------------------------------------------------- /data/raw/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/raw/test.txt -------------------------------------------------------------------------------- /data/raw/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/raw/train.json -------------------------------------------------------------------------------- /data/raw/train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/raw/train.txt -------------------------------------------------------------------------------- /data/raw/trial.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/raw/trial.json -------------------------------------------------------------------------------- /data/raw/trial.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/raw/trial.txt -------------------------------------------------------------------------------- /data/raw/tweets_hate_speech.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/data/raw/tweets_hate_speech.tsv -------------------------------------------------------------------------------- /hinglish.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/hinglish.py -------------------------------------------------------------------------------- /hinglishutils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/hinglishutils.py -------------------------------------------------------------------------------- /misc/CleanlabDistilbert.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/misc/CleanlabDistilbert.ipynb -------------------------------------------------------------------------------- /misc/DetectOtherLanguages.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/misc/DetectOtherLanguages.ipynb -------------------------------------------------------------------------------- /misc/MajorityVoting.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/misc/MajorityVoting.ipynb -------------------------------------------------------------------------------- /misc/NoisyLabels.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/misc/NoisyLabels.ipynb -------------------------------------------------------------------------------- /misc/TweetMining.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/misc/TweetMining.ipynb -------------------------------------------------------------------------------- /misc/data_tfms.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/misc/data_tfms.ipynb -------------------------------------------------------------------------------- /misc/download.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/misc/download.ipynb -------------------------------------------------------------------------------- /nb-stripout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/nb-stripout.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/requirements.txt -------------------------------------------------------------------------------- /run_language_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/run_language_modeling.py -------------------------------------------------------------------------------- /sweep.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/sweep.yaml -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NirantK/Hinglish/HEAD/train.py --------------------------------------------------------------------------------