├── data ├── Arabic │ ├── Finetune │ │ ├── WVQ_Arabic_1000.jsonl │ │ ├── WVQ_Arabic_150.jsonl │ │ ├── WVQ_Arabic_50.jsonl │ │ ├── WVQ_Arabic_sentence_only.jsonl │ │ ├── WVQ_arabic_Iraq_Jordan.jsonl │ │ ├── WVQ_arabic_Iraq_Jordan_llama.jsonl │ │ └── WVQ_arabic_Iraq_L.jsonl │ ├── Iraq.csv │ ├── Jordan.csv │ ├── MP │ │ ├── VulgarSpeech.jsonl │ │ ├── hateSpeech.jsonl │ │ └── offens.jsonl │ ├── OSACT4 │ │ ├── dev_data.jsonl │ │ └── dev_data_offens.jsonl │ ├── OSACT5 │ │ ├── hateSpeech.jsonl │ │ ├── hate_Finegrained.jsonl │ │ └── offens.jsonl │ ├── OffensEval2020 │ │ └── OffensEval.jsonl │ └── SpamDetect │ │ └── span_detect_2.jsonl ├── Bengali │ ├── BAD-Bangla-Aggressive-Text-Dataset │ │ └── data-2.jsonl │ ├── Bangla-Abusive-Comment-Dataset │ │ ├── racism.jsonl │ │ └── threat.jsonl │ ├── Bangladesh.csv │ ├── Bengali hate speech dataset │ │ └── religion_data-2.jsonl │ ├── Finetune │ │ ├── WVQ_Bengali.jsonl │ │ ├── WVQ_Bengali_1000.jsonl │ │ ├── WVQ_Bengali_150.jsonl │ │ ├── WVQ_Bengali_50.jsonl │ │ ├── WVQ_Bengali_L.jsonl │ │ ├── WVQ_Bengali_llama.jsonl │ │ └── WVQ_Bengali_sentence_only.jsonl │ ├── Trac2-Task1-Aggresion │ │ └── aggression-data-2.jsonl │ └── Trac2-Task2-Misogynistic │ │ └── Misogynistic-data-2.jsonl ├── China │ ├── CDial-Bias │ │ └── gender-2.jsonl │ ├── CValues │ │ ├── cvalues_responsibility_mc.jsonl │ │ ├── output_context_chatgpt.jsonl │ │ └── output_context_china.jsonl │ ├── China.csv │ ├── Chinese-Camouflage-Spam-dataset │ │ └── data-2.jsonl │ └── Finetune │ │ ├── WVQ_China.jsonl │ │ ├── WVQ_China_1000.jsonl │ │ ├── WVQ_China_150.jsonl │ │ ├── WVQ_China_50.jsonl │ │ ├── WVQ_China_500.jsonl │ │ ├── WVQ_China_L.jsonl │ │ ├── WVQ_China_llama.jsonl │ │ └── WVQ_China_sentence_only.jsonl ├── English │ ├── CONAN │ │ ├── en_data-2.jsonl │ │ ├── en_data.jsonl │ │ ├── info.txt │ │ ├── main.py │ │ └── raw data │ │ │ └── CONAN.csv │ ├── CrowS-Pairs-TODO │ │ ├── data.jsonl │ │ ├── info.txt │ │ ├── main.py │ │ └── raw data │ │ │ └── crows_pairs_anonymized.csv │ ├── EXIST 2021 │ │ ├── en_data.jsonl │ │ ├── info.txt │ │ ├── main.py │ │ ├── raw data │ │ │ └── EXIST_2021_Dataset │ │ │ │ ├── EXIST 2021 Guidelines.pdf │ │ │ │ ├── test │ │ │ │ ├── EXIST2021_test.tsv │ │ │ │ └── EXIST2021_test_labeled.tsv │ │ │ │ └── training │ │ │ │ └── EXIST2021_training.tsv │ │ └── task1.jsonl │ ├── Finetune │ │ ├── WVQ_English.jsonl │ │ ├── WVQ_English_1000.jsonl │ │ ├── WVQ_English_150.jsonl │ │ ├── WVQ_English_50.jsonl │ │ ├── WVQ_English_llama.jsonl │ │ └── WVQ_English_sentence_only.jsonl │ ├── HASOC2020 │ │ ├── clean_english_test_1509.csv │ │ ├── data.jsonl │ │ └── data_finegrained.jsonl │ ├── HateEval 2019 │ │ ├── data.jsonl │ │ ├── hateval2019 │ │ │ ├── hateval2019_en_dev.csv │ │ │ ├── hateval2019_en_test.csv │ │ │ └── hateval2019_en_train.csv │ │ ├── hateval2019_en_test.jsonl │ │ ├── info.txt │ │ └── main.py │ ├── MLMA hate speech │ │ ├── data-2.jsonl │ │ └── directness.jsonl │ ├── OLID │ │ ├── data.jsonl │ │ ├── info.txt │ │ ├── main.py │ │ ├── offense.jsonl │ │ ├── offense_classify.jsonl │ │ ├── offense_target.jsonl │ │ └── raw data │ │ │ └── olid-training-v1.0.tsv │ ├── SOLID │ │ └── test_a_tweets_easy.jsonl │ ├── Toxic Comment Classification Challenge │ │ ├── threat.jsonl │ │ └── toxic.jsonl │ ├── United States.csv │ └── hate-speech-and-offensive-language │ │ └── data.jsonl ├── Finetune │ └── WVQ_all.jsonl ├── Germany │ ├── Finetune │ │ ├── WVQ_Germany.jsonl │ │ ├── WVQ_Germany_1000.jsonl │ │ ├── WVQ_Germany_150.jsonl │ │ ├── WVQ_Germany_50.jsonl │ │ ├── WVQ_Germany_L.jsonl │ │ ├── WVQ_Germany_llama.jsonl │ │ └── WVQ_Germany_sentence_only.jsonl │ ├── GermEval │ │ └── germeval2018.jsonl │ ├── Germany.csv │ ├── HASOC │ │ └── hate_off_detect.jsonl │ ├── IWG_hatespeech_public │ │ ├── german_hatespeech_refugees_1.jsonl │ │ └── german_hatespeech_refugees_2.jsonl │ └── MHC │ │ └── hatecheck_cases_final_german.jsonl ├── Greece │ ├── Finetune │ │ ├── WVQ_Greece.jsonl │ │ ├── WVQ_Greece_adapter.jsonl │ │ └── WVQ_Greece_llama.jsonl │ ├── Greece.csv │ ├── OffensEval2020 │ │ ├── OGTDv1.csv │ │ ├── OGTDv1.txt │ │ └── OffensEval.jsonl │ └── gazzetta │ │ ├── G-TEST-S-preprocessed.json │ │ └── G-TEST-S-preprocessed.jsonl ├── Korean │ ├── AbuseEval │ │ └── data-2.jsonl │ ├── CADD │ │ └── data-2.jsonl │ ├── Finetune │ │ ├── WVQ_Korean.jsonl │ │ ├── WVQ_Korean_1000.jsonl │ │ ├── WVQ_Korean_150.jsonl │ │ ├── WVQ_Korean_50.jsonl │ │ ├── WVQ_Korean_L.jsonl │ │ ├── WVQ_Korean_llama.jsonl │ │ └── WVQ_Korean_sentence_only.jsonl │ ├── K-MHaS │ │ └── data-2.jsonl │ ├── Korean-Hate-Speech-Detection │ │ └── data-2.jsonl │ ├── KoreanHateSpeechdataset │ │ └── data-2.jsonl │ ├── South Korea.csv │ └── Waseem │ │ └── data-2.jsonl ├── Portuguese │ ├── Brazil.csv │ ├── Finetune │ │ ├── WVQ_Portuguese.jsonl │ │ ├── WVQ_Portuguese_1000.jsonl │ │ ├── WVQ_Portuguese_150.jsonl │ │ ├── WVQ_Portuguese_50.jsonl │ │ ├── WVQ_Portuguese_L.jsonl │ │ ├── WVQ_Portuguese_llama.jsonl │ │ └── WVQ_Portuguese_sentence_only.jsonl │ ├── HateBR │ │ └── data-2.jsonl │ ├── OffComBR │ │ └── data.jsonl │ └── ToLD-Br │ │ ├── homophobia.jsonl │ │ ├── insult.jsonl │ │ └── misogyny.jsonl ├── Spanish │ ├── AMI IberEval 2018_offens │ │ └── data-2.jsonl │ ├── Argentina.csv │ ├── DETOXIS 2021 │ │ ├── aggressiveness.jsonl │ │ ├── improper_language.jsonl │ │ ├── insult.jsonl │ │ ├── mockery.jsonl │ │ ├── negative_stance.jsonl │ │ └── stereotype.jsonl │ ├── Finetune │ │ ├── WVQ_Spanish.jsonl │ │ ├── WVQ_Spanish_1000.jsonl │ │ ├── WVQ_Spanish_150.jsonl │ │ ├── WVQ_Spanish_50.jsonl │ │ ├── WVQ_Spanish_L.jsonl │ │ ├── WVQ_Spanish_llama.jsonl │ │ └── WVQ_Spanish_sentence_only.jsonl │ ├── HateEval 2019_HS │ │ └── data-2.jsonl │ ├── HaterNet_HS │ │ └── data-2.jsonl │ ├── MEX-A3T_offens │ │ └── data-2.jsonl │ ├── Mexico.csv │ └── OffendES_offens │ │ └── data-2.jsonl ├── Turkey │ ├── ATC │ │ └── fold_0_test.jsonl │ ├── Finetune │ │ ├── WVQ_Turkey.jsonl │ │ ├── WVQ_Turkey_1000.jsonl │ │ ├── WVQ_Turkey_150.jsonl │ │ ├── WVQ_Turkey_50.jsonl │ │ ├── WVQ_Turkey_L.jsonl │ │ ├── WVQ_Turkey_llama.jsonl │ │ └── WVQ_Turkey_sentence_only.jsonl │ ├── OffensEval2020 │ │ └── OffensEval.jsonl │ ├── Turkey.csv │ ├── TurkishSpam │ │ └── trspam.jsonl │ ├── offensDetect-kaggle2 │ │ └── test.jsonl │ ├── offenseCorpus │ │ ├── offens.jsonl │ │ └── offens_fine-graind.jsonl │ └── offenssDetect-kaggle │ │ └── turkish_tweets_2020.jsonl ├── WVQ.csv ├── WVQ.jsonl ├── culture_context.jsonl ├── new_WVQ_100.jsonl ├── new_WVQ_1000.jsonl ├── new_WVQ_100_v2.jsonl ├── new_WVQ_500.jsonl └── new_WVQ_sentence_only.jsonl ├── data_process.py ├── diverse_gain.py ├── fig-overview.jpg ├── llama_finetune.py ├── llm_response.py ├── main.py ├── ppl.py ├── readme.md ├── test.py ├── test.sh ├── test_CValues.py └── test_offensEval.py /data/Arabic/Finetune/WVQ_Arabic_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/Finetune/WVQ_Arabic_1000.jsonl -------------------------------------------------------------------------------- /data/Arabic/Finetune/WVQ_Arabic_150.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/Finetune/WVQ_Arabic_150.jsonl -------------------------------------------------------------------------------- /data/Arabic/Finetune/WVQ_Arabic_50.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/Finetune/WVQ_Arabic_50.jsonl -------------------------------------------------------------------------------- /data/Arabic/Finetune/WVQ_Arabic_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/Finetune/WVQ_Arabic_sentence_only.jsonl -------------------------------------------------------------------------------- /data/Arabic/Finetune/WVQ_arabic_Iraq_Jordan.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/Finetune/WVQ_arabic_Iraq_Jordan.jsonl -------------------------------------------------------------------------------- /data/Arabic/Finetune/WVQ_arabic_Iraq_Jordan_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/Finetune/WVQ_arabic_Iraq_Jordan_llama.jsonl -------------------------------------------------------------------------------- /data/Arabic/Finetune/WVQ_arabic_Iraq_L.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/Finetune/WVQ_arabic_Iraq_L.jsonl -------------------------------------------------------------------------------- /data/Arabic/Iraq.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/Iraq.csv -------------------------------------------------------------------------------- /data/Arabic/Jordan.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/Jordan.csv -------------------------------------------------------------------------------- /data/Arabic/MP/VulgarSpeech.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/MP/VulgarSpeech.jsonl -------------------------------------------------------------------------------- /data/Arabic/MP/hateSpeech.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/MP/hateSpeech.jsonl -------------------------------------------------------------------------------- /data/Arabic/MP/offens.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/MP/offens.jsonl -------------------------------------------------------------------------------- /data/Arabic/OSACT4/dev_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/OSACT4/dev_data.jsonl -------------------------------------------------------------------------------- /data/Arabic/OSACT4/dev_data_offens.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/OSACT4/dev_data_offens.jsonl -------------------------------------------------------------------------------- /data/Arabic/OSACT5/hateSpeech.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/OSACT5/hateSpeech.jsonl -------------------------------------------------------------------------------- /data/Arabic/OSACT5/hate_Finegrained.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/OSACT5/hate_Finegrained.jsonl -------------------------------------------------------------------------------- /data/Arabic/OSACT5/offens.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/OSACT5/offens.jsonl -------------------------------------------------------------------------------- /data/Arabic/OffensEval2020/OffensEval.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/OffensEval2020/OffensEval.jsonl -------------------------------------------------------------------------------- /data/Arabic/SpamDetect/span_detect_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Arabic/SpamDetect/span_detect_2.jsonl -------------------------------------------------------------------------------- /data/Bengali/BAD-Bangla-Aggressive-Text-Dataset/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/BAD-Bangla-Aggressive-Text-Dataset/data-2.jsonl -------------------------------------------------------------------------------- /data/Bengali/Bangla-Abusive-Comment-Dataset/racism.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Bangla-Abusive-Comment-Dataset/racism.jsonl -------------------------------------------------------------------------------- /data/Bengali/Bangla-Abusive-Comment-Dataset/threat.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Bangla-Abusive-Comment-Dataset/threat.jsonl -------------------------------------------------------------------------------- /data/Bengali/Bangladesh.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Bangladesh.csv -------------------------------------------------------------------------------- /data/Bengali/Bengali hate speech dataset/religion_data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Bengali hate speech dataset/religion_data-2.jsonl -------------------------------------------------------------------------------- /data/Bengali/Finetune/WVQ_Bengali.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Finetune/WVQ_Bengali.jsonl -------------------------------------------------------------------------------- /data/Bengali/Finetune/WVQ_Bengali_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Finetune/WVQ_Bengali_1000.jsonl -------------------------------------------------------------------------------- /data/Bengali/Finetune/WVQ_Bengali_150.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Finetune/WVQ_Bengali_150.jsonl -------------------------------------------------------------------------------- /data/Bengali/Finetune/WVQ_Bengali_50.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Finetune/WVQ_Bengali_50.jsonl -------------------------------------------------------------------------------- /data/Bengali/Finetune/WVQ_Bengali_L.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Finetune/WVQ_Bengali_L.jsonl -------------------------------------------------------------------------------- /data/Bengali/Finetune/WVQ_Bengali_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Finetune/WVQ_Bengali_llama.jsonl -------------------------------------------------------------------------------- /data/Bengali/Finetune/WVQ_Bengali_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Finetune/WVQ_Bengali_sentence_only.jsonl -------------------------------------------------------------------------------- /data/Bengali/Trac2-Task1-Aggresion/aggression-data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Trac2-Task1-Aggresion/aggression-data-2.jsonl -------------------------------------------------------------------------------- /data/Bengali/Trac2-Task2-Misogynistic/Misogynistic-data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Bengali/Trac2-Task2-Misogynistic/Misogynistic-data-2.jsonl -------------------------------------------------------------------------------- /data/China/CDial-Bias/gender-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/CDial-Bias/gender-2.jsonl -------------------------------------------------------------------------------- /data/China/CValues/cvalues_responsibility_mc.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/CValues/cvalues_responsibility_mc.jsonl -------------------------------------------------------------------------------- /data/China/CValues/output_context_chatgpt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/CValues/output_context_chatgpt.jsonl -------------------------------------------------------------------------------- /data/China/CValues/output_context_china.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/China/China.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/China.csv -------------------------------------------------------------------------------- /data/China/Chinese-Camouflage-Spam-dataset/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/Chinese-Camouflage-Spam-dataset/data-2.jsonl -------------------------------------------------------------------------------- /data/China/Finetune/WVQ_China.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/Finetune/WVQ_China.jsonl -------------------------------------------------------------------------------- /data/China/Finetune/WVQ_China_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/Finetune/WVQ_China_1000.jsonl -------------------------------------------------------------------------------- /data/China/Finetune/WVQ_China_150.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/Finetune/WVQ_China_150.jsonl -------------------------------------------------------------------------------- /data/China/Finetune/WVQ_China_50.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/Finetune/WVQ_China_50.jsonl -------------------------------------------------------------------------------- /data/China/Finetune/WVQ_China_500.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/Finetune/WVQ_China_500.jsonl -------------------------------------------------------------------------------- /data/China/Finetune/WVQ_China_L.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/Finetune/WVQ_China_L.jsonl -------------------------------------------------------------------------------- /data/China/Finetune/WVQ_China_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/Finetune/WVQ_China_llama.jsonl -------------------------------------------------------------------------------- /data/China/Finetune/WVQ_China_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/China/Finetune/WVQ_China_sentence_only.jsonl -------------------------------------------------------------------------------- /data/English/CONAN/en_data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/CONAN/en_data-2.jsonl -------------------------------------------------------------------------------- /data/English/CONAN/en_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/CONAN/en_data.jsonl -------------------------------------------------------------------------------- /data/English/CONAN/info.txt: -------------------------------------------------------------------------------- 1 | https://github.com/marcoguerini/CONAN/tree/master -------------------------------------------------------------------------------- /data/English/CONAN/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/CONAN/main.py -------------------------------------------------------------------------------- /data/English/CONAN/raw data/CONAN.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/CONAN/raw data/CONAN.csv -------------------------------------------------------------------------------- /data/English/CrowS-Pairs-TODO/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/CrowS-Pairs-TODO/data.jsonl -------------------------------------------------------------------------------- /data/English/CrowS-Pairs-TODO/info.txt: -------------------------------------------------------------------------------- 1 | https://github.com/nyu-mll/crows-pairs -------------------------------------------------------------------------------- /data/English/CrowS-Pairs-TODO/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/CrowS-Pairs-TODO/main.py -------------------------------------------------------------------------------- /data/English/CrowS-Pairs-TODO/raw data/crows_pairs_anonymized.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/CrowS-Pairs-TODO/raw data/crows_pairs_anonymized.csv -------------------------------------------------------------------------------- /data/English/EXIST 2021/en_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/EXIST 2021/en_data.jsonl -------------------------------------------------------------------------------- /data/English/EXIST 2021/info.txt: -------------------------------------------------------------------------------- 1 | link: http://nlp.uned.es/exist2021/ -------------------------------------------------------------------------------- /data/English/EXIST 2021/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/EXIST 2021/main.py -------------------------------------------------------------------------------- /data/English/EXIST 2021/raw data/EXIST_2021_Dataset/EXIST 2021 Guidelines.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/EXIST 2021/raw data/EXIST_2021_Dataset/EXIST 2021 Guidelines.pdf -------------------------------------------------------------------------------- /data/English/EXIST 2021/raw data/EXIST_2021_Dataset/test/EXIST2021_test.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/EXIST 2021/raw data/EXIST_2021_Dataset/test/EXIST2021_test.tsv -------------------------------------------------------------------------------- /data/English/EXIST 2021/raw data/EXIST_2021_Dataset/test/EXIST2021_test_labeled.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/EXIST 2021/raw data/EXIST_2021_Dataset/test/EXIST2021_test_labeled.tsv -------------------------------------------------------------------------------- /data/English/EXIST 2021/raw data/EXIST_2021_Dataset/training/EXIST2021_training.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/EXIST 2021/raw data/EXIST_2021_Dataset/training/EXIST2021_training.tsv -------------------------------------------------------------------------------- /data/English/EXIST 2021/task1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/EXIST 2021/task1.jsonl -------------------------------------------------------------------------------- /data/English/Finetune/WVQ_English.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/Finetune/WVQ_English.jsonl -------------------------------------------------------------------------------- /data/English/Finetune/WVQ_English_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/Finetune/WVQ_English_1000.jsonl -------------------------------------------------------------------------------- /data/English/Finetune/WVQ_English_150.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/Finetune/WVQ_English_150.jsonl -------------------------------------------------------------------------------- /data/English/Finetune/WVQ_English_50.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/Finetune/WVQ_English_50.jsonl -------------------------------------------------------------------------------- /data/English/Finetune/WVQ_English_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/Finetune/WVQ_English_llama.jsonl -------------------------------------------------------------------------------- /data/English/Finetune/WVQ_English_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/Finetune/WVQ_English_sentence_only.jsonl -------------------------------------------------------------------------------- /data/English/HASOC2020/clean_english_test_1509.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HASOC2020/clean_english_test_1509.csv -------------------------------------------------------------------------------- /data/English/HASOC2020/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HASOC2020/data.jsonl -------------------------------------------------------------------------------- /data/English/HASOC2020/data_finegrained.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HASOC2020/data_finegrained.jsonl -------------------------------------------------------------------------------- /data/English/HateEval 2019/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HateEval 2019/data.jsonl -------------------------------------------------------------------------------- /data/English/HateEval 2019/hateval2019/hateval2019_en_dev.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HateEval 2019/hateval2019/hateval2019_en_dev.csv -------------------------------------------------------------------------------- /data/English/HateEval 2019/hateval2019/hateval2019_en_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HateEval 2019/hateval2019/hateval2019_en_test.csv -------------------------------------------------------------------------------- /data/English/HateEval 2019/hateval2019/hateval2019_en_train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HateEval 2019/hateval2019/hateval2019_en_train.csv -------------------------------------------------------------------------------- /data/English/HateEval 2019/hateval2019_en_test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HateEval 2019/hateval2019_en_test.jsonl -------------------------------------------------------------------------------- /data/English/HateEval 2019/info.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HateEval 2019/info.txt -------------------------------------------------------------------------------- /data/English/HateEval 2019/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/HateEval 2019/main.py -------------------------------------------------------------------------------- /data/English/MLMA hate speech/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/MLMA hate speech/data-2.jsonl -------------------------------------------------------------------------------- /data/English/MLMA hate speech/directness.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/MLMA hate speech/directness.jsonl -------------------------------------------------------------------------------- /data/English/OLID/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/OLID/data.jsonl -------------------------------------------------------------------------------- /data/English/OLID/info.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/OLID/info.txt -------------------------------------------------------------------------------- /data/English/OLID/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/OLID/main.py -------------------------------------------------------------------------------- /data/English/OLID/offense.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/OLID/offense.jsonl -------------------------------------------------------------------------------- /data/English/OLID/offense_classify.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/OLID/offense_classify.jsonl -------------------------------------------------------------------------------- /data/English/OLID/offense_target.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/OLID/offense_target.jsonl -------------------------------------------------------------------------------- /data/English/OLID/raw data/olid-training-v1.0.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/OLID/raw data/olid-training-v1.0.tsv -------------------------------------------------------------------------------- /data/English/SOLID/test_a_tweets_easy.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/SOLID/test_a_tweets_easy.jsonl -------------------------------------------------------------------------------- /data/English/Toxic Comment Classification Challenge/threat.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/Toxic Comment Classification Challenge/threat.jsonl -------------------------------------------------------------------------------- /data/English/Toxic Comment Classification Challenge/toxic.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/Toxic Comment Classification Challenge/toxic.jsonl -------------------------------------------------------------------------------- /data/English/United States.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/United States.csv -------------------------------------------------------------------------------- /data/English/hate-speech-and-offensive-language/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/English/hate-speech-and-offensive-language/data.jsonl -------------------------------------------------------------------------------- /data/Finetune/WVQ_all.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Finetune/WVQ_all.jsonl -------------------------------------------------------------------------------- /data/Germany/Finetune/WVQ_Germany.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/Finetune/WVQ_Germany.jsonl -------------------------------------------------------------------------------- /data/Germany/Finetune/WVQ_Germany_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/Finetune/WVQ_Germany_1000.jsonl -------------------------------------------------------------------------------- /data/Germany/Finetune/WVQ_Germany_150.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/Finetune/WVQ_Germany_150.jsonl -------------------------------------------------------------------------------- /data/Germany/Finetune/WVQ_Germany_50.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/Finetune/WVQ_Germany_50.jsonl -------------------------------------------------------------------------------- /data/Germany/Finetune/WVQ_Germany_L.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/Finetune/WVQ_Germany_L.jsonl -------------------------------------------------------------------------------- /data/Germany/Finetune/WVQ_Germany_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/Finetune/WVQ_Germany_llama.jsonl -------------------------------------------------------------------------------- /data/Germany/Finetune/WVQ_Germany_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/Finetune/WVQ_Germany_sentence_only.jsonl -------------------------------------------------------------------------------- /data/Germany/GermEval/germeval2018.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/GermEval/germeval2018.jsonl -------------------------------------------------------------------------------- /data/Germany/Germany.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/Germany.csv -------------------------------------------------------------------------------- /data/Germany/HASOC/hate_off_detect.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/HASOC/hate_off_detect.jsonl -------------------------------------------------------------------------------- /data/Germany/IWG_hatespeech_public/german_hatespeech_refugees_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/IWG_hatespeech_public/german_hatespeech_refugees_1.jsonl -------------------------------------------------------------------------------- /data/Germany/IWG_hatespeech_public/german_hatespeech_refugees_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/IWG_hatespeech_public/german_hatespeech_refugees_2.jsonl -------------------------------------------------------------------------------- /data/Germany/MHC/hatecheck_cases_final_german.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Germany/MHC/hatecheck_cases_final_german.jsonl -------------------------------------------------------------------------------- /data/Greece/Finetune/WVQ_Greece.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Greece/Finetune/WVQ_Greece.jsonl -------------------------------------------------------------------------------- /data/Greece/Finetune/WVQ_Greece_adapter.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Greece/Finetune/WVQ_Greece_adapter.jsonl -------------------------------------------------------------------------------- /data/Greece/Finetune/WVQ_Greece_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Greece/Finetune/WVQ_Greece_llama.jsonl -------------------------------------------------------------------------------- /data/Greece/Greece.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Greece/Greece.csv -------------------------------------------------------------------------------- /data/Greece/OffensEval2020/OGTDv1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Greece/OffensEval2020/OGTDv1.csv -------------------------------------------------------------------------------- /data/Greece/OffensEval2020/OGTDv1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Greece/OffensEval2020/OGTDv1.txt -------------------------------------------------------------------------------- /data/Greece/OffensEval2020/OffensEval.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Greece/OffensEval2020/OffensEval.jsonl -------------------------------------------------------------------------------- /data/Greece/gazzetta/G-TEST-S-preprocessed.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Greece/gazzetta/G-TEST-S-preprocessed.json -------------------------------------------------------------------------------- /data/Greece/gazzetta/G-TEST-S-preprocessed.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Greece/gazzetta/G-TEST-S-preprocessed.jsonl -------------------------------------------------------------------------------- /data/Korean/AbuseEval/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/AbuseEval/data-2.jsonl -------------------------------------------------------------------------------- /data/Korean/CADD/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/CADD/data-2.jsonl -------------------------------------------------------------------------------- /data/Korean/Finetune/WVQ_Korean.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/Finetune/WVQ_Korean.jsonl -------------------------------------------------------------------------------- /data/Korean/Finetune/WVQ_Korean_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/Finetune/WVQ_Korean_1000.jsonl -------------------------------------------------------------------------------- /data/Korean/Finetune/WVQ_Korean_150.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/Finetune/WVQ_Korean_150.jsonl -------------------------------------------------------------------------------- /data/Korean/Finetune/WVQ_Korean_50.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/Finetune/WVQ_Korean_50.jsonl -------------------------------------------------------------------------------- /data/Korean/Finetune/WVQ_Korean_L.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/Finetune/WVQ_Korean_L.jsonl -------------------------------------------------------------------------------- /data/Korean/Finetune/WVQ_Korean_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/Finetune/WVQ_Korean_llama.jsonl -------------------------------------------------------------------------------- /data/Korean/Finetune/WVQ_Korean_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/Finetune/WVQ_Korean_sentence_only.jsonl -------------------------------------------------------------------------------- /data/Korean/K-MHaS/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/K-MHaS/data-2.jsonl -------------------------------------------------------------------------------- /data/Korean/Korean-Hate-Speech-Detection/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/Korean-Hate-Speech-Detection/data-2.jsonl -------------------------------------------------------------------------------- /data/Korean/KoreanHateSpeechdataset/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/KoreanHateSpeechdataset/data-2.jsonl -------------------------------------------------------------------------------- /data/Korean/South Korea.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/South Korea.csv -------------------------------------------------------------------------------- /data/Korean/Waseem/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Korean/Waseem/data-2.jsonl -------------------------------------------------------------------------------- /data/Portuguese/Brazil.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/Brazil.csv -------------------------------------------------------------------------------- /data/Portuguese/Finetune/WVQ_Portuguese.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/Finetune/WVQ_Portuguese.jsonl -------------------------------------------------------------------------------- /data/Portuguese/Finetune/WVQ_Portuguese_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/Finetune/WVQ_Portuguese_1000.jsonl -------------------------------------------------------------------------------- /data/Portuguese/Finetune/WVQ_Portuguese_150.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/Finetune/WVQ_Portuguese_150.jsonl -------------------------------------------------------------------------------- /data/Portuguese/Finetune/WVQ_Portuguese_50.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/Finetune/WVQ_Portuguese_50.jsonl -------------------------------------------------------------------------------- /data/Portuguese/Finetune/WVQ_Portuguese_L.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/Finetune/WVQ_Portuguese_L.jsonl -------------------------------------------------------------------------------- /data/Portuguese/Finetune/WVQ_Portuguese_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/Finetune/WVQ_Portuguese_llama.jsonl -------------------------------------------------------------------------------- /data/Portuguese/Finetune/WVQ_Portuguese_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/Finetune/WVQ_Portuguese_sentence_only.jsonl -------------------------------------------------------------------------------- /data/Portuguese/HateBR/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/HateBR/data-2.jsonl -------------------------------------------------------------------------------- /data/Portuguese/OffComBR/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/OffComBR/data.jsonl -------------------------------------------------------------------------------- /data/Portuguese/ToLD-Br/homophobia.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/ToLD-Br/homophobia.jsonl -------------------------------------------------------------------------------- /data/Portuguese/ToLD-Br/insult.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/ToLD-Br/insult.jsonl -------------------------------------------------------------------------------- /data/Portuguese/ToLD-Br/misogyny.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Portuguese/ToLD-Br/misogyny.jsonl -------------------------------------------------------------------------------- /data/Spanish/AMI IberEval 2018_offens/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/AMI IberEval 2018_offens/data-2.jsonl -------------------------------------------------------------------------------- /data/Spanish/Argentina.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/Argentina.csv -------------------------------------------------------------------------------- /data/Spanish/DETOXIS 2021/aggressiveness.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/DETOXIS 2021/aggressiveness.jsonl -------------------------------------------------------------------------------- /data/Spanish/DETOXIS 2021/improper_language.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/DETOXIS 2021/improper_language.jsonl -------------------------------------------------------------------------------- /data/Spanish/DETOXIS 2021/insult.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/DETOXIS 2021/insult.jsonl -------------------------------------------------------------------------------- /data/Spanish/DETOXIS 2021/mockery.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/DETOXIS 2021/mockery.jsonl -------------------------------------------------------------------------------- /data/Spanish/DETOXIS 2021/negative_stance.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/DETOXIS 2021/negative_stance.jsonl -------------------------------------------------------------------------------- /data/Spanish/DETOXIS 2021/stereotype.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/DETOXIS 2021/stereotype.jsonl -------------------------------------------------------------------------------- /data/Spanish/Finetune/WVQ_Spanish.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/Finetune/WVQ_Spanish.jsonl -------------------------------------------------------------------------------- /data/Spanish/Finetune/WVQ_Spanish_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/Finetune/WVQ_Spanish_1000.jsonl -------------------------------------------------------------------------------- /data/Spanish/Finetune/WVQ_Spanish_150.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/Finetune/WVQ_Spanish_150.jsonl -------------------------------------------------------------------------------- /data/Spanish/Finetune/WVQ_Spanish_50.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/Finetune/WVQ_Spanish_50.jsonl -------------------------------------------------------------------------------- /data/Spanish/Finetune/WVQ_Spanish_L.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/Finetune/WVQ_Spanish_L.jsonl -------------------------------------------------------------------------------- /data/Spanish/Finetune/WVQ_Spanish_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/Finetune/WVQ_Spanish_llama.jsonl -------------------------------------------------------------------------------- /data/Spanish/Finetune/WVQ_Spanish_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/Finetune/WVQ_Spanish_sentence_only.jsonl -------------------------------------------------------------------------------- /data/Spanish/HateEval 2019_HS/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/HateEval 2019_HS/data-2.jsonl -------------------------------------------------------------------------------- /data/Spanish/HaterNet_HS/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/HaterNet_HS/data-2.jsonl -------------------------------------------------------------------------------- /data/Spanish/MEX-A3T_offens/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/MEX-A3T_offens/data-2.jsonl -------------------------------------------------------------------------------- /data/Spanish/Mexico.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/Mexico.csv -------------------------------------------------------------------------------- /data/Spanish/OffendES_offens/data-2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Spanish/OffendES_offens/data-2.jsonl -------------------------------------------------------------------------------- /data/Turkey/ATC/fold_0_test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/ATC/fold_0_test.jsonl -------------------------------------------------------------------------------- /data/Turkey/Finetune/WVQ_Turkey.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/Finetune/WVQ_Turkey.jsonl -------------------------------------------------------------------------------- /data/Turkey/Finetune/WVQ_Turkey_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/Finetune/WVQ_Turkey_1000.jsonl -------------------------------------------------------------------------------- /data/Turkey/Finetune/WVQ_Turkey_150.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/Finetune/WVQ_Turkey_150.jsonl -------------------------------------------------------------------------------- /data/Turkey/Finetune/WVQ_Turkey_50.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/Finetune/WVQ_Turkey_50.jsonl -------------------------------------------------------------------------------- /data/Turkey/Finetune/WVQ_Turkey_L.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/Finetune/WVQ_Turkey_L.jsonl -------------------------------------------------------------------------------- /data/Turkey/Finetune/WVQ_Turkey_llama.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/Finetune/WVQ_Turkey_llama.jsonl -------------------------------------------------------------------------------- /data/Turkey/Finetune/WVQ_Turkey_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/Finetune/WVQ_Turkey_sentence_only.jsonl -------------------------------------------------------------------------------- /data/Turkey/OffensEval2020/OffensEval.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/OffensEval2020/OffensEval.jsonl -------------------------------------------------------------------------------- /data/Turkey/Turkey.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/Turkey.csv -------------------------------------------------------------------------------- /data/Turkey/TurkishSpam/trspam.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/TurkishSpam/trspam.jsonl -------------------------------------------------------------------------------- /data/Turkey/offensDetect-kaggle2/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/offensDetect-kaggle2/test.jsonl -------------------------------------------------------------------------------- /data/Turkey/offenseCorpus/offens.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/offenseCorpus/offens.jsonl -------------------------------------------------------------------------------- /data/Turkey/offenseCorpus/offens_fine-graind.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/offenseCorpus/offens_fine-graind.jsonl -------------------------------------------------------------------------------- /data/Turkey/offenssDetect-kaggle/turkish_tweets_2020.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/Turkey/offenssDetect-kaggle/turkish_tweets_2020.jsonl -------------------------------------------------------------------------------- /data/WVQ.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/WVQ.csv -------------------------------------------------------------------------------- /data/WVQ.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/WVQ.jsonl -------------------------------------------------------------------------------- /data/culture_context.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/culture_context.jsonl -------------------------------------------------------------------------------- /data/new_WVQ_100.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/new_WVQ_100.jsonl -------------------------------------------------------------------------------- /data/new_WVQ_1000.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/new_WVQ_1000.jsonl -------------------------------------------------------------------------------- /data/new_WVQ_100_v2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/new_WVQ_100_v2.jsonl -------------------------------------------------------------------------------- /data/new_WVQ_500.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/new_WVQ_500.jsonl -------------------------------------------------------------------------------- /data/new_WVQ_sentence_only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data/new_WVQ_sentence_only.jsonl -------------------------------------------------------------------------------- /data_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/data_process.py -------------------------------------------------------------------------------- /diverse_gain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/diverse_gain.py -------------------------------------------------------------------------------- /fig-overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/fig-overview.jpg -------------------------------------------------------------------------------- /llama_finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/llama_finetune.py -------------------------------------------------------------------------------- /llm_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/llm_response.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/main.py -------------------------------------------------------------------------------- /ppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/ppl.py -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/readme.md -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/test.py -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/test.sh -------------------------------------------------------------------------------- /test_CValues.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/test_CValues.py -------------------------------------------------------------------------------- /test_offensEval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scarelette/CultureLLM/HEAD/test_offensEval.py --------------------------------------------------------------------------------