├── .gitignore ├── LICENSE ├── RAW ├── README.md ├── get_everylang.py ├── get_langs.py ├── work_on_files.py └── שפות.xlsx ├── README ├── README.md ├── akkadian_bert ├── data_collators_bert.py ├── datasets_bert.py ├── evaluate_bert.py ├── main_bert.py ├── tokenize_bert.py ├── train_bert.py ├── utils.py ├── visualize_evaluations.py └── write_bert.py ├── data ├── Finished │ └── PreWork │ │ ├── data_tabler.py │ │ ├── periods_and_langs.py │ │ ├── results_benny │ │ ├── analysis of the lemmas, every reference.txt │ │ ├── results │ │ │ ├── results.zip │ │ │ └── results_saa │ │ │ │ ├── words_count.csv │ │ │ │ ├── words_count_by_project.csv │ │ │ │ └── words_count_by_project.xlsx │ │ ├── rinap analysis.txt │ │ ├── rinap lemma_len.txt │ │ ├── stat.csv │ │ ├── word_statistics.py │ │ ├── words_variants (version 1).xlsb.csv │ │ └── words_variants.csv │ │ ├── textdata.csv │ │ └── textdata.xlsx ├── Location │ ├── ANE.kmz (1).kml │ ├── Locator.py │ ├── PlacelocationsAMGG.csv │ ├── Van,turkey.json │ ├── dataframe.csv │ ├── places.csv │ ├── toponym_cordinate.csv │ └── ‏‏ANE.xml ├── akk_from_jsons.jsonl ├── eng_from_website.jsonl ├── getting_tokens │ ├── check_frequancy.py │ └── getting_the_tokens.py ├── jsonl │ ├── adsd.jsonl │ ├── aemw.jsonl │ ├── akklove.jsonl │ ├── amgg.jsonl │ ├── ario.jsonl │ ├── armep.jsonl │ ├── arrim.jsonl │ ├── asbp.jsonl │ ├── atae.jsonl │ ├── blms.jsonl │ ├── btto.jsonl │ ├── cams.jsonl │ ├── ckst.jsonl │ ├── cmawro.jsonl │ ├── contrib.jsonl │ ├── ctij.jsonl │ ├── dcclt.jsonl │ ├── dccmt.jsonl │ ├── desktop.ini.jsonl │ ├── dsst.jsonl │ ├── ecut.jsonl │ ├── etcsri.jsonl │ ├── glass.jsonl │ ├── hbtin.jsonl │ ├── issl.jsonl │ ├── lacost.jsonl │ ├── lovelyrics.jsonl │ ├── nimrud.jsonl │ ├── obmc.jsonl │ ├── obta.jsonl │ ├── ogsl.jsonl │ ├── oimea.jsonl │ ├── pnao.jsonl │ ├── qcat.jsonl │ ├── riao.jsonl │ ├── ribo.jsonl │ ├── rimanum.jsonl │ ├── rinap.jsonl │ ├── saao.jsonl │ ├── suhu.jsonl │ ├── tcma.jsonl │ ├── tsae.jsonl │ └── xcat.jsonl ├── lists │ ├── Divine Names.txt │ ├── Names.txt │ ├── Places.txt │ ├── Proper Nouns.txt │ ├── Temples.txt │ ├── Text Origin.txt │ ├── Watercourses.txt │ ├── disallowed_langs.txt │ ├── disallowed_periods │ ├── languages.json │ ├── periods.txt │ └── placelist.txt ├── manual_evaluation_old │ ├── project_benny_labels.json │ └── project_benny_royal.jsonl ├── ratings │ ├── location_google.csv │ ├── rater.py │ └── tezxt.py ├── scrapper │ └── web_scrapper.py ├── tiny_test_file.jsonl └── update_data │ └── updater.py ├── plots ├── first_plot.jpg ├── no_hyphens_no_pseudowords.jpg ├── with_hyphens_no_pseudowords.jpg └── with_hyphens_with_pseudowords.jpg ├── preprocessing ├── data_dist.py ├── lists_and_words │ ├── Geographical Names.txt │ └── Personal Names.txt ├── logogram.py ├── main_preprocess.py ├── preliminary_analysis.py ├── pse_words.py └── scraping.py ├── scripts ├── run_bert_train.sh ├── run_chau_eval.sh ├── run_eval.sh ├── run_from_scratch.sh ├── run_manual_eval.sh ├── run_pipeline.sh ├── run_preprocessing.sh ├── run_scrape.sh └── run_train.sh └── tokens_wp_stats.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/LICENSE -------------------------------------------------------------------------------- /RAW/README.md: -------------------------------------------------------------------------------- 1 | "here we will put the raw files to sort" 2 | -------------------------------------------------------------------------------- /RAW/get_everylang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/RAW/get_everylang.py -------------------------------------------------------------------------------- /RAW/get_langs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/RAW/get_langs.py -------------------------------------------------------------------------------- /RAW/work_on_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/RAW/work_on_files.py -------------------------------------------------------------------------------- /RAW/שפות.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/RAW/שפות.xlsx -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/README -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/README.md -------------------------------------------------------------------------------- /akkadian_bert/data_collators_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/akkadian_bert/data_collators_bert.py -------------------------------------------------------------------------------- /akkadian_bert/datasets_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/akkadian_bert/datasets_bert.py -------------------------------------------------------------------------------- /akkadian_bert/evaluate_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/akkadian_bert/evaluate_bert.py -------------------------------------------------------------------------------- /akkadian_bert/main_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/akkadian_bert/main_bert.py -------------------------------------------------------------------------------- /akkadian_bert/tokenize_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/akkadian_bert/tokenize_bert.py -------------------------------------------------------------------------------- /akkadian_bert/train_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/akkadian_bert/train_bert.py -------------------------------------------------------------------------------- /akkadian_bert/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/akkadian_bert/utils.py -------------------------------------------------------------------------------- /akkadian_bert/visualize_evaluations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/akkadian_bert/visualize_evaluations.py -------------------------------------------------------------------------------- /akkadian_bert/write_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/akkadian_bert/write_bert.py -------------------------------------------------------------------------------- /data/Finished/PreWork/data_tabler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/data_tabler.py -------------------------------------------------------------------------------- /data/Finished/PreWork/periods_and_langs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/periods_and_langs.py -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/analysis of the lemmas, every reference.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/analysis of the lemmas, every reference.txt -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/results/results.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/results/results.zip -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/results/results_saa/words_count.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/results/results_saa/words_count.csv -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/results/results_saa/words_count_by_project.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/results/results_saa/words_count_by_project.csv -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/results/results_saa/words_count_by_project.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/results/results_saa/words_count_by_project.xlsx -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/rinap analysis.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/rinap analysis.txt -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/rinap lemma_len.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/rinap lemma_len.txt -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/stat.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/stat.csv -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/word_statistics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/word_statistics.py -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/words_variants (version 1).xlsb.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/words_variants (version 1).xlsb.csv -------------------------------------------------------------------------------- /data/Finished/PreWork/results_benny/words_variants.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/results_benny/words_variants.csv -------------------------------------------------------------------------------- /data/Finished/PreWork/textdata.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/textdata.csv -------------------------------------------------------------------------------- /data/Finished/PreWork/textdata.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Finished/PreWork/textdata.xlsx -------------------------------------------------------------------------------- /data/Location/ANE.kmz (1).kml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Location/ANE.kmz (1).kml -------------------------------------------------------------------------------- /data/Location/Locator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Location/Locator.py -------------------------------------------------------------------------------- /data/Location/PlacelocationsAMGG.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Location/PlacelocationsAMGG.csv -------------------------------------------------------------------------------- /data/Location/Van,turkey.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Location/Van,turkey.json -------------------------------------------------------------------------------- /data/Location/dataframe.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Location/dataframe.csv -------------------------------------------------------------------------------- /data/Location/places.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Location/places.csv -------------------------------------------------------------------------------- /data/Location/toponym_cordinate.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Location/toponym_cordinate.csv -------------------------------------------------------------------------------- /data/Location/‏‏ANE.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/Location/‏‏ANE.xml -------------------------------------------------------------------------------- /data/akk_from_jsons.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/akk_from_jsons.jsonl -------------------------------------------------------------------------------- /data/eng_from_website.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/eng_from_website.jsonl -------------------------------------------------------------------------------- /data/getting_tokens/check_frequancy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/getting_tokens/check_frequancy.py -------------------------------------------------------------------------------- /data/getting_tokens/getting_the_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/getting_tokens/getting_the_tokens.py -------------------------------------------------------------------------------- /data/jsonl/adsd.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/adsd.jsonl -------------------------------------------------------------------------------- /data/jsonl/aemw.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/aemw.jsonl -------------------------------------------------------------------------------- /data/jsonl/akklove.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/akklove.jsonl -------------------------------------------------------------------------------- /data/jsonl/amgg.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/ario.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/ario.jsonl -------------------------------------------------------------------------------- /data/jsonl/armep.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/arrim.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/asbp.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/atae.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/atae.jsonl -------------------------------------------------------------------------------- /data/jsonl/blms.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/blms.jsonl -------------------------------------------------------------------------------- /data/jsonl/btto.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/btto.jsonl -------------------------------------------------------------------------------- /data/jsonl/cams.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/cams.jsonl -------------------------------------------------------------------------------- /data/jsonl/ckst.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/ckst.jsonl -------------------------------------------------------------------------------- /data/jsonl/cmawro.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/cmawro.jsonl -------------------------------------------------------------------------------- /data/jsonl/contrib.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/contrib.jsonl -------------------------------------------------------------------------------- /data/jsonl/ctij.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/ctij.jsonl -------------------------------------------------------------------------------- /data/jsonl/dcclt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/dcclt.jsonl -------------------------------------------------------------------------------- /data/jsonl/dccmt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/dccmt.jsonl -------------------------------------------------------------------------------- /data/jsonl/desktop.ini.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/dsst.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/dsst.jsonl -------------------------------------------------------------------------------- /data/jsonl/ecut.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/ecut.jsonl -------------------------------------------------------------------------------- /data/jsonl/etcsri.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/etcsri.jsonl -------------------------------------------------------------------------------- /data/jsonl/glass.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/glass.jsonl -------------------------------------------------------------------------------- /data/jsonl/hbtin.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/hbtin.jsonl -------------------------------------------------------------------------------- /data/jsonl/issl.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/lacost.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/lacost.jsonl -------------------------------------------------------------------------------- /data/jsonl/lovelyrics.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/nimrud.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/obmc.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/obmc.jsonl -------------------------------------------------------------------------------- /data/jsonl/obta.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/obta.jsonl -------------------------------------------------------------------------------- /data/jsonl/ogsl.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/oimea.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/pnao.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/qcat.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/riao.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/riao.jsonl -------------------------------------------------------------------------------- /data/jsonl/ribo.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/ribo.jsonl -------------------------------------------------------------------------------- /data/jsonl/rimanum.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/rimanum.jsonl -------------------------------------------------------------------------------- /data/jsonl/rinap.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/rinap.jsonl -------------------------------------------------------------------------------- /data/jsonl/saao.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/saao.jsonl -------------------------------------------------------------------------------- /data/jsonl/suhu.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/jsonl/suhu.jsonl -------------------------------------------------------------------------------- /data/jsonl/tcma.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/tsae.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/jsonl/xcat.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/lists/Divine Names.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/Divine Names.txt -------------------------------------------------------------------------------- /data/lists/Names.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/Names.txt -------------------------------------------------------------------------------- /data/lists/Places.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/Places.txt -------------------------------------------------------------------------------- /data/lists/Proper Nouns.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/Proper Nouns.txt -------------------------------------------------------------------------------- /data/lists/Temples.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/Temples.txt -------------------------------------------------------------------------------- /data/lists/Text Origin.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/Text Origin.txt -------------------------------------------------------------------------------- /data/lists/Watercourses.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/Watercourses.txt -------------------------------------------------------------------------------- /data/lists/disallowed_langs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/disallowed_langs.txt -------------------------------------------------------------------------------- /data/lists/disallowed_periods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/disallowed_periods -------------------------------------------------------------------------------- /data/lists/languages.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/languages.json -------------------------------------------------------------------------------- /data/lists/periods.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/lists/periods.txt -------------------------------------------------------------------------------- /data/lists/placelist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/manual_evaluation_old/project_benny_labels.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/manual_evaluation_old/project_benny_labels.json -------------------------------------------------------------------------------- /data/manual_evaluation_old/project_benny_royal.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/manual_evaluation_old/project_benny_royal.jsonl -------------------------------------------------------------------------------- /data/ratings/location_google.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/ratings/location_google.csv -------------------------------------------------------------------------------- /data/ratings/rater.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/ratings/rater.py -------------------------------------------------------------------------------- /data/ratings/tezxt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/ratings/tezxt.py -------------------------------------------------------------------------------- /data/scrapper/web_scrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/scrapper/web_scrapper.py -------------------------------------------------------------------------------- /data/tiny_test_file.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/tiny_test_file.jsonl -------------------------------------------------------------------------------- /data/update_data/updater.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/data/update_data/updater.py -------------------------------------------------------------------------------- /plots/first_plot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/plots/first_plot.jpg -------------------------------------------------------------------------------- /plots/no_hyphens_no_pseudowords.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/plots/no_hyphens_no_pseudowords.jpg -------------------------------------------------------------------------------- /plots/with_hyphens_no_pseudowords.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/plots/with_hyphens_no_pseudowords.jpg -------------------------------------------------------------------------------- /plots/with_hyphens_with_pseudowords.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/plots/with_hyphens_with_pseudowords.jpg -------------------------------------------------------------------------------- /preprocessing/data_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/preprocessing/data_dist.py -------------------------------------------------------------------------------- /preprocessing/lists_and_words/Geographical Names.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/preprocessing/lists_and_words/Geographical Names.txt -------------------------------------------------------------------------------- /preprocessing/lists_and_words/Personal Names.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/preprocessing/lists_and_words/Personal Names.txt -------------------------------------------------------------------------------- /preprocessing/logogram.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/preprocessing/logogram.py -------------------------------------------------------------------------------- /preprocessing/main_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/preprocessing/main_preprocess.py -------------------------------------------------------------------------------- /preprocessing/preliminary_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/preprocessing/preliminary_analysis.py -------------------------------------------------------------------------------- /preprocessing/pse_words.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/preprocessing/pse_words.py -------------------------------------------------------------------------------- /preprocessing/scraping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/preprocessing/scraping.py -------------------------------------------------------------------------------- /scripts/run_bert_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/scripts/run_bert_train.sh -------------------------------------------------------------------------------- /scripts/run_chau_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/scripts/run_chau_eval.sh -------------------------------------------------------------------------------- /scripts/run_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/scripts/run_eval.sh -------------------------------------------------------------------------------- /scripts/run_from_scratch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/scripts/run_from_scratch.sh -------------------------------------------------------------------------------- /scripts/run_manual_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/scripts/run_manual_eval.sh -------------------------------------------------------------------------------- /scripts/run_pipeline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/scripts/run_pipeline.sh -------------------------------------------------------------------------------- /scripts/run_preprocessing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/scripts/run_preprocessing.sh -------------------------------------------------------------------------------- /scripts/run_scrape.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/scripts/run_scrape.sh -------------------------------------------------------------------------------- /scripts/run_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/scripts/run_train.sh -------------------------------------------------------------------------------- /tokens_wp_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SLAB-NLP/Akk/HEAD/tokens_wp_stats.py --------------------------------------------------------------------------------