├── EUD ├── README.md ├── back_conversion.py ├── conlleval.py ├── conlleval_perl.py ├── conllu-quick-fix.pl ├── convert_EUD_to_collapsed.py ├── data │ ├── cpos.ud │ ├── deprel.af │ ├── deprel.aii │ ├── deprel.akk │ ├── deprel.am │ ├── deprel.ar │ ├── deprel.be │ ├── deprel.bg │ ├── deprel.bm │ ├── deprel.br │ ├── deprel.bxr │ ├── deprel.ca │ ├── deprel.cop │ ├── deprel.cs │ ├── deprel.cu │ ├── deprel.cy │ ├── deprel.da │ ├── deprel.de │ ├── deprel.el │ ├── deprel.en │ ├── deprel.es │ ├── deprel.et │ ├── deprel.eu │ ├── deprel.fa │ ├── deprel.fi │ ├── deprel.fo │ ├── deprel.fr │ ├── deprel.fro │ ├── deprel.ga │ ├── deprel.gd │ ├── deprel.gl │ ├── deprel.got │ ├── deprel.grc │ ├── deprel.gsw │ ├── deprel.gun │ ├── deprel.he │ ├── deprel.hi │ ├── deprel.hr │ ├── deprel.hsb │ ├── deprel.hu │ ├── deprel.hy │ ├── deprel.id │ ├── deprel.is │ ├── deprel.it │ ├── deprel.ja │ ├── deprel.kk │ ├── deprel.kmr │ ├── deprel.ko │ ├── deprel.koi │ ├── deprel.kpv │ ├── deprel.krl │ ├── deprel.la │ ├── deprel.lt │ ├── deprel.lv │ ├── deprel.lzh │ ├── deprel.mdf │ ├── deprel.mr │ ├── deprel.mt │ ├── deprel.myv │ ├── deprel.nl │ ├── deprel.no │ ├── deprel.olo │ ├── deprel.orv │ ├── deprel.pcm │ ├── deprel.pl │ ├── deprel.pt │ ├── deprel.qhe │ ├── deprel.quz │ ├── deprel.ro │ ├── deprel.ru │ ├── deprel.sa │ ├── deprel.shopen │ ├── deprel.sk │ ├── deprel.sl │ ├── deprel.sme │ ├── deprel.sms │ ├── deprel.sq │ ├── deprel.sr │ ├── deprel.sv │ ├── deprel.swl │ ├── deprel.ta │ ├── deprel.te │ ├── deprel.th │ ├── deprel.tl │ ├── deprel.tr │ ├── deprel.ud │ ├── deprel.ug │ ├── deprel.uk │ ├── deprel.ur │ ├── deprel.vi │ ├── deprel.wbp │ ├── deprel.wo │ ├── deprel.yo │ ├── deprel.yue │ ├── deprel.zh │ ├── edeprel.ar │ ├── edeprel.bg │ ├── edeprel.cs │ ├── edeprel.en │ ├── edeprel.et │ ├── edeprel.fi │ ├── edeprel.fr │ ├── edeprel.it │ ├── edeprel.lt │ ├── edeprel.lv │ ├── edeprel.nl │ ├── edeprel.pl │ ├── edeprel.ru │ ├── edeprel.sk │ ├── edeprel.sv │ ├── edeprel.ta │ ├── edeprel.ud │ ├── 
edeprel.uk │ ├── feat_val.af │ ├── feat_val.akk │ ├── feat_val.am │ ├── feat_val.ar │ ├── feat_val.be │ ├── feat_val.bg │ ├── feat_val.bho │ ├── feat_val.bm │ ├── feat_val.br │ ├── feat_val.bxr │ ├── feat_val.ca │ ├── feat_val.cop │ ├── feat_val.cs │ ├── feat_val.cu │ ├── feat_val.cy │ ├── feat_val.da │ ├── feat_val.de │ ├── feat_val.el │ ├── feat_val.en │ ├── feat_val.es │ ├── feat_val.et │ ├── feat_val.eu │ ├── feat_val.fa │ ├── feat_val.fi │ ├── feat_val.fo │ ├── feat_val.fr │ ├── feat_val.fro │ ├── feat_val.ga │ ├── feat_val.gd │ ├── feat_val.gl │ ├── feat_val.got │ ├── feat_val.grc │ ├── feat_val.gun │ ├── feat_val.he │ ├── feat_val.hi │ ├── feat_val.hr │ ├── feat_val.hsb │ ├── feat_val.hu │ ├── feat_val.hy │ ├── feat_val.id │ ├── feat_val.is │ ├── feat_val.it │ ├── feat_val.ja │ ├── feat_val.kk │ ├── feat_val.kmr │ ├── feat_val.ko │ ├── feat_val.koi │ ├── feat_val.kpv │ ├── feat_val.krl │ ├── feat_val.la │ ├── feat_val.lt │ ├── feat_val.lv │ ├── feat_val.lzh │ ├── feat_val.mdf │ ├── feat_val.mr │ ├── feat_val.mt │ ├── feat_val.myv │ ├── feat_val.nl │ ├── feat_val.no │ ├── feat_val.olo │ ├── feat_val.orv │ ├── feat_val.pl │ ├── feat_val.pt │ ├── feat_val.quz │ ├── feat_val.ro │ ├── feat_val.ru │ ├── feat_val.sa │ ├── feat_val.sd │ ├── feat_val.shopen │ ├── feat_val.sk │ ├── feat_val.sl │ ├── feat_val.sme │ ├── feat_val.sms │ ├── feat_val.sq │ ├── feat_val.sr │ ├── feat_val.sv │ ├── feat_val.swl │ ├── feat_val.ta │ ├── feat_val.th │ ├── feat_val.tr │ ├── feat_val.ud │ ├── feat_val.ug │ ├── feat_val.uk │ ├── feat_val.ur │ ├── feat_val.vi │ ├── feat_val.wbp │ ├── feat_val.wo │ ├── feat_val.yo │ ├── feat_val.yue │ ├── feat_val.zh │ ├── feats.ud │ ├── tokens_w_space.am │ ├── tokens_w_space.br │ ├── tokens_w_space.fi │ ├── tokens_w_space.fr │ ├── tokens_w_space.fro │ ├── tokens_w_space.gun │ ├── tokens_w_space.kk │ ├── tokens_w_space.kmr │ ├── tokens_w_space.lt │ ├── tokens_w_space.lv │ ├── tokens_w_space.myv │ ├── tokens_w_space.orv │ ├── tokens_w_space.pl │ ├── 
tokens_w_space.shopen │ ├── tokens_w_space.sms │ ├── tokens_w_space.sv │ ├── tokens_w_space.ud │ └── tokens_w_space.vi ├── enhanced_collapse_empty_nodes.pl ├── iwpt20_xud_eval.py ├── read_dataset.py └── validate.py ├── LICENSE ├── README.md ├── algorithms ├── __init__.py ├── dict_merge.py └── maximum_spanning_tree.py ├── config ├── README.md ├── multi_bert_1000epoch_0.5inter_3000batch_0.002lr_400hidden_multilingual_nocrf_fast_nodev_dependency0.yaml ├── multi_bert_10epoch_0.5inter_3000batch_0.00005lr_20lrrate_multilingual_nocrf_fast_warmup_freezing_beta_weightdecay_finetune_nodev_dependency15.yaml ├── multi_bert_10epoch_10anneal_2000batch_0.00005lr_10000lrrate_5decay_800hidden_multilingual_crf_sentloss_distill_posterior_4temperature_fast_finetune_relearn_nodev_ner1.yaml ├── multi_bert_10epoch_2000batch_0.00005lr_10000lrrate_5decay_800hidden_multilingual_crf_sentloss_baseline_fast_finetune_relearn_nodev_ner0.yaml ├── multi_bert_10epoch_2000batch_0.00005lr_multilingual_nocrf_sentloss_baseline_fast_finetune_relearn_nodev_ner0.yaml ├── multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_1best_old_relearn_nodev_fast_new_ner0.yaml ├── multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_crfatt_old_relearn_nodev_fast_new_ner0.yaml ├── multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_crfatt_posterior_4temperature_both_old_relearn_nodev_fast_new_ner1.yaml ├── multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_posterior_2.25temperature_old_relearn_nodev_fast_new_ner0.yaml ├── multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_posterior_4temperature_old_relearn_nodev_fast_new_ner0.yaml ├── multi_bert_flair_2000batch_1lr_de_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml ├── 
multi_bert_flair_2000batch_1lr_en_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml ├── multi_bert_flair_2000batch_1lr_es_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner0.yaml ├── multi_bert_flair_2000batch_1lr_nl_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml ├── multi_bert_origflair_300epoch_2000batch_0.1lr_256hidden_de_monolingual_crf_sentloss_10patience_baseline_nodev_ner0.yaml ├── multi_bert_origflair_300epoch_2000batch_0.1lr_256hidden_en_monolingual_crf_sentloss_10patience_baseline_nodev_ner0.yaml ├── multi_bert_origflair_300epoch_2000batch_0.1lr_256hidden_es_monolingual_crf_sentloss_10patience_baseline_nodev_ner1.yaml ├── multi_bert_origflair_300epoch_2000batch_0.1lr_256hidden_nl_monolingual_crf_sentloss_10patience_baseline_nodev_ner1.yaml ├── multi_bert_origflair_300epoch_2000batch_1lr_256hidden_de_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner12.yaml ├── multi_bert_origflair_300epoch_2000batch_1lr_256hidden_en_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner11.yaml ├── multi_bert_origflair_300epoch_2000batch_1lr_256hidden_es_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner12.yaml ├── multi_bert_origflair_300epoch_2000batch_1lr_256hidden_nl_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner11.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_de_monolingual_nocrf_fast_nodev_dependency0.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_en_monolingual_nocrf_fast_nodev_dependency1.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_es_monolingual_nocrf_fast_nodev_dependency1.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_fa_monolingual_nocrf_fast_nodev_dependency0.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_fr_monolingual_nocrf_fast_nodev_dependency0.yaml ├── 
multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_he_monolingual_nocrf_fast_nodev_dependency1.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_id_monolingual_nocrf_fast_nodev_dependency1.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_it_monolingual_nocrf_fast_nodev_dependency0.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_ja_monolingual_nocrf_fast_nodev_dependency1.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_nl_monolingual_nocrf_fast_nodev_dependency1.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_sl_monolingual_nocrf_fast_nodev_dependency1.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_sv_monolingual_nocrf_fast_nodev_dependency0.yaml ├── multi_bert_word_origflair_1000epoch_0.5inter_3000batch_0.002lr_400hidden_ta_monolingual_nocrf_fast_nodev_dependency1.yaml ├── test_de.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_ar_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_bg_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_cs_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_en_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_et_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_fi_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_fr_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_it_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── 
xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_lt_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_lv_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_nl_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_pl_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_ru_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_sk_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_sv_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml ├── xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_uk_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml └── xlmr_word_origflair_1000epoch_0.1inter_2000batch_0.002lr_400hidden_ta_monolingual_nocrf_fast_2nd_unrel_250upsample_nodev_enhancedud27.yaml ├── flair ├── CRF.py ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── __init__.cpython-37.pyc │ ├── config_parser.cpython-36.pyc │ ├── config_parser.cpython-37.pyc │ ├── corpus_mapping.cpython-36.pyc │ ├── corpus_mapping.cpython-37.pyc │ ├── custom_data_loader.cpython-36.pyc │ ├── custom_data_loader.cpython-37.pyc │ ├── data.cpython-36.pyc │ ├── data.cpython-37.pyc │ ├── datasets.cpython-36.pyc │ ├── datasets.cpython-37.pyc │ ├── embeddings.cpython-36.pyc │ ├── embeddings.cpython-37.pyc │ ├── file_utils.cpython-36.pyc │ ├── file_utils.cpython-37.pyc │ ├── linear_functions.cpython-36.pyc │ ├── linear_functions.cpython-37.pyc │ ├── list_data.cpython-36.pyc │ ├── list_data.cpython-37.pyc │ ├── nn.cpython-36.pyc │ ├── nn.cpython-37.pyc │ ├── optim.cpython-36.pyc │ ├── optim.cpython-37.pyc │ ├── training_utils.cpython-36.pyc │ ├── training_utils.cpython-37.pyc │ ├── variational_inference.cpython-36.pyc │ └── 
variational_inference.cpython-37.pyc ├── algorithms │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── dict_merge.cpython-36.pyc │ │ ├── dict_merge.cpython-37.pyc │ │ └── maximum_spanning_tree.cpython-36.pyc │ ├── dict_merge.py │ └── maximum_spanning_tree.py ├── config_parser.py ├── corpus_mapping.py ├── custom_data_loader.py ├── data.py ├── data_fetcher.py ├── datasets.py ├── embeddings.py ├── file_utils.py ├── hyperparameter │ ├── __init__.py │ ├── param_selection.py │ └── parameter.py ├── linear_functions.py ├── list_data.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── biaffine_attention.cpython-36.pyc │ │ ├── biaffine_attention.cpython-37.pyc │ │ ├── dependency_model.cpython-36.pyc │ │ ├── dependency_model.cpython-37.pyc │ │ ├── language_model.cpython-36.pyc │ │ ├── language_model.cpython-37.pyc │ │ ├── mst_decoder.cpython-36.pyc │ │ ├── mst_decoder.cpython-37.pyc │ │ ├── sequence_tagger_model.cpython-36.pyc │ │ ├── sequence_tagger_model.cpython-37.pyc │ │ ├── text_classification_model.cpython-36.pyc │ │ └── text_classification_model.cpython-37.pyc │ ├── biaffine_attention.py │ ├── dependency_model.py │ ├── language_model.py │ ├── mst_decoder.py │ ├── sequence_tagger_model.py │ ├── text_classification_model.py │ └── text_regression_model.py ├── nn.py ├── optim.py ├── parser │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── model.cpython-36.pyc │ │ └── model.cpython-37.pyc │ ├── cmds │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── cmd.cpython-36.pyc │ │ │ ├── cmd.cpython-37.pyc │ │ │ ├── evaluate.cpython-36.pyc │ │ │ ├── evaluate.cpython-37.pyc │ │ │ ├── predict.cpython-37.pyc │ │ │ └── train.cpython-37.pyc │ │ ├── cmd.py │ │ ├── evaluate.py │ │ ├── predict.py │ │ └── train.py │ ├── config.py │ ├── model.py 
│ ├── modules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── bert.cpython-36.pyc │ │ │ ├── bert.cpython-37.pyc │ │ │ ├── biaffine.cpython-36.pyc │ │ │ ├── biaffine.cpython-37.pyc │ │ │ ├── bilstm.cpython-36.pyc │ │ │ ├── bilstm.cpython-37.pyc │ │ │ ├── char_lstm.cpython-36.pyc │ │ │ ├── char_lstm.cpython-37.pyc │ │ │ ├── dropout.cpython-36.pyc │ │ │ ├── dropout.cpython-37.pyc │ │ │ ├── mlp.cpython-36.pyc │ │ │ ├── mlp.cpython-37.pyc │ │ │ ├── scalar_mix.cpython-36.pyc │ │ │ ├── scalar_mix.cpython-37.pyc │ │ │ ├── trilinear_attention.cpython-36.pyc │ │ │ └── trilinear_attention.cpython-37.pyc │ │ ├── bert.py │ │ ├── biaffine.py │ │ ├── bilstm.py │ │ ├── char_lstm.py │ │ ├── dropout.py │ │ ├── mlp.py │ │ ├── scalar_mix.py │ │ └── trilinear_attention.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── alg.cpython-36.pyc │ │ ├── alg.cpython-37.pyc │ │ ├── common.cpython-36.pyc │ │ ├── common.cpython-37.pyc │ │ ├── corpus.cpython-36.pyc │ │ ├── corpus.cpython-37.pyc │ │ ├── data.cpython-36.pyc │ │ ├── data.cpython-37.pyc │ │ ├── embedding.cpython-36.pyc │ │ ├── embedding.cpython-37.pyc │ │ ├── field.cpython-36.pyc │ │ ├── field.cpython-37.pyc │ │ ├── fn.cpython-36.pyc │ │ ├── fn.cpython-37.pyc │ │ ├── metric.cpython-36.pyc │ │ ├── metric.cpython-37.pyc │ │ ├── vocab.cpython-36.pyc │ │ └── vocab.cpython-37.pyc │ │ ├── alg.py │ │ ├── common.py │ │ ├── corpus.py │ │ ├── data.py │ │ ├── embedding.py │ │ ├── field.py │ │ ├── fn.py │ │ ├── metric.py │ │ └── vocab.py ├── samplers.py ├── trainers │ ├── .finetune_trainer.py.swp │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── distillation_trainer.cpython-36.pyc │ │ ├── distillation_trainer.cpython-37.pyc │ │ ├── finetune_trainer.cpython-36.pyc │ │ ├── finetune_trainer.cpython-37.pyc │ │ ├── trainer.cpython-36.pyc │ │ └── 
trainer.cpython-37.pyc │ ├── distillation_trainer.py │ ├── finetune_trainer.py │ ├── language_model_trainer.py │ └── trainer.py ├── training_utils.py ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── archival.cpython-36.pyc │ │ ├── checks.cpython-36.pyc │ │ ├── checks.cpython-37.pyc │ │ ├── environment.cpython-36.pyc │ │ ├── exception_hook.cpython-36.pyc │ │ ├── exception_hook.cpython-37.pyc │ │ ├── file.cpython-36.pyc │ │ ├── from_params.cpython-36.pyc │ │ ├── from_params.cpython-37.pyc │ │ ├── logging.cpython-36.pyc │ │ ├── logging.cpython-37.pyc │ │ ├── nn.cpython-36.pyc │ │ ├── params.cpython-36.pyc │ │ ├── params.cpython-37.pyc │ │ ├── registrable.cpython-36.pyc │ │ ├── string.cpython-36.pyc │ │ ├── time.cpython-36.pyc │ │ └── tqdm.cpython-36.pyc │ ├── archival.py │ ├── checks.py │ ├── environment.py │ ├── exception_hook.py │ ├── extract_tokens_from_amr.py │ ├── file.py │ ├── from_params.py │ ├── logging.py │ ├── nn.py │ ├── params.py │ ├── registrable.py │ ├── string.py │ ├── time.py │ └── tqdm.py └── visual │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── __init__.cpython-37.pyc │ ├── activations.cpython-36.pyc │ ├── activations.cpython-37.pyc │ ├── manifold.cpython-36.pyc │ ├── manifold.cpython-37.pyc │ ├── training_curves.cpython-36.pyc │ └── training_curves.cpython-37.pyc │ ├── activations.py │ ├── html_templates.py │ ├── manifold.py │ ├── ner_html.py │ └── training_curves.py ├── requirements.txt ├── resources ├── docs │ ├── EXPERIMENTS.md │ ├── TUTORIAL_1_BASICS.md │ ├── TUTORIAL_2_TAGGING.md │ ├── TUTORIAL_3_WORD_EMBEDDING.md │ ├── TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md │ ├── TUTORIAL_5_DOCUMENT_EMBEDDINGS.md │ ├── TUTORIAL_6_CORPUS.md │ ├── TUTORIAL_7_TRAINING_A_MODEL.md │ ├── TUTORIAL_8_MODEL_OPTIMIZATION.md │ ├── TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md │ ├── embeddings │ │ ├── BYTE_PAIR_EMBEDDINGS.md │ │ ├── CHARACTER_EMBEDDINGS.md │ │ ├── 
CLASSIC_WORD_EMBEDDINGS.md │ │ ├── ELMO_EMBEDDINGS.md │ │ ├── FASTTEXT_EMBEDDINGS.md │ │ ├── FLAIR_EMBEDDINGS.md │ │ └── TRANSFOMER_EMBEDDINGS.md │ └── flair_logo.svg └── taggers │ ├── ast_tags_new.pkl │ ├── dependency_projective_tags.pkl │ ├── enhancedud_tags.pkl │ ├── ner_tags.pkl │ ├── np_tags.pkl │ ├── pos_tags.pkl │ ├── ptb_tags.pkl │ └── ud_dependency_tags.pkl ├── train_with_teacher.py └── utils ├── __init__.py ├── archival.py ├── checks.py ├── environment.py ├── exception_hook.py ├── extract_tokens_from_amr.py ├── file.py ├── from_params.py ├── logging.py ├── nn.py ├── params.py ├── registrable.py ├── string.py ├── time.py └── tqdm.py /EUD/README.md: -------------------------------------------------------------------------------- 1 | # Enhanced Universal Dependency Parsing 2 | 3 | [Data](https://universaldependencies.org/iwpt20/data.html) 4 | In our setting, we concatenate all treebanks for each language. 5 | 6 | ## Preprocessing 7 | 8 | Modify `tar_dir` and `target` in `convert_EUD_to_collapsed.py`, then: 9 | 10 | ``` 11 | python convert_EUD_to_collapsed.py 12 | ``` 13 | 14 | ## Postprocessing 15 | 16 | Move all output `.conll` files into a single directory and modify `tar_dir` in `back_conversion.py`, then: 17 | 18 | ``` 19 | python back_conversion.py 20 | ``` 21 | 22 | If you just want to evaluate the results offline without any other validation constraints, run: 23 | 24 | ``` 25 | python iwpt20_xud_eval.py $gold_file $system_file 26 | ``` 27 | 28 | If you want to use the official evaluation pipeline, run: 29 | ``` 30 | perl conllu-quick-fix.pl $system_file > $output_file 31 | python validate.py --level 2 --lang $language $output_file 32 | ``` 33 | `validate.py` may warn about a few non-connected graphs in the file (usually in the low-resource Tamil outputs); I fixed these manually :). 
"""Collapse empty nodes in Enhanced UD CoNLL-U files.

Scans ``tar_dir`` one level deep.  Every matching file found directly in
``tar_dir`` or inside one of its immediate sub-directories is piped through
the perl script ``enhanced_collapse_empty_nodes.pl``; the perl script's
standard output is written next to the input as ``<stem>_collapsed<ext>``.

Edit ``tar_dir`` and ``target`` below, then run::

    python convert_EUD_to_collapsed.py
"""
import pdb  # noqa: F401 -- not used at runtime; kept for interactive debugging
import os
import subprocess
import sys

# Directory to scan (edit before running).
tar_dir = 'sub10'
# Substring a file name must contain to be converted.
target = 'conllu'
# Perl converter, resolved relative to the current working directory.
collapse_script = 'tools/enhanced_collapse_empty_nodes.pl'
# Marker appended to the stem of every generated file.
collapsed_suffix = '_collapsed'


def collapsed_path(path):
    """Return the output path for *path*: ``<stem>_collapsed<ext>``.

    ``os.path.splitext`` only looks at the last path component, so dots in
    directory names (``./sub10/x.conllu``) and extension-less names are
    handled, unlike splitting the whole path on ``'.'``.
    """
    stem, ext = os.path.splitext(path)
    return stem + collapsed_suffix + ext


def is_target_file(path, pattern):
    """Return True when *path* is a regular file that should be converted.

    The match is done on the base name only, so the name of the scanned
    directory can never make an unrelated entry look like a treebank file.
    Files whose stem already ends in ``_collapsed`` are skipped so that
    re-running the script does not produce ``*_collapsed_collapsed`` files.
    """
    name = os.path.basename(path)
    if pattern not in name or 'conll' not in name:
        return False
    if not os.path.isfile(path):
        return False
    stem, _ = os.path.splitext(name)
    return not stem.endswith(collapsed_suffix)


def find_targets(entry_path, pattern):
    """Return the files to convert for one top-level entry of the scan.

    A directory contributes its matching direct children (no recursion);
    any other entry contributes itself if it matches.  Non-matching regular
    files yield an empty list instead of being passed to ``os.listdir``.
    """
    if os.path.isdir(entry_path):
        candidates = [os.path.join(entry_path, name)
                      for name in os.listdir(entry_path)]
    else:
        candidates = [entry_path]
    return [path for path in candidates if is_target_file(path, pattern)]


def resolve_collapse_script():
    """Return the path of the perl converter to run.

    Prefers ``collapse_script`` as configured; if that file is missing,
    falls back to a copy with the same base name located next to this
    script.  When neither exists the configured path is returned unchanged
    so that perl itself reports the problem.
    """
    if os.path.isfile(collapse_script):
        return collapse_script
    here = os.path.dirname(os.path.abspath(__file__))
    bundled = os.path.join(here, os.path.basename(collapse_script))
    if os.path.isfile(bundled):
        return bundled
    return collapse_script


def collapse_file(path, script):
    """Run the perl converter on *path* and return the output file path.

    A non-zero perl exit status is reported on stderr rather than raised,
    so one bad file does not abort the remaining conversions.
    """
    out_path = collapsed_path(path)
    with open(out_path, 'w') as outfile:
        result = subprocess.run(['perl', script, path], stdout=outfile)
    if result.returncode != 0:
        print('warning: perl exited with status %d for %s; %s may be '
              'empty or incomplete' % (result.returncode, path, out_path),
              file=sys.stderr)
    return out_path


def main(directory=None, pattern=None):
    """Convert every matching file under *directory*.

    Both arguments default to the module-level ``tar_dir`` / ``target``
    settings.  The name of each top-level entry is printed as it is visited.
    """
    directory = tar_dir if directory is None else directory
    pattern = target if pattern is None else pattern
    script = resolve_collapse_script()
    # os.listdir returns a snapshot, so files created below are not revisited.
    for entry in os.listdir(directory):
        print(entry)
        for path in find_targets(os.path.join(directory, entry), pattern):
            collapse_file(path, script)


if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- /EUD/data/deprel.be: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:discourse 3 | aux:pass 4 | flat:foreign 5 | flat:name 6 | nsubj:pass 7 | nummod:gov 8 | nummod:entity 9 | obl:agent 10 | -------------------------------------------------------------------------------- /EUD/data/deprel.bg: -------------------------------------------------------------------------------- 1 | acl 2 | acl:relcl 3 | advcl 4 | advmod 5 | amod 6 | appos 7 | aux 8 | aux:pass 9 | case 10 | cc 11 | ccomp 12 | compound 13 | conj 14 | cop 15 | csubj 16 | csubj:pass 17 | det 18 | discourse 19 | expl 20 | fixed 21 | flat 22 | goeswith 23 | iobj 24 | mark 25 | nmod 26 | nsubj 27 | nsubj:pass 28 | nummod 29 | obj 30 | obl 31 | orphan 32 | parataxis 33 | punct 34 | root 35 | vocative 36 | xcomp 37 | -------------------------------------------------------------------------------- /EUD/data/deprel.bm: -------------------------------------------------------------------------------- 1 | det:rel 2 | parataxis:obj 3 | compound:redup 4 | nmod:poss 5 | -------------------------------------------------------------------------------- /EUD/data/deprel.br: -------------------------------------------------------------------------------- 1 | acl:focus 2 | acl:relcl 3 | advmod:neg 4 | aux:pass 5 | fixed:name 6 | flat:name 7 | nmod:gen 8 | nmod:poss 9 | nsubj:appos 10 | nsubj:cop 11 | obl:agent 12 | obl:x 13 | -------------------------------------------------------------------------------- /EUD/data/deprel.bxr: -------------------------------------------------------------------------------- 1 | advmod:neg 2 | aux:pass 3 | flat:foreign 4 | nmod:own 5 | nsubj:pass 6 | -------------------------------------------------------------------------------- /EUD/data/deprel.ca: -------------------------------------------------------------------------------- 1 | advmod:emph 2 | aux:pass 3 
| csubj:pass 4 | expl:pass 5 | nsubj:pass 6 | obl:arg 7 | -------------------------------------------------------------------------------- /EUD/data/deprel.cop: -------------------------------------------------------------------------------- 1 | obl:npmod -------------------------------------------------------------------------------- /EUD/data/deprel.cs: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:emph 3 | aux:pass 4 | csubj:pass 5 | det:numgov 6 | det:nummod 7 | expl:pass 8 | expl:pv 9 | flat:foreign 10 | nsubj:pass 11 | nummod:gov 12 | obl:agent 13 | obl:arg 14 | -------------------------------------------------------------------------------- /EUD/data/deprel.cu: -------------------------------------------------------------------------------- 1 | advmod:emph 2 | expl:pass 3 | obj:dir 4 | aux:pass 5 | nsubj:pass 6 | csubj:pass 7 | obl:agent 8 | flat:name 9 | flat:foreign -------------------------------------------------------------------------------- /EUD/data/deprel.cy: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | flat:name 3 | nmod:poss 4 | nmod:obl 5 | nmod:agent 6 | nmod:nsubj 7 | nmod:obj 8 | nmod:ccomp 9 | case:pred 10 | aux:pass 11 | obl:agent 12 | nsubj:pass 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /EUD/data/deprel.da: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | compound:prt 3 | nmod:poss 4 | obl:loc 5 | obl:tmod -------------------------------------------------------------------------------- /EUD/data/deprel.de: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | cc:preconj 4 | compound:prt 5 | csubj:pass 6 | det:poss 7 | det:predet 8 | expl:pv 9 | flat:foreign 10 | flat:name 11 | nmod:gmod 12 | nmod:poss 13 | nsubj:pass 14 | obl:agent 15 | obl:arg 16 | 
obl:tmod 17 | -------------------------------------------------------------------------------- /EUD/data/deprel.el: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | obl:arg 3 | nsubj:pass 4 | csubj:pass 5 | obl:agent 6 | -------------------------------------------------------------------------------- /EUD/data/deprel.en: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | cc:preconj 3 | compound:prt 4 | det:predet 5 | nmod:npmod 6 | obl:npmod 7 | nmod:poss 8 | nmod:tmod 9 | obl:tmod 10 | nsubj:pass 11 | aux:pass 12 | csubj:pass 13 | flat:foreign 14 | -------------------------------------------------------------------------------- /EUD/data/deprel.es: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | cc:preconj 4 | compound:prt 5 | csubj:pass 6 | det:predet 7 | expl:pass 8 | flat:name 9 | nmod:poss 10 | nmod:tmod 11 | nsubj:pass 12 | obl:tmod 13 | -------------------------------------------------------------------------------- /EUD/data/deprel.et: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:quant 3 | cc:preconj 4 | compound:prt 5 | csubj:cop 6 | flat:foreign 7 | nmod:poss 8 | nsubj:cop 9 | -------------------------------------------------------------------------------- /EUD/data/deprel.eu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/deprel.eu -------------------------------------------------------------------------------- /EUD/data/deprel.fa: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | cc:preconj 4 | compound:lvc 5 | compound:prt 6 | csubj:pass 7 | det:predet 8 | flat:foreign 9 | nmod:poss 10 | nsubj:nc 11 | nsubj:pass 12 | 13 | 
-------------------------------------------------------------------------------- /EUD/data/deprel.fi: -------------------------------------------------------------------------------- 1 | aux:pass 2 | acl:relcl 3 | cc:preconj 4 | compound:nn 5 | compound:prt 6 | csubj:cop 7 | nmod:gobj 8 | nmod:gsubj 9 | nmod:poss 10 | nsubj:cop 11 | xcomp:ds 12 | flat:name 13 | flat:foreign 14 | cop:own 15 | -------------------------------------------------------------------------------- /EUD/data/deprel.fo: -------------------------------------------------------------------------------- 1 | cc:preconj 2 | nsubj:pass 3 | csubj:pass 4 | nmod:poss 5 | aux:pass 6 | acl:cleft 7 | acl:relcl 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.fr: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advcl:cleft 3 | advcl:periph 4 | advmod:periph 5 | appos:conj 6 | appos:nmod 7 | aux:caus 8 | aux:pass 9 | aux:tense 10 | ccomp:cleft 11 | conj:coord 12 | conj:dicto 13 | csubj:pass 14 | dep:iobj 15 | dep:obj 16 | det:predet 17 | expl:pass 18 | expl:subj 19 | expl:comp 20 | flat:foreign 21 | flat:name 22 | iobj:agent 23 | nmod:appos 24 | nsubj:caus 25 | nsubj:expl 26 | nsubj:pass 27 | obj:agent 28 | obj:lvc 29 | obl:agent 30 | obl:arg 31 | obl:comp 32 | obl:mod 33 | obl:periph 34 | parataxis:discourse 35 | parataxis:insert 36 | parataxis:obj 37 | parataxis:parenth 38 | xcomp:lvc 39 | -------------------------------------------------------------------------------- /EUD/data/deprel.fro: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:obl 3 | aux:pass 4 | case:det 5 | cc:nc 6 | mark:advmod 7 | mark:obj 8 | mark:obl 9 | nsubj:advmod 10 | nsubj:obj 11 | obj:advmod 12 | obj:advneg 13 | obj:obl 14 | obl:advmod 15 | -------------------------------------------------------------------------------- /EUD/data/deprel.ga: 
-------------------------------------------------------------------------------- 1 | acl:relcl 2 | case:voc 3 | compound:prt 4 | csubj:cleft 5 | csubj:cop 6 | flat:foreign 7 | flat:name 8 | mark:prt 9 | nmod:poss 10 | obl:prep 11 | obl:tmod 12 | xcomp:pred 13 | -------------------------------------------------------------------------------- /EUD/data/deprel.gd: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | case:voc 3 | compound:prt 4 | csubj:cleft 5 | csubj:cop 6 | flat:foreign 7 | flat:name 8 | mark:prt 9 | nmod:poss 10 | obl:prep 11 | obl:smod 12 | obl:tmod 13 | xcomp:pred 14 | -------------------------------------------------------------------------------- /EUD/data/deprel.gl: -------------------------------------------------------------------------------- 1 | flat:name 2 | aux:pass 3 | nsubj:pass 4 | flat:foreign 5 | -------------------------------------------------------------------------------- /EUD/data/deprel.got: -------------------------------------------------------------------------------- 1 | advmod:emph 2 | expl:pass 3 | obj:dir 4 | aux:pass 5 | nsubj:pass 6 | csubj:pass 7 | obl:agent 8 | flat:name 9 | flat:foreign -------------------------------------------------------------------------------- /EUD/data/deprel.grc: -------------------------------------------------------------------------------- 1 | advmod:emph 2 | nsubj:pass 3 | csubj:pass 4 | obl:agent 5 | flat:name 6 | flat:foreign 7 | aux:pass -------------------------------------------------------------------------------- /EUD/data/deprel.gsw: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | compound:prt 4 | csubj:pass 5 | det:poss 6 | expl:pv 7 | flat:foreign 8 | flat:name 9 | nmod:poss 10 | nsubj:pass 11 | obl:agent 12 | obl:arg 13 | -------------------------------------------------------------------------------- /EUD/data/deprel.gun: 
-------------------------------------------------------------------------------- 1 | advmod:sentcon 2 | compound:svc 3 | dep:mod 4 | discourse:q 5 | dislocated:cleft 6 | obl:sentcon 7 | parataxis:rep 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.he: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | case:acc 3 | case:gen 4 | compound:affix 5 | compound:smixut 6 | det:def 7 | flat:name 8 | mark:q 9 | nmod:poss 10 | nsubj:cop 11 | -------------------------------------------------------------------------------- /EUD/data/deprel.hi: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | compound:conjv 4 | compound:redup 5 | csubj:pass 6 | det:predet 7 | flat:name 8 | nmod:poss 9 | nsubj:pass 10 | obl:agent 11 | obl:tmod 12 | -------------------------------------------------------------------------------- /EUD/data/deprel.hr: -------------------------------------------------------------------------------- 1 | advmod:emph 2 | aux:pass 3 | csubj:pass 4 | expl:pv 5 | flat:foreign 6 | nsubj:pass 7 | 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.hsb: -------------------------------------------------------------------------------- 1 | advmod:emph 2 | aux:pass 3 | csubj:pass 4 | dep:alt 5 | det:numgov 6 | det:nummod 7 | expl:pass 8 | expl:pv 9 | flat:foreign 10 | nsubj:pass 11 | nummod:gov 12 | -------------------------------------------------------------------------------- /EUD/data/deprel.hu: -------------------------------------------------------------------------------- 1 | advmod:locy 2 | advmod:mode 3 | advmod:obl 4 | advmod:que 5 | advmod:tfrom 6 | advmod:tlocy 7 | advmod:to 8 | advmod:tto 9 | amod:att 10 | amod:attlvc 11 | amod:mode 12 | amod:obl 13 | ccomp:obj 14 | ccomp:obl 15 | ccomp:pred 16 | compound:preverb 17 | nmod:att 18 | nmod:attlvc 19 | 
nmod:obl 20 | nmod:obllvc 21 | nsubj:lvc 22 | obj:lvc 23 | flat:name 24 | -------------------------------------------------------------------------------- /EUD/data/deprel.hy: -------------------------------------------------------------------------------- 1 | nsubj:caus 2 | nsubj:pass 3 | iobj:agent 4 | csubj:pass 5 | obl:agent 6 | advmod:emph 7 | aux:caus 8 | aux:ex 9 | nmod:npmod 10 | nmod:poss 11 | acl:relcl 12 | det:poss 13 | case:loc 14 | compound:lvc 15 | compound:redup 16 | compound:svc 17 | -------------------------------------------------------------------------------- /EUD/data/deprel.id: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | cc:preconj 3 | compound:a 4 | compound:n 5 | compound:plur 6 | compound:v 7 | csubj:pass 8 | dep:prt 9 | flat:name 10 | nmod:poss 11 | nsubj:pass 12 | obl:poss 13 | obl:tmod 14 | -------------------------------------------------------------------------------- /EUD/data/deprel.is: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | compound:prt 4 | csubj:pass 5 | flat:name 6 | nmod:poss 7 | nsubj:pass 8 | obl:arg 9 | -------------------------------------------------------------------------------- /EUD/data/deprel.it: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | cc:preconj 4 | csubj:pass 5 | det:poss 6 | det:predet 7 | discourse:emo 8 | expl:impers 9 | expl:pass 10 | flat:foreign 11 | flat:name 12 | nmod:poss 13 | nsubj:pass 14 | obl:agent 15 | obl:tmod 16 | parataxis:appos 17 | parataxis:discourse 18 | parataxis:hashtag 19 | parataxis:insert 20 | parataxis:nsubj 21 | parataxis:obj 22 | vocative:mention 23 | -------------------------------------------------------------------------------- /EUD/data/deprel.ja: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | compound:prt 4 | compound:v 5 | 
flat:name 6 | nsubj:pass 7 | obl:tmod 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.kk: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | obl:own 3 | nmod:poss 4 | iobj:caus 5 | flat:name 6 | compound:lvc 7 | acl:poss 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.kmr: -------------------------------------------------------------------------------- 1 | nmod:poss 2 | case:circ 3 | nmod:dat 4 | compound:lvc 5 | advmod:neg 6 | compound:nn 7 | compound:redup 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.ko: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | compound:lvc 3 | csubj:pass 4 | dep:prt 5 | det:poss 6 | flat:name 7 | nmod:poss 8 | nsubj:pass 9 | obl:tmod 10 | -------------------------------------------------------------------------------- /EUD/data/deprel.koi: -------------------------------------------------------------------------------- 1 | advmod:lmod 2 | advmod:tmod 3 | aux:cnd 4 | aux:neg 5 | cc:preconj 6 | flat:name 7 | nmod:poss 8 | nsubj:cop 9 | obl:agent 10 | obl:lmod 11 | obl:tmod 12 | -------------------------------------------------------------------------------- /EUD/data/deprel.kpv: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:lmod 3 | advmod:tmod 4 | aux:neg 5 | flat:name 6 | obl:lmod 7 | obl:tmod 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.krl: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | compound:prt 4 | cop:own 5 | csubj:pass 6 | flat:name 7 | nmod:gsubj 8 | nmod:poss 9 | nsubj:cop 10 | nsubj:pass 11 | xcomp:ds 12 | -------------------------------------------------------------------------------- 
/EUD/data/deprel.la: -------------------------------------------------------------------------------- 1 | advmod:emph 2 | aux:pass 3 | csubj:pass 4 | expl:pass 5 | nsubj:pass 6 | obl:agent 7 | flat:name 8 | flat:foreign 9 | acl:relcl 10 | obl:arg 11 | advmod:cc 12 | acl:appos 13 | nmod:appos 14 | advcl:appos 15 | advmod:appos 16 | nmod:advmod 17 | amod:advmod 18 | advcl:arg 19 | -------------------------------------------------------------------------------- /EUD/data/deprel.lt: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:emph 3 | aux:pass 4 | csubj:pass 5 | det:numgov 6 | flat:foreign 7 | nsubj:pass 8 | nummod:gov 9 | obl:agent 10 | obl:arg 11 | -------------------------------------------------------------------------------- /EUD/data/deprel.lv: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | csubj:pass 4 | flat:foreign 5 | flat:name 6 | nsubj:pass 7 | -------------------------------------------------------------------------------- /EUD/data/deprel.lzh: -------------------------------------------------------------------------------- 1 | compound:redup 2 | csubj:pass 3 | discourse:sp 4 | flat:vv 5 | nsubj:pass 6 | obl:lmod 7 | obl:tmod 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.mdf: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advcl:tmod 3 | advmod:lmod 4 | advmod:tmod 5 | aux:neg 6 | aux:opt 7 | aux:q 8 | cc:preconj 9 | cop:own 10 | csubj:cop 11 | flat:ellipsis 12 | flat:name 13 | nmod:bahuv 14 | nmod:part 15 | nmod:poss 16 | nsubj:cop 17 | nsubj:pass 18 | obl:agent 19 | obl:lmod 20 | obl:tmod 21 | -------------------------------------------------------------------------------- /EUD/data/deprel.mr: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | compound:lvc 4 | 
compound:redup 5 | compound:svc 6 | nmod:poss 7 | nsubj:own 8 | nsubj:pass 9 | -------------------------------------------------------------------------------- /EUD/data/deprel.mt: -------------------------------------------------------------------------------- 1 | advmod:neg 2 | aux:neg 3 | aux:part 4 | aux:pass 5 | case:det 6 | cop:expl 7 | flat:name 8 | nmod:poss 9 | nsubj:pass 10 | obl:arg 11 | obl:agent 12 | -------------------------------------------------------------------------------- /EUD/data/deprel.myv: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:tmod 3 | aux:cnd 4 | aux:imp 5 | aux:neg 6 | aux:opt 7 | aux:q 8 | cc:preconj 9 | compound:coll 10 | compound:redup 11 | compound:svc 12 | csubj:cop 13 | flat:ellipsis 14 | flat:name 15 | nmod:comp 16 | nmod:gobj 17 | nmod:gsubj 18 | nsubj:cop 19 | obl:agent 20 | obl:tmod 21 | xcomp:ds 22 | aux:q 23 | -------------------------------------------------------------------------------- /EUD/data/deprel.nl: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | compound:prt 4 | expl:pv 5 | flat:name 6 | nmod:poss 7 | nsubj:pass 8 | obl:agent 9 | -------------------------------------------------------------------------------- /EUD/data/deprel.no: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | compound:prt 4 | csubj:pass 5 | flat:foreign 6 | flat:name 7 | nsubj:pass 8 | parataxis:deletion 9 | discourse:filler 10 | acl:cleft 11 | -------------------------------------------------------------------------------- /EUD/data/deprel.olo: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:neg 3 | aux:pass 4 | compound:nn 5 | compound:prt 6 | cop:own 7 | csubj:cop 8 | csubj:pass 9 | flat:name 10 | nmod:gsubj 11 | nmod:poss 12 | nsubj:cop 13 | nsubj:pass 14 | xcomp:ds 15 | 
-------------------------------------------------------------------------------- /EUD/data/deprel.orv: -------------------------------------------------------------------------------- 1 | advmod:emph 2 | expl:pass 3 | obj:dir 4 | aux:pass 5 | nsubj:pass 6 | csubj:pass 7 | obl:agent 8 | flat:name 9 | flat:foreign 10 | nummod:gov 11 | acl:relcl 12 | expl:pv 13 | -------------------------------------------------------------------------------- /EUD/data/deprel.pcm: -------------------------------------------------------------------------------- 1 | acl:cleft 2 | acl:relcl 3 | advcl:periph 4 | advmod:emph 5 | aux:pass 6 | ccomp:cleft 7 | compound:prt 8 | conj:redup 9 | compound:svc 10 | conj:appos 11 | conj:coord 12 | conj:dicto 13 | csubj:quasi 14 | det:predet 15 | nmod:npmod 16 | nmod:poss 17 | nsubj:expl 18 | nsubj:pass 19 | nsubj:quasi 20 | obl:arg 21 | obl:mod 22 | obl:periph 23 | parataxis:conj 24 | parataxis:discourse 25 | parataxis:dislocated 26 | parataxis:obj 27 | parataxis:parenth 28 | -------------------------------------------------------------------------------- /EUD/data/deprel.pl: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:emph 3 | aux:aglt 4 | aux:mood 5 | aux:pass 6 | cc:preconj 7 | ccomp:obj 8 | cop:locat 9 | csubj:pass 10 | det:numgov 11 | det:nummod 12 | expl:impers 13 | expl:pass 14 | expl:pv 15 | flat:foreign 16 | nmod:poss 17 | nsubj:pass 18 | nummod:gov 19 | obl:agent 20 | obl:arg 21 | xcomp:obj 22 | advcl:relcl 23 | advcl:cmpr 24 | advmod:arg 25 | advmod:neg 26 | amod:flat 27 | aux:clitic 28 | aux:cnd 29 | aux:imp 30 | ccomp:cleft 31 | det:poss 32 | discourse:emo 33 | discourse:intj 34 | nmod:arg 35 | nmod:flat 36 | nmod:pred 37 | nummod:flat 38 | obl:cmpr 39 | obl:orphan 40 | parataxis:insert 41 | parataxis:obj 42 | xcomp:cleft 43 | xcomp:pred 44 | xcomp:subj 45 | -------------------------------------------------------------------------------- /EUD/data/deprel.pt: 
-------------------------------------------------------------------------------- 1 | acl:inf 2 | acl:part 3 | acl:relcl 4 | appos:parataxis 5 | aux:pass 6 | cc:preconj 7 | ccomp:parataxis 8 | compound:prt 9 | csubj:pass 10 | det:poss 11 | det:predet 12 | expl:pv 13 | flat:foreign 14 | flat:name 15 | nmod:npmod 16 | nmod:tmod 17 | nsubj:pass 18 | obl:agent 19 | obl:tmod 20 | xcomp:adj 21 | -------------------------------------------------------------------------------- /EUD/data/deprel.qhe: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | compound:conjv 4 | compound:redup 5 | csubj:pass 6 | det:predet 7 | flat:name 8 | nmod:poss 9 | nsubj:pass 10 | obl:agent 11 | obl:tmod 12 | -------------------------------------------------------------------------------- /EUD/data/deprel.quz: -------------------------------------------------------------------------------- 1 | flat:foreign 2 | obl:ben 3 | advcl:ss 4 | advcl:ds 5 | obl:src 6 | nmod:loc 7 | obl:caus 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.ro: -------------------------------------------------------------------------------- 1 | advcl:tcl 2 | advcl:tmod 3 | advmod:tmod 4 | aux:pass 5 | cc:preconj 6 | ccomp:pmod 7 | csubj:pass 8 | expl:impers 9 | expl:pass 10 | expl:poss 11 | expl:pv 12 | nmod:agent 13 | nmod:pmod 14 | nmod:tmod 15 | nsubj:pass 16 | -------------------------------------------------------------------------------- /EUD/data/deprel.ru: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:pass 3 | cc:preconj 4 | csubj:pass 5 | flat:foreign 6 | flat:name 7 | nmod:gmod 8 | nmod:poss 9 | nsubj:pass 10 | nummod:gov 11 | nummod:entity 12 | obl:agent 13 | obl:tmod 14 | -------------------------------------------------------------------------------- /EUD/data/deprel.sa: 
-------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:emph 3 | csubj:pass 4 | nmod:poss 5 | nsubj:cop 6 | nsubj:pass 7 | nummod:gov 8 | obl:agent 9 | obl:arg 10 | -------------------------------------------------------------------------------- /EUD/data/deprel.shopen: -------------------------------------------------------------------------------- 1 | acl:datsub 2 | acl:relcl 3 | advmod:emph 4 | aux:pass 5 | cc:preconj 6 | cc:postconj 7 | compound:dir 8 | compound:ext 9 | compound:prt 10 | compound:redup 11 | compound:svc 12 | det:poss 13 | mark:relcl 14 | nmod:poss 15 | nsubj:ben 16 | nsubj:caus 17 | nsubj:cop 18 | nsubj:loc 19 | nsubj:pass 20 | obj:agent 21 | obj:patient 22 | obl:agent 23 | obl:arg 24 | obl:patient 25 | obl:tmod 26 | -------------------------------------------------------------------------------- /EUD/data/deprel.sk: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:emph 3 | aux:pass 4 | csubj:pass 5 | det:numgov 6 | det:nummod 7 | expl:pass 8 | expl:pv 9 | flat:foreign 10 | nsubj:pass 11 | nummod:gov 12 | obl:agent 13 | obl:arg 14 | -------------------------------------------------------------------------------- /EUD/data/deprel.sl: -------------------------------------------------------------------------------- 1 | cc:preconj 2 | conj:extend 3 | discourse:filler 4 | parataxis:discourse 5 | parataxis:restart 6 | flat:foreign 7 | flat:name 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.sme: -------------------------------------------------------------------------------- 1 | nmod:poss 2 | acl:relcl 3 | aux:neg 4 | cc:preconj 5 | xcomp:obj 6 | compound:nn 7 | xcomp:pred 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.sms: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advcl:tmod 3 | 
advmod:tmod 4 | aux:neg 5 | cc:preconj 6 | flat:name 7 | nmod:part 8 | nmod:poss 9 | nsubj:cop 10 | obl:agent 11 | obl:lmod 12 | obl:tmod 13 | xcomp:obj 14 | xcomp:pred 15 | -------------------------------------------------------------------------------- /EUD/data/deprel.sq: -------------------------------------------------------------------------------- 1 | csubj:pass 2 | nmod:poss 3 | det:adj 4 | det:pron 5 | det:noun 6 | acl:relcl 7 | 8 | -------------------------------------------------------------------------------- /EUD/data/deprel.sr: -------------------------------------------------------------------------------- 1 | nummod:gov 2 | det:numgov 3 | 4 | -------------------------------------------------------------------------------- /EUD/data/deprel.sv: -------------------------------------------------------------------------------- 1 | acl:cleft 2 | acl:relcl 3 | aux:pass 4 | compound:prt 5 | csubj:pass 6 | flat:name 7 | nmod:poss 8 | nsubj:pass 9 | obl:agent 10 | -------------------------------------------------------------------------------- /EUD/data/deprel.swl: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | nmod:poss 3 | compound:svc 4 | compound:prt 5 | -------------------------------------------------------------------------------- /EUD/data/deprel.ta: -------------------------------------------------------------------------------- 1 | advmod:emph 2 | compound:prt 3 | nsubj:pass 4 | obl:arg 5 | -------------------------------------------------------------------------------- /EUD/data/deprel.te: -------------------------------------------------------------------------------- 1 | nmod:poss 2 | obl:tmod 3 | compound:svc 4 | compound:lvc 5 | nsubj:nc 6 | acl:relcl 7 | advcl:cond 8 | compound:redup 9 | nmod:tmod 10 | nmod:cmp 11 | obl:cau 12 | -------------------------------------------------------------------------------- /EUD/data/deprel.th: 
-------------------------------------------------------------------------------- 1 | acl:relcl 2 | cc:preconj 3 | compound:prt 4 | det:predet 5 | nmod:npmod 6 | obl:npmod 7 | nmod:poss 8 | nmod:tmod 9 | obl:tmod 10 | nsubj:pass 11 | aux:pass 12 | csubj:pass 13 | flat:foreign 14 | flat:name 15 | obl:poss 16 | -------------------------------------------------------------------------------- /EUD/data/deprel.tl: -------------------------------------------------------------------------------- 1 | compound:redup 2 | nmod:poss 3 | -------------------------------------------------------------------------------- /EUD/data/deprel.tr: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | advmod:emph 3 | aux:q 4 | cc:preconj 5 | compound:lvc 6 | compound:prt 7 | compound:redup 8 | det:predet 9 | flat:name 10 | nmod:arg 11 | nmod:poss 12 | nmod:tmod 13 | obl:tmod 14 | nsubj:cop 15 | csubj:cop 16 | nmod:part 17 | nmod:comp 18 | obl:agent 19 | -------------------------------------------------------------------------------- /EUD/data/deprel.ud: -------------------------------------------------------------------------------- 1 | #Allows empty 2 | acl 3 | advcl 4 | advmod 5 | amod 6 | appos 7 | aux 8 | case 9 | cc 10 | ccomp 11 | clf 12 | compound 13 | conj 14 | cop 15 | csubj 16 | dep 17 | det 18 | discourse 19 | dislocated 20 | expl 21 | fixed 22 | flat 23 | goeswith 24 | iobj 25 | list 26 | mark 27 | nmod 28 | nsubj 29 | nummod 30 | obj 31 | obl 32 | orphan 33 | parataxis 34 | punct 35 | reparandum 36 | root 37 | vocative 38 | xcomp 39 | -------------------------------------------------------------------------------- /EUD/data/deprel.ug: -------------------------------------------------------------------------------- 1 | advcl:cond 2 | advmod:emph 3 | advmod:tmod 4 | aux:q 5 | compound:lvc 6 | compound:redup 7 | nmod:abl 8 | nmod:cau 9 | nmod:clas 10 | nmod:comp 11 | nmod:ins 12 | nmod:loc 13 | nmod:part 14 | nmod:pass 15 | nmod:poss 
16 | nmod:ref 17 | nmod:tmod 18 | nsubj:cop 19 | nsubj:pass 20 | obj:cau 21 | -------------------------------------------------------------------------------- /EUD/data/deprel.uk: -------------------------------------------------------------------------------- 1 | acl:adv 2 | acl:relcl 3 | advcl:sp 4 | advcl:svc 5 | advmod:det 6 | compound:svc 7 | conj:svc 8 | det:numgov 9 | det:nummod 10 | flat:abs 11 | flat:foreign 12 | flat:name 13 | flat:range 14 | flat:repeat 15 | flat:sibl 16 | flat:title 17 | nummod:gov 18 | parataxis:discourse 19 | parataxis:newsent 20 | parataxis:rel 21 | vocative:cl 22 | xcomp:sp 23 | -------------------------------------------------------------------------------- /EUD/data/deprel.ur: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | 3 | -------------------------------------------------------------------------------- /EUD/data/deprel.vi: -------------------------------------------------------------------------------- 1 | aux:pass 2 | 3 | -------------------------------------------------------------------------------- /EUD/data/deprel.wbp: -------------------------------------------------------------------------------- 1 | nmod:poss 2 | obl:tmod 3 | -------------------------------------------------------------------------------- /EUD/data/deprel.wo: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | compound:prt 3 | compound:svc 4 | iobj:appl 5 | nmod:poss 6 | obj:appl 7 | obj:caus 8 | obl:appl -------------------------------------------------------------------------------- /EUD/data/deprel.yo: -------------------------------------------------------------------------------- 1 | compound:prt 2 | compound:svc 3 | -------------------------------------------------------------------------------- /EUD/data/deprel.yue: -------------------------------------------------------------------------------- 1 | advcl:coverb 2 | advmod:df 3 | discourse:sp 4 | 
case:loc 5 | compound:dir 6 | compound:ext 7 | compound:quant 8 | compound:vo 9 | compound:vv 10 | mark:adv 11 | mark:rel 12 | nsubj:pass 13 | nsubj:periph 14 | obj:periph 15 | obl:agent 16 | obl:patient 17 | obl:tmod 18 | -------------------------------------------------------------------------------- /EUD/data/deprel.zh: -------------------------------------------------------------------------------- 1 | acl:relcl 2 | aux:caus 3 | aux:pass 4 | case:aspect 5 | case:dec 6 | case:pref 7 | case:suff 8 | csubj:pass 9 | flat:foreign 10 | flat:name 11 | mark:advb 12 | mark:comp 13 | mark:prt 14 | mark:relcl 15 | nmod:poss 16 | nmod:tmod 17 | nsubj:pass 18 | advmod:df 19 | case:loc 20 | compound:dir 21 | compound:ext 22 | compound:vo 23 | compound:vv 24 | discourse:sp 25 | dislocated:vo 26 | mark:adv 27 | mark:rel 28 | obl:agent 29 | obl:patient 30 | obl:tmod 31 | discourse:sp 32 | compound:vv 33 | obl:tmod 34 | mark:rel 35 | obj:periph 36 | case:loc 37 | compound:dir 38 | obl:patient 39 | obl:agent 40 | compound:ext 41 | advmod:df 42 | compound:vo 43 | -------------------------------------------------------------------------------- /EUD/data/edeprel.fr: -------------------------------------------------------------------------------- 1 | acl:enh 2 | acl:relclenh 3 | advcl:enh 4 | advmod:enh 5 | amod:enh 6 | appos:enh 7 | case:enh 8 | ccomp:enh 9 | ccomp:xoxcsubj 10 | ccomp:xoxcsubjenh 11 | cop:enh 12 | csubj:enh 13 | iobj:agentxoxnsubj 14 | iobj:enh 15 | nmod:enh 16 | nsubj:enh 17 | nsubj:passxoxobj 18 | nsubj:passxoxobjenh 19 | nsubj:xoxnsubjcaus 20 | nsubj:xoxnsubjcausenh 21 | obj:agentxoxnsubj 22 | obj:enh 23 | obj:xoxnsubj 24 | obj:xoxnsubjenh 25 | obj:xoxnsubjxxx 26 | obl:agentenh 27 | obl:agentxoxnsubj 28 | obl:agentxoxnsubjenh 29 | obl:enh 30 | xcomp:enh 31 | xcomp:xoxcsubj 32 | xcomp:xoxcsubjenh 33 | -------------------------------------------------------------------------------- /EUD/data/edeprel.lv: 
-------------------------------------------------------------------------------- 1 | acl:acc 2 | acl:dat 3 | acl:gen 4 | acl:loc 5 | acl:nom 6 | advcl:kā 7 | advcl:nekā 8 | nmod:acc 9 | nmod:aiz 10 | nmod:ap 11 | nmod:apakšā 12 | nmod:apkārt 13 | nmod:ar 14 | nmod:bez 15 | nmod:caur 16 | nmod:cauri 17 | nmod:dat 18 | nmod:gar 19 | nmod:gen 20 | nmod:iepretim 21 | nmod:kopš 22 | nmod:kā 23 | nmod:lejpus 24 | nmod:labā 25 | nmod:loc 26 | nmod:līdz 27 | nmod:nekā 28 | nmod:no 29 | nmod:nom 30 | nmod:otrpus 31 | nmod:pa 32 | nmod:par 33 | nmod:pie 34 | nmod:pirms 35 | nmod:pret 36 | nmod:pār 37 | nmod:pāri 38 | nmod:pēc 39 | nmod:starp 40 | nmod:uz 41 | nmod:virs 42 | nmod:viņpus 43 | nmod:voc 44 | nmod:zem 45 | nmod:ārpus 46 | obl:acc 47 | obl:aiz 48 | obl:ap 49 | obl:apkārt 50 | obl:ar 51 | obl:augšpus 52 | obl:bez 53 | obl:blakus 54 | obl:caur 55 | obl:cauri 56 | obl:dat 57 | obl:dēļ 58 | obl:gar 59 | obl:garām 60 | obl:gen 61 | obl:iepretim 62 | obl:iepretī 63 | obl:klāt 64 | obl:kopš 65 | obl:kā 66 | obl:kā_jau 67 | obl:labad 68 | obl:loc 69 | obl:līdz 70 | obl:līdzi 71 | obl:līdztekus 72 | obl:līdzās 73 | obl:nekā 74 | obl:no 75 | obl:nom 76 | obl:otrpus 77 | obl:pa 78 | obl:pakaļ 79 | obl:par 80 | obl:pie 81 | obl:pirms 82 | obl:pret 83 | obl:pretim 84 | obl:pretī 85 | obl:priekšā 86 | obl:pār 87 | obl:pāri 88 | obl:pēc 89 | obl:starp 90 | obl:tuvu 91 | obl:tuvāk 92 | obl:tā_kā 93 | obl:tāpat_kā 94 | obl:uz 95 | obl:virs 96 | obl:virsū 97 | obl:visaplīk 98 | obl:zem 99 | obl:ārpus 100 | -------------------------------------------------------------------------------- /EUD/data/edeprel.ud: -------------------------------------------------------------------------------- 1 | ref 2 | -------------------------------------------------------------------------------- /EUD/data/feat_val.af: -------------------------------------------------------------------------------- 1 | AdjType=Attr 2 | AdjType=Pred 3 | AdpType=Prep 4 | Degree=Dim 5 | PartType=Gen 6 | PartType=Inf 7 | 
PartType=Neg 8 | Subcat=Intr 9 | Subcat=Prep 10 | Subcat=Tran 11 | VerbType=Aux 12 | VerbType=Cop 13 | VerbType=Mod 14 | VerbType=Pas 15 | -------------------------------------------------------------------------------- /EUD/data/feat_val.akk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/feat_val.akk -------------------------------------------------------------------------------- /EUD/data/feat_val.am: -------------------------------------------------------------------------------- 1 | Case=Mal 2 | Voice=Trans 3 | -------------------------------------------------------------------------------- /EUD/data/feat_val.ar: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | ConjType=Oper 3 | NumForm=Digit 4 | NumForm=Word 5 | NumValue=1 6 | NumValue=2 7 | NumValue=3 8 | -------------------------------------------------------------------------------- /EUD/data/feat_val.be: -------------------------------------------------------------------------------- 1 | Variant=Short 2 | -------------------------------------------------------------------------------- /EUD/data/feat_val.bg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/feat_val.bg -------------------------------------------------------------------------------- /EUD/data/feat_val.bho: -------------------------------------------------------------------------------- 1 | AdpType=Post 2 | AdvType=Deg 3 | Echo=Rdp 4 | Gender[psor]=Masc 5 | Gender[psor]=Fem 6 | Mood=Cont 7 | Number[psor]=Sing 8 | Number[psor]=Plur 9 | -------------------------------------------------------------------------------- /EUD/data/feat_val.bm: 
-------------------------------------------------------------------------------- 1 | Valency=1 2 | Valency=2 3 | AdjType=Attr 4 | -------------------------------------------------------------------------------- /EUD/data/feat_val.br: -------------------------------------------------------------------------------- 1 | Person=Auto 2 | Gender[psor]=Fem 3 | Gender[psor]=Masc 4 | -------------------------------------------------------------------------------- /EUD/data/feat_val.bxr: -------------------------------------------------------------------------------- 1 | Mood=Prsc 2 | Number[psor]=Sing 3 | Person[psor]=3 4 | Tense=Aor 5 | VerbForm=Coverb 6 | -------------------------------------------------------------------------------- /EUD/data/feat_val.ca: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | AdpType=Preppron 3 | AdvType=Tim 4 | Number[psor]=Plur 5 | Number[psor]=Sing 6 | NumForm=Digit 7 | PrepCase=Npr 8 | PrepCase=Pre 9 | PunctSide=Fin 10 | PunctSide=Ini 11 | PunctType=Brck 12 | PunctType=Colo 13 | PunctType=Comm 14 | PunctType=Dash 15 | PunctType=Excl 16 | PunctType=Peri 17 | PunctType=Qest 18 | PunctType=Quot 19 | PunctType=Semi 20 | -------------------------------------------------------------------------------- /EUD/data/feat_val.cop: -------------------------------------------------------------------------------- 1 | Gender[psor]=Fem 2 | Gender[psor]=Masc 3 | Number[psor]=Plur 4 | Number[psor]=Sing 5 | -------------------------------------------------------------------------------- /EUD/data/feat_val.cs: -------------------------------------------------------------------------------- 1 | AdpType=Comprep 2 | AdpType=Prep 3 | AdpType=Voc 4 | ConjType=Oper 5 | NumForm=Digit 6 | NumForm=Roman 7 | NumForm=Word 8 | NumValue=1 9 | NumValue=2 10 | NumValue=3 11 | Gender[psor]=Masc 12 | Gender[psor]=Fem 13 | Gender[psor]=Neut 14 | Number[psor]=Sing 15 | Number[psor]=Plur 16 | PrepCase=Npr 17 | PrepCase=Pre 18 
| NameType=Giv 19 | NameType=Sur 20 | NameType=Geo 21 | NameType=Com 22 | NameType=Pro 23 | NameType=Nat 24 | NameType=Oth 25 | Hyph=Yes 26 | Style=Arch 27 | Style=Coll 28 | Style=Expr 29 | Style=Rare 30 | Style=Slng 31 | Style=Vrnc 32 | Style=Vulg 33 | Typo=Yes 34 | Variant=Short 35 | -------------------------------------------------------------------------------- /EUD/data/feat_val.cu: -------------------------------------------------------------------------------- 1 | Aspect=Res 2 | Strength=Strong 3 | Strength=Weak 4 | -------------------------------------------------------------------------------- /EUD/data/feat_val.cy: -------------------------------------------------------------------------------- 1 | Mutation=AM 2 | Mutation=NM 3 | Mutation=SM 4 | PronType=Contrastive 5 | Relative=Rel 6 | -------------------------------------------------------------------------------- /EUD/data/feat_val.da: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | Variant=Long 3 | PartType=Inf 4 | Number[psor]=Sing 5 | Number[psor]=Plur 6 | Style=Arch 7 | Style=Form 8 | Variant=Short 9 | -------------------------------------------------------------------------------- /EUD/data/feat_val.de: -------------------------------------------------------------------------------- 1 | AdjType=Pdt 2 | AdpType=Circ 3 | AdpType=Post 4 | AdpType=Prep 5 | Aspect=Perf 6 | ConjType=Comp 7 | Foreign=Yes 8 | Hyph=Yes 9 | Gender[psor]=Masc 10 | Gender[psor]=Fem 11 | Gender[psor]=Neut 12 | Mood=Imp 13 | Mood=Ind 14 | Number[psor]=Sing 15 | Number[psor]=Plur 16 | NumType=Card 17 | PartType=Inf 18 | PartType=Res 19 | PartType=Vbp 20 | Person[psor]=1 21 | Person[psor]=2 22 | Person[psor]=3 23 | Polarity=Neg 24 | Poss=Yes 25 | PronType=Art 26 | PronType=Dem 27 | PronType=Ind,Neg,Tot 28 | PronType=Int 29 | PronType=Prs 30 | PronType=Rel 31 | PunctType=Brck 32 | PunctType=Comm 33 | PunctType=Peri 34 | Reflex=Yes 35 | Typo=Yes 36 | Variant=Short 37 | 
VerbForm=Fin 38 | VerbForm=Inf 39 | VerbForm=Part 40 | VerbType=Mod 41 | -------------------------------------------------------------------------------- /EUD/data/feat_val.el: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/feat_val.el -------------------------------------------------------------------------------- /EUD/data/feat_val.en: -------------------------------------------------------------------------------- 1 | Typo=Yes 2 | -------------------------------------------------------------------------------- /EUD/data/feat_val.es: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | AdpType=Preppron 3 | AdvType=Tim 4 | Number[psor]=Plur 5 | Number[psor]=Sing 6 | NumForm=Digit 7 | PrepCase=Npr 8 | PrepCase=Pre 9 | PunctSide=Fin 10 | PunctSide=Ini 11 | PunctType=Brck 12 | PunctType=Colo 13 | PunctType=Comm 14 | PunctType=Dash 15 | PunctType=Excl 16 | PunctType=Peri 17 | PunctType=Qest 18 | PunctType=Quot 19 | PunctType=Semi 20 | -------------------------------------------------------------------------------- /EUD/data/feat_val.et: -------------------------------------------------------------------------------- 1 | AdpType=Post 2 | AdpType=Prep 3 | Connegative=Yes 4 | Hyph=Yes 5 | NumForm=Digit 6 | NumForm=Letter 7 | NumForm=Roman 8 | 9 | -------------------------------------------------------------------------------- /EUD/data/feat_val.eu: -------------------------------------------------------------------------------- 1 | Person[abs]=3 2 | Number[abs]=Sing 3 | Person[erg]=3 4 | Number[erg]=Sing 5 | Number[abs]=Plur 6 | Number[erg]=Plur 7 | Person[dat]=3 8 | Person[erg]=1 9 | Number[dat]=Sing 10 | Person[abs]=1 11 | Number[dat]=Plur 12 | Person[erg]=2 13 | Person[dat]=1 14 | Person[abs]=2 15 | Person[dat]=2 16 | Gender[erg]=Fem 17 | Gender[dat]=Masc 18 | 
Gender[erg]=Masc 19 | Polite[abs]=Infm 20 | Polite[dat]=Infm 21 | Polite[erg]=Infm 22 | -------------------------------------------------------------------------------- /EUD/data/feat_val.fa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/feat_val.fa -------------------------------------------------------------------------------- /EUD/data/feat_val.fi: -------------------------------------------------------------------------------- 1 | AdpType=Post 2 | AdpType=Prep 3 | Clitic=Han 4 | Clitic=Ka 5 | Clitic=Kaan 6 | Clitic=Kin 7 | Clitic=Ko 8 | Clitic=Pa 9 | Clitic=S 10 | Connegative=Yes 11 | Derivation=Inen 12 | Derivation=Ja 13 | Derivation=Lainen 14 | Derivation=Llinen 15 | Derivation=Minen 16 | Derivation=Sti 17 | Derivation=Tar 18 | Derivation=Ton 19 | Derivation=Ttaa 20 | Derivation=Ttain 21 | Derivation=U 22 | Derivation=Vs 23 | InfForm=1 24 | InfForm=2 25 | InfForm=3 26 | Number[psor]=Plur 27 | Number[psor]=Sing 28 | PartForm=Agt 29 | PartForm=Neg 30 | PartForm=Past 31 | PartForm=Pres 32 | Person[psor]=1 33 | Person[psor]=2 34 | Person[psor]=3 35 | PunctSide=Ini 36 | PunctSide=Fin 37 | Style=Arch 38 | Style=Coll 39 | Typo=Yes 40 | -------------------------------------------------------------------------------- /EUD/data/feat_val.fo: -------------------------------------------------------------------------------- 1 | NameType=Prs 2 | NameType=Geo 3 | -------------------------------------------------------------------------------- /EUD/data/feat_val.fr: -------------------------------------------------------------------------------- 1 | Number[psor]=Sing 2 | Number[psor]=Plur 3 | Typo=Yes 4 | -------------------------------------------------------------------------------- /EUD/data/feat_val.fro: -------------------------------------------------------------------------------- 1 | Morph=VFin 2 | Morph=VInf 3 | 
Morph=VPar 4 | Polarity=Int 5 | PronType=Ord 6 | -------------------------------------------------------------------------------- /EUD/data/feat_val.ga: -------------------------------------------------------------------------------- 1 | Case=NomAcc 2 | Dialect=Connaught 3 | Dialect=Munster 4 | Dialect=Ulster 5 | Form=Ecl 6 | Form=Emp 7 | Form=HPref 8 | Form=Len 9 | Form=VF 10 | Mood=Int 11 | NounType=NotSlender 12 | NounType=Slender 13 | NounType=Strong 14 | NounType=Weak 15 | PartType=Ad 16 | PartType=Cmpl 17 | PartType=Comp 18 | PartType=Sup 19 | PartType=Cop 20 | PartType=Deg 21 | PartType=Inf 22 | PartType=Num 23 | PartType=Pat 24 | PartType=Vb 25 | PartType=Voc 26 | PrepForm=Cmpd 27 | VerbForm=Cop 28 | Voice=Auto 29 | -------------------------------------------------------------------------------- /EUD/data/feat_val.gd: -------------------------------------------------------------------------------- 1 | Case=NomAcc 2 | Dialect=Connaught 3 | Dialect=Munster 4 | Dialect=Ulster 5 | Form=Ecl 6 | Form=Emp 7 | Form=HPref 8 | Form=Len 9 | Form=VF 10 | Mood=Int 11 | NounType=NotSlender 12 | NounType=Slender 13 | NounType=Strong 14 | NounType=Weak 15 | NumType=Pers 16 | PartType=Ad 17 | PartType=Cmpl 18 | PartType=Comp 19 | PartType=Cop 20 | PartType=Deg 21 | PartType=Inf 22 | PartType=Num 23 | PartType=Pat 24 | PartType=Vb 25 | PartType=Voc 26 | PrepForm=Cmpd 27 | VerbForm=Cop 28 | Voice=Auto 29 | -------------------------------------------------------------------------------- /EUD/data/feat_val.gl: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | Clitic=Yes 3 | Number[psor]=Sing 4 | Number[psor]=Plur 5 | -------------------------------------------------------------------------------- /EUD/data/feat_val.got: -------------------------------------------------------------------------------- 1 | Strength=Strong 2 | Strength=Weak 3 | -------------------------------------------------------------------------------- 
/EUD/data/feat_val.grc: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | Tense=Aor 3 | -------------------------------------------------------------------------------- /EUD/data/feat_val.gun: -------------------------------------------------------------------------------- 1 | Clusivity=In 2 | Clusivity=Ex 3 | Clusivity[obj]=In 4 | Clusivity[obj]=Ex 5 | Clusivity[psor]=In 6 | Clusivity[psor]=Ex 7 | Clusivity[subj]=In 8 | Clusivity[subj]=Ex 9 | Mood=Des 10 | Mood=Ind 11 | Mood=Imp 12 | Number=Sing 13 | Number=Plur 14 | Number[psor]=Sing 15 | Number[psor]=Plur 16 | NumType=Card 17 | Person=1 18 | Person=2 19 | Person=3 20 | Person[subj]=1 21 | Person[subj]=12 22 | Person[subj]=2 23 | Person[subj]=3 24 | Person[obj]=1 25 | Person[obj]=12 26 | Person[obj]=2 27 | Person[obj]=3 28 | Polarity=Neg 29 | PronType=Add 30 | PronType=Dem 31 | PronType=Ind 32 | PronType=Int 33 | PronType=Neg 34 | PronType=Prs 35 | PronType=Tot 36 | Subcat=Int 37 | Subcat=IntInd 38 | Subcat=Tran 39 | Subcat=Ditran 40 | VerbForm=Fin 41 | VerbForm=Inf 42 | VerbForm=Part 43 | VerbForm=Ser 44 | VerbForm=Post 45 | VerbForm=Prov 46 | VerbForm=Vnoun 47 | -------------------------------------------------------------------------------- /EUD/data/feat_val.he: -------------------------------------------------------------------------------- 1 | HebBinyan=HIFIL 2 | HebBinyan=HITPAEL 3 | HebBinyan=HUFAL 4 | HebBinyan=NIFAL 5 | HebBinyan=PAAL 6 | HebBinyan=PIEL 7 | HebBinyan=PUAL 8 | HebExistential=True 9 | HebSource=ConvUncertainHead 10 | HebSource=ConvUncertainLabel 11 | Prefix=Yes 12 | VerbType=Cop 13 | VerbType=Mod 14 | Xtra=Junk 15 | -------------------------------------------------------------------------------- /EUD/data/feat_val.hi: -------------------------------------------------------------------------------- 1 | AdpType=Post 2 | AdvType=Deg 3 | Echo=Rdp 4 | Gender[psor]=Masc 5 | Gender[psor]=Fem 6 | Mood=Cont 7 | Number[psor]=Sing 8 | 
Number[psor]=Plur 9 | -------------------------------------------------------------------------------- /EUD/data/feat_val.hr: -------------------------------------------------------------------------------- 1 | Gender[psor]=Fem 2 | Gender[psor]=Masc 3 | Gender[psor]=Neut 4 | Number[psor]=Plur 5 | Number[psor]=Sing 6 | 7 | -------------------------------------------------------------------------------- /EUD/data/feat_val.hsb: -------------------------------------------------------------------------------- 1 | AdvType=Mod 2 | Gender[psor]=Masc 3 | Gender[psor]=Fem 4 | Gender[psor]=Neut 5 | Hyph=Yes 6 | Number[psor]=Sing 7 | Number[psor]=Dual 8 | Number[psor]=Plur 9 | PrepCase=Npr 10 | PrepCase=Pre 11 | VerbType=Mod 12 | -------------------------------------------------------------------------------- /EUD/data/feat_val.hu: -------------------------------------------------------------------------------- 1 | Definite=2 2 | Number[psed]=None 3 | Number[psed]=Sing 4 | Number[psor]=None 5 | Number[psor]=Plur 6 | Number[psor]=Sing 7 | Person[psor]=1 8 | Person[psor]=3 9 | Person[psor]=None 10 | VerbForm=PartFut 11 | VerbForm=PartPast 12 | VerbForm=PartPres 13 | -------------------------------------------------------------------------------- /EUD/data/feat_val.hy: -------------------------------------------------------------------------------- 1 | AdpType=Comadp 2 | AdpType=Post 3 | AdpType=Prep 4 | Aspect=Dur 5 | ConjType=Comp 6 | ConjType=Oper 7 | Connegative=Yes 8 | Distance=Prox 9 | Distance=Med 10 | Distance=Dist 11 | Echo=Ech 12 | Hyph=Yes 13 | NameType=Giv 14 | NameType=Sur 15 | NameType=Geo 16 | NameType=Com 17 | NameType=Pro 18 | NameType=Prs 19 | NameType=Oth 20 | Number=Assoc 21 | Number[psor]=Plur 22 | Number[psor]=Sing 23 | NumForm=Armenian 24 | NumForm=Digit 25 | NumForm=Roman 26 | NumForm=Word 27 | Person[psor]=1 28 | Person[psor]=2 29 | Person[psor]=3 30 | Style=Arch 31 | Style=Coll 32 | Style=Expr 33 | Style=Rare 34 | Style=Slng 35 | Style=Vrnc 36 | 
Style=Vulg 37 | Subcat=Intr 38 | Subcat=Tran 39 | Typo=Yes 40 | -------------------------------------------------------------------------------- /EUD/data/feat_val.id: -------------------------------------------------------------------------------- 1 | Clusivity=In 2 | Clusivity=Ex 3 | Number[psor]=Sing 4 | Number[psor]=Plur 5 | Person[psor]=1 6 | Person[psor]=2 7 | Person[psor]=3 8 | -------------------------------------------------------------------------------- /EUD/data/feat_val.is: -------------------------------------------------------------------------------- 1 | PunctSide=Fin 2 | PunctSide=Ini 3 | -------------------------------------------------------------------------------- /EUD/data/feat_val.it: -------------------------------------------------------------------------------- 1 | Clitic=Yes 2 | Number[psor]=Sing 3 | Number[psor]=Plur 4 | NumType=Range 5 | -------------------------------------------------------------------------------- /EUD/data/feat_val.ja: -------------------------------------------------------------------------------- 1 | Case=Advb 2 | Case=Comp 3 | Form=Adn 4 | Form=Irr 5 | Form=Real 6 | Form=Spcf 7 | -------------------------------------------------------------------------------- /EUD/data/feat_val.kk: -------------------------------------------------------------------------------- 1 | Mood=Vol 2 | Number[psor]=Plur 3 | Number[psor]=Sing 4 | NumType=Coll 5 | Person[psor]=1 6 | Person[psor]=2 7 | Person[psor]=3 8 | PronType=Qnt 9 | PronType=Ref 10 | Tense=Aor 11 | Tense=FutPlan 12 | Voice=Coop 13 | VerbForm=Cov 14 | -------------------------------------------------------------------------------- /EUD/data/feat_val.kmr: -------------------------------------------------------------------------------- 1 | AdpType=Post 2 | AdpType=Prep 3 | Case=Con 4 | Case=Obl 5 | -------------------------------------------------------------------------------- /EUD/data/feat_val.ko: 
-------------------------------------------------------------------------------- 1 | Case=Advb 2 | Case=Comp 3 | Form=Adn 4 | Form=Aux 5 | Form=Compl 6 | -------------------------------------------------------------------------------- /EUD/data/feat_val.koi: -------------------------------------------------------------------------------- 1 | AdpType=Post 2 | AdvType=Loc 3 | Case=Apr 4 | Case=Car 5 | Case=Comp 6 | Case=Egr 7 | Case=Prl 8 | Case=Tra 9 | Clitic=O 10 | Clitic=So 11 | Connegative=Yes 12 | Derivation=A 13 | Derivation=An 14 | Derivation=Zhyk 15 | Derivation=Ig 16 | Derivation=Igdyrji 17 | Derivation=Igmoz 18 | Derivation=Ik 19 | Derivation=Ja 20 | Derivation=Oma 21 | Derivation=Omon 22 | Derivation=Sa 23 | Derivation=Tog 24 | Derivation=Tom 25 | Number[psor]=Plur 26 | Number[psor]=Sing 27 | NumType=MultDist 28 | NumType=OrdMult 29 | PartForm=Pres 30 | Person[psor]=1 31 | Person[psor]=2 32 | Person[psor]=3 33 | PronType=Inter 34 | PronType=Qnt 35 | Tense=Prt1 36 | Tense=Prt2 37 | VerbType=Aux 38 | -------------------------------------------------------------------------------- /EUD/data/feat_val.kpv: -------------------------------------------------------------------------------- 1 | Case=Apr 2 | Case=Car 3 | Case=Egr 4 | Case=Prl 5 | Case=Tra 6 | Clitic=O 7 | Clitic=So 8 | Connegative=Yes 9 | Derivation=A 10 | Derivation=An 11 | Derivation=Ig 12 | Derivation=Igdyrji 13 | Derivation=Igmoz 14 | Derivation=Ik 15 | Derivation=Ja 16 | Derivation=Oma 17 | Derivation=Omon 18 | Derivation=Sa 19 | Derivation=Tog 20 | Derivation=Tom 21 | Derivation=Ysj 22 | Number[psor]=Plur 23 | Number[psor]=Sing 24 | PartForm=Pres 25 | Person[psor]=1 26 | Person[psor]=2 27 | Person[psor]=3 28 | PronType=Inter 29 | PronType=Qnt 30 | -------------------------------------------------------------------------------- /EUD/data/feat_val.krl: -------------------------------------------------------------------------------- 1 | Clitic=Ki 2 | Clitic=Ko 3 | Connegative=Yes 4 | 
Number[psor]=Plur 5 | Number[psor]=Sing 6 | PartForm=Past 7 | Person[psor]=1 8 | Person[psor]=2 9 | Person[psor]=3 10 | Typo=Yes 11 | -------------------------------------------------------------------------------- /EUD/data/feat_val.la: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | NumForm=Digit 3 | VerbType=Mod 4 | -------------------------------------------------------------------------------- /EUD/data/feat_val.lt: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | Hyph=Yes 3 | NameType=Geo 4 | NameType=Giv 5 | NameType=Sur 6 | NumForm=Combi 7 | NumForm=Digit 8 | NumForm=Roman 9 | NumForm=Word 10 | PunctType=Peri 11 | Reflex=Yes 12 | Reflex=No 13 | VerbForm=PartPad 14 | VerbForm=PartPus 15 | Tense=PastSimp 16 | Tense=PastIter 17 | Tense=PresHab 18 | Variant=Short 19 | Variant=Full 20 | Variant=Long 21 | Voice=Necess 22 | -------------------------------------------------------------------------------- /EUD/data/feat_val.lv: -------------------------------------------------------------------------------- 1 | Polarity=Pos 2 | Evident=Fh 3 | Polarity=Neg 4 | Abbr=Yes 5 | Foreign=Yes 6 | Evident=Nfh 7 | -------------------------------------------------------------------------------- /EUD/data/feat_val.lzh: -------------------------------------------------------------------------------- 1 | AdvType=Cau 2 | AdvType=Deg 3 | AdvType=Tim 4 | NameType=Geo 5 | NameType=Giv 6 | NameType=Nat 7 | NameType=Prs 8 | NameType=Sur 9 | NounType=Class 10 | VerbType=Cop 11 | -------------------------------------------------------------------------------- /EUD/data/feat_val.mdf: -------------------------------------------------------------------------------- 1 | AdvType=Deg 2 | AdvType=Loc 3 | AdvType=Man 4 | AdpType=Post 5 | AdvType=Sta 6 | AdvType=Tim 7 | Aspect=Inch 8 | Case=Abe 9 | Case=Abl 10 | Case=Com 11 | Case=Comp 12 | Case=Dat 13 | Case=Ela 14 | Case=Gen 15 | Case=Ill 
16 | Case=Ine 17 | Case=Lat 18 | Case=Loc 19 | Case=Nom 20 | Case=Prl 21 | Case=Temp 22 | Case=Tra 23 | Clitic=Add 24 | Clitic=AddGA 25 | Clitic=AddKige 26 | Clitic=AddNgA 27 | Clitic=AddVok 28 | Connegative=Yes 29 | Definite=Def 30 | Definite=Ind 31 | Derivation=Dimin 32 | Derivation=F 33 | Derivation=I 34 | Derivation=NomAg 35 | Derivation=Ord 36 | Derivation=Ozj 37 | Derivation=Poss 38 | Derivation=PrcPrt1 39 | Derivation=Shka 40 | Derivation=VGen 41 | Derivation=Voc 42 | Derivation=VerbYks 43 | Derivation=Vnoun 44 | Derivation=Wife 45 | Derivation=Y 46 | Mood=Cnd 47 | Mood=CndCnj 48 | Mood=Cnj 49 | Mood=Des 50 | Mood=Imp 51 | Mood=Ind 52 | Mood=Opt 53 | Mood=Proh 54 | NameType=Giv 55 | NameType=Sur 56 | NegationType=Contrastive 57 | Number[obj]=Plur 58 | Number[obj]=Sing 59 | Number[psor]=Plur 60 | Number[psor]=Sing 61 | Number[subj]=Plur 62 | Number[subj]=Sing 63 | NumType=Card 64 | NumType=Coll 65 | NumType=Dist 66 | NumType=Mult 67 | NumType=Ord 68 | NumType=OrdinalSets 69 | PartType=Emp 70 | Person[obj]=1 71 | Person[obj]=2 72 | Person[obj]=3 73 | Person[psor]=1 74 | Person[psor]=2 75 | Person[psor]=3 76 | Person[subj]=1 77 | Person[subj]=2 78 | Person[subj]=3 79 | PronType=Dem 80 | PronType=Ind 81 | PronType=Int 82 | PronType=Prs 83 | PronType=Refl 84 | PronType=Rel 85 | Tense=Prt1 86 | Tense=Prt2 87 | Typo=Yes 88 | Valency=1 89 | Valency=2 90 | Variant=Short 91 | Variant=Long 92 | VerbForm=Conv 93 | VerbForm=Inf 94 | VerbForm=Part 95 | VerbForm=Vnoun 96 | VerbType=Aux 97 | VerbType=Cop 98 | -------------------------------------------------------------------------------- /EUD/data/feat_val.mr: -------------------------------------------------------------------------------- 1 | Case=Obl 2 | Clusivity=Incl 3 | Distance=Dist 4 | Distance=Prox 5 | InfForm=Dict 6 | InfForm=Incp 7 | -------------------------------------------------------------------------------- /EUD/data/feat_val.mt: 
-------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | AdpType=Preppron 3 | AdvType=Ex 4 | NumForm=Digit 5 | NumForm=Roman 6 | NumForm=Word 7 | NumValue=1 8 | -------------------------------------------------------------------------------- /EUD/data/feat_val.nl: -------------------------------------------------------------------------------- 1 | AdpType=Circ 2 | AdpType=Post 3 | AdpType=Prep 4 | AdpType=Preppron 5 | AdvType=Ex 6 | Number[psor]=Plur 7 | Number[psor]=Sing 8 | NumForm=Digit 9 | PartType=Inf 10 | PartType=Vbp 11 | PunctSide=Fin 12 | PunctSide=Ini 13 | PunctType=Brck 14 | PunctType=Colo 15 | PunctType=Comm 16 | PunctType=Excl 17 | PunctType=Peri 18 | PunctType=Qest 19 | PunctType=Quot 20 | PunctType=Semi 21 | Subcat=Intr 22 | Subcat=Tran 23 | Variant=Short 24 | VerbType=Aux 25 | VerbType=Cop 26 | VerbType=Mod 27 | -------------------------------------------------------------------------------- /EUD/data/feat_val.no: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/feat_val.no -------------------------------------------------------------------------------- /EUD/data/feat_val.olo: -------------------------------------------------------------------------------- 1 | AdpType=Post 2 | Clitic=Gi 3 | Clitic=Bo 4 | Clitic=Ko 5 | Clitic=Go 6 | Connegative=Yes 7 | Derivation=Ma 8 | Number[psor]=Plur 9 | Number[psor]=Sing 10 | PartForm=Past 11 | Person[psor]=1 12 | Person[psor]=2 13 | Person[psor]=3 14 | Polarity=Neg 15 | Typo=Yes 16 | Valency=1 17 | Valency=2 18 | -------------------------------------------------------------------------------- /EUD/data/feat_val.orv: -------------------------------------------------------------------------------- 1 | Aspect=Res 2 | Strength=Strong 3 | Strength=Weak 4 | Degree=Cmp2 5 | Variant=Long 6 | Variant=Short 7 | Number=Adnum 8 | 
NumForm=Digit 9 | Tense=Fut1 10 | VerbForm=PartRes 11 | Analyt=Yes 12 | -------------------------------------------------------------------------------- /EUD/data/feat_val.pl: -------------------------------------------------------------------------------- 1 | Abbr=Yes 2 | AdpType=Post 3 | AdpType=Prep 4 | Agglutination=Agl 5 | Agglutination=Nagl 6 | Clitic=Yes 7 | ConjType=Cmpr 8 | Emphatic=Yes 9 | Hyph=Yes 10 | Number[psor]=Plur 11 | Number[psor]=Sing 12 | NumForm=Digit 13 | NumForm=Roman 14 | NumForm=Word 15 | PartType=Int 16 | Polarity=Neg 17 | Polarity=Pos 18 | Polite=Depr 19 | PrepCase=Npr 20 | PrepCase=Pre 21 | PunctSide=Fin 22 | PunctSide=Ini 23 | PunctType=Brck 24 | PunctType=Comm 25 | PunctType=Dash 26 | PunctType=Excl 27 | PunctType=Peri 28 | PunctType=Qest 29 | PunctType=Quot 30 | PunctType=Semi 31 | SubGender=Masc1 32 | SubGender=Masc2 33 | SubGender=Masc3 34 | Typo=Yes 35 | Variant=Long 36 | Variant=Short 37 | VerbType=Quasi 38 | ConjType=Oper 39 | ConjType=Pred 40 | Foreign=No 41 | NounForm=Depr 42 | PartType=Mod 43 | Pun=No 44 | Pun=Yes 45 | PunctType=Colo 46 | PunctType=Elip 47 | PunctType=Hyph 48 | PunctType=Slsh 49 | VerbType=Mod 50 | -------------------------------------------------------------------------------- /EUD/data/feat_val.pt: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | AdpType=Preppron 3 | Gender=Unsp 4 | Number=Unsp 5 | Number[psor]=Sing 6 | Number[psor]=Plur 7 | -------------------------------------------------------------------------------- /EUD/data/feat_val.quz: -------------------------------------------------------------------------------- 1 | Aspect=Inch 2 | Deriv=Rptn 3 | Deriv=Ag 4 | Evident=DirE 5 | Evident=Assumptive 6 | Evident=Fact 7 | Evident=IndE 8 | Evident=Sqa 9 | Mood=Assistive 10 | Number[psor]=Sing 11 | Number[psor]=Plur 12 | Person[psor]=1 13 | Person[psor]=3 14 | PronType=Det 15 | Topic=Yes 16 | 
-------------------------------------------------------------------------------- /EUD/data/feat_val.ro: -------------------------------------------------------------------------------- 1 | AdpType=Prep 2 | Compound=Yes 3 | Number[psor]=Plur 4 | Number[psor]=Sing 5 | NumForm=Digit 6 | NumForm=Roman 7 | NumForm=Word 8 | PartType=Inf 9 | PartType=Sub 10 | Position=Postnom 11 | Position=Prenom 12 | Strength=Strong 13 | Strength=Weak 14 | Variant=Long 15 | Variant=Short 16 | -------------------------------------------------------------------------------- /EUD/data/feat_val.ru: -------------------------------------------------------------------------------- 1 | Gender[psor]=Masc 2 | Gender[psor]=Fem 3 | Number[psor]=Sing 4 | Number[psor]=Plur 5 | Typo=Yes 6 | Variant=Short 7 | Variant=Long 8 | -------------------------------------------------------------------------------- /EUD/data/feat_val.sa: -------------------------------------------------------------------------------- 1 | Compound=Yes 2 | Hyph=Yes 3 | PartType=Int 4 | Preverb=Yes 5 | -------------------------------------------------------------------------------- /EUD/data/feat_val.sd: -------------------------------------------------------------------------------- 1 | Form=Bound 2 | Form=Simple 3 | -------------------------------------------------------------------------------- /EUD/data/feat_val.sk: -------------------------------------------------------------------------------- 1 | AdpType=Comprep 2 | AdpType=Prep 3 | AdpType=Preppron 4 | AdpType=Voc 5 | ConjType=Oper 6 | NumForm=Digit 7 | NumForm=Roman 8 | NumForm=Word 9 | NumValue=1 10 | NumValue=2 11 | NumValue=3 12 | Gender[psor]=Masc 13 | Gender[psor]=Fem 14 | Gender[psor]=Neut 15 | Number[psor]=Sing 16 | Number[psor]=Plur 17 | PrepCase=Npr 18 | PrepCase=Pre 19 | NameType=Giv 20 | NameType=Sur 21 | NameType=Geo 22 | NameType=Com 23 | NameType=Pro 24 | NameType=Nat 25 | NameType=Oth 26 | Hyph=Yes 27 | Style=Arch 28 | Style=Coll 29 | Typo=Yes 30 | 
Variant=Short 31 | -------------------------------------------------------------------------------- /EUD/data/feat_val.sl: -------------------------------------------------------------------------------- 1 | Gender[psor]=Fem 2 | Gender[psor]=Masc 3 | Gender[psor]=Neut 4 | NumForm=Digit 5 | NumForm=Roman 6 | NumForm=Word 7 | Number[psor]=Dual 8 | Number[psor]=Plur 9 | Number[psor]=Sing 10 | Variant=Bound 11 | Variant=Short 12 | -------------------------------------------------------------------------------- /EUD/data/feat_val.sme: -------------------------------------------------------------------------------- 1 | Connegative=Yes 2 | Number[psor]=Plur 3 | Number[psor]=Sing 4 | Number[psor]=Dual 5 | Person[psor]=1 6 | Person[psor]=2 7 | Person[psor]=3 8 | PronType=Coll 9 | -------------------------------------------------------------------------------- /EUD/data/feat_val.sms: -------------------------------------------------------------------------------- 1 | AdvType=Tim 2 | Clitic=AddI 3 | Clitic=QstA 4 | Connegative=Yes 5 | Derivation=Vnoun 6 | Mood=Pot 7 | Number[psor]=Plur 8 | Number[psor]=Sing 9 | Number[psor]=Dual 10 | Person[psor]=1 11 | Person[psor]=2 12 | Person[psor]=3 13 | PronType=Refl 14 | PronType=Coll 15 | Tense=Prt 16 | Valency=1 -------------------------------------------------------------------------------- /EUD/data/feat_val.sq: -------------------------------------------------------------------------------- 1 | NounType=Het 2 | -------------------------------------------------------------------------------- /EUD/data/feat_val.sr: -------------------------------------------------------------------------------- 1 | Gender[psor]=Fem 2 | Gender[psor]=Masc 3 | Gender[psor]=Neut 4 | Number[psor]=Plur 5 | Number[psor]=Sing 6 | NumType=Gen 7 | 8 | -------------------------------------------------------------------------------- /EUD/data/feat_val.sv: -------------------------------------------------------------------------------- 1 | VerbForm=Stem 2 | 
-------------------------------------------------------------------------------- /EUD/data/feat_val.swl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/feat_val.swl -------------------------------------------------------------------------------- /EUD/data/feat_val.ta: -------------------------------------------------------------------------------- 1 | AdpType=Post 2 | NumForm=Digit 3 | PunctType=Comm 4 | PunctType=Peri 5 | -------------------------------------------------------------------------------- /EUD/data/feat_val.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/feat_val.th -------------------------------------------------------------------------------- /EUD/data/feat_val.tr: -------------------------------------------------------------------------------- 1 | Aspect=Dur 2 | Aspect=DurPerf 3 | Aspect=DurPerfProg 4 | Aspect=DurProg 5 | Aspect=PerfRapid 6 | Aspect=ProgRapid 7 | Aspect=Rapid 8 | Echo=Rdp 9 | Mood=CndGen 10 | Mood=CndGenPot 11 | Mood=CndPot 12 | Mood=CndPot 13 | Mood=DesPot 14 | Mood=DesPot 15 | Mood=Gen 16 | Mood=GenNec 17 | Mood=GenNecPot 18 | Mood=GenNecPot 19 | Mood=GenPot 20 | Mood=GenPot 21 | Mood=GenPotPot 22 | Mood=ImpPot 23 | Mood=ImpPot 24 | Mood=NecPot 25 | Mood=NecPot 26 | Mood=Pot 27 | Mood=PotPot 28 | Mood=Prs 29 | Mood=PrsPot 30 | Number[psor]=Plur 31 | Number[psor]=Sing 32 | Person[psor]=1 33 | Person[psor]=2 34 | Person[psor]=3 35 | Register=Form 36 | Register=Inf 37 | Tense=Aor 38 | Tense=AorPast 39 | Tense=FutPast 40 | Voice=CauCau 41 | Voice=CauCauPass 42 | Voice=CauPass 43 | Voice=CauPassRcp 44 | Voice=CauRcp 45 | Voice=PassPass 46 | Voice=PassRcp 47 | Voice=PassRfl 48 | Voice=Rfl 49 | 
-------------------------------------------------------------------------------- /EUD/data/feat_val.ug: -------------------------------------------------------------------------------- 1 | Aspect=Rapid 2 | Aspect=ProgRapid 3 | Aspect=DurPerfProg 4 | Aspect=DurPerf 5 | Aspect=DurProg 6 | Echo=Rdp 7 | Mood=Abil 8 | Mood=Gen 9 | Mood=AbilGenNec 10 | Mood=Prs 11 | Mood=AbilGen 12 | Mood=AbilCnd 13 | Mood=GenNec 14 | Mood=AbilDes 15 | Mood=AbilPrs 16 | Mood=AbilImp 17 | Mood=AbilNec 18 | Mood=Gen-Nec 19 | Number[psor]=Sing 20 | Number[psor]=Plur 21 | Person[psor]=1 22 | Person[psor]=2 23 | Person[psor]=3 24 | Register=Form 25 | Register=Inf 26 | Tense=Aor 27 | Tense=AorPast 28 | Tense=FutPast 29 | VerbForm=Cov 30 | Voice=CauPass 31 | -------------------------------------------------------------------------------- /EUD/data/feat_val.uk: -------------------------------------------------------------------------------- 1 | Animacy[gram]=Anim 2 | Animacy[gram]=Inan 3 | Hyph=Yes 4 | NameType=Giv 5 | NameType=Pat 6 | NameType=Sur 7 | Orth=Alt 8 | PartType=Conseq 9 | PunctType=Bull 10 | PunctType=Dash 11 | PunctType=Hyph 12 | PunctType=Ndash 13 | PunctType=Quot 14 | Uninflect=Yes 15 | Variant=Short 16 | Variant=Uncontr 17 | -------------------------------------------------------------------------------- /EUD/data/feat_val.ur: -------------------------------------------------------------------------------- 1 | AdpType=Post 2 | AdvType=Deg 3 | Echo=Rdp 4 | 5 | -------------------------------------------------------------------------------- /EUD/data/feat_val.vi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/feat_val.vi -------------------------------------------------------------------------------- /EUD/data/feat_val.wbp: -------------------------------------------------------------------------------- 1 | Clitic=Yes 2 | 
Number[obj]=Sing 3 | Number[obj]=Dual 4 | Number[obj]=Plur 5 | Person[dat]=1 6 | Person[dat]=2 7 | Person[dat]=3 8 | Person[obj]=1 9 | Person[obj]=2 10 | Person[obj]=3 11 | Person[sdat]=1 12 | Person[sdat]=2 13 | Person[sdat]=3 14 | -------------------------------------------------------------------------------- /EUD/data/feat_val.wo: -------------------------------------------------------------------------------- 1 | Deixis=Prox 2 | Deixis=Med 3 | Deixis=Remt 4 | DeixisRef=1 5 | DeixisRef=2 6 | FocusType=Compl 7 | FocusType=Verb 8 | FocusType=Subj 9 | NounClass=Wol1 10 | NounClass=Wol2 11 | NounClass=Wol3 12 | NounClass=Wol4 13 | NounClass=Wol5 14 | NounClass=Wol6 15 | NounClass=Wol7 16 | NounClass=Wol8 17 | NounClass=Wol9 18 | NounClass=Wol10 19 | NounClass=Wol11 20 | NounClass=Wol12 -------------------------------------------------------------------------------- /EUD/data/feat_val.yo: -------------------------------------------------------------------------------- 1 | Typo=Yes 2 | -------------------------------------------------------------------------------- /EUD/data/feat_val.yue: -------------------------------------------------------------------------------- 1 | NounType=Clf 2 | -------------------------------------------------------------------------------- /EUD/data/feat_val.zh: -------------------------------------------------------------------------------- 1 | Case=Rel 2 | Case=Comp 3 | Case=Advb 4 | Mood=Inter 5 | NounType=Clf 6 | -------------------------------------------------------------------------------- /EUD/data/feats.ud: -------------------------------------------------------------------------------- 1 | Animacy 2 | Aspect 3 | Case 4 | Definite 5 | Degree 6 | Gender 7 | Mood 8 | Negative 9 | NumType 10 | Number 11 | Person 12 | Poss 13 | PronType 14 | Reflex 15 | Tense 16 | VerbForm 17 | Voice 18 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.am: 
-------------------------------------------------------------------------------- 1 | [0-9]+ [0-9]+ 2 | [0-9]+ [0-9]+ 3 | [0-9]+ % 4 | [ንበት] [ ንበት] 5 | [ኡኝ] [ ኡኝ] 6 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.br: -------------------------------------------------------------------------------- 1 | [0-9]+ [0-9]+ 2 | [0-9]+ [0-9]+ 3 | [0-9]+ % 4 | [Aa] galon 5 | [Aa]m eus 6 | [Aa]r pezh 7 | [Aa]r re all 8 | [Bb]anelloù hent 9 | [Bb]ro [lL]eon 10 | [Dd]a gentañ 11 | [Ee]m boa 12 | [Ee]m eus 13 | [Ee]n deus 14 | [Ee]n doa 15 | [Ee]n o zouez 16 | [Ee]n un 17 | [Ee]n ur 18 | [Ee]r maez 19 | [Aa]r [pP]oulgwenn 20 | [Ee]r [pP]oulgwenn 21 | [Ee]z eus 22 | [Gg]ant ma 23 | [Hh]e deus 24 | [Hh]e devoa 25 | [Hh]e do 26 | [Hh]oc['’]h eus 27 | [Hh]on eus 28 | [Hh]o pet 29 | [Hh]o peus 30 | [Hh]o pez 31 | [Hh]o po 32 | [Hh]o poa 33 | [Kk]entelioù noz 34 | [Oo] deus 35 | [Oo] do 36 | [Oo] doa 37 | [Aa]r pezh a 38 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.fi: -------------------------------------------------------------------------------- 1 | # emoticons written with space 2 | : \) 3 | : D 4 | # inflected numbers 5 | [0-9 ]+:([a-z]+)? 6 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.fr: -------------------------------------------------------------------------------- 1 | [0-9 ,]+ 2 | 3 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.fro: -------------------------------------------------------------------------------- 1 | ambe .ii. 
2 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.gun: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/EUD/data/tokens_w_space.gun -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.kk: -------------------------------------------------------------------------------- 1 | [АӘБВГҒДЕЁЖЗИІЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЬЫЪЭЮЯаәбвгғдеёжзиійкқлмнңоөпрстуұүфхһцчшщьыъэюя]+[гқкғ][ае]н жоқ 2 | [АӘБВГҒДЕЁЖЗИІЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЬЫЪЭЮЯаәбвгғдеёжзиійкқлмнңоөпрстуұүфхһцчшщьыъэюя]+[гқкғ][ае]н жоқ екен 3 | [АӘБВГҒДЕЁЖЗИІЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЬЫЪЭЮЯаәбвгғдеёжзиійкқлмнңоөпрстуұүфхһцчшщьыъэюя]+[гқкғ][ае]н емес[өәұүіа-яң]* 4 | [АӘБВГҒДЕЁЖЗИІЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЬЫЪЭЮЯаәбвгғдеёжзиійкқлмнңоөпрстуұүфхһцчшщьыъэюя]+ [мпб][еа] 5 | [АӘБВГҒДЕЁЖЗИІЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЬЫЪЭЮЯаәбвгғдеёжзиійкқлмнңоөпрстуұүфхһцчшщьыъэюя]+ [тд][еа] 6 | [АӘБВГҒДЕЁЖЗИІЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЬЫЪЭЮЯаәбвгғдеёжзиійкқлмнңоөпрстуұүфхһцчшщьыъэюя]+ [ғкқг]ана 7 | [АӘБВГҒДЕЁЖЗИІЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЬЫЪЭЮЯаәбвгғдеёжзиійкқлмнңоөпрстуұүфхһцчшщьыъэюя]+ қой 8 | [АӘБВГҒДЕЁЖЗИІЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЬЫЪЭЮЯаәбвгғдеёжзиійкқлмнңоөпрстуұүфхһцчшщьыъэюя]+ шығар 9 | [АӘБВГҒДЕЁЖЗИІЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЬЫЪЭЮЯаәбвгғдеёжзиійкқлмнңоөпрстуұүфхһцчшщьыъэюя]+ екен[өәұүіа-яң]* 10 | не болмаса 11 | [Сс]онымен қатар 12 | еш нәрсені 13 | алып кел 14 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.kmr: -------------------------------------------------------------------------------- 1 | [^ ]+ bû[^ ]* 2 | Stok moranê 3 | nas dikim 4 | [^ ]+ kir[^ ]* 5 | xuya dik[^ ]+ 6 | vene gerand 7 | qebûl dik[^ ]+ 8 | ji bo 9 | şîrove bik[^ ]+ 10 | wêran dik[^ ]+ 11 | lê dix[^ ]+ 12 | ceza dik[^ ]+ 13 | dest pê kir 14 | dest pê (bi|di)[^ ]+ 15 | ava 
(bi|di)[^ ]+ 16 | dûr (bi|di)[^ ]+ 17 | çê (bi|di)b[^ ]+ 18 | em dê 19 | bicih (bi|di)[^ ]+ 20 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.lt: -------------------------------------------------------------------------------- 1 | \d+( \d+)+(,\d+)? 2 | [-A-Za-z0-9 ]+[<=>]+[0-9, ]+ 3 | (\d+\s*)?[-\.]\s*\d+ 4 | 1993 / 13 / EEB 5 | DAg = DA \+ ∑ kn DPn \+ ∑ DSn 6 | A Bao A Ku 7 | be kita ko 8 | [Bb]e to 9 | bet (kada|kas|kokio|koks|kuri|kurios|kurią|kuris|ką) 10 | [Čč]ia pat 11 | [Dd]aug (kas|kuo|kur) 12 | [Dd]ėl (ko|to) 13 | [Ii]ki (pat|šiol) 14 | in corpore 15 | [Ii]š (anksto|eilės|esmės|karto|naujo|paskos|pradžių|principo|tiesų|tikrųjų|tolo|viso) 16 | [Kk](ada|aip|as|iek|okia|okiame|okios|okiu|oks|okį|uo|urio|urioj|uris|ą) (nors|tik) 17 | [Kk]ai (kas|kurie|kurios|kuriose|kuris|kuriuos|kuriuose|kurių) 18 | [Kk]as kita 19 | [Kk]ita vertus 20 | [Kk]o (gero|nors) 21 | ([Kk]ol|kur) kas 22 | [Ll]aba diena 23 | ligi (šiol|šiolei) 24 | nedaug kas 25 | nežinia ką 26 | nieko prieš 27 | norom nenorom 28 | [Nn]ė (kiek|vienas|vieno|vienoje) 29 | odd - ball 30 | [Pp]o (to|velnių) 31 | prieš (pat|tai) 32 | [Šš]iek tiek 33 | šis tas 34 | šį tą 35 | [Tt]\. (t|y)\.? 
36 | ta (pati|pačia) 37 | tai (ką|yra) 38 | [Tt]aip pat 39 | tam (tikra|tikrais|tikras|tikri|tikro|tikroje|tikros|tikru|tikrus|tikrą|tikrų) 40 | [Tt]as (pats?|pt?as) 41 | tie patys 42 | [Tt]iek (pat|to) 43 | tiems patiems 44 | tiesą sakant 45 | to paties 46 | toje pačioje 47 | [Tt]oki[eų] pat 48 | [Tt]okiu būdu 49 | [Tt]oks pat 50 | tomis pačiomis 51 | tos pačios 52 | [Tt]uo (pačiu|tarpu) 53 | tuoj pat 54 | tuojau pat 55 | tuos pačius 56 | (turėti|turint) omeny 57 | tą pačią 58 | vargais negalais 59 | [Vv]argu ar 60 | [Vv]iena vertus 61 | [Vv]is (dar|dėlto|tiek) 62 | [Vv]isa tai 63 | [Vv]isų pirma 64 | vos tik 65 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.lv: -------------------------------------------------------------------------------- 1 | ([+-]\s*)?\d+(\s+\d+)* 2 | u\.\s*t\.\s*jpr\. 3 | u\.\s*c\. 4 | u\.\s*tml\. 5 | v\.\s*tml\. 6 | u\.\s*t\.\s*t\. 7 | N.\s*B. 8 | (P\.\s*)+S\. 9 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.myv: -------------------------------------------------------------------------------- 1 | # emoticons written with space 2 | : \) 3 | : D 4 | # inflected numbers 5 | [0-9 ]+-([a-z]+)? 
6 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.orv: -------------------------------------------------------------------------------- 1 | не быти 2 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.pl: -------------------------------------------------------------------------------- 1 | bieżący rok 2 | bieżący miesiąc 3 | do spraw 4 | i tak dalej 5 | i tym podobne 6 | Immunoglobina E 7 | kilometr kwadratowy 8 | między innymi 9 | na przykład 10 | na temat 11 | nad poziomem morza 12 | ograniczona odpowiedzialność 13 | pod nazwą 14 | pod tytułem 15 | post scriptum 16 | pod wezwaniem 17 | przed naszą erą 18 | spółka akcyjna 19 | świętej pamięci 20 | tak zwany 21 | to jest 22 | to znaczy 23 | ubiegły rok 24 | w sprawie 25 | wyżej wymieniony 26 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.shopen: -------------------------------------------------------------------------------- 1 | cung van 2 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.sms: -------------------------------------------------------------------------------- 1 | # -:- coding= UTF-8 -:- 2 | # emoticons written with space 3 | : \) 4 | : D 5 | # inflected numbers 6 | [0-9 ]+-([a-z]+)? 
7 | # fixed expressions 8 | čõõđ ääiʹj 9 | ouddâl ko 10 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.sv: -------------------------------------------------------------------------------- 1 | [Bb]l a 2 | [Dd] v s 3 | [Ee] d 4 | [Ff] n 5 | [Ff]r o m 6 | [Mm] fl 7 | [Mm] m 8 | [Oo] s v 9 | [Ss] k 10 | [Tt] ex 11 | [Tt] o m 12 | [Tt] v 13 | [0-9]+ [0-9]+([-–][0-9]+ [0-9]+) 14 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.ud: -------------------------------------------------------------------------------- 1 | [0-9 ]+ 2 | [0-9 ]+[,.][0-9]+ 3 | 4 | -------------------------------------------------------------------------------- /EUD/data/tokens_w_space.vi: -------------------------------------------------------------------------------- 1 | .+ 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Flair is licensed under the following MIT License (MIT) Copyright © 2018 Zalando SE, https://tech.zalando.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# Recursive dictionary merge
# Copyright (C) 2016 Paul Durivage
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Found here https://gist.github.com/angstwad/bf22d1822c38a92ec0a9
# Using jpopelka's modified solution.

import collections
import collections.abc


def dict_merge(dct, merge_dct):
    """Recursively merge ``merge_dct`` into ``dct`` in place.

    Inspired by :meth:`dict.update`, but instead of replacing only
    top-level keys, the merge recurses into nested dictionaries to an
    arbitrary depth.

    A key is merged recursively only when the value already stored in
    ``dct`` is a ``dict`` and the incoming value is a mapping. In every
    other case the incoming value overwrites (or creates) the entry in
    ``dct``.

    :param dct: dict onto which the merge is executed (mutated in place)
    :param merge_dct: mapping whose items are merged into ``dct``
    :return: None
    """
    for k, v in merge_dct.items():
        # ``collections.Mapping`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc``.
        if isinstance(dct.get(k), dict) and isinstance(v, collections.abc.Mapping):
            dict_merge(dct[k], v)
        else:
            dct[k] = v
20 | * `train`: the parameters for the `train` function in `trainer` (for example, `ModelDistiller.train()`). 21 | * `teacher_annealing`: Anneal the weight of distillation loss in training. 22 | * `anneal_factor`: the anneal rate for the distillation. -------------------------------------------------------------------------------- /config/multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_crfatt_old_relearn_nodev_fast_new_ner0.yaml: -------------------------------------------------------------------------------- 1 | ModelDistiller: 2 | distill_mode: true 3 | optimizer: SGD 4 | train_with_professor: false 5 | anneal_factor: 0.5 6 | embeddings: 7 | BertEmbeddings: 8 | bert_model_or_path: bert-base-multilingual-cased 9 | layers: '-1' 10 | pooling_operation: mean 11 | interpolation: 0.5 12 | is_teacher_list: true 13 | model: 14 | FastSequenceTagger: 15 | crf_attention: true 16 | distill_crf: true 17 | dropout: 0.0 18 | hidden_size: 600 19 | relearn_embeddings: true 20 | sentence_loss: true 21 | use_crf: true 22 | model_name: multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_crfatt_old_relearn_nodev_fast_new_ner0 23 | ner: 24 | Corpus: CONLL_03_DUTCH:CONLL_03_SPANISH:CONLL_03:CONLL_03_GERMAN 25 | tag_dictionary: resources/taggers/ner_tags.pkl 26 | teachers: 27 | ? config/multi_bert_origflair_300epoch_2000batch_1lr_256hidden_de_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner12.yaml 28 | : CONLL_03_GERMAN 29 | ? config/multi_bert_origflair_300epoch_2000batch_1lr_256hidden_en_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner11.yaml 30 | : CONLL_03 31 | ? config/multi_bert_origflair_300epoch_2000batch_1lr_256hidden_es_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner12.yaml 32 | : CONLL_03_SPANISH 33 | ? 
config/multi_bert_origflair_300epoch_2000batch_1lr_256hidden_nl_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner11.yaml 34 | : CONLL_03_DUTCH 35 | target_dir: resources/taggers/ 36 | targets: ner 37 | teacher_annealing: true 38 | train: 39 | learning_rate: 0.1 40 | max_epochs: 300 41 | mini_batch_size: 2000 42 | monitor_test: false 43 | patience: 10 44 | professor_interpolation: 0.5 45 | save_final_model: false 46 | train_with_dev: false 47 | true_reshuffle: false 48 | trainer: ModelDistiller 49 | -------------------------------------------------------------------------------- /config/multi_bert_origflair_300epoch_2000batch_0.1lr_256hidden_de_monolingual_crf_sentloss_10patience_baseline_nodev_ner0.yaml: -------------------------------------------------------------------------------- 1 | ModelDistiller: 2 | distill_mode: false 3 | train_with_professor: false 4 | anneal_factor: 2 5 | embeddings: 6 | BertEmbeddings: 7 | bert_model_or_path: bert-base-multilingual-cased 8 | layers: '-1' 9 | pooling_operation: mean 10 | FlairEmbeddings-1: 11 | model: de-forward 12 | FlairEmbeddings-2: 13 | model: de-backward 14 | WordEmbeddings: 15 | embeddings: de 16 | interpolation: 0.5 17 | is_teacher_list: true 18 | model: 19 | SequenceTagger: 20 | hidden_size: 256 21 | sentence_loss: true 22 | use_crf: true 23 | model_name: multi_bert_origflair_300epoch_2000batch_1lr_256hidden_de_monolingual_crf_sentloss_10patience_baseline_nodev_ner0 24 | ner: 25 | Corpus: CONLL_03_GERMAN 26 | tag_dictionary: resources/taggers/ner_tags.pkl 27 | teachers: 28 | config/multi_bert_flair_2000batch_1lr_de_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_GERMAN 29 | config/multi_bert_flair_2000batch_1lr_en_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03 30 | config/multi_bert_flair_2000batch_1lr_es_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner0.yaml: CONLL_03_SPANISH 31 | 
config/multi_bert_flair_2000batch_1lr_nl_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_DUTCH 32 | target_dir: resources/taggers/ 33 | targets: ner 34 | teacher_annealing: false 35 | train: 36 | learning_rate: 0.1 37 | max_epochs: 300 38 | mini_batch_size: 2000 39 | monitor_test: false 40 | patience: 10 41 | professor_interpolation: 0.5 42 | save_final_model: false 43 | train_with_dev: false 44 | upos: 45 | Corpus: UD_GERMAN:UD_ENGLISH:UD_FRENCH:UD_ITALIAN:UD_DUTCH:UD_SPANISH:UD_PORTUGUESE:UD_CHINESE 46 | UD_GERMAN: 47 | train_config: config/ 48 | tag_dictionary: resources/taggers/pos_tags.pkl 49 | -------------------------------------------------------------------------------- /config/multi_bert_origflair_300epoch_2000batch_0.1lr_256hidden_en_monolingual_crf_sentloss_10patience_baseline_nodev_ner0.yaml: -------------------------------------------------------------------------------- 1 | ModelDistiller: 2 | distill_mode: false 3 | train_with_professor: false 4 | anneal_factor: 2 5 | embeddings: 6 | BertEmbeddings: 7 | bert_model_or_path: bert-base-multilingual-cased 8 | layers: '-1' 9 | pooling_operation: mean 10 | FlairEmbeddings-1: 11 | model: en-forward 12 | FlairEmbeddings-2: 13 | model: en-backward 14 | WordEmbeddings: 15 | embeddings: en 16 | interpolation: 0.5 17 | is_teacher_list: true 18 | model: 19 | SequenceTagger: 20 | hidden_size: 256 21 | sentence_loss: true 22 | use_crf: true 23 | model_name: multi_bert_origflair_300epoch_2000batch_1lr_256hidden_en_monolingual_crf_sentloss_10patience_baseline_nodev_ner0 24 | ner: 25 | Corpus: CONLL_03 26 | tag_dictionary: resources/taggers/ner_tags.pkl 27 | teachers: 28 | config/multi_bert_flair_2000batch_1lr_de_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_GERMAN 29 | config/multi_bert_flair_2000batch_1lr_en_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03 30 | 
config/multi_bert_flair_2000batch_1lr_es_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner0.yaml: CONLL_03_SPANISH 31 | config/multi_bert_flair_2000batch_1lr_nl_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_DUTCH 32 | target_dir: resources/taggers/ 33 | targets: ner 34 | teacher_annealing: false 35 | train: 36 | learning_rate: 0.1 37 | max_epochs: 300 38 | mini_batch_size: 2000 39 | monitor_test: false 40 | patience: 10 41 | professor_interpolation: 0.5 42 | save_final_model: false 43 | train_with_dev: false 44 | upos: 45 | Corpus: UD_GERMAN:UD_ENGLISH:UD_FRENCH:UD_ITALIAN:UD_DUTCH:UD_SPANISH:UD_PORTUGUESE:UD_CHINESE 46 | UD_GERMAN: 47 | train_config: config/ 48 | tag_dictionary: resources/taggers/pos_tags.pkl 49 | -------------------------------------------------------------------------------- /config/multi_bert_origflair_300epoch_2000batch_0.1lr_256hidden_es_monolingual_crf_sentloss_10patience_baseline_nodev_ner1.yaml: -------------------------------------------------------------------------------- 1 | ModelDistiller: 2 | distill_mode: false 3 | train_with_professor: false 4 | anneal_factor: 2 5 | embeddings: 6 | BertEmbeddings: 7 | bert_model_or_path: bert-base-multilingual-cased 8 | layers: '-1' 9 | pooling_operation: mean 10 | FlairEmbeddings-1: 11 | model: es-forward 12 | FlairEmbeddings-2: 13 | model: es-backward 14 | WordEmbeddings: 15 | embeddings: es 16 | interpolation: 0.5 17 | is_teacher_list: true 18 | model: 19 | SequenceTagger: 20 | hidden_size: 256 21 | sentence_loss: true 22 | use_crf: true 23 | model_name: multi_bert_origflair_300epoch_2000batch_1lr_256hidden_es_monolingual_crf_sentloss_10patience_baseline_nodev_ner1 24 | ner: 25 | Corpus: CONLL_03_SPANISH 26 | tag_dictionary: resources/taggers/ner_tags.pkl 27 | teachers: 28 | config/multi_bert_flair_2000batch_1lr_de_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_GERMAN 29 | 
config/multi_bert_flair_2000batch_1lr_en_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03 30 | config/multi_bert_flair_2000batch_1lr_es_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner0.yaml: CONLL_03_SPANISH 31 | config/multi_bert_flair_2000batch_1lr_nl_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_DUTCH 32 | target_dir: resources/taggers/ 33 | targets: ner 34 | teacher_annealing: false 35 | train: 36 | learning_rate: 0.1 37 | max_epochs: 300 38 | mini_batch_size: 2000 39 | monitor_test: false 40 | patience: 10 41 | professor_interpolation: 0.5 42 | save_final_model: false 43 | train_with_dev: false 44 | upos: 45 | Corpus: UD_GERMAN:UD_ENGLISH:UD_FRENCH:UD_ITALIAN:UD_DUTCH:UD_SPANISH:UD_PORTUGUESE:UD_CHINESE 46 | UD_GERMAN: 47 | train_config: config/ 48 | tag_dictionary: resources/taggers/pos_tags.pkl 49 | -------------------------------------------------------------------------------- /config/multi_bert_origflair_300epoch_2000batch_0.1lr_256hidden_nl_monolingual_crf_sentloss_10patience_baseline_nodev_ner1.yaml: -------------------------------------------------------------------------------- 1 | ModelDistiller: 2 | distill_mode: false 3 | train_with_professor: false 4 | anneal_factor: 2 5 | embeddings: 6 | BertEmbeddings: 7 | bert_model_or_path: bert-base-multilingual-cased 8 | layers: '-1' 9 | pooling_operation: mean 10 | FlairEmbeddings-1: 11 | model: nl-forward 12 | FlairEmbeddings-2: 13 | model: nl-backward 14 | WordEmbeddings: 15 | embeddings: nl 16 | interpolation: 0.5 17 | is_teacher_list: true 18 | model: 19 | SequenceTagger: 20 | hidden_size: 256 21 | sentence_loss: true 22 | use_crf: true 23 | model_name: multi_bert_origflair_300epoch_2000batch_1lr_256hidden_nl_monolingual_crf_sentloss_10patience_baseline_nodev_ner1 24 | ner: 25 | Corpus: CONLL_03_DUTCH 26 | tag_dictionary: resources/taggers/ner_tags.pkl 27 | teachers: 28 | 
config/multi_bert_flair_2000batch_1lr_de_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_GERMAN 29 | config/multi_bert_flair_2000batch_1lr_en_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03 30 | config/multi_bert_flair_2000batch_1lr_es_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner0.yaml: CONLL_03_SPANISH 31 | config/multi_bert_flair_2000batch_1lr_nl_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_DUTCH 32 | target_dir: resources/taggers/ 33 | targets: ner 34 | teacher_annealing: false 35 | train: 36 | learning_rate: 0.1 37 | max_epochs: 300 38 | mini_batch_size: 2000 39 | monitor_test: false 40 | patience: 10 41 | professor_interpolation: 0.5 42 | save_final_model: false 43 | train_with_dev: false 44 | upos: 45 | Corpus: UD_GERMAN:UD_ENGLISH:UD_FRENCH:UD_ITALIAN:UD_DUTCH:UD_SPANISH:UD_PORTUGUESE:UD_CHINESE 46 | UD_GERMAN: 47 | train_config: config/ 48 | tag_dictionary: resources/taggers/pos_tags.pkl 49 | -------------------------------------------------------------------------------- /config/test_de.yaml: -------------------------------------------------------------------------------- 1 | ModelDistiller: 2 | distill_mode: false 3 | train_with_professor: false 4 | anneal_factor: 2 5 | embeddings: 6 | TransformerWordEmbeddings: 7 | model: bert-base-multilingual-cased 8 | layers: '-1' 9 | FlairEmbeddings-1: 10 | model: de-forward 11 | FlairEmbeddings-2: 12 | model: de-backward 13 | WordEmbeddings: 14 | embeddings: de 15 | interpolation: 0.5 16 | is_teacher_list: true 17 | model: 18 | SequenceTagger: 19 | hidden_size: 256 20 | sentence_loss: true 21 | use_crf: true 22 | model_name: multi_bert_origflair_300epoch_2000batch_1lr_256hidden_de_monolingual_crf_sentloss_10patience_baseline_nodev_ner0 23 | ner: 24 | Corpus: CONLL_03_GERMAN 25 | professors: 26 | config/single-de-ner.yaml: CONLL_03_GERMAN 27 | config/single-en-ner.yaml: CONLL_03 28 | config/single-es-ner.yaml: CONLL_03_SPANISH 
29 | config/single-nl-ner.yaml: CONLL_03_DUTCH 30 | tag_dictionary: resources/taggers/ner_tags.pkl 31 | teachers: 32 | config/multi_bert_flair_2000batch_1lr_de_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_GERMAN 33 | config/multi_bert_flair_2000batch_1lr_en_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03 34 | config/multi_bert_flair_2000batch_1lr_es_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner0.yaml: CONLL_03_SPANISH 35 | config/multi_bert_flair_2000batch_1lr_nl_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_DUTCH 36 | target_dir: resources/taggers/ 37 | targets: ner 38 | teacher_annealing: false 39 | train: 40 | learning_rate: 0.1 41 | max_epochs: 300 42 | mini_batch_size: 2000 43 | monitor_test: false 44 | patience: 10 45 | professor_interpolation: 0.5 46 | save_final_model: false 47 | train_with_dev: false 48 | upos: 49 | Corpus: UD_GERMAN:UD_ENGLISH:UD_FRENCH:UD_ITALIAN:UD_DUTCH:UD_SPANISH:UD_PORTUGUESE:UD_CHINESE 50 | UD_GERMAN: 51 | train_config: config/ 52 | tag_dictionary: resources/taggers/pos_tags.pkl 53 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_ar_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Arabic 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | 
lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_ar_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_bg_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Bulgarian 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | 
init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_bg_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_cs_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Czech 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | 
binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_cs_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_en_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_English 17 | is_teacher_list: true 18 | 
is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_en_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_et_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | 
enhancedud: 16 | Corpus: UD_Estonian 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_et_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_fi_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | 
XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Finnish 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_fi_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_fr_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: 
false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_French 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_fr_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_it_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | 
language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Italian 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_it_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_lt_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 
| down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Lithuanian 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_lt_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_lv_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | 
ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Latvian 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_lv_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_nl_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: 
-------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Dutch 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_nl_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- 
/config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_pl_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Polish 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_pl_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | 
-------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_ru_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Russian 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_ru_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: 
false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_sk_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Slovak 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_sk_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | 
train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_sv_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Swedish 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_sv_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: false 63 | rootschedule: false 64 | save_final_model: false 65 | 
sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /config/xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_uk_monolingual_nocrf_fast_2nd_nodev_enhancedud15.yaml: -------------------------------------------------------------------------------- 1 | ModelFinetuner: 2 | direct_upsample_rate: -1 3 | distill_mode: false 4 | down_sample_amount: -1 5 | ensemble_distill_mode: false 6 | language_resample: false 7 | optimizer: Adam 8 | train_with_professor: false 9 | dependency: 10 | Corpus: CTB 11 | embeddings: 12 | XLMRoBERTaEmbeddings: 13 | layers: '-1' 14 | pooling_operation: mean 15 | enhancedud: 16 | Corpus: UD_Ukrainian 17 | is_teacher_list: true 18 | is_toy: false 19 | model: 20 | SemanticDependencyParser: 21 | binary: true 22 | factorize: true 23 | hidden_size: 400 24 | init_std: 0.25 25 | interpolation: 0.1 26 | iterations: 3 27 | lstm_dropout: 0.33 28 | mlp_dropout: 0.33 29 | n_mlp_arc: 500 30 | n_mlp_rel: 100 31 | n_mlp_sec: 150 32 | rnn_layers: 3 33 | tree: true 34 | use_cop: true 35 | use_crf: false 36 | use_gp: true 37 | use_rnn: true 38 | use_second_order: true 39 | use_sib: true 40 | word_dropout: 0.33 41 | model_name: xlmr_1000epoch_0.1inter_2000batch_0.002lr_400hidden_uk_monolingual_nocrf_fast_2nd_nodev_enhancedud15 42 | target_dir: resources/taggers/ 43 | targets: enhancedud 44 | teacher_annealing: false 45 | train: 46 | best_k: 5 47 | betas: 48 | - 0.9 49 | - 0.9 50 | calc_teachers_target_loss: false 51 | entropy_loss_rate: 0.001 52 | fine_tune_mode: false 53 | freezing: false 54 | language_attention_entropy: false 55 | language_attention_warmup: false 56 | language_attention_warmup_and_fix: false 57 | learning_rate: 0.002 58 | lr_rate: 1 59 | max_epochs: 1000 60 | min_freq: 2 61 | mini_batch_size: 2000 62 | monitor_test: 
false 63 | rootschedule: false 64 | save_final_model: false 65 | sort_data: true 66 | train_language_attention_by_dev: false 67 | train_with_dev: false 68 | true_reshuffle: false 69 | use_unlabeled_data: false 70 | use_warmup: false 71 | trainer: ModelFinetuner 72 | -------------------------------------------------------------------------------- /flair/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | # global variable: cache_root 5 | cache_root = os.path.expanduser(os.path.join("~", ".flair")) 6 | 7 | # global variable: device 8 | device = None 9 | if torch.cuda.is_available(): 10 | device = torch.device("cuda:0") 11 | else: 12 | device = torch.device("cpu") 13 | 14 | from . import data 15 | from . import models 16 | from . import visual 17 | from . import trainers 18 | from . import nn 19 | 20 | import logging.config 21 | 22 | __version__ = "0.4.3" 23 | 24 | logging.config.dictConfig( 25 | { 26 | "version": 1, 27 | "disable_existing_loggers": False, 28 | "formatters": {"standard": {"format": "%(asctime)-15s %(message)s"}}, 29 | "handlers": { 30 | "console": { 31 | "level": "INFO", 32 | "class": "logging.StreamHandler", 33 | "formatter": "standard", 34 | "stream": "ext://sys.stdout", 35 | } 36 | }, 37 | "loggers": { 38 | "flair": {"handlers": ["console"], "level": "INFO", "propagate": False} 39 | }, 40 | "root": {"handlers": ["console"], "level": "WARNING"}, 41 | } 42 | ) 43 | 44 | logger = logging.getLogger("flair") 45 | -------------------------------------------------------------------------------- /flair/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__init__.pyc -------------------------------------------------------------------------------- /flair/__pycache__/__init__.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/config_parser.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/config_parser.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/config_parser.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/config_parser.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/corpus_mapping.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/corpus_mapping.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/corpus_mapping.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/corpus_mapping.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/custom_data_loader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/custom_data_loader.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/custom_data_loader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/custom_data_loader.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/data.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/data.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/data.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/data.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/datasets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/datasets.cpython-36.pyc 
-------------------------------------------------------------------------------- /flair/__pycache__/datasets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/datasets.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/embeddings.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/embeddings.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/embeddings.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/embeddings.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/file_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/file_utils.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/file_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/file_utils.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/linear_functions.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/linear_functions.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/linear_functions.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/linear_functions.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/list_data.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/list_data.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/list_data.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/list_data.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/nn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/nn.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/nn.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/nn.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/optim.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/optim.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/optim.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/optim.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/training_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/training_utils.cpython-36.pyc -------------------------------------------------------------------------------- /flair/__pycache__/training_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/training_utils.cpython-37.pyc -------------------------------------------------------------------------------- /flair/__pycache__/variational_inference.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/variational_inference.cpython-36.pyc 
-------------------------------------------------------------------------------- /flair/__pycache__/variational_inference.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/__pycache__/variational_inference.cpython-37.pyc -------------------------------------------------------------------------------- /flair/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/algorithms/__init__.py -------------------------------------------------------------------------------- /flair/algorithms/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/algorithms/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /flair/algorithms/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/algorithms/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /flair/algorithms/__pycache__/dict_merge.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/algorithms/__pycache__/dict_merge.cpython-36.pyc -------------------------------------------------------------------------------- /flair/algorithms/__pycache__/dict_merge.cpython-37.pyc: 
def dict_merge(dct, merge_dct):
    """Recursively merge ``merge_dct`` into ``dct`` in place.

    Inspired by :meth:`dict.update`, but instead of replacing only
    top-level keys it descends into nested dictionaries of arbitrary depth.
    A key is merged recursively only when the value already stored in
    ``dct`` is a ``dict`` and the incoming value is a mapping; in every
    other case the incoming value overwrites (or creates) the entry.

    :param dct: dict onto which the merge is executed (mutated in place)
    :param merge_dct: mapping whose items are merged into ``dct``
    :return: None
    """
    # ``collections.Mapping`` was a deprecated alias that Python 3.10
    # removed; the ABC lives in ``collections.abc``.  Imported locally so the
    # function does not depend on which submodules the file-level
    # ``import collections`` happens to have loaded.
    from collections.abc import Mapping

    for k, v in merge_dct.items():
        if isinstance(dct.get(k), dict) and isinstance(v, Mapping):
            dict_merge(dct[k], v)
        else:
            dct[k] = v
from .sequence_tagger_model import SequenceTagger, FastSequenceTagger 2 | from .dependency_model import SemanticDependencyParser 3 | from .language_model import LanguageModel 4 | from .text_classification_model import TextClassifier 5 | -------------------------------------------------------------------------------- /flair/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/biaffine_attention.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/biaffine_attention.cpython-36.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/biaffine_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/biaffine_attention.cpython-37.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/dependency_model.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/dependency_model.cpython-36.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/dependency_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/dependency_model.cpython-37.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/language_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/language_model.cpython-36.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/language_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/language_model.cpython-37.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/mst_decoder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/mst_decoder.cpython-36.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/mst_decoder.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/mst_decoder.cpython-37.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/sequence_tagger_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/sequence_tagger_model.cpython-36.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/sequence_tagger_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/sequence_tagger_model.cpython-37.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/text_classification_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/text_classification_model.cpython-36.pyc -------------------------------------------------------------------------------- /flair/models/__pycache__/text_classification_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/models/__pycache__/text_classification_model.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .model 
import Model 4 | 5 | __all__ = ['Model'] 6 | -------------------------------------------------------------------------------- /flair/parser/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/cmds/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .evaluate import Evaluate 4 | from .predict import Predict 5 | from .train import Train 6 | 7 | __all__ = ['Evaluate', 'Predict', 'Train'] 8 | -------------------------------------------------------------------------------- 
/flair/parser/cmds/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/cmds/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/cmds/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/cmds/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/cmds/__pycache__/cmd.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/cmds/__pycache__/cmd.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/cmds/__pycache__/cmd.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/cmds/__pycache__/cmd.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/cmds/__pycache__/evaluate.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/cmds/__pycache__/evaluate.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/cmds/__pycache__/evaluate.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/cmds/__pycache__/evaluate.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/cmds/__pycache__/predict.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/cmds/__pycache__/predict.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/cmds/__pycache__/train.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/cmds/__pycache__/train.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/cmds/predict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datetime import datetime 4 | from parser import Model 5 | from parser.cmds.cmd import CMD 6 | from parser.utils.corpus import Corpus 7 | from parser.utils.data import TextDataset, batchify 8 | 9 | 10 | class Predict(CMD): 11 | 12 | def add_subparser(self, name, parser): 13 | subparser = parser.add_parser( 14 | name, help='Use a trained model to make predictions.' 
class Config(ConfigParser):
    """Flat, attribute-style view over an INI configuration file.

    Every option of every section read from ``path`` is parsed with
    :func:`ast.literal_eval` and stored on one shared
    :class:`argparse.Namespace`, so section names are discarded and values
    are reachable as ``config.option_name``.
    """

    def __init__(self, path):
        """Read ``path`` and load all of its options into the namespace.

        :param path: file name (or list of names) handed to
            ``ConfigParser.read``
        :raises ValueError, SyntaxError: propagated from ``literal_eval``
            when an option value is not a Python literal
        """
        super(Config, self).__init__()

        self.read(path)
        self.namespace = Namespace()
        self.update({name: literal_eval(value)
                     for section in self.sections()
                     for name, value in self.items(section)})

    def __repr__(self):
        """Return a two-column ``Param | Value`` table of the namespace."""
        line = "-" * 15 + "-+-" + "-" * 25 + "\n"
        rows = "".join(f"{name:15} | {str(value):^25}\n"
                       for name, value in vars(self.namespace).items())

        return line + f"{'Param':15} | {'Value':^25}\n" + line + rows + line

    def __getattr__(self, attr):
        """Resolve attributes not found on the parser via the namespace.

        :raises AttributeError: when the namespace has no such attribute, or
            when the namespace itself has not been created yet
        """
        # ``__getattr__`` only runs after normal lookup failed.  If the
        # missing name is ``namespace`` itself (instance not initialised
        # yet), reading ``self.namespace`` below would re-enter this method
        # forever and end in RecursionError instead of AttributeError.
        if attr == 'namespace':
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}")
        return getattr(self.namespace, attr)

    def __getstate__(self):
        """Return the instance ``__dict__`` as the pickled state."""
        return vars(self)

    def __setstate__(self, state):
        """Restore the instance ``__dict__`` from a pickled state."""
        self.__dict__.update(state)

    def update(self, kwargs):
        """Set every ``name: value`` pair of ``kwargs`` on the namespace.

        :param kwargs: mapping of option names to values
        :return: ``self``, so calls can be chained
        """
        for name, value in kwargs.items():
            setattr(self.namespace, name, value)

        return self
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/bert.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/bert.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/bert.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/biaffine.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/biaffine.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/biaffine.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/biaffine.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/bilstm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/bilstm.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/bilstm.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/bilstm.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/char_lstm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/char_lstm.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/char_lstm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/char_lstm.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/dropout.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/dropout.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/dropout.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/dropout.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/mlp.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/mlp.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/mlp.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/mlp.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/scalar_mix.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/scalar_mix.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/scalar_mix.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/scalar_mix.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/trilinear_attention.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/modules/__pycache__/trilinear_attention.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/modules/__pycache__/trilinear_attention.cpython-37.pyc: -------------------------------------------------------------------------------- 
class BertEmbedding(nn.Module):
    """Word-level embeddings pooled from the sub-word outputs of a BERT model.

    The last ``n_layers`` hidden states are combined by a ``ScalarMix``, the
    sub-word vectors of each word are averaged, and the result is projected
    to ``n_out`` features by a bias-free linear layer.
    """

    def __init__(self, model, n_layers, n_out, requires_grad=False):
        super().__init__()

        pretrained = BertModel.from_pretrained(model, output_hidden_states=True)
        self.bert = pretrained.requires_grad_(requires_grad)
        self.n_layers = n_layers
        self.n_out = n_out
        self.requires_grad = requires_grad
        self.hidden_size = self.bert.config.hidden_size

        self.scalar_mix = ScalarMix(n_layers)
        self.projection = nn.Linear(self.hidden_size, n_out, False)

    def __repr__(self):
        fields = f"n_layers={self.n_layers}, n_out={self.n_out}"
        if self.requires_grad:
            fields += f", requires_grad={self.requires_grad}"

        return self.__class__.__name__ + '(' + fields + ')'

    def forward(self, subwords, bert_lens, bert_mask):
        """Embed a batch of sub-word ids and pool them per word.

        ``bert_lens`` is two-dimensional (it is unpacked into batch size and
        sequence length) and holds the number of sub-words of each word;
        entries equal to zero are treated as padding and stay zero vectors
        before the projection.
        NOTE(review): ``bert_mask`` is used both as the attention mask and as
        a boolean index, so it is presumably a bool tensor shaped like
        ``subwords`` - confirm against the caller.
        """
        n_batch, n_words = bert_lens.shape
        word_mask = bert_lens.gt(0)

        # a frozen encoder is kept in eval mode so its dropout stays disabled
        if not self.requires_grad:
            self.bert.eval()
        # NOTE(review): relies on the model returning a 3-tuple whose last
        # item is the per-layer hidden states - confirm for the installed
        # transformers version.
        _, _, hidden_states = self.bert(subwords, attention_mask=bert_mask)
        mixed = self.scalar_mix(hidden_states[-self.n_layers:])
        # one chunk of sub-word vectors per real word
        pieces = mixed[bert_mask].split(bert_lens[word_mask].tolist())
        pooled = torch.stack([piece.mean(0) for piece in pieces])
        padded = pooled.new_zeros(n_batch, n_words, self.hidden_size)
        padded = padded.masked_scatter_(word_mask.unsqueeze(-1), pooled)

        return self.projection(padded)
class Biaffine(nn.Module):
    """Biaffine scorer over every pair of positions of two sequences.

    With ``diagonal=False`` the weight has one ``(n_in + bias_x) x
    (n_in + bias_y)`` matrix per output channel; with ``diagonal=True`` it
    has a single vector of size ``n_in + bias_x`` per channel. The weight is
    initialised to zeros.
    """

    def __init__(self, n_in, n_out=1, bias_x=True, bias_y=True, diagonal=False):
        super().__init__()

        self.n_in = n_in
        self.n_out = n_out
        self.bias_x = bias_x
        self.bias_y = bias_y
        self.diagonal = diagonal
        # the boolean bias flags add 0 or 1 to the input dimension
        if self.diagonal:
            weight_shape = (n_out, n_in + bias_x)
        else:
            weight_shape = (n_out, n_in + bias_x, n_in + bias_y)
        self.weight = nn.Parameter(torch.Tensor(*weight_shape))
        self.reset_parameters()

    def extra_repr(self):
        parts = [f"n_in={self.n_in}, n_out={self.n_out}"]
        if self.bias_x:
            parts.append(f"bias_x={self.bias_x}")
        if self.bias_y:
            parts.append(f"bias_y={self.bias_y}")

        return ", ".join(parts)

    def reset_parameters(self):
        nn.init.zeros_(self.weight)

    def forward(self, x, y):
        """Score all position pairs of ``x`` and ``y``.

        Both inputs are indexed as ``[batch, position, feature]`` by the
        einsum equations. The result is ``[batch, n_out, len_x, len_y]``,
        with dimension 1 squeezed away when ``n_out == 1``.
        """
        # append a constant 1 feature to emulate a bias term
        if self.bias_x:
            ones_x = torch.ones_like(x[..., :1])
            x = torch.cat((x, ones_x), -1)
        if self.bias_y:
            ones_y = torch.ones_like(y[..., :1])
            y = torch.cat((y, ones_y), -1)

        if self.diagonal:
            scores = torch.einsum('bxi,byi,oi->boxy', x, y, self.weight)
        else:
            scores = torch.einsum('bxi,oij,byj->boxy', x, self.weight, y)

        # remove dim 1 if n_out == 1
        return scores.squeeze(1)
class SharedDropout(nn.Module):
    """Dropout whose mask is shared along one dimension of the input.

    A single mask is sampled from the first slice of the shared dimension
    (``x[:, 0]`` when ``batch_first`` else ``x[0]``) and broadcast over that
    dimension, so the same features are dropped at every position.

    :param p: probability of zeroing an element
    :param batch_first: if True the mask is shared along dimension 1,
        otherwise along dimension 0
    """

    def __init__(self, p=0.5, batch_first=True):
        super(SharedDropout, self).__init__()

        self.p = p
        self.batch_first = batch_first

    def extra_repr(self):
        s = f"p={self.p}"
        if self.batch_first:
            s += f", batch_first={self.batch_first}"

        return s

    def forward(self, x):
        """Return ``x`` scaled by the shared mask (``x`` itself in eval mode).

        The product is computed out-of-place: an in-place ``x *= mask`` would
        silently modify the caller's tensor and raises a RuntimeError when
        ``x`` is a leaf tensor that requires grad.
        """
        if not self.training:
            return x

        if self.batch_first:
            mask = self.get_mask(x[:, 0], self.p).unsqueeze(1)
        else:
            mask = self.get_mask(x[0], self.p)

        return x * mask

    @staticmethod
    def get_mask(x, p):
        """Sample a mask shaped like ``x``: 0 with probability ``p``, else
        ``1 / (1 - p)`` so the expected value of the output is unchanged.
        """
        keep = 1 - p
        if keep <= 0:
            # everything is dropped; dividing by ``keep`` would give 0 / 0 = NaN
            return x.new_zeros(x.shape)
        mask = x.new_empty(x.shape).bernoulli_(keep)
        mask = mask / keep

        return mask
total.max(torch.ones_like(total)) 56 | masks = [mask * scale for mask in masks] 57 | items = [item * mask.unsqueeze(dim=-1) 58 | for item, mask in zip(items, masks)] 59 | 60 | return items 61 | -------------------------------------------------------------------------------- /flair/parser/modules/mlp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from flair.parser.modules.dropout import SharedDropout 4 | 5 | import torch.nn as nn 6 | 7 | 8 | class MLP(nn.Module): 9 | 10 | def __init__(self, n_in, n_hidden, dropout=0): 11 | super(MLP, self).__init__() 12 | 13 | self.linear = nn.Linear(n_in, n_hidden) 14 | self.activation = nn.LeakyReLU(negative_slope=0.1) 15 | self.dropout = SharedDropout(p=dropout) 16 | 17 | self.reset_parameters() 18 | 19 | def reset_parameters(self): 20 | nn.init.orthogonal_(self.linear.weight) 21 | nn.init.zeros_(self.linear.bias) 22 | 23 | def forward(self, x): 24 | x = self.linear(x) 25 | x = self.activation(x) 26 | x = self.dropout(x) 27 | 28 | return x 29 | -------------------------------------------------------------------------------- /flair/parser/modules/scalar_mix.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class ScalarMix(nn.Module): 8 | 9 | def __init__(self, n_layers, dropout=0): 10 | super(ScalarMix, self).__init__() 11 | 12 | self.n_layers = n_layers 13 | self.dropout = dropout 14 | 15 | self.weights = nn.Parameter(torch.zeros(n_layers)) 16 | self.gamma = nn.Parameter(torch.tensor([1.0])) 17 | self.dropout = nn.Dropout(dropout) 18 | 19 | def extra_repr(self): 20 | s = f"n_layers={self.n_layers}" 21 | if self.dropout.p > 0: 22 | s += f", dropout={self.dropout.p}" 23 | 24 | return s 25 | 26 | def forward(self, tensors): 27 | normed_weights = self.dropout(self.weights.softmax(-1)) 28 | weighted_sum = sum(w * h for w, h in 
zip(normed_weights, tensors)) 29 | 30 | return self.gamma * weighted_sum 31 | -------------------------------------------------------------------------------- /flair/parser/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from . import corpus, data, field, fn, metric 4 | from .embedding import Embedding 5 | from .vocab import Vocab 6 | 7 | __all__ = ['Corpus', 'Embedding', 'Vocab', 8 | 'corpus', 'data', 'field', 'fn', 'metric'] 9 | -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/alg.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/alg.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/alg.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/alg.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/common.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/common.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/common.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/common.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/corpus.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/corpus.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/corpus.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/corpus.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/data.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/data.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/data.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/data.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/embedding.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/embedding.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/embedding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/embedding.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/field.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/field.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/field.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/field.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/fn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/fn.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/fn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/fn.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/metric.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/metric.cpython-36.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/metric.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/parser/utils/__pycache__/metric.cpython-37.pyc -------------------------------------------------------------------------------- /flair/parser/utils/__pycache__/vocab.cpython-36.pyc: -------------------------------------------------------------------------------- 
# Special tokens shared by the parser vocabularies.
# NOTE(review): all four are empty strings as they appear here, which makes
# them indistinguishable from one another - this looks like markup that was
# stripped in transit; confirm the intended values against the repository.
pad = ''
unk = ''
bos = ''
eos = ''


class Embedding(object):
    """Container for pretrained token vectors.

    Args:
        tokens: sequence of token strings.
        vectors: sequence of equal-length float vectors, aligned with
            ``tokens``.
        unk: the token that stands for unknown words, or ``None``.
    """

    def __init__(self, tokens, vectors, unk=None):
        super(Embedding, self).__init__()

        self.tokens = tokens
        self.vectors = torch.tensor(vectors)
        self.pretrained = dict(zip(tokens, vectors))
        self.unk = unk

    def __len__(self):
        return len(self.tokens)

    def __contains__(self, token):
        return token in self.pretrained

    @property
    def dim(self):
        """Size of a single vector."""
        return self.vectors.size(1)

    @property
    def unk_index(self):
        """Position of the unknown token inside ``tokens``.

        Raises:
            AttributeError: if no unknown token was configured.
        """
        if self.unk is not None:
            return self.tokens.index(self.unk)
        else:
            raise AttributeError("no unk token was set for this Embedding")

    @classmethod
    def load(cls, path, unk=None):
        """Read a whitespace-separated text file of ``token v1 v2 ...`` rows.

        The file is read as UTF-8 and blank lines are skipped.

        Raises:
            ValueError: if the file holds no embedding rows, or a value
                cannot be parsed as a float.
        """
        with open(path, 'r', encoding='utf-8') as f:
            splits = [line.split() for line in f]
        # A blank (or whitespace-only) line splits to an empty list.
        splits = [s for s in splits if s]
        if not splits:
            raise ValueError(f"no embeddings found in {path}")
        tokens, vectors = zip(*[(s[0], list(map(float, s[1:])))
                                for s in splits])

        return cls(tokens, vectors, unk=unk)


class Metric(object):
    """Accumulates attachment scores over batches of predictions.

    Tracks sentence-level complete matches (UCM / LCM) and token-level
    attachment scores (UAS / LAS).  ``eps`` is added to every denominator
    so the ratios are defined before any batch has been seen.
    """

    def __init__(self, eps=1e-5):
        super(Metric, self).__init__()

        self.eps = eps

        self.n = 0.0
        self.n_ucm = 0.0
        self.n_lcm = 0.0
        self.total = 0.0
        self.correct_arcs = 0.0
        self.correct_rels = 0.0

    def __repr__(self):
        s = f"UCM: {self.ucm:6.2%} LCM: {self.lcm:6.2%} "
        s += f"UAS: {self.uas:6.2%} LAS: {self.las:6.2%}"
        return s

    def __call__(self, arc_preds, rel_preds, arc_golds, rel_golds, mask):
        """Add one batch to the running counts.

        ``mask`` is a boolean tensor selecting the positions that count;
        the other four tensors must have the same shape as ``mask``.
        A relation only counts as correct when its arc is correct too.
        """
        lens = mask.sum(1)
        arc_mask = arc_preds.eq(arc_golds) & mask
        rel_mask = rel_preds.eq(rel_golds) & arc_mask
        arc_mask_seq, rel_mask_seq = arc_mask[mask], rel_mask[mask]

        self.n += len(mask)
        # A row is a complete match when every counted position is right.
        self.n_ucm += arc_mask.sum(1).eq(lens).sum().item()
        self.n_lcm += rel_mask.sum(1).eq(lens).sum().item()

        self.total += len(arc_mask_seq)
        self.correct_arcs += arc_mask_seq.sum().item()
        self.correct_rels += rel_mask_seq.sum().item()

    # Comparisons use ``score`` on the left-hand side only, so ``other``
    # may be a plain number or anything a float can be compared with.
    def __lt__(self, other):
        return self.score < other

    def __le__(self, other):
        return self.score <= other

    def __ge__(self, other):
        return self.score >= other

    def __gt__(self, other):
        return self.score > other

    @property
    def score(self):
        """The value used for comparisons (LAS)."""
        return self.las

    @property
    def ucm(self):
        return self.n_ucm / (self.n + self.eps)

    @property
    def lcm(self):
        return self.n_lcm / (self.n + self.eps)

    @property
    def uas(self):
        return self.correct_arcs / (self.total + self.eps)

    @property
    def las(self):
        return self.correct_rels / (self.total + self.eps)


class Vocab(object):
    """Bidirectional token <-> index mapping.

    Args:
        counter: mapping from token to frequency.
        min_freq: tokens seen fewer times than this are left out.
        specials: tokens placed first, in the order given.  The sequence
            is copied, so the caller's object is never modified.
    """

    def __init__(self, counter, min_freq=1, specials=None):
        # Copy instead of aliasing: ``extend`` grows ``itos`` in place, and
        # an aliased default would leak tokens between vocabularies.
        self.itos = list(specials) if specials is not None else []
        self.stoi = {token: i for i, token in enumerate(self.itos)}

        self.extend([token for token, freq in counter.items()
                     if freq >= min_freq])
        # Falls back to index 0 when the unk token is not in the vocabulary.
        self.unk_index = self.stoi.get(unk, 0)
        self.n_init = len(self)

    def __len__(self):
        return len(self.itos)

    def __getitem__(self, key):
        return self.stoi[key]

    def __contains__(self, token):
        return token in self.stoi

    def token2id(self, sequence):
        """Map tokens to indices; unknown tokens map to ``unk_index``."""
        return [self.stoi.get(token, self.unk_index) for token in sequence]

    def id2token(self, ids):
        """Map one index, or an iterable of indices, back to tokens."""
        if isinstance(ids, Iterable):
            return [self.itos[i] for i in ids]
        else:
            return self.itos[ids]

    def extend(self, tokens):
        """Append the not-yet-known tokens in sorted order."""
        self.itos.extend(sorted(set(tokens).difference(self.stoi)))
        self.stoi = {token: i for i, token in enumerate(self.itos)}
-------------------------------------------------------------------------------- /flair/trainers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/trainers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /flair/trainers/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/trainers/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /flair/trainers/__pycache__/distillation_trainer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/trainers/__pycache__/distillation_trainer.cpython-36.pyc -------------------------------------------------------------------------------- /flair/trainers/__pycache__/distillation_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/trainers/__pycache__/distillation_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /flair/trainers/__pycache__/finetune_trainer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/trainers/__pycache__/finetune_trainer.cpython-36.pyc 
A = TypeVar('A')


def lazy_groups_of(iterator: Iterator[A], group_size: int) -> Iterator[List[A]]:
    """
    Takes an iterator and batches the individual instances into lists of the
    specified size. The last list may be smaller if there are instances left over.

    Any iterable is accepted: it is turned into an iterator first, so a list
    is consumed once instead of being restarted for every group.
    """
    # ``iter`` hands an iterator back unchanged, so this is a no-op for the
    # documented input and only matters for re-iterable containers.
    iterator = iter(iterator)
    # Two-argument ``iter``: keep calling the lambda until it yields ``[]``.
    return iter(lambda: list(islice(iterator, 0, group_size)), [])


def ensure_list(iterable: Iterable[A]) -> List[A]:
    """
    An Iterable may be a list or a generator.
    This ensures we get a list without making an unnecessary copy.
    """
    if isinstance(iterable, list):
        return iterable
    else:
        return list(iterable)


def is_lazy(iterable: Iterable[A]) -> bool:
    """
    Checks if the given iterable is lazy,
    which here just means it's not a list.
    """
    return not isinstance(iterable, list)


def add_noise_to_dict_values(dictionary: Dict[A, float], noise_param: float) -> Dict[A, float]:
    """
    Returns a new dictionary with noise added to every value in ``dictionary``.
    The noise for a value ``v`` is drawn uniformly from the interval between
    ``-v * noise_param`` and ``v * noise_param``; the input is not modified.
    """
    new_dict = {}
    for key, value in dictionary.items():
        noise_value = value * noise_param
        noise = random.uniform(-noise_value, noise_value)
        new_dict[key] = value + noise
    return new_dict
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/archival.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/checks.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/checks.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/checks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/checks.cpython-37.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/environment.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/exception_hook.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/exception_hook.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/exception_hook.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/exception_hook.cpython-37.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/file.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/file.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/from_params.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/from_params.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/from_params.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/from_params.cpython-37.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/logging.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/logging.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/logging.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/logging.cpython-37.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/nn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/nn.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/params.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/params.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/params.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/params.cpython-37.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/registrable.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/registrable.cpython-36.pyc -------------------------------------------------------------------------------- /flair/utils/__pycache__/string.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/utils/__pycache__/string.cpython-36.pyc 
class ConfigurationError(Exception):
    """
    The exception raised by any AllenNLP object when it's misconfigured
    (e.g. missing properties, invalid properties, unknown properties).
    """

    def __init__(self, message):
        # Hand the message to the base class so ``args`` is populated and
        # the exception can be copied or pickled like any other.
        super(ConfigurationError, self).__init__(message)
        self.message = message

    def __str__(self):
        return repr(self.message)


def log_pytorch_version_info():
    """Log the installed PyTorch version through the module-level ``logger``."""
    import torch
    logger.info("Pytorch version: %s", torch.__version__)


def check_dimensions_match(dimension_1: int,
                           dimension_2: int,
                           dim_1_name: str,
                           dim_2_name: str) -> None:
    """Raise ``ConfigurationError`` unless the two dimensions are equal.

    The two names are only used to build the error message.
    """
    if dimension_1 != dimension_2:
        raise ConfigurationError(f"{dim_1_name} must match {dim_2_name}, but got {dimension_1} "
                                 f"and {dimension_2} instead")


def check_for_gpu(device_id: int):
    """Raise ``ConfigurationError`` if ``device_id`` is not an available GPU.

    ``None`` and ids below the visible device count (negative ids included)
    pass without error.
    """
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.")


class ExceptionHook:
    """Callable that forwards its arguments to an IPython traceback formatter.

    IPython is imported and the formatter built on the first call only; the
    formatter is then cached on the instance.
    """
    instance = None

    def __call__(self, *args, **kwargs):
        if self.instance is None:
            from IPython.core import ultratb
            self.instance = ultratb.FormattedTB(mode="Plain", color_scheme="Linux", call_pdb=1)
        return self.instance(*args, **kwargs)


def extract_amr_token(file_path):
    """Yield, for each instance read from ``file_path``, its decoder tokens
    joined with single spaces."""
    dataset_reader = AbstractMeaningRepresentationDatasetReader()
    for instance in dataset_reader.read(file_path):
        amr_tokens = instance.fields["amr_tokens"]["decoder_tokens"]
        yield " ".join(amr_tokens)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Fill the placeholder with the script name; it used to be printed
        # as a literal "{}".
        print("""Usage:
    python {} [amr_file]

The output will in stdout.
    """.format(sys.argv[0]))
    for filename in sys.argv[1:]:
        for line in extract_amr_token(filename):
            sys.stdout.write(line + "\n")


def time_to_str(timestamp: int) -> str:
    """
    Convert seconds past Epoch to human readable string.

    The result has the form ``YYYY-MM-DD-HH-MM-SS`` and is expressed in the
    local time zone (``datetime.fromtimestamp`` is called without a tz).
    """
    datetimestamp = datetime.datetime.fromtimestamp(timestamp)
    return '{:04d}-{:02d}-{:02d}-{:02d}-{:02d}-{:02d}'.format(
        datetimestamp.year, datetimestamp.month, datetimestamp.day,
        datetimestamp.hour, datetimestamp.minute, datetimestamp.second
    )


def str_to_time(time_str: str) -> datetime.datetime:
    """
    Convert human readable string to datetime.datetime.

    The dash-separated integers are passed positionally to
    ``datetime.datetime``, so a string made by ``time_to_str`` round-trips
    to a naive datetime.
    """
    pieces = [int(piece) for piece in time_str.split('-')]
    return datetime.datetime(*pieces)
15 | # https://github.com/tqdm/tqdm/issues/469 16 | _tqdm.monitor_interval = 0 17 | 18 | class Tqdm: 19 | # These defaults are the same as the argument defaults in tqdm. 20 | default_mininterval: float = 0.1 21 | 22 | @staticmethod 23 | def set_default_mininterval(value: float) -> None: 24 | Tqdm.default_mininterval = value 25 | 26 | @staticmethod 27 | def set_slower_interval(use_slower_interval: bool) -> None: 28 | """ 29 | If ``use_slower_interval`` is ``True``, we will dramatically slow down ``tqdm's`` default 30 | output rate. ``tqdm's`` default output rate is great for interactively watching progress, 31 | but it is not great for log files. You might want to set this if you are primarily going 32 | to be looking at output through log files, not the terminal. 33 | """ 34 | if use_slower_interval: 35 | Tqdm.default_mininterval = 10.0 36 | else: 37 | Tqdm.default_mininterval = 0.1 38 | 39 | @staticmethod 40 | def tqdm(*args, **kwargs): 41 | new_kwargs = { 42 | 'mininterval': Tqdm.default_mininterval, 43 | **kwargs 44 | } 45 | 46 | return _tqdm(*args, **new_kwargs) 47 | -------------------------------------------------------------------------------- /flair/visual/__init__.py: -------------------------------------------------------------------------------- 1 | from .manifold import Visualizer 2 | from .activations import Highlighter 3 | -------------------------------------------------------------------------------- /flair/visual/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/visual/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /flair/visual/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/visual/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /flair/visual/__pycache__/activations.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/visual/__pycache__/activations.cpython-36.pyc -------------------------------------------------------------------------------- /flair/visual/__pycache__/activations.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/visual/__pycache__/activations.cpython-37.pyc -------------------------------------------------------------------------------- /flair/visual/__pycache__/manifold.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/visual/__pycache__/manifold.cpython-36.pyc -------------------------------------------------------------------------------- /flair/visual/__pycache__/manifold.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/visual/__pycache__/manifold.cpython-37.pyc -------------------------------------------------------------------------------- /flair/visual/__pycache__/training_curves.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/visual/__pycache__/training_curves.cpython-36.pyc -------------------------------------------------------------------------------- /flair/visual/__pycache__/training_curves.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/flair/visual/__pycache__/training_curves.cpython-37.pyc -------------------------------------------------------------------------------- /flair/visual/html_templates.py: -------------------------------------------------------------------------------- 1 | TAGGED_ENTITY = ''' 2 | 3 | {entity} 4 | {label} 5 | 6 | ''' 7 | 8 | HTML_PAGE = """ 9 | 10 | 11 | 12 | Flair 13 | 14 | 15 | {text} 16 | 17 | """ 18 | -------------------------------------------------------------------------------- /resources/docs/embeddings/BYTE_PAIR_EMBEDDINGS.md: -------------------------------------------------------------------------------- 1 | # Byte Pair Embeddings 2 | 3 | `BytePairEmbeddings` are word embeddings that are precomputed at the subword level. This means that they are able to 4 | embed any word by splitting words into subwords and looking up their embeddings. `BytePairEmbeddings` were proposed 5 | and computed by [Heinzerling and Strube (2018)](https://www.aclweb.org/anthology/L18-1473) who found that they offer nearly the same accuracy as word embeddings, but at a fraction 6 | of the model size. So they are a great choice if you want to train small models. 7 | 8 | You initialize with a language code (275 languages supported), a number of 'syllables' (one of 1000, 3000, 5000, 10000, 25000, 50000, 100000 or 200000) and 9 | a number of dimensions (one of 50, 100, 200 or 300).
The following initializes and uses byte pair embeddings 10 | for English: 11 | 12 | ```python 13 | from flair.embeddings import BytePairEmbeddings 14 | 15 | # init embedding 16 | embedding = BytePairEmbeddings('en') 17 | 18 | # create a sentence 19 | sentence = Sentence('The grass is green .') 20 | 21 | # embed words in sentence 22 | embedding.embed(sentence) 23 | ``` 24 | 25 | More information can be found 26 | on the [byte pair embeddings](https://nlp.h-its.org/bpemb/) web page. 27 | 28 | `BytePairEmbeddings` also have a multilingual model capable of embedding any word in any language. 29 | You can instantiate it with: 30 | 31 | ```python 32 | # init embedding 33 | embedding = BytePairEmbeddings('multi') 34 | ``` 35 | -------------------------------------------------------------------------------- /resources/docs/embeddings/CHARACTER_EMBEDDINGS.md: -------------------------------------------------------------------------------- 1 | # Character Embeddings 2 | 3 | `CharacterEmbeddings` allow you to add character-level word embeddings during model training. Note that these embeddings 4 | are randomly initialized when you initialize the class, so they are not meaningful unless you train them on a specific 5 | downstream task. 6 | 7 | For instance, the standard sequence labeling architecture used by [Lample et al. (2016)](https://www.aclweb.org/anthology/N16-1030) is a combination of classic word embeddings with task-trained character features. Normally this would require you to implement a [hierarchical embedding architecture](http://neuroner.com/NeuroNERengine_with_caption_no_figure.png) in which character-level embeddings for each word are computed using an RNN and then concatenated with word embeddings. 8 | 9 | In Flair, we simplify this by treating `CharacterEmbeddings` just like any other embedding class. 
To reproduce the 10 | Lample architecture, you need only combine them with standard `WordEmbeddings` in an embedding stack: 11 | 12 | 13 | ```python 14 | # init embedding stack 15 | embedding = StackedEmbeddings( 16 | [ 17 | # standard word embeddings 18 | WordEmbeddings('glove'), 19 | 20 | # character-level features 21 | CharacterEmbeddings(), 22 | ] 23 | ) 24 | ``` 25 | 26 | If you pass this stacked embedding to a train method, the character-level features will now automatically be trained 27 | for your downstream task. 28 | -------------------------------------------------------------------------------- /resources/docs/embeddings/ELMO_EMBEDDINGS.md: -------------------------------------------------------------------------------- 1 | # ELMo Embeddings 2 | 3 | [ELMo embeddings](http://www.aclweb.org/anthology/N18-1202) were presented by Peters et al. in 2018. They use 4 | a bidirectional recurrent neural network to predict the next word in a text. 5 | We use the implementation of [AllenNLP](https://allennlp.org/elmo). As this implementation comes with a lot of 6 | sub-dependencies, which we don't want to include in Flair, you need to first install the library via 7 | `pip install allennlp` before you can use it in Flair. 8 | Using the embeddings is as simple as using any other embedding type: 9 | 10 | ```python 11 | from flair.embeddings import ELMoEmbeddings 12 | 13 | # init embedding 14 | embedding = ELMoEmbeddings() 15 | 16 | # create a sentence 17 | sentence = Sentence('The grass is green .') 18 | 19 | # embed words in sentence 20 | embedding.embed(sentence) 21 | ``` 22 | 23 | AllenNLP provides the following pre-trained models. To use any of the following models inside Flair, 24 | simply specify the embedding id when initializing the `ELMoEmbeddings`.
25 | 26 | | ID | Language | Embedding | 27 | | ------------- | ------------- | ------------- | 28 | | 'small' | English | 1024-hidden, 1 layer, 14.6M parameters | 29 | | 'medium' | English | 2048-hidden, 1 layer, 28.0M parameters | 30 | | 'original' | English | 4096-hidden, 2 layers, 93.6M parameters | 31 | | 'large' | English | | 32 | | 'pt' | Portuguese | | 33 | | 'pubmed' | English biomedical data | [more information](https://allennlp.org/elmo) | 34 | -------------------------------------------------------------------------------- /resources/docs/embeddings/FASTTEXT_EMBEDDINGS.md: -------------------------------------------------------------------------------- 1 | # FastText Embeddings 2 | 3 | FastText Embeddings can give you vectors for out-of-vocabulary (OOV) words by using sub-word information. 4 | To use this functionality with Flair, use the `FastTextEmbeddings` class as shown: 5 | 6 | ```python 7 | from flair.embeddings import FastTextEmbeddings 8 | 9 | # init embedding 10 | embedding = FastTextEmbeddings('/path/to/local/custom_fasttext_embeddings.bin') 11 | 12 | # create a sentence 13 | sentence = Sentence('The grass is green .') 14 | 15 | # embed words in sentence 16 | embedding.embed(sentence) 17 | ``` 18 | 19 | You can also initialize the class by passing a remote, downloadable URL. 20 | 21 | ```python 22 | embedding = FastTextEmbeddings('/path/to/remote/downloadable/custom_fasttext_embeddings.bin', use_local=False) 23 | ``` 24 | 25 | Note that FastText embedding models are typically huge, which results in equally huge models for downstream tasks. 26 | 27 | Alternatively, you can use FastText embeddings without the OOV functionality by using normal `WordEmbeddings`, which 28 | are smaller, and get 29 | the OOV functionality from the `BytePairEmbeddings`, which are tiny.
So, instead of using English `FastTextEmbeddings` 30 | with oov handling, you could use this stack: 31 | 32 | ```python 33 | from flair.embeddings import WordEmbeddings, BytePairEmbeddings, StackedEmbeddings 34 | 35 | # init embedding 36 | embedding = StackedEmbeddings( 37 | [ 38 | # standard FastText word embeddings for English 39 | WordEmbeddings('en'), 40 | # Byte pair embeddings for English 41 | BytePairEmbeddings('en'), 42 | ] 43 | ) 44 | 45 | # create a sentence 46 | sentence = Sentence('The grass is green .') 47 | 48 | # embed words in sentence 49 | embedding.embed(sentence) 50 | ``` 51 | 52 | -------------------------------------------------------------------------------- /resources/taggers/ast_tags_new.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/resources/taggers/ast_tags_new.pkl -------------------------------------------------------------------------------- /resources/taggers/dependency_projective_tags.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/resources/taggers/dependency_projective_tags.pkl -------------------------------------------------------------------------------- /resources/taggers/enhancedud_tags.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/resources/taggers/enhancedud_tags.pkl -------------------------------------------------------------------------------- /resources/taggers/ner_tags.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/resources/taggers/ner_tags.pkl -------------------------------------------------------------------------------- /resources/taggers/np_tags.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/resources/taggers/np_tags.pkl -------------------------------------------------------------------------------- /resources/taggers/pos_tags.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/resources/taggers/pos_tags.pkl -------------------------------------------------------------------------------- /resources/taggers/ptb_tags.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/resources/taggers/ptb_tags.pkl -------------------------------------------------------------------------------- /resources/taggers/ud_dependency_tags.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alibaba-NLP/MultilangStructureKD/b1f6ad068d641b1fd842272c851d5df0cec5a8dc/resources/taggers/ud_dependency_tags.pkl -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .exception_hook import ExceptionHook 2 | from typing import Iterator, List, TypeVar, Iterable, Dict 3 | import random 4 | from itertools import zip_longest, islice 5 | 6 | A = TypeVar('A') 7 | def lazy_groups_of(iterator: Iterator[A], group_size: int) -> Iterator[List[A]]: 8 | """ 9 | Takes an iterator and batches 
the individual instances into lists of the 10 | specified size. The last list may be smaller if there are instances left over. 11 | """ 12 | return iter(lambda: list(islice(iterator, 0, group_size)), []) 13 | 14 | def ensure_list(iterable: Iterable[A]) -> List[A]: 15 | """ 16 | An Iterable may be a list or a generator. 17 | This ensures we get a list without making an unnecessary copy. 18 | """ 19 | if isinstance(iterable, list): 20 | return iterable 21 | else: 22 | return list(iterable) 23 | 24 | def is_lazy(iterable: Iterable[A]) -> bool: 25 | """ 26 | Checks if the given iterable is lazy, 27 | which here just means it's not a list. 28 | """ 29 | return not isinstance(iterable, list) 30 | 31 | def add_noise_to_dict_values(dictionary: Dict[A, float], noise_param: float) -> Dict[A, float]: 32 | """ 33 | Returns a new dictionary with noise added to the value of every key in ``dictionary``. The noise is 34 | uniformly distributed within ``noise_param`` percent of the value for every value in the 35 | dictionary. 36 | """ 37 | new_dict = {} 38 | for key, value in dictionary.items(): 39 | noise_value = value * noise_param 40 | noise = random.uniform(-noise_value, noise_value) 41 | new_dict[key] = value + noise 42 | return new_dict 43 | -------------------------------------------------------------------------------- /utils/checks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adopted from AllenNLP: 3 | https://github.com/allenai/allennlp/tree/v0.6.1/allennlp/common 4 | 5 | Functions and exceptions for checking that 6 | AllenNLP and its models are configured correctly. 7 | """ 8 | 9 | from torch import cuda 10 | 11 | from utils import logging 12 | 13 | logger = logging.init_logger() # pylint: disable=invalid-name 14 | 15 | 16 | class ConfigurationError(Exception): 17 | """ 18 | The exception raised by any AllenNLP object when it's misconfigured 19 | (e.g. missing properties, invalid properties, unknown properties).
20 | """ 21 | 22 | def __init__(self, message): 23 | super(ConfigurationError, self).__init__() 24 | self.message = message 25 | 26 | def __str__(self): 27 | return repr(self.message) 28 | 29 | 30 | def log_pytorch_version_info(): 31 | import torch 32 | logger.info("Pytorch version: %s", torch.__version__) 33 | 34 | 35 | def check_dimensions_match(dimension_1: int, 36 | dimension_2: int, 37 | dim_1_name: str, 38 | dim_2_name: str) -> None: 39 | if dimension_1 != dimension_2: 40 | raise ConfigurationError(f"{dim_1_name} must match {dim_2_name}, but got {dimension_1} " 41 | f"and {dimension_2} instead") 42 | 43 | 44 | def check_for_gpu(device_id: int): 45 | if device_id is not None and device_id >= cuda.device_count(): 46 | raise ConfigurationError("Experiment specified a GPU but none is available;" 47 | " if you want to run on CPU use the override" 48 | " 'trainer.cuda_device=-1' in the json config file.") 49 | -------------------------------------------------------------------------------- /utils/exception_hook.py: -------------------------------------------------------------------------------- 1 | class ExceptionHook: 2 | instance = None 3 | def __call__(self, *args, **kwargs): 4 | if self.instance is None: 5 | from IPython.core import ultratb 6 | self.instance = ultratb.FormattedTB(mode="Plain", color_scheme="Linux", call_pdb=1) 7 | return self.instance(*args, **kwargs) 8 | 9 | -------------------------------------------------------------------------------- /utils/extract_tokens_from_amr.py: -------------------------------------------------------------------------------- 1 | from stog.data.dataset_readers import AbstractMeaningRepresentationDatasetReader 2 | import sys 3 | from stog.utils import logging 4 | 5 | logger = logging.init_logger() 6 | def extract_amr_token(file_path): 7 | dataset_reader = AbstractMeaningRepresentationDatasetReader() 8 | for instance in dataset_reader.read(file_path): 9 | amr_tokens = instance.fields["amr_tokens"]["decoder_tokens"] 10 | 
yield " ".join(amr_tokens) 11 | 12 | 13 | if __name__ == "__main__": 14 | if len(sys.argv) < 2: 15 | print("""Usage: 16 | python {} [amr_file] 17 | 18 | The output will in stdout. 19 | """) 20 | for filename in sys.argv[1:]: 21 | for line in extract_amr_token(filename): 22 | sys.stdout.write(line + "\n") 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /utils/time.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | 4 | def time_to_str(timestamp: int) -> str: 5 | """ 6 | Convert seconds past Epoch to human readable string. 7 | """ 8 | datetimestamp = datetime.datetime.fromtimestamp(timestamp) 9 | return '{:04d}-{:02d}-{:02d}-{:02d}-{:02d}-{:02d}'.format( 10 | datetimestamp.year, datetimestamp.month, datetimestamp.day, 11 | datetimestamp.hour, datetimestamp.minute, datetimestamp.second 12 | ) 13 | 14 | 15 | def str_to_time(time_str: str) -> datetime.datetime: 16 | """ 17 | Convert human readable string to datetime.datetime. 18 | """ 19 | pieces = [int(piece) for piece in time_str.split('-')] 20 | return datetime.datetime(*pieces) 21 | -------------------------------------------------------------------------------- /utils/tqdm.py: -------------------------------------------------------------------------------- 1 | """ 2 | :class:`~allennlp.common.tqdm.Tqdm` wraps tqdm so we can add configurable 3 | global defaults for certain tqdm parameters. 4 | 5 | Adopted from AllenNLP: 6 | https://github.com/allenai/allennlp/blob/v0.6.1/allennlp/common/tqdm.py 7 | """ 8 | 9 | from tqdm import tqdm as _tqdm 10 | # This is necessary to stop tqdm from hanging 11 | # when exceptions are raised inside iterators. 12 | # It should have been fixed in 4.2.1, but it still 13 | # occurs. 14 | # TODO(Mark): Remove this once tqdm cleans up after itself properly.
# https://github.com/tqdm/tqdm/issues/469
_tqdm.monitor_interval = 0


class Tqdm:
    """
    Wrapper that calls ``tqdm`` with a process-wide default ``mininterval``.

    The default lives on the class itself, so changing it through either
    setter affects every later call to :meth:`Tqdm.tqdm`.
    """

    # Same value as tqdm's own argument default.
    default_mininterval: float = 0.1

    @staticmethod
    def set_default_mininterval(value: float) -> None:
        """Store ``value`` as the default ``mininterval`` for later bars."""
        Tqdm.default_mininterval = value

    @staticmethod
    def set_slower_interval(use_slower_interval: bool) -> None:
        """
        Switch between the slow (``10.0``) and the regular (``0.1``) default.

        The slow setting is meant for runs whose progress output is read from
        log files rather than watched interactively in a terminal.
        """
        Tqdm.default_mininterval = 10.0 if use_slower_interval else 0.1

    @staticmethod
    def tqdm(*args, **kwargs):
        """
        Build a ``tqdm`` progress bar, forwarding all arguments unchanged.

        ``mininterval`` is filled in from ``Tqdm.default_mininterval``; a
        ``mininterval`` passed explicitly by the caller takes precedence.
        """
        merged_kwargs = {'mininterval': Tqdm.default_mininterval}
        # Caller-supplied keywords override the class-level default.
        merged_kwargs.update(kwargs)
        return _tqdm(*args, **merged_kwargs)