├── .circleci ├── config.yml └── deploy.sh ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── ---new-benchmark.md │ ├── --new-model-addition.md │ ├── bug-report.md │ ├── feature-request.md │ ├── migration.md │ └── question-help.md ├── PULL_REQUEST_TEMPLATE.md ├── conda │ ├── build.sh │ └── meta.yaml ├── stale.yml └── workflows │ ├── github-torch-hub.yml │ ├── model-templates.yml │ ├── release-conda.yml │ ├── self-push.yml │ └── self-scheduled.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docker ├── transformers-cpu │ └── Dockerfile ├── transformers-gpu │ └── Dockerfile ├── transformers-pytorch-cpu │ └── Dockerfile ├── transformers-pytorch-gpu │ └── Dockerfile ├── transformers-pytorch-tpu │ ├── Dockerfile │ ├── bert-base-cased.jsonnet │ ├── dataset.yaml │ └── docker-entrypoint.sh ├── transformers-tensorflow-cpu │ └── Dockerfile └── transformers-tensorflow-gpu │ └── Dockerfile ├── docs ├── Makefile ├── README.md └── source │ ├── _static │ ├── css │ │ ├── Calibre-Light.ttf │ │ ├── Calibre-Medium.otf │ │ ├── Calibre-Regular.otf │ │ ├── Calibre-Thin.otf │ │ ├── code-snippets.css │ │ └── huggingface.css │ └── js │ │ ├── custom.js │ │ └── huggingface_logo.svg │ ├── benchmarks.rst │ ├── bertology.rst │ ├── conf.py │ ├── contributing.md │ ├── converting_tensorflow_models.rst │ ├── custom_datasets.rst │ ├── examples.md │ ├── favicon.ico │ ├── glossary.rst │ ├── imgs │ ├── local_attention_mask.png │ ├── ppl_chunked.gif │ ├── ppl_full.gif │ ├── ppl_sliding.gif │ ├── transformers_logo_name.png │ ├── warmup_constant_schedule.png │ ├── warmup_cosine_hard_restarts_schedule.png │ ├── warmup_cosine_schedule.png │ ├── warmup_cosine_warm_restarts_schedule.png │ └── warmup_linear_schedule.png │ ├── index.rst │ ├── installation.md │ ├── internal │ ├── generation_utils.rst │ ├── modeling_utils.rst │ ├── pipelines_utils.rst │ ├── tokenization_utils.rst │ └── trainer_utils.rst │ ├── main_classes │ ├── callback.rst │ ├── configuration.rst │ ├── logging.rst │ ├── model.rst │ ├── optimizer_schedules.rst │ ├── output.rst │ ├── pipelines.rst │ ├── processors.rst │ ├── tokenizer.rst │ └── trainer.rst │ ├── migration.md │ ├── model_doc │ ├── albert.rst │ ├── auto.rst │ ├── bart.rst │ ├── barthez.rst │ ├── bert.rst │ ├── bertgeneration.rst │ ├── blenderbot.rst │ ├── camembert.rst │ ├── ctrl.rst │ ├── deberta.rst │ ├── dialogpt.rst │ ├── distilbert.rst │ ├── dpr.rst │ ├── electra.rst │ ├── encoderdecoder.rst │ ├── flaubert.rst │ ├── fsmt.rst │ ├── funnel.rst │ ├── gpt.rst │ ├── gpt2.rst │ ├── layoutlm.rst │ ├── longformer.rst │ ├── lxmert.rst │ ├── marian.rst │ ├── mbart.rst │ ├── mobilebert.rst │ ├── mpnet.rst │ ├── mt5.rst │ ├── pegasus.rst │ ├── prophetnet.rst │ ├── rag.rst │ ├── reformer.rst │ ├── retribert.rst │ ├── roberta.rst │ ├── squeezebert.rst │ ├── t5.rst │ ├── transformerxl.rst │ ├── xlm.rst │ ├── xlmprophetnet.rst │ ├── xlmroberta.rst │ └── xlnet.rst │ ├── model_sharing.rst │ ├── model_summary.rst │ ├── multilingual.rst │ ├── notebooks.md │ ├── perplexity.rst │ ├── philosophy.rst │ ├── preprocessing.rst │ ├── pretrained_models.rst │ ├── quicktour.rst │ ├── serialization.rst │ ├── task_summary.rst │ ├── testing.rst │ ├── tokenizer_summary.rst │ └── training.rst ├── examples ├── README.md ├── adversarial │ ├── README.md │ ├── run_hans.py │ └── utils_hans.py ├── benchmarking │ ├── README.md │ ├── plot_csv_file.py │ ├── run_benchmark.py │ └── run_benchmark_tf.py ├── bert-loses-patience │ ├── README.md │ ├── pabee │ │ ├── __init__.py │ │ 
├── modeling_pabee_albert.py │ │ └── modeling_pabee_bert.py │ ├── run_glue_with_pabee.py │ └── test_run_glue_with_pabee.py ├── bertology │ └── run_bertology.py ├── conftest.py ├── contrib │ ├── README.md │ ├── legacy │ │ └── run_language_modeling.py │ ├── mm-imdb │ │ ├── README.md │ │ ├── run_mmimdb.py │ │ └── utils_mmimdb.py │ ├── run_camembert.py │ ├── run_chinese_ref.py │ ├── run_openai_gpt.py │ ├── run_swag.py │ └── run_transfo_xl.py ├── deebert │ ├── README.md │ ├── entropy_eval.sh │ ├── eval_deebert.sh │ ├── run_glue_deebert.py │ ├── src │ │ ├── __init__.py │ │ ├── modeling_highway_bert.py │ │ └── modeling_highway_roberta.py │ ├── test_glue_deebert.py │ └── train_deebert.sh ├── distillation │ ├── README.md │ ├── distiller.py │ ├── grouped_batch_sampler.py │ ├── lm_seqs_dataset.py │ ├── requirements.txt │ ├── run_squad_w_distillation.py │ ├── scripts │ │ ├── binarized_data.py │ │ ├── extract.py │ │ ├── extract_distilbert.py │ │ └── token_counts.py │ ├── train.py │ ├── training_configs │ │ ├── distilbert-base-cased.json │ │ ├── distilbert-base-multilingual-cased.json │ │ ├── distilbert-base-uncased.json │ │ ├── distilgpt2.json │ │ └── distilroberta-base.json │ └── utils.py ├── language-modeling │ ├── README.md │ ├── run_clm.py │ ├── run_mlm.py │ ├── run_mlm_flax.py │ ├── run_mlm_wwm.py │ └── run_plm.py ├── lightning_base.py ├── longform-qa │ ├── README.md │ ├── eli5_app.py │ └── eli5_utils.py ├── lxmert │ ├── README.md │ ├── demo.ipynb │ ├── extracting_data.py │ ├── modeling_frcnn.py │ ├── processing_image.py │ ├── requirements.txt │ ├── utils.py │ └── visualizing_image.py ├── movement-pruning │ ├── README.md │ ├── Saving_PruneBERT.ipynb │ ├── bertarize.py │ ├── counts_parameters.py │ ├── emmental │ │ ├── __init__.py │ │ ├── configuration_bert_masked.py │ │ ├── modeling_bert_masked.py │ │ └── modules │ │ │ ├── __init__.py │ │ │ ├── binarizer.py │ │ │ └── masked_nn.py │ ├── masked_run_glue.py │ ├── masked_run_squad.py │ └── requirements.txt ├── multiple-choice │ ├── README.md │ ├── run_multiple_choice.py │ ├── run_tf_multiple_choice.py │ └── utils_multiple_choice.py ├── question-answering │ ├── README.md │ ├── run_qa.py │ ├── run_qa_beam_search.py │ ├── run_squad.py │ ├── run_squad_trainer.py │ ├── run_tf_squad.py │ ├── squad_v2_local │ │ ├── evaluate.py │ │ └── squad_v2_local.py │ ├── trainer_qa.py │ └── utils_qa.py ├── rag │ ├── README.md │ ├── __init__.py │ ├── callbacks_rag.py │ ├── consolidate_rag_checkpoint.py │ ├── distributed_retriever.py │ ├── eval_rag.py │ ├── finetune_rag.py │ ├── finetune_rag.sh │ ├── parse_dpr_relevance_data.py │ ├── requirements.txt │ ├── test_data │ │ └── my_knowledge_dataset.csv │ ├── test_distributed_retriever.py │ ├── test_finetune_rag.py │ ├── use_own_knowledge_dataset.py │ └── utils_rag.py ├── requirements.txt ├── seq2seq │ ├── README.md │ ├── __init__.py │ ├── bertabs │ │ ├── README.md │ │ ├── __init__.py │ │ ├── configuration_bertabs.py │ │ ├── convert_bertabs_original_pytorch_checkpoint.py │ │ ├── modeling_bertabs.py │ │ ├── requirements.txt │ │ ├── run_summarization.py │ │ ├── test_utils_summarization.py │ │ └── utils_summarization.py │ ├── builtin_trainer │ │ ├── finetune.sh │ │ ├── finetune_tpu.sh │ │ ├── train_distil_marian_enro.sh │ │ ├── train_distil_marian_enro_tpu.sh │ │ ├── train_distilbart_cnn.sh │ │ └── train_mbart_cc25_enro.sh │ ├── callbacks.py │ ├── convert_model_to_fp16.py │ ├── convert_pl_checkpoint_to_hf.py │ ├── distil_marian_enro_teacher.sh │ ├── distil_marian_no_teacher.sh │ ├── distillation.py │ ├── download_wmt.py │ ├── 
dynamic_bs_example.sh │ ├── finetune.py │ ├── finetune.sh │ ├── finetune_bart_tiny.sh │ ├── finetune_pegasus_xsum.sh │ ├── finetune_t5.sh │ ├── finetune_trainer.py │ ├── make_student.py │ ├── minify_dataset.py │ ├── pack_dataset.py │ ├── precomputed_pseudo_labels.md │ ├── romanian_postprocessing.md │ ├── rouge_cli.py │ ├── run_distributed_eval.py │ ├── run_eval.py │ ├── run_eval_search.py │ ├── save_len_file.py │ ├── save_randomly_initialized_model.py │ ├── sentence_splitter.py │ ├── seq2seq_trainer.py │ ├── seq2seq_training_args.py │ ├── test_bash_script.py │ ├── test_calculate_rouge.py │ ├── test_data │ │ ├── fsmt │ │ │ ├── build-eval-data.py │ │ │ └── fsmt_val_data.json │ │ └── wmt_en_ro │ │ │ ├── test.source │ │ │ ├── test.target │ │ │ ├── train.len │ │ │ ├── train.source │ │ │ ├── train.target │ │ │ ├── val.len │ │ │ ├── val.source │ │ │ └── val.target │ ├── test_datasets.py │ ├── test_finetune_trainer.py │ ├── test_fsmt_bleu_score.py │ ├── test_make_student.py │ ├── test_seq2seq_examples.py │ ├── test_seq2seq_examples_multi_gpu.py │ ├── test_tatoeba_conversion.py │ ├── train_distilbart_cnn.sh │ ├── train_distilbart_xsum.sh │ ├── train_mbart_cc25_enro.sh │ ├── utils.py │ └── xla_spawn.py ├── test_examples.py ├── test_xla_examples.py ├── text-classification │ ├── README.md │ ├── run_glue.py │ ├── run_pl.sh │ ├── run_pl_glue.py │ ├── run_tf_glue.py │ ├── run_tf_text_classification.py │ └── run_xnli.py ├── text-generation │ ├── README.md │ ├── pplm │ │ ├── README.md │ │ ├── imgs │ │ │ ├── headfigure.png │ │ │ └── wooly.png │ │ ├── pplm_classification_head.py │ │ ├── run_pplm.py │ │ └── run_pplm_discrim_train.py │ └── run_generation.py ├── token-classification │ ├── README.md │ ├── run.sh │ ├── run_chunk.sh │ ├── run_ner.py │ ├── run_ner_old.py │ ├── run_old.sh │ ├── run_pl.sh │ ├── run_pl_ner.py │ ├── run_pos.sh │ ├── run_pos_pl.sh │ ├── run_tf_ner.py │ ├── scripts │ │ └── preprocess.py │ ├── tasks.py │ ├── test_ner_examples.py │ └── utils_ner.py └── xla_spawn.py ├── hubconf.py ├── model_cards ├── Cinnamon │ └── electra-small-japanese-discriminator │ │ └── README.md ├── DJSammy │ └── bert-base-danish-uncased_BotXO,ai │ │ └── README.md ├── DeepPavlov │ ├── bert-base-bg-cs-pl-ru-cased │ │ └── README.md │ ├── bert-base-cased-conversational │ │ └── README.md │ ├── bert-base-multilingual-cased-sentence │ │ └── README.md │ ├── rubert-base-cased-conversational │ │ └── README.md │ ├── rubert-base-cased-sentence │ │ └── README.md │ └── rubert-base-cased │ │ └── README.md ├── Geotrend │ ├── bert-base-15lang-cased │ │ └── README.md │ ├── bert-base-ar-cased │ │ └── README.md │ ├── bert-base-bg-cased │ │ └── README.md │ ├── bert-base-de-cased │ │ └── README.md │ ├── bert-base-el-cased │ │ └── README.md │ ├── bert-base-en-ar-cased │ │ └── README.md │ ├── bert-base-en-bg-cased │ │ └── README.md │ ├── bert-base-en-cased │ │ └── README.md │ ├── bert-base-en-de-cased │ │ └── README.md │ ├── bert-base-en-el-cased │ │ └── README.md │ ├── bert-base-en-es-cased │ │ └── README.md │ ├── bert-base-en-fr-cased │ │ └── README.md │ ├── bert-base-en-hi-cased │ │ └── README.md │ ├── bert-base-en-ru-cased │ │ └── README.md │ ├── bert-base-en-sw-cased │ │ └── README.md │ ├── bert-base-en-th-cased │ │ └── README.md │ ├── bert-base-en-tr-cased │ │ └── README.md │ ├── bert-base-en-ur-cased │ │ └── README.md │ ├── bert-base-en-vi-cased │ │ └── README.md │ ├── bert-base-en-zh-cased │ │ └── README.md │ ├── bert-base-es-cased │ │ └── README.md │ ├── bert-base-fr-cased │ │ └── README.md │ ├── bert-base-hi-cased │ │ └── 
README.md │ ├── bert-base-ru-cased │ │ └── README.md │ ├── bert-base-sw-cased │ │ └── README.md │ ├── bert-base-th-cased │ │ └── README.md │ ├── bert-base-tr-cased │ │ └── README.md │ ├── bert-base-ur-cased │ │ └── README.md │ ├── bert-base-vi-cased │ │ └── README.md │ └── bert-base-zh-cased │ │ └── README.md ├── Hate-speech-CNERG │ ├── dehatebert-mono-arabic │ │ └── README.md │ ├── dehatebert-mono-english │ │ └── README.md │ ├── dehatebert-mono-french │ │ └── README.md │ ├── dehatebert-mono-german │ │ └── README.md │ ├── dehatebert-mono-indonesian │ │ └── README.md │ ├── dehatebert-mono-italian │ │ └── README.md │ ├── dehatebert-mono-polish │ │ └── README.md │ ├── dehatebert-mono-portugese │ │ └── README.md │ └── dehatebert-mono-spanish │ │ └── README.md ├── HooshvareLab │ ├── bert-base-parsbert-armanner-uncased │ │ └── README.md │ ├── bert-base-parsbert-ner-uncased │ │ └── README.md │ ├── bert-base-parsbert-peymaner-uncased │ │ └── README.md │ ├── bert-base-parsbert-uncased │ │ └── README.md │ └── bert-fa-base-uncased │ │ └── README.md ├── KB │ ├── albert-base-swedish-cased-alpha │ │ └── README.md │ ├── bert-base-swedish-cased-ner │ │ └── README.md │ └── bert-base-swedish-cased │ │ └── README.md ├── LorenzoDeMattei │ └── GePpeTto │ │ └── README.md ├── Michau │ └── t5-base-en-generate-headline │ │ └── README.md ├── MoseliMotsoehli │ ├── TswanaBert │ │ └── README.md │ └── zuBERTa │ │ └── README.md ├── Musixmatch │ ├── umberto-commoncrawl-cased-v1 │ │ └── README.md │ └── umberto-wikipedia-uncased-v1 │ │ └── README.md ├── NLP4H │ └── ms_bert │ │ └── README.md ├── Naveen-k │ └── KanBERTo │ │ └── README.md ├── NeuML │ ├── bert-small-cord19-squad2 │ │ └── README.md │ ├── bert-small-cord19 │ │ └── README.md │ └── bert-small-cord19qa │ │ └── README.md ├── NlpHUST │ └── vibert4news-base-cased │ │ └── README.md ├── Norod78 │ └── hewiki-articles-distilGPT2py-il │ │ └── README.md ├── Ogayo │ └── Hel-ach-en │ │ └── README.md ├── Primer │ └── bart-squad2 │ │ └── README.md ├── Rostlab │ ├── prot_bert │ │ └── README.md │ ├── prot_bert_bfd │ │ └── README.md │ └── prot_t5_xl_bfd │ │ └── README.md ├── SZTAKI-HLT │ └── hubert-base-cc │ │ └── README.md ├── SparkBeyond │ └── roberta-large-sts-b │ │ └── README.md ├── T-Systems-onsite │ ├── bert-german-dbmdz-uncased-sentence-stsb │ │ └── README.md │ ├── cross-en-de-roberta-sentence-transformer │ │ └── README.md │ └── german-roberta-sentence-transformer-v2 │ │ └── README.md ├── Tereveni-AI │ └── gpt2-124M-uk-fiction │ │ └── README.md ├── TurkuNLP │ ├── bert-base-finnish-cased-v1 │ │ └── README.md │ └── bert-base-finnish-uncased-v1 │ │ └── README.md ├── TypicaAI │ └── magbert-ner │ │ └── README.md ├── Vamsi │ └── T5_Paraphrase_Paws │ │ └── README.md ├── VictorSanh │ └── roberta-base-finetuned-yelp-polarity │ │ └── README.md ├── ViktorAlm │ └── electra-base-norwegian-uncased-discriminator │ │ └── README.md ├── a-ware │ ├── bart-squadv2 │ │ └── README.md │ ├── roberta-large-squad-classification │ │ └── README.md │ └── xlmroberta-squadv2 │ │ └── README.md ├── abhilash1910 │ ├── financial_roberta │ │ └── README.md │ └── french-roberta │ │ └── README.md ├── activebus │ ├── BERT-DK_laptop │ │ └── README.md │ ├── BERT-DK_rest │ │ └── README.md │ ├── BERT-PT_laptop │ │ └── README.md │ ├── BERT-PT_rest │ │ └── README.md │ ├── BERT-XD_Review │ │ └── README.md │ └── BERT_Review │ │ └── README.md ├── adalbertojunior │ └── PTT5-SMALL-SUM │ │ └── README.md ├── ahotrod │ ├── albert_xxlargev1_squad2_512 │ │ └── README.md │ ├── electra_large_discriminator_squad2_512 │ │ └── 
README.md │ ├── roberta_large_squad2 │ │ └── README.md │ └── xlnet_large_squad2_512 │ │ └── README.md ├── ai4bharat │ └── indic-bert │ │ └── README.md ├── akhooli │ ├── gpt2-small-arabic-poetry │ │ └── README.md │ ├── gpt2-small-arabic │ │ └── README.md │ ├── mbart-large-cc25-ar-en │ │ └── README.md │ ├── mbart-large-cc25-en-ar │ │ └── README.md │ ├── personachat-arabic │ │ └── README.md │ ├── xlm-r-large-arabic-sent │ │ └── README.md │ └── xlm-r-large-arabic-toxic │ │ └── README.md ├── albert-base-v1-README.md ├── albert-xxlarge-v2-README.md ├── aliosm │ ├── ComVE-distilgpt2 │ │ └── README.md │ ├── ComVE-gpt2-large │ │ └── README.md │ ├── ComVE-gpt2-medium │ │ └── README.md │ ├── ComVE-gpt2 │ │ └── README.md │ ├── ai-soco-cpp-roberta-small-clas │ │ └── README.md │ ├── ai-soco-cpp-roberta-small │ │ └── README.md │ ├── ai-soco-cpp-roberta-tiny-96-clas │ │ └── README.md │ ├── ai-soco-cpp-roberta-tiny-96 │ │ └── README.md │ ├── ai-soco-cpp-roberta-tiny-clas │ │ └── README.md │ └── ai-soco-cpp-roberta-tiny │ │ └── README.md ├── allegro │ ├── herbert-base-cased │ │ └── README.md │ ├── herbert-klej-cased-tokenizer-v1 │ │ └── README.md │ ├── herbert-klej-cased-v1 │ │ └── README.md │ └── herbert-large-cased │ │ └── README.md ├── allenai │ ├── biomed_roberta_base │ │ └── README.md │ ├── longformer-base-4096-extra.pos.embd.only │ │ └── README.md │ ├── longformer-base-4096 │ │ └── README.md │ ├── scibert_scivocab_cased │ │ └── README.md │ ├── scibert_scivocab_uncased │ │ └── README.md │ ├── wmt16-en-de-12-1 │ │ └── README.md │ ├── wmt16-en-de-dist-12-1 │ │ └── README.md │ ├── wmt16-en-de-dist-6-1 │ │ └── README.md │ ├── wmt19-de-en-6-6-base │ │ └── README.md │ └── wmt19-de-en-6-6-big │ │ └── README.md ├── allenyummy │ └── chinese-bert-wwm-ehr-ner-sl │ │ └── README.md ├── amberoad │ └── bert-multilingual-passage-reranking-msmarco │ │ └── README.md ├── amine │ └── bert-base-5lang-cased │ │ └── README.md ├── antoiloui │ └── belgpt2 │ │ └── README.md ├── aodiniz │ ├── bert_uncased_L-10_H-512_A-8_cord19-200616 │ │ └── README.md │ ├── bert_uncased_L-10_H-512_A-8_cord19-200616_squad2 │ │ └── README.md │ ├── bert_uncased_L-2_H-512_A-8_cord19-200616 │ │ └── README.md │ └── bert_uncased_L-4_H-256_A-4_cord19-200616 │ │ └── README.md ├── asafaya │ ├── bert-base-arabic │ │ └── README.md │ ├── bert-large-arabic │ │ └── README.md │ ├── bert-medium-arabic │ │ └── README.md │ └── bert-mini-arabic │ │ └── README.md ├── ashwani-tanwar │ └── Gujarati-XLM-R-Base │ │ └── README.md ├── aubmindlab │ ├── bert-base-arabert │ │ └── README.md │ └── bert-base-arabertv01 │ │ └── README.md ├── bart-large-cnn │ └── README.md ├── bart-large-xsum │ └── README.md ├── bashar-talafha │ └── multi-dialect-bert-base-arabic │ │ └── README.md ├── bayartsogt │ ├── albert-mongolian │ │ └── README.md │ ├── bert-base-mongolian-cased │ │ └── README.md │ └── bert-base-mongolian-uncased │ │ └── README.md ├── bert-base-cased-README.md ├── bert-base-chinese-README.md ├── bert-base-german-cased-README.md ├── bert-base-german-dbmdz-cased-README.md ├── bert-base-german-dbmdz-uncased-README.md ├── bert-base-multilingual-cased-README.md ├── bert-base-multilingual-uncased-README.md ├── bert-base-uncased-README.md ├── bert-large-cased-README.md ├── binwang │ └── xlnet-base-cased │ │ └── README.md ├── bionlp │ ├── bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 │ │ └── README.md │ ├── bluebert_pubmed_mimic_uncased_L-24_H-1024_A-16 │ │ └── README.md │ ├── bluebert_pubmed_uncased_L-12_H-768_A-12 │ │ └── README.md │ └── bluebert_pubmed_uncased_L-24_H-1024_A-16 │ 
│ └── README.md ├── blinoff │ └── roberta-base-russian-v0 │ │ └── README.md ├── cahya │ ├── bert-base-indonesian-522M │ │ └── README.md │ ├── gpt2-small-indonesian-522M │ │ └── README.md │ └── roberta-base-indonesian-522M │ │ └── README.md ├── cambridgeltl │ └── BioRedditBERT-uncased │ │ └── README.md ├── camembert-base-README.md ├── camembert │ ├── camembert-base-ccnet-4gb │ │ └── README.md │ ├── camembert-base-ccnet │ │ └── README.md │ ├── camembert-base-oscar-4gb │ │ └── README.md │ ├── camembert-base-wikipedia-4gb │ │ └── README.md │ └── camembert-large │ │ └── README.md ├── canwenxu │ └── BERT-of-Theseus-MNLI │ │ └── README.md ├── cedpsam │ └── chatbot_fr │ │ └── README.md ├── ceostroff │ └── harry-potter-gpt2-fanfiction │ │ └── README.md ├── chrisliu298 │ └── arxiv_ai_gpt2 │ │ └── README.md ├── cimm-kzn │ ├── endr-bert │ │ └── README.md │ ├── enrudr-bert │ │ └── README.md │ └── rudr-bert │ │ └── README.md ├── clue │ ├── albert_chinese_small │ │ └── README.md │ ├── albert_chinese_tiny │ │ └── README.md │ ├── roberta_chinese_3L312_clue_tiny │ │ └── README.md │ ├── roberta_chinese_base │ │ └── README.md │ ├── roberta_chinese_large │ │ └── README.md │ └── xlnet_chinese_large │ │ └── README.md ├── codegram │ ├── calbert-base-uncased │ │ └── README.md │ └── calbert-tiny-uncased │ │ └── README.md ├── cooelf │ └── limitbert │ │ └── README.md ├── csarron │ ├── bert-base-uncased-squad-v1 │ │ └── README.md │ ├── mobilebert-uncased-squad-v1 │ │ └── README.md │ ├── mobilebert-uncased-squad-v2 │ │ └── README.md │ └── roberta-base-squad-v1 │ │ └── README.md ├── daigo │ └── bert-base-japanese-sentiment │ │ └── README.md ├── dbmdz │ ├── bert-base-german-cased │ │ └── README.md │ ├── bert-base-german-europeana-cased │ │ └── README.md │ ├── bert-base-german-europeana-uncased │ │ └── README.md │ ├── bert-base-german-uncased │ │ └── README.md │ ├── bert-base-italian-cased │ │ └── README.md │ ├── bert-base-italian-uncased │ │ └── README.md │ ├── bert-base-italian-xxl-cased │ │ └── README.md │ ├── bert-base-italian-xxl-uncased │ │ └── README.md │ ├── bert-base-turkish-128k-cased │ │ └── README.md │ ├── bert-base-turkish-128k-uncased │ │ └── README.md │ ├── bert-base-turkish-cased │ │ └── README.md │ ├── bert-base-turkish-uncased │ │ └── README.md │ ├── distilbert-base-turkish-cased │ │ └── README.md │ ├── electra-base-italian-xxl-cased-discriminator │ │ └── README.md │ ├── electra-base-italian-xxl-cased-generator │ │ └── README.md │ ├── electra-base-turkish-cased-discriminator │ │ └── README.md │ └── electra-small-turkish-cased-discriminator │ │ └── README.md ├── dccuchile │ ├── bert-base-spanish-wwm-cased │ │ └── README.md │ └── bert-base-spanish-wwm-uncased │ │ └── README.md ├── deepset │ ├── bert-base-german-cased-oldvocab │ │ └── README.md │ ├── electra-base-squad2 │ │ └── README.md │ ├── gbert-base │ │ └── README.md │ ├── gbert-large │ │ └── README.md │ ├── gelectra-base-generator │ │ └── README.md │ ├── gelectra-base │ │ └── README.md │ ├── gelectra-large-generator │ │ └── README.md │ ├── gelectra-large │ │ └── README.md │ ├── minilm-uncased-squad2 │ │ └── README.md │ ├── quora_dedup_bert_base │ │ └── README.md │ ├── roberta-base-squad2-covid │ │ └── README.md │ ├── roberta-base-squad2-v2 │ │ └── README.md │ ├── roberta-base-squad2 │ │ └── README.md │ ├── sentence_bert │ │ └── README.md │ └── xlm-roberta-large-squad2 │ │ └── README.md ├── digitalepidemiologylab │ └── covid-twitter-bert │ │ └── README.md ├── distilbert-base-cased-README.md ├── distilbert-base-cased-distilled-squad-README.md ├── 
distilbert-base-german-cased-README.md ├── distilbert-base-multilingual-cased-README.md ├── distilbert-base-uncased-README.md ├── distilbert-base-uncased-distilled-squad-README.md ├── distilbert-base-uncased-finetuned-sst-2-english-README.md ├── distilgpt2-README.md ├── distilroberta-base-README.md ├── djstrong │ └── bg_cs_pl_ru_cased_L-12_H-768_A-12 │ │ └── README.md ├── dkleczek │ ├── bert-base-polish-cased-v1 │ │ └── README.md │ └── bert-base-polish-uncased-v1 │ │ └── README.md ├── dslim │ └── bert-base-NER │ │ └── README.md ├── dumitrescustefan │ ├── bert-base-romanian-cased-v1 │ │ └── README.md │ └── bert-base-romanian-uncased-v1 │ │ └── README.md ├── e-tony │ └── gpt2-rnm │ │ └── README.md ├── elgeish │ ├── cs224n-squad2.0-albert-base-v2 │ │ └── README.md │ ├── cs224n-squad2.0-albert-large-v2 │ │ └── README.md │ ├── cs224n-squad2.0-albert-xxlarge-v1 │ │ └── README.md │ ├── cs224n-squad2.0-distilbert-base-uncased │ │ └── README.md │ └── cs224n-squad2.0-roberta-base │ │ └── README.md ├── emilyalsentzer │ ├── Bio_ClinicalBERT │ │ └── README.md │ └── Bio_Discharge_Summary_BERT │ │ └── README.md ├── etalab-ia │ └── camembert-base-squadFR-fquad-piaf │ │ └── README.md ├── ethanyt │ ├── guwenbert-base │ │ └── README.md │ └── guwenbert-large │ │ └── README.md ├── facebook │ ├── bart-large-cnn │ │ └── README.md │ ├── bart-large-mnli │ │ └── README.md │ ├── bart-large │ │ └── README.md │ ├── rag-sequence-base │ │ └── README.md │ ├── rag-sequence-nq │ │ └── README.md │ ├── rag-token-base │ │ └── README.md │ ├── rag-token-nq │ │ └── README.md │ ├── rag-token-nq_new │ │ └── README.md │ ├── wmt19-de-en │ │ └── README.md │ ├── wmt19-en-de │ │ └── README.md │ ├── wmt19-en-ru │ │ └── README.md │ └── wmt19-ru-en │ │ └── README.md ├── flexudy │ └── t5-base-multi-sentence-doctor │ │ ├── README.md │ │ └── sent-banner.png ├── fmikaelian │ ├── camembert-base-fquad │ │ └── README.md │ ├── camembert-base-squad │ │ └── README.md │ └── flaubert-base-uncased-squad │ │ └── README.md ├── fran-martinez │ └── scibert_scivocab_cased_ner_jnlpba │ │ └── README.md ├── funnel-transformer │ ├── intermediate-base │ │ └── README.md │ ├── intermediate │ │ └── README.md │ ├── large-base │ │ └── README.md │ ├── large │ │ └── README.md │ ├── medium-base │ │ └── README.md │ ├── medium │ │ └── README.md │ ├── small-base │ │ └── README.md │ ├── small │ │ └── README.md │ ├── xlarge-base │ │ └── README.md │ └── xlarge │ │ └── README.md ├── gaochangkuan │ └── model_dir │ │ └── README.md ├── german-nlp-group │ └── electra-base-german-uncased │ │ └── README.md ├── giganticode │ └── StackOBERTflow-comments-small-v1 │ │ └── README.md ├── gilf │ ├── french-camembert-postag-model │ │ └── README.md │ └── french-postag-model │ │ └── README.md ├── google │ ├── bert2bert_L-24_wmt_de_en │ │ └── README.md │ ├── bert2bert_L-24_wmt_en_de │ │ └── README.md │ ├── bert_uncased_L-10_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-10_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-10_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-10_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-12_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-12_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-12_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-12_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-2_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-2_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-2_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-2_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-4_H-128_A-2 │ │ └── README.md │ ├── 
bert_uncased_L-4_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-4_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-4_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-6_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-6_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-6_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-6_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-8_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-8_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-8_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-8_H-768_A-12 │ │ └── README.md │ ├── electra-base-discriminator │ │ └── README.md │ ├── electra-base-generator │ │ └── README.md │ ├── electra-large-discriminator │ │ └── README.md │ ├── electra-large-generator │ │ └── README.md │ ├── electra-small-discriminator │ │ └── README.md │ ├── electra-small-generator │ │ └── README.md │ ├── mobilebert-uncased │ │ └── README.md │ ├── reformer-crime-and-punishment │ │ └── README.md │ ├── reformer-enwik8 │ │ └── README.md │ ├── roberta2roberta_L-24_bbc │ │ └── README.md │ ├── roberta2roberta_L-24_cnn_daily_mail │ │ └── README.md │ ├── roberta2roberta_L-24_discofuse │ │ └── README.md │ ├── roberta2roberta_L-24_gigaword │ │ └── README.md │ └── roberta2roberta_L-24_wikisplit │ │ └── README.md ├── gpt2-README.md ├── gpt2-large-README.md ├── gpt2-medium-README.md ├── gpt2-xl-README.md ├── gsarti │ ├── biobert-nli │ │ └── README.md │ ├── covidbert-nli │ │ └── README.md │ └── scibert-nli │ │ └── README.md ├── gurkan08 │ └── bert-turkish-text-classification │ │ └── README.md ├── hatmimoha │ └── arabic-ner │ │ └── README.md ├── healx │ ├── gpt-2-pubmed-large │ │ └── README.md │ └── gpt-2-pubmed-medium │ │ └── README.md ├── henryk │ ├── bert-base-multilingual-cased-finetuned-dutch-squad2 │ │ └── README.md │ ├── bert-base-multilingual-cased-finetuned-polish-squad1 │ │ └── README.md │ └── bert-base-multilingual-cased-finetuned-polish-squad2 │ │ └── README.md ├── huawei-noah │ ├── DynaBERT_MNLI │ │ └── README.md │ ├── DynaBERT_SST-2 │ │ └── README.md │ └── TinyBERT_General_4L_312D │ │ └── README.md ├── huggingface │ ├── CodeBERTa-language-id │ │ └── README.md │ └── CodeBERTa-small-v1 │ │ └── README.md ├── huseinzol05 │ ├── albert-base-bahasa-cased │ │ └── README.md │ ├── albert-tiny-bahasa-cased │ │ └── README.md │ ├── bert-base-bahasa-cased │ │ └── README.md │ ├── electra-base-discriminator-bahasa-cased │ │ └── README.md │ ├── electra-base-generator-bahasa-cased │ │ └── README.md │ ├── electra-small-discriminator-bahasa-cased │ │ └── README.md │ ├── electra-small-generator-bahasa-cased │ │ └── README.md │ ├── gpt2-117M-bahasa-cased │ │ └── README.md │ ├── gpt2-345M-bahasa-cased │ │ └── README.md │ ├── t5-base-bahasa-cased │ │ └── README.md │ ├── t5-base-bahasa-summarization-cased │ │ └── README.md │ ├── t5-small-bahasa-cased │ │ └── README.md │ ├── t5-small-bahasa-summarization-cased │ │ └── README.md │ ├── tiny-bert-bahasa-cased │ │ └── README.md │ └── xlnet-base-bahasa-cased │ │ └── README.md ├── iarfmoose │ ├── bert-base-cased-qa-evaluator │ │ └── README.md │ ├── roberta-base-bulgarian-pos │ │ └── README.md │ ├── roberta-base-bulgarian │ │ └── README.md │ ├── roberta-small-bulgarian-pos │ │ └── README.md │ ├── roberta-small-bulgarian │ │ └── README.md │ └── t5-base-question-generator │ │ └── README.md ├── illuin │ ├── camembert-base-fquad │ │ └── README.md │ ├── camembert-large-fquad │ │ └── README.md │ └── lepetit │ │ └── README.md ├── indobenchmark │ ├── indobert-base-p1 │ │ └── README.md │ ├── indobert-base-p2 │ │ └── README.md │ ├── 
indobert-large-p1 │ │ └── README.md │ ├── indobert-large-p2 │ │ └── README.md │ ├── indobert-lite-base-p1 │ │ └── README.md │ ├── indobert-lite-base-p2 │ │ └── README.md │ ├── indobert-lite-large-p1 │ │ └── README.md │ └── indobert-lite-large-p2 │ │ └── README.md ├── indolem │ └── indobert-base-uncased │ │ └── README.md ├── ipuneetrathore │ └── bert-base-cased-finetuned-finBERT │ │ └── README.md ├── iuliaturc │ └── bert_uncased_L-2_H-128_A-2 │ │ └── README.md ├── ixa-ehu │ ├── berteus-base-cased │ │ └── README.md │ └── ixambert-base-cased │ │ └── README.md ├── jannesg │ ├── bertsson │ │ └── README.md │ ├── takalane_afr_roberta │ │ └── README.md │ ├── takalane_nbl_roberta │ │ └── README.md │ ├── takalane_nso_roberta │ │ └── README.md │ ├── takalane_sot_roberta │ │ └── README.md │ ├── takalane_ssw_roberta │ │ └── README.md │ ├── takalane_tsn_roberta │ │ └── README.md │ ├── takalane_tso_roberta │ │ └── README.md │ ├── takalane_ven_roberta │ │ └── README.md │ ├── takalane_xho_roberta │ │ └── README.md │ └── takalane_zul_roberta │ │ └── README.md ├── jcblaise │ ├── bert-tagalog-base-cased-WWM │ │ └── README.md │ ├── bert-tagalog-base-cased │ │ └── README.md │ ├── bert-tagalog-base-uncased-WWM │ │ └── README.md │ ├── bert-tagalog-base-uncased │ │ └── README.md │ ├── distilbert-tagalog-base-cased │ │ └── README.md │ ├── electra-tagalog-base-cased-discriminator │ │ └── README.md │ ├── electra-tagalog-base-cased-generator │ │ └── README.md │ ├── electra-tagalog-base-uncased-discriminator │ │ └── README.md │ ├── electra-tagalog-base-uncased-generator │ │ └── README.md │ ├── electra-tagalog-small-cased-discriminator │ │ └── README.md │ ├── electra-tagalog-small-cased-generator │ │ └── README.md │ ├── electra-tagalog-small-uncased-discriminator │ │ └── README.md │ └── electra-tagalog-small-uncased-generator │ │ └── README.md ├── jimregan │ └── BERTreach │ │ └── README.md ├── jme-p │ └── shrugging-grace-tweet-classifier │ │ └── README.md ├── joeddav │ ├── bart-large-mnli-yahoo-answers │ │ └── README.md │ └── xlm-roberta-large-xnli │ │ └── README.md ├── jordimas │ └── julibert │ │ └── README.md ├── jplu │ ├── tf-camembert-base │ │ └── README.md │ ├── tf-xlm-r-ner-40-lang │ │ └── README.md │ ├── tf-xlm-roberta-base │ │ └── README.md │ └── tf-xlm-roberta-large │ │ └── README.md ├── julien-c │ ├── EsperBERTo-small-pos │ │ └── README.md │ ├── EsperBERTo-small │ │ └── README.md │ ├── bert-xsmall-dummy │ │ └── README.md │ └── dummy-unknown │ │ └── README.md ├── keshan │ └── SinhalaBERTo │ │ └── README.md ├── krevas │ ├── finance-koelectra-base-discriminator │ │ └── README.md │ ├── finance-koelectra-base-generator │ │ └── README.md │ ├── finance-koelectra-small-discriminator │ │ └── README.md │ └── finance-koelectra-small-generator │ │ └── README.md ├── ktrapeznikov │ ├── albert-xlarge-v2-squad-v2 │ │ └── README.md │ ├── biobert_v1.1_pubmed_squad_v2 │ │ └── README.md │ ├── gpt2-medium-topic-news │ │ └── README.md │ └── scibert_scivocab_uncased_squad_v2 │ │ └── README.md ├── kuisailab │ ├── albert-base-arabic │ │ └── README.md │ ├── albert-large-arabic │ │ └── README.md │ └── albert-xlarge-arabic │ │ └── README.md ├── kuppuluri │ ├── telugu_bertu │ │ └── README.md │ ├── telugu_bertu_ner │ │ └── README.md │ ├── telugu_bertu_pos │ │ └── README.md │ └── telugu_bertu_tydiqa │ │ └── README.md ├── lanwuwei │ └── GigaBERT-v3-Arabic-and-English │ │ └── README.md ├── loodos │ ├── albert-base-turkish-uncased │ │ └── README.md │ ├── bert-base-turkish-uncased │ │ └── README.md │ ├── 
electra-base-turkish-64k-uncased-discriminator │ │ └── README.md │ ├── electra-base-turkish-uncased-discriminator │ │ └── README.md │ ├── electra-small-turkish-cased-discriminator │ │ └── README.md │ └── electra-small-turkish-uncased-discriminator │ │ └── README.md ├── lordtt13 │ ├── COVID-SciBERT │ │ └── README.md │ └── emo-mobilebert │ │ └── README.md ├── lserinol │ └── bert-turkish-question-answering │ │ └── README.md ├── lvwerra │ ├── bert-imdb │ │ └── README.md │ ├── gpt2-imdb-ctrl │ │ └── README.md │ ├── gpt2-imdb-pos │ │ └── README.md │ ├── gpt2-imdb │ │ └── README.md │ └── gpt2-medium-taboo │ │ └── README.md ├── lysandre │ ├── arxiv-nlp │ │ └── README.md │ └── arxiv │ │ └── README.md ├── m3hrdadfi │ ├── albert-fa-base-v2 │ │ └── README.md │ ├── bert2bert-fa-news-headline │ │ └── README.md │ └── bert2bert-fa-wiki-summary │ │ └── README.md ├── microsoft │ ├── DeBERTa-base │ │ └── README.md │ ├── DeBERTa-large │ │ └── README.md │ ├── DialoGPT-large │ │ └── README.md │ ├── DialoGPT-medium │ │ └── README.md │ ├── DialoGPT-small │ │ └── README.md │ ├── MiniLM-L12-H384-uncased │ │ └── README.md │ ├── Multilingual-MiniLM-L12-H384 │ │ └── README.md │ ├── codebert-base-mlm │ │ └── README.md │ ├── codebert-base │ │ └── README.md │ ├── layoutlm-base-uncased │ │ └── README.md │ ├── layoutlm-large-uncased │ │ └── README.md │ ├── prophetnet-large-uncased-cnndm │ │ └── README.md │ ├── prophetnet-large-uncased-squad-qg │ │ └── README.md │ ├── prophetnet-large-uncased │ │ └── README.md │ ├── xprophetnet-large-wiki100-cased-xglue-ntg │ │ └── README.md │ ├── xprophetnet-large-wiki100-cased-xglue-qg │ │ └── README.md │ └── xprophetnet-large-wiki100-cased │ │ └── README.md ├── monilouise │ └── ner_pt_br │ │ └── README.md ├── monologg │ ├── koelectra-base-discriminator │ │ └── README.md │ ├── koelectra-base-generator │ │ └── README.md │ ├── koelectra-small-discriminator │ │ └── README.md │ └── koelectra-small-generator │ │ └── README.md ├── monsoon-nlp │ └── dv-wave │ │ └── README.md ├── moumeneb1 │ └── flaubert-base-cased-ecology_crisis │ │ └── README.md ├── mrm8488 │ ├── CodeBERTaPy │ │ └── README.md │ ├── GPT-2-finetuned-CORD19 │ │ └── README.md │ ├── GPT-2-finetuned-common_gen │ │ └── README.md │ ├── GPT-2-finetuned-covid-bio-medrxiv │ │ └── README.md │ ├── GuaPeTe-2-tiny │ │ └── README.md │ ├── RoBERTinha │ │ └── README.md │ ├── RoBasquERTa │ │ └── README.md │ ├── RuPERTa-base-finetuned-ner │ │ └── README.md │ ├── RuPERTa-base-finetuned-pawsx-es │ │ └── README.md │ ├── RuPERTa-base-finetuned-pos │ │ └── README.md │ ├── RuPERTa-base-finetuned-squadv1 │ │ └── README.md │ ├── RuPERTa-base-finetuned-squadv2 │ │ └── README.md │ ├── RuPERTa-base │ │ └── README.md │ ├── TinyBERT-spanish-uncased-finetuned-ner │ │ └── README.md │ ├── bert-base-german-dbmdz-cased-finetuned-pawsx-de │ │ └── README.md │ ├── bert-base-german-finetuned-ler │ │ └── README.md │ ├── bert-base-spanish-wwm-cased-finetuned-spa-squad2-es │ │ └── README.md │ ├── bert-italian-finedtuned-squadv1-it-alfa │ │ └── README.md │ ├── bert-medium-finetuned-squadv2 │ │ └── README.md │ ├── bert-mini-finetuned-squadv2 │ │ └── README.md │ ├── bert-mini2bert-mini-finetuned-cnn_daily_mail-summarization │ │ └── README.md │ ├── bert-multi-cased-finedtuned-xquad-tydiqa-goldp │ │ └── README.md │ ├── bert-multi-cased-finetuned-xquadv1 │ │ └── README.md │ ├── bert-multi-uncased-finetuned-xquadv1 │ │ └── README.md │ ├── bert-small-finetuned-squadv2 │ │ └── README.md │ ├── bert-small-finetuned-typo-detection │ │ └── README.md │ ├── 
bert-small2bert-small-finetuned-cnn_daily_mail-summarization │ │ └── README.md │ ├── bert-spanish-cased-finetuned-ner │ │ └── README.md │ ├── bert-spanish-cased-finetuned-pos-syntax │ │ └── README.md │ ├── bert-spanish-cased-finetuned-pos │ │ └── README.md │ ├── bert-tiny-finetuned-squadv2 │ │ └── README.md │ ├── bert-uncased-finetuned-qnli │ │ └── README.md │ ├── camembert-base-finetuned-pawsx-fr │ │ └── README.md │ ├── chEMBL_smiles_v1 │ │ └── README.md │ ├── codeBERTaJS │ │ └── README.md │ ├── codebert-base-finetuned-detect-insecure-code │ │ └── README.md │ ├── distilbert-base-multi-cased-finetuned-typo-detection │ │ └── README.md │ ├── distilbert-multi-finetuned-for-xqua-on-tydiqa │ │ └── README.md │ ├── distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es │ │ └── README.md │ ├── distilroberta-base-finetuned-sentiment │ │ └── README.md │ ├── electra-base-finetuned-squadv1 │ │ └── README.md │ ├── electra-small-finetuned-squadv1 │ │ └── README.md │ ├── electra-small-finetuned-squadv2 │ │ └── README.md │ ├── electricidad-base-discriminator │ │ └── README.md │ ├── electricidad-base-finetuned-pawsx-es │ │ └── README.md │ ├── electricidad-base-generator │ │ └── README.md │ ├── electricidad-small-discriminator │ │ └── README.md │ ├── electricidad-small-finetuned-squadv1-es │ │ └── README.md │ ├── gpt2-finetuned-recipes-cooking │ │ └── README.md │ ├── gpt2-finetuned-recipes-cooking_v2 │ │ └── README.md │ ├── gpt2-imdb-neg │ │ └── README.md │ ├── gpt2-imdb-neutral │ │ └── README.md │ ├── longformer-base-4096-finetuned-squadv2 │ │ └── README.md │ ├── mT5-small-finetuned-tydiqa-for-xqa │ │ └── README.md │ ├── mobilebert-uncased-finetuned-squadv1 │ │ └── README.md │ ├── mobilebert-uncased-finetuned-squadv2 │ │ └── README.md │ ├── roberta-base-1B-1-finetuned-squadv1 │ │ └── README.md │ ├── roberta-base-1B-1-finetuned-squadv2 │ │ └── README.md │ ├── roberta-large-finetuned-wsc │ │ └── README.md │ ├── spanbert-base-finetuned-squadv1 │ │ └── README.md │ ├── spanbert-base-finetuned-squadv2 │ │ └── README.md │ ├── spanbert-base-finetuned-tacred │ │ └── README.md │ ├── spanbert-finetuned-squadv1 │ │ └── README.md │ ├── spanbert-finetuned-squadv2 │ │ └── README.md │ ├── spanbert-large-finetuned-squadv1 │ │ └── README.md │ ├── spanbert-large-finetuned-squadv2 │ │ └── README.md │ ├── spanbert-large-finetuned-tacred │ │ └── README.md │ ├── squeezebert-finetuned-squadv1 │ │ └── README.md │ ├── squeezebert-finetuned-squadv2 │ │ └── README.md │ ├── t5-base-finetuned-break_data-question-retrieval │ │ └── README.md │ ├── t5-base-finetuned-break_data │ │ └── README.md │ ├── t5-base-finetuned-common_gen │ │ └── README.md │ ├── t5-base-finetuned-e2m-intent │ │ └── README.md │ ├── t5-base-finetuned-emotion │ │ └── README.md │ ├── t5-base-finetuned-imdb-sentiment │ │ └── README.md │ ├── t5-base-finetuned-qasc │ │ └── README.md │ ├── t5-base-finetuned-quarel │ │ └── README.md │ ├── t5-base-finetuned-quartz │ │ └── README.md │ ├── t5-base-finetuned-question-generation-ap │ │ └── README.md │ ├── t5-base-finetuned-sarcasm-twitter │ │ └── README.md │ ├── t5-base-finetuned-span-sentiment-extraction │ │ └── README.md │ ├── t5-base-finetuned-squadv2 │ │ └── README.md │ ├── t5-base-finetuned-summarize-news │ │ └── README.md │ ├── t5-base-finetuned-wikiSQL-sql-to-en │ │ └── README.md │ ├── t5-base-finetuned-wikiSQL │ │ └── README.md │ ├── t5-small-finetuned-emotion │ │ └── README.md │ ├── t5-small-finetuned-imdb-sentiment │ │ └── README.md │ ├── t5-small-finetuned-quora-for-paraphrasing │ │ └── README.md │ ├── 
t5-small-finetuned-squadv1 │ │ └── README.md │ ├── t5-small-finetuned-squadv2 │ │ └── README.md │ ├── t5-small-finetuned-wikiSQL │ │ └── README.md │ ├── umberto-wikipedia-uncased-v1-finetuned-squadv1-it │ │ └── README.md │ └── xlm-multi-finetuned-xquadv1 │ │ └── README.md ├── mymusise │ └── gpt2-medium-chinese │ │ └── README.md ├── mys │ └── electra-base-turkish-cased-ner │ │ └── README.md ├── ncoop57 │ └── bart-base-code-summarizer-java-v0 │ │ └── README.md ├── neuralmind │ ├── bert-base-portuguese-cased │ │ └── README.md │ └── bert-large-portuguese-cased │ │ └── README.md ├── neuralspace-reverie │ ├── indic-transformers-bn-bert │ │ └── README.md │ ├── indic-transformers-bn-distilbert │ │ └── README.md │ ├── indic-transformers-bn-roberta │ │ └── README.md │ ├── indic-transformers-bn-xlmroberta │ │ └── README.md │ ├── indic-transformers-hi-bert │ │ └── README.md │ ├── indic-transformers-hi-distilbert │ │ └── README.md │ ├── indic-transformers-hi-roberta │ │ └── README.md │ ├── indic-transformers-hi-xlmroberta │ │ └── README.md │ ├── indic-transformers-te-bert │ │ └── README.md │ ├── indic-transformers-te-distilbert │ │ └── README.md │ ├── indic-transformers-te-roberta │ │ └── README.md │ └── indic-transformers-te-xlmroberta │ │ └── README.md ├── neuraly │ └── bert-base-italian-cased-sentiment │ │ └── README.md ├── neurocode │ └── IsRoBERTa │ │ └── README.md ├── nghuyong │ ├── ernie-1.0 │ │ └── README.md │ ├── ernie-2.0-en │ │ └── README.md │ ├── ernie-2.0-large-en │ │ └── README.md │ └── ernie-tiny │ │ └── README.md ├── nikokons │ └── gpt2-greek │ │ └── README.md ├── nlpaueb │ ├── bert-base-greek-uncased-v1 │ │ └── README.md │ └── legal-bert-base-uncased │ │ └── README.md ├── nlptown │ └── bert-base-multilingual-uncased-sentiment │ │ └── README.md ├── nyu-mll │ ├── roberta-base-100M-1 │ │ └── README.md │ ├── roberta-base-100M-2 │ │ └── README.md │ ├── roberta-base-100M-3 │ │ └── README.md │ ├── roberta-base-10M-1 │ │ └── README.md │ ├── roberta-base-10M-2 │ │ └── README.md │ ├── roberta-base-10M-3 │ │ └── README.md │ ├── roberta-base-1B-1 │ │ └── README.md │ ├── roberta-base-1B-2 │ │ └── README.md │ ├── roberta-base-1B-3 │ │ └── README.md │ ├── roberta-med-small-1M-1 │ │ └── README.md │ ├── roberta-med-small-1M-2 │ │ └── README.md │ ├── roberta-med-small-1M-3 │ │ └── README.md │ └── roberta_1M_to_1B │ │ └── README.md ├── oliverguhr │ └── german-sentiment-bert │ │ └── README.md ├── patrickvonplaten │ ├── bert2bert-cnn_dailymail-fp16 │ │ └── README.md │ ├── bert2bert_cnn_daily_mail │ │ └── README.md │ ├── bert2gpt2-cnn_dailymail-fp16 │ │ └── README.md │ ├── longformer2roberta-cnn_dailymail-fp16 │ │ └── README.md │ ├── roberta2roberta-cnn_dailymail-fp16 │ │ └── README.md │ ├── roberta2roberta-share-cnn_dailymail-fp16 │ │ └── README.md │ └── roberta_shared_bbc_xsum │ │ └── README.md ├── pdelobelle │ └── robbert-v2-dutch-base │ │ └── README.md ├── pedropei │ └── question-intimacy │ │ └── README.md ├── pierreguillou │ └── gpt2-small-portuguese │ │ └── README.md ├── pradhyra │ └── AWSBlogBert │ │ └── README.md ├── pranavpsv │ └── gpt2-genre-story-generator │ │ └── README.md ├── pvl │ └── labse_bert │ │ └── README.md ├── ramsrigouthamg │ └── t5_paraphraser │ │ └── README.md ├── rdenadai │ └── BR_BERTo │ │ └── README.md ├── redewiedergabe │ └── bert-base-historical-german-rw-cased │ │ └── README.md ├── rjbownes │ └── Magic-The-Generating │ │ └── README.md ├── roberta-base-README.md ├── roberta-large-README.md ├── roberta-large-mnli-README.md ├── rohanrajpal │ ├── 
bert-base-codemixed-uncased-sentiment │ │ └── README.md │ ├── bert-base-en-es-codemix-cased │ │ └── README.md │ ├── bert-base-en-hi-codemix-cased │ │ └── README.md │ └── bert-base-multilingual-codemixed-cased-sentiment │ │ └── README.md ├── sachaarbonel │ └── bert-italian-cased-finetuned-pos │ │ └── README.md ├── sagorsarker │ ├── bangla-bert-base │ │ └── README.md │ ├── bangla-bert-sentiment │ │ └── README.md │ ├── codeswitch-hineng-lid-lince │ │ └── README.md │ ├── codeswitch-hineng-ner-lince │ │ └── README.md │ ├── codeswitch-hineng-pos-lince │ │ └── README.md │ ├── codeswitch-nepeng-lid-lince │ │ └── README.md │ ├── codeswitch-spaeng-lid-lince │ │ └── README.md │ ├── codeswitch-spaeng-ner-lince │ │ └── README.md │ ├── codeswitch-spaeng-pos-lince │ │ └── README.md │ └── codeswitch-spaeng-sentiment-analysis-lince │ │ └── README.md ├── sarahlintang │ └── IndoBERT │ │ └── README.md ├── sarnikowski │ └── electra-small-discriminator-da-256-cased │ │ └── README.md ├── savasy │ ├── bert-base-turkish-ner-cased │ │ └── README.md │ ├── bert-base-turkish-sentiment-cased │ │ └── README.md │ ├── bert-base-turkish-squad │ │ └── README.md │ └── bert-turkish-text-classification │ │ └── README.md ├── schmidek │ └── electra-small-cased │ │ └── README.md ├── seiya │ └── oubiobert-base-uncased │ │ └── README.md ├── sentence-transformers │ ├── LaBSE │ │ └── README.md │ ├── bert-base-nli-cls-token │ │ └── README.md │ ├── bert-base-nli-max-tokens │ │ └── README.md │ └── bert-base-nli-mean-tokens │ │ └── README.md ├── severinsimmler │ └── literary-german-bert │ │ ├── README.md │ │ ├── kfold.png │ │ └── prosa-jahre.png ├── seyonec │ └── ChemBERTa-zinc-base-v1 │ │ └── README.md ├── shoarora │ ├── alectra-small-owt │ │ └── README.md │ └── electra-small-owt │ │ └── README.md ├── shrugging-grace │ └── tweetclassifier │ │ └── README.md ├── smanjil │ └── German-MedBERT │ │ └── README.md ├── spentaur │ └── yelp │ │ └── README.md ├── squeezebert │ ├── squeezebert-mnli-headless │ │ └── README.md │ ├── squeezebert-mnli │ │ └── README.md │ └── squeezebert-uncased │ │ └── README.md ├── stas │ └── tiny-wmt19-en-de │ │ └── README.md ├── stevhliu │ └── astroGPT │ │ └── README.md ├── surajp │ ├── RoBERTa-hindi-guj-san │ │ └── README.md │ ├── SanBERTa │ │ └── README.md │ └── albert-base-sanskrit │ │ └── README.md ├── t5-11b-README.md ├── t5-3b-README.md ├── t5-base-README.md ├── t5-large-README.md ├── t5-small-README.md ├── tartuNLP │ └── EstBERT │ │ └── README.md ├── tblard │ └── tf-allocine │ │ └── README.md ├── tuner007 │ ├── pegasus_paraphrase │ │ └── README.md │ ├── pegasus_qa │ │ └── README.md │ └── t5_abs_qa │ │ └── README.md ├── twmkn9 │ ├── albert-base-v2-squad2 │ │ └── README.md │ ├── bert-base-uncased-squad2 │ │ └── README.md │ ├── distilbert-base-uncased-squad2 │ │ └── README.md │ └── distilroberta-base-squad2 │ │ └── README.md ├── uer │ ├── chinese_roberta_L-2_H-128 │ │ └── README.md │ ├── gpt2-chinese-couplet │ │ └── README.md │ └── gpt2-chinese-poem │ │ └── README.md ├── uncnlp │ └── lxmert-base-uncased │ │ ├── LICENSE │ │ ├── README.md │ │ └── lxmert_model-1.jpg ├── unideeplearning │ └── polibert_sa │ │ └── README.md ├── urduhack │ └── roberta-urdu-small │ │ └── README.md ├── valhalla │ ├── bart-large-finetuned-squadv1 │ │ └── README.md │ ├── distilbart-mnli-12-1 │ │ └── README.md │ ├── distilbart-mnli-12-3 │ │ └── README.md │ ├── distilbart-mnli-12-6 │ │ └── README.md │ ├── distilbart-mnli-12-9 │ │ └── README.md │ ├── electra-base-discriminator-finetuned_squadv1 │ │ └── README.md │ ├── 
longformer-base-4096-finetuned-squadv1 │ │ └── README.md │ ├── t5-base-e2e-qg │ │ └── README.md │ ├── t5-base-qa-qg-hl │ │ └── README.md │ ├── t5-base-qg-hl │ │ └── README.md │ ├── t5-base-squad │ │ └── README.md │ ├── t5-samll-qg-prepend │ │ └── README.md │ ├── t5-small-e2e-qg │ │ └── README.md │ ├── t5-small-qa-qg-hl │ │ └── README.md │ └── t5-small-qg-hl │ │ └── README.md ├── vinai │ ├── bertweet-base │ │ └── README.md │ ├── bertweet-covid19-base-cased │ │ └── README.md │ ├── bertweet-covid19-base-uncased │ │ └── README.md │ ├── phobert-base │ │ └── README.md │ └── phobert-large │ │ └── README.md ├── voidful │ ├── albert_chinese_base │ │ └── README.md │ ├── albert_chinese_large │ │ └── README.md │ ├── albert_chinese_small │ │ └── README.md │ ├── albert_chinese_tiny │ │ └── README.md │ ├── albert_chinese_xlarge │ │ └── README.md │ └── albert_chinese_xxlarge │ │ └── README.md ├── wietsedv │ └── bert-base-dutch-cased │ │ └── README.md ├── wptoux │ └── albert-chinese-large-qa │ │ └── README.md ├── xlm-mlm-en-2048-README.md ├── xlm-roberta-base-README.md ├── xlm-roberta-large-finetuned-conll03-german-README.md ├── yjernite │ └── bart_eli5 │ │ └── README.md ├── ynie │ └── roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli │ │ └── README.md ├── youscan │ └── ukr-roberta-base │ │ └── README.md ├── yuvraj │ ├── summarizer-cnndm │ │ └── README.md │ └── xSumm │ │ └── README.md └── zanelim │ ├── singbert-large-sg │ └── README.md │ ├── singbert-lite-sg │ └── README.md │ └── singbert │ └── README.md ├── notebooks ├── 01-training-tokenizers.ipynb ├── 02-transformers.ipynb ├── 03-pipelines.ipynb ├── 04-onnx-export.ipynb ├── 05-benchmark.ipynb └── README.md ├── pyproject.toml ├── scripts ├── fsmt │ ├── convert-allenai-wmt16.sh │ ├── convert-allenai-wmt19.sh │ ├── convert-facebook-wmt19.sh │ ├── eval-allenai-wmt16.sh │ ├── eval-allenai-wmt19.sh │ ├── eval-facebook-wmt19.sh │ ├── fsmt-make-super-tiny-model.py │ ├── fsmt-make-tiny-model.py │ ├── gen-card-allenai-wmt16.py │ ├── gen-card-allenai-wmt19.py │ ├── gen-card-facebook-wmt19.py │ ├── s3-move.sh │ └── tests-to-run.sh ├── pegasus │ └── build_test_sample_spm_no_bos.py └── tatoeba │ └── README.md ├── setup.cfg ├── setup.py ├── src └── transformers │ ├── __init__.py │ ├── activations.py │ ├── activations_tf.py │ ├── benchmark │ ├── __init__.py │ ├── benchmark.py │ ├── benchmark_args.py │ ├── benchmark_args_tf.py │ ├── benchmark_args_utils.py │ ├── benchmark_tf.py │ └── benchmark_utils.py │ ├── commands │ ├── __init__.py │ ├── add_new_model.py │ ├── convert.py │ ├── download.py │ ├── env.py │ ├── lfs.py │ ├── run.py │ ├── serving.py │ ├── train.py │ ├── transformers_cli.py │ └── user.py │ ├── configuration_utils.py │ ├── convert_graph_to_onnx.py │ ├── convert_pytorch_checkpoint_to_tf2.py │ ├── convert_slow_tokenizer.py │ ├── convert_slow_tokenizers_checkpoints_to_fast.py │ ├── convert_tf_hub_seq_to_seq_bert_to_pytorch.py │ ├── data │ ├── __init__.py │ ├── data_collator.py │ ├── datasets │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── language_modeling.py │ │ └── squad.py │ ├── metrics │ │ ├── __init__.py │ │ └── squad_metrics.py │ ├── processors │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── squad.py │ │ ├── utils.py │ │ └── xnli.py │ └── test_generation_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── file_utils.py │ ├── generation_beam_search.py │ ├── generation_logits_process.py │ ├── generation_tf_utils.py │ ├── generation_utils.py │ ├── hf_api.py │ ├── hf_argparser.py │ ├── integrations.py │ ├── modelcard.py │ ├── 
modeling_flax_utils.py │ ├── modeling_outputs.py │ ├── modeling_tf_outputs.py │ ├── modeling_tf_pytorch_utils.py │ ├── modeling_tf_utils.py │ ├── modeling_utils.py │ ├── models │ ├── __init__.py │ ├── albert │ │ ├── __init__.py │ │ ├── configuration_albert.py │ │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_albert.py │ │ ├── modeling_tf_albert.py │ │ ├── tokenization_albert.py │ │ └── tokenization_albert_fast.py │ ├── auto │ │ ├── __init__.py │ │ ├── configuration_auto.py │ │ ├── modeling_auto.py │ │ ├── modeling_flax_auto.py │ │ ├── modeling_tf_auto.py │ │ └── tokenization_auto.py │ ├── bart │ │ ├── __init__.py │ │ ├── configuration_bart.py │ │ ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_bart.py │ │ ├── modeling_tf_bart.py │ │ ├── tokenization_bart.py │ │ └── tokenization_bart_fast.py │ ├── barthez │ │ ├── __init__.py │ │ ├── tokenization_barthez.py │ │ └── tokenization_barthez_fast.py │ ├── bert │ │ ├── __init__.py │ │ ├── configuration_bert.py │ │ ├── convert_bert_original_tf2_checkpoint_to_pytorch.py │ │ ├── convert_bert_original_tf_checkpoint_to_pytorch.py │ │ ├── convert_bert_pytorch_checkpoint_to_original_tf.py │ │ ├── modeling_bert.py │ │ ├── modeling_flax_bert.py │ │ ├── modeling_tf_bert.py │ │ ├── tokenization_bert.py │ │ └── tokenization_bert_fast.py │ ├── bert_generation │ │ ├── __init__.py │ │ ├── configuration_bert_generation.py │ │ ├── modeling_bert_generation.py │ │ └── tokenization_bert_generation.py │ ├── bert_japanese │ │ ├── __init__.py │ │ └── tokenization_bert_japanese.py │ ├── bertweet │ │ ├── __init__.py │ │ └── tokenization_bertweet.py │ ├── blenderbot │ │ ├── __init__.py │ │ ├── configuration_blenderbot.py │ │ ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_blenderbot.py │ │ ├── modeling_tf_blenderbot.py │ │ └── tokenization_blenderbot.py │ ├── camembert │ │ ├── __init__.py │ │ ├── configuration_camembert.py │ │ ├── modeling_camembert.py │ │ ├── modeling_tf_camembert.py │ │ ├── tokenization_camembert.py │ │ └── tokenization_camembert_fast.py │ ├── ctrl │ │ ├── __init__.py │ │ ├── configuration_ctrl.py │ │ ├── modeling_ctrl.py │ │ ├── modeling_tf_ctrl.py │ │ └── tokenization_ctrl.py │ ├── deberta │ │ ├── __init__.py │ │ ├── configuration_deberta.py │ │ ├── modeling_deberta.py │ │ └── tokenization_deberta.py │ ├── dialogpt │ │ ├── __init__.py │ │ └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py │ ├── distilbert │ │ ├── __init__.py │ │ ├── configuration_distilbert.py │ │ ├── modeling_distilbert.py │ │ ├── modeling_tf_distilbert.py │ │ ├── tokenization_distilbert.py │ │ └── tokenization_distilbert_fast.py │ ├── dpr │ │ ├── __init__.py │ │ ├── configuration_dpr.py │ │ ├── convert_dpr_original_checkpoint_to_pytorch.py │ │ ├── modeling_dpr.py │ │ ├── modeling_tf_dpr.py │ │ ├── tokenization_dpr.py │ │ └── tokenization_dpr_fast.py │ ├── electra │ │ ├── __init__.py │ │ ├── configuration_electra.py │ │ ├── convert_electra_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_electra.py │ │ ├── modeling_tf_electra.py │ │ ├── tokenization_electra.py │ │ └── tokenization_electra_fast.py │ ├── encoder_decoder │ │ ├── __init__.py │ │ ├── configuration_encoder_decoder.py │ │ └── modeling_encoder_decoder.py │ ├── flaubert │ │ ├── __init__.py │ │ ├── configuration_flaubert.py │ │ ├── modeling_flaubert.py │ │ ├── modeling_tf_flaubert.py │ │ └── tokenization_flaubert.py │ ├── fsmt │ │ ├── __init__.py │ │ ├── configuration_fsmt.py │ │ ├── 
convert_fsmt_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_fsmt.py │ │ └── tokenization_fsmt.py │ ├── funnel │ │ ├── __init__.py │ │ ├── configuration_funnel.py │ │ ├── convert_funnel_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_funnel.py │ │ ├── modeling_tf_funnel.py │ │ ├── tokenization_funnel.py │ │ └── tokenization_funnel_fast.py │ ├── gpt2 │ │ ├── __init__.py │ │ ├── configuration_gpt2.py │ │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_gpt2.py │ │ ├── modeling_tf_gpt2.py │ │ ├── tokenization_gpt2.py │ │ └── tokenization_gpt2_fast.py │ ├── herbert │ │ ├── __init__.py │ │ ├── tokenization_herbert.py │ │ └── tokenization_herbert_fast.py │ ├── layoutlm │ │ ├── __init__.py │ │ ├── configuration_layoutlm.py │ │ ├── modeling_layoutlm.py │ │ ├── tokenization_layoutlm.py │ │ └── tokenization_layoutlm_fast.py │ ├── longformer │ │ ├── __init__.py │ │ ├── configuration_longformer.py │ │ ├── convert_longformer_original_pytorch_lightning_to_pytorch.py │ │ ├── modeling_longformer.py │ │ ├── modeling_tf_longformer.py │ │ ├── tokenization_longformer.py │ │ └── tokenization_longformer_fast.py │ ├── lxmert │ │ ├── __init__.py │ │ ├── configuration_lxmert.py │ │ ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_lxmert.py │ │ ├── modeling_tf_lxmert.py │ │ ├── tokenization_lxmert.py │ │ └── tokenization_lxmert_fast.py │ ├── marian │ │ ├── __init__.py │ │ ├── configuration_marian.py │ │ ├── convert_marian_tatoeba_to_pytorch.py │ │ ├── convert_marian_to_pytorch.py │ │ ├── modeling_marian.py │ │ ├── modeling_tf_marian.py │ │ └── tokenization_marian.py │ ├── mbart │ │ ├── __init__.py │ │ ├── configuration_mbart.py │ │ ├── convert_mbart_original_checkpoint_to_pytorch.py │ │ ├── modeling_mbart.py │ │ ├── modeling_tf_mbart.py │ │ ├── tokenization_mbart.py │ │ └── tokenization_mbart_fast.py │ ├── mmbt │ │ ├── __init__.py │ │ ├── configuration_mmbt.py │ │ └── modeling_mmbt.py │ ├── mobilebert │ │ ├── __init__.py │ │ ├── configuration_mobilebert.py │ │ ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_mobilebert.py │ │ ├── modeling_tf_mobilebert.py │ │ ├── tokenization_mobilebert.py │ │ └── tokenization_mobilebert_fast.py │ ├── mpnet │ │ ├── __init__.py │ │ ├── configuration_mpnet.py │ │ ├── modeling_mpnet.py │ │ ├── modeling_tf_mpnet.py │ │ ├── tokenization_mpnet.py │ │ └── tokenization_mpnet_fast.py │ ├── mt5 │ │ ├── __init__.py │ │ ├── configuration_mt5.py │ │ ├── modeling_mt5.py │ │ └── modeling_tf_mt5.py │ ├── openai │ │ ├── __init__.py │ │ ├── configuration_openai.py │ │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_openai.py │ │ ├── modeling_tf_openai.py │ │ ├── tokenization_openai.py │ │ └── tokenization_openai_fast.py │ ├── pegasus │ │ ├── __init__.py │ │ ├── configuration_pegasus.py │ │ ├── convert_pegasus_tf_to_pytorch.py │ │ ├── modeling_pegasus.py │ │ ├── modeling_tf_pegasus.py │ │ ├── tokenization_pegasus.py │ │ └── tokenization_pegasus_fast.py │ ├── phobert │ │ ├── __init__.py │ │ └── tokenization_phobert.py │ ├── prophetnet │ │ ├── __init__.py │ │ ├── configuration_prophetnet.py │ │ ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_prophetnet.py │ │ └── tokenization_prophetnet.py │ ├── rag │ │ ├── __init__.py │ │ ├── configuration_rag.py │ │ ├── modeling_rag.py │ │ ├── retrieval_rag.py │ │ └── tokenization_rag.py │ ├── reformer │ │ ├── __init__.py │ │ ├── configuration_reformer.py │ │ ├── convert_reformer_trax_checkpoint_to_pytorch.py │ │ ├── 
modeling_reformer.py │ │ ├── tokenization_reformer.py │ │ └── tokenization_reformer_fast.py │ ├── retribert │ │ ├── __init__.py │ │ ├── configuration_retribert.py │ │ ├── modeling_retribert.py │ │ ├── tokenization_retribert.py │ │ └── tokenization_retribert_fast.py │ ├── roberta │ │ ├── __init__.py │ │ ├── configuration_roberta.py │ │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_flax_roberta.py │ │ ├── modeling_roberta.py │ │ ├── modeling_tf_roberta.py │ │ ├── tokenization_roberta.py │ │ └── tokenization_roberta_fast.py │ ├── squeezebert │ │ ├── __init__.py │ │ ├── configuration_squeezebert.py │ │ ├── modeling_squeezebert.py │ │ ├── tokenization_squeezebert.py │ │ └── tokenization_squeezebert_fast.py │ ├── t5 │ │ ├── __init__.py │ │ ├── configuration_t5.py │ │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_t5.py │ │ ├── modeling_tf_t5.py │ │ ├── tokenization_t5.py │ │ └── tokenization_t5_fast.py │ ├── transfo_xl │ │ ├── __init__.py │ │ ├── configuration_transfo_xl.py │ │ ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tf_transfo_xl.py │ │ ├── modeling_tf_transfo_xl_utilities.py │ │ ├── modeling_transfo_xl.py │ │ ├── modeling_transfo_xl_utilities.py │ │ └── tokenization_transfo_xl.py │ ├── xlm │ │ ├── __init__.py │ │ ├── configuration_xlm.py │ │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_tf_xlm.py │ │ ├── modeling_xlm.py │ │ └── tokenization_xlm.py │ ├── xlm_prophetnet │ │ ├── __init__.py │ │ ├── configuration_xlm_prophetnet.py │ │ ├── modeling_xlm_prophetnet.py │ │ └── tokenization_xlm_prophetnet.py │ ├── xlm_roberta │ │ ├── __init__.py │ │ ├── configuration_xlm_roberta.py │ │ ├── modeling_tf_xlm_roberta.py │ │ ├── modeling_xlm_roberta.py │ │ ├── tokenization_xlm_roberta.py │ │ └── tokenization_xlm_roberta_fast.py │ └── xlnet │ │ ├── __init__.py │ │ ├── configuration_xlnet.py │ │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tf_xlnet.py │ │ ├── modeling_xlnet.py │ │ ├── tokenization_xlnet.py │ │ └── tokenization_xlnet_fast.py │ ├── optimization.py │ ├── optimization_tf.py │ ├── pipelines.py │ ├── testing_utils.py │ ├── tokenization_utils.py │ ├── tokenization_utils_base.py │ ├── tokenization_utils_fast.py │ ├── trainer.py │ ├── trainer_callback.py │ ├── trainer_pt_utils.py │ ├── trainer_tf.py │ ├── trainer_utils.py │ ├── training_args.py │ ├── training_args_tf.py │ └── utils │ ├── __init__.py │ ├── dummy_flax_objects.py │ ├── dummy_pt_objects.py │ ├── dummy_sentencepiece_objects.py │ ├── dummy_tf_objects.py │ ├── dummy_tokenizers_objects.py │ ├── hp_naming.py │ ├── logging.py │ ├── model_parallel_utils.py │ ├── notebook.py │ ├── sentencepiece_model_pb2.py │ └── versions.py ├── templates ├── adding_a_new_example_script │ ├── README.md │ ├── cookiecutter.json │ └── {{cookiecutter.directory_name}} │ │ └── run_{{cookiecutter.example_shortcut}}.py └── adding_a_new_model │ ├── README.md │ ├── cookiecutter-template-{{cookiecutter.modelname}} │ ├── __init__.py │ ├── configuration.json │ ├── configuration_{{cookiecutter.lowercase_modelname}}.py │ ├── modeling_tf_{{cookiecutter.lowercase_modelname}}.py │ ├── modeling_{{cookiecutter.lowercase_modelname}}.py │ ├── test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py │ ├── test_modeling_{{cookiecutter.lowercase_modelname}}.py │ ├── to_replace_{{cookiecutter.lowercase_modelname}}.py │ ├── tokenization_fast_{{cookiecutter.lowercase_modelname}}.py │ ├── tokenization_{{cookiecutter.lowercase_modelname}}.py │ └── 
{{cookiecutter.lowercase_modelname}}.rst │ ├── cookiecutter.json │ └── tests │ ├── encoder-bert-tokenizer.json │ ├── pt-encoder-bert-tokenizer.json │ ├── standalone.json │ └── tf-encoder-bert-tokenizer.json ├── tests ├── __init__.py ├── conftest.py ├── fixtures │ ├── dummy-config.json │ ├── empty.txt │ ├── input.txt │ ├── sample_text.txt │ ├── sample_text_no_unicode.txt │ ├── spiece.model │ ├── test_sentencepiece.model │ ├── test_sentencepiece_no_bos.model │ └── tests_samples │ │ ├── .gitignore │ │ ├── GermEval │ │ ├── dev.txt │ │ ├── labels.txt │ │ └── train.txt │ │ ├── MRPC │ │ ├── dev.csv │ │ ├── dev.tsv │ │ ├── train.csv │ │ └── train.tsv │ │ ├── SQUAD │ │ └── sample.json │ │ ├── STS-B │ │ ├── dev.tsv │ │ └── train.tsv │ │ ├── conll │ │ └── sample.json │ │ └── wiki_text │ │ └── wiki_00 ├── test_activations.py ├── test_activations_tf.py ├── test_benchmark.py ├── test_benchmark_tf.py ├── test_cli.py ├── test_configuration_auto.py ├── test_configuration_common.py ├── test_data_collator.py ├── test_doc_samples.py ├── test_file_utils.py ├── test_flax_auto.py ├── test_generation_beam_search.py ├── test_generation_logits_process.py ├── test_generation_utils.py ├── test_hf_api.py ├── test_hf_argparser.py ├── test_logging.py ├── test_model_card.py ├── test_model_output.py ├── test_modeling_albert.py ├── test_modeling_auto.py ├── test_modeling_bart.py ├── test_modeling_bert.py ├── test_modeling_bert_generation.py ├── test_modeling_blenderbot.py ├── test_modeling_camembert.py ├── test_modeling_common.py ├── test_modeling_ctrl.py ├── test_modeling_deberta.py ├── test_modeling_distilbert.py ├── test_modeling_dpr.py ├── test_modeling_electra.py ├── test_modeling_encoder_decoder.py ├── test_modeling_flaubert.py ├── test_modeling_flax_bert.py ├── test_modeling_flax_roberta.py ├── test_modeling_fsmt.py ├── test_modeling_funnel.py ├── test_modeling_gpt2.py ├── test_modeling_layoutlm.py ├── test_modeling_longformer.py ├── test_modeling_lxmert.py ├── test_modeling_marian.py ├── test_modeling_mbart.py ├── test_modeling_mobilebert.py ├── test_modeling_mpnet.py ├── test_modeling_mt5.py ├── test_modeling_openai.py ├── test_modeling_pegasus.py ├── test_modeling_prophetnet.py ├── test_modeling_rag.py ├── test_modeling_reformer.py ├── test_modeling_roberta.py ├── test_modeling_squeezebert.py ├── test_modeling_t5.py ├── test_modeling_tf_albert.py ├── test_modeling_tf_auto.py ├── test_modeling_tf_bart.py ├── test_modeling_tf_bert.py ├── test_modeling_tf_blenderbot.py ├── test_modeling_tf_camembert.py ├── test_modeling_tf_common.py ├── test_modeling_tf_ctrl.py ├── test_modeling_tf_distilbert.py ├── test_modeling_tf_dpr.py ├── test_modeling_tf_electra.py ├── test_modeling_tf_flaubert.py ├── test_modeling_tf_funnel.py ├── test_modeling_tf_gpt2.py ├── test_modeling_tf_longformer.py ├── test_modeling_tf_lxmert.py ├── test_modeling_tf_marian.py ├── test_modeling_tf_mbart.py ├── test_modeling_tf_mobilebert.py ├── test_modeling_tf_mpnet.py ├── test_modeling_tf_mt5.py ├── test_modeling_tf_openai.py ├── test_modeling_tf_pegasus.py ├── test_modeling_tf_pytorch.py ├── test_modeling_tf_roberta.py ├── test_modeling_tf_t5.py ├── test_modeling_tf_transfo_xl.py ├── test_modeling_tf_xlm.py ├── test_modeling_tf_xlm_roberta.py ├── test_modeling_tf_xlnet.py ├── test_modeling_transfo_xl.py ├── test_modeling_xlm.py ├── test_modeling_xlm_prophetnet.py ├── test_modeling_xlm_roberta.py ├── test_modeling_xlnet.py ├── test_onnx.py ├── test_optimization.py ├── test_optimization_tf.py ├── test_pipelines_common.py ├── 
test_pipelines_conversational.py ├── test_pipelines_feature_extraction.py ├── test_pipelines_fill_mask.py ├── test_pipelines_ner.py ├── test_pipelines_question_answering.py ├── test_pipelines_sentiment_analysis.py ├── test_pipelines_summarization.py ├── test_pipelines_text2text_generation.py ├── test_pipelines_text_generation.py ├── test_pipelines_translation.py ├── test_pipelines_zero_shot.py ├── test_retrieval_rag.py ├── test_skip_decorators.py ├── test_tokenization_albert.py ├── test_tokenization_auto.py ├── test_tokenization_bart.py ├── test_tokenization_barthez.py ├── test_tokenization_bert.py ├── test_tokenization_bert_generation.py ├── test_tokenization_bert_japanese.py ├── test_tokenization_bertweet.py ├── test_tokenization_blenderbot.py ├── test_tokenization_camembert.py ├── test_tokenization_common.py ├── test_tokenization_ctrl.py ├── test_tokenization_deberta.py ├── test_tokenization_distilbert.py ├── test_tokenization_dpr.py ├── test_tokenization_fsmt.py ├── test_tokenization_funnel.py ├── test_tokenization_gpt2.py ├── test_tokenization_herbert.py ├── test_tokenization_layoutlm.py ├── test_tokenization_lxmert.py ├── test_tokenization_marian.py ├── test_tokenization_mbart.py ├── test_tokenization_mpnet.py ├── test_tokenization_openai.py ├── test_tokenization_pegasus.py ├── test_tokenization_phobert.py ├── test_tokenization_prophetnet.py ├── test_tokenization_rag.py ├── test_tokenization_reformer.py ├── test_tokenization_roberta.py ├── test_tokenization_squeezebert.py ├── test_tokenization_t5.py ├── test_tokenization_transfo_xl.py ├── test_tokenization_utils.py ├── test_tokenization_xlm.py ├── test_tokenization_xlm_prophetnet.py ├── test_tokenization_xlm_roberta.py ├── test_tokenization_xlnet.py ├── test_trainer.py ├── test_trainer_callback.py ├── test_trainer_distributed.py ├── test_trainer_tpu.py ├── test_trainer_utils.py ├── test_utils_check_copies.py └── test_versions_utils.py ├── utils ├── check_copies.py ├── check_dummies.py ├── check_repo.py ├── check_table.py ├── download_glue_data.py ├── get_modified_files.py ├── link_tester.py └── style_doc.py └── valohai.yaml /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=transformers 3 | omit = 4 | # skip conversion scripts from testing for now 5 | */convert_* 6 | */__main__.py 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | raise 11 | except 12 | register_parameter -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---new-benchmark.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F5A5 New benchmark" 3 | about: Benchmark a part of this library and share your results 4 | title: "[Benchmark]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🖥 Benchmarking `transformers` 11 | 12 | ## Benchmark 13 | 14 | Which part of `transformers` did you benchmark? 15 | 16 | ## Set-up 17 | 18 | What did you run your benchmarks on? Please include details, such as: CPU, GPU? If using multiple GPUs, which parallelization did you use? 19 | 20 | ## Results 21 | 22 | Put your results here!
23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--new-model-addition.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F31F New model addition" 3 | about: Submit a proposal/request to implement a new Transformer-based model 4 | title: '' 5 | labels: New model 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🌟 New model addition 11 | 12 | ## Model description 13 | 14 | 15 | 16 | ## Open source status 17 | 18 | * [ ] the model implementation is available: (give details) 19 | * [ ] the model weights are available: (give details) 20 | * [ ] who are the authors: (mention them, if possible by @gh-username) 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Submit a proposal/request for a new transformers feature 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🚀 Feature request 11 | 12 | 14 | 15 | ## Motivation 16 | 17 | 20 | 21 | ## Your contribution 22 | 23 | 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question-help.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓ Questions & Help" 3 | about: Post your general questions on the Hugging Face forum: https://discuss.huggingface.co/ 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # ❓ Questions & Help 11 | 12 | 16 | 17 | ## Details 18 | 19 | 20 | 21 | 23 | 24 | **A link to original question on the forum**: 25 | 26 | -------------------------------------------------------------------------------- /.github/conda/build.sh: -------------------------------------------------------------------------------- 1 | $PYTHON setup.py install # Python command to install the script. 2 | -------------------------------------------------------------------------------- /.github/conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "transformers" %} 2 | 3 | package: 4 | name: "{{ name|lower }}" 5 | version: "{{ TRANSFORMERS_VERSION }}" 6 | 7 | source: 8 | path: ../../ 9 | 10 | build: 11 | noarch: python 12 | 13 | requirements: 14 | host: 15 | - python 16 | - pip 17 | - numpy 18 | - dataclasses 19 | - packaging 20 | - filelock 21 | - requests 22 | - tqdm >=4.27 23 | - sacremoses 24 | - regex !=2019.12.17 25 | - protobuf 26 | - tokenizers ==0.9.4 27 | run: 28 | - python 29 | - numpy 30 | - dataclasses 31 | - packaging 32 | - filelock 33 | - requests 34 | - tqdm >=4.27 35 | - sacremoses 36 | - regex !=2019.12.17 37 | - protobuf 38 | - tokenizers ==0.9.4 39 | 40 | test: 41 | imports: 42 | - transformers 43 | 44 | about: 45 | home: https://huggingface.co 46 | license: Apache License 2.0 47 | license_file: LICENSE 48 | summary: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch and TensorFlow 2.0." 
49 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. Set to `false` to disable 17 | closeComment: false -------------------------------------------------------------------------------- /.github/workflows/release-conda.yml: -------------------------------------------------------------------------------- 1 | name: Release - Conda 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | env: 9 | ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} 10 | 11 | jobs: 12 | build_and_package: 13 | runs-on: ubuntu-latest 14 | defaults: 15 | run: 16 | shell: bash -l {0} 17 | 18 | steps: 19 | - name: Checkout repository 20 | uses: actions/checkout@v1 21 | 22 | - name: Install miniconda 23 | uses: conda-incubator/setup-miniconda@v2 24 | with: 25 | auto-update-conda: true 26 | auto-activate-base: false 27 | activate-environment: "build-transformers" 28 | channels: huggingface 29 | 30 | - name: Setup conda env 31 | run: | 32 | conda install -c defaults anaconda-client conda-build 33 | 34 | - name: Extract version 35 | run: echo "TRANSFORMERS_VERSION=`python setup.py --version`" >> $GITHUB_ENV 36 | 37 | - name: Build conda packages 38 | run: | 39 | conda info 40 | conda build .github/conda 41 | 42 | - name: Upload to Anaconda 43 | run: anaconda upload `conda build .github/conda --output` --force -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | -------------------------------------------------------------------------------- /docker/transformers-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow-cpu \ 19 | torch 20 | 21 | WORKDIR /workspace 22 | COPY . transformers/ 23 | RUN cd transformers/ && \ 24 | python3 -m pip install --no-cache-dir . 
25 | 26 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow \ 19 | torch 20 | 21 | RUN git clone https://github.com/NVIDIA/apex 22 | RUN cd apex && \ 23 | python3 setup.py install && \ 24 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ 25 | 26 | WORKDIR /workspace 27 | COPY . transformers/ 28 | RUN cd transformers/ && \ 29 | python3 -m pip install --no-cache-dir . 30 | 31 | CMD ["/bin/bash"] 32 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | torch 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-pytorch-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | torch 19 | 20 | RUN git clone https://github.com/NVIDIA/apex 21 | RUN cd apex && \ 22 | python3 setup.py install && \ 23 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ 24 | 25 | WORKDIR /workspace 26 | COPY . transformers/ 27 | RUN cd transformers/ && \ 28 | python3 -m pip install --no-cache-dir . 
29 | 30 | CMD ["/bin/bash"] 31 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/bert-base-cased.jsonnet: -------------------------------------------------------------------------------- 1 | local base = import 'templates/base.libsonnet'; 2 | local tpus = import 'templates/tpus.libsonnet'; 3 | local utils = import "templates/utils.libsonnet"; 4 | local volumes = import "templates/volumes.libsonnet"; 5 | 6 | local bertBaseCased = base.BaseTest { 7 | frameworkPrefix: "hf", 8 | modelName: "bert-base-cased", 9 | mode: "example", 10 | configMaps: [], 11 | 12 | timeout: 3600, # 1 hour, in seconds 13 | 14 | image: std.extVar('image'), 15 | imageTag: std.extVar('image-tag'), 16 | 17 | tpuSettings+: { 18 | softwareVersion: "pytorch-nightly", 19 | }, 20 | accelerator: tpus.v3_8, 21 | 22 | volumeMap+: { 23 | datasets: volumes.PersistentVolumeSpec { 24 | name: "huggingface-cluster-disk", 25 | mountPath: "/datasets", 26 | }, 27 | }, 28 | command: utils.scriptCommand( 29 | ||| 30 | python -m pytest -s transformers/examples/test_xla_examples.py -v 31 | test_exit_code=$? 32 | echo "\nFinished running commands.\n" 33 | test $test_exit_code -eq 0 34 | ||| 35 | ), 36 | }; 37 | 38 | bertBaseCased.oneshotJob 39 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/dataset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolume 3 | metadata: 4 | name: huggingface-cluster-disk 5 | spec: 6 | storageClassName: "" 7 | capacity: 8 | storage: 500Gi 9 | accessModes: 10 | - ReadOnlyMany 11 | claimRef: 12 | namespace: default 13 | name: huggingface-cluster-disk-claim 14 | gcePersistentDisk: 15 | pdName: huggingface-cluster-disk 16 | fsType: ext4 17 | readOnly: true 18 | --- 19 | apiVersion: v1 20 | kind: PersistentVolumeClaim 21 | metadata: 22 | name: huggingface-cluster-disk-claim 23 | spec: 24 | # Specify "" as the storageClassName so it matches the PersistentVolume's StorageClass. 25 | # A nil storageClassName value uses the default StorageClass. For details, see 26 | # https://kubernetes.io/docs/concepts/storage/persistent-volumes/#class-1 27 | storageClassName: "" 28 | accessModes: 29 | - ReadOnlyMany 30 | resources: 31 | requests: 32 | storage: 1Ki 33 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ~/.bashrc 3 | echo "running docker-entrypoint.sh" 4 | conda activate container 5 | echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS 6 | echo "printed TPU info" 7 | export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}" 8 | exec "$@" 9 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow-cpu 19 | 20 | WORKDIR /workspace 21 | COPY . 
transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-tensorflow-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/_static/css/Calibre-Light.ttf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/_static/css/Calibre-Medium.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/_static/css/Calibre-Regular.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Thin.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/_static/css/Calibre-Thin.otf -------------------------------------------------------------------------------- /docs/source/_static/css/code-snippets.css: -------------------------------------------------------------------------------- 1 | 2 | .highlight .c1, .highlight .sd{ 3 | color: #999 4 | } 5 | 6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc { 7 | color: #FB8D68; 8 | } 
9 | 10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow { 11 | color: #6670FF; 12 | } 13 | 14 | .highlight .gp { 15 | color: #FB8D68; 16 | } -------------------------------------------------------------------------------- /docs/source/contributing.md: -------------------------------------------------------------------------------- 1 | ../../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | ../../examples/README.md -------------------------------------------------------------------------------- /docs/source/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/favicon.ico -------------------------------------------------------------------------------- /docs/source/imgs/local_attention_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/local_attention_mask.png -------------------------------------------------------------------------------- /docs/source/imgs/ppl_chunked.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/ppl_chunked.gif -------------------------------------------------------------------------------- /docs/source/imgs/ppl_full.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/ppl_full.gif -------------------------------------------------------------------------------- /docs/source/imgs/ppl_sliding.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/ppl_sliding.gif -------------------------------------------------------------------------------- /docs/source/imgs/transformers_logo_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/transformers_logo_name.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_constant_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_constant_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_hard_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_schedule.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_cosine_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_warm_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_linear_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_linear_schedule.png -------------------------------------------------------------------------------- /docs/source/notebooks.md: -------------------------------------------------------------------------------- 1 | ../../notebooks/README.md -------------------------------------------------------------------------------- /examples/benchmarking/README.md: -------------------------------------------------------------------------------- 1 | # 🤗 Benchmark results 2 | 3 | Here, you can find a list of the different benchmark results created by the community. 4 | 5 | If you would like to list benchmark results on your favorite models of the [model hub](https://huggingface.co/models) here, please open a Pull Request and add them below. 6 | 7 | | Benchmark description | Results | Environment info | Author | 8 | |:----------|:-------------|:-------------|------:| 9 | | PyTorch Benchmark on inference for `bert-base-cased` |[memory](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_memory.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 10 | | PyTorch Benchmark on inference for `bert-base-cased` |[time](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_time.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 11 | -------------------------------------------------------------------------------- /examples/bert-loses-patience/pabee/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/bert-loses-patience/pabee/__init__.py -------------------------------------------------------------------------------- /examples/conftest.py: -------------------------------------------------------------------------------- 1 | # tests directory-specific settings - this file is run automatically 2 | # by pytest before any tests are run 3 | 4 | import sys 5 | import warnings 6 | from os.path import abspath, dirname, join 7 | 8 | 9 | # allow having multiple repository checkouts and not needing to remember to rerun 10 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 11 | git_repo_path = abspath(join(dirname(dirname(__file__)), "src")) 12 | sys.path.insert(1, git_repo_path) 13 | 14 | # silence FutureWarning warnings in tests since often we can't act on them until 15 | # they become normal warnings - i.e. 
the tests still need to test the current functionality 16 | warnings.simplefilter(action="ignore", category=FutureWarning) 17 | 18 | 19 | def pytest_addoption(parser): 20 | from transformers.testing_utils import pytest_addoption_shared 21 | 22 | pytest_addoption_shared(parser) 23 | 24 | 25 | def pytest_terminal_summary(terminalreporter): 26 | from transformers.testing_utils import pytest_terminal_summary_main 27 | 28 | make_reports = terminalreporter.config.getoption("--make-reports") 29 | if make_reports: 30 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 31 | -------------------------------------------------------------------------------- /examples/contrib/README.md: -------------------------------------------------------------------------------- 1 | # Community contributed examples 2 | 3 | This folder contains examples which are not actively maintained (mostly contributed by the community). 4 | 5 | Using these examples together with a recent version of the library usually requires making small (sometimes big) adaptations to get the scripts working. 6 | -------------------------------------------------------------------------------- /examples/contrib/mm-imdb/README.md: -------------------------------------------------------------------------------- 1 | ## MM-IMDb 2 | 3 | Based on the script [`run_mmimdb.py`](https://github.com/huggingface/transformers/blob/master/examples/contrib/mm-imdb/run_mmimdb.py). 4 | 5 | [MM-IMDb](http://lisi1.unal.edu.co/mmimdb/) is a Multimodal dataset with around 26,000 movies including images, plots and other metadata. 6 | 7 | ### Training on MM-IMDb 8 | 9 | ``` 10 | python run_mmimdb.py \ 11 | --data_dir /path/to/mmimdb/dataset/ \ 12 | --model_type bert \ 13 | --model_name_or_path bert-base-uncased \ 14 | --output_dir /path/to/save/dir/ \ 15 | --do_train \ 16 | --do_eval \ 17 | --max_seq_len 512 \ 18 | --gradient_accumulation_steps 20 \ 19 | --num_image_embeds 3 \ 20 | --num_train_epochs 100 \ 21 | --patience 5 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /examples/deebert/entropy_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | ENTROPIES="0 0.1 0.2 0.3 0.4 0.5 0.6 0.7" 17 | 18 | for ENTROPY in $ENTROPIES; do 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 22 | --task_name $DATASET \ 23 | --do_eval \ 24 | --do_lower_case \ 25 | --data_dir $PATH_TO_DATA/$DATASET \ 26 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 27 | --plot_data_dir ./results/ \ 28 | --max_seq_length 128 \ 29 | --early_exit_entropy $ENTROPY \ 30 | --eval_highway \ 31 | --overwrite_cache \ 32 | --per_gpu_eval_batch_size=1 33 | done 34 | -------------------------------------------------------------------------------- /examples/deebert/eval_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | 
MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | 17 | python -u run_glue_deebert.py \ 18 | --model_type $MODEL_TYPE \ 19 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 20 | --task_name $DATASET \ 21 | --do_eval \ 22 | --do_lower_case \ 23 | --data_dir $PATH_TO_DATA/$DATASET \ 24 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 25 | --plot_data_dir ./results/ \ 26 | --max_seq_length 128 \ 27 | --eval_each_highway \ 28 | --eval_highway \ 29 | --overwrite_cache \ 30 | --per_gpu_eval_batch_size=1 31 | -------------------------------------------------------------------------------- /examples/deebert/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/deebert/src/__init__.py -------------------------------------------------------------------------------- /examples/deebert/train_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | EPOCHS=10 12 | if [ $MODEL_TYPE = 'bert' ] 13 | then 14 | EPOCHS=3 15 | MODEL_NAME=${MODEL_NAME}-uncased 16 | fi 17 | 18 | 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path $MODEL_NAME \ 22 | --task_name $DATASET \ 23 | --do_train \ 24 | --do_eval \ 25 | --do_lower_case \ 26 | --data_dir $PATH_TO_DATA/$DATASET \ 27 | --max_seq_length 128 \ 28 | --per_gpu_eval_batch_size=1 \ 29 | --per_gpu_train_batch_size=8 \ 30 | --learning_rate 2e-5 \ 31 | --num_train_epochs $EPOCHS \ 32 | --overwrite_output_dir \ 33 | --seed 42 \ 34 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 35 | --plot_data_dir ./results/ \ 36 | --save_steps 0 \ 37 | --overwrite_cache \ 38 | --eval_after_first_stage 39 | -------------------------------------------------------------------------------- /examples/distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | gitpython==3.0.2 4 | tensorboard>=1.14.0 5 | tensorboardX==1.8 6 | psutil==5.6.6 7 | scipy>=1.4.1 8 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 28996 14 | } 15 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-multilingual-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 
0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 119547 14 | } 15 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 30522 14 | } 15 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "initializer_range": 0.02, 3 | "layer_norm_epsilon": 0.00001, 4 | "n_ctx": 1024, 5 | "n_embd": 768, 6 | "n_head": 12, 7 | "n_layer": 6, 8 | "n_positions": 1024, 9 | "vocab_size": 50257 10 | } -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilroberta-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "vocab_size": 50265, 3 | "hidden_size": 768, 4 | "num_hidden_layers": 6, 5 | "num_attention_heads": 12, 6 | "intermediate_size": 3072, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "attention_probs_dropout_prob": 0.1, 10 | "max_position_embeddings": 514, 11 | "type_vocab_size": 1, 12 | "initializer_range": 0.02, 13 | "layer_norm_eps": 0.00001 14 | } -------------------------------------------------------------------------------- /examples/longform-qa/README.md: -------------------------------------------------------------------------------- 1 | # Long Form Question Answering 2 | 3 | This folder contains the code for the Long Form Question Answering [demo](http://35.226.96.115:8080/) as well as methods to train and use a fully end-to-end Long Form Question Answering system using the [🤗transformers](https://github.com/huggingface/transformers) and [🤗datasets](https://github.com/huggingface/datasets) libraries. 4 | 5 | You can use these methods to train your own system by following along with the associated [notebook](https://github.com/huggingface/notebooks/blob/master/longform-qa/Long_Form_Question_Answering_with_ELI5_and_Wikipedia.ipynb) or [blog post](https://yjernite.github.io/lfqa.html). 6 | -------------------------------------------------------------------------------- /examples/lxmert/README.md: -------------------------------------------------------------------------------- 1 | # LXMERT DEMO 2 | 3 | 1. make a virtualenv: ``virtualenv venv`` and activate ``source venv/bin/activate`` 4 | 2. install reqs: ``pip install -r ./requirements.txt`` 5 | 3. 
usage is as shown in demo.ipynb 6 | -------------------------------------------------------------------------------- /examples/movement-pruning/emmental/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .configuration_bert_masked import MaskedBertConfig 3 | from .modeling_bert_masked import ( 4 | MaskedBertForMultipleChoice, 5 | MaskedBertForQuestionAnswering, 6 | MaskedBertForSequenceClassification, 7 | MaskedBertForTokenClassification, 8 | MaskedBertModel, 9 | ) 10 | from .modules import * 11 | -------------------------------------------------------------------------------- /examples/movement-pruning/emmental/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .binarizer import MagnitudeBinarizer, ThresholdBinarizer, TopKBinarizer 3 | from .masked_nn import MaskedLinear 4 | -------------------------------------------------------------------------------- /examples/movement-pruning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.4.0 2 | -e git+https://github.com/huggingface/transformers.git@352d5472b0c1dec0f420d606d16747d851b4bda8#egg=transformers 3 | knockknock>=0.1.8.1 4 | h5py>=2.10.0 5 | numpy>=1.18.2 6 | scipy>=1.4.1 7 | -------------------------------------------------------------------------------- /examples/rag/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/rag/finetune_rag.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 5 | # run ./examples/rag/finetune.sh --help to see all the possible options 6 | 7 | python examples/rag/finetune_rag.py \ 8 | --data_dir $DATA_DIR \ 9 | --output_dir $OUTPUT_DIR \ 10 | --model_name_or_path $MODEL_NAME_OR_PATH \ 11 | --model_type rag_sequence \ 12 | --fp16 \ 13 | --gpus 8 \ 14 | --do_train \ 15 | --do_predict \ 16 | --n_val -1 \ 17 | --val_check_interval 0.25 \ 18 | --train_batch_size 8 \ 19 | --eval_batch_size 1 \ 20 | --max_source_length 128 \ 21 | --max_target_length 25 \ 22 | --val_max_target_length 25 \ 23 | --test_max_target_length 25 \ 24 | --label_smoothing 0.1 \ 25 | --dropout 0.1 \ 26 | --attention_dropout 0.1 \ 27 | --weight_decay 0.001 \ 28 | --adam_epsilon 1e-08 \ 29 | --max_grad_norm 0.1 \ 30 | --lr_scheduler polynomial \ 31 | --learning_rate 3e-05 \ 32 | --num_train_epochs 100 \ 33 | --warmup_steps 500 \ 34 | --gradient_accumulation_steps 1 -------------------------------------------------------------------------------- /examples/rag/requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu >= 1.6.3 2 | datasets >= 1.0.1 3 | psutil >= 5.7.0 4 | torch >= 1.4.0 -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning==1.0.4 9 
| matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.1.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | -------------------------------------------------------------------------------- /examples/seq2seq/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/seq2seq/bertabs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/seq2seq/bertabs/__init__.py -------------------------------------------------------------------------------- /examples/seq2seq/bertabs/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | # For ROUGE 4 | nltk 5 | py-rouge 6 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/finetune.sh: -------------------------------------------------------------------------------- 1 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 2 | # run ./builtin_trainer/finetune.sh --help to see all the possible options 3 | python finetune_trainer.py \ 4 | --learning_rate=3e-5 \ 5 | --fp16 \ 6 | --do_train --do_eval --do_predict \ 7 | --evaluation_strategy steps \ 8 | --predict_with_generate \ 9 | --n_val 1000 \ 10 | "$@" 11 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/finetune_tpu.sh: -------------------------------------------------------------------------------- 1 | export TPU_NUM_CORES=8 2 | 3 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 4 | # run ./builtin_trainer/finetune_tpu.sh --help to see all the possible options 5 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 6 | finetune_trainer.py \ 7 | --learning_rate=3e-5 \ 8 | --do_train --do_eval \ 9 | --evaluation_strategy steps \ 10 | --prediction_loss_only \ 11 | --n_val 1000 \ 12 | "$@" 13 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/train_distil_marian_enro.sh: -------------------------------------------------------------------------------- 1 | export WANDB_PROJECT=distil-marian 2 | export BS=64 3 | export GAS=1 4 | export m=sshleifer/student_marian_en_ro_6_3 5 | export MAX_LEN=128 6 | python finetune_trainer.py \ 7 | --tokenizer_name $m --model_name_or_path $m \ 8 | --data_dir $ENRO_DIR \ 9 | --output_dir marian_en_ro_6_3 --overwrite_output_dir \ 10 | --learning_rate=3e-4 \ 11 | --warmup_steps 500 --sortish_sampler \ 12 | --fp16 \ 13 | --gradient_accumulation_steps=$GAS \ 14 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 15 | --freeze_encoder --freeze_embeds \ 16 | --num_train_epochs=6 \ 17 | --save_steps 3000 --eval_steps 3000 \ 18 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 19 | --do_train --do_eval --do_predict \ 20 | --evaluation_strategy steps \ 21 | --predict_with_generate --logging_first_step \ 22 | --task translation 
--label_smoothing 0.1 \ 23 | "$@" 24 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/train_distil_marian_enro_tpu.sh: -------------------------------------------------------------------------------- 1 | export WANDB_PROJECT=distil-marian 2 | export BS=64 3 | export m=sshleifer/student_marian_en_ro_6_3 4 | export MAX_LEN=128 5 | export TPU_NUM_CORES=8 6 | 7 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 8 | finetune_trainer.py \ 9 | --tokenizer_name $m --model_name_or_path $m \ 10 | --data_dir $ENRO_DIR \ 11 | --output_dir marian_en_ro_6_3 --overwrite_output_dir \ 12 | --learning_rate=3e-4 \ 13 | --warmup_steps 500 \ 14 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 15 | --freeze_encoder --freeze_embeds \ 16 | --num_train_epochs=6 \ 17 | --save_steps 500 --eval_steps 500 \ 18 | --logging_first_step --logging_steps 200 \ 19 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 20 | --do_train --do_eval \ 21 | --evaluation_strategy steps \ 22 | --prediction_loss_only \ 23 | --task translation --label_smoothing 0.1 \ 24 | "$@" 25 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | export WANDB_PROJECT=distilbart-trainer 2 | export BS=32 3 | export m=sshleifer/student_cnn_12_6 4 | export tok=facebook/bart-large 5 | export MAX_TGT_LEN=142 6 | 7 | python finetune_trainer.py \ 8 | --model_name_or_path $m --tokenizer_name $tok \ 9 | --data_dir cnn_dm \ 10 | --output_dir distilbart-cnn-12-6 --overwrite_output_dir \ 11 | --learning_rate=3e-5 \ 12 | --warmup_steps 500 --sortish_sampler \ 13 | --fp16 \ 14 | --n_val 500 \ 15 | --gradient_accumulation_steps=1 \ 16 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 17 | --freeze_encoder --freeze_embeds \ 18 | --num_train_epochs=2 \ 19 | --save_steps 3000 --eval_steps 3000 \ 20 | --logging_first_step \ 21 | --max_target_length 56 --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ 22 | --do_train --do_eval --do_predict \ 23 | --evaluation_strategy steps \ 24 | --predict_with_generate --sortish_sampler \ 25 | "$@" 26 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | python finetune_trainer.py \ 2 | --model_name_or_path=facebook/mbart-large-cc25 \ 3 | --data_dir $ENRO_DIR \ 4 | --output_dir mbart_cc25_enro --overwrite_output_dir \ 5 | --learning_rate=3e-5 \ 6 | --warmup_steps 500 \ 7 | --fp16 \ 8 | --label_smoothing 0.1 \ 9 | --adam_eps 1e-06 \ 10 | --src_lang en_XX --tgt_lang ro_RO \ 11 | --freeze_embeds \ 12 | --per_device_train_batch_size=4 --per_device_eval_batch_size=4 \ 13 | --max_source_length 128 --max_target_length 128 \ 14 | --val_max_target_length 128 --test_max_target_length 128 \ 15 | --sortish_sampler \ 16 | --num_train_epochs 6 \ 17 | --save_steps 25000 --eval_steps 25000 --logging_steps 1000 \ 18 | --do_train --do_eval --do_predict \ 19 | --evaluation_strategy steps \ 20 | --predict_with_generate --logging_first_step \ 21 | --task translation \ 22 | "$@" 23 | -------------------------------------------------------------------------------- /examples/seq2seq/convert_model_to_fp16.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from typing import Union 4 | 5 | import fire 6 | import torch 7 | from tqdm import tqdm 8 | 9 | 10 | def convert(src_path: str, map_location: str = "cpu", save_path: Union[str, None] = None) -> None: 11 | """Convert a pytorch_model.bin or model.pt file to torch.float16 for faster downloads, less disk space.""" 12 | state_dict = torch.load(src_path, map_location=map_location) 13 | for k, v in tqdm(state_dict.items()): 14 | if not isinstance(v, torch.Tensor): 15 | raise TypeError("FP16 conversion only works on paths that are saved state dicts, like pytorch_model.bin") 16 | state_dict[k] = v.half() 17 | if save_path is None: # overwrite src_path 18 | save_path = src_path 19 | torch.save(state_dict, save_path) 20 | 21 | 22 | if __name__ == "__main__": 23 | fire.Fire(convert) 24 | -------------------------------------------------------------------------------- /examples/seq2seq/distil_marian_enro_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | # export MAX_LEN=128 5 | python distillation.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --fp16 \ 9 | --val_check_interval 0.25 \ 10 | --teacher Helsinki-NLP/opus-mt-en-ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --student_decoder_layers 3 --student_encoder_layers 6 \ 13 | --freeze_encoder --freeze_embeds \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. \ 16 | --train_batch_size=$BS --eval_batch_size=$BS \ 17 | --tokenizer_name Helsinki-NLP/opus-mt-en-ro \ 18 | --warmup_steps 500 --logger_name wandb \ 19 | --fp16_opt_level O1 --task translation --normalize_hidden --num_sanity_val_steps=0 \ 20 | "$@" 21 | -------------------------------------------------------------------------------- /examples/seq2seq/distil_marian_no_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | python distillation.py \ 5 | --learning_rate=3e-4 \ 6 | --do_train \ 7 | --do_predict \ 8 | --fp16 --no_teacher \ 9 | --val_check_interval 0.25 \ 10 | --data_dir $ENRO_DIR \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --freeze_encoder --freeze_embeds \ 13 | --train_batch_size=$BS --eval_batch_size=$BS \ 14 | --tokenizer_name $m --model_name_or_path $m \ 15 | --warmup_steps 500 --sortish_sampler --logger_name wandb \ 16 | --gpus 1 --fp16_opt_level=O1 --task translation --num_sanity_val_steps=0 \ 17 | "$@" 18 | -------------------------------------------------------------------------------- /examples/seq2seq/dynamic_bs_example.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | export m=sshleifer/student_marian_en_ro_6_1 6 | python finetune.py \ 7 | --learning_rate=3e-4 \ 8 | --do_train \ 9 | --fp16 \ 10 | --data_dir wmt_en_ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --freeze_encoder --freeze_embeds \ 13 | --train_batch_size=48 
--eval_batch_size=64 \ 14 | --tokenizer_name $m --model_name_or_path $m --num_train_epochs=1 \ 15 | --warmup_steps 500 --logger_name wandb --gpus 1 \ 16 | --fp16_opt_level=O1 --task translation \ 17 | "$@" 18 | -------------------------------------------------------------------------------- /examples/seq2seq/finetune.sh: -------------------------------------------------------------------------------- 1 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 2 | # run ./finetune.sh --help to see all the possible options 3 | python finetune.py \ 4 | --learning_rate=3e-5 \ 5 | --fp16 \ 6 | --gpus 1 \ 7 | --do_train \ 8 | --do_predict \ 9 | --n_val 1000 \ 10 | --val_check_interval 0.1 \ 11 | "$@" 12 | -------------------------------------------------------------------------------- /examples/seq2seq/finetune_bart_tiny.sh: -------------------------------------------------------------------------------- 1 | # Script for verifying that run_bart_sum can be invoked from its directory 2 | 3 | # Get tiny dataset with cnn_dm format (4 examples for train, val, test) 4 | wget https://cdn-datasets.huggingface.co/summarization/cnn_tiny.tgz 5 | tar -xzvf cnn_tiny.tgz 6 | rm cnn_tiny.tgz 7 | 8 | export OUTPUT_DIR_NAME=bart_utest_output 9 | export CURRENT_DIR=${PWD} 10 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 11 | 12 | # Make output directory if it doesn't exist 13 | mkdir -p $OUTPUT_DIR 14 | 15 | # Add parent directory to python path to access lightning_base.py and testing_utils.py 16 | export PYTHONPATH="../":"${PYTHONPATH}" 17 | python finetune.py \ 18 | --data_dir=cnn_tiny/ \ 19 | --model_name_or_path=sshleifer/bart-tiny-random \ 20 | --learning_rate=3e-5 \ 21 | --train_batch_size=2 \ 22 | --eval_batch_size=2 \ 23 | --output_dir=$OUTPUT_DIR \ 24 | --num_train_epochs=1 \ 25 | --gpus=0 \ 26 | --do_train "$@" 27 | 28 | rm -rf cnn_tiny 29 | rm -rf $OUTPUT_DIR 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/seq2seq/finetune_pegasus_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # From appendix C of paper https://arxiv.org/abs/1912.08777 5 | # Set --gradient_accumulation_steps so that effective batch size is 256 (2*128, 4*64, 8*32, 16*16) 6 | python finetune.py \ 7 | --learning_rate=1e-4 \ 8 | --do_train \ 9 | --do_predict \ 10 | --n_val 1000 \ 11 | --val_check_interval 0.25 \ 12 | --max_source_length 512 --max_target_length 56 \ 13 | --freeze_embeds --label_smoothing 0.1 --adafactor --task summarization_xsum \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /examples/seq2seq/finetune_t5.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --data_dir=$CNN_DIR \ 6 | --learning_rate=3e-5 \ 7 | --train_batch_size=$BS \ 8 | --eval_batch_size=$BS \ 9 | --output_dir=$OUTPUT_DIR \ 10 | --max_source_length=512 \ 11 | --max_target_length=56 \ 12 | --val_check_interval=0.1 --n_val=200 \ 13 | --do_train --do_predict \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /examples/seq2seq/minify_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python 2 | 3 | from pathlib import Path 4 | 5 | import fire 6 | 7 | 8 | def minify(src_dir: str, dest_dir: str, n: int): 9 | """Write first n lines of each file f in src_dir to dest_dir/f """ 10 | src_dir = Path(src_dir) 11 | dest_dir = Path(dest_dir) 12 | dest_dir.mkdir(exist_ok=True) 13 | for path in src_dir.iterdir(): 14 | new = [x.rstrip() for x in list(path.open().readlines())][:n] 15 | dest_path = dest_dir.joinpath(path.name) 16 | print(dest_path) 17 | dest_path.open("w").write("\n".join(new)) 18 | 19 | 20 | if __name__ == "__main__": 21 | fire.Fire(minify) 22 | -------------------------------------------------------------------------------- /examples/seq2seq/rouge_cli.py: -------------------------------------------------------------------------------- 1 | import fire 2 | 3 | from utils import calculate_rouge, save_json 4 | 5 | 6 | def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs): 7 | """Kwargs will be passed to calculate_rouge""" 8 | pred_lns = [x.strip() for x in open(pred_path).readlines()] 9 | tgt_lns = [x.strip() for x in open(tgt_path).readlines()][: len(pred_lns)] 10 | metrics = calculate_rouge(pred_lns, tgt_lns, **kwargs) 11 | if save_path is not None: 12 | save_json(metrics, save_path, indent=None) 13 | return metrics # these print nicely 14 | 15 | 16 | if __name__ == "__main__": 17 | fire.Fire(calculate_rouge_path) 18 | -------------------------------------------------------------------------------- /examples/seq2seq/save_randomly_initialized_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import fire 4 | 5 | from transformers import AutoConfig, AutoModelForSeq2SeqLM, AutoTokenizer 6 | 7 | 8 | def save_randomly_initialized_version(config_name: str, save_dir: str, **config_kwargs): 9 | """Save a randomly initialized version of a model using a pretrained config. 10 | Args: 11 | config_name: which config to use 12 | save_dir: where to save the resulting model and tokenizer 13 | config_kwargs: Passed to AutoConfig 14 | 15 | Usage:: 16 | save_randomly_initialized_version("facebook/bart-large-cnn", "distilbart_random_cnn_6_3", encoder_layers=6, decoder_layers=3, num_beams=3) 17 | """ 18 | cfg = AutoConfig.from_pretrained(config_name, **config_kwargs) 19 | model = AutoModelForSeq2SeqLM.from_config(cfg) 20 | model.save_pretrained(save_dir) 21 | AutoTokenizer.from_pretrained(config_name).save_pretrained(save_dir) 22 | return model 23 | 24 | 25 | if __name__ == "__main__": 26 | fire.Fire(save_randomly_initialized_version) 27 | -------------------------------------------------------------------------------- /examples/seq2seq/sentence_splitter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from filelock import FileLock 4 | 5 | 6 | try: 7 | import nltk 8 | 9 | NLTK_AVAILABLE = True 10 | except (ImportError, ModuleNotFoundError): 11 | NLTK_AVAILABLE = False 12 | 13 | if NLTK_AVAILABLE: 14 | with FileLock(".lock") as lock: 15 | nltk.download("punkt", quiet=True) 16 | 17 | 18 | def add_newline_to_end_of_each_sentence(x: str) -> str: 19 | """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" 20 | x = re.sub("<n>", "", x) # remove pegasus newline char so it does not leak into the output 21 | assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. 
(pip install nltk)" 22 | return "\n".join(nltk.sent_tokenize(x)) 23 | -------------------------------------------------------------------------------- /examples/seq2seq/test_data/fsmt/build-eval-data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import io 4 | import json 5 | import subprocess 6 | 7 | 8 | pairs = [ 9 | ["en", "ru"], 10 | ["ru", "en"], 11 | ["en", "de"], 12 | ["de", "en"], 13 | ] 14 | 15 | n_objs = 8 16 | 17 | 18 | def get_all_data(pairs, n_objs): 19 | text = {} 20 | for src, tgt in pairs: 21 | pair = f"{src}-{tgt}" 22 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo src".split() 23 | src_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 24 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo ref".split() 25 | tgt_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 26 | text[pair] = {"src": src_lines[:n_objs], "tgt": tgt_lines[:n_objs]} 27 | return text 28 | 29 | 30 | text = get_all_data(pairs, n_objs) 31 | filename = "./fsmt_val_data.json" 32 | with io.open(filename, "w", encoding="utf-8") as f: 33 | bleu_data = json.dump(text, f, indent=2, ensure_ascii=False) 34 | -------------------------------------------------------------------------------- /examples/seq2seq/test_data/wmt_en_ro/train.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/seq2seq/test_data/wmt_en_ro/train.len -------------------------------------------------------------------------------- /examples/seq2seq/test_data/wmt_en_ro/val.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/seq2seq/test_data/wmt_en_ro/val.len -------------------------------------------------------------------------------- /examples/seq2seq/test_tatoeba_conversion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import unittest 4 | 5 | from transformers.file_utils import cached_property 6 | from transformers.models.marian.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter 7 | from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow 8 | 9 | 10 | @unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.") 11 | class TatoebaConversionTester(unittest.TestCase): 12 | @cached_property 13 | def resolver(self): 14 | tmp_dir = tempfile.mkdtemp() 15 | return TatoebaConverter(save_dir=tmp_dir) 16 | 17 | @slow 18 | @require_torch_non_multi_gpu_but_fix_me 19 | def test_resolver(self): 20 | self.resolver.convert_models(["heb-eng"]) 21 | 22 | @slow 23 | @require_torch_non_multi_gpu_but_fix_me 24 | def test_model_card(self): 25 | content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True) 26 | assert mmeta["long_pair"] == "heb-eng" 27 | -------------------------------------------------------------------------------- /examples/seq2seq/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | export BS=32 5 | export GAS=1 6 | 7 | python finetune.py \ 8 | --learning_rate=3e-5 \ 9 | --fp16 \ 10 | --gpus 1 \ 11 | --do_train \ 12 | --do_predict \ 13 | 
--val_check_interval 0.25 \ 14 | --n_val 500 \ 15 | --num_train_epochs 2 \ 16 | --freeze_encoder --freeze_embeds --data_dir cnn_dm \ 17 | --max_target_length 142 --val_max_target_length=142 \ 18 | --train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \ 19 | --model_name_or_path sshleifer/student_cnn_12_6 \ 20 | --tokenizer_name facebook/bart-large \ 21 | --warmup_steps 500 \ 22 | --output_dir distilbart-cnn-12-6 \ 23 | "$@" 24 | 25 | -------------------------------------------------------------------------------- /examples/seq2seq/train_distilbart_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | python distillation.py \ 4 | --teacher facebook/bart-large-xsum --data_dir xsum \ 5 | --tokenizer_name facebook/bart-large-xsum \ 6 | --student_decoder_layers 6 --student_encoder_layers 12 \ 7 | --freeze_encoder --freeze_embeds \ 8 | --learning_rate=3e-4 \ 9 | --do_train \ 10 | --do_predict \ 11 | --fp16 --fp16_opt_level=O1 \ 12 | --val_check_interval 0.1 --n_val 1000 --eval_beams 2 --length_penalty=0.5 \ 13 | --max_target_length=60 --val_max_target_length=60 --test_max_target_length=100 \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. \ 16 | --train_batch_size=16 --eval_batch_size=16 --gradient_accumulation_steps=2 \ 17 | --sortish_sampler \ 18 | --num_train_epochs=6 \ 19 | --warmup_steps 500 \ 20 | --output_dir distilbart_xsum_12_6 \ 21 | "$@" 22 | -------------------------------------------------------------------------------- /examples/seq2seq/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --learning_rate=3e-5 \ 6 | --fp16 \ 7 | --do_train \ 8 | --val_check_interval=0.25 \ 9 | --adam_eps 1e-06 \ 10 | --num_train_epochs 6 --src_lang en_XX --tgt_lang ro_RO \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --train_batch_size=$BS --eval_batch_size=$BS \ 14 | --task translation \ 15 | --warmup_steps 500 \ 16 | --freeze_embeds \ 17 | --model_name_or_path=facebook/mbart-large-cc25 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /examples/text-classification/run_pl.sh: -------------------------------------------------------------------------------- 1 | # Install example requirements 2 | pip install -r ../requirements.txt 3 | 4 | # Download glue data 5 | python3 ../../utils/download_glue_data.py 6 | 7 | export TASK=mrpc 8 | export DATA_DIR=./glue_data/MRPC/ 9 | export MAX_LENGTH=128 10 | export LEARNING_RATE=2e-5 11 | export BERT_MODEL=bert-base-cased 12 | export BATCH_SIZE=32 13 | export NUM_EPOCHS=3 14 | export SEED=2 15 | export OUTPUT_DIR_NAME=mrpc-pl-bert 16 | export CURRENT_DIR=${PWD} 17 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 18 | 19 | # Make output directory if it doesn't exist 20 | mkdir -p $OUTPUT_DIR 21 | # Add parent directory to python path to access lightning_base.py 22 | export PYTHONPATH="../":"${PYTHONPATH}" 23 | 24 | python3 run_pl_glue.py --gpus 1 --data_dir $DATA_DIR \ 25 | --task $TASK \ 26 | --model_name_or_path $BERT_MODEL \ 27 | --output_dir $OUTPUT_DIR \ 28 | --max_seq_length $MAX_LENGTH \ 29 | --learning_rate $LEARNING_RATE \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --train_batch_size 
$BATCH_SIZE \ 32 | --seed $SEED \ 33 | --do_train \ 34 | --do_predict 35 | -------------------------------------------------------------------------------- /examples/text-generation/README.md: -------------------------------------------------------------------------------- 1 | ## Language generation 2 | 3 | Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/master/examples/text-generation/run_generation.py). 4 | 5 | Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL, XLNet, CTRL. 6 | A similar script is used for our official demo [Write With Transfomer](https://transformer.huggingface.co), where you 7 | can try out the different models available in the library. 8 | 9 | Example usage: 10 | 11 | ```bash 12 | python run_generation.py \ 13 | --model_type=gpt2 \ 14 | --model_name_or_path=gpt2 15 | ``` 16 | -------------------------------------------------------------------------------- /examples/text-generation/pplm/imgs/headfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/text-generation/pplm/imgs/headfigure.png -------------------------------------------------------------------------------- /examples/text-generation/pplm/imgs/wooly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/text-generation/pplm/imgs/wooly.png -------------------------------------------------------------------------------- /examples/text-generation/pplm/pplm_classification_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ClassificationHead(torch.nn.Module): 5 | """Classification Head for transformer encoders""" 6 | 7 | def __init__(self, class_size, embed_size): 8 | super().__init__() 9 | self.class_size = class_size 10 | self.embed_size = embed_size 11 | # self.mlp1 = torch.nn.Linear(embed_size, embed_size) 12 | # self.mlp2 = (torch.nn.Linear(embed_size, class_size)) 13 | self.mlp = torch.nn.Linear(embed_size, class_size) 14 | 15 | def forward(self, hidden_state): 16 | # hidden_state = F.relu(self.mlp1(hidden_state)) 17 | # hidden_state = self.mlp2(hidden_state) 18 | logits = self.mlp(hidden_state) 19 | return logits 20 | -------------------------------------------------------------------------------- /examples/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | python3 run_ner.py \ 2 | --model_name_or_path bert-base-uncased \ 3 | --dataset_name conll2003 \ 4 | --output_dir /tmp/test-ner \ 5 | --do_train \ 6 | --do_eval 7 | -------------------------------------------------------------------------------- /examples/token-classification/run_chunk.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Downloading CONLL2003 dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/valid.txt' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Downloading CONLL2003 test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/test.txt' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Downloading CONLL2003 train dataset...." 
13 | curl -L -o ./train.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/train.txt' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=chunker-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner_old.py \ 25 | --task_type Chunk \ 26 | --data_dir . \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/token-classification/run_pos.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Download dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Download test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Download train dataset...." 13 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=postagger-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner_old.py \ 25 | --task_type POS \ 26 | --data_dir . \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/token-classification/run_pos_pl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if ! [ -f ./dev.txt ]; then 3 | echo "Download dev dataset...." 4 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 5 | fi 6 | 7 | if ! [ -f ./test.txt ]; then 8 | echo "Download test dataset...." 9 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 10 | fi 11 | 12 | if ! [ -f ./train.txt ]; then 13 | echo "Download train dataset...." 
14 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 15 | fi 16 | 17 | export MAX_LENGTH=200 18 | export BERT_MODEL=bert-base-uncased 19 | export OUTPUT_DIR=postagger-model 20 | export BATCH_SIZE=32 21 | export NUM_EPOCHS=3 22 | export SAVE_STEPS=750 23 | export SEED=1 24 | 25 | 26 | # Add parent directory to python path to access lightning_base.py 27 | export PYTHONPATH="../":"${PYTHONPATH}" 28 | 29 | python3 run_pl_ner.py --data_dir ./ \ 30 | --task_type POS \ 31 | --model_name_or_path $BERT_MODEL \ 32 | --output_dir $OUTPUT_DIR \ 33 | --max_seq_length $MAX_LENGTH \ 34 | --num_train_epochs $NUM_EPOCHS \ 35 | --train_batch_size $BATCH_SIZE \ 36 | --seed $SEED \ 37 | --gpus 1 \ 38 | --do_train \ 39 | --do_predict 40 | -------------------------------------------------------------------------------- /examples/token-classification/scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from transformers import AutoTokenizer 4 | 5 | 6 | dataset = sys.argv[1] 7 | model_name_or_path = sys.argv[2] 8 | max_len = int(sys.argv[3]) 9 | 10 | subword_len_counter = 0 11 | 12 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 13 | max_len -= tokenizer.num_special_tokens_to_add() 14 | 15 | with open(dataset, "rt") as f_p: 16 | for line in f_p: 17 | line = line.rstrip() 18 | 19 | if not line: 20 | print(line) 21 | subword_len_counter = 0 22 | continue 23 | 24 | token = line.split()[0] 25 | 26 | current_subwords_len = len(tokenizer.tokenize(token)) 27 | 28 | # Token contains strange control characters like \x96 or \x95 29 | # Just filter out the complete line 30 | if current_subwords_len == 0: 31 | continue 32 | 33 | if (subword_len_counter + current_subwords_len) > max_len: 34 | print("") 35 | print(line) 36 | subword_len_counter = current_subwords_len 37 | continue 38 | 39 | subword_len_counter += current_subwords_len 40 | 41 | print(line) 42 | -------------------------------------------------------------------------------- /model_cards/Cinnamon/electra-small-japanese-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ja 3 | license: apache-2.0 4 | --- 5 | 6 | ## Japanese ELECTRA-small 7 | 8 | We provide a Japanese **ELECTRA-Small** model, as described in [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB). 9 | 10 | Our pretraining process employs subword units derived from the [Japanese Wikipedia](https://dumps.wikimedia.org/jawiki/latest), using the [Byte-Pair Encoding](https://www.aclweb.org/anthology/P16-1162.pdf) method and building on an initial tokenization with [mecab-ipadic-NEologd](https://github.com/neologd/mecab-ipadic-neologd). For optimal performance, please take care to set your MeCab dictionary appropriately. 
11 | 12 | ## How to use the discriminator in `transformers` 13 | 14 | ``` 15 | from transformers import BertJapaneseTokenizer, ElectraForPreTraining 16 | 17 | tokenizer = BertJapaneseTokenizer.from_pretrained('Cinnamon/electra-small-japanese-discriminator', mecab_kwargs={"mecab_option": "-d /usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ipadic-neologd"}) 18 | 19 | model = ElectraForPreTraining.from_pretrained('Cinnamon/electra-small-japanese-discriminator') 20 | ``` 21 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-bg-cs-pl-ru-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - bg 4 | - cs 5 | - pl 6 | - ru 7 | --- 8 | 9 | # bert-base-bg-cs-pl-ru-cased 10 | 11 | SlavicBERT\[1\] \(Slavic \(bg, cs, pl, ru\), cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on Russian News and four Wikipedias: Bulgarian, Czech, Polish, and Russian. Subtoken vocabulary was built using this data. Multilingual BERT was used as an initialization for SlavicBERT. 12 | 13 | 14 | \[1\]: Arkhipov M., Trofimova M., Kuratov Y., Sorokin A. \(2019\). [Tuning Multilingual Transformers for Language-Specific Named Entity Recognition](https://www.aclweb.org/anthology/W19-3712/). ACL anthology W19-3712. 15 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-cased-conversational/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | --- 4 | 5 | # bert-base-cased-conversational 6 | 7 | Conversational BERT \(English, cased, 12‑layer, 768‑hidden, 12‑heads, 110M parameters\) was trained on the English part of Twitter, Reddit, DailyDialogues\[1\], OpenSubtitles\[2\], Debates\[3\], Blogs\[4\], Facebook News Comments. We used this training data to build the vocabulary of English subtokens and took English cased version of BERT‑base as an initialization for English Conversational BERT. 8 | 9 | 10 | \[1\]: Yanran Li, Hui Su, Xiaoyu Shen, Wenjie Li, Ziqiang Cao, and Shuzi Niu. DailyDialog: A Manually Labelled Multi-turn Dialogue Dataset. IJCNLP 2017. 11 | 12 | \[2\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\) 13 | 14 | \[3\]: Justine Zhang, Ravi Kumar, Sujith Ravi, Cristian Danescu-Niculescu-Mizil. Proceedings of NAACL, 2016. 15 | 16 | \[4\]: J. Schler, M. Koppel, S. Argamon and J. Pennebaker \(2006\). Effects of Age and Gender on Blogging in Proceedings of 2006 AAAI Spring Symposium on Computational Approaches for Analyzing Weblogs. 17 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-multilingual-cased-sentence/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - multilingual 4 | --- 5 | 6 | # bert-base-multilingual-cased-sentence 7 | 8 | Sentence Multilingual BERT \(101 languages, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) is a representation‑based sentence encoder for 101 languages of Multilingual BERT. It is initialized with Multilingual BERT and then fine‑tuned on english MultiNLI\[1\] and on dev set of multilingual XNLI\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\]. 
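For illustration, a minimal sketch of the mean pooling described above, using plain `transformers` and PyTorch. This snippet is not part of the original card; the masked averaging is assumed to follow the Sentence-BERT recipe.

```python
import torch
from transformers import AutoModel, AutoTokenizer

name = "DeepPavlov/bert-base-multilingual-cased-sentence"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModel.from_pretrained(name)

sentences = ["A short test sentence.", "Еще одно короткое предложение."]
encoded = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    token_embeddings = model(**encoded)[0]  # (batch, seq_len, hidden)

# Mean-pool the token embeddings, ignoring padding positions.
mask = encoded["attention_mask"].unsqueeze(-1).float()
sentence_embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
print(sentence_embeddings.shape)  # torch.Size([2, 768])
```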
9 | 10 | 11 | \[1\]: Williams A., Nangia N. & Bowman S. \(2017\) A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference. arXiv preprint [arXiv:1704.05426](https://arxiv.org/abs/1704.05426) 12 | 13 | \[2\]: Williams A., Bowman S. \(2018\) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint [arXiv:1809.05053](https://arxiv.org/abs/1809.05053) 14 | 15 | \[3\]: N. Reimers, I. Gurevych \(2019\) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084) 16 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased-conversational/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - ru 4 | --- 5 | 6 | # rubert-base-cased-conversational 7 | 8 | Conversational RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on OpenSubtitles\[1\], [Dirty](https://d3.ru/), [Pikabu](https://pikabu.ru/), and a Social Media segment of Taiga corpus\[2\]. We assembled a new vocabulary for Conversational RuBERT model on this data and initialized the model with [RuBERT](../rubert-base-cased). 9 | 10 | 11 | \[1\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\) 12 | 13 | \[2\]: Shavrina T., Shapovalova O. \(2017\) TO THE METHODOLOGY OF CORPUS CONSTRUCTION FOR MACHINE LEARNING: «TAIGA» SYNTAX TREE CORPUS AND PARSER. in proc. of “CORPORA2017”, international conference , Saint-Petersbourg, 2017. 14 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased-sentence/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - ru 4 | --- 5 | 6 | # rubert-base-cased-sentence 7 | 8 | Sentence RuBERT \(Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters\) is a representation‑based sentence encoder for Russian. It is initialized with RuBERT and fine‑tuned on SNLI\[1\] google-translated to russian and on russian part of XNLI dev set\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\]. 9 | 10 | 11 | \[1\]: S. R. Bowman, G. Angeli, C. Potts, and C. D. Manning. \(2015\) A large annotated corpus for learning natural language inference. arXiv preprint [arXiv:1508.05326](https://arxiv.org/abs/1508.05326) 12 | 13 | \[2\]: Williams A., Bowman S. \(2018\) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint [arXiv:1809.05053](https://arxiv.org/abs/1809.05053) 14 | 15 | \[3\]: N. Reimers, I. Gurevych \(2019\) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084) 16 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - ru 4 | --- 5 | 6 | # rubert-base-cased 7 | 8 | RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on the Russian part of Wikipedia and news data. 
We used this training data to build a vocabulary of Russian subtokens and took a multilingual version of BERT‑base as an initialization for RuBERT\[1\]. 9 | 10 | 11 | \[1\]: Kuratov, Y., Arkhipov, M. \(2019\). Adaptation of Deep Bidirectional Multilingual Transformers for Russian Language. arXiv preprint [arXiv:1905.07213](https://arxiv.org/abs/1905.07213). 12 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-arabic/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Arabic language**. The mono in the name refers to the monolingual setting, where the model is trained using only Arabic language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.877609 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | ### For more details about our paper 5 | 6 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 7 | 8 | ***Please cite our paper in any published work that uses any of these resources.*** 9 | 10 | ~~~ 11 | @article{aluru2020deep, 12 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 13 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 14 | journal={arXiv preprint arXiv:2004.06465}, 15 | year={2020} 16 | } 17 | 18 | ~~~ 19 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-english/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **English language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.726030 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-french/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **French language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 
2 | The model is trained with different learning rates and the best validation score achieved is 0.692094 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-german/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **German language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.649794 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-indonesian/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Indonesian language**. The mono in the name refers to the monolingual setting, where the model is trained using only Arabic language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.844494 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 
9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-italian/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Italian language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.837288 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-polish/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Polish language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.723254 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-portugese/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Portuguese language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 
2 | The model is trained with different learning rates and the best validation score achieved is 0.716119 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-spanish/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Spanish language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.740287 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/NeuML/bert-small-cord19-squad2/README.md: -------------------------------------------------------------------------------- 1 | # BERT-Small CORD-19 fine-tuned on SQuAD 2.0 2 | 3 | [bert-small-cord19 model](https://huggingface.co/NeuML/bert-small-cord19) fine-tuned on SQuAD 2.0 4 | 5 | ## Building the model 6 | 7 | ```bash 8 | python run_squad.py 9 | --model_type bert 10 | --model_name_or_path bert-small-cord19 11 | --do_train 12 | --do_eval 13 | --do_lower_case 14 | --version_2_with_negative 15 | --train_file train-v2.0.json 16 | --predict_file dev-v2.0.json 17 | --per_gpu_train_batch_size 8 18 | --learning_rate 3e-5 19 | --num_train_epochs 3.0 20 | --max_seq_length 384 21 | --doc_stride 128 22 | --output_dir bert-small-cord19-squad2 23 | --save_steps 0 24 | --threads 8 25 | --overwrite_cache 26 | --overwrite_output_dir 27 | -------------------------------------------------------------------------------- /model_cards/NeuML/bert-small-cord19/README.md: -------------------------------------------------------------------------------- 1 | # BERT-Small fine-tuned on CORD-19 dataset 2 | 3 | [BERT L6_H-512_A-8 model](https://huggingface.co/google/bert_uncased_L-6_H-512_A-8) fine-tuned on the [CORD-19 dataset](https://www.semanticscholar.org/cord19). 
4 | 5 | ## CORD-19 data subset 6 | The training data for this dataset is stored as a [Kaggle dataset](https://www.kaggle.com/davidmezzetti/cord19-qa?select=cord19.txt). The training 7 | data is a subset of the full corpus, focusing on high-quality, study-design detected articles. 8 | 9 | ## Building the model 10 | 11 | ```bash 12 | python run_language_modeling.py 13 | --model_type bert 14 | --model_name_or_path google/bert_uncased_L-6_H-512_A-8 15 | --do_train 16 | --mlm 17 | --line_by_line 18 | --block_size 512 19 | --train_data_file cord19.txt 20 | --per_gpu_train_batch_size 4 21 | --learning_rate 3e-5 22 | --num_train_epochs 3.0 23 | --output_dir bert-small-cord19 24 | --save_steps 0 25 | --overwrite_output_dir 26 | -------------------------------------------------------------------------------- /model_cards/T-Systems-onsite/bert-german-dbmdz-uncased-sentence-stsb/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | license: mit 4 | --- 5 | 6 | # bert-german-dbmdz-uncased-sentence-stsb 7 | **This model is outdated!** 8 | 9 | The new [T-Systems-onsite/cross-en-de-roberta-sentence-transformer](https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer) model is better for German language. It is also the current best model for English language and works cross-lingually. Please consider using that model. -------------------------------------------------------------------------------- /model_cards/VictorSanh/roberta-base-finetuned-yelp-polarity/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | datasets: 4 | - yelp_polarity 5 | --- 6 | 7 | # RoBERTa-base-finetuned-yelp-polarity 8 | 9 | This is a [RoBERTa-base](https://huggingface.co/roberta-base) checkpoint fine-tuned on binary sentiment classifcation from [Yelp polarity](https://huggingface.co/nlp/viewer/?dataset=yelp_polarity). 10 | It gets **98.08%** accuracy on the test set. 11 | 12 | ## Hyper-parameters 13 | 14 | We used the following hyper-parameters to train the model on one GPU: 15 | ```python 16 | num_train_epochs = 2.0 17 | learning_rate = 1e-05 18 | weight_decay = 0.0 19 | adam_epsilon = 1e-08 20 | max_grad_norm = 1.0 21 | per_device_train_batch_size = 32 22 | gradient_accumulation_steps = 1 23 | warmup_steps = 3500 24 | seed = 42 25 | ``` 26 | -------------------------------------------------------------------------------- /model_cards/ViktorAlm/electra-base-norwegian-uncased-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: no 3 | thumbnail: https://i.imgur.com/QqSEC5I.png 4 | --- 5 | 6 | # Norwegian Electra 7 | ![Image of norwegian electra](https://i.imgur.com/QqSEC5I.png) 8 | 9 | Trained on Oscar + wikipedia + opensubtitles + some other data I had with the awesome power of TPUs(V3-8) 10 | 11 | Use with caution. I have no downstream tasks in Norwegian to test on so I have no idea of its performance yet. 12 | # Model 13 | ## Electra: Pre-training Text Encoders as Discriminators Rather Than Generators 14 | Kevin Clark and Minh-Thang Luong and Quoc V. Le and Christopher D. Manning 15 | - https://openreview.net/pdf?id=r1xMH1BtvB 16 | - https://github.com/google-research/electra 17 | # Acknowledgments 18 | ### TensorFlow Research Cloud 19 | Research supported with Cloud TPUs from Google's TensorFlow Research Cloud (TFRC). 
Thanks for providing access to the TFRC ❤️ 20 | - https://www.tensorflow.org/tfrc 21 | #### OSCAR corpus 22 | - https://oscar-corpus.com/ 23 | #### OPUS 24 | - http://opus.nlpl.eu/ 25 | - http://www.opensubtitles.org/ 26 | -------------------------------------------------------------------------------- /model_cards/adalbertojunior/PTT5-SMALL-SUM/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: pt 3 | --- 4 | 5 | # PTT5-SMALL-SUM 6 | 7 | ## Model description 8 | 9 | This model was trained to summarize texts in portuguese 10 | 11 | 12 | based on ```unicamp-dl/ptt5-small-portuguese-vocab``` 13 | 14 | #### How to use 15 | 16 | ```python 17 | from transformers import T5Tokenizer, T5ForConditionalGeneration 18 | 19 | tokenizer = T5Tokenizer.from_pretrained('adalbertojunior/PTT5-SMALL-SUM') 20 | 21 | t5 = T5ForConditionalGeneration.from_pretrained('adalbertojunior/PTT5-SMALL-SUM') 22 | 23 | text="Esse é um exemplo de sumarização." 24 | 25 | input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True) 26 | 27 | generated_ids = t5.generate( 28 | input_ids=input_ids, 29 | num_beams=1, 30 | max_length=40, 31 | #repetition_penalty=2.5 32 | ).squeeze() 33 | 34 | predicted_span = tokenizer.decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True) 35 | 36 | 37 | ``` 38 | -------------------------------------------------------------------------------- /model_cards/akhooli/gpt2-small-arabic/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: "ar" 3 | datasets: 4 | - Arabic Wikipedia 5 | metrics: 6 | - none 7 | --- 8 | 9 | # GPT2-Small-Arabic 10 | 11 | ## Model description 12 | 13 | GPT2 model from Arabic Wikipedia dataset based on gpt2-small (using Fastai2). 14 | 15 | ## Intended uses & limitations 16 | 17 | #### How to use 18 | 19 | An example is provided in this [colab notebook](https://colab.research.google.com/drive/1mRl7c-5v-Klx27EEAEOAbrfkustL4g7a?usp=sharing). 20 | Both text and poetry (fine-tuned model) generation are included. 21 | 22 | #### Limitations and bias 23 | 24 | GPT2-small-arabic (trained on Arabic Wikipedia) has several limitations in terms of coverage (Arabic Wikipeedia quality, no diacritics) and training performance. 25 | Use as demonstration or proof of concepts but not as production code. 26 | 27 | ## Training data 28 | 29 | This pretrained model used the Arabic Wikipedia dump (around 900 MB). 30 | 31 | ## Training procedure 32 | 33 | Training was done using [Fastai2](https://github.com/fastai/fastai2/) library on Kaggle, using free GPU. 34 | 35 | ## Eval results 36 | Final perplexity reached was 72.19, loss: 4.28, accuracy: 0.307 37 | 38 | ### BibTeX entry and citation info 39 | 40 | ```bibtex 41 | @inproceedings{Abed Khooli, 42 | year={2020} 43 | } 44 | ``` 45 | -------------------------------------------------------------------------------- /model_cards/akhooli/mbart-large-cc25-ar-en/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - translation 4 | 5 | language: 6 | - ar 7 | - en 8 | 9 | license: mit 10 | --- 11 | ### mbart-large-ar-en 12 | This is mbart-large-cc25, finetuned on a subset of the OPUS corpus for ar_en. 13 | Usage: see [example notebook](https://colab.research.google.com/drive/1I6RFOWMaTpPBX7saJYjnSTddW0TD6H1t?usp=sharing) 14 | Note: model has limited training set, not fully trained (do not use for production). 
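A hedged usage sketch (not taken from the notebook above). It assumes the checkpoint keeps the stock mBART-cc25 language codes `ar_AR` and `en_XX` and loads with the standard `MBartTokenizer`/`MBartForConditionalGeneration` classes; the input sentence is only an example.

```python
from transformers import MBartForConditionalGeneration, MBartTokenizer

name = "akhooli/mbart-large-cc25-ar-en"
tokenizer = MBartTokenizer.from_pretrained(name, src_lang="ar_AR", tgt_lang="en_XX")
model = MBartForConditionalGeneration.from_pretrained(name)

text = "مرحبا بالعالم"  # "Hello, world" (example input)
batch = tokenizer(text, return_tensors="pt")
generated = model.generate(
    **batch,
    decoder_start_token_id=tokenizer.lang_code_to_id["en_XX"],  # force English output
    num_beams=4,
    max_length=64,
)
print(tokenizer.batch_decode(generated, skip_special_tokens=True))
```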
15 | Other models by me: [Abed Khooli](https://huggingface.co/akhooli) 16 | -------------------------------------------------------------------------------- /model_cards/akhooli/mbart-large-cc25-en-ar/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - translation 4 | 5 | language: 6 | - en 7 | - ar 8 | 9 | license: mit 10 | --- 11 | ### mbart-large-en-ar 12 | This is mbart-large-cc25, finetuned on a subset of the UN corpus for en_ar. 13 | Usage: see [example notebook](https://colab.research.google.com/drive/1I6RFOWMaTpPBX7saJYjnSTddW0TD6H1t?usp=sharing) 14 | Note: model has limited training set, not fully trained (do not use for production). 15 | -------------------------------------------------------------------------------- /model_cards/akhooli/personachat-arabic/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - conversational 4 | language: 5 | - ar 6 | license: mit 7 | --- 8 | ## personachat-arabic (conversational AI) 9 | This is personachat-arabic, using a subset from the persona-chat validation dataset, machine translated to Arabic (from English) 10 | and fine-tuned from [akhooli/gpt2-small-arabic](https://huggingface.co/akhooli/gpt2-small-arabic) which is a limited text generation model. 11 | Usage: see the last section of this [example notebook](https://colab.research.google.com/drive/1I6RFOWMaTpPBX7saJYjnSTddW0TD6H1t?usp=sharing) 12 | Note: model has limited training set which was machine translated (do not use for production). 13 | -------------------------------------------------------------------------------- /model_cards/akhooli/xlm-r-large-arabic-sent/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | language: 4 | - ar 5 | - en 6 | 7 | license: mit 8 | --- 9 | ### xlm-r-large-arabic-sent 10 | Multilingual sentiment classification (Label_0: mixed, Label_1: negative, Label_2: positive) of Arabic reviews by fine-tuning XLM-Roberta-Large. 11 | Zero shot classification of other languages (also works in mixed languages - ex. Arabic & English). Mixed category is not accurate and may confuse other 12 | classes (was based on a rate of 3 out of 5 in reviews). 13 | Usage: see last section in this [Colab notebook](https://lnkd.in/d3bCFyZ) 14 | -------------------------------------------------------------------------------- /model_cards/akhooli/xlm-r-large-arabic-toxic/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | language: 4 | - ar 5 | - en 6 | 7 | license: mit 8 | --- 9 | ### xlm-r-large-arabic-toxic (toxic/hate speech classifier) 10 | Toxic (hate speech) classification (Label_0: non-toxic, Label_1: toxic) of Arabic comments by fine-tuning XLM-Roberta-Large. 11 | Zero shot classification of other languages (also works in mixed languages - ex. Arabic & English). 
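A minimal classification sketch, shown as an assumption about how the checkpoint is typically loaded; the example comment is made up, and the label order follows the description above.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

name = "akhooli/xlm-r-large-arabic-toxic"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name)

comment = "هذا تعليق عادي"  # an ordinary (non-toxic) example comment
inputs = tokenizer(comment, return_tensors="pt", truncation=True)
with torch.no_grad():
    probs = torch.softmax(model(**inputs).logits, dim=-1).squeeze()

# Label_0: non-toxic, Label_1: toxic (per the description above)
print({"non-toxic": probs[0].item(), "toxic": probs[1].item()})
```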
12 | Usage and further info: see last section in this [Colab notebook](https://lnkd.in/d3bCFyZ) 13 | -------------------------------------------------------------------------------- /model_cards/albert-base-v1-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/albert-xxlarge-v2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /model_cards/allenai/longformer-base-4096-extra.pos.embd.only/README.md: -------------------------------------------------------------------------------- 1 | 2 | # longformer-base-4096-extra.pos.embd.only 3 | 4 | This model is similar to `longformer-base-4096` but it was pretrained to preserve RoBERTa weights by freezing all RoBERTa weights and only train the additional position embeddings. 5 | 6 | 7 | ### Citing 8 | 9 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150). 10 | ``` 11 | @article{Beltagy2020Longformer, 12 | title={Longformer: The Long-Document Transformer}, 13 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan}, 14 | journal={arXiv:2004.05150}, 15 | year={2020}, 16 | } 17 | ``` 18 | 19 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org). 20 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering. 21 | -------------------------------------------------------------------------------- /model_cards/allenai/longformer-base-4096/README.md: -------------------------------------------------------------------------------- 1 | 2 | # longformer-base-4096 3 | [Longformer](https://arxiv.org/abs/2004.05150) is a transformer model for long documents. 4 | 5 | `longformer-base-4096` is a BERT-like model started from the RoBERTa checkpoint and pretrained for MLM on long documents. It supports sequences of length up to 4,096. 6 | 7 | Longformer uses a combination of a sliding window (local) attention and global attention. Global attention is user-configured based on the task to allow the model to learn task-specific representations. 8 | Please refer to the examples in `modeling_longformer.py` and the paper for more details on how to set global attention. 9 | 10 | 11 | ### Citing 12 | 13 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150). 14 | ``` 15 | @article{Beltagy2020Longformer, 16 | title={Longformer: The Long-Document Transformer}, 17 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan}, 18 | journal={arXiv:2004.05150}, 19 | year={2020}, 20 | } 21 | ``` 22 | 23 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org). 24 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering. 
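As a rough illustration of the task-configured global attention described above: which tokens receive global attention is a per-task choice, and giving it only to the first `<s>` token here is just an example, not a recommendation.

```python
import torch
from transformers import LongformerModel, LongformerTokenizer

tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
model = LongformerModel.from_pretrained("allenai/longformer-base-4096")

text = " ".join(["A long document."] * 500)
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=4096)

# 0 = sliding-window (local) attention, 1 = global attention.
global_attention_mask = torch.zeros_like(inputs["input_ids"])
global_attention_mask[:, 0] = 1  # e.g. give the <s> token global attention

outputs = model(**inputs, global_attention_mask=global_attention_mask)
print(outputs.last_hidden_state.shape)
```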
25 | -------------------------------------------------------------------------------- /model_cards/allenai/scibert_scivocab_cased/README.md: -------------------------------------------------------------------------------- 1 | # SciBERT 2 | 3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text. 4 | 5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts. 6 | 7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions. 8 | 9 | Available models include: 10 | * `scibert_scivocab_cased` 11 | * `scibert_scivocab_uncased` 12 | 13 | 14 | The original repo can be found [here](https://github.com/allenai/scibert). 15 | 16 | If using these models, please cite the following paper: 17 | ``` 18 | @inproceedings{beltagy-etal-2019-scibert, 19 | title = "SciBERT: A Pretrained Language Model for Scientific Text", 20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman", 21 | booktitle = "EMNLP", 22 | year = "2019", 23 | publisher = "Association for Computational Linguistics", 24 | url = "https://www.aclweb.org/anthology/D19-1371" 25 | } 26 | ``` 27 | -------------------------------------------------------------------------------- /model_cards/allenai/scibert_scivocab_uncased/README.md: -------------------------------------------------------------------------------- 1 | # SciBERT 2 | 3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text. 4 | 5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts. 6 | 7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions. 8 | 9 | Available models include: 10 | * `scibert_scivocab_cased` 11 | * `scibert_scivocab_uncased` 12 | 13 | 14 | The original repo can be found [here](https://github.com/allenai/scibert). 
15 | 16 | If using these models, please cite the following paper: 17 | ``` 18 | @inproceedings{beltagy-etal-2019-scibert, 19 | title = "SciBERT: A Pretrained Language Model for Scientific Text", 20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman", 21 | booktitle = "EMNLP", 22 | year = "2019", 23 | publisher = "Association for Computational Linguistics", 24 | url = "https://www.aclweb.org/anthology/D19-1371" 25 | } 26 | ``` 27 | -------------------------------------------------------------------------------- /model_cards/allenyummy/chinese-bert-wwm-ehr-ner-sl/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh-tw 3 | --- 4 | 5 | # Model name 6 | Chinese-bert-wwm-electrical-health-record-ner-sequence-labeling 7 | 8 | 9 | #### How to use 10 | 11 | ``` 12 | from transformers import AutoTokenizer, AutoModelForTokenClassification 13 | tokenizer = AutoTokenizer.from_pretrained("chinese-bert-wwm-ehr-ner-sl") 14 | model = AutoModelForTokenClassification.from_pretrained("chinese-bert-wwm-ehr-ner-sl") 15 | ``` 16 | -------------------------------------------------------------------------------- /model_cards/aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616/README.md: -------------------------------------------------------------------------------- 1 | # BERT L-10 H-512 fine-tuned on MLM (CORD-19 2020/06/16) 2 | 3 | BERT model with [10 Transformer layers and hidden embedding of size 512](https://huggingface.co/google/bert_uncased_L-10_H-512_A-8), referenced in [Well-Read Students Learn Better: On the Importance of Pre-training Compact Models](https://arxiv.org/abs/1908.08962), fine-tuned for MLM on CORD-19 dataset (as released on 2020/06/16). 4 | 5 | ## Training the model 6 | 7 | ```bash 8 | python run_language_modeling.py 9 | --model_type bert 10 | --model_name_or_path google/bert_uncased_L-10_H-512_A-8 11 | --do_train 12 | --train_data_file {cord19-200616-dataset} 13 | --mlm 14 | --mlm_probability 0.2 15 | --line_by_line 16 | --block_size 512 17 | --per_device_train_batch_size 10 18 | --learning_rate 3e-5 19 | --num_train_epochs 2 20 | --output_dir bert_uncased_L-10_H-512_A-8_cord19-200616 21 | -------------------------------------------------------------------------------- /model_cards/aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | datasets: 3 | - squad_v2 4 | --- 5 | 6 | # BERT L-10 H-512 CORD-19 (2020/06/16) fine-tuned on SQuAD v2.0 7 | 8 | BERT model with [10 Transformer layers and hidden embedding of size 512](https://huggingface.co/google/bert_uncased_L-10_H-512_A-8), referenced in [Well-Read Students Learn Better: On the Importance of Pre-training Compact Models](https://arxiv.org/abs/1908.08962), [fine-tuned for MLM](https://huggingface.co/aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616) on CORD-19 dataset (as released on 2020/06/16) and fine-tuned for QA on SQuAD v2.0. 
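For illustration, a minimal question-answering sketch; the question and context are made-up examples, and serving the model through `pipeline` is an assumption rather than part of the original card.

```python
from transformers import pipeline

name = "aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2"
qa = pipeline("question-answering", model=name, tokenizer=name)

result = qa(
    question="What virus causes COVID-19?",
    context="COVID-19 is an infectious disease caused by the SARS-CoV-2 virus, first reported in December 2019.",
)
print(result)  # {'score': ..., 'start': ..., 'end': ..., 'answer': ...}
```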
9 |
10 | ## Training the model
11 |
12 | ```bash
13 | python run_squad.py \
14 |     --model_type bert \
15 |     --model_name_or_path aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616 \
16 |     --train_file 'train-v2.0.json' \
17 |     --predict_file 'dev-v2.0.json' \
18 |     --do_train \
19 |     --do_eval \
20 |     --do_lower_case \
21 |     --version_2_with_negative \
22 |     --max_seq_length 384 \
23 |     --per_gpu_train_batch_size 10 \
24 |     --learning_rate 3e-5 \
25 |     --num_train_epochs 2 \
26 |     --output_dir bert_uncased_L-10_H-512_A-8_cord19-200616_squad2
27 | ```
--------------------------------------------------------------------------------
/model_cards/aodiniz/bert_uncased_L-2_H-512_A-8_cord19-200616/README.md:
--------------------------------------------------------------------------------
1 | # BERT L-2 H-512 fine-tuned on MLM (CORD-19 2020/06/16)
2 |
3 | BERT model with [2 Transformer layers and hidden embedding of size 512](https://huggingface.co/google/bert_uncased_L-2_H-512_A-8), referenced in [Well-Read Students Learn Better: On the Importance of Pre-training Compact Models](https://arxiv.org/abs/1908.08962), fine-tuned for MLM on the CORD-19 dataset (as released on 2020/06/16).
4 |
5 | ## Training the model
6 |
7 | ```bash
8 | python run_language_modeling.py \
9 |     --model_type bert \
10 |     --model_name_or_path google/bert_uncased_L-2_H-512_A-8 \
11 |     --do_train \
12 |     --train_data_file {cord19-200616-dataset} \
13 |     --mlm \
14 |     --mlm_probability 0.2 \
15 |     --line_by_line \
16 |     --block_size 512 \
17 |     --per_device_train_batch_size 20 \
18 |     --learning_rate 3e-5 \
19 |     --num_train_epochs 2 \
20 |     --output_dir bert_uncased_L-2_H-512_A-8_cord19-200616
21 | ```
--------------------------------------------------------------------------------
/model_cards/aodiniz/bert_uncased_L-4_H-256_A-4_cord19-200616/README.md:
--------------------------------------------------------------------------------
1 | # BERT L-4 H-256 fine-tuned on MLM (CORD-19 2020/06/16)
2 |
3 | BERT model with [4 Transformer layers and hidden embedding of size 256](https://huggingface.co/google/bert_uncased_L-4_H-256_A-4), referenced in [Well-Read Students Learn Better: On the Importance of Pre-training Compact Models](https://arxiv.org/abs/1908.08962), fine-tuned for MLM on the CORD-19 dataset (as released on 2020/06/16).
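As an illustration (not part of the original card), the MLM checkpoint should be usable through the fill-mask pipeline; the model id below is taken from this card's path, and the example sentence is arbitrary:

```python
from transformers import pipeline

# Query the MLM checkpoint named in this card.
fill_mask = pipeline(
    "fill-mask",
    model="aodiniz/bert_uncased_L-4_H-256_A-4_cord19-200616",
    tokenizer="aodiniz/bert_uncased_L-4_H-256_A-4_cord19-200616",
)

print(fill_mask(f"Coronavirus is transmitted by respiratory {fill_mask.tokenizer.mask_token}."))
```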
4 |
5 | ## Training the model
6 |
7 | ```bash
8 | python run_language_modeling.py \
9 |     --model_type bert \
10 |     --model_name_or_path google/bert_uncased_L-4_H-256_A-4 \
11 |     --do_train \
12 |     --train_data_file {cord19-200616-dataset} \
13 |     --mlm \
14 |     --mlm_probability 0.2 \
15 |     --line_by_line \
16 |     --block_size 256 \
17 |     --per_device_train_batch_size 20 \
18 |     --learning_rate 3e-5 \
19 |     --num_train_epochs 2 \
20 |     --output_dir bert_uncased_L-4_H-256_A-4_cord19-200616
21 | ```
--------------------------------------------------------------------------------
/model_cards/bart-large-cnn/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | ---
5 |
6 |
--------------------------------------------------------------------------------
/model_cards/bart-large-xsum/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | ---
5 |
6 |
--------------------------------------------------------------------------------
/model_cards/bert-base-chinese-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: zh
3 | ---
4 |
--------------------------------------------------------------------------------
/model_cards/bert-base-german-dbmdz-cased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: de
3 | license: mit
4 | ---
5 |
--------------------------------------------------------------------------------
/model_cards/bert-base-german-dbmdz-uncased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: de
3 | license: mit
4 | ---
5 |
--------------------------------------------------------------------------------
/model_cards/bert-large-cased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: apache-2.0
3 | ---
4 |
--------------------------------------------------------------------------------
/model_cards/binwang/xlnet-base-cased/README.md:
--------------------------------------------------------------------------------
1 | This model is a pre-trained **XLNet** with 12 layers.
2 |
3 | It accompanies the paper: SBERT-WK: A Sentence Embedding Method By Dissecting BERT-based Word Models
4 |
5 | Project Page: [SBERT-WK](https://github.com/BinWang28/SBERT-WK-Sentence-Embedding)
6 |
--------------------------------------------------------------------------------
/model_cards/ceostroff/harry-potter-gpt2-fanfiction/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - en
4 | tags:
5 | - harry-potter
6 | license: mit
7 | ---
8 |
9 | # Harry Potter Fanfiction Generator
10 |
11 | This is a pre-trained GPT-2 generative text model that allows you to generate your own Harry Potter fanfiction, trained on the top 100 rated fanfiction stories. We intend for this to be used for individual fun and experimentation and not as a commercial product.
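A minimal generation sketch (not part of the original card), assuming the checkpoint id `ceostroff/harry-potter-gpt2-fanfiction` from this card's path; the prompt is only an example:

```python
from transformers import pipeline

# Generate a short fanfiction snippet with the fine-tuned GPT-2 checkpoint.
generator = pipeline("text-generation", model="ceostroff/harry-potter-gpt2-fanfiction")

print(generator("Harry looked at the Marauder's Map and", max_length=50, num_return_sequences=1))
```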
12 | -------------------------------------------------------------------------------- /model_cards/clue/albert_chinese_small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## albert_chinese_small 6 | 7 | ### Overview 8 | 9 | **Language model:** albert-small 10 | **Model size:** 18.5M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:**Since sentencepiece is not used in `albert_chinese_small` model, you have to call **BertTokenizer** instead of AlbertTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, AlbertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_small") 27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_small") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/albert_chinese_tiny/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## albert_chinese_tiny 6 | 7 | ### Overview 8 | 9 | **Language model:** albert-tiny 10 | **Model size:** 16M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:**Since sentencepiece is not used in `albert_chinese_tiny` model, you have to call **BertTokenizer** instead of AlbertTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, AlbertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_tiny") 27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_tiny") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 
33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/roberta_chinese_base/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## roberta_chinese_base 6 | 7 | ### Overview 8 | 9 | **Language model:** roberta-base 10 | **Model size:** 392M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, BertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_base") 27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_base") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/roberta_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## roberta_chinese_large 6 | 7 | ### Overview 8 | 9 | **Language model:** roberta-large 10 | **Model size:** 1.2G 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, BertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_large") 27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_large") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 
33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/xlnet_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## xlnet_chinese_large 6 | 7 | ### Overview 8 | 9 | **Language model:** xlnet-large 10 | **Model size:** 1.3G 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | ``` 22 | import torch 23 | from transformers import XLNetTokenizer,XLNetModel 24 | tokenizer = XLNetTokenizer.from_pretrained("clue/xlnet_chinese_large") 25 | xlnet = XLNetModel.from_pretrained("clue/xlnet_chinese_large") 26 | ``` 27 | 28 | ### About CLUE benchmark 29 | 30 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 31 | 32 | Github: https://github.com/CLUEbenchmark 33 | Website: https://www.cluebenchmarks.com/ 34 | -------------------------------------------------------------------------------- /model_cards/daigo/bert-base-japanese-sentiment/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - ja 4 | --- 5 | 6 | binary classification 7 | 8 | # Usage 9 | ``` 10 | print(pipeline("sentiment-analysis",model="daigo/bert-base-japanese-sentiment",tokenizer="daigo/bert-base-japanese-sentiment")("私は幸福である。")) 11 | 12 | [{'label': 'ポジティブ', 'score': 0.98430425}] 13 | ``` 14 | -------------------------------------------------------------------------------- /model_cards/dccuchile/bert-base-spanish-wwm-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | --- 4 | -------------------------------------------------------------------------------- /model_cards/dccuchile/bert-base-spanish-wwm-uncased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | --- 4 | -------------------------------------------------------------------------------- /model_cards/deepset/bert-base-german-cased-oldvocab/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | license: mit 4 | thumbnail: https://static.tildacdn.com/tild6438-3730-4164-b266-613634323466/german_bert.png 5 | tags: 6 | - exbert 7 | --- 8 | 9 | 10 | 11 | 12 | 13 | # German BERT with old vocabulary 14 | For details see the related [FARM issue](https://github.com/deepset-ai/FARM/issues/60). 15 | 16 | 17 | ## About us 18 | ![deepset logo](https://raw.githubusercontent.com/deepset-ai/FARM/master/docs/img/deepset_logo.png) 19 | 20 | We bring NLP to the industry via open source! 21 | Our focus: Industry specific language models & large scale QA systems. 
22 | 23 | Some of our work: 24 | - [German BERT (aka "bert-base-german-cased")](https://deepset.ai/german-bert) 25 | - [FARM](https://github.com/deepset-ai/FARM) 26 | - [Haystack](https://github.com/deepset-ai/haystack/) 27 | 28 | Get in touch: 29 | [Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Website](https://deepset.ai) 30 | -------------------------------------------------------------------------------- /model_cards/deepset/sentence_bert/README.md: -------------------------------------------------------------------------------- 1 | This is an upload of the bert-base-nli-stsb-mean-tokens pretrained model from the Sentence Transformers Repo (https://github.com/UKPLab/sentence-transformers) 2 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-cased-distilled-squad-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: "en" 3 | datasets: 4 | - squad 5 | metrics: 6 | - squad 7 | license: apache-2.0 8 | --- 9 | 10 | # DistilBERT base cased distilled SQuAD 11 | 12 | This model is a fine-tune checkpoint of [DistilBERT-base-cased](https://huggingface.co/distilbert-base-cased), fine-tuned using (a second step of) knowledge distillation on SQuAD v1.1. 13 | This model reaches a F1 score of 87.1 on the dev set (for comparison, BERT bert-base-cased version reaches a F1 score of 88.7). 14 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-german-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | license: apache-2.0 4 | --- 5 | ## distilbert-base-german-cased 6 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-uncased-finetuned-sst-2-english-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | datasets: 5 | - sst-2 6 | --- 7 | 8 | # DistilBERT base uncased finetuned SST-2 9 | 10 | This model is a fine-tune checkpoint of [DistilBERT-base-uncased](https://huggingface.co/distilbert-base-uncased), fine-tuned on SST-2. 11 | This model reaches an accuracy of 91.3 on the dev set (for comparison, Bert bert-base-uncased version reaches an accuracy of 92.7). 12 | 13 | # Fine-tuning hyper-parameters 14 | 15 | - learning_rate = 1e-5 16 | - batch_size = 32 17 | - warmup = 600 18 | - max_seq_length = 128 19 | - num_train_epochs = 3.0 20 | -------------------------------------------------------------------------------- /model_cards/distilgpt2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | tags: 4 | - exbert 5 | 6 | license: apache-2.0 7 | datasets: 8 | - openwebtext 9 | --- 10 | 11 | # DistilGPT2 12 | 13 | DistilGPT2 English language model pretrained with the supervision of [GPT2](https://huggingface.co/gpt2) (the smallest version of GPT2) on [OpenWebTextCorpus](https://skylion007.github.io/OpenWebTextCorpus/), a reproduction of OpenAI's WebText dataset. The model has 6 layers, 768 dimension and 12 heads, totalizing 82M parameters (compared to 124M parameters for GPT2). On average, DistilGPT2 is two times faster than GPT2. 
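As a quick illustration (not part of the original card), the model can be used directly with the text-generation pipeline:

```python
from transformers import pipeline

# Generate text with the distilled checkpoint.
generator = pipeline("text-generation", model="distilgpt2")
print(generator("Once upon a time,", max_length=30, num_return_sequences=1))
```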
14 |
15 | On the [WikiText-103](https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/) benchmark, GPT2 reaches a perplexity on the test set of 16.3 compared to 21.1 for DistilGPT2 (after fine-tuning on the train set).
16 |
17 | We encourage you to check out [GPT2](https://huggingface.co/gpt2) to learn more about usage, limitations and potential biases.
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/model_cards/djstrong/bg_cs_pl_ru_cased_L-12_H-768_A-12/README.md:
--------------------------------------------------------------------------------
1 | Slavic BERT from https://github.com/deepmipt/Slavic-BERT-NER http://files.deeppavlov.ai/deeppavlov_data/bg_cs_pl_ru_cased_L-12_H-768_A-12.tar.gz
2 |
--------------------------------------------------------------------------------
/model_cards/facebook/bart-large-cnn/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 |
5 | license: mit
6 | thumbnail: https://huggingface.co/front/thumbnails/facebook.png
7 | ---
8 |
--------------------------------------------------------------------------------
/model_cards/facebook/bart-large/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: mit
3 | thumbnail: https://huggingface.co/front/thumbnails/facebook.png
4 | ---
5 |
6 | The Bart model was proposed by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer on 29 Oct, 2019. According to the abstract,
7 |
8 | Bart uses a standard seq2seq/machine translation architecture with a bidirectional encoder (like BERT) and a left-to-right decoder (like GPT).
9 |
10 | The pretraining task involves randomly shuffling the order of the original sentences and a novel in-filling scheme, where spans of text are replaced with a single mask token.
11 |
12 | BART is particularly effective when fine-tuned for text generation but also works well for comprehension tasks. It matches the performance of RoBERTa with comparable training resources on GLUE and SQuAD, and achieves new state-of-the-art results on a range of abstractive dialogue, question answering, and summarization tasks, with gains of up to 6 ROUGE.
13 |
14 | The authors’ code can be found here:
15 | https://github.com/pytorch/fairseq/tree/master/examples/bart
16 |
--------------------------------------------------------------------------------
/model_cards/facebook/rag-token-nq_new/README.md:
--------------------------------------------------------------------------------
1 | The model can be loaded and used on [this branch](https://github.com/huggingface/transformers/tree/finalize_rag) as follows.
2 | 3 | 4 | # Load model 5 | 6 | ```python 7 | from transformers import RagTokenizer, RagTokenForGeneration, RagRetriever 8 | 9 | # create Retriever augmented model 10 | retriever = RagRetriever.from_pretrained("facebook/rag-token-nq_new", use_dummy_dataset=True) 11 | model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq_new", retriever=retriever) 12 | 13 | tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq_new") 14 | 15 | # create input ids and labels 16 | input_ids = tokenizer("who sings does he love me with reba", return_tensors="pt").input_ids 17 | 18 | # use labels 19 | labels = tokenizer.generator("Linda Davis", return_tensors="pt").input_ids 20 | 21 | 22 | # compute loss 23 | outputs = model(input_ids, labels=labels) 24 | ``` 25 | -------------------------------------------------------------------------------- /model_cards/flexudy/t5-base-multi-sentence-doctor/sent-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/model_cards/flexudy/t5-base-multi-sentence-doctor/sent-banner.png -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-128_A-2/README.md: 
-------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- 
/model_cards/google/bert_uncased_L-8_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/mobilebert-uncased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | thumbnail: https://huggingface.co/front/thumbnails/google.png 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | ## MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices 9 | 10 | MobileBERT is a thin version of BERT_LARGE, while equipped with bottleneck structures and a carefully designed balance 11 | between self-attentions and feed-forward networks. 12 | 13 | This checkpoint is the original MobileBert Optimized Uncased English: 14 | [uncased_L-24_H-128_B-512_A-4_F-4_OPT](https://storage.googleapis.com/cloud-tpu-checkpoints/mobilebert/uncased_L-24_H-128_B-512_A-4_F-4_OPT.tar.gz) 15 | checkpoint. 16 | 17 | ## How to use MobileBERT in `transformers` 18 | 19 | ```python 20 | from transformers import pipeline 21 | 22 | fill_mask = pipeline( 23 | "fill-mask", 24 | model="google/mobilebert-uncased", 25 | tokenizer="google/mobilebert-uncased" 26 | ) 27 | 28 | print( 29 | fill_mask(f"HuggingFace is creating a {fill_mask.tokenizer.mask_token} that the community uses to solve NLP tasks.") 30 | ) 31 | 32 | ``` 33 | -------------------------------------------------------------------------------- /model_cards/gpt2-large-README.md: -------------------------------------------------------------------------------- 1 | Test the full generation capabilities here: https://transformer.huggingface.co/doc/gpt2-large 2 | -------------------------------------------------------------------------------- /model_cards/gpt2-medium-README.md: -------------------------------------------------------------------------------- 1 | Test the full generation capabilities here: https://transformer.huggingface.co/doc/gpt2-large 2 | -------------------------------------------------------------------------------- /model_cards/gpt2-xl-README.md: -------------------------------------------------------------------------------- 1 | Test the whole generation capabilities here: https://transformer.huggingface.co/doc/gpt2-large 2 | -------------------------------------------------------------------------------- /model_cards/healx/gpt-2-pubmed-large/README.md: -------------------------------------------------------------------------------- 1 | GPT-2 (774M model) finetuned on 0.5m PubMed abstracts. Used in the [writemeanabstract.com](writemeanabstract.com) and the following preprint: 2 | 3 | [Papanikolaou, Yannis, and Andrea Pierleoni. "DARE: Data Augmented Relation Extraction with GPT-2." 
arXiv preprint arXiv:2004.13845 (2020).](https://arxiv.org/abs/2004.13845) 4 | -------------------------------------------------------------------------------- /model_cards/healx/gpt-2-pubmed-medium/README.md: -------------------------------------------------------------------------------- 1 | GPT-2 (355M model) finetuned on 0.5m PubMed abstracts. Used in the [writemeanabstract.com](writemeanabstract.com) and the following preprint: 2 | 3 | [Papanikolaou, Yannis, and Andrea Pierleoni. "DARE: Data Augmented Relation Extraction with GPT-2." arXiv preprint arXiv:2004.13845 (2020).](https://arxiv.org/abs/2004.13845) 4 | -------------------------------------------------------------------------------- /model_cards/huawei-noah/DynaBERT_MNLI/README.md: -------------------------------------------------------------------------------- 1 | ## DynaBERT: Dynamic BERT with Adaptive Width and Depth 2 | 3 | * DynaBERT can flexibly adjust the size and latency by selecting adaptive width and depth, and 4 | the subnetworks of it have competitive performances as other similar-sized compressed models. 5 | The training process of DynaBERT includes first training a width-adaptive BERT and then 6 | allowing both adaptive width and depth using knowledge distillation. 7 | 8 | * This code is modified based on the repository developed by Hugging Face: [Transformers v2.1.1](https://github.com/huggingface/transformers/tree/v2.1.1), and is released in [GitHub](https://github.com/huawei-noah/Pretrained-Language-Model/tree/master/DynaBERT). 9 | 10 | ### Reference 11 | Lu Hou, Zhiqi Huang, Lifeng Shang, Xin Jiang, Xiao Chen, Qun Liu. 12 | [DynaBERT: Dynamic BERT with Adaptive Width and Depth](https://arxiv.org/abs/2004.04037). 13 | ``` 14 | @inproceedings{hou2020dynabert, 15 | title = {DynaBERT: Dynamic BERT with Adaptive Width and Depth}, 16 | author = {Lu Hou, Zhiqi Huang, Lifeng Shang, Xin Jiang, Xiao Chen, Qun Liu}, 17 | booktitle = {Advances in Neural Information Processing Systems}, 18 | year = {2020} 19 | } 20 | ``` 21 | -------------------------------------------------------------------------------- /model_cards/huawei-noah/DynaBERT_SST-2/README.md: -------------------------------------------------------------------------------- 1 | ## DynaBERT: Dynamic BERT with Adaptive Width and Depth 2 | 3 | * DynaBERT can flexibly adjust the size and latency by selecting adaptive width and depth, and 4 | the subnetworks of it have competitive performances as other similar-sized compressed models. 5 | The training process of DynaBERT includes first training a width-adaptive BERT and then 6 | allowing both adaptive width and depth using knowledge distillation. 7 | 8 | * This code is modified based on the repository developed by Hugging Face: [Transformers v2.1.1](https://github.com/huggingface/transformers/tree/v2.1.1), and is released in [GitHub](https://github.com/huawei-noah/Pretrained-Language-Model/tree/master/DynaBERT). 9 | 10 | ### Reference 11 | Lu Hou, Zhiqi Huang, Lifeng Shang, Xin Jiang, Xiao Chen, Qun Liu. 12 | [DynaBERT: Dynamic BERT with Adaptive Width and Depth](https://arxiv.org/abs/2004.04037). 
13 | ``` 14 | @inproceedings{hou2020dynabert, 15 | title = {DynaBERT: Dynamic BERT with Adaptive Width and Depth}, 16 | author = {Lu Hou, Zhiqi Huang, Lifeng Shang, Xin Jiang, Xiao Chen, Qun Liu}, 17 | booktitle = {Advances in Neural Information Processing Systems}, 18 | year = {2020} 19 | } 20 | ``` 21 | -------------------------------------------------------------------------------- /model_cards/iarfmoose/roberta-base-bulgarian/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: bg 3 | --- 4 | 5 | # RoBERTa-base-bulgarian 6 | 7 | 8 | The RoBERTa model was originally introduced in [this paper](https://arxiv.org/abs/1907.11692). This is a version of [RoBERTa-base](https://huggingface.co/roberta-base) pretrained on Bulgarian text. 9 | 10 | ## Intended uses 11 | 12 | This model can be used for cloze tasks (masked language modeling) or finetuned on other tasks in Bulgarian. 13 | 14 | ## Limitations and bias 15 | 16 | The training data is unfiltered text from the internet and may contain all sorts of biases. 17 | 18 | ## Training data 19 | 20 | This model was trained on the following data: 21 | - [bg_dedup from OSCAR](https://oscar-corpus.com/) 22 | - [Newscrawl 1 million sentences 2017 from Leipzig Corpora Collection](https://wortschatz.uni-leipzig.de/en/download/bulgarian) 23 | - [Wikipedia 1 million sentences 2016 from Leipzig Corpora Collection](https://wortschatz.uni-leipzig.de/en/download/bulgarian) 24 | 25 | ## Training procedure 26 | 27 | The model was pretrained using a masked language-modeling objective with dynamic masking as described [here](https://huggingface.co/roberta-base#preprocessing) 28 | 29 | It was trained for 200k steps. The batch size was limited to 8 due to GPU memory limitations. 30 | -------------------------------------------------------------------------------- /model_cards/illuin/lepetit/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: fr 3 | thumbnail: https://miro.medium.com/max/700/1*MoPnD6vA9wTHjdLfW7POyw.png 4 | widget: 5 | - text: "Le camembert LePetit c'est le ." 6 | - text: "Salut les ça va ?" 7 | license: gpl-3.0 8 | tags: 9 | - masked-lm 10 | --- 11 | 12 | # LePetit: A pre-training efficient and lightning fast French Language Model 13 | 14 | See [blogpost](https://medium.com/illuin/lepetit-a-pre-training-efficient-and-lightning-fast-french-language-model-96495ad726b3) 15 | 16 | -------------------------------------------------------------------------------- /model_cards/ipuneetrathore/bert-base-cased-finetuned-finBERT/README.md: -------------------------------------------------------------------------------- 1 | ## FinBERT 2 | 3 | Code for importing and using this model is available [here](https://github.com/ipuneetrathore/BERT_models) 4 | -------------------------------------------------------------------------------- /model_cards/jannesg/bertsson/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: sv 3 | --- 4 | 5 | # BERTSSON Models 6 | 7 | The models are trained on: 8 | - Government Text 9 | - Swedish Literature 10 | - Swedish News 11 | 12 | Corpus size: Roughly 6B tokens. 13 | 14 | The following models are currently available: 15 | 16 | - **bertsson** - A BERT base model trained with the same hyperparameters as first published by Google. 17 | 18 | All models are cased and trained with whole word masking. 19 | 20 | Stay tuned for evaluations. 
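A minimal loading sketch (not part of the original card), assuming the checkpoint id `jannesg/bertsson` from this card's path:

```python
from transformers import AutoTokenizer, AutoModel

# Load the Swedish BERT base model and encode a sentence.
tokenizer = AutoTokenizer.from_pretrained("jannesg/bertsson")
model = AutoModel.from_pretrained("jannesg/bertsson")

inputs = tokenizer("Stockholm är Sveriges huvudstad.", return_tensors="pt")
outputs = model(**inputs)
```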
21 | -------------------------------------------------------------------------------- /model_cards/jimregan/BERTreach/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ga 3 | tags: 4 | - irish 5 | --- 6 | 7 | ## BERTreach 8 | 9 | ([beirtreach](https://www.teanglann.ie/en/fgb/beirtreach) means 'oyster bed') 10 | 11 | **Model size:** 84M 12 | 13 | **Training data:** 14 | * [PARSEME 1.2](https://gitlab.com/parseme/parseme_corpus_ga/-/blob/master/README.md) 15 | * Newscrawl 300k portion of the [Leipzig Corpora](https://wortschatz.uni-leipzig.de/en/download/irish) 16 | * Private news corpus crawled with [Corpus Crawler](https://github.com/google/corpuscrawler) 17 | 18 | (2125804 sentences, 47419062 tokens, as reckoned by wc) 19 | 20 | ``` 21 | from transformers import pipeline 22 | fill_mask = pipeline("fill-mask", model="jimregan/BERTreach", tokenizer="jimregan/BERTreach") 23 | ``` 24 | -------------------------------------------------------------------------------- /model_cards/jme-p/shrugging-grace-tweet-classifier/README.md: -------------------------------------------------------------------------------- 1 | # shrugging-grace/tweetclassifier 2 | 3 | ## Model description 4 | This model classifies tweets as either relating to the Covid-19 pandemic or not. 5 | 6 | ## Intended uses & limitations 7 | It is intended to be used on tweets commenting on UK politics, in particular those trending with the #PMQs hashtag, as this refers to weekly Prime Ministers' Questions. 8 | 9 | #### How to use 10 | ``LABEL_0`` means that the tweet relates to Covid-19 11 | 12 | ``LABEL_1`` means that the tweet does not relate to Covid-19 13 | 14 | ## Training data 15 | The model was trained on 1000 tweets (with the "#PMQs'), which were manually labeled by the author. The tweets were collected between May-July 2020. 16 | 17 | ### BibTeX entry and citation info 18 | 19 | This was based on a pretrained version of BERT. 20 | 21 | @article{devlin2018bert, 22 | title={Bert: Pre-training of deep bidirectional transformers for language understanding}, 23 | author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, 24 | journal={arXiv preprint arXiv:1810.04805}, 25 | year={2018} 26 | } 27 | -------------------------------------------------------------------------------- /model_cards/jordimas/julibert/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ca 3 | --- 4 | 5 | ## Introduction 6 | 7 | 8 | Download the model here: 9 | 10 | * Catalan Roberta model: [julibert-2020-11-10.zip](https://www.softcatala.org/pub/softcatala/julibert/julibert-2020-11-10.zip) 11 | 12 | ## What's this? 
13 |
14 | Source code: https://github.com/Softcatala/julibert
15 |
16 | * Corpus: OSCAR Catalan corpus (3.8 GB)
17 | * Model type: RoBERTa
18 | * Vocabulary size: 50265
19 | * Steps: 500000
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/model_cards/julien-c/bert-xsmall-dummy/README.md:
--------------------------------------------------------------------------------
1 | ## How to build a dummy model
2 |
3 |
4 | ```python
5 | from transformers import BertConfig, BertForMaskedLM, BertTokenizer, TFBertForMaskedLM
6 |
7 | SMALL_MODEL_IDENTIFIER = "julien-c/bert-xsmall-dummy"
8 | DIRNAME = "./bert-xsmall-dummy"
9 |
10 | config = BertConfig(10, 20, 1, 1, 40)
11 |
12 | model = BertForMaskedLM(config)
13 | model.save_pretrained(DIRNAME)
14 |
15 | tf_model = TFBertForMaskedLM.from_pretrained(DIRNAME, from_pt=True)
16 | tf_model.save_pretrained(DIRNAME)
17 |
18 | # Slightly different for tokenizer.
19 | # tokenizer = BertTokenizer.from_pretrained(DIRNAME)
20 | # tokenizer.save_pretrained()
21 | ```
22 |
--------------------------------------------------------------------------------
/model_cards/keshan/SinhalaBERTo/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: si
3 | tags:
4 | - SinhalaBERTo
5 | - Sinhala
6 | - roberta
7 | datasets:
8 | - oscar
9 | ---
10 | ### Overview
11 |
12 | This is a slightly smaller model trained on the [OSCAR](https://oscar-corpus.com/) Sinhala dedup dataset. As Sinhala is a low-resource language, only a handful of models have been trained for it. So, this would be a great place to start training for more downstream tasks.
13 |
14 | ## Model Specification
15 |
16 |
17 | The model chosen for training is [RoBERTa](https://arxiv.org/abs/1907.11692) with the following specifications:
18 | 1. vocab_size=52000
19 | 2. max_position_embeddings=514
20 | 3. num_attention_heads=12
21 | 4. num_hidden_layers=6
22 | 5. type_vocab_size=1
23 |
24 | ## How to Use
25 | You can use this model directly with a pipeline for masked language modeling:
26 |
27 | ```py
28 | from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
29 |
30 | model = AutoModelWithLMHead.from_pretrained("keshan/SinhalaBERTo")
31 | tokenizer = AutoTokenizer.from_pretrained("keshan/SinhalaBERTo")
32 |
33 | fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)
34 |
35 | fill_mask(f"මම ගෙදර {tokenizer.mask_token}.")
36 |
37 | ```
38 |
--------------------------------------------------------------------------------
/model_cards/kuppuluri/telugu_bertu/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: te
3 | ---
4 | # telugu_bertu
5 |
6 | ## Model description
7 |
8 | This model is a BERT MLM model trained on Telugu.
9 | 10 | ## Intended uses & limitations 11 | 12 | #### How to use 13 | 14 | ```python 15 | from transformers import AutoModelWithLMHead, AutoTokenizer, pipeline 16 | tokenizer = AutoTokenizer.from_pretrained("kuppuluri/telugu_bertu", 17 | clean_text=False, 18 | handle_chinese_chars=False, 19 | strip_accents=False, 20 | wordpieces_prefix='##') 21 | model = AutoModelWithLMHead.from_pretrained("kuppuluri/telugu_bertu") 22 | fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer) 23 | results = fill_mask("మక్దూంపల్లి పేరుతో చాలా [MASK] ఉన్నాయి.") 24 | ``` 25 | -------------------------------------------------------------------------------- /model_cards/kuppuluri/telugu_bertu_ner/README.md: -------------------------------------------------------------------------------- 1 | # Named Entity Recognition Model for Telugu 2 | 3 | #### How to use 4 | 5 | ```python 6 | from simpletransformers.ner import NERModel 7 | model = NERModel('bert', 8 | 'kuppuluri/telugu_bertu_ner', 9 | labels=[ 10 | 'B-PERSON', 'I-ORG', 'B-ORG', 'I-LOC', 'B-MISC', 11 | 'I-MISC', 'I-PERSON', 'B-LOC', 'O' 12 | ], 13 | use_cuda=False, 14 | args={"use_multiprocessing": False}) 15 | 16 | text = "విరాట్ కోహ్లీ కూడా అదే నిర్లక్ష్యాన్ని ప్రదర్శించి కేవలం ఒక పరుగుకే రనౌటై పెవిలియన్ చేరాడు ." 17 | results = model.predict([text]) 18 | ``` 19 | 20 | ## Training data 21 | 22 | Training data is from https://github.com/anikethjr/NER_Telugu 23 | 24 | ## Eval results 25 | 26 | On the test set my results were 27 | 28 | eval_loss = 0.0004407190410447974 29 | 30 | f1_score = 0.999519076627124 31 | 32 | precision = 0.9994389677005691 33 | 34 | recall = 0.9995991983967936 35 | 36 | -------------------------------------------------------------------------------- /model_cards/kuppuluri/telugu_bertu_pos/README.md: -------------------------------------------------------------------------------- 1 | # Part of Speech tagging Model for Telugu 2 | 3 | #### How to use 4 | 5 | ```python 6 | from simpletransformers.ner import NERModel 7 | model = NERModel('bert', 8 | 'kuppuluri/telugu_bertu_pos', 9 | args={"use_multiprocessing": False}, 10 | labels=[ 11 | 'QC', 'JJ', 'NN', 'QF', 'RDP', 'O', 12 | 'NNO', 'PRP', 'RP', 'VM', 'WQ', 13 | 'PSP', 'UT', 'CC', 'INTF', 'SYMP', 14 | 'NNP', 'INJ', 'SYM', 'CL', 'QO', 15 | 'DEM', 'RB', 'NST', ], 16 | use_cuda=False) 17 | 18 | text = "విరాట్ కోహ్లీ కూడా అదే నిర్లక్ష్యాన్ని ప్రదర్శించి కేవలం ఒక పరుగుకే రనౌటై పెవిలియన్ చేరాడు ." 
19 | results = model.predict([text])
20 | ```
21 |
22 | ## Training data
23 |
24 | Training data is from https://github.com/anikethjr/NER_Telugu
25 |
26 | ## Eval results
27 |
28 | On the test set my results were
29 |
30 | eval_loss = 0.0036797842364565416
31 |
32 | f1_score = 0.9983795127912227
33 |
34 | precision = 0.9984325602401637
35 |
36 | recall = 0.9983264709788816
37 |
--------------------------------------------------------------------------------
/model_cards/kuppuluri/telugu_bertu_tydiqa/README.md:
--------------------------------------------------------------------------------
1 | # Telugu Question-Answering model trained on Tydiqa dataset from Google
2 |
3 | #### How to use
4 |
5 | ```python
6 | from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
7 | model = AutoModelForQuestionAnswering.from_pretrained("kuppuluri/telugu_bertu_tydiqa")
8 | tokenizer = AutoTokenizer.from_pretrained("kuppuluri/telugu_bertu_tydiqa",
9 |                                           clean_text=False,
10 |                                           handle_chinese_chars=False,
11 |                                           strip_accents=False,
12 |                                           wordpieces_prefix='##')
13 | nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
14 | result = nlp({'question': question, 'context': context})  # supply your own `question` and `context` strings
15 | ```
16 |
17 | ## Training data
18 | I used Tydiqa Telugu data from Google https://github.com/google-research-datasets/tydiqa
19 |
--------------------------------------------------------------------------------
/model_cards/lanwuwei/GigaBERT-v3-Arabic-and-English/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - en
4 | - ar
5 | datasets:
6 | - gigaword
7 | - oscar
8 | - wikipedia
9 | ---
10 |
11 | ## GigaBERT-v3
12 | GigaBERT-v3 is a customized bilingual BERT for English and Arabic. It was pre-trained on a large-scale corpus (Gigaword+Oscar+Wikipedia) with ~10B tokens, showing state-of-the-art zero-shot transfer performance from English to Arabic on information extraction (IE) tasks. More details can be found in the following paper:
13 |
14 |     @inproceedings{lan2020gigabert,
15 |       author = {Lan, Wuwei and Chen, Yang and Xu, Wei and Ritter, Alan},
16 |       title = {GigaBERT: Zero-shot Transfer Learning from English to Arabic},
17 |       booktitle = {Proceedings of The 2020 Conference on Empirical Methods on Natural Language Processing (EMNLP)},
18 |       year = {2020}
19 |     }
20 |
21 | ## Usage
22 | ```
23 | from transformers import BertTokenizer, BertForTokenClassification
24 | tokenizer = BertTokenizer.from_pretrained("lanwuwei/GigaBERT-v3-Arabic-and-English", do_lower_case=True)
25 | model = BertForTokenClassification.from_pretrained("lanwuwei/GigaBERT-v3-Arabic-and-English")
26 | ```
27 | More code examples can be found [here](https://github.com/lanwuwei/GigaBERT).
28 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/bert-imdb/README.md:
--------------------------------------------------------------------------------
1 | # BERT-IMDB
2 |
3 | ## What is it?
4 | BERT (`bert-large-cased`) trained for sentiment classification on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews).
5 |
6 | ## Training setting
7 |
8 | The model was trained on 80% of the IMDB dataset for sentiment classification for three epochs with a learning rate of `1e-5` with the `simpletransformers` library. The library uses a learning rate schedule.
9 |
10 | ## Result
11 | The model achieved 90% classification accuracy on the validation set.
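An illustrative inference sketch (not part of the original card), assuming the checkpoint is published as `lvwerra/bert-imdb` with a sequence classification head:

```python
from transformers import pipeline

# Classify the sentiment of a movie review with the fine-tuned checkpoint.
classifier = pipeline("sentiment-analysis", model="lvwerra/bert-imdb", tokenizer="lvwerra/bert-imdb")
print(classifier("This movie was an absolute delight from start to finish."))
```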
12 |
13 | ## Reference
14 | The full experiment is available in the [trl repo](https://lvwerra.github.io/trl/03-bert-imdb-training/).
15 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/gpt2-imdb/README.md:
--------------------------------------------------------------------------------
1 | # GPT2-IMDB
2 |
3 | ## What is it?
4 | A GPT2 (`gpt2`) language model fine-tuned on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews).
5 |
6 | ## Training setting
7 |
8 | The GPT2 language model was fine-tuned for 1 epoch on the IMDB dataset. All reviews were joined into a single text file separated by the EOS token:
9 |
10 | ```
11 | import pandas as pd
12 | df = pd.read_csv("imdb-dataset.csv")
13 | imdb_str = " <|endoftext|> ".join(df['review'].tolist())
14 |
15 | with open('imdb.txt', 'w') as f:
16 |     f.write(imdb_str)
17 | ```
18 |
19 | To train the model, the `run_language_modeling.py` script from the `transformers` library was used:
20 |
21 | ```
22 | python run_language_modeling.py \
23 |     --train_data_file imdb.txt \
24 |     --output_dir gpt2-imdb \
25 |     --model_type gpt2 \
26 |     --model_name_or_path gpt2
27 | ```
28 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/gpt2-medium-taboo/README.md:
--------------------------------------------------------------------------------
1 | # GPT-2 (medium) Taboo
2 |
3 | ## What is it?
4 | A fine-tuned GPT-2 version for Taboo card generation.
5 |
6 | ## Training setting
7 |
8 | The model was trained on ~900 Taboo cards in the following format for 100 epochs:
9 | ```
10 | Describe the word Glitch without using the words Problem, Unexpected, Technology, Minor, Outage.
11 | ```
12 |
13 |
--------------------------------------------------------------------------------
/model_cards/lysandre/arxiv-nlp/README.md:
--------------------------------------------------------------------------------
1 | # ArXiv-NLP GPT-2 checkpoint
2 |
3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` checkpoint fine-tuned on ArXiv papers in the computational linguistics field.
4 |
5 | ## Training data
6 |
7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 80MB of text from the computational linguistics (cs.CL) field.
--------------------------------------------------------------------------------
/model_cards/lysandre/arxiv/README.md:
--------------------------------------------------------------------------------
1 | # ArXiv GPT-2 checkpoint
2 |
3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` checkpoint fine-tuned on ArXiv papers in physics fields.
4 |
5 | ## Training data
6 |
7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 130MB of text, mostly from quantum physics (quant-ph) and other physics sub-fields.
8 |
--------------------------------------------------------------------------------
/model_cards/m3hrdadfi/bert2bert-fa-news-headline/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: fa
3 | license: apache-2.0
4 | tags:
5 | - summarization
6 | ---
7 |
8 | A Bert2Bert model trained on the VoA Persian Corpus (a medium-sized corpus of 7.9 million words, 2003-2008) that generates news headlines. The model achieved a ROUGE-2 score of 25.30.
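A hedged usage sketch (not part of the original card), assuming the checkpoint loads as an `EncoderDecoderModel` under the id `m3hrdadfi/bert2bert-fa-news-headline`; the Persian input string is only a placeholder:

```python
from transformers import AutoTokenizer, EncoderDecoderModel

# Assumed model id, taken from this card's path.
model_id = "m3hrdadfi/bert2bert-fa-news-headline"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = EncoderDecoderModel.from_pretrained(model_id)

# Generate a headline for a (placeholder) Persian news paragraph.
inputs = tokenizer("متن خبر در اینجا قرار می‌گیرد.", return_tensors="pt")
outputs = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask, max_length=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```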
9 | 10 | For more detail, please follow the [News Headline Generation](https://github.com/m3hrdadfi/news-headline-generation) repo. 11 | 12 | 13 | ## Eval results 14 | The following table summarizes the ROUGE scores obtained by the Bert2Bert model. 15 | 16 | | % | Precision | Recall | FMeasure | 17 | |:-------:|:---------:|:------:|:--------:| 18 | | ROUGE-1 | 43.78 | 45.52 | 43.54 | 19 | | ROUGE-2 | 24.50 | 25.30* | 24.24 | 20 | | ROUGE-L | 41.20 | 42.22 | 40.76 | 21 | 22 | 23 | ## Questions? 24 | Post a Github issue on the [News Headline Generation](https://github.com/hooshvare/news-headline-generation/issues) repo. 25 | -------------------------------------------------------------------------------- /model_cards/m3hrdadfi/bert2bert-fa-wiki-summary/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: fa 3 | license: apache-2.0 4 | tags: 5 | - summarization 6 | --- 7 | 8 | A Bert2Bert model on the Wiki Summary dataset to summarize articles. The model achieved an 8.47 ROUGE-2 score. 9 | 10 | For more detail, please follow the [Wiki Summary](https://github.com/m3hrdadfi/wiki-summary) repo. 11 | 12 | 13 | ## Eval results 14 | The following table summarizes the ROUGE scores obtained by the Bert2Bert model. 15 | 16 | | % | Precision | Recall | FMeasure | 17 | |:-------:|:---------:|:------:|:--------:| 18 | | ROUGE-1 | 28.14 | 30.86 | 27.34 | 19 | | ROUGE-2 | 07.12 | 08.47* | 07.10 | 20 | | ROUGE-L | 28.49 | 25.87 | 25.50 | 21 | 22 | 23 | ## Questions? 24 | Post a Github issue on the [Wiki Summary](https://github.com/m3hrdadfi/wiki-summary/issues) repo. 25 | -------------------------------------------------------------------------------- /model_cards/monsoon-nlp/dv-wave/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: dv 3 | --- 4 | 5 | # dv-wave 6 | 7 | This is a second attempt at a Dhivehi language model trained with 8 | Google Research's [ELECTRA](https://github.com/google-research/electra). 9 | 10 | Tokenization and pre-training CoLab: https://colab.research.google.com/drive/1ZJ3tU9MwyWj6UtQ-8G7QJKTn-hG1uQ9v?usp=sharing 11 | 12 | Using SimpleTransformers to classify news https://colab.research.google.com/drive/1KnyQxRNWG_yVwms_x9MUAqFQVeMecTV7?usp=sharing 13 | 14 | V1: similar performance to mBERT on news classification task after finetuning for 3 epochs (52%) 15 | 16 | V2: fixed tokenizers ```do_lower_case=False``` and ```strip_accents=False``` to preserve vowel signs of Dhivehi 17 | dv-wave: 89% to mBERT: 52% 18 | 19 | ## Corpus 20 | 21 | Trained on @Sofwath's 307MB corpus of Dhivehi text: https://github.com/Sofwath/DhivehiDatasets - this repo also contains the news classification task CSV 22 | 23 | [OSCAR](https://oscar-corpus.com/) was considered but has not been added to pretraining; as of 24 | this writing their web crawl has 126MB of Dhivehi text (79MB deduped). 
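A minimal loading sketch (not part of the original card), assuming the checkpoint id `monsoon-nlp/dv-wave` from this card's path:

```python
from transformers import AutoTokenizer, AutoModel

# Load the Dhivehi ELECTRA encoder; add a task head (e.g. for news classification) when fine-tuning.
tokenizer = AutoTokenizer.from_pretrained("monsoon-nlp/dv-wave")
model = AutoModel.from_pretrained("monsoon-nlp/dv-wave")
```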
25 | 26 | ## Vocabulary 27 | 28 | Included as vocab.txt in the upload - vocab_size is 29874 29 | -------------------------------------------------------------------------------- /model_cards/mrm8488/GuaPeTe-2-tiny/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | widget: 4 | - text: "Murcia es la huerta de Europa porque" 5 | --- 6 | 7 | #GuaPeTe-2-tiny: A proof of concept tiny GPT-2 like model trained on Spanish Wikipedia corpus 8 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RoBERTinha/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: gl 3 | widget: 4 | - text: "Galicia é unha autónoma española." 5 | - text: "A lingua oficial de Galicia é o ." 6 | --- 7 | 8 | # RoBERTinha: RoBERTa-like Language model trained on OSCAR Galician corpus 9 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RoBasquERTa/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: eu 3 | widget: 4 | - text: "Euskara da Euskal Herriko ofiziala" 5 | - text: "Gaur egun, Euskadik Espainia osoko ekonomia du" 6 | --- 7 | 8 | # RoBasquERTa: RoBERTa-like Language model trained on OSCAR Basque corpus 9 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RuPERTa-base-finetuned-pawsx-es/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | datasets: 4 | - xtreme 5 | widget: 6 | - text: "En 2009 se mudó a Filadelfia y en la actualidad vive en Nueva York. Se mudó nuevamente a Filadelfia en 2009 y ahora vive en la ciudad de Nueva York." 7 | --- 8 | 9 | # RuPERTa-base fine-tuned on PAWS-X-es for Paraphrase Identification 10 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RuPERTa-base-finetuned-squadv1/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | datasets: 4 | - squad 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RuPERTa-base-finetuned-squadv2/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | datasets: 4 | - squad_v2 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/mrm8488/bert-base-german-dbmdz-cased-finetuned-pawsx-de/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | datasets: 4 | - xtreme 5 | widget: 6 | - text: "Winarsky ist Mitglied des IEEE, Phi Beta Kappa, des ACM und des Sigma Xi. Winarsky ist Mitglied des ACM, des IEEE, der Phi Beta Kappa und der Sigma Xi." 7 | --- 8 | 9 | # bert-base-german-dbmdz-cased fine-tuned on PAWS-X-de for Paraphrase Identification 10 | -------------------------------------------------------------------------------- /model_cards/mrm8488/camembert-base-finetuned-pawsx-fr/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: fr 3 | datasets: 4 | - xtreme 5 | widget: 6 | - text: "La première série a été mieux reçue par la critique que la seconde. La seconde série a été bien accueillie par la critique, mieux que la première." 
7 | --- 8 | 9 | # Camembert-base fine-tuned on PAWS-X-fr for Paraphrase Identification 10 | -------------------------------------------------------------------------------- /model_cards/mrm8488/electricidad-base-finetuned-pawsx-es/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | datasets: 4 | - xtreme 5 | widget: 6 | - text: "El río Tabaci es una vertiente del río Leurda en Rumania. El río Leurda es un afluente del río Tabaci en Rumania." 7 | --- 8 | 9 | # Electricidad-base fine-tuned on PAWS-X-es for Paraphrase Identification 10 | -------------------------------------------------------------------------------- /model_cards/mrm8488/gpt2-finetuned-recipes-cooking/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | thumbnail: 4 | widget: 5 | - text: "HuggingFace Cake:" 6 | --- 7 | -------------------------------------------------------------------------------- /model_cards/mrm8488/gpt2-finetuned-recipes-cooking_v2/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | thumbnail: 4 | widget: 5 | - text: "HuggingFace Cake:" 6 | --- 7 | -------------------------------------------------------------------------------- /model_cards/mys/electra-base-turkish-cased-ner/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: tr 3 | --- 4 | 5 | ## What is this 6 | 7 | A NER model for Turkish with 48 categories trained on the dataset [Shrinked TWNERTC Turkish NER Data](https://www.kaggle.com/behcetsenturk/shrinked-twnertc-turkish-ner-data-by-kuzgunlar) by Behçet Şentürk, which is itself a filtered and cleaned version of the following automatically labeled dataset: 8 | 9 | > Sahin, H. Bahadir; Eren, Mustafa Tolga; Tirkaz, Caglar; Sonmez, Ozan; Yildiz, Eray (2017), “English/Turkish Wikipedia Named-Entity Recognition and Text Categorization Dataset”, Mendeley Data, v1 http://dx.doi.org/10.17632/cdcztymf4k.1 10 | 11 | ## Backbone model 12 | 13 | The backbone model is [electra-base-turkish-cased-discriminator](https://huggingface.co/dbmdz/electra-base-turkish-cased-discriminator), and I finetuned it for token classification. 14 | 15 | I'm continuing to figure out if it is possible to improve accuracy with this dataset, but it is already usable for non-critic applications. You can reach out to me on [Twitter](https://twitter.com/myusufsarigoz) for discussions and issues. 16 | I will also release a notebook to finetune NER models with Shrinked TWNERTC as well as sample inference code to demonstrate what's possible with this model. 
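Until then, here is a minimal sketch of what inference might look like with the `transformers` NER pipeline. It assumes the checkpoint is published on the Hub under the same ID as this card, `mys/electra-base-turkish-cased-ner`, and the example sentence is only an illustration:

```python
# Minimal sketch, not the official sample code: token classification with the pipeline API.
# Assumes the checkpoint is available on the Hub as "mys/electra-base-turkish-cased-ner".
from transformers import pipeline

ner = pipeline(
    "ner",
    model="mys/electra-base-turkish-cased-ner",
    tokenizer="mys/electra-base-turkish-cased-ner",
    grouped_entities=True,  # merge sub-word pieces into whole entity spans
)

print(ner("Mustafa Kemal Atatürk 1881 yılında Selanik'te doğdu."))
```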
17 | -------------------------------------------------------------------------------- /model_cards/ncoop57/bart-base-code-summarizer-java-v0/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | 5 | license: mit 6 | --- 7 | 8 | ## ncoop57/bart-base-code-summarizer-java-v0 9 | -------------------------------------------------------------------------------- /model_cards/nikokons/gpt2-greek/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: el 3 | --- 4 | 5 | ## gpt2-greek 6 | -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-100M-1/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-100M-2/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-100M-3/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-10M-1/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-10M-2/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-10M-3/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-1B-1/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-1B-2/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-1B-3/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-med-small-1M-1/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-med-small-1M-2/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-med-small-1M-3/README.md: 
-------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/patrickvonplaten/bert2bert_cnn_daily_mail/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | datasets: 5 | - cnn_dailymail 6 | tags: 7 | - summarization 8 | --- 9 | 10 | Bert2Bert Summarization with 🤗EncoderDecoder Framework 11 | This model is a warm-started *BERT2BERT* model fine-tuned on the *CNN/Dailymail* summarization dataset. 12 | 13 | The model achieves a **18.22** ROUGE-2 score on *CNN/Dailymail*'s test dataset. 14 | 15 | For more details on how the model was fine-tuned, please refer to 16 | [this](https://colab.research.google.com/drive/1Ekd5pUeCX7VOrMx94_czTkwNtLN32Uyu?usp=sharing) notebook. 17 | -------------------------------------------------------------------------------- /model_cards/patrickvonplaten/roberta_shared_bbc_xsum/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | datasets: 5 | - xsum 6 | tags: 7 | - summarization 8 | --- 9 | 10 | Shared RoBERTa2RoBERTa Summarization with 🤗EncoderDecoder Framework 11 | This model is a warm-started *RoBERTaShared* model fine-tuned on the *BBC XSum* summarization dataset. 12 | 13 | The model achieves a **16.89** ROUGE-2 score on *BBC XSUM*'s test dataset. 14 | 15 | For more details on how the model was fine-tuned, please refer to 16 | [this](https://colab.research.google.com/drive/1Ekd5pUeCX7VOrMx94_czTkwNtLN32Uyu?usp=sharing) notebook. 17 | -------------------------------------------------------------------------------- /model_cards/pedropei/question-intimacy/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - en 4 | inference: false 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/pradhyra/AWSBlogBert/README.md: -------------------------------------------------------------------------------- 1 | This model is pre-trained on blog articles from AWS Blogs. 2 | 3 | ## Pre-training corpora 4 | The input text contains around 3000 blog articles on [AWS Blogs website](https://aws.amazon.com/blogs/) technical subject matter including AWS products, tools and tutorials. 5 | 6 | ## Pre-training details 7 | I picked a Roberta architecture for masked language modeling (6-layer, 768-hidden, 12-heads, 82M parameters) and its corresponding ByteLevelBPE tokenization strategy. I then followed HuggingFace's Transformers [blog post](https://huggingface.co/blog/how-to-train) to train the model. 8 | I chose to follow the following training set-up: 28k training steps with batches of 64 sequences of length 512 with an initial learning rate 5e-5. The model acheived a training loss of 3.6 on the MLM task over 10 epochs. 9 | -------------------------------------------------------------------------------- /model_cards/pranavpsv/gpt2-genre-story-generator/README.md: -------------------------------------------------------------------------------- 1 | 2 | # GPT2 Genre Based Story Generator 3 | 4 | ## Model description 5 | 6 | GPT2 fine-tuned on genre-based story generation. 7 | 8 | ## Intended uses 9 | 10 | Used to generate stories based on user inputted genre and starting prompts. 
11 | 12 | ## How to use 13 | 14 | #### Supported Genres 15 | superhero, action, drama, horror, thriller, sci_fi 16 | #### Input text format 17 | \ \ Some optional text... 18 | 19 | **Example**: \ \ After discovering time travel, 20 | 21 | ```python 22 | # Example of usage 23 | from transformers import pipeline 24 | 25 | story_gen = pipeline("text-generation", "pranavpsv/gpt2-genre-story-generator") 26 | print(story_gen(" Batman")) 27 | 28 | ``` 29 | 30 | ## Training data 31 | 32 | Initialized with pre-trained weights of "gpt2" checkpoint. Fine-tuned the model on stories of various genres. 33 | -------------------------------------------------------------------------------- /model_cards/rdenadai/BR_BERTo/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: pt 3 | tags: 4 | - portuguese 5 | - brazil 6 | - pt_BR 7 | widget: 8 | - text: gostei muito dessa 9 | --- 10 | 11 | # BR_BERTo 12 | 13 | Portuguese (Brazil) model for text inference. 14 | 15 | ## Params 16 | 17 | Trained on a corpus of 6_993_330 sentences. 18 | 19 | - Vocab size: 150_000 20 | - RobertaForMaskedLM size : 512 21 | - Num train epochs: 3 22 | - Time to train: ~10days (on GCP with a Nvidia T4) 23 | 24 | I follow the great tutorial from HuggingFace team: 25 | 26 | [How to train a new language model from scratch using Transformers and Tokenizers](https://huggingface.co/blog/how-to-train) 27 | 28 | More infor here: 29 | 30 | [BR_BERTo](https://github.com/rdenadai/BR-BERTo) 31 | -------------------------------------------------------------------------------- /model_cards/roberta-large-mnli-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: mit 3 | widget: 4 | - text: "I like you. I love you." 5 | --- 6 | 7 | 8 | ## roberta-large-mnli 9 | 10 | Trained by Facebook, [original source](https://github.com/pytorch/fairseq/tree/master/examples/roberta) 11 | 12 | ```bibtex 13 | @article{liu2019roberta, 14 | title = {RoBERTa: A Robustly Optimized BERT Pretraining Approach}, 15 | author = {Yinhan Liu and Myle Ott and Naman Goyal and Jingfei Du and 16 | Mandar Joshi and Danqi Chen and Omer Levy and Mike Lewis and 17 | Luke Zettlemoyer and Veselin Stoyanov}, 18 | journal={arXiv preprint arXiv:1907.11692}, 19 | year = {2019}, 20 | } 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /model_cards/sarnikowski/electra-small-discriminator-da-256-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: da 3 | license: cc-by-4.0 4 | --- 5 | 6 | # Danish ELECTRA small (cased) 7 | 8 | An [ELECTRA](https://arxiv.org/abs/2003.10555) model pretrained on a custom Danish corpus (~17.5gb). 9 | For details regarding data sources and training procedure, along with benchmarks on downstream tasks, go to: https://github.com/sarnikowski/danish_transformers/tree/main/electra 10 | 11 | ## Usage 12 | 13 | ```python 14 | from transformers import AutoTokenizer, AutoModel 15 | 16 | tokenizer = AutoTokenizer.from_pretrained("sarnikowski/electra-small-discriminator-da-256-cased") 17 | model = AutoModel.from_pretrained("sarnikowski/electra-small-discriminator-da-256-cased") 18 | ``` 19 | 20 | ## Questions? 
21 | 22 | If you have any questions feel free to open an issue on the [danish_transformers](https://github.com/sarnikowski/danish_transformers) repository, or send an email to p.sarnikowski@gmail.com 23 | -------------------------------------------------------------------------------- /model_cards/schmidek/electra-small-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | --- 5 | 6 | ## ELECTRA-small-cased 7 | 8 | This is a cased version of `google/electra-small-discriminator`, trained on the 9 | [OpenWebText corpus](https://skylion007.github.io/OpenWebTextCorpus/). 10 | 11 | Uses the same tokenizer and vocab from `bert-base-cased` 12 | -------------------------------------------------------------------------------- /model_cards/severinsimmler/literary-german-bert/kfold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/model_cards/severinsimmler/literary-german-bert/kfold.png -------------------------------------------------------------------------------- /model_cards/severinsimmler/literary-german-bert/prosa-jahre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/model_cards/severinsimmler/literary-german-bert/prosa-jahre.png -------------------------------------------------------------------------------- /model_cards/shrugging-grace/tweetclassifier/README.md: -------------------------------------------------------------------------------- 1 | # shrugging-grace/tweetclassifier 2 | 3 | ## Model description 4 | This model classifies tweets as either relating to the Covid-19 pandemic or not. 5 | 6 | ## Intended uses & limitations 7 | It is intended to be used on tweets commenting on UK politics, in particular those trending with the #PMQs hashtag, as this refers to weekly Prime Ministers' Questions. 8 | 9 | #### How to use 10 | ``LABEL_0`` means that the tweet relates to Covid-19 11 | 12 | ``LABEL_1`` means that the tweet does not relate to Covid-19 13 | 14 | ## Training data 15 | The model was trained on 1000 tweets (with the "#PMQs'), which were manually labeled by the author. The tweets were collected between May-July 2020. 16 | 17 | ### BibTeX entry and citation info 18 | 19 | This was based on a pretrained version of BERT. 20 | 21 | @article{devlin2018bert, 22 | title={Bert: Pre-training of deep bidirectional transformers for language understanding}, 23 | author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, 24 | journal={arXiv preprint arXiv:1810.04805}, 25 | year={2018} 26 | } 27 | -------------------------------------------------------------------------------- /model_cards/spentaur/yelp/README.md: -------------------------------------------------------------------------------- 1 | # DistilBERT Yelp Review Sentiment 2 | This model is used for sentiment analysis on english yelp reviews. 3 | It is a DistilBERT model trained on 1 million reviews from the yelp open dataset. 4 | It is a regression model, with outputs in the range of ~-2 to ~2. With -2 being 1 star and 2 being 5 stars. 5 | It was trained using the [ktrain](https://github.com/amaiya/ktrain) because of it's ease of use. 
6 | 7 | Example use: 8 | 9 | ``` 10 | tokenizer = AutoTokenizer.from_pretrained( 11 | 'distilbert-base-uncased', use_fast=True) 12 | model = TFAutoModelForSequenceClassification.from_pretrained( 13 | "spentaur/yelp") 14 | 15 | review = "This place is great!" 16 | input_ids = tokenizer.encode(review, return_tensors='tf') 17 | pred = model(input_ids)[0][0][0].numpy() 18 | # pred should === 1.9562385 19 | ``` 20 | -------------------------------------------------------------------------------- /model_cards/stas/tiny-wmt19-en-de/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - en 4 | - de 5 | thumbnail: 6 | tags: 7 | - wmt19 8 | - testing 9 | license: apache-2.0 10 | datasets: 11 | - wmt19 12 | metrics: 13 | - bleu 14 | --- 15 | 16 | # Tiny FSMT 17 | 18 | This is a tiny model that is used in the `transformers` test suite. It doesn't do anything useful, other than testing that `FSMT` works. 19 | -------------------------------------------------------------------------------- /model_cards/surajp/albert-base-sanskrit/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: sa 3 | --- 4 | 5 | 6 | # ALBERT-base-Sanskrit 7 | 8 | 9 | Explaination Notebook Colab: [SanskritALBERT.ipynb](https://colab.research.google.com/github/parmarsuraj99/suraj-parmar/blob/master/_notebooks/2020-05-02-SanskritALBERT.ipynb) 10 | 11 | Size of the model is **46MB** 12 | 13 | Example of usage: 14 | 15 | ``` 16 | tokenizer = AutoTokenizer.from_pretrained("surajp/albert-base-sanskrit") 17 | model = AutoModel.from_pretrained("surajp/albert-base-sanskrit") 18 | 19 | enc=tokenizer.encode("ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥") 20 | print(tokenizer.decode(enc)) 21 | 22 | ps = model(torch.tensor(enc).unsqueeze(1)) 23 | print(ps[0].shape) 24 | ``` 25 | ``` 26 | ''' 27 | Output: 28 | -------- 29 | [CLS] ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥[SEP] 30 | torch.Size([28, 1, 768]) 31 | ``` 32 | 33 | 34 | > Created by [Suraj Parmar/@parmarsuraj99](https://twitter.com/parmarsuraj99) 35 | 36 | > Made with in India 37 | -------------------------------------------------------------------------------- /model_cards/uncnlp/lxmert-base-uncased/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Hao Tan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /model_cards/uncnlp/lxmert-base-uncased/lxmert_model-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/model_cards/uncnlp/lxmert-base-uncased/lxmert_model-1.jpg -------------------------------------------------------------------------------- /model_cards/urduhack/roberta-urdu-small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ur 3 | thumbnail: https://raw.githubusercontent.com/urduhack/urduhack/master/docs/_static/urduhack.png 4 | tags: 5 | - roberta-urdu-small 6 | - urdu 7 | - transformers 8 | license: mit 9 | --- 10 | ## roberta-urdu-small 11 | 12 | [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/urduhack/urduhack/blob/master/LICENSE) 13 | ### Overview 14 | **Language model:** roberta-urdu-small 15 | **Model size:** 125M 16 | **Language:** Urdu 17 | **Training data:** News data from urdu news resources in Pakistan 18 | ### About roberta-urdu-small 19 | roberta-urdu-small is a language model for urdu language. 20 | ``` 21 | from transformers import pipeline 22 | fill_mask = pipeline("fill-mask", model="urduhack/roberta-urdu-small", tokenizer="urduhack/roberta-urdu-small") 23 | ``` 24 | ## Training procedure 25 | roberta-urdu-small was trained on urdu news corpus. Training data was normalized using normalization module from 26 | urduhack to eliminate characters from other languages like arabic. 27 | 28 | ### About Urduhack 29 | Urduhack is a Natural Language Processing (NLP) library for urdu language. 30 | Github: https://github.com/urduhack/urduhack 31 | -------------------------------------------------------------------------------- /model_cards/wietsedv/bert-base-dutch-cased/README.md: -------------------------------------------------------------------------------- 1 | # BERTje: A Dutch BERT model 2 | 3 | BERTje is a Dutch pre-trained BERT model developed at the University of Groningen. 4 | 5 | ⚠️ **The new home of this model is the [GroNLP](https://huggingface.co/GroNLP) organization.** 6 | 7 | BERTje now lives at: [`GroNLP/bert-base-dutch-cased`](https://huggingface.co/GroNLP/bert-base-dutch-cased) 8 | 9 | The model weights of the versions at `wietsedv/` and `GroNLP/` are the same, so do not worry if you use(d) `wietsedv/bert-base-dutch-cased`. 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /model_cards/wptoux/albert-chinese-large-qa/README.md: -------------------------------------------------------------------------------- 1 | # albert-chinese-large-qa 2 | Albert large QA model pretrained from baidu webqa and baidu dureader datasets. 3 | 4 | ## Data source 5 | + baidu webqa 1.0 6 | + baidu dureader 7 | 8 | ## Traing Method 9 | We combined the two datasets together and created a new dataset in squad format, including 705139 samples for training and 69638 samples for validation. 10 | We finetune the model based on the albert chinese large model. 
11 | 12 | ## Hyperparams 13 | + learning_rate 1e-5 14 | + max_seq_length 512 15 | + max_query_length 50 16 | + max_answer_length 300 17 | + doc_stride 256 18 | + num_train_epochs 2 19 | + warmup_steps 1000 20 | + per_gpu_train_batch_size 8 21 | + gradient_accumulation_steps 3 22 | + n_gpu 2 (Nvidia Tesla P100) 23 | 24 | ## Usage 25 | ``` 26 | from transformers import AutoModelForQuestionAnswering, BertTokenizer 27 | 28 | model = AutoModelForQuestionAnswering.from_pretrained('wptoux/albert-chinese-large-qa') 29 | tokenizer = BertTokenizer.from_pretrained('wptoux/albert-chinese-large-qa') 30 | ``` 31 | ***Important: use BertTokenizer*** 32 | 33 | ## MoreInfo 34 | Please visit https://github.com/wptoux/albert-chinese-large-webqa for details. 35 | -------------------------------------------------------------------------------- /model_cards/xlm-mlm-en-2048-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: cc-by-nc-4.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/xlm-roberta-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: mit 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/xlm-roberta-large-finetuned-conll03-german-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | --- 4 | 5 | ## xlm-roberta-large-finetuned-conll03-german 6 | -------------------------------------------------------------------------------- /model_cards/yjernite/bart_eli5/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | datasets: 5 | - eli5 6 | --- 7 | 8 | ## BART ELI5 9 | 10 | Read the article at https://yjernite.github.io/lfqa.html and try the demo at https://huggingface.co/qa/ 11 | -------------------------------------------------------------------------------- /model_cards/yuvraj/summarizer-cnndm/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: "en" 3 | tags: 4 | - summarization 5 | --- 6 | ​ 7 | # Summarization 8 | ​ 9 | ## Model description 10 | ​ 11 | BartForConditionalGeneration model fine tuned for summarization on 10000 samples from the cnn-dailymail dataset 12 | ​ 13 | ## How to use 14 | ​ 15 | PyTorch model available 16 | ​ 17 | ```python 18 | from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline 19 | ​ 20 | tokenizer = AutoTokenizer.from_pretrained("yuvraj/summarizer-cnndm") 21 | AutoModelWithLMHead.from_pretrained("yuvraj/summarizer-cnndm") 22 | ​ 23 | summarizer = pipeline('summarization', model=model, tokenizer=tokenizer) 24 | summarizer("") 25 | ​ 26 | ## Limitations and bias 27 | Trained on a small dataset 28 | -------------------------------------------------------------------------------- /model_cards/yuvraj/xSumm/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: "en" 3 | tags: 4 | - summarization 5 | - extreme summarization 6 | --- 7 | ​ 8 | ## Model description 9 | ​ 10 | BartForConditionalGenerationModel for extreme summarization- creates a one line abstractive summary of a given article 11 | ​ 12 | ## How to use 13 | ​ 14 | PyTorch model available 15 | ​ 16 | 
```python 17 | from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline 18 | ​ 19 | tokenizer = AutoTokenizer.from_pretrained("yuvraj/xSumm") 20 | model = AutoModelWithLMHead.from_pretrained("yuvraj/xSumm") 21 | ​ 22 | xsumm = pipeline('summarization', model=model, tokenizer=tokenizer) 23 | xsumm("") 24 | ​ 25 | ## Limitations and bias 26 | Trained on a small fraction of the xsumm training dataset 27 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py35'] 4 | -------------------------------------------------------------------------------- /scripts/fsmt/tests-to-run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # these scripts need to be run before any changes to FSMT-related code - it should cover all bases 17 | 18 | CUDA_VISIBLE_DEVICES="" RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 19 | RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 20 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = transformers 7 | known_third_party = 8 | absl 9 | conllu 10 | datasets 11 | elasticsearch 12 | fairseq 13 | faiss-cpu 14 | fastprogress 15 | fire 16 | fugashi 17 | git 18 | h5py 19 | matplotlib 20 | nltk 21 | numpy 22 | packaging 23 | pandas 24 | PIL 25 | psutil 26 | pytest 27 | pytorch_lightning 28 | rouge_score 29 | sacrebleu 30 | seqeval 31 | sklearn 32 | streamlit 33 | tensorboardX 34 | tensorflow 35 | tensorflow_datasets 36 | timeout_decorator 37 | torch 38 | torchtext 39 | torchvision 40 | torch_xla 41 | tqdm 42 | 43 | line_length = 119 44 | lines_after_imports = 2 45 | multi_line_output = 3 46 | use_parentheses = True 47 | 48 | [flake8] 49 | ignore = E203, E501, E741, W503, W605 50 | max-line-length = 119 51 | -------------------------------------------------------------------------------- /src/transformers/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/src/transformers/benchmark/__init__.py -------------------------------------------------------------------------------- 
/src/transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseTransformersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /src/transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import GlueDataset, GlueDataTrainingArguments 20 | from .language_modeling import ( 21 | LineByLineTextDataset, 22 | LineByLineWithRefDataset, 23 | LineByLineWithSOPTextDataset, 24 | TextDataset, 25 | TextDatasetForNextSentencePrediction, 26 | ) 27 | from .squad import SquadDataset, SquadDataTrainingArguments 28 | -------------------------------------------------------------------------------- /src/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 20 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 21 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 22 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 23 | -------------------------------------------------------------------------------- /src/transformers/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/src/transformers/models/__init__.py -------------------------------------------------------------------------------- /src/transformers/models/barthez/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_sentencepiece_available, is_tokenizers_available 20 | 21 | 22 | if is_sentencepiece_available(): 23 | from .tokenization_barthez import BarthezTokenizer 24 | 25 | if is_tokenizers_available(): 26 | from .tokenization_barthez_fast import BarthezTokenizerFast 27 | -------------------------------------------------------------------------------- /src/transformers/models/bert_japanese/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer 20 | -------------------------------------------------------------------------------- /src/transformers/models/bertweet/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .tokenization_bertweet import BertweetTokenizer 20 | -------------------------------------------------------------------------------- /src/transformers/models/deberta/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_deberta import DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, DebertaConfig 21 | from .tokenization_deberta import DebertaTokenizer 22 | 23 | 24 | if is_torch_available(): 25 | from .modeling_deberta import ( 26 | DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST, 27 | DebertaForSequenceClassification, 28 | DebertaModel, 29 | DebertaPreTrainedModel, 30 | ) 31 | -------------------------------------------------------------------------------- /src/transformers/models/dialogpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/src/transformers/models/dialogpt/__init__.py -------------------------------------------------------------------------------- /src/transformers/models/encoder_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_encoder_decoder import EncoderDecoderConfig 21 | 22 | 23 | if is_torch_available(): 24 | from .modeling_encoder_decoder import EncoderDecoderModel 25 | -------------------------------------------------------------------------------- /src/transformers/models/fsmt/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_fsmt import FSMT_PRETRAINED_CONFIG_ARCHIVE_MAP, FSMTConfig 21 | from .tokenization_fsmt import FSMTTokenizer 22 | 23 | 24 | if is_torch_available(): 25 | from .modeling_fsmt import FSMTForConditionalGeneration, FSMTModel, PretrainedFSMTModel 26 | -------------------------------------------------------------------------------- /src/transformers/models/herbert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from ...file_utils import is_tokenizers_available 20 | from .tokenization_herbert import HerbertTokenizer 21 | 22 | 23 | if is_tokenizers_available(): 24 | from .tokenization_herbert_fast import HerbertTokenizerFast 25 | -------------------------------------------------------------------------------- /src/transformers/models/marian/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_sentencepiece_available, is_tf_available, is_torch_available 20 | from .configuration_marian import MarianConfig 21 | 22 | 23 | if is_sentencepiece_available(): 24 | from .tokenization_marian import MarianTokenizer 25 | 26 | if is_torch_available(): 27 | from .modeling_marian import MarianMTModel 28 | 29 | if is_tf_available(): 30 | from .modeling_tf_marian import TFMarianMTModel 31 | -------------------------------------------------------------------------------- /src/transformers/models/mmbt/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_mmbt import MMBTConfig 21 | 22 | 23 | if is_torch_available(): 24 | from .modeling_mmbt import MMBTForClassification, MMBTModel, ModalEmbeddings 25 | -------------------------------------------------------------------------------- /src/transformers/models/phobert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .tokenization_phobert import PhobertTokenizer 20 | -------------------------------------------------------------------------------- /src/transformers/models/rag/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_rag import RagConfig 21 | from .retrieval_rag import RagRetriever 22 | from .tokenization_rag import RagTokenizer 23 | 24 | 25 | if is_torch_available(): 26 | from .modeling_rag import RagModel, RagSequenceForGeneration, RagTokenForGeneration 27 | -------------------------------------------------------------------------------- /src/transformers/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/src/transformers/utils/__init__.py -------------------------------------------------------------------------------- /src/transformers/utils/dummy_flax_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..file_utils import requires_flax 3 | 4 | 5 | FLAX_MODEL_MAPPING = None 6 | 7 | 8 | class FlaxAutoModel: 9 | def __init__(self, *args, **kwargs): 10 | requires_flax(self) 11 | 12 | @classmethod 13 | def from_pretrained(self, *args, **kwargs): 14 | requires_flax(self) 15 | 16 | 17 | class FlaxBertForMaskedLM: 18 | def __init__(self, *args, **kwargs): 19 | requires_flax(self) 20 | 21 | @classmethod 22 | def from_pretrained(self, *args, **kwargs): 23 | requires_flax(self) 24 | 25 | 26 | class FlaxBertModel: 27 | def __init__(self, *args, **kwargs): 28 | requires_flax(self) 29 | 30 | @classmethod 31 | def from_pretrained(self, *args, **kwargs): 32 | requires_flax(self) 33 | 34 | 35 | class FlaxRobertaModel: 36 | def __init__(self, *args, **kwargs): 37 | requires_flax(self) 38 | 39 | @classmethod 40 | def from_pretrained(self, *args, **kwargs): 41 | requires_flax(self) 42 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "example_name": "text classification", 3 | "directory_name": "{{cookiecutter.example_name|lower|replace(' ', '-')}}", 4 | "example_shortcut": "{{cookiecutter.directory_name}}", 5 | "model_class": "AutoModel", 6 | "authors": "The HuggingFace Team", 7 | "can_train_from_scratch": ["True", "False"] 8 | } -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "{{cookiecutter.modelname}}", 3 | "uppercase_modelname": "{{cookiecutter.uppercase_modelname}}", 4 | "lowercase_modelname": "{{cookiecutter.lowercase_modelname}}", 5 | "camelcase_modelname": "{{cookiecutter.camelcase_modelname}}", 6 | "authors": "{{cookiecutter.authors}}", 7 | "checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}", 8 | "tokenizer_type": "{{cookiecutter.tokenizer_type}}", 9 | "generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}" 10 | } 11 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "BrandNewBERT", 3 | "uppercase_modelname": "BRAND_NEW_BERT", 4 | "lowercase_modelname": "brand_new_bert", 5 | "camelcase_modelname": "BrandNewBert", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": ["Based on BERT", "Standalone"], 9 | "generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"] 10 | } -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "Template", 3 | "uppercase_modelname": "TEMPLATE", 4 | "lowercase_modelname": "template", 5 | "camelcase_modelname": "Template", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow" 10 | } 11 | -------------------------------------------------------------------------------- 
/templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplatePT", 3 | "uppercase_modelname": "TEMPLATE_PT", 4 | "lowercase_modelname": "template_pt", 5 | "camelcase_modelname": "TemplatePt", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "PyTorch" 10 | } 11 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/standalone.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateBI", 3 | "uppercase_modelname": "TEMPLATE_BI", 4 | "lowercase_modelname": "template_bi", 5 | "camelcase_modelname": "TemplateBi", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "bi-brand-new-bert-base-cased", 8 | "tokenizer_type": "Standalone", 9 | "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow" 10 | } 11 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateTF", 3 | "uppercase_modelname": "TEMPLATE_TF", 4 | "lowercase_modelname": "template_tf", 5 | "camelcase_modelname": "TemplateTf", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "TensorFlow" 10 | } 11 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/dummy-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_type": "roberta" 3 | } -------------------------------------------------------------------------------- /tests/fixtures/empty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/fixtures/empty.txt -------------------------------------------------------------------------------- /tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? 
||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /tests/fixtures/spiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/fixtures/spiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_no_bos.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/fixtures/test_sentencepiece_no_bos.model -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | *.* 2 | cache* 3 | temp* 4 | !*.txt 5 | !*.tsv 6 | !*.json 7 | !.gitignore -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/GermEval/labels.txt: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOCderiv 3 | B-LOCpart 4 | B-ORG 5 | B-ORGderiv 6 | B-ORGpart 7 | B-OTH 8 | B-OTHderiv 9 | B-OTHpart 10 | B-PER 11 | B-PERderiv 12 | B-PERpart 13 | I-LOC 14 | I-LOCderiv 15 | I-LOCpart 16 | I-ORG 17 | I-ORGderiv 18 | I-ORGpart 19 | I-OTH 20 | I-OTHderiv 21 | I-OTHpart 22 | I-PER 23 | I-PERderiv 24 | I-PERpart 25 | O 26 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/dev.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0000 none none A man with a hard hat is dancing. A man wearing a hard hat is dancing. 5.000 3 | 1 main-captions MSRvid 2012test 0002 none none A young child is riding a horse. A child is riding a horse. 4.750 4 | 2 main-captions MSRvid 2012test 0003 none none A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 5.000 5 | 3 main-captions MSRvid 2012test 0007 none none A woman is playing the guitar. A man is playing guitar. 2.400 6 | 4 main-captions MSRvid 2012test 0008 none none A woman is playing the flute. A man is playing a flute. 2.750 7 | 5 main-captions MSRvid 2012test 0010 none none A woman is cutting an onion. A man is cutting onions. 2.615 8 | 6 main-captions MSRvid 2012test 0015 none none A man is erasing a chalk board. The man is erasing the chalk board. 5.000 9 | 7 main-captions MSRvid 2012test 0023 none none A woman is carrying a boy. A woman is carrying her baby. 2.333 10 | 8 main-captions MSRvid 2012test 0027 none none Three men are playing guitars. Three men are on stage playing guitars. 
/tests/fixtures/tests_samples/STS-B/train.tsv:
--------------------------------------------------------------------------------
1 | index genre filename year old_index source1 source2 sentence1 sentence2 score
2 | 0 main-captions MSRvid 2012test 0001 none none A plane is taking off. An air plane is taking off. 5.000
3 | 1 main-captions MSRvid 2012test 0004 none none A man is playing a large flute. A man is playing a flute. 3.800
4 | 2 main-captions MSRvid 2012test 0005 none none A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 3.800
5 | 3 main-captions MSRvid 2012test 0006 none none Three men are playing chess. Two men are playing chess. 2.600
6 | 4 main-captions MSRvid 2012test 0009 none none A man is playing the cello. A man seated is playing the cello. 4.250
7 | 5 main-captions MSRvid 2012test 0011 none none Some men are fighting. Two men are fighting. 4.250
8 | 6 main-captions MSRvid 2012test 0012 none none A man is smoking. A man is skating. 0.500
9 | 7 main-captions MSRvid 2012test 0013 none none The man is playing the piano. The man is playing the guitar. 1.600
10 | 8 main-captions MSRvid 2012test 0014 none none A man is playing on a guitar and singing. A woman is playing an acoustic guitar and singing. 2.200
11 | 
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019-present, the HuggingFace Inc. team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import unittest
17 | from unittest.mock import patch
18 | 
19 | from transformers.testing_utils import CaptureStd
20 | 
21 | 
22 | class CLITest(unittest.TestCase):
23 |     @patch("sys.argv", ["fakeprogrampath", "env"])
24 |     def test_cli_env(self):
25 |         # test transformers-cli env
26 |         import transformers.commands.transformers_cli
27 | 
28 |         with CaptureStd() as cs:
29 |             transformers.commands.transformers_cli.main()
30 |         assert "Python version" in cs.out
31 |         assert "Platform" in cs.out
32 |         assert "Using distributed or parallel set-up in script?" in cs.out
33 | 
--------------------------------------------------------------------------------
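`test_cli.py` patches `sys.argv` and calls the CLI entry point in-process (from a shell this corresponds to running `transformers-cli env`). Reusing only the names that appear in the test itself, the same check could be reproduced outside unittest roughly as follows; this is a sketch for debugging, not a replacement for the test:

```python
# Hypothetical sketch: run the `env` subcommand in-process, as the test above
# does, and capture the environment report it prints.
import sys

import transformers.commands.transformers_cli
from transformers.testing_utils import CaptureStd

sys.argv = ["transformers-cli", "env"]  # same effect as the @patch in the test
with CaptureStd() as cs:
    transformers.commands.transformers_cli.main()

print(cs.out)  # should mention "Python version", "Platform", ...
```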
/tests/test_pipelines_feature_extraction.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from .test_pipelines_common import MonoInputPipelineCommonMixin
18 | 
19 | 
20 | class FeatureExtractionPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
21 |     pipeline_task = "feature-extraction"
22 |     small_models = [
23 |         "sshleifer/tiny-distilbert-base-cased"
24 |     ]  # Default model - Models tested without the @slow decorator
25 |     large_models = [None]  # Models tested with the @slow decorator
26 |     mandatory_keys = {}  # Keys which should be in the output
27 | 
--------------------------------------------------------------------------------
/tests/test_pipelines_sentiment_analysis.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from .test_pipelines_common import MonoInputPipelineCommonMixin
18 | 
19 | 
20 | class SentimentAnalysisPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
21 |     pipeline_task = "sentiment-analysis"
22 |     small_models = [
23 |         "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english"
24 |     ]  # Default model - Models tested without the @slow decorator
25 |     large_models = [None]  # Models tested with the @slow decorator
26 |     mandatory_keys = {"label", "score"}  # Keys which should be in the output
27 | 
--------------------------------------------------------------------------------
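The two pipeline test classes above only declare metadata (task name, tiny default checkpoints, expected output keys); the shared behaviour lives in `MonoInputPipelineCommonMixin`. As a rough illustration of what the sentiment-analysis case implies, a hypothetical direct use of the pipeline with the same tiny checkpoint, checking the `mandatory_keys` declared in the test:

```python
# Hypothetical sketch mirroring SentimentAnalysisPipelineTests: each output
# dict should contain the mandatory "label" and "score" keys.
from transformers import pipeline

nlp = pipeline(
    "sentiment-analysis",
    model="sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
)
outputs = nlp("Who was Jim Henson ?")

for output in outputs:
    assert {"label", "score"} <= output.keys()  # mandatory_keys from the test above
```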
/tests/test_pipelines_text2text_generation.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from .test_pipelines_common import MonoInputPipelineCommonMixin
18 | 
19 | 
20 | class Text2TextGenerationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
21 |     pipeline_task = "text2text-generation"
22 |     small_models = ["patrickvonplaten/t5-tiny-random"]  # Default model - Models tested without the @slow decorator
23 |     large_models = []  # Models tested with the @slow decorator
24 |     invalid_inputs = [4, ""]
25 |     mandatory_keys = ["generated_text"]
26 | 
--------------------------------------------------------------------------------
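For the text2text-generation case, the declared `mandatory_keys` translate into roughly the following expectation (again a sketch only, using the tiny random T5 checkpoint named in the test, so the generated text itself carries no meaning):

```python
# Hypothetical sketch mirroring Text2TextGenerationPipelineTests: each output
# dict must contain the "generated_text" key declared in mandatory_keys.
from transformers import pipeline

text2text = pipeline("text2text-generation", model="patrickvonplaten/t5-tiny-random")
outputs = text2text("Who was Jim Henson ?")

assert "generated_text" in outputs[0]
print(outputs[0]["generated_text"])  # random weights, so the text is gibberish
```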