├── .circleci ├── config.yml └── deploy.sh ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── ---new-benchmark.md │ ├── --new-model-addition.md │ ├── bug-report.md │ ├── feature-request.md │ ├── migration.md │ └── question-help.md ├── PULL_REQUEST_TEMPLATE.md ├── conda │ ├── build.sh │ └── meta.yaml ├── stale.yml └── workflows │ ├── github-torch-hub.yml │ ├── model-templates.yml │ ├── release-conda.yml │ ├── self-push.yml │ └── self-scheduled.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docker ├── transformers-cpu │ └── Dockerfile ├── transformers-gpu │ └── Dockerfile ├── transformers-pytorch-cpu │ └── Dockerfile ├── transformers-pytorch-gpu │ └── Dockerfile ├── transformers-pytorch-tpu │ ├── Dockerfile │ ├── bert-base-cased.jsonnet │ ├── dataset.yaml │ └── docker-entrypoint.sh ├── transformers-tensorflow-cpu │ └── Dockerfile └── transformers-tensorflow-gpu │ └── Dockerfile ├── docs ├── Makefile ├── README.md └── source │ ├── _static │ ├── css │ │ ├── Calibre-Light.ttf │ │ ├── Calibre-Medium.otf │ │ ├── Calibre-Regular.otf │ │ ├── Calibre-Thin.otf │ │ ├── code-snippets.css │ │ └── huggingface.css │ └── js │ │ ├── custom.js │ │ └── huggingface_logo.svg │ ├── benchmarks.rst │ ├── bertology.rst │ ├── conf.py │ ├── contributing.md │ ├── converting_tensorflow_models.rst │ ├── custom_datasets.rst │ ├── examples.md │ ├── favicon.ico │ ├── glossary.rst │ ├── imgs │ ├── local_attention_mask.png │ ├── ppl_chunked.gif │ ├── ppl_full.gif │ ├── ppl_sliding.gif │ ├── transformers_logo_name.png │ ├── warmup_constant_schedule.png │ ├── warmup_cosine_hard_restarts_schedule.png │ ├── warmup_cosine_schedule.png │ ├── warmup_cosine_warm_restarts_schedule.png │ └── warmup_linear_schedule.png │ ├── index.rst │ ├── installation.md │ ├── internal │ ├── generation_utils.rst │ ├── modeling_utils.rst │ ├── pipelines_utils.rst │ ├── tokenization_utils.rst │ └── trainer_utils.rst │ ├── main_classes │ ├── callback.rst │ ├── configuration.rst │ ├── logging.rst │ ├── model.rst │ ├── optimizer_schedules.rst │ ├── output.rst │ ├── pipelines.rst │ ├── processors.rst │ ├── tokenizer.rst │ └── trainer.rst │ ├── migration.md │ ├── model_doc │ ├── albert.rst │ ├── auto.rst │ ├── bart.rst │ ├── barthez.rst │ ├── bert.rst │ ├── bertgeneration.rst │ ├── blenderbot.rst │ ├── camembert.rst │ ├── ctrl.rst │ ├── deberta.rst │ ├── dialogpt.rst │ ├── distilbert.rst │ ├── dpr.rst │ ├── electra.rst │ ├── encoderdecoder.rst │ ├── flaubert.rst │ ├── fsmt.rst │ ├── funnel.rst │ ├── gpt.rst │ ├── gpt2.rst │ ├── layoutlm.rst │ ├── longformer.rst │ ├── lxmert.rst │ ├── marian.rst │ ├── mbart.rst │ ├── mobilebert.rst │ ├── mpnet.rst │ ├── mt5.rst │ ├── pegasus.rst │ ├── prophetnet.rst │ ├── rag.rst │ ├── reformer.rst │ ├── retribert.rst │ ├── roberta.rst │ ├── squeezebert.rst │ ├── t5.rst │ ├── transformerxl.rst │ ├── xlm.rst │ ├── xlmprophetnet.rst │ ├── xlmroberta.rst │ └── xlnet.rst │ ├── model_sharing.rst │ ├── model_summary.rst │ ├── multilingual.rst │ ├── notebooks.md │ ├── perplexity.rst │ ├── philosophy.rst │ ├── preprocessing.rst │ ├── pretrained_models.rst │ ├── quicktour.rst │ ├── serialization.rst │ ├── task_summary.rst │ ├── testing.rst │ ├── tokenizer_summary.rst │ └── training.rst ├── examples ├── README.md ├── adversarial │ ├── README.md │ ├── run_hans.py │ └── utils_hans.py ├── benchmarking │ ├── README.md │ ├── plot_csv_file.py │ ├── run_benchmark.py │ └── run_benchmark_tf.py ├── bert-loses-patience │ ├── README.md │ ├── pabee │ │ ├── __init__.py │ │ 
├── modeling_pabee_albert.py │ │ └── modeling_pabee_bert.py │ ├── run_glue_with_pabee.py │ └── test_run_glue_with_pabee.py ├── bertology │ └── run_bertology.py ├── conftest.py ├── contrib │ ├── README.md │ ├── legacy │ │ └── run_language_modeling.py │ ├── mm-imdb │ │ ├── README.md │ │ ├── run_mmimdb.py │ │ └── utils_mmimdb.py │ ├── run_camembert.py │ ├── run_chinese_ref.py │ ├── run_openai_gpt.py │ ├── run_swag.py │ └── run_transfo_xl.py ├── deebert │ ├── README.md │ ├── entropy_eval.sh │ ├── eval_deebert.sh │ ├── run_glue_deebert.py │ ├── src │ │ ├── __init__.py │ │ ├── modeling_highway_bert.py │ │ └── modeling_highway_roberta.py │ ├── test_glue_deebert.py │ └── train_deebert.sh ├── distillation │ ├── README.md │ ├── distiller.py │ ├── grouped_batch_sampler.py │ ├── lm_seqs_dataset.py │ ├── requirements.txt │ ├── run_squad_w_distillation.py │ ├── scripts │ │ ├── binarized_data.py │ │ ├── extract.py │ │ ├── extract_distilbert.py │ │ └── token_counts.py │ ├── train.py │ ├── training_configs │ │ ├── distilbert-base-cased.json │ │ ├── distilbert-base-multilingual-cased.json │ │ ├── distilbert-base-uncased.json │ │ ├── distilgpt2.json │ │ └── distilroberta-base.json │ └── utils.py ├── language-modeling │ ├── README.md │ ├── run_clm.py │ ├── run_mlm.py │ ├── run_mlm_flax.py │ ├── run_mlm_wwm.py │ └── run_plm.py ├── lightning_base.py ├── longform-qa │ ├── README.md │ ├── eli5_app.py │ └── eli5_utils.py ├── lxmert │ ├── README.md │ ├── demo.ipynb │ ├── extracting_data.py │ ├── modeling_frcnn.py │ ├── processing_image.py │ ├── requirements.txt │ ├── utils.py │ └── visualizing_image.py ├── movement-pruning │ ├── README.md │ ├── Saving_PruneBERT.ipynb │ ├── bertarize.py │ ├── counts_parameters.py │ ├── emmental │ │ ├── __init__.py │ │ ├── configuration_bert_masked.py │ │ ├── modeling_bert_masked.py │ │ └── modules │ │ │ ├── __init__.py │ │ │ ├── binarizer.py │ │ │ └── masked_nn.py │ ├── masked_run_glue.py │ ├── masked_run_squad.py │ └── requirements.txt ├── multiple-choice │ ├── README.md │ ├── run_multiple_choice.py │ ├── run_tf_multiple_choice.py │ └── utils_multiple_choice.py ├── question-answering │ ├── README.md │ ├── run_qa.py │ ├── run_qa_beam_search.py │ ├── run_squad.py │ ├── run_squad_trainer.py │ ├── run_tf_squad.py │ ├── squad_v2_local │ │ ├── evaluate.py │ │ └── squad_v2_local.py │ ├── trainer_qa.py │ └── utils_qa.py ├── rag │ ├── README.md │ ├── __init__.py │ ├── callbacks_rag.py │ ├── consolidate_rag_checkpoint.py │ ├── distributed_retriever.py │ ├── eval_rag.py │ ├── finetune_rag.py │ ├── finetune_rag.sh │ ├── parse_dpr_relevance_data.py │ ├── requirements.txt │ ├── test_data │ │ └── my_knowledge_dataset.csv │ ├── test_distributed_retriever.py │ ├── test_finetune_rag.py │ ├── use_own_knowledge_dataset.py │ └── utils_rag.py ├── requirements.txt ├── seq2seq │ ├── README.md │ ├── __init__.py │ ├── bertabs │ │ ├── README.md │ │ ├── __init__.py │ │ ├── configuration_bertabs.py │ │ ├── convert_bertabs_original_pytorch_checkpoint.py │ │ ├── modeling_bertabs.py │ │ ├── requirements.txt │ │ ├── run_summarization.py │ │ ├── test_utils_summarization.py │ │ └── utils_summarization.py │ ├── builtin_trainer │ │ ├── finetune.sh │ │ ├── finetune_tpu.sh │ │ ├── train_distil_marian_enro.sh │ │ ├── train_distil_marian_enro_tpu.sh │ │ ├── train_distilbart_cnn.sh │ │ └── train_mbart_cc25_enro.sh │ ├── callbacks.py │ ├── convert_model_to_fp16.py │ ├── convert_pl_checkpoint_to_hf.py │ ├── distil_marian_enro_teacher.sh │ ├── distil_marian_no_teacher.sh │ ├── distillation.py │ ├── download_wmt.py │ ├── 
dynamic_bs_example.sh │ ├── finetune.py │ ├── finetune.sh │ ├── finetune_bart_tiny.sh │ ├── finetune_pegasus_xsum.sh │ ├── finetune_t5.sh │ ├── finetune_trainer.py │ ├── make_student.py │ ├── minify_dataset.py │ ├── pack_dataset.py │ ├── precomputed_pseudo_labels.md │ ├── romanian_postprocessing.md │ ├── rouge_cli.py │ ├── run_distributed_eval.py │ ├── run_eval.py │ ├── run_eval_search.py │ ├── save_len_file.py │ ├── save_randomly_initialized_model.py │ ├── sentence_splitter.py │ ├── seq2seq_trainer.py │ ├── seq2seq_training_args.py │ ├── test_bash_script.py │ ├── test_calculate_rouge.py │ ├── test_data │ │ ├── fsmt │ │ │ ├── build-eval-data.py │ │ │ └── fsmt_val_data.json │ │ └── wmt_en_ro │ │ │ ├── test.source │ │ │ ├── test.target │ │ │ ├── train.len │ │ │ ├── train.source │ │ │ ├── train.target │ │ │ ├── val.len │ │ │ ├── val.source │ │ │ └── val.target │ ├── test_datasets.py │ ├── test_finetune_trainer.py │ ├── test_fsmt_bleu_score.py │ ├── test_make_student.py │ ├── test_seq2seq_examples.py │ ├── test_seq2seq_examples_multi_gpu.py │ ├── test_tatoeba_conversion.py │ ├── train_distilbart_cnn.sh │ ├── train_distilbart_xsum.sh │ ├── train_mbart_cc25_enro.sh │ ├── utils.py │ └── xla_spawn.py ├── test_examples.py ├── test_xla_examples.py ├── text-classification │ ├── README.md │ ├── run_glue.py │ ├── run_pl.sh │ ├── run_pl_glue.py │ ├── run_tf_glue.py │ ├── run_tf_text_classification.py │ └── run_xnli.py ├── text-generation │ ├── README.md │ ├── pplm │ │ ├── README.md │ │ ├── imgs │ │ │ ├── headfigure.png │ │ │ └── wooly.png │ │ ├── pplm_classification_head.py │ │ ├── run_pplm.py │ │ └── run_pplm_discrim_train.py │ └── run_generation.py ├── token-classification │ ├── README.md │ ├── run.sh │ ├── run_chunk.sh │ ├── run_ner.py │ ├── run_ner_old.py │ ├── run_old.sh │ ├── run_pl.sh │ ├── run_pl_ner.py │ ├── run_pos.sh │ ├── run_pos_pl.sh │ ├── run_tf_ner.py │ ├── scripts │ │ └── preprocess.py │ ├── tasks.py │ ├── test_ner_examples.py │ └── utils_ner.py └── xla_spawn.py ├── hubconf.py ├── model_cards ├── Cinnamon │ └── electra-small-japanese-discriminator │ │ └── README.md ├── DJSammy │ └── bert-base-danish-uncased_BotXO,ai │ │ └── README.md ├── DeepPavlov │ ├── bert-base-bg-cs-pl-ru-cased │ │ └── README.md │ ├── bert-base-cased-conversational │ │ └── README.md │ ├── bert-base-multilingual-cased-sentence │ │ └── README.md │ ├── rubert-base-cased-conversational │ │ └── README.md │ ├── rubert-base-cased-sentence │ │ └── README.md │ └── rubert-base-cased │ │ └── README.md ├── Geotrend │ ├── bert-base-15lang-cased │ │ └── README.md │ ├── bert-base-ar-cased │ │ └── README.md │ ├── bert-base-bg-cased │ │ └── README.md │ ├── bert-base-de-cased │ │ └── README.md │ ├── bert-base-el-cased │ │ └── README.md │ ├── bert-base-en-ar-cased │ │ └── README.md │ ├── bert-base-en-bg-cased │ │ └── README.md │ ├── bert-base-en-cased │ │ └── README.md │ ├── bert-base-en-de-cased │ │ └── README.md │ ├── bert-base-en-el-cased │ │ └── README.md │ ├── bert-base-en-es-cased │ │ └── README.md │ ├── bert-base-en-fr-cased │ │ └── README.md │ ├── bert-base-en-hi-cased │ │ └── README.md │ ├── bert-base-en-ru-cased │ │ └── README.md │ ├── bert-base-en-sw-cased │ │ └── README.md │ ├── bert-base-en-th-cased │ │ └── README.md │ ├── bert-base-en-tr-cased │ │ └── README.md │ ├── bert-base-en-ur-cased │ │ └── README.md │ ├── bert-base-en-vi-cased │ │ └── README.md │ ├── bert-base-en-zh-cased │ │ └── README.md │ ├── bert-base-es-cased │ │ └── README.md │ ├── bert-base-fr-cased │ │ └── README.md │ ├── bert-base-hi-cased │ │ └── 
README.md │ ├── bert-base-ru-cased │ │ └── README.md │ ├── bert-base-sw-cased │ │ └── README.md │ ├── bert-base-th-cased │ │ └── README.md │ ├── bert-base-tr-cased │ │ └── README.md │ ├── bert-base-ur-cased │ │ └── README.md │ ├── bert-base-vi-cased │ │ └── README.md │ └── bert-base-zh-cased │ │ └── README.md ├── Hate-speech-CNERG │ ├── dehatebert-mono-arabic │ │ └── README.md │ ├── dehatebert-mono-english │ │ └── README.md │ ├── dehatebert-mono-french │ │ └── README.md │ ├── dehatebert-mono-german │ │ └── README.md │ ├── dehatebert-mono-indonesian │ │ └── README.md │ ├── dehatebert-mono-italian │ │ └── README.md │ ├── dehatebert-mono-polish │ │ └── README.md │ ├── dehatebert-mono-portugese │ │ └── README.md │ └── dehatebert-mono-spanish │ │ └── README.md ├── HooshvareLab │ ├── bert-base-parsbert-armanner-uncased │ │ └── README.md │ ├── bert-base-parsbert-ner-uncased │ │ └── README.md │ ├── bert-base-parsbert-peymaner-uncased │ │ └── README.md │ ├── bert-base-parsbert-uncased │ │ └── README.md │ └── bert-fa-base-uncased │ │ └── README.md ├── KB │ ├── albert-base-swedish-cased-alpha │ │ └── README.md │ ├── bert-base-swedish-cased-ner │ │ └── README.md │ └── bert-base-swedish-cased │ │ └── README.md ├── LorenzoDeMattei │ └── GePpeTto │ │ └── README.md ├── Michau │ └── t5-base-en-generate-headline │ │ └── README.md ├── MoseliMotsoehli │ ├── TswanaBert │ │ └── README.md │ └── zuBERTa │ │ └── README.md ├── Musixmatch │ ├── umberto-commoncrawl-cased-v1 │ │ └── README.md │ └── umberto-wikipedia-uncased-v1 │ │ └── README.md ├── NLP4H │ └── ms_bert │ │ └── README.md ├── Naveen-k │ └── KanBERTo │ │ └── README.md ├── NeuML │ ├── bert-small-cord19-squad2 │ │ └── README.md │ ├── bert-small-cord19 │ │ └── README.md │ └── bert-small-cord19qa │ │ └── README.md ├── NlpHUST │ └── vibert4news-base-cased │ │ └── README.md ├── Norod78 │ └── hewiki-articles-distilGPT2py-il │ │ └── README.md ├── Ogayo │ └── Hel-ach-en │ │ └── README.md ├── Primer │ └── bart-squad2 │ │ └── README.md ├── Rostlab │ ├── prot_bert │ │ └── README.md │ ├── prot_bert_bfd │ │ └── README.md │ └── prot_t5_xl_bfd │ │ └── README.md ├── SZTAKI-HLT │ └── hubert-base-cc │ │ └── README.md ├── SparkBeyond │ └── roberta-large-sts-b │ │ └── README.md ├── T-Systems-onsite │ ├── bert-german-dbmdz-uncased-sentence-stsb │ │ └── README.md │ ├── cross-en-de-roberta-sentence-transformer │ │ └── README.md │ └── german-roberta-sentence-transformer-v2 │ │ └── README.md ├── Tereveni-AI │ └── gpt2-124M-uk-fiction │ │ └── README.md ├── TurkuNLP │ ├── bert-base-finnish-cased-v1 │ │ └── README.md │ └── bert-base-finnish-uncased-v1 │ │ └── README.md ├── TypicaAI │ └── magbert-ner │ │ └── README.md ├── Vamsi │ └── T5_Paraphrase_Paws │ │ └── README.md ├── VictorSanh │ └── roberta-base-finetuned-yelp-polarity │ │ └── README.md ├── ViktorAlm │ └── electra-base-norwegian-uncased-discriminator │ │ └── README.md ├── a-ware │ ├── bart-squadv2 │ │ └── README.md │ ├── roberta-large-squad-classification │ │ └── README.md │ └── xlmroberta-squadv2 │ │ └── README.md ├── abhilash1910 │ ├── financial_roberta │ │ └── README.md │ └── french-roberta │ │ └── README.md ├── activebus │ ├── BERT-DK_laptop │ │ └── README.md │ ├── BERT-DK_rest │ │ └── README.md │ ├── BERT-PT_laptop │ │ └── README.md │ ├── BERT-PT_rest │ │ └── README.md │ ├── BERT-XD_Review │ │ └── README.md │ └── BERT_Review │ │ └── README.md ├── adalbertojunior │ └── PTT5-SMALL-SUM │ │ └── README.md ├── ahotrod │ ├── albert_xxlargev1_squad2_512 │ │ └── README.md │ ├── electra_large_discriminator_squad2_512 │ │ └── 
README.md │ ├── roberta_large_squad2 │ │ └── README.md │ └── xlnet_large_squad2_512 │ │ └── README.md ├── ai4bharat │ └── indic-bert │ │ └── README.md ├── akhooli │ ├── gpt2-small-arabic-poetry │ │ └── README.md │ ├── gpt2-small-arabic │ │ └── README.md │ ├── mbart-large-cc25-ar-en │ │ └── README.md │ ├── mbart-large-cc25-en-ar │ │ └── README.md │ ├── personachat-arabic │ │ └── README.md │ ├── xlm-r-large-arabic-sent │ │ └── README.md │ └── xlm-r-large-arabic-toxic │ │ └── README.md ├── albert-base-v1-README.md ├── albert-xxlarge-v2-README.md ├── aliosm │ ├── ComVE-distilgpt2 │ │ └── README.md │ ├── ComVE-gpt2-large │ │ └── README.md │ ├── ComVE-gpt2-medium │ │ └── README.md │ ├── ComVE-gpt2 │ │ └── README.md │ ├── ai-soco-cpp-roberta-small-clas │ │ └── README.md │ ├── ai-soco-cpp-roberta-small │ │ └── README.md │ ├── ai-soco-cpp-roberta-tiny-96-clas │ │ └── README.md │ ├── ai-soco-cpp-roberta-tiny-96 │ │ └── README.md │ ├── ai-soco-cpp-roberta-tiny-clas │ │ └── README.md │ └── ai-soco-cpp-roberta-tiny │ │ └── README.md ├── allegro │ ├── herbert-base-cased │ │ └── README.md │ ├── herbert-klej-cased-tokenizer-v1 │ │ └── README.md │ ├── herbert-klej-cased-v1 │ │ └── README.md │ └── herbert-large-cased │ │ └── README.md ├── allenai │ ├── biomed_roberta_base │ │ └── README.md │ ├── longformer-base-4096-extra.pos.embd.only │ │ └── README.md │ ├── longformer-base-4096 │ │ └── README.md │ ├── scibert_scivocab_cased │ │ └── README.md │ ├── scibert_scivocab_uncased │ │ └── README.md │ ├── wmt16-en-de-12-1 │ │ └── README.md │ ├── wmt16-en-de-dist-12-1 │ │ └── README.md │ ├── wmt16-en-de-dist-6-1 │ │ └── README.md │ ├── wmt19-de-en-6-6-base │ │ └── README.md │ └── wmt19-de-en-6-6-big │ │ └── README.md ├── allenyummy │ └── chinese-bert-wwm-ehr-ner-sl │ │ └── README.md ├── amberoad │ └── bert-multilingual-passage-reranking-msmarco │ │ └── README.md ├── amine │ └── bert-base-5lang-cased │ │ └── README.md ├── antoiloui │ └── belgpt2 │ │ └── README.md ├── aodiniz │ ├── bert_uncased_L-10_H-512_A-8_cord19-200616 │ │ └── README.md │ ├── bert_uncased_L-10_H-512_A-8_cord19-200616_squad2 │ │ └── README.md │ ├── bert_uncased_L-2_H-512_A-8_cord19-200616 │ │ └── README.md │ └── bert_uncased_L-4_H-256_A-4_cord19-200616 │ │ └── README.md ├── asafaya │ ├── bert-base-arabic │ │ └── README.md │ ├── bert-large-arabic │ │ └── README.md │ ├── bert-medium-arabic │ │ └── README.md │ └── bert-mini-arabic │ │ └── README.md ├── ashwani-tanwar │ └── Gujarati-XLM-R-Base │ │ └── README.md ├── aubmindlab │ ├── bert-base-arabert │ │ └── README.md │ └── bert-base-arabertv01 │ │ └── README.md ├── bart-large-cnn │ └── README.md ├── bart-large-xsum │ └── README.md ├── bashar-talafha │ └── multi-dialect-bert-base-arabic │ │ └── README.md ├── bayartsogt │ ├── albert-mongolian │ │ └── README.md │ ├── bert-base-mongolian-cased │ │ └── README.md │ └── bert-base-mongolian-uncased │ │ └── README.md ├── bert-base-cased-README.md ├── bert-base-chinese-README.md ├── bert-base-german-cased-README.md ├── bert-base-german-dbmdz-cased-README.md ├── bert-base-german-dbmdz-uncased-README.md ├── bert-base-multilingual-cased-README.md ├── bert-base-multilingual-uncased-README.md ├── bert-base-uncased-README.md ├── bert-large-cased-README.md ├── binwang │ └── xlnet-base-cased │ │ └── README.md ├── bionlp │ ├── bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 │ │ └── README.md │ ├── bluebert_pubmed_mimic_uncased_L-24_H-1024_A-16 │ │ └── README.md │ ├── bluebert_pubmed_uncased_L-12_H-768_A-12 │ │ └── README.md │ └── bluebert_pubmed_uncased_L-24_H-1024_A-16 │ 
│ └── README.md ├── blinoff │ └── roberta-base-russian-v0 │ │ └── README.md ├── cahya │ ├── bert-base-indonesian-522M │ │ └── README.md │ ├── gpt2-small-indonesian-522M │ │ └── README.md │ └── roberta-base-indonesian-522M │ │ └── README.md ├── cambridgeltl │ └── BioRedditBERT-uncased │ │ └── README.md ├── camembert-base-README.md ├── camembert │ ├── camembert-base-ccnet-4gb │ │ └── README.md │ ├── camembert-base-ccnet │ │ └── README.md │ ├── camembert-base-oscar-4gb │ │ └── README.md │ ├── camembert-base-wikipedia-4gb │ │ └── README.md │ └── camembert-large │ │ └── README.md ├── canwenxu │ └── BERT-of-Theseus-MNLI │ │ └── README.md ├── cedpsam │ └── chatbot_fr │ │ └── README.md ├── ceostroff │ └── harry-potter-gpt2-fanfiction │ │ └── README.md ├── chrisliu298 │ └── arxiv_ai_gpt2 │ │ └── README.md ├── cimm-kzn │ ├── endr-bert │ │ └── README.md │ ├── enrudr-bert │ │ └── README.md │ └── rudr-bert │ │ └── README.md ├── clue │ ├── albert_chinese_small │ │ └── README.md │ ├── albert_chinese_tiny │ │ └── README.md │ ├── roberta_chinese_3L312_clue_tiny │ │ └── README.md │ ├── roberta_chinese_base │ │ └── README.md │ ├── roberta_chinese_large │ │ └── README.md │ └── xlnet_chinese_large │ │ └── README.md ├── codegram │ ├── calbert-base-uncased │ │ └── README.md │ └── calbert-tiny-uncased │ │ └── README.md ├── cooelf │ └── limitbert │ │ └── README.md ├── csarron │ ├── bert-base-uncased-squad-v1 │ │ └── README.md │ ├── mobilebert-uncased-squad-v1 │ │ └── README.md │ ├── mobilebert-uncased-squad-v2 │ │ └── README.md │ └── roberta-base-squad-v1 │ │ └── README.md ├── daigo │ └── bert-base-japanese-sentiment │ │ └── README.md ├── dbmdz │ ├── bert-base-german-cased │ │ └── README.md │ ├── bert-base-german-europeana-cased │ │ └── README.md │ ├── bert-base-german-europeana-uncased │ │ └── README.md │ ├── bert-base-german-uncased │ │ └── README.md │ ├── bert-base-italian-cased │ │ └── README.md │ ├── bert-base-italian-uncased │ │ └── README.md │ ├── bert-base-italian-xxl-cased │ │ └── README.md │ ├── bert-base-italian-xxl-uncased │ │ └── README.md │ ├── bert-base-turkish-128k-cased │ │ └── README.md │ ├── bert-base-turkish-128k-uncased │ │ └── README.md │ ├── bert-base-turkish-cased │ │ └── README.md │ ├── bert-base-turkish-uncased │ │ └── README.md │ ├── distilbert-base-turkish-cased │ │ └── README.md │ ├── electra-base-italian-xxl-cased-discriminator │ │ └── README.md │ ├── electra-base-italian-xxl-cased-generator │ │ └── README.md │ ├── electra-base-turkish-cased-discriminator │ │ └── README.md │ └── electra-small-turkish-cased-discriminator │ │ └── README.md ├── dccuchile │ ├── bert-base-spanish-wwm-cased │ │ └── README.md │ └── bert-base-spanish-wwm-uncased │ │ └── README.md ├── deepset │ ├── bert-base-german-cased-oldvocab │ │ └── README.md │ ├── electra-base-squad2 │ │ └── README.md │ ├── gbert-base │ │ └── README.md │ ├── gbert-large │ │ └── README.md │ ├── gelectra-base-generator │ │ └── README.md │ ├── gelectra-base │ │ └── README.md │ ├── gelectra-large-generator │ │ └── README.md │ ├── gelectra-large │ │ └── README.md │ ├── minilm-uncased-squad2 │ │ └── README.md │ ├── quora_dedup_bert_base │ │ └── README.md │ ├── roberta-base-squad2-covid │ │ └── README.md │ ├── roberta-base-squad2-v2 │ │ └── README.md │ ├── roberta-base-squad2 │ │ └── README.md │ ├── sentence_bert │ │ └── README.md │ └── xlm-roberta-large-squad2 │ │ └── README.md ├── digitalepidemiologylab │ └── covid-twitter-bert │ │ └── README.md ├── distilbert-base-cased-README.md ├── distilbert-base-cased-distilled-squad-README.md ├── 
distilbert-base-german-cased-README.md ├── distilbert-base-multilingual-cased-README.md ├── distilbert-base-uncased-README.md ├── distilbert-base-uncased-distilled-squad-README.md ├── distilbert-base-uncased-finetuned-sst-2-english-README.md ├── distilgpt2-README.md ├── distilroberta-base-README.md ├── djstrong │ └── bg_cs_pl_ru_cased_L-12_H-768_A-12 │ │ └── README.md ├── dkleczek │ ├── bert-base-polish-cased-v1 │ │ └── README.md │ └── bert-base-polish-uncased-v1 │ │ └── README.md ├── dslim │ └── bert-base-NER │ │ └── README.md ├── dumitrescustefan │ ├── bert-base-romanian-cased-v1 │ │ └── README.md │ └── bert-base-romanian-uncased-v1 │ │ └── README.md ├── e-tony │ └── gpt2-rnm │ │ └── README.md ├── elgeish │ ├── cs224n-squad2.0-albert-base-v2 │ │ └── README.md │ ├── cs224n-squad2.0-albert-large-v2 │ │ └── README.md │ ├── cs224n-squad2.0-albert-xxlarge-v1 │ │ └── README.md │ ├── cs224n-squad2.0-distilbert-base-uncased │ │ └── README.md │ └── cs224n-squad2.0-roberta-base │ │ └── README.md ├── emilyalsentzer │ ├── Bio_ClinicalBERT │ │ └── README.md │ └── Bio_Discharge_Summary_BERT │ │ └── README.md ├── etalab-ia │ └── camembert-base-squadFR-fquad-piaf │ │ └── README.md ├── ethanyt │ ├── guwenbert-base │ │ └── README.md │ └── guwenbert-large │ │ └── README.md ├── facebook │ ├── bart-large-cnn │ │ └── README.md │ ├── bart-large-mnli │ │ └── README.md │ ├── bart-large │ │ └── README.md │ ├── rag-sequence-base │ │ └── README.md │ ├── rag-sequence-nq │ │ └── README.md │ ├── rag-token-base │ │ └── README.md │ ├── rag-token-nq │ │ └── README.md │ ├── rag-token-nq_new │ │ └── README.md │ ├── wmt19-de-en │ │ └── README.md │ ├── wmt19-en-de │ │ └── README.md │ ├── wmt19-en-ru │ │ └── README.md │ └── wmt19-ru-en │ │ └── README.md ├── flexudy │ └── t5-base-multi-sentence-doctor │ │ ├── README.md │ │ └── sent-banner.png ├── fmikaelian │ ├── camembert-base-fquad │ │ └── README.md │ ├── camembert-base-squad │ │ └── README.md │ └── flaubert-base-uncased-squad │ │ └── README.md ├── fran-martinez │ └── scibert_scivocab_cased_ner_jnlpba │ │ └── README.md ├── funnel-transformer │ ├── intermediate-base │ │ └── README.md │ ├── intermediate │ │ └── README.md │ ├── large-base │ │ └── README.md │ ├── large │ │ └── README.md │ ├── medium-base │ │ └── README.md │ ├── medium │ │ └── README.md │ ├── small-base │ │ └── README.md │ ├── small │ │ └── README.md │ ├── xlarge-base │ │ └── README.md │ └── xlarge │ │ └── README.md ├── gaochangkuan │ └── model_dir │ │ └── README.md ├── german-nlp-group │ └── electra-base-german-uncased │ │ └── README.md ├── giganticode │ └── StackOBERTflow-comments-small-v1 │ │ └── README.md ├── gilf │ ├── french-camembert-postag-model │ │ └── README.md │ └── french-postag-model │ │ └── README.md ├── google │ ├── bert2bert_L-24_wmt_de_en │ │ └── README.md │ ├── bert2bert_L-24_wmt_en_de │ │ └── README.md │ ├── bert_uncased_L-10_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-10_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-10_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-10_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-12_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-12_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-12_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-12_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-2_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-2_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-2_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-2_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-4_H-128_A-2 │ │ └── README.md │ ├── 
bert_uncased_L-4_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-4_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-4_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-6_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-6_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-6_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-6_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-8_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-8_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-8_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-8_H-768_A-12 │ │ └── README.md │ ├── electra-base-discriminator │ │ └── README.md │ ├── electra-base-generator │ │ └── README.md │ ├── electra-large-discriminator │ │ └── README.md │ ├── electra-large-generator │ │ └── README.md │ ├── electra-small-discriminator │ │ └── README.md │ ├── electra-small-generator │ │ └── README.md │ ├── mobilebert-uncased │ │ └── README.md │ ├── reformer-crime-and-punishment │ │ └── README.md │ ├── reformer-enwik8 │ │ └── README.md │ ├── roberta2roberta_L-24_bbc │ │ └── README.md │ ├── roberta2roberta_L-24_cnn_daily_mail │ │ └── README.md │ ├── roberta2roberta_L-24_discofuse │ │ └── README.md │ ├── roberta2roberta_L-24_gigaword │ │ └── README.md │ └── roberta2roberta_L-24_wikisplit │ │ └── README.md ├── gpt2-README.md ├── gpt2-large-README.md ├── gpt2-medium-README.md ├── gpt2-xl-README.md ├── gsarti │ ├── biobert-nli │ │ └── README.md │ ├── covidbert-nli │ │ └── README.md │ └── scibert-nli │ │ └── README.md ├── gurkan08 │ └── bert-turkish-text-classification │ │ └── README.md ├── hatmimoha │ └── arabic-ner │ │ └── README.md ├── healx │ ├── gpt-2-pubmed-large │ │ └── README.md │ └── gpt-2-pubmed-medium │ │ └── README.md ├── henryk │ ├── bert-base-multilingual-cased-finetuned-dutch-squad2 │ │ └── README.md │ ├── bert-base-multilingual-cased-finetuned-polish-squad1 │ │ └── README.md │ └── bert-base-multilingual-cased-finetuned-polish-squad2 │ │ └── README.md ├── huawei-noah │ ├── DynaBERT_MNLI │ │ └── README.md │ ├── DynaBERT_SST-2 │ │ └── README.md │ └── TinyBERT_General_4L_312D │ │ └── README.md ├── huggingface │ ├── CodeBERTa-language-id │ │ └── README.md │ └── CodeBERTa-small-v1 │ │ └── README.md ├── huseinzol05 │ ├── albert-base-bahasa-cased │ │ └── README.md │ ├── albert-tiny-bahasa-cased │ │ └── README.md │ ├── bert-base-bahasa-cased │ │ └── README.md │ ├── electra-base-discriminator-bahasa-cased │ │ └── README.md │ ├── electra-base-generator-bahasa-cased │ │ └── README.md │ ├── electra-small-discriminator-bahasa-cased │ │ └── README.md │ ├── electra-small-generator-bahasa-cased │ │ └── README.md │ ├── gpt2-117M-bahasa-cased │ │ └── README.md │ ├── gpt2-345M-bahasa-cased │ │ └── README.md │ ├── t5-base-bahasa-cased │ │ └── README.md │ ├── t5-base-bahasa-summarization-cased │ │ └── README.md │ ├── t5-small-bahasa-cased │ │ └── README.md │ ├── t5-small-bahasa-summarization-cased │ │ └── README.md │ ├── tiny-bert-bahasa-cased │ │ └── README.md │ └── xlnet-base-bahasa-cased │ │ └── README.md ├── iarfmoose │ ├── bert-base-cased-qa-evaluator │ │ └── README.md │ ├── roberta-base-bulgarian-pos │ │ └── README.md │ ├── roberta-base-bulgarian │ │ └── README.md │ ├── roberta-small-bulgarian-pos │ │ └── README.md │ ├── roberta-small-bulgarian │ │ └── README.md │ └── t5-base-question-generator │ │ └── README.md ├── illuin │ ├── camembert-base-fquad │ │ └── README.md │ ├── camembert-large-fquad │ │ └── README.md │ └── lepetit │ │ └── README.md ├── indobenchmark │ ├── indobert-base-p1 │ │ └── README.md │ ├── indobert-base-p2 │ │ └── README.md │ ├── 
indobert-large-p1 │ │ └── README.md │ ├── indobert-large-p2 │ │ └── README.md │ ├── indobert-lite-base-p1 │ │ └── README.md │ ├── indobert-lite-base-p2 │ │ └── README.md │ ├── indobert-lite-large-p1 │ │ └── README.md │ └── indobert-lite-large-p2 │ │ └── README.md ├── indolem │ └── indobert-base-uncased │ │ └── README.md ├── ipuneetrathore │ └── bert-base-cased-finetuned-finBERT │ │ └── README.md ├── iuliaturc │ └── bert_uncased_L-2_H-128_A-2 │ │ └── README.md ├── ixa-ehu │ ├── berteus-base-cased │ │ └── README.md │ └── ixambert-base-cased │ │ └── README.md ├── jannesg │ ├── bertsson │ │ └── README.md │ ├── takalane_afr_roberta │ │ └── README.md │ ├── takalane_nbl_roberta │ │ └── README.md │ ├── takalane_nso_roberta │ │ └── README.md │ ├── takalane_sot_roberta │ │ └── README.md │ ├── takalane_ssw_roberta │ │ └── README.md │ ├── takalane_tsn_roberta │ │ └── README.md │ ├── takalane_tso_roberta │ │ └── README.md │ ├── takalane_ven_roberta │ │ └── README.md │ ├── takalane_xho_roberta │ │ └── README.md │ └── takalane_zul_roberta │ │ └── README.md ├── jcblaise │ ├── bert-tagalog-base-cased-WWM │ │ └── README.md │ ├── bert-tagalog-base-cased │ │ └── README.md │ ├── bert-tagalog-base-uncased-WWM │ │ └── README.md │ ├── bert-tagalog-base-uncased │ │ └── README.md │ ├── distilbert-tagalog-base-cased │ │ └── README.md │ ├── electra-tagalog-base-cased-discriminator │ │ └── README.md │ ├── electra-tagalog-base-cased-generator │ │ └── README.md │ ├── electra-tagalog-base-uncased-discriminator │ │ └── README.md │ ├── electra-tagalog-base-uncased-generator │ │ └── README.md │ ├── electra-tagalog-small-cased-discriminator │ │ └── README.md │ ├── electra-tagalog-small-cased-generator │ │ └── README.md │ ├── electra-tagalog-small-uncased-discriminator │ │ └── README.md │ └── electra-tagalog-small-uncased-generator │ │ └── README.md ├── jimregan │ └── BERTreach │ │ └── README.md ├── jme-p │ └── shrugging-grace-tweet-classifier │ │ └── README.md ├── joeddav │ ├── bart-large-mnli-yahoo-answers │ │ └── README.md │ └── xlm-roberta-large-xnli │ │ └── README.md ├── jordimas │ └── julibert │ │ └── README.md ├── jplu │ ├── tf-camembert-base │ │ └── README.md │ ├── tf-xlm-r-ner-40-lang │ │ └── README.md │ ├── tf-xlm-roberta-base │ │ └── README.md │ └── tf-xlm-roberta-large │ │ └── README.md ├── julien-c │ ├── EsperBERTo-small-pos │ │ └── README.md │ ├── EsperBERTo-small │ │ └── README.md │ ├── bert-xsmall-dummy │ │ └── README.md │ └── dummy-unknown │ │ └── README.md ├── keshan │ └── SinhalaBERTo │ │ └── README.md ├── krevas │ ├── finance-koelectra-base-discriminator │ │ └── README.md │ ├── finance-koelectra-base-generator │ │ └── README.md │ ├── finance-koelectra-small-discriminator │ │ └── README.md │ └── finance-koelectra-small-generator │ │ └── README.md ├── ktrapeznikov │ ├── albert-xlarge-v2-squad-v2 │ │ └── README.md │ ├── biobert_v1.1_pubmed_squad_v2 │ │ └── README.md │ ├── gpt2-medium-topic-news │ │ └── README.md │ └── scibert_scivocab_uncased_squad_v2 │ │ └── README.md ├── kuisailab │ ├── albert-base-arabic │ │ └── README.md │ ├── albert-large-arabic │ │ └── README.md │ └── albert-xlarge-arabic │ │ └── README.md ├── kuppuluri │ ├── telugu_bertu │ │ └── README.md │ ├── telugu_bertu_ner │ │ └── README.md │ ├── telugu_bertu_pos │ │ └── README.md │ └── telugu_bertu_tydiqa │ │ └── README.md ├── lanwuwei │ └── GigaBERT-v3-Arabic-and-English │ │ └── README.md ├── loodos │ ├── albert-base-turkish-uncased │ │ └── README.md │ ├── bert-base-turkish-uncased │ │ └── README.md │ ├── 
electra-base-turkish-64k-uncased-discriminator │ │ └── README.md │ ├── electra-base-turkish-uncased-discriminator │ │ └── README.md │ ├── electra-small-turkish-cased-discriminator │ │ └── README.md │ └── electra-small-turkish-uncased-discriminator │ │ └── README.md ├── lordtt13 │ ├── COVID-SciBERT │ │ └── README.md │ └── emo-mobilebert │ │ └── README.md ├── lserinol │ └── bert-turkish-question-answering │ │ └── README.md ├── lvwerra │ ├── bert-imdb │ │ └── README.md │ ├── gpt2-imdb-ctrl │ │ └── README.md │ ├── gpt2-imdb-pos │ │ └── README.md │ ├── gpt2-imdb │ │ └── README.md │ └── gpt2-medium-taboo │ │ └── README.md ├── lysandre │ ├── arxiv-nlp │ │ └── README.md │ └── arxiv │ │ └── README.md ├── m3hrdadfi │ ├── albert-fa-base-v2 │ │ └── README.md │ ├── bert2bert-fa-news-headline │ │ └── README.md │ └── bert2bert-fa-wiki-summary │ │ └── README.md ├── microsoft │ ├── DeBERTa-base │ │ └── README.md │ ├── DeBERTa-large │ │ └── README.md │ ├── DialoGPT-large │ │ └── README.md │ ├── DialoGPT-medium │ │ └── README.md │ ├── DialoGPT-small │ │ └── README.md │ ├── MiniLM-L12-H384-uncased │ │ └── README.md │ ├── Multilingual-MiniLM-L12-H384 │ │ └── README.md │ ├── codebert-base-mlm │ │ └── README.md │ ├── codebert-base │ │ └── README.md │ ├── layoutlm-base-uncased │ │ └── README.md │ ├── layoutlm-large-uncased │ │ └── README.md │ ├── prophetnet-large-uncased-cnndm │ │ └── README.md │ ├── prophetnet-large-uncased-squad-qg │ │ └── README.md │ ├── prophetnet-large-uncased │ │ └── README.md │ ├── xprophetnet-large-wiki100-cased-xglue-ntg │ │ └── README.md │ ├── xprophetnet-large-wiki100-cased-xglue-qg │ │ └── README.md │ └── xprophetnet-large-wiki100-cased │ │ └── README.md ├── monilouise │ └── ner_pt_br │ │ └── README.md ├── monologg │ ├── koelectra-base-discriminator │ │ └── README.md │ ├── koelectra-base-generator │ │ └── README.md │ ├── koelectra-small-discriminator │ │ └── README.md │ └── koelectra-small-generator │ │ └── README.md ├── monsoon-nlp │ └── dv-wave │ │ └── README.md ├── moumeneb1 │ └── flaubert-base-cased-ecology_crisis │ │ └── README.md ├── mrm8488 │ ├── CodeBERTaPy │ │ └── README.md │ ├── GPT-2-finetuned-CORD19 │ │ └── README.md │ ├── GPT-2-finetuned-common_gen │ │ └── README.md │ ├── GPT-2-finetuned-covid-bio-medrxiv │ │ └── README.md │ ├── GuaPeTe-2-tiny │ │ └── README.md │ ├── RoBERTinha │ │ └── README.md │ ├── RoBasquERTa │ │ └── README.md │ ├── RuPERTa-base-finetuned-ner │ │ └── README.md │ ├── RuPERTa-base-finetuned-pawsx-es │ │ └── README.md │ ├── RuPERTa-base-finetuned-pos │ │ └── README.md │ ├── RuPERTa-base-finetuned-squadv1 │ │ └── README.md │ ├── RuPERTa-base-finetuned-squadv2 │ │ └── README.md │ ├── RuPERTa-base │ │ └── README.md │ ├── TinyBERT-spanish-uncased-finetuned-ner │ │ └── README.md │ ├── bert-base-german-dbmdz-cased-finetuned-pawsx-de │ │ └── README.md │ ├── bert-base-german-finetuned-ler │ │ └── README.md │ ├── bert-base-spanish-wwm-cased-finetuned-spa-squad2-es │ │ └── README.md │ ├── bert-italian-finedtuned-squadv1-it-alfa │ │ └── README.md │ ├── bert-medium-finetuned-squadv2 │ │ └── README.md │ ├── bert-mini-finetuned-squadv2 │ │ └── README.md │ ├── bert-mini2bert-mini-finetuned-cnn_daily_mail-summarization │ │ └── README.md │ ├── bert-multi-cased-finedtuned-xquad-tydiqa-goldp │ │ └── README.md │ ├── bert-multi-cased-finetuned-xquadv1 │ │ └── README.md │ ├── bert-multi-uncased-finetuned-xquadv1 │ │ └── README.md │ ├── bert-small-finetuned-squadv2 │ │ └── README.md │ ├── bert-small-finetuned-typo-detection │ │ └── README.md │ ├── 
bert-small2bert-small-finetuned-cnn_daily_mail-summarization │ │ └── README.md │ ├── bert-spanish-cased-finetuned-ner │ │ └── README.md │ ├── bert-spanish-cased-finetuned-pos-syntax │ │ └── README.md │ ├── bert-spanish-cased-finetuned-pos │ │ └── README.md │ ├── bert-tiny-finetuned-squadv2 │ │ └── README.md │ ├── bert-uncased-finetuned-qnli │ │ └── README.md │ ├── camembert-base-finetuned-pawsx-fr │ │ └── README.md │ ├── chEMBL_smiles_v1 │ │ └── README.md │ ├── codeBERTaJS │ │ └── README.md │ ├── codebert-base-finetuned-detect-insecure-code │ │ └── README.md │ ├── distilbert-base-multi-cased-finetuned-typo-detection │ │ └── README.md │ ├── distilbert-multi-finetuned-for-xqua-on-tydiqa │ │ └── README.md │ ├── distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es │ │ └── README.md │ ├── distilroberta-base-finetuned-sentiment │ │ └── README.md │ ├── electra-base-finetuned-squadv1 │ │ └── README.md │ ├── electra-small-finetuned-squadv1 │ │ └── README.md │ ├── electra-small-finetuned-squadv2 │ │ └── README.md │ ├── electricidad-base-discriminator │ │ └── README.md │ ├── electricidad-base-finetuned-pawsx-es │ │ └── README.md │ ├── electricidad-base-generator │ │ └── README.md │ ├── electricidad-small-discriminator │ │ └── README.md │ ├── electricidad-small-finetuned-squadv1-es │ │ └── README.md │ ├── gpt2-finetuned-recipes-cooking │ │ └── README.md │ ├── gpt2-finetuned-recipes-cooking_v2 │ │ └── README.md │ ├── gpt2-imdb-neg │ │ └── README.md │ ├── gpt2-imdb-neutral │ │ └── README.md │ ├── longformer-base-4096-finetuned-squadv2 │ │ └── README.md │ ├── mT5-small-finetuned-tydiqa-for-xqa │ │ └── README.md │ ├── mobilebert-uncased-finetuned-squadv1 │ │ └── README.md │ ├── mobilebert-uncased-finetuned-squadv2 │ │ └── README.md │ ├── roberta-base-1B-1-finetuned-squadv1 │ │ └── README.md │ ├── roberta-base-1B-1-finetuned-squadv2 │ │ └── README.md │ ├── roberta-large-finetuned-wsc │ │ └── README.md │ ├── spanbert-base-finetuned-squadv1 │ │ └── README.md │ ├── spanbert-base-finetuned-squadv2 │ │ └── README.md │ ├── spanbert-base-finetuned-tacred │ │ └── README.md │ ├── spanbert-finetuned-squadv1 │ │ └── README.md │ ├── spanbert-finetuned-squadv2 │ │ └── README.md │ ├── spanbert-large-finetuned-squadv1 │ │ └── README.md │ ├── spanbert-large-finetuned-squadv2 │ │ └── README.md │ ├── spanbert-large-finetuned-tacred │ │ └── README.md │ ├── squeezebert-finetuned-squadv1 │ │ └── README.md │ ├── squeezebert-finetuned-squadv2 │ │ └── README.md │ ├── t5-base-finetuned-break_data-question-retrieval │ │ └── README.md │ ├── t5-base-finetuned-break_data │ │ └── README.md │ ├── t5-base-finetuned-common_gen │ │ └── README.md │ ├── t5-base-finetuned-e2m-intent │ │ └── README.md │ ├── t5-base-finetuned-emotion │ │ └── README.md │ ├── t5-base-finetuned-imdb-sentiment │ │ └── README.md │ ├── t5-base-finetuned-qasc │ │ └── README.md │ ├── t5-base-finetuned-quarel │ │ └── README.md │ ├── t5-base-finetuned-quartz │ │ └── README.md │ ├── t5-base-finetuned-question-generation-ap │ │ └── README.md │ ├── t5-base-finetuned-sarcasm-twitter │ │ └── README.md │ ├── t5-base-finetuned-span-sentiment-extraction │ │ └── README.md │ ├── t5-base-finetuned-squadv2 │ │ └── README.md │ ├── t5-base-finetuned-summarize-news │ │ └── README.md │ ├── t5-base-finetuned-wikiSQL-sql-to-en │ │ └── README.md │ ├── t5-base-finetuned-wikiSQL │ │ └── README.md │ ├── t5-small-finetuned-emotion │ │ └── README.md │ ├── t5-small-finetuned-imdb-sentiment │ │ └── README.md │ ├── t5-small-finetuned-quora-for-paraphrasing │ │ └── README.md │ ├── 
t5-small-finetuned-squadv1 │ │ └── README.md │ ├── t5-small-finetuned-squadv2 │ │ └── README.md │ ├── t5-small-finetuned-wikiSQL │ │ └── README.md │ ├── umberto-wikipedia-uncased-v1-finetuned-squadv1-it │ │ └── README.md │ └── xlm-multi-finetuned-xquadv1 │ │ └── README.md ├── mymusise │ └── gpt2-medium-chinese │ │ └── README.md ├── mys │ └── electra-base-turkish-cased-ner │ │ └── README.md ├── ncoop57 │ └── bart-base-code-summarizer-java-v0 │ │ └── README.md ├── neuralmind │ ├── bert-base-portuguese-cased │ │ └── README.md │ └── bert-large-portuguese-cased │ │ └── README.md ├── neuralspace-reverie │ ├── indic-transformers-bn-bert │ │ └── README.md │ ├── indic-transformers-bn-distilbert │ │ └── README.md │ ├── indic-transformers-bn-roberta │ │ └── README.md │ ├── indic-transformers-bn-xlmroberta │ │ └── README.md │ ├── indic-transformers-hi-bert │ │ └── README.md │ ├── indic-transformers-hi-distilbert │ │ └── README.md │ ├── indic-transformers-hi-roberta │ │ └── README.md │ ├── indic-transformers-hi-xlmroberta │ │ └── README.md │ ├── indic-transformers-te-bert │ │ └── README.md │ ├── indic-transformers-te-distilbert │ │ └── README.md │ ├── indic-transformers-te-roberta │ │ └── README.md │ └── indic-transformers-te-xlmroberta │ │ └── README.md ├── neuraly │ └── bert-base-italian-cased-sentiment │ │ └── README.md ├── neurocode │ └── IsRoBERTa │ │ └── README.md ├── nghuyong │ ├── ernie-1.0 │ │ └── README.md │ ├── ernie-2.0-en │ │ └── README.md │ ├── ernie-2.0-large-en │ │ └── README.md │ └── ernie-tiny │ │ └── README.md ├── nikokons │ └── gpt2-greek │ │ └── README.md ├── nlpaueb │ ├── bert-base-greek-uncased-v1 │ │ └── README.md │ └── legal-bert-base-uncased │ │ └── README.md ├── nlptown │ └── bert-base-multilingual-uncased-sentiment │ │ └── README.md ├── nyu-mll │ ├── roberta-base-100M-1 │ │ └── README.md │ ├── roberta-base-100M-2 │ │ └── README.md │ ├── roberta-base-100M-3 │ │ └── README.md │ ├── roberta-base-10M-1 │ │ └── README.md │ ├── roberta-base-10M-2 │ │ └── README.md │ ├── roberta-base-10M-3 │ │ └── README.md │ ├── roberta-base-1B-1 │ │ └── README.md │ ├── roberta-base-1B-2 │ │ └── README.md │ ├── roberta-base-1B-3 │ │ └── README.md │ ├── roberta-med-small-1M-1 │ │ └── README.md │ ├── roberta-med-small-1M-2 │ │ └── README.md │ ├── roberta-med-small-1M-3 │ │ └── README.md │ └── roberta_1M_to_1B │ │ └── README.md ├── oliverguhr │ └── german-sentiment-bert │ │ └── README.md ├── patrickvonplaten │ ├── bert2bert-cnn_dailymail-fp16 │ │ └── README.md │ ├── bert2bert_cnn_daily_mail │ │ └── README.md │ ├── bert2gpt2-cnn_dailymail-fp16 │ │ └── README.md │ ├── longformer2roberta-cnn_dailymail-fp16 │ │ └── README.md │ ├── roberta2roberta-cnn_dailymail-fp16 │ │ └── README.md │ ├── roberta2roberta-share-cnn_dailymail-fp16 │ │ └── README.md │ └── roberta_shared_bbc_xsum │ │ └── README.md ├── pdelobelle │ └── robbert-v2-dutch-base │ │ └── README.md ├── pedropei │ └── question-intimacy │ │ └── README.md ├── pierreguillou │ └── gpt2-small-portuguese │ │ └── README.md ├── pradhyra │ └── AWSBlogBert │ │ └── README.md ├── pranavpsv │ └── gpt2-genre-story-generator │ │ └── README.md ├── pvl │ └── labse_bert │ │ └── README.md ├── ramsrigouthamg │ └── t5_paraphraser │ │ └── README.md ├── rdenadai │ └── BR_BERTo │ │ └── README.md ├── redewiedergabe │ └── bert-base-historical-german-rw-cased │ │ └── README.md ├── rjbownes │ └── Magic-The-Generating │ │ └── README.md ├── roberta-base-README.md ├── roberta-large-README.md ├── roberta-large-mnli-README.md ├── rohanrajpal │ ├── 
bert-base-codemixed-uncased-sentiment │ │ └── README.md │ ├── bert-base-en-es-codemix-cased │ │ └── README.md │ ├── bert-base-en-hi-codemix-cased │ │ └── README.md │ └── bert-base-multilingual-codemixed-cased-sentiment │ │ └── README.md ├── sachaarbonel │ └── bert-italian-cased-finetuned-pos │ │ └── README.md ├── sagorsarker │ ├── bangla-bert-base │ │ └── README.md │ ├── bangla-bert-sentiment │ │ └── README.md │ ├── codeswitch-hineng-lid-lince │ │ └── README.md │ ├── codeswitch-hineng-ner-lince │ │ └── README.md │ ├── codeswitch-hineng-pos-lince │ │ └── README.md │ ├── codeswitch-nepeng-lid-lince │ │ └── README.md │ ├── codeswitch-spaeng-lid-lince │ │ └── README.md │ ├── codeswitch-spaeng-ner-lince │ │ └── README.md │ ├── codeswitch-spaeng-pos-lince │ │ └── README.md │ └── codeswitch-spaeng-sentiment-analysis-lince │ │ └── README.md ├── sarahlintang │ └── IndoBERT │ │ └── README.md ├── sarnikowski │ └── electra-small-discriminator-da-256-cased │ │ └── README.md ├── savasy │ ├── bert-base-turkish-ner-cased │ │ └── README.md │ ├── bert-base-turkish-sentiment-cased │ │ └── README.md │ ├── bert-base-turkish-squad │ │ └── README.md │ └── bert-turkish-text-classification │ │ └── README.md ├── schmidek │ └── electra-small-cased │ │ └── README.md ├── seiya │ └── oubiobert-base-uncased │ │ └── README.md ├── sentence-transformers │ ├── LaBSE │ │ └── README.md │ ├── bert-base-nli-cls-token │ │ └── README.md │ ├── bert-base-nli-max-tokens │ │ └── README.md │ └── bert-base-nli-mean-tokens │ │ └── README.md ├── severinsimmler │ └── literary-german-bert │ │ ├── README.md │ │ ├── kfold.png │ │ └── prosa-jahre.png ├── seyonec │ └── ChemBERTa-zinc-base-v1 │ │ └── README.md ├── shoarora │ ├── alectra-small-owt │ │ └── README.md │ └── electra-small-owt │ │ └── README.md ├── shrugging-grace │ └── tweetclassifier │ │ └── README.md ├── smanjil │ └── German-MedBERT │ │ └── README.md ├── spentaur │ └── yelp │ │ └── README.md ├── squeezebert │ ├── squeezebert-mnli-headless │ │ └── README.md │ ├── squeezebert-mnli │ │ └── README.md │ └── squeezebert-uncased │ │ └── README.md ├── stas │ └── tiny-wmt19-en-de │ │ └── README.md ├── stevhliu │ └── astroGPT │ │ └── README.md ├── surajp │ ├── RoBERTa-hindi-guj-san │ │ └── README.md │ ├── SanBERTa │ │ └── README.md │ └── albert-base-sanskrit │ │ └── README.md ├── t5-11b-README.md ├── t5-3b-README.md ├── t5-base-README.md ├── t5-large-README.md ├── t5-small-README.md ├── tartuNLP │ └── EstBERT │ │ └── README.md ├── tblard │ └── tf-allocine │ │ └── README.md ├── tuner007 │ ├── pegasus_paraphrase │ │ └── README.md │ ├── pegasus_qa │ │ └── README.md │ └── t5_abs_qa │ │ └── README.md ├── twmkn9 │ ├── albert-base-v2-squad2 │ │ └── README.md │ ├── bert-base-uncased-squad2 │ │ └── README.md │ ├── distilbert-base-uncased-squad2 │ │ └── README.md │ └── distilroberta-base-squad2 │ │ └── README.md ├── uer │ ├── chinese_roberta_L-2_H-128 │ │ └── README.md │ ├── gpt2-chinese-couplet │ │ └── README.md │ └── gpt2-chinese-poem │ │ └── README.md ├── uncnlp │ └── lxmert-base-uncased │ │ ├── LICENSE │ │ ├── README.md │ │ └── lxmert_model-1.jpg ├── unideeplearning │ └── polibert_sa │ │ └── README.md ├── urduhack │ └── roberta-urdu-small │ │ └── README.md ├── valhalla │ ├── bart-large-finetuned-squadv1 │ │ └── README.md │ ├── distilbart-mnli-12-1 │ │ └── README.md │ ├── distilbart-mnli-12-3 │ │ └── README.md │ ├── distilbart-mnli-12-6 │ │ └── README.md │ ├── distilbart-mnli-12-9 │ │ └── README.md │ ├── electra-base-discriminator-finetuned_squadv1 │ │ └── README.md │ ├── 
longformer-base-4096-finetuned-squadv1 │ │ └── README.md │ ├── t5-base-e2e-qg │ │ └── README.md │ ├── t5-base-qa-qg-hl │ │ └── README.md │ ├── t5-base-qg-hl │ │ └── README.md │ ├── t5-base-squad │ │ └── README.md │ ├── t5-samll-qg-prepend │ │ └── README.md │ ├── t5-small-e2e-qg │ │ └── README.md │ ├── t5-small-qa-qg-hl │ │ └── README.md │ └── t5-small-qg-hl │ │ └── README.md ├── vinai │ ├── bertweet-base │ │ └── README.md │ ├── bertweet-covid19-base-cased │ │ └── README.md │ ├── bertweet-covid19-base-uncased │ │ └── README.md │ ├── phobert-base │ │ └── README.md │ └── phobert-large │ │ └── README.md ├── voidful │ ├── albert_chinese_base │ │ └── README.md │ ├── albert_chinese_large │ │ └── README.md │ ├── albert_chinese_small │ │ └── README.md │ ├── albert_chinese_tiny │ │ └── README.md │ ├── albert_chinese_xlarge │ │ └── README.md │ └── albert_chinese_xxlarge │ │ └── README.md ├── wietsedv │ └── bert-base-dutch-cased │ │ └── README.md ├── wptoux │ └── albert-chinese-large-qa │ │ └── README.md ├── xlm-mlm-en-2048-README.md ├── xlm-roberta-base-README.md ├── xlm-roberta-large-finetuned-conll03-german-README.md ├── yjernite │ └── bart_eli5 │ │ └── README.md ├── ynie │ └── roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli │ │ └── README.md ├── youscan │ └── ukr-roberta-base │ │ └── README.md ├── yuvraj │ ├── summarizer-cnndm │ │ └── README.md │ └── xSumm │ │ └── README.md └── zanelim │ ├── singbert-large-sg │ └── README.md │ ├── singbert-lite-sg │ └── README.md │ └── singbert │ └── README.md ├── notebooks ├── 01-training-tokenizers.ipynb ├── 02-transformers.ipynb ├── 03-pipelines.ipynb ├── 04-onnx-export.ipynb ├── 05-benchmark.ipynb └── README.md ├── pyproject.toml ├── scripts ├── fsmt │ ├── convert-allenai-wmt16.sh │ ├── convert-allenai-wmt19.sh │ ├── convert-facebook-wmt19.sh │ ├── eval-allenai-wmt16.sh │ ├── eval-allenai-wmt19.sh │ ├── eval-facebook-wmt19.sh │ ├── fsmt-make-super-tiny-model.py │ ├── fsmt-make-tiny-model.py │ ├── gen-card-allenai-wmt16.py │ ├── gen-card-allenai-wmt19.py │ ├── gen-card-facebook-wmt19.py │ ├── s3-move.sh │ └── tests-to-run.sh ├── pegasus │ └── build_test_sample_spm_no_bos.py └── tatoeba │ └── README.md ├── setup.cfg ├── setup.py ├── src └── transformers │ ├── __init__.py │ ├── activations.py │ ├── activations_tf.py │ ├── benchmark │ ├── __init__.py │ ├── benchmark.py │ ├── benchmark_args.py │ ├── benchmark_args_tf.py │ ├── benchmark_args_utils.py │ ├── benchmark_tf.py │ └── benchmark_utils.py │ ├── commands │ ├── __init__.py │ ├── add_new_model.py │ ├── convert.py │ ├── download.py │ ├── env.py │ ├── lfs.py │ ├── run.py │ ├── serving.py │ ├── train.py │ ├── transformers_cli.py │ └── user.py │ ├── configuration_utils.py │ ├── convert_graph_to_onnx.py │ ├── convert_pytorch_checkpoint_to_tf2.py │ ├── convert_slow_tokenizer.py │ ├── convert_slow_tokenizers_checkpoints_to_fast.py │ ├── convert_tf_hub_seq_to_seq_bert_to_pytorch.py │ ├── data │ ├── __init__.py │ ├── data_collator.py │ ├── datasets │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── language_modeling.py │ │ └── squad.py │ ├── metrics │ │ ├── __init__.py │ │ └── squad_metrics.py │ ├── processors │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── squad.py │ │ ├── utils.py │ │ └── xnli.py │ └── test_generation_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── file_utils.py │ ├── generation_beam_search.py │ ├── generation_logits_process.py │ ├── generation_tf_utils.py │ ├── generation_utils.py │ ├── hf_api.py │ ├── hf_argparser.py │ ├── integrations.py │ ├── modelcard.py │ ├── 
modeling_flax_utils.py │ ├── modeling_outputs.py │ ├── modeling_tf_outputs.py │ ├── modeling_tf_pytorch_utils.py │ ├── modeling_tf_utils.py │ ├── modeling_utils.py │ ├── models │ ├── __init__.py │ ├── albert │ │ ├── __init__.py │ │ ├── configuration_albert.py │ │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_albert.py │ │ ├── modeling_tf_albert.py │ │ ├── tokenization_albert.py │ │ └── tokenization_albert_fast.py │ ├── auto │ │ ├── __init__.py │ │ ├── configuration_auto.py │ │ ├── modeling_auto.py │ │ ├── modeling_flax_auto.py │ │ ├── modeling_tf_auto.py │ │ └── tokenization_auto.py │ ├── bart │ │ ├── __init__.py │ │ ├── configuration_bart.py │ │ ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_bart.py │ │ ├── modeling_tf_bart.py │ │ ├── tokenization_bart.py │ │ └── tokenization_bart_fast.py │ ├── barthez │ │ ├── __init__.py │ │ ├── tokenization_barthez.py │ │ └── tokenization_barthez_fast.py │ ├── bert │ │ ├── __init__.py │ │ ├── configuration_bert.py │ │ ├── convert_bert_original_tf2_checkpoint_to_pytorch.py │ │ ├── convert_bert_original_tf_checkpoint_to_pytorch.py │ │ ├── convert_bert_pytorch_checkpoint_to_original_tf.py │ │ ├── modeling_bert.py │ │ ├── modeling_flax_bert.py │ │ ├── modeling_tf_bert.py │ │ ├── tokenization_bert.py │ │ └── tokenization_bert_fast.py │ ├── bert_generation │ │ ├── __init__.py │ │ ├── configuration_bert_generation.py │ │ ├── modeling_bert_generation.py │ │ └── tokenization_bert_generation.py │ ├── bert_japanese │ │ ├── __init__.py │ │ └── tokenization_bert_japanese.py │ ├── bertweet │ │ ├── __init__.py │ │ └── tokenization_bertweet.py │ ├── blenderbot │ │ ├── __init__.py │ │ ├── configuration_blenderbot.py │ │ ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_blenderbot.py │ │ ├── modeling_tf_blenderbot.py │ │ └── tokenization_blenderbot.py │ ├── camembert │ │ ├── __init__.py │ │ ├── configuration_camembert.py │ │ ├── modeling_camembert.py │ │ ├── modeling_tf_camembert.py │ │ ├── tokenization_camembert.py │ │ └── tokenization_camembert_fast.py │ ├── ctrl │ │ ├── __init__.py │ │ ├── configuration_ctrl.py │ │ ├── modeling_ctrl.py │ │ ├── modeling_tf_ctrl.py │ │ └── tokenization_ctrl.py │ ├── deberta │ │ ├── __init__.py │ │ ├── configuration_deberta.py │ │ ├── modeling_deberta.py │ │ └── tokenization_deberta.py │ ├── dialogpt │ │ ├── __init__.py │ │ └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py │ ├── distilbert │ │ ├── __init__.py │ │ ├── configuration_distilbert.py │ │ ├── modeling_distilbert.py │ │ ├── modeling_tf_distilbert.py │ │ ├── tokenization_distilbert.py │ │ └── tokenization_distilbert_fast.py │ ├── dpr │ │ ├── __init__.py │ │ ├── configuration_dpr.py │ │ ├── convert_dpr_original_checkpoint_to_pytorch.py │ │ ├── modeling_dpr.py │ │ ├── modeling_tf_dpr.py │ │ ├── tokenization_dpr.py │ │ └── tokenization_dpr_fast.py │ ├── electra │ │ ├── __init__.py │ │ ├── configuration_electra.py │ │ ├── convert_electra_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_electra.py │ │ ├── modeling_tf_electra.py │ │ ├── tokenization_electra.py │ │ └── tokenization_electra_fast.py │ ├── encoder_decoder │ │ ├── __init__.py │ │ ├── configuration_encoder_decoder.py │ │ └── modeling_encoder_decoder.py │ ├── flaubert │ │ ├── __init__.py │ │ ├── configuration_flaubert.py │ │ ├── modeling_flaubert.py │ │ ├── modeling_tf_flaubert.py │ │ └── tokenization_flaubert.py │ ├── fsmt │ │ ├── __init__.py │ │ ├── configuration_fsmt.py │ │ ├── 
convert_fsmt_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_fsmt.py │ │ └── tokenization_fsmt.py │ ├── funnel │ │ ├── __init__.py │ │ ├── configuration_funnel.py │ │ ├── convert_funnel_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_funnel.py │ │ ├── modeling_tf_funnel.py │ │ ├── tokenization_funnel.py │ │ └── tokenization_funnel_fast.py │ ├── gpt2 │ │ ├── __init__.py │ │ ├── configuration_gpt2.py │ │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_gpt2.py │ │ ├── modeling_tf_gpt2.py │ │ ├── tokenization_gpt2.py │ │ └── tokenization_gpt2_fast.py │ ├── herbert │ │ ├── __init__.py │ │ ├── tokenization_herbert.py │ │ └── tokenization_herbert_fast.py │ ├── layoutlm │ │ ├── __init__.py │ │ ├── configuration_layoutlm.py │ │ ├── modeling_layoutlm.py │ │ ├── tokenization_layoutlm.py │ │ └── tokenization_layoutlm_fast.py │ ├── longformer │ │ ├── __init__.py │ │ ├── configuration_longformer.py │ │ ├── convert_longformer_original_pytorch_lightning_to_pytorch.py │ │ ├── modeling_longformer.py │ │ ├── modeling_tf_longformer.py │ │ ├── tokenization_longformer.py │ │ └── tokenization_longformer_fast.py │ ├── lxmert │ │ ├── __init__.py │ │ ├── configuration_lxmert.py │ │ ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_lxmert.py │ │ ├── modeling_tf_lxmert.py │ │ ├── tokenization_lxmert.py │ │ └── tokenization_lxmert_fast.py │ ├── marian │ │ ├── __init__.py │ │ ├── configuration_marian.py │ │ ├── convert_marian_tatoeba_to_pytorch.py │ │ ├── convert_marian_to_pytorch.py │ │ ├── modeling_marian.py │ │ ├── modeling_tf_marian.py │ │ └── tokenization_marian.py │ ├── mbart │ │ ├── __init__.py │ │ ├── configuration_mbart.py │ │ ├── convert_mbart_original_checkpoint_to_pytorch.py │ │ ├── modeling_mbart.py │ │ ├── modeling_tf_mbart.py │ │ ├── tokenization_mbart.py │ │ └── tokenization_mbart_fast.py │ ├── mmbt │ │ ├── __init__.py │ │ ├── configuration_mmbt.py │ │ └── modeling_mmbt.py │ ├── mobilebert │ │ ├── __init__.py │ │ ├── configuration_mobilebert.py │ │ ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_mobilebert.py │ │ ├── modeling_tf_mobilebert.py │ │ ├── tokenization_mobilebert.py │ │ └── tokenization_mobilebert_fast.py │ ├── mpnet │ │ ├── __init__.py │ │ ├── configuration_mpnet.py │ │ ├── modeling_mpnet.py │ │ ├── modeling_tf_mpnet.py │ │ ├── tokenization_mpnet.py │ │ └── tokenization_mpnet_fast.py │ ├── mt5 │ │ ├── __init__.py │ │ ├── configuration_mt5.py │ │ ├── modeling_mt5.py │ │ └── modeling_tf_mt5.py │ ├── openai │ │ ├── __init__.py │ │ ├── configuration_openai.py │ │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_openai.py │ │ ├── modeling_tf_openai.py │ │ ├── tokenization_openai.py │ │ └── tokenization_openai_fast.py │ ├── pegasus │ │ ├── __init__.py │ │ ├── configuration_pegasus.py │ │ ├── convert_pegasus_tf_to_pytorch.py │ │ ├── modeling_pegasus.py │ │ ├── modeling_tf_pegasus.py │ │ ├── tokenization_pegasus.py │ │ └── tokenization_pegasus_fast.py │ ├── phobert │ │ ├── __init__.py │ │ └── tokenization_phobert.py │ ├── prophetnet │ │ ├── __init__.py │ │ ├── configuration_prophetnet.py │ │ ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_prophetnet.py │ │ └── tokenization_prophetnet.py │ ├── rag │ │ ├── __init__.py │ │ ├── configuration_rag.py │ │ ├── modeling_rag.py │ │ ├── retrieval_rag.py │ │ └── tokenization_rag.py │ ├── reformer │ │ ├── __init__.py │ │ ├── configuration_reformer.py │ │ ├── convert_reformer_trax_checkpoint_to_pytorch.py │ │ ├── 
modeling_reformer.py │ │ ├── tokenization_reformer.py │ │ └── tokenization_reformer_fast.py │ ├── retribert │ │ ├── __init__.py │ │ ├── configuration_retribert.py │ │ ├── modeling_retribert.py │ │ ├── tokenization_retribert.py │ │ └── tokenization_retribert_fast.py │ ├── roberta │ │ ├── __init__.py │ │ ├── configuration_roberta.py │ │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_flax_roberta.py │ │ ├── modeling_roberta.py │ │ ├── modeling_tf_roberta.py │ │ ├── tokenization_roberta.py │ │ └── tokenization_roberta_fast.py │ ├── squeezebert │ │ ├── __init__.py │ │ ├── configuration_squeezebert.py │ │ ├── modeling_squeezebert.py │ │ ├── tokenization_squeezebert.py │ │ └── tokenization_squeezebert_fast.py │ ├── t5 │ │ ├── __init__.py │ │ ├── configuration_t5.py │ │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_t5.py │ │ ├── modeling_tf_t5.py │ │ ├── tokenization_t5.py │ │ └── tokenization_t5_fast.py │ ├── transfo_xl │ │ ├── __init__.py │ │ ├── configuration_transfo_xl.py │ │ ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tf_transfo_xl.py │ │ ├── modeling_tf_transfo_xl_utilities.py │ │ ├── modeling_transfo_xl.py │ │ ├── modeling_transfo_xl_utilities.py │ │ └── tokenization_transfo_xl.py │ ├── xlm │ │ ├── __init__.py │ │ ├── configuration_xlm.py │ │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_tf_xlm.py │ │ ├── modeling_xlm.py │ │ └── tokenization_xlm.py │ ├── xlm_prophetnet │ │ ├── __init__.py │ │ ├── configuration_xlm_prophetnet.py │ │ ├── modeling_xlm_prophetnet.py │ │ └── tokenization_xlm_prophetnet.py │ ├── xlm_roberta │ │ ├── __init__.py │ │ ├── configuration_xlm_roberta.py │ │ ├── modeling_tf_xlm_roberta.py │ │ ├── modeling_xlm_roberta.py │ │ ├── tokenization_xlm_roberta.py │ │ └── tokenization_xlm_roberta_fast.py │ └── xlnet │ │ ├── __init__.py │ │ ├── configuration_xlnet.py │ │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tf_xlnet.py │ │ ├── modeling_xlnet.py │ │ ├── tokenization_xlnet.py │ │ └── tokenization_xlnet_fast.py │ ├── optimization.py │ ├── optimization_tf.py │ ├── pipelines.py │ ├── testing_utils.py │ ├── tokenization_utils.py │ ├── tokenization_utils_base.py │ ├── tokenization_utils_fast.py │ ├── trainer.py │ ├── trainer_callback.py │ ├── trainer_pt_utils.py │ ├── trainer_tf.py │ ├── trainer_utils.py │ ├── training_args.py │ ├── training_args_tf.py │ └── utils │ ├── __init__.py │ ├── dummy_flax_objects.py │ ├── dummy_pt_objects.py │ ├── dummy_sentencepiece_objects.py │ ├── dummy_tf_objects.py │ ├── dummy_tokenizers_objects.py │ ├── hp_naming.py │ ├── logging.py │ ├── model_parallel_utils.py │ ├── notebook.py │ ├── sentencepiece_model_pb2.py │ └── versions.py ├── templates ├── adding_a_new_example_script │ ├── README.md │ ├── cookiecutter.json │ └── {{cookiecutter.directory_name}} │ │ └── run_{{cookiecutter.example_shortcut}}.py └── adding_a_new_model │ ├── README.md │ ├── cookiecutter-template-{{cookiecutter.modelname}} │ ├── __init__.py │ ├── configuration.json │ ├── configuration_{{cookiecutter.lowercase_modelname}}.py │ ├── modeling_tf_{{cookiecutter.lowercase_modelname}}.py │ ├── modeling_{{cookiecutter.lowercase_modelname}}.py │ ├── test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py │ ├── test_modeling_{{cookiecutter.lowercase_modelname}}.py │ ├── to_replace_{{cookiecutter.lowercase_modelname}}.py │ ├── tokenization_fast_{{cookiecutter.lowercase_modelname}}.py │ ├── tokenization_{{cookiecutter.lowercase_modelname}}.py │ └── 
{{cookiecutter.lowercase_modelname}}.rst │ ├── cookiecutter.json │ └── tests │ ├── encoder-bert-tokenizer.json │ ├── pt-encoder-bert-tokenizer.json │ ├── standalone.json │ └── tf-encoder-bert-tokenizer.json ├── tests ├── __init__.py ├── conftest.py ├── fixtures │ ├── dummy-config.json │ ├── empty.txt │ ├── input.txt │ ├── sample_text.txt │ ├── sample_text_no_unicode.txt │ ├── spiece.model │ ├── test_sentencepiece.model │ ├── test_sentencepiece_no_bos.model │ └── tests_samples │ │ ├── .gitignore │ │ ├── GermEval │ │ ├── dev.txt │ │ ├── labels.txt │ │ └── train.txt │ │ ├── MRPC │ │ ├── dev.csv │ │ ├── dev.tsv │ │ ├── train.csv │ │ └── train.tsv │ │ ├── SQUAD │ │ └── sample.json │ │ ├── STS-B │ │ ├── dev.tsv │ │ └── train.tsv │ │ ├── conll │ │ └── sample.json │ │ └── wiki_text │ │ └── wiki_00 ├── test_activations.py ├── test_activations_tf.py ├── test_benchmark.py ├── test_benchmark_tf.py ├── test_cli.py ├── test_configuration_auto.py ├── test_configuration_common.py ├── test_data_collator.py ├── test_doc_samples.py ├── test_file_utils.py ├── test_flax_auto.py ├── test_generation_beam_search.py ├── test_generation_logits_process.py ├── test_generation_utils.py ├── test_hf_api.py ├── test_hf_argparser.py ├── test_logging.py ├── test_model_card.py ├── test_model_output.py ├── test_modeling_albert.py ├── test_modeling_auto.py ├── test_modeling_bart.py ├── test_modeling_bert.py ├── test_modeling_bert_generation.py ├── test_modeling_blenderbot.py ├── test_modeling_camembert.py ├── test_modeling_common.py ├── test_modeling_ctrl.py ├── test_modeling_deberta.py ├── test_modeling_distilbert.py ├── test_modeling_dpr.py ├── test_modeling_electra.py ├── test_modeling_encoder_decoder.py ├── test_modeling_flaubert.py ├── test_modeling_flax_bert.py ├── test_modeling_flax_roberta.py ├── test_modeling_fsmt.py ├── test_modeling_funnel.py ├── test_modeling_gpt2.py ├── test_modeling_layoutlm.py ├── test_modeling_longformer.py ├── test_modeling_lxmert.py ├── test_modeling_marian.py ├── test_modeling_mbart.py ├── test_modeling_mobilebert.py ├── test_modeling_mpnet.py ├── test_modeling_mt5.py ├── test_modeling_openai.py ├── test_modeling_pegasus.py ├── test_modeling_prophetnet.py ├── test_modeling_rag.py ├── test_modeling_reformer.py ├── test_modeling_roberta.py ├── test_modeling_squeezebert.py ├── test_modeling_t5.py ├── test_modeling_tf_albert.py ├── test_modeling_tf_auto.py ├── test_modeling_tf_bart.py ├── test_modeling_tf_bert.py ├── test_modeling_tf_blenderbot.py ├── test_modeling_tf_camembert.py ├── test_modeling_tf_common.py ├── test_modeling_tf_ctrl.py ├── test_modeling_tf_distilbert.py ├── test_modeling_tf_dpr.py ├── test_modeling_tf_electra.py ├── test_modeling_tf_flaubert.py ├── test_modeling_tf_funnel.py ├── test_modeling_tf_gpt2.py ├── test_modeling_tf_longformer.py ├── test_modeling_tf_lxmert.py ├── test_modeling_tf_marian.py ├── test_modeling_tf_mbart.py ├── test_modeling_tf_mobilebert.py ├── test_modeling_tf_mpnet.py ├── test_modeling_tf_mt5.py ├── test_modeling_tf_openai.py ├── test_modeling_tf_pegasus.py ├── test_modeling_tf_pytorch.py ├── test_modeling_tf_roberta.py ├── test_modeling_tf_t5.py ├── test_modeling_tf_transfo_xl.py ├── test_modeling_tf_xlm.py ├── test_modeling_tf_xlm_roberta.py ├── test_modeling_tf_xlnet.py ├── test_modeling_transfo_xl.py ├── test_modeling_xlm.py ├── test_modeling_xlm_prophetnet.py ├── test_modeling_xlm_roberta.py ├── test_modeling_xlnet.py ├── test_onnx.py ├── test_optimization.py ├── test_optimization_tf.py ├── test_pipelines_common.py ├── 
test_pipelines_conversational.py ├── test_pipelines_feature_extraction.py ├── test_pipelines_fill_mask.py ├── test_pipelines_ner.py ├── test_pipelines_question_answering.py ├── test_pipelines_sentiment_analysis.py ├── test_pipelines_summarization.py ├── test_pipelines_text2text_generation.py ├── test_pipelines_text_generation.py ├── test_pipelines_translation.py ├── test_pipelines_zero_shot.py ├── test_retrieval_rag.py ├── test_skip_decorators.py ├── test_tokenization_albert.py ├── test_tokenization_auto.py ├── test_tokenization_bart.py ├── test_tokenization_barthez.py ├── test_tokenization_bert.py ├── test_tokenization_bert_generation.py ├── test_tokenization_bert_japanese.py ├── test_tokenization_bertweet.py ├── test_tokenization_blenderbot.py ├── test_tokenization_camembert.py ├── test_tokenization_common.py ├── test_tokenization_ctrl.py ├── test_tokenization_deberta.py ├── test_tokenization_distilbert.py ├── test_tokenization_dpr.py ├── test_tokenization_fsmt.py ├── test_tokenization_funnel.py ├── test_tokenization_gpt2.py ├── test_tokenization_herbert.py ├── test_tokenization_layoutlm.py ├── test_tokenization_lxmert.py ├── test_tokenization_marian.py ├── test_tokenization_mbart.py ├── test_tokenization_mpnet.py ├── test_tokenization_openai.py ├── test_tokenization_pegasus.py ├── test_tokenization_phobert.py ├── test_tokenization_prophetnet.py ├── test_tokenization_rag.py ├── test_tokenization_reformer.py ├── test_tokenization_roberta.py ├── test_tokenization_squeezebert.py ├── test_tokenization_t5.py ├── test_tokenization_transfo_xl.py ├── test_tokenization_utils.py ├── test_tokenization_xlm.py ├── test_tokenization_xlm_prophetnet.py ├── test_tokenization_xlm_roberta.py ├── test_tokenization_xlnet.py ├── test_trainer.py ├── test_trainer_callback.py ├── test_trainer_distributed.py ├── test_trainer_tpu.py ├── test_trainer_utils.py ├── test_utils_check_copies.py └── test_versions_utils.py ├── utils ├── check_copies.py ├── check_dummies.py ├── check_repo.py ├── check_table.py ├── download_glue_data.py ├── get_modified_files.py ├── link_tester.py └── style_doc.py └── valohai.yaml /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=transformers 3 | omit = 4 | # skip conversion scripts from testing for now 5 | */convert_* 6 | */__main__.py 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | raise 11 | except 12 | register_parameter -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---new-benchmark.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F5A5 New benchmark" 3 | about: Benchmark a part of this library and share your results 4 | title: "[Benchmark]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🖥 Benchmarking `transformers` 11 | 12 | ## Benchmark 13 | 14 | Which part of `transformers` did you benchmark? 15 | 16 | ## Set-up 17 | 18 | What did you run your benchmarks on? Please include details, such as: CPU, GPU? If using multiple GPUs, which parallelization did you use? 19 | 20 | ## Results 21 | 22 | Put your results here!
23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--new-model-addition.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F31F New model addition" 3 | about: Submit a proposal/request to implement a new Transformer-based model 4 | title: '' 5 | labels: New model 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🌟 New model addition 11 | 12 | ## Model description 13 | 14 | 15 | 16 | ## Open source status 17 | 18 | * [ ] the model implementation is available: (give details) 19 | * [ ] the model weights are available: (give details) 20 | * [ ] who are the authors: (mention them, if possible by @gh-username) 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Submit a proposal/request for a new transformers feature 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🚀 Feature request 11 | 12 | 14 | 15 | ## Motivation 16 | 17 | 20 | 21 | ## Your contribution 22 | 23 | 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question-help.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓ Questions & Help" 3 | about: Post your general questions on the Hugging Face forum: https://discuss.huggingface.co/ 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # ❓ Questions & Help 11 | 12 | 16 | 17 | ## Details 18 | 19 | 20 | 21 | 23 | 24 | **A link to original question on the forum**: 25 | 26 | -------------------------------------------------------------------------------- /.github/conda/build.sh: -------------------------------------------------------------------------------- 1 | $PYTHON setup.py install # Python command to install the script. 2 | -------------------------------------------------------------------------------- /.github/conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "transformers" %} 2 | 3 | package: 4 | name: "{{ name|lower }}" 5 | version: "{{ TRANSFORMERS_VERSION }}" 6 | 7 | source: 8 | path: ../../ 9 | 10 | build: 11 | noarch: python 12 | 13 | requirements: 14 | host: 15 | - python 16 | - pip 17 | - numpy 18 | - dataclasses 19 | - packaging 20 | - filelock 21 | - requests 22 | - tqdm >=4.27 23 | - sacremoses 24 | - regex !=2019.12.17 25 | - protobuf 26 | - tokenizers ==0.9.4 27 | run: 28 | - python 29 | - numpy 30 | - dataclasses 31 | - packaging 32 | - filelock 33 | - requests 34 | - tqdm >=4.27 35 | - sacremoses 36 | - regex !=2019.12.17 37 | - protobuf 38 | - tokenizers ==0.9.4 39 | 40 | test: 41 | imports: 42 | - transformers 43 | 44 | about: 45 | home: https://huggingface.co 46 | license: Apache License 2.0 47 | license_file: LICENSE 48 | summary: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch and TensorFlow 2.0." 
49 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. Set to `false` to disable 17 | closeComment: false -------------------------------------------------------------------------------- /.github/workflows/release-conda.yml: -------------------------------------------------------------------------------- 1 | name: Release - Conda 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | env: 9 | ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} 10 | 11 | jobs: 12 | build_and_package: 13 | runs-on: ubuntu-latest 14 | defaults: 15 | run: 16 | shell: bash -l {0} 17 | 18 | steps: 19 | - name: Checkout repository 20 | uses: actions/checkout@v1 21 | 22 | - name: Install miniconda 23 | uses: conda-incubator/setup-miniconda@v2 24 | with: 25 | auto-update-conda: true 26 | auto-activate-base: false 27 | activate-environment: "build-transformers" 28 | channels: huggingface 29 | 30 | - name: Setup conda env 31 | run: | 32 | conda install -c defaults anaconda-client conda-build 33 | 34 | - name: Extract version 35 | run: echo "TRANSFORMERS_VERSION=`python setup.py --version`" >> $GITHUB_ENV 36 | 37 | - name: Build conda packages 38 | run: | 39 | conda info 40 | conda build .github/conda 41 | 42 | - name: Upload to Anaconda 43 | run: anaconda upload `conda build .github/conda --output` --force -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | -------------------------------------------------------------------------------- /docker/transformers-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow-cpu \ 19 | torch 20 | 21 | WORKDIR /workspace 22 | COPY . transformers/ 23 | RUN cd transformers/ && \ 24 | python3 -m pip install --no-cache-dir . 
25 | 26 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow \ 19 | torch 20 | 21 | RUN git clone https://github.com/NVIDIA/apex 22 | RUN cd apex && \ 23 | python3 setup.py install && \ 24 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ 25 | 26 | WORKDIR /workspace 27 | COPY . transformers/ 28 | RUN cd transformers/ && \ 29 | python3 -m pip install --no-cache-dir . 30 | 31 | CMD ["/bin/bash"] 32 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | torch 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-pytorch-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | torch 19 | 20 | RUN git clone https://github.com/NVIDIA/apex 21 | RUN cd apex && \ 22 | python3 setup.py install && \ 23 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ 24 | 25 | WORKDIR /workspace 26 | COPY . transformers/ 27 | RUN cd transformers/ && \ 28 | python3 -m pip install --no-cache-dir . 
29 | 30 | CMD ["/bin/bash"] 31 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/bert-base-cased.jsonnet: -------------------------------------------------------------------------------- 1 | local base = import 'templates/base.libsonnet'; 2 | local tpus = import 'templates/tpus.libsonnet'; 3 | local utils = import "templates/utils.libsonnet"; 4 | local volumes = import "templates/volumes.libsonnet"; 5 | 6 | local bertBaseCased = base.BaseTest { 7 | frameworkPrefix: "hf", 8 | modelName: "bert-base-cased", 9 | mode: "example", 10 | configMaps: [], 11 | 12 | timeout: 3600, # 1 hour, in seconds 13 | 14 | image: std.extVar('image'), 15 | imageTag: std.extVar('image-tag'), 16 | 17 | tpuSettings+: { 18 | softwareVersion: "pytorch-nightly", 19 | }, 20 | accelerator: tpus.v3_8, 21 | 22 | volumeMap+: { 23 | datasets: volumes.PersistentVolumeSpec { 24 | name: "huggingface-cluster-disk", 25 | mountPath: "/datasets", 26 | }, 27 | }, 28 | command: utils.scriptCommand( 29 | ||| 30 | python -m pytest -s transformers/examples/test_xla_examples.py -v 31 | test_exit_code=$? 32 | echo "\nFinished running commands.\n" 33 | test $test_exit_code -eq 0 34 | ||| 35 | ), 36 | }; 37 | 38 | bertBaseCased.oneshotJob 39 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/dataset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolume 3 | metadata: 4 | name: huggingface-cluster-disk 5 | spec: 6 | storageClassName: "" 7 | capacity: 8 | storage: 500Gi 9 | accessModes: 10 | - ReadOnlyMany 11 | claimRef: 12 | namespace: default 13 | name: huggingface-cluster-disk-claim 14 | gcePersistentDisk: 15 | pdName: huggingface-cluster-disk 16 | fsType: ext4 17 | readOnly: true 18 | --- 19 | apiVersion: v1 20 | kind: PersistentVolumeClaim 21 | metadata: 22 | name: huggingface-cluster-disk-claim 23 | spec: 24 | # Specify "" as the storageClassName so it matches the PersistentVolume's StorageClass. 25 | # A nil storageClassName value uses the default StorageClass. For details, see 26 | # https://kubernetes.io/docs/concepts/storage/persistent-volumes/#class-1 27 | storageClassName: "" 28 | accessModes: 29 | - ReadOnlyMany 30 | resources: 31 | requests: 32 | storage: 1Ki 33 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ~/.bashrc 3 | echo "running docker-entrypoint.sh" 4 | conda activate container 5 | echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS 6 | echo "printed TPU info" 7 | export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}" 8 | exec "$@" 9 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow-cpu 19 | 20 | WORKDIR /workspace 21 | COPY . 
transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-tensorflow-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/_static/css/Calibre-Light.ttf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/_static/css/Calibre-Medium.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/_static/css/Calibre-Regular.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Thin.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/_static/css/Calibre-Thin.otf -------------------------------------------------------------------------------- /docs/source/_static/css/code-snippets.css: -------------------------------------------------------------------------------- 1 | 2 | .highlight .c1, .highlight .sd{ 3 | color: #999 4 | } 5 | 6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc { 7 | color: #FB8D68; 8 | } 
9 | 10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow { 11 | color: #6670FF; 12 | } 13 | 14 | .highlight .gp { 15 | color: #FB8D68; 16 | } -------------------------------------------------------------------------------- /docs/source/contributing.md: -------------------------------------------------------------------------------- 1 | ../../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | ../../examples/README.md -------------------------------------------------------------------------------- /docs/source/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/favicon.ico -------------------------------------------------------------------------------- /docs/source/imgs/local_attention_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/local_attention_mask.png -------------------------------------------------------------------------------- /docs/source/imgs/ppl_chunked.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/ppl_chunked.gif -------------------------------------------------------------------------------- /docs/source/imgs/ppl_full.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/ppl_full.gif -------------------------------------------------------------------------------- /docs/source/imgs/ppl_sliding.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/ppl_sliding.gif -------------------------------------------------------------------------------- /docs/source/imgs/transformers_logo_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/transformers_logo_name.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_constant_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_constant_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_hard_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_schedule.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_cosine_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_warm_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_linear_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/docs/source/imgs/warmup_linear_schedule.png -------------------------------------------------------------------------------- /docs/source/notebooks.md: -------------------------------------------------------------------------------- 1 | ../../notebooks/README.md -------------------------------------------------------------------------------- /examples/benchmarking/README.md: -------------------------------------------------------------------------------- 1 | # 🤗 Benchmark results 2 | 3 | Here, you can find a list of the different benchmark results created by the community. 4 | 5 | If you would like to list benchmark results on your favorite models of the [model hub](https://huggingface.co/models) here, please open a Pull Request and add them below. 6 | 7 | | Benchmark description | Results | Environment info | Author | 8 | |:----------|:-------------|:-------------|------:| 9 | | PyTorch Benchmark on inference for `bert-base-cased` |[memory](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_memory.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 10 | | PyTorch Benchmark on inference for `bert-base-cased` |[time](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_time.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 11 | -------------------------------------------------------------------------------- /examples/bert-loses-patience/pabee/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/bert-loses-patience/pabee/__init__.py -------------------------------------------------------------------------------- /examples/conftest.py: -------------------------------------------------------------------------------- 1 | # tests directory-specific settings - this file is run automatically 2 | # by pytest before any tests are run 3 | 4 | import sys 5 | import warnings 6 | from os.path import abspath, dirname, join 7 | 8 | 9 | # allow having multiple repository checkouts and not needing to remember to rerun 10 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 11 | git_repo_path = abspath(join(dirname(dirname(__file__)), "src")) 12 | sys.path.insert(1, git_repo_path) 13 | 14 | # silence FutureWarning warnings in tests since often we can't act on them until 15 | # they become normal warnings - i.e. 
the tests still need to test the current functionality 16 | warnings.simplefilter(action="ignore", category=FutureWarning) 17 | 18 | 19 | def pytest_addoption(parser): 20 | from transformers.testing_utils import pytest_addoption_shared 21 | 22 | pytest_addoption_shared(parser) 23 | 24 | 25 | def pytest_terminal_summary(terminalreporter): 26 | from transformers.testing_utils import pytest_terminal_summary_main 27 | 28 | make_reports = terminalreporter.config.getoption("--make-reports") 29 | if make_reports: 30 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 31 | -------------------------------------------------------------------------------- /examples/contrib/README.md: -------------------------------------------------------------------------------- 1 | # Community contributed examples 2 | 3 | This folder contains examples which are not actively maintained (mostly contributed by the community). 4 | 5 | Using these examples together with a recent version of the library usually requires making small (sometimes big) adaptations to get the scripts working. 6 | -------------------------------------------------------------------------------- /examples/contrib/mm-imdb/README.md: -------------------------------------------------------------------------------- 1 | ## MM-IMDb 2 | 3 | Based on the script [`run_mmimdb.py`](https://github.com/huggingface/transformers/blob/master/examples/contrib/mm-imdb/run_mmimdb.py). 4 | 5 | [MM-IMDb](http://lisi1.unal.edu.co/mmimdb/) is a Multimodal dataset with around 26,000 movies including images, plots and other metadata. 6 | 7 | ### Training on MM-IMDb 8 | 9 | ``` 10 | python run_mmimdb.py \ 11 | --data_dir /path/to/mmimdb/dataset/ \ 12 | --model_type bert \ 13 | --model_name_or_path bert-base-uncased \ 14 | --output_dir /path/to/save/dir/ \ 15 | --do_train \ 16 | --do_eval \ 17 | --max_seq_len 512 \ 18 | --gradient_accumulation_steps 20 \ 19 | --num_image_embeds 3 \ 20 | --num_train_epochs 100 \ 21 | --patience 5 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /examples/deebert/entropy_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | ENTROPIES="0 0.1 0.2 0.3 0.4 0.5 0.6 0.7" 17 | 18 | for ENTROPY in $ENTROPIES; do 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 22 | --task_name $DATASET \ 23 | --do_eval \ 24 | --do_lower_case \ 25 | --data_dir $PATH_TO_DATA/$DATASET \ 26 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 27 | --plot_data_dir ./results/ \ 28 | --max_seq_length 128 \ 29 | --early_exit_entropy $ENTROPY \ 30 | --eval_highway \ 31 | --overwrite_cache \ 32 | --per_gpu_eval_batch_size=1 33 | done 34 | -------------------------------------------------------------------------------- /examples/deebert/eval_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | 
MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | 17 | python -u run_glue_deebert.py \ 18 | --model_type $MODEL_TYPE \ 19 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 20 | --task_name $DATASET \ 21 | --do_eval \ 22 | --do_lower_case \ 23 | --data_dir $PATH_TO_DATA/$DATASET \ 24 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 25 | --plot_data_dir ./results/ \ 26 | --max_seq_length 128 \ 27 | --eval_each_highway \ 28 | --eval_highway \ 29 | --overwrite_cache \ 30 | --per_gpu_eval_batch_size=1 31 | -------------------------------------------------------------------------------- /examples/deebert/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/deebert/src/__init__.py -------------------------------------------------------------------------------- /examples/deebert/train_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | EPOCHS=10 12 | if [ $MODEL_TYPE = 'bert' ] 13 | then 14 | EPOCHS=3 15 | MODEL_NAME=${MODEL_NAME}-uncased 16 | fi 17 | 18 | 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path $MODEL_NAME \ 22 | --task_name $DATASET \ 23 | --do_train \ 24 | --do_eval \ 25 | --do_lower_case \ 26 | --data_dir $PATH_TO_DATA/$DATASET \ 27 | --max_seq_length 128 \ 28 | --per_gpu_eval_batch_size=1 \ 29 | --per_gpu_train_batch_size=8 \ 30 | --learning_rate 2e-5 \ 31 | --num_train_epochs $EPOCHS \ 32 | --overwrite_output_dir \ 33 | --seed 42 \ 34 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 35 | --plot_data_dir ./results/ \ 36 | --save_steps 0 \ 37 | --overwrite_cache \ 38 | --eval_after_first_stage 39 | -------------------------------------------------------------------------------- /examples/distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | gitpython==3.0.2 4 | tensorboard>=1.14.0 5 | tensorboardX==1.8 6 | psutil==5.6.6 7 | scipy>=1.4.1 8 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 28996 14 | } 15 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-multilingual-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 
0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 119547 14 | } 15 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 30522 14 | } 15 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "initializer_range": 0.02, 3 | "layer_norm_epsilon": 0.00001, 4 | "n_ctx": 1024, 5 | "n_embd": 768, 6 | "n_head": 12, 7 | "n_layer": 6, 8 | "n_positions": 1024, 9 | "vocab_size": 50257 10 | } -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilroberta-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "vocab_size": 50265, 3 | "hidden_size": 768, 4 | "num_hidden_layers": 6, 5 | "num_attention_heads": 12, 6 | "intermediate_size": 3072, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "attention_probs_dropout_prob": 0.1, 10 | "max_position_embeddings": 514, 11 | "type_vocab_size": 1, 12 | "initializer_range": 0.02, 13 | "layer_norm_eps": 0.00001 14 | } -------------------------------------------------------------------------------- /examples/longform-qa/README.md: -------------------------------------------------------------------------------- 1 | # Long Form Question Answering 2 | 3 | This folder contains the code for the Long Form Question Answering [demo](http://35.226.96.115:8080/) as well as methods to train and use a fully end-to-end Long Form Question Answering system using the [🤗transformers](https://github.com/huggingface/transformers) and [🤗datasets](https://github.com/huggingface/datasets) libraries. 4 | 5 | You can use these methods to train your own system by following along with the associated [notebook](https://github.com/huggingface/notebooks/blob/master/longform-qa/Long_Form_Question_Answering_with_ELI5_and_Wikipedia.ipynb) or [blog post](https://yjernite.github.io/lfqa.html). 6 | -------------------------------------------------------------------------------- /examples/lxmert/README.md: -------------------------------------------------------------------------------- 1 | # LXMERT DEMO 2 | 3 | 1. make a virtualenv: ``virtualenv venv`` and activate ``source venv/bin/activate`` 4 | 2. install reqs: ``pip install -r ./requirements.txt`` 5 | 3. 
usage is as shown in demo.ipynb 6 | -------------------------------------------------------------------------------- /examples/movement-pruning/emmental/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .configuration_bert_masked import MaskedBertConfig 3 | from .modeling_bert_masked import ( 4 | MaskedBertForMultipleChoice, 5 | MaskedBertForQuestionAnswering, 6 | MaskedBertForSequenceClassification, 7 | MaskedBertForTokenClassification, 8 | MaskedBertModel, 9 | ) 10 | from .modules import * 11 | -------------------------------------------------------------------------------- /examples/movement-pruning/emmental/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .binarizer import MagnitudeBinarizer, ThresholdBinarizer, TopKBinarizer 3 | from .masked_nn import MaskedLinear 4 | -------------------------------------------------------------------------------- /examples/movement-pruning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.4.0 2 | -e git+https://github.com/huggingface/transformers.git@352d5472b0c1dec0f420d606d16747d851b4bda8#egg=transformers 3 | knockknock>=0.1.8.1 4 | h5py>=2.10.0 5 | numpy>=1.18.2 6 | scipy>=1.4.1 7 | -------------------------------------------------------------------------------- /examples/rag/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/rag/finetune_rag.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 5 | # run ./examples/rag/finetune.sh --help to see all the possible options 6 | 7 | python examples/rag/finetune_rag.py \ 8 | --data_dir $DATA_DIR \ 9 | --output_dir $OUTPUT_DIR \ 10 | --model_name_or_path $MODEL_NAME_OR_PATH \ 11 | --model_type rag_sequence \ 12 | --fp16 \ 13 | --gpus 8 \ 14 | --do_train \ 15 | --do_predict \ 16 | --n_val -1 \ 17 | --val_check_interval 0.25 \ 18 | --train_batch_size 8 \ 19 | --eval_batch_size 1 \ 20 | --max_source_length 128 \ 21 | --max_target_length 25 \ 22 | --val_max_target_length 25 \ 23 | --test_max_target_length 25 \ 24 | --label_smoothing 0.1 \ 25 | --dropout 0.1 \ 26 | --attention_dropout 0.1 \ 27 | --weight_decay 0.001 \ 28 | --adam_epsilon 1e-08 \ 29 | --max_grad_norm 0.1 \ 30 | --lr_scheduler polynomial \ 31 | --learning_rate 3e-05 \ 32 | --num_train_epochs 100 \ 33 | --warmup_steps 500 \ 34 | --gradient_accumulation_steps 1 -------------------------------------------------------------------------------- /examples/rag/requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu >= 1.6.3 2 | datasets >= 1.0.1 3 | psutil >= 5.7.0 4 | torch >= 1.4.0 -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning==1.0.4 9 
| matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.1.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | -------------------------------------------------------------------------------- /examples/seq2seq/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/seq2seq/bertabs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/seq2seq/bertabs/__init__.py -------------------------------------------------------------------------------- /examples/seq2seq/bertabs/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | # For ROUGE 4 | nltk 5 | py-rouge 6 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/finetune.sh: -------------------------------------------------------------------------------- 1 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 2 | # run ./builtin_trainer/finetune.sh --help to see all the possible options 3 | python finetune_trainer.py \ 4 | --learning_rate=3e-5 \ 5 | --fp16 \ 6 | --do_train --do_eval --do_predict \ 7 | --evaluation_strategy steps \ 8 | --predict_with_generate \ 9 | --n_val 1000 \ 10 | "$@" 11 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/finetune_tpu.sh: -------------------------------------------------------------------------------- 1 | export TPU_NUM_CORES=8 2 | 3 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 4 | # run ./builtin_trainer/finetune_tpu.sh --help to see all the possible options 5 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 6 | finetune_trainer.py \ 7 | --learning_rate=3e-5 \ 8 | --do_train --do_eval \ 9 | --evaluation_strategy steps \ 10 | --prediction_loss_only \ 11 | --n_val 1000 \ 12 | "$@" 13 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/train_distil_marian_enro.sh: -------------------------------------------------------------------------------- 1 | export WANDB_PROJECT=distil-marian 2 | export BS=64 3 | export GAS=1 4 | export m=sshleifer/student_marian_en_ro_6_3 5 | export MAX_LEN=128 6 | python finetune_trainer.py \ 7 | --tokenizer_name $m --model_name_or_path $m \ 8 | --data_dir $ENRO_DIR \ 9 | --output_dir marian_en_ro_6_3 --overwrite_output_dir \ 10 | --learning_rate=3e-4 \ 11 | --warmup_steps 500 --sortish_sampler \ 12 | --fp16 \ 13 | --gradient_accumulation_steps=$GAS \ 14 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 15 | --freeze_encoder --freeze_embeds \ 16 | --num_train_epochs=6 \ 17 | --save_steps 3000 --eval_steps 3000 \ 18 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 19 | --do_train --do_eval --do_predict \ 20 | --evaluation_strategy steps \ 21 | --predict_with_generate --logging_first_step \ 22 | --task translation 
--label_smoothing 0.1 \ 23 | "$@" 24 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/train_distil_marian_enro_tpu.sh: -------------------------------------------------------------------------------- 1 | export WANDB_PROJECT=distil-marian 2 | export BS=64 3 | export m=sshleifer/student_marian_en_ro_6_3 4 | export MAX_LEN=128 5 | export TPU_NUM_CORES=8 6 | 7 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 8 | finetune_trainer.py \ 9 | --tokenizer_name $m --model_name_or_path $m \ 10 | --data_dir $ENRO_DIR \ 11 | --output_dir marian_en_ro_6_3 --overwrite_output_dir \ 12 | --learning_rate=3e-4 \ 13 | --warmup_steps 500 \ 14 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 15 | --freeze_encoder --freeze_embeds \ 16 | --num_train_epochs=6 \ 17 | --save_steps 500 --eval_steps 500 \ 18 | --logging_first_step --logging_steps 200 \ 19 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 20 | --do_train --do_eval \ 21 | --evaluation_strategy steps \ 22 | --prediction_loss_only \ 23 | --task translation --label_smoothing 0.1 \ 24 | "$@" 25 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | export WANDB_PROJECT=distilbart-trainer 2 | export BS=32 3 | export m=sshleifer/student_cnn_12_6 4 | export tok=facebook/bart-large 5 | export MAX_TGT_LEN=142 6 | 7 | python finetune_trainer.py \ 8 | --model_name_or_path $m --tokenizer_name $tok \ 9 | --data_dir cnn_dm \ 10 | --output_dir distilbart-cnn-12-6 --overwrite_output_dir \ 11 | --learning_rate=3e-5 \ 12 | --warmup_steps 500 --sortish_sampler \ 13 | --fp16 \ 14 | --n_val 500 \ 15 | --gradient_accumulation_steps=1 \ 16 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 17 | --freeze_encoder --freeze_embeds \ 18 | --num_train_epochs=2 \ 19 | --save_steps 3000 --eval_steps 3000 \ 20 | --logging_first_step \ 21 | --max_target_length 56 --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ 22 | --do_train --do_eval --do_predict \ 23 | --evaluation_strategy steps \ 24 | --predict_with_generate --sortish_sampler \ 25 | "$@" 26 | -------------------------------------------------------------------------------- /examples/seq2seq/builtin_trainer/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | python finetune_trainer.py \ 2 | --model_name_or_path=facebook/mbart-large-cc25 \ 3 | --data_dir $ENRO_DIR \ 4 | --output_dir mbart_cc25_enro --overwrite_output_dir \ 5 | --learning_rate=3e-5 \ 6 | --warmup_steps 500 \ 7 | --fp16 \ 8 | --label_smoothing 0.1 \ 9 | --adam_eps 1e-06 \ 10 | --src_lang en_XX --tgt_lang ro_RO \ 11 | --freeze_embeds \ 12 | --per_device_train_batch_size=4 --per_device_eval_batch_size=4 \ 13 | --max_source_length 128 --max_target_length 128 \ 14 | --val_max_target_length 128 --test_max_target_length 128 \ 15 | --sortish_sampler \ 16 | --num_train_epochs 6 \ 17 | --save_steps 25000 --eval_steps 25000 --logging_steps 1000 \ 18 | --do_train --do_eval --do_predict \ 19 | --evaluation_strategy steps \ 20 | --predict_with_generate --logging_first_step \ 21 | --task translation \ 22 | "$@" 23 | -------------------------------------------------------------------------------- /examples/seq2seq/convert_model_to_fp16.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from typing import Union 4 | 5 | import fire 6 | import torch 7 | from tqdm import tqdm 8 | 9 | 10 | def convert(src_path: str, map_location: str = "cpu", save_path: Union[str, None] = None) -> None: 11 | """Convert a pytorch_model.bin or model.pt file to torch.float16 for faster downloads, less disk space.""" 12 | state_dict = torch.load(src_path, map_location=map_location) 13 | for k, v in tqdm(state_dict.items()): 14 | if not isinstance(v, torch.Tensor): 15 | raise TypeError("FP16 conversion only works on paths that are saved state dicts, like pytorch_model.bin") 16 | state_dict[k] = v.half() 17 | if save_path is None: # overwrite src_path 18 | save_path = src_path 19 | torch.save(state_dict, save_path) 20 | 21 | 22 | if __name__ == "__main__": 23 | fire.Fire(convert) 24 | -------------------------------------------------------------------------------- /examples/seq2seq/distil_marian_enro_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | # export MAX_LEN=128 5 | python distillation.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --fp16 \ 9 | --val_check_interval 0.25 \ 10 | --teacher Helsinki-NLP/opus-mt-en-ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --student_decoder_layers 3 --student_encoder_layers 6 \ 13 | --freeze_encoder --freeze_embeds \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. \ 16 | --train_batch_size=$BS --eval_batch_size=$BS \ 17 | --tokenizer_name Helsinki-NLP/opus-mt-en-ro \ 18 | --warmup_steps 500 --logger_name wandb \ 19 | --fp16_opt_level O1 --task translation --normalize_hidden --num_sanity_val_steps=0 \ 20 | "$@" 21 | -------------------------------------------------------------------------------- /examples/seq2seq/distil_marian_no_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | python distillation.py \ 5 | --learning_rate=3e-4 \ 6 | --do_train \ 7 | --do_predict \ 8 | --fp16 --no_teacher \ 9 | --val_check_interval 0.25 \ 10 | --data_dir $ENRO_DIR \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --freeze_encoder --freeze_embeds \ 13 | --train_batch_size=$BS --eval_batch_size=$BS \ 14 | --tokenizer_name $m --model_name_or_path $m \ 15 | --warmup_steps 500 --sortish_sampler --logger_name wandb \ 16 | --gpus 1 --fp16_opt_level=O1 --task translation --num_sanity_val_steps=0 \ 17 | "$@" 18 | -------------------------------------------------------------------------------- /examples/seq2seq/dynamic_bs_example.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | export m=sshleifer/student_marian_en_ro_6_1 6 | python finetune.py \ 7 | --learning_rate=3e-4 \ 8 | --do_train \ 9 | --fp16 \ 10 | --data_dir wmt_en_ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --freeze_encoder --freeze_embeds \ 13 | --train_batch_size=48 
--eval_batch_size=64 \ 14 | --tokenizer_name $m --model_name_or_path $m --num_train_epochs=1 \ 15 | --warmup_steps 500 --logger_name wandb --gpus 1 \ 16 | --fp16_opt_level=O1 --task translation \ 17 | "$@" 18 | -------------------------------------------------------------------------------- /examples/seq2seq/finetune.sh: -------------------------------------------------------------------------------- 1 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 2 | # run ./finetune.sh --help to see all the possible options 3 | python finetune.py \ 4 | --learning_rate=3e-5 \ 5 | --fp16 \ 6 | --gpus 1 \ 7 | --do_train \ 8 | --do_predict \ 9 | --n_val 1000 \ 10 | --val_check_interval 0.1 \ 11 | "$@" 12 | -------------------------------------------------------------------------------- /examples/seq2seq/finetune_bart_tiny.sh: -------------------------------------------------------------------------------- 1 | # Script for verifying that run_bart_sum can be invoked from its directory 2 | 3 | # Get tiny dataset with cnn_dm format (4 examples for train, val, test) 4 | wget https://cdn-datasets.huggingface.co/summarization/cnn_tiny.tgz 5 | tar -xzvf cnn_tiny.tgz 6 | rm cnn_tiny.tgz 7 | 8 | export OUTPUT_DIR_NAME=bart_utest_output 9 | export CURRENT_DIR=${PWD} 10 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 11 | 12 | # Make output directory if it doesn't exist 13 | mkdir -p $OUTPUT_DIR 14 | 15 | # Add parent directory to python path to access lightning_base.py and testing_utils.py 16 | export PYTHONPATH="../":"${PYTHONPATH}" 17 | python finetune.py \ 18 | --data_dir=cnn_tiny/ \ 19 | --model_name_or_path=sshleifer/bart-tiny-random \ 20 | --learning_rate=3e-5 \ 21 | --train_batch_size=2 \ 22 | --eval_batch_size=2 \ 23 | --output_dir=$OUTPUT_DIR \ 24 | --num_train_epochs=1 \ 25 | --gpus=0 \ 26 | --do_train "$@" 27 | 28 | rm -rf cnn_tiny 29 | rm -rf $OUTPUT_DIR 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/seq2seq/finetune_pegasus_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # From appendix C of paper https://arxiv.org/abs/1912.08777 5 | # Set --gradient_accumulation_steps so that effective batch size is 256 (2*128, 4*64, 8*32, 16*16) 6 | python finetune.py \ 7 | --learning_rate=1e-4 \ 8 | --do_train \ 9 | --do_predict \ 10 | --n_val 1000 \ 11 | --val_check_interval 0.25 \ 12 | --max_source_length 512 --max_target_length 56 \ 13 | --freeze_embeds --label_smoothing 0.1 --adafactor --task summarization_xsum \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /examples/seq2seq/finetune_t5.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --data_dir=$CNN_DIR \ 6 | --learning_rate=3e-5 \ 7 | --train_batch_size=$BS \ 8 | --eval_batch_size=$BS \ 9 | --output_dir=$OUTPUT_DIR \ 10 | --max_source_length=512 \ 11 | --max_target_length=56 \ 12 | --val_check_interval=0.1 --n_val=200 \ 13 | --do_train --do_predict \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /examples/seq2seq/minify_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python 2 | 3 | from pathlib import Path 4 | 5 | import fire 6 | 7 | 8 | def minify(src_dir: str, dest_dir: str, n: int): 9 | """Write first n lines of each file f in src_dir to dest_dir/f """ 10 | src_dir = Path(src_dir) 11 | dest_dir = Path(dest_dir) 12 | dest_dir.mkdir(exist_ok=True) 13 | for path in src_dir.iterdir(): 14 | new = [x.rstrip() for x in list(path.open().readlines())][:n] 15 | dest_path = dest_dir.joinpath(path.name) 16 | print(dest_path) 17 | dest_path.open("w").write("\n".join(new)) 18 | 19 | 20 | if __name__ == "__main__": 21 | fire.Fire(minify) 22 | -------------------------------------------------------------------------------- /examples/seq2seq/rouge_cli.py: -------------------------------------------------------------------------------- 1 | import fire 2 | 3 | from utils import calculate_rouge, save_json 4 | 5 | 6 | def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs): 7 | """Kwargs will be passed to calculate_rouge""" 8 | pred_lns = [x.strip() for x in open(pred_path).readlines()] 9 | tgt_lns = [x.strip() for x in open(tgt_path).readlines()][: len(pred_lns)] 10 | metrics = calculate_rouge(pred_lns, tgt_lns, **kwargs) 11 | if save_path is not None: 12 | save_json(metrics, save_path, indent=None) 13 | return metrics # these print nicely 14 | 15 | 16 | if __name__ == "__main__": 17 | fire.Fire(calculate_rouge_path) 18 | -------------------------------------------------------------------------------- /examples/seq2seq/save_randomly_initialized_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import fire 4 | 5 | from transformers import AutoConfig, AutoModelForSeq2SeqLM, AutoTokenizer 6 | 7 | 8 | def save_randomly_initialized_version(config_name: str, save_dir: str, **config_kwargs): 9 | """Save a randomly initialized version of a model using a pretrained config. 10 | Args: 11 | config_name: which config to use 12 | save_dir: where to save the resulting model and tokenizer 13 | config_kwargs: Passed to AutoConfig 14 | 15 | Usage:: 16 | save_randomly_initialized_version("facebook/bart-large-cnn", "distilbart_random_cnn_6_3", encoder_layers=6, decoder_layers=3, num_beams=3) 17 | """ 18 | cfg = AutoConfig.from_pretrained(config_name, **config_kwargs) 19 | model = AutoModelForSeq2SeqLM.from_config(cfg) 20 | model.save_pretrained(save_dir) 21 | AutoTokenizer.from_pretrained(config_name).save_pretrained(save_dir) 22 | return model 23 | 24 | 25 | if __name__ == "__main__": 26 | fire.Fire(save_randomly_initialized_version) 27 | -------------------------------------------------------------------------------- /examples/seq2seq/sentence_splitter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from filelock import FileLock 4 | 5 | 6 | try: 7 | import nltk 8 | 9 | NLTK_AVAILABLE = True 10 | except (ImportError, ModuleNotFoundError): 11 | NLTK_AVAILABLE = False 12 | 13 | if NLTK_AVAILABLE: 14 | with FileLock(".lock") as lock: 15 | nltk.download("punkt", quiet=True) 16 | 17 | 18 | def add_newline_to_end_of_each_sentence(x: str) -> str: 19 | """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" 20 | x = re.sub("<n>", "", x) # remove pegasus newline char so it does not leak into the output 21 | assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. 
(pip install nltk)" 22 | return "\n".join(nltk.sent_tokenize(x)) 23 | -------------------------------------------------------------------------------- /examples/seq2seq/test_data/fsmt/build-eval-data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import io 4 | import json 5 | import subprocess 6 | 7 | 8 | pairs = [ 9 | ["en", "ru"], 10 | ["ru", "en"], 11 | ["en", "de"], 12 | ["de", "en"], 13 | ] 14 | 15 | n_objs = 8 16 | 17 | 18 | def get_all_data(pairs, n_objs): 19 | text = {} 20 | for src, tgt in pairs: 21 | pair = f"{src}-{tgt}" 22 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo src".split() 23 | src_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 24 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo ref".split() 25 | tgt_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 26 | text[pair] = {"src": src_lines[:n_objs], "tgt": tgt_lines[:n_objs]} 27 | return text 28 | 29 | 30 | text = get_all_data(pairs, n_objs) 31 | filename = "./fsmt_val_data.json" 32 | with io.open(filename, "w", encoding="utf-8") as f: 33 | bleu_data = json.dump(text, f, indent=2, ensure_ascii=False) 34 | -------------------------------------------------------------------------------- /examples/seq2seq/test_data/wmt_en_ro/train.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/seq2seq/test_data/wmt_en_ro/train.len -------------------------------------------------------------------------------- /examples/seq2seq/test_data/wmt_en_ro/val.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/seq2seq/test_data/wmt_en_ro/val.len -------------------------------------------------------------------------------- /examples/seq2seq/test_tatoeba_conversion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import unittest 4 | 5 | from transformers.file_utils import cached_property 6 | from transformers.models.marian.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter 7 | from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow 8 | 9 | 10 | @unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.") 11 | class TatoebaConversionTester(unittest.TestCase): 12 | @cached_property 13 | def resolver(self): 14 | tmp_dir = tempfile.mkdtemp() 15 | return TatoebaConverter(save_dir=tmp_dir) 16 | 17 | @slow 18 | @require_torch_non_multi_gpu_but_fix_me 19 | def test_resolver(self): 20 | self.resolver.convert_models(["heb-eng"]) 21 | 22 | @slow 23 | @require_torch_non_multi_gpu_but_fix_me 24 | def test_model_card(self): 25 | content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True) 26 | assert mmeta["long_pair"] == "heb-eng" 27 | -------------------------------------------------------------------------------- /examples/seq2seq/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | export BS=32 5 | export GAS=1 6 | 7 | python finetune.py \ 8 | --learning_rate=3e-5 \ 9 | --fp16 \ 10 | --gpus 1 \ 11 | --do_train \ 12 | --do_predict \ 13 | 
--val_check_interval 0.25 \ 14 | --n_val 500 \ 15 | --num_train_epochs 2 \ 16 | --freeze_encoder --freeze_embeds --data_dir cnn_dm \ 17 | --max_target_length 142 --val_max_target_length=142 \ 18 | --train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \ 19 | --model_name_or_path sshleifer/student_cnn_12_6 \ 20 | --tokenizer_name facebook/bart-large \ 21 | --warmup_steps 500 \ 22 | --output_dir distilbart-cnn-12-6 \ 23 | "$@" 24 | 25 | -------------------------------------------------------------------------------- /examples/seq2seq/train_distilbart_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | python distillation.py \ 4 | --teacher facebook/bart-large-xsum --data_dir xsum \ 5 | --tokenizer_name facebook/bart-large-xsum \ 6 | --student_decoder_layers 6 --student_encoder_layers 12 \ 7 | --freeze_encoder --freeze_embeds \ 8 | --learning_rate=3e-4 \ 9 | --do_train \ 10 | --do_predict \ 11 | --fp16 --fp16_opt_level=O1 \ 12 | --val_check_interval 0.1 --n_val 1000 --eval_beams 2 --length_penalty=0.5 \ 13 | --max_target_length=60 --val_max_target_length=60 --test_max_target_length=100 \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. \ 16 | --train_batch_size=16 --eval_batch_size=16 --gradient_accumulation_steps=2 \ 17 | --sortish_sampler \ 18 | --num_train_epochs=6 \ 19 | --warmup_steps 500 \ 20 | --output_dir distilbart_xsum_12_6 \ 21 | "$@" 22 | -------------------------------------------------------------------------------- /examples/seq2seq/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --learning_rate=3e-5 \ 6 | --fp16 \ 7 | --do_train \ 8 | --val_check_interval=0.25 \ 9 | --adam_eps 1e-06 \ 10 | --num_train_epochs 6 --src_lang en_XX --tgt_lang ro_RO \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --train_batch_size=$BS --eval_batch_size=$BS \ 14 | --task translation \ 15 | --warmup_steps 500 \ 16 | --freeze_embeds \ 17 | --model_name_or_path=facebook/mbart-large-cc25 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /examples/text-classification/run_pl.sh: -------------------------------------------------------------------------------- 1 | # Install example requirements 2 | pip install -r ../requirements.txt 3 | 4 | # Download glue data 5 | python3 ../../utils/download_glue_data.py 6 | 7 | export TASK=mrpc 8 | export DATA_DIR=./glue_data/MRPC/ 9 | export MAX_LENGTH=128 10 | export LEARNING_RATE=2e-5 11 | export BERT_MODEL=bert-base-cased 12 | export BATCH_SIZE=32 13 | export NUM_EPOCHS=3 14 | export SEED=2 15 | export OUTPUT_DIR_NAME=mrpc-pl-bert 16 | export CURRENT_DIR=${PWD} 17 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 18 | 19 | # Make output directory if it doesn't exist 20 | mkdir -p $OUTPUT_DIR 21 | # Add parent directory to python path to access lightning_base.py 22 | export PYTHONPATH="../":"${PYTHONPATH}" 23 | 24 | python3 run_pl_glue.py --gpus 1 --data_dir $DATA_DIR \ 25 | --task $TASK \ 26 | --model_name_or_path $BERT_MODEL \ 27 | --output_dir $OUTPUT_DIR \ 28 | --max_seq_length $MAX_LENGTH \ 29 | --learning_rate $LEARNING_RATE \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --train_batch_size 
$BATCH_SIZE \ 32 | --seed $SEED \ 33 | --do_train \ 34 | --do_predict 35 | -------------------------------------------------------------------------------- /examples/text-generation/README.md: -------------------------------------------------------------------------------- 1 | ## Language generation 2 | 3 | Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/master/examples/text-generation/run_generation.py). 4 | 5 | Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL, XLNet, CTRL. 6 | A similar script is used for our official demo [Write With Transfomer](https://transformer.huggingface.co), where you 7 | can try out the different models available in the library. 8 | 9 | Example usage: 10 | 11 | ```bash 12 | python run_generation.py \ 13 | --model_type=gpt2 \ 14 | --model_name_or_path=gpt2 15 | ``` 16 | -------------------------------------------------------------------------------- /examples/text-generation/pplm/imgs/headfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/text-generation/pplm/imgs/headfigure.png -------------------------------------------------------------------------------- /examples/text-generation/pplm/imgs/wooly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/examples/text-generation/pplm/imgs/wooly.png -------------------------------------------------------------------------------- /examples/text-generation/pplm/pplm_classification_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ClassificationHead(torch.nn.Module): 5 | """Classification Head for transformer encoders""" 6 | 7 | def __init__(self, class_size, embed_size): 8 | super().__init__() 9 | self.class_size = class_size 10 | self.embed_size = embed_size 11 | # self.mlp1 = torch.nn.Linear(embed_size, embed_size) 12 | # self.mlp2 = (torch.nn.Linear(embed_size, class_size)) 13 | self.mlp = torch.nn.Linear(embed_size, class_size) 14 | 15 | def forward(self, hidden_state): 16 | # hidden_state = F.relu(self.mlp1(hidden_state)) 17 | # hidden_state = self.mlp2(hidden_state) 18 | logits = self.mlp(hidden_state) 19 | return logits 20 | -------------------------------------------------------------------------------- /examples/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | python3 run_ner.py \ 2 | --model_name_or_path bert-base-uncased \ 3 | --dataset_name conll2003 \ 4 | --output_dir /tmp/test-ner \ 5 | --do_train \ 6 | --do_eval 7 | -------------------------------------------------------------------------------- /examples/token-classification/run_chunk.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Downloading CONLL2003 dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/valid.txt' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Downloading CONLL2003 test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/test.txt' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Downloading CONLL2003 train dataset...." 
13 | curl -L -o ./train.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/train.txt' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=chunker-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner_old.py \ 25 | --task_type Chunk \ 26 | --data_dir . \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/token-classification/run_pos.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Download dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Download test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Download train dataset...." 13 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=postagger-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner_old.py \ 25 | --task_type POS \ 26 | --data_dir . \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/token-classification/run_pos_pl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if ! [ -f ./dev.txt ]; then 3 | echo "Download dev dataset...." 4 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 5 | fi 6 | 7 | if ! [ -f ./test.txt ]; then 8 | echo "Download test dataset...." 9 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 10 | fi 11 | 12 | if ! [ -f ./train.txt ]; then 13 | echo "Download train dataset...." 
14 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 15 | fi 16 | 17 | export MAX_LENGTH=200 18 | export BERT_MODEL=bert-base-uncased 19 | export OUTPUT_DIR=postagger-model 20 | export BATCH_SIZE=32 21 | export NUM_EPOCHS=3 22 | export SAVE_STEPS=750 23 | export SEED=1 24 | 25 | 26 | # Add parent directory to python path to access lightning_base.py 27 | export PYTHONPATH="../":"${PYTHONPATH}" 28 | 29 | python3 run_pl_ner.py --data_dir ./ \ 30 | --task_type POS \ 31 | --model_name_or_path $BERT_MODEL \ 32 | --output_dir $OUTPUT_DIR \ 33 | --max_seq_length $MAX_LENGTH \ 34 | --num_train_epochs $NUM_EPOCHS \ 35 | --train_batch_size $BATCH_SIZE \ 36 | --seed $SEED \ 37 | --gpus 1 \ 38 | --do_train \ 39 | --do_predict 40 | -------------------------------------------------------------------------------- /examples/token-classification/scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from transformers import AutoTokenizer 4 | 5 | 6 | dataset = sys.argv[1] 7 | model_name_or_path = sys.argv[2] 8 | max_len = int(sys.argv[3]) 9 | 10 | subword_len_counter = 0 11 | 12 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 13 | max_len -= tokenizer.num_special_tokens_to_add() 14 | 15 | with open(dataset, "rt") as f_p: 16 | for line in f_p: 17 | line = line.rstrip() 18 | 19 | if not line: 20 | print(line) 21 | subword_len_counter = 0 22 | continue 23 | 24 | token = line.split()[0] 25 | 26 | current_subwords_len = len(tokenizer.tokenize(token)) 27 | 28 | # Token contains strange control characters like \x96 or \x95 29 | # Just filter out the complete line 30 | if current_subwords_len == 0: 31 | continue 32 | 33 | if (subword_len_counter + current_subwords_len) > max_len: 34 | print("") 35 | print(line) 36 | subword_len_counter = current_subwords_len 37 | continue 38 | 39 | subword_len_counter += current_subwords_len 40 | 41 | print(line) 42 | -------------------------------------------------------------------------------- /model_cards/Cinnamon/electra-small-japanese-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ja 3 | license: apache-2.0 4 | --- 5 | 6 | ## Japanese ELECTRA-small 7 | 8 | We provide a Japanese **ELECTRA-Small** model, as described in [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB). 9 | 10 | Our pretraining process employs subword units derived from the [Japanese Wikipedia](https://dumps.wikimedia.org/jawiki/latest), using the [Byte-Pair Encoding](https://www.aclweb.org/anthology/P16-1162.pdf) method and building on an initial tokenization with [mecab-ipadic-NEologd](https://github.com/neologd/mecab-ipadic-neologd). For optimal performance, please take care to set your MeCab dictionary appropriately. 
11 | 12 | ## How to use the discriminator in `transformers` 13 | 14 | ``` 15 | from transformers import BertJapaneseTokenizer, ElectraForPreTraining 16 | 17 | tokenizer = BertJapaneseTokenizer.from_pretrained('Cinnamon/electra-small-japanese-discriminator', mecab_kwargs={"mecab_option": "-d /usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ipadic-neologd"}) 18 | 19 | model = ElectraForPreTraining.from_pretrained('Cinnamon/electra-small-japanese-discriminator') 20 | ``` 21 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-bg-cs-pl-ru-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - bg 4 | - cs 5 | - pl 6 | - ru 7 | --- 8 | 9 | # bert-base-bg-cs-pl-ru-cased 10 | 11 | SlavicBERT\[1\] \(Slavic \(bg, cs, pl, ru\), cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on Russian News and four Wikipedias: Bulgarian, Czech, Polish, and Russian. Subtoken vocabulary was built using this data. Multilingual BERT was used as an initialization for SlavicBERT. 12 | 13 | 14 | \[1\]: Arkhipov M., Trofimova M., Kuratov Y., Sorokin A. \(2019\). [Tuning Multilingual Transformers for Language-Specific Named Entity Recognition](https://www.aclweb.org/anthology/W19-3712/). ACL anthology W19-3712. 15 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-cased-conversational/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | --- 4 | 5 | # bert-base-cased-conversational 6 | 7 | Conversational BERT \(English, cased, 12‑layer, 768‑hidden, 12‑heads, 110M parameters\) was trained on the English part of Twitter, Reddit, DailyDialogues\[1\], OpenSubtitles\[2\], Debates\[3\], Blogs\[4\], Facebook News Comments. We used this training data to build the vocabulary of English subtokens and took English cased version of BERT‑base as an initialization for English Conversational BERT. 8 | 9 | 10 | \[1\]: Yanran Li, Hui Su, Xiaoyu Shen, Wenjie Li, Ziqiang Cao, and Shuzi Niu. DailyDialog: A Manually Labelled Multi-turn Dialogue Dataset. IJCNLP 2017. 11 | 12 | \[2\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\) 13 | 14 | \[3\]: Justine Zhang, Ravi Kumar, Sujith Ravi, Cristian Danescu-Niculescu-Mizil. Proceedings of NAACL, 2016. 15 | 16 | \[4\]: J. Schler, M. Koppel, S. Argamon and J. Pennebaker \(2006\). Effects of Age and Gender on Blogging in Proceedings of 2006 AAAI Spring Symposium on Computational Approaches for Analyzing Weblogs. 17 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-multilingual-cased-sentence/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - multilingual 4 | --- 5 | 6 | # bert-base-multilingual-cased-sentence 7 | 8 | Sentence Multilingual BERT \(101 languages, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) is a representation‑based sentence encoder for 101 languages of Multilingual BERT. It is initialized with Multilingual BERT and then fine‑tuned on english MultiNLI\[1\] and on dev set of multilingual XNLI\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\]. 
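For illustration, a minimal sketch of the mean pooling described above, using plain `transformers` and PyTorch. This snippet is not part of the original card; the masked averaging is assumed to follow the Sentence-BERT recipe.

```python
import torch
from transformers import AutoModel, AutoTokenizer

name = "DeepPavlov/bert-base-multilingual-cased-sentence"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModel.from_pretrained(name)

sentences = ["A short test sentence.", "Еще одно короткое предложение."]
encoded = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    token_embeddings = model(**encoded)[0]  # (batch, seq_len, hidden)

# Mean-pool the token embeddings, ignoring padding positions.
mask = encoded["attention_mask"].unsqueeze(-1).float()
sentence_embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
print(sentence_embeddings.shape)  # torch.Size([2, 768])
```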
9 | 10 | 11 | \[1\]: Williams A., Nangia N. & Bowman S. \(2017\) A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference. arXiv preprint [arXiv:1704.05426](https://arxiv.org/abs/1704.05426) 12 | 13 | \[2\]: Williams A., Bowman S. \(2018\) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint [arXiv:1809.05053](https://arxiv.org/abs/1809.05053) 14 | 15 | \[3\]: N. Reimers, I. Gurevych \(2019\) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084) 16 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased-conversational/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - ru 4 | --- 5 | 6 | # rubert-base-cased-conversational 7 | 8 | Conversational RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on OpenSubtitles\[1\], [Dirty](https://d3.ru/), [Pikabu](https://pikabu.ru/), and a Social Media segment of Taiga corpus\[2\]. We assembled a new vocabulary for Conversational RuBERT model on this data and initialized the model with [RuBERT](../rubert-base-cased). 9 | 10 | 11 | \[1\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\) 12 | 13 | \[2\]: Shavrina T., Shapovalova O. \(2017\) TO THE METHODOLOGY OF CORPUS CONSTRUCTION FOR MACHINE LEARNING: «TAIGA» SYNTAX TREE CORPUS AND PARSER. in proc. of “CORPORA2017”, international conference , Saint-Petersbourg, 2017. 14 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased-sentence/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - ru 4 | --- 5 | 6 | # rubert-base-cased-sentence 7 | 8 | Sentence RuBERT \(Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters\) is a representation‑based sentence encoder for Russian. It is initialized with RuBERT and fine‑tuned on SNLI\[1\] google-translated to russian and on russian part of XNLI dev set\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\]. 9 | 10 | 11 | \[1\]: S. R. Bowman, G. Angeli, C. Potts, and C. D. Manning. \(2015\) A large annotated corpus for learning natural language inference. arXiv preprint [arXiv:1508.05326](https://arxiv.org/abs/1508.05326) 12 | 13 | \[2\]: Williams A., Bowman S. \(2018\) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint [arXiv:1809.05053](https://arxiv.org/abs/1809.05053) 14 | 15 | \[3\]: N. Reimers, I. Gurevych \(2019\) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084) 16 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - ru 4 | --- 5 | 6 | # rubert-base-cased 7 | 8 | RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on the Russian part of Wikipedia and news data. 
We used this training data to build a vocabulary of Russian subtokens and took a multilingual version of BERT‑base as an initialization for RuBERT\[1\]. 9 | 10 | 11 | \[1\]: Kuratov, Y., Arkhipov, M. \(2019\). Adaptation of Deep Bidirectional Multilingual Transformers for Russian Language. arXiv preprint [arXiv:1905.07213](https://arxiv.org/abs/1905.07213). 12 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-arabic/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Arabic language**. The mono in the name refers to the monolingual setting, where the model is trained using only Arabic language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.877609 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | ### For more details about our paper 5 | 6 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 7 | 8 | ***Please cite our paper in any published work that uses any of these resources.*** 9 | 10 | ~~~ 11 | @article{aluru2020deep, 12 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 13 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 14 | journal={arXiv preprint arXiv:2004.06465}, 15 | year={2020} 16 | } 17 | 18 | ~~~ 19 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-english/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **English language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.726030 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-french/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **French language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 
2 | The model is trained with different learning rates and the best validation score achieved is 0.692094 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-german/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **German language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.649794 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-indonesian/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Indonesian language**. The mono in the name refers to the monolingual setting, where the model is trained using only Arabic language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.844494 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 
9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-italian/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Italian language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.837288 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-polish/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Polish language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.723254 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-portugese/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Portuguese language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 
2 | The model is trained with different learning rates and the best validation score achieved is 0.716119 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-spanish/README.md: -------------------------------------------------------------------------------- 1 | This model is used detecting **hatespeech** in **Spanish language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is finetuned on multilingual bert model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.740287 for a learning rate of 3e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | 4 | 5 | 6 | ### For more details about our paper 7 | 8 | Sai Saketh Aluru, Binny Mathew, Punyajoy Saha and Animesh Mukherjee. "[Deep Learning Models for Multilingual Hate Speech Detection](https://arxiv.org/abs/2004.06465)". Accepted at ECML-PKDD 2020. 9 | 10 | ***Please cite our paper in any published work that uses any of these resources.*** 11 | 12 | ~~~ 13 | @article{aluru2020deep, 14 | title={Deep Learning Models for Multilingual Hate Speech Detection}, 15 | author={Aluru, Sai Saket and Mathew, Binny and Saha, Punyajoy and Mukherjee, Animesh}, 16 | journal={arXiv preprint arXiv:2004.06465}, 17 | year={2020} 18 | } 19 | 20 | ~~~ 21 | -------------------------------------------------------------------------------- /model_cards/NeuML/bert-small-cord19-squad2/README.md: -------------------------------------------------------------------------------- 1 | # BERT-Small CORD-19 fine-tuned on SQuAD 2.0 2 | 3 | [bert-small-cord19 model](https://huggingface.co/NeuML/bert-small-cord19) fine-tuned on SQuAD 2.0 4 | 5 | ## Building the model 6 | 7 | ```bash 8 | python run_squad.py 9 | --model_type bert 10 | --model_name_or_path bert-small-cord19 11 | --do_train 12 | --do_eval 13 | --do_lower_case 14 | --version_2_with_negative 15 | --train_file train-v2.0.json 16 | --predict_file dev-v2.0.json 17 | --per_gpu_train_batch_size 8 18 | --learning_rate 3e-5 19 | --num_train_epochs 3.0 20 | --max_seq_length 384 21 | --doc_stride 128 22 | --output_dir bert-small-cord19-squad2 23 | --save_steps 0 24 | --threads 8 25 | --overwrite_cache 26 | --overwrite_output_dir 27 | -------------------------------------------------------------------------------- /model_cards/NeuML/bert-small-cord19/README.md: -------------------------------------------------------------------------------- 1 | # BERT-Small fine-tuned on CORD-19 dataset 2 | 3 | [BERT L6_H-512_A-8 model](https://huggingface.co/google/bert_uncased_L-6_H-512_A-8) fine-tuned on the [CORD-19 dataset](https://www.semanticscholar.org/cord19). 
4 | 5 | ## CORD-19 data subset 6 | The training data for this dataset is stored as a [Kaggle dataset](https://www.kaggle.com/davidmezzetti/cord19-qa?select=cord19.txt). The training 7 | data is a subset of the full corpus, focusing on high-quality, study-design detected articles. 8 | 9 | ## Building the model 10 | 11 | ```bash 12 | python run_language_modeling.py 13 | --model_type bert 14 | --model_name_or_path google/bert_uncased_L-6_H-512_A-8 15 | --do_train 16 | --mlm 17 | --line_by_line 18 | --block_size 512 19 | --train_data_file cord19.txt 20 | --per_gpu_train_batch_size 4 21 | --learning_rate 3e-5 22 | --num_train_epochs 3.0 23 | --output_dir bert-small-cord19 24 | --save_steps 0 25 | --overwrite_output_dir 26 | -------------------------------------------------------------------------------- /model_cards/T-Systems-onsite/bert-german-dbmdz-uncased-sentence-stsb/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | license: mit 4 | --- 5 | 6 | # bert-german-dbmdz-uncased-sentence-stsb 7 | **This model is outdated!** 8 | 9 | The new [T-Systems-onsite/cross-en-de-roberta-sentence-transformer](https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer) model is better for German language. It is also the current best model for English language and works cross-lingually. Please consider using that model. -------------------------------------------------------------------------------- /model_cards/VictorSanh/roberta-base-finetuned-yelp-polarity/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | datasets: 4 | - yelp_polarity 5 | --- 6 | 7 | # RoBERTa-base-finetuned-yelp-polarity 8 | 9 | This is a [RoBERTa-base](https://huggingface.co/roberta-base) checkpoint fine-tuned on binary sentiment classifcation from [Yelp polarity](https://huggingface.co/nlp/viewer/?dataset=yelp_polarity). 10 | It gets **98.08%** accuracy on the test set. 11 | 12 | ## Hyper-parameters 13 | 14 | We used the following hyper-parameters to train the model on one GPU: 15 | ```python 16 | num_train_epochs = 2.0 17 | learning_rate = 1e-05 18 | weight_decay = 0.0 19 | adam_epsilon = 1e-08 20 | max_grad_norm = 1.0 21 | per_device_train_batch_size = 32 22 | gradient_accumulation_steps = 1 23 | warmup_steps = 3500 24 | seed = 42 25 | ``` 26 | -------------------------------------------------------------------------------- /model_cards/ViktorAlm/electra-base-norwegian-uncased-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: no 3 | thumbnail: https://i.imgur.com/QqSEC5I.png 4 | --- 5 | 6 | # Norwegian Electra 7 | ![Image of norwegian electra](https://i.imgur.com/QqSEC5I.png) 8 | 9 | Trained on Oscar + wikipedia + opensubtitles + some other data I had with the awesome power of TPUs(V3-8) 10 | 11 | Use with caution. I have no downstream tasks in Norwegian to test on so I have no idea of its performance yet. 12 | # Model 13 | ## Electra: Pre-training Text Encoders as Discriminators Rather Than Generators 14 | Kevin Clark and Minh-Thang Luong and Quoc V. Le and Christopher D. Manning 15 | - https://openreview.net/pdf?id=r1xMH1BtvB 16 | - https://github.com/google-research/electra 17 | # Acknowledgments 18 | ### TensorFlow Research Cloud 19 | Research supported with Cloud TPUs from Google's TensorFlow Research Cloud (TFRC). 
Thanks for providing access to the TFRC ❤️ 20 | - https://www.tensorflow.org/tfrc 21 | #### OSCAR corpus 22 | - https://oscar-corpus.com/ 23 | #### OPUS 24 | - http://opus.nlpl.eu/ 25 | - http://www.opensubtitles.org/ 26 | -------------------------------------------------------------------------------- /model_cards/adalbertojunior/PTT5-SMALL-SUM/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: pt 3 | --- 4 | 5 | # PTT5-SMALL-SUM 6 | 7 | ## Model description 8 | 9 | This model was trained to summarize texts in portuguese 10 | 11 | 12 | based on ```unicamp-dl/ptt5-small-portuguese-vocab``` 13 | 14 | #### How to use 15 | 16 | ```python 17 | from transformers import T5Tokenizer, T5ForConditionalGeneration 18 | 19 | tokenizer = T5Tokenizer.from_pretrained('adalbertojunior/PTT5-SMALL-SUM') 20 | 21 | t5 = T5ForConditionalGeneration.from_pretrained('adalbertojunior/PTT5-SMALL-SUM') 22 | 23 | text="Esse é um exemplo de sumarização." 24 | 25 | input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True) 26 | 27 | generated_ids = t5.generate( 28 | input_ids=input_ids, 29 | num_beams=1, 30 | max_length=40, 31 | #repetition_penalty=2.5 32 | ).squeeze() 33 | 34 | predicted_span = tokenizer.decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True) 35 | 36 | 37 | ``` 38 | -------------------------------------------------------------------------------- /model_cards/akhooli/gpt2-small-arabic/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: "ar" 3 | datasets: 4 | - Arabic Wikipedia 5 | metrics: 6 | - none 7 | --- 8 | 9 | # GPT2-Small-Arabic 10 | 11 | ## Model description 12 | 13 | GPT2 model from Arabic Wikipedia dataset based on gpt2-small (using Fastai2). 14 | 15 | ## Intended uses & limitations 16 | 17 | #### How to use 18 | 19 | An example is provided in this [colab notebook](https://colab.research.google.com/drive/1mRl7c-5v-Klx27EEAEOAbrfkustL4g7a?usp=sharing). 20 | Both text and poetry (fine-tuned model) generation are included. 21 | 22 | #### Limitations and bias 23 | 24 | GPT2-small-arabic (trained on Arabic Wikipedia) has several limitations in terms of coverage (Arabic Wikipeedia quality, no diacritics) and training performance. 25 | Use as demonstration or proof of concepts but not as production code. 26 | 27 | ## Training data 28 | 29 | This pretrained model used the Arabic Wikipedia dump (around 900 MB). 30 | 31 | ## Training procedure 32 | 33 | Training was done using [Fastai2](https://github.com/fastai/fastai2/) library on Kaggle, using free GPU. 34 | 35 | ## Eval results 36 | Final perplexity reached was 72.19, loss: 4.28, accuracy: 0.307 37 | 38 | ### BibTeX entry and citation info 39 | 40 | ```bibtex 41 | @inproceedings{Abed Khooli, 42 | year={2020} 43 | } 44 | ``` 45 | -------------------------------------------------------------------------------- /model_cards/akhooli/mbart-large-cc25-ar-en/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - translation 4 | 5 | language: 6 | - ar 7 | - en 8 | 9 | license: mit 10 | --- 11 | ### mbart-large-ar-en 12 | This is mbart-large-cc25, finetuned on a subset of the OPUS corpus for ar_en. 13 | Usage: see [example notebook](https://colab.research.google.com/drive/1I6RFOWMaTpPBX7saJYjnSTddW0TD6H1t?usp=sharing) 14 | Note: model has limited training set, not fully trained (do not use for production). 
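A hedged usage sketch (not taken from the notebook above). It assumes the checkpoint keeps the stock mBART-cc25 language codes `ar_AR` and `en_XX` and loads with the standard `MBartTokenizer`/`MBartForConditionalGeneration` classes; the input sentence is only an example.

```python
from transformers import MBartForConditionalGeneration, MBartTokenizer

name = "akhooli/mbart-large-cc25-ar-en"
tokenizer = MBartTokenizer.from_pretrained(name, src_lang="ar_AR", tgt_lang="en_XX")
model = MBartForConditionalGeneration.from_pretrained(name)

text = "مرحبا بالعالم"  # "Hello, world" (example input)
batch = tokenizer(text, return_tensors="pt")
generated = model.generate(
    **batch,
    decoder_start_token_id=tokenizer.lang_code_to_id["en_XX"],  # force English output
    num_beams=4,
    max_length=64,
)
print(tokenizer.batch_decode(generated, skip_special_tokens=True))
```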
15 | Other models by me: [Abed Khooli](https://huggingface.co/akhooli) 16 | -------------------------------------------------------------------------------- /model_cards/akhooli/mbart-large-cc25-en-ar/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - translation 4 | 5 | language: 6 | - en 7 | - ar 8 | 9 | license: mit 10 | --- 11 | ### mbart-large-en-ar 12 | This is mbart-large-cc25, finetuned on a subset of the UN corpus for en_ar. 13 | Usage: see [example notebook](https://colab.research.google.com/drive/1I6RFOWMaTpPBX7saJYjnSTddW0TD6H1t?usp=sharing) 14 | Note: model has limited training set, not fully trained (do not use for production). 15 | -------------------------------------------------------------------------------- /model_cards/akhooli/personachat-arabic/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - conversational 4 | language: 5 | - ar 6 | license: mit 7 | --- 8 | ## personachat-arabic (conversational AI) 9 | This is personachat-arabic, using a subset from the persona-chat validation dataset, machine translated to Arabic (from English) 10 | and fine-tuned from [akhooli/gpt2-small-arabic](https://huggingface.co/akhooli/gpt2-small-arabic) which is a limited text generation model. 11 | Usage: see the last section of this [example notebook](https://colab.research.google.com/drive/1I6RFOWMaTpPBX7saJYjnSTddW0TD6H1t?usp=sharing) 12 | Note: model has limited training set which was machine translated (do not use for production). 13 | -------------------------------------------------------------------------------- /model_cards/akhooli/xlm-r-large-arabic-sent/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | language: 4 | - ar 5 | - en 6 | 7 | license: mit 8 | --- 9 | ### xlm-r-large-arabic-sent 10 | Multilingual sentiment classification (Label_0: mixed, Label_1: negative, Label_2: positive) of Arabic reviews by fine-tuning XLM-Roberta-Large. 11 | Zero shot classification of other languages (also works in mixed languages - ex. Arabic & English). Mixed category is not accurate and may confuse other 12 | classes (was based on a rate of 3 out of 5 in reviews). 13 | Usage: see last section in this [Colab notebook](https://lnkd.in/d3bCFyZ) 14 | -------------------------------------------------------------------------------- /model_cards/akhooli/xlm-r-large-arabic-toxic/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | language: 4 | - ar 5 | - en 6 | 7 | license: mit 8 | --- 9 | ### xlm-r-large-arabic-toxic (toxic/hate speech classifier) 10 | Toxic (hate speech) classification (Label_0: non-toxic, Label_1: toxic) of Arabic comments by fine-tuning XLM-Roberta-Large. 11 | Zero shot classification of other languages (also works in mixed languages - ex. Arabic & English). 
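A minimal classification sketch, shown as an assumption about how the checkpoint is typically loaded; the example comment is made up, and the label order follows the description above.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

name = "akhooli/xlm-r-large-arabic-toxic"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name)

comment = "هذا تعليق عادي"  # an ordinary (non-toxic) example comment
inputs = tokenizer(comment, return_tensors="pt", truncation=True)
with torch.no_grad():
    probs = torch.softmax(model(**inputs).logits, dim=-1).squeeze()

# Label_0: non-toxic, Label_1: toxic (per the description above)
print({"non-toxic": probs[0].item(), "toxic": probs[1].item()})
```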
12 | Usage and further info: see last section in this [Colab notebook](https://lnkd.in/d3bCFyZ) 13 | -------------------------------------------------------------------------------- /model_cards/albert-base-v1-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/albert-xxlarge-v2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /model_cards/allenai/longformer-base-4096-extra.pos.embd.only/README.md: -------------------------------------------------------------------------------- 1 | 2 | # longformer-base-4096-extra.pos.embd.only 3 | 4 | This model is similar to `longformer-base-4096` but it was pretrained to preserve RoBERTa weights by freezing all RoBERTa weights and only train the additional position embeddings. 5 | 6 | 7 | ### Citing 8 | 9 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150). 10 | ``` 11 | @article{Beltagy2020Longformer, 12 | title={Longformer: The Long-Document Transformer}, 13 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan}, 14 | journal={arXiv:2004.05150}, 15 | year={2020}, 16 | } 17 | ``` 18 | 19 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org). 20 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering. 21 | -------------------------------------------------------------------------------- /model_cards/allenai/longformer-base-4096/README.md: -------------------------------------------------------------------------------- 1 | 2 | # longformer-base-4096 3 | [Longformer](https://arxiv.org/abs/2004.05150) is a transformer model for long documents. 4 | 5 | `longformer-base-4096` is a BERT-like model started from the RoBERTa checkpoint and pretrained for MLM on long documents. It supports sequences of length up to 4,096. 6 | 7 | Longformer uses a combination of a sliding window (local) attention and global attention. Global attention is user-configured based on the task to allow the model to learn task-specific representations. 8 | Please refer to the examples in `modeling_longformer.py` and the paper for more details on how to set global attention. 9 | 10 | 11 | ### Citing 12 | 13 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150). 14 | ``` 15 | @article{Beltagy2020Longformer, 16 | title={Longformer: The Long-Document Transformer}, 17 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan}, 18 | journal={arXiv:2004.05150}, 19 | year={2020}, 20 | } 21 | ``` 22 | 23 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org). 24 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering. 
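As a rough illustration of the task-configured global attention described above: which tokens receive global attention is a per-task choice, and giving it only to the first `<s>` token here is just an example, not a recommendation.

```python
import torch
from transformers import LongformerModel, LongformerTokenizer

tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
model = LongformerModel.from_pretrained("allenai/longformer-base-4096")

text = " ".join(["A long document."] * 500)
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=4096)

# 0 = sliding-window (local) attention, 1 = global attention.
global_attention_mask = torch.zeros_like(inputs["input_ids"])
global_attention_mask[:, 0] = 1  # e.g. give the <s> token global attention

outputs = model(**inputs, global_attention_mask=global_attention_mask)
print(outputs.last_hidden_state.shape)
```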
25 | -------------------------------------------------------------------------------- /model_cards/allenai/scibert_scivocab_cased/README.md: -------------------------------------------------------------------------------- 1 | # SciBERT 2 | 3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text. 4 | 5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts. 6 | 7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions. 8 | 9 | Available models include: 10 | * `scibert_scivocab_cased` 11 | * `scibert_scivocab_uncased` 12 | 13 | 14 | The original repo can be found [here](https://github.com/allenai/scibert). 15 | 16 | If using these models, please cite the following paper: 17 | ``` 18 | @inproceedings{beltagy-etal-2019-scibert, 19 | title = "SciBERT: A Pretrained Language Model for Scientific Text", 20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman", 21 | booktitle = "EMNLP", 22 | year = "2019", 23 | publisher = "Association for Computational Linguistics", 24 | url = "https://www.aclweb.org/anthology/D19-1371" 25 | } 26 | ``` 27 | -------------------------------------------------------------------------------- /model_cards/allenai/scibert_scivocab_uncased/README.md: -------------------------------------------------------------------------------- 1 | # SciBERT 2 | 3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text. 4 | 5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts. 6 | 7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions. 8 | 9 | Available models include: 10 | * `scibert_scivocab_cased` 11 | * `scibert_scivocab_uncased` 12 | 13 | 14 | The original repo can be found [here](https://github.com/allenai/scibert). 
15 | 16 | If using these models, please cite the following paper: 17 | ``` 18 | @inproceedings{beltagy-etal-2019-scibert, 19 | title = "SciBERT: A Pretrained Language Model for Scientific Text", 20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman", 21 | booktitle = "EMNLP", 22 | year = "2019", 23 | publisher = "Association for Computational Linguistics", 24 | url = "https://www.aclweb.org/anthology/D19-1371" 25 | } 26 | ``` 27 | -------------------------------------------------------------------------------- /model_cards/allenyummy/chinese-bert-wwm-ehr-ner-sl/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh-tw 3 | --- 4 | 5 | # Model name 6 | Chinese-bert-wwm-electrical-health-record-ner-sequence-labeling 7 | 8 | 9 | #### How to use 10 | 11 | ``` 12 | from transformers import AutoTokenizer, AutoModelForTokenClassification 13 | tokenizer = AutoTokenizer.from_pretrained("chinese-bert-wwm-ehr-ner-sl") 14 | model = AutoModelForTokenClassification.from_pretrained("chinese-bert-wwm-ehr-ner-sl") 15 | ``` 16 | -------------------------------------------------------------------------------- /model_cards/aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616/README.md: -------------------------------------------------------------------------------- 1 | # BERT L-10 H-512 fine-tuned on MLM (CORD-19 2020/06/16) 2 | 3 | BERT model with [10 Transformer layers and hidden embedding of size 512](https://huggingface.co/google/bert_uncased_L-10_H-512_A-8), referenced in [Well-Read Students Learn Better: On the Importance of Pre-training Compact Models](https://arxiv.org/abs/1908.08962), fine-tuned for MLM on CORD-19 dataset (as released on 2020/06/16). 4 | 5 | ## Training the model 6 | 7 | ```bash 8 | python run_language_modeling.py 9 | --model_type bert 10 | --model_name_or_path google/bert_uncased_L-10_H-512_A-8 11 | --do_train 12 | --train_data_file {cord19-200616-dataset} 13 | --mlm 14 | --mlm_probability 0.2 15 | --line_by_line 16 | --block_size 512 17 | --per_device_train_batch_size 10 18 | --learning_rate 3e-5 19 | --num_train_epochs 2 20 | --output_dir bert_uncased_L-10_H-512_A-8_cord19-200616 21 | -------------------------------------------------------------------------------- /model_cards/aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | datasets: 3 | - squad_v2 4 | --- 5 | 6 | # BERT L-10 H-512 CORD-19 (2020/06/16) fine-tuned on SQuAD v2.0 7 | 8 | BERT model with [10 Transformer layers and hidden embedding of size 512](https://huggingface.co/google/bert_uncased_L-10_H-512_A-8), referenced in [Well-Read Students Learn Better: On the Importance of Pre-training Compact Models](https://arxiv.org/abs/1908.08962), [fine-tuned for MLM](https://huggingface.co/aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616) on CORD-19 dataset (as released on 2020/06/16) and fine-tuned for QA on SQuAD v2.0. 
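For illustration, a minimal question-answering sketch; the question and context are made-up examples, and serving the model through `pipeline` is an assumption rather than part of the original card.

```python
from transformers import pipeline

name = "aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2"
qa = pipeline("question-answering", model=name, tokenizer=name)

result = qa(
    question="What virus causes COVID-19?",
    context="COVID-19 is an infectious disease caused by the SARS-CoV-2 virus, first reported in December 2019.",
)
print(result)  # {'score': ..., 'start': ..., 'end': ..., 'answer': ...}
```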
9 |
10 | ## Training the model
11 |
12 | ```bash
13 | python run_squad.py \
14 |     --model_type bert \
15 |     --model_name_or_path aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616 \
16 |     --train_file 'train-v2.0.json' \
17 |     --predict_file 'dev-v2.0.json' \
18 |     --do_train \
19 |     --do_eval \
20 |     --do_lower_case \
21 |     --version_2_with_negative \
22 |     --max_seq_length 384 \
23 |     --per_gpu_train_batch_size 10 \
24 |     --learning_rate 3e-5 \
25 |     --num_train_epochs 2 \
26 |     --output_dir bert_uncased_L-10_H-512_A-8_cord19-200616_squad2
27 | ```
--------------------------------------------------------------------------------
/model_cards/aodiniz/bert_uncased_L-2_H-512_A-8_cord19-200616/README.md:
--------------------------------------------------------------------------------
1 | # BERT L-2 H-512 fine-tuned on MLM (CORD-19 2020/06/16)
2 |
3 | BERT model with [2 Transformer layers and hidden embedding of size 512](https://huggingface.co/google/bert_uncased_L-2_H-512_A-8), referenced in [Well-Read Students Learn Better: On the Importance of Pre-training Compact Models](https://arxiv.org/abs/1908.08962), fine-tuned for MLM on the CORD-19 dataset (as released on 2020/06/16).
4 |
5 | ## Training the model
6 |
7 | ```bash
8 | python run_language_modeling.py \
9 |     --model_type bert \
10 |     --model_name_or_path google/bert_uncased_L-2_H-512_A-8 \
11 |     --do_train \
12 |     --train_data_file {cord19-200616-dataset} \
13 |     --mlm \
14 |     --mlm_probability 0.2 \
15 |     --line_by_line \
16 |     --block_size 512 \
17 |     --per_device_train_batch_size 20 \
18 |     --learning_rate 3e-5 \
19 |     --num_train_epochs 2 \
20 |     --output_dir bert_uncased_L-2_H-512_A-8_cord19-200616
21 | ```
--------------------------------------------------------------------------------
/model_cards/aodiniz/bert_uncased_L-4_H-256_A-4_cord19-200616/README.md:
--------------------------------------------------------------------------------
1 | # BERT L-4 H-256 fine-tuned on MLM (CORD-19 2020/06/16)
2 |
3 | BERT model with [4 Transformer layers and hidden embedding of size 256](https://huggingface.co/google/bert_uncased_L-4_H-256_A-4), referenced in [Well-Read Students Learn Better: On the Importance of Pre-training Compact Models](https://arxiv.org/abs/1908.08962), fine-tuned for MLM on the CORD-19 dataset (as released on 2020/06/16).
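As an illustration (not part of the original card), the MLM checkpoint should be usable through the fill-mask pipeline; the model id below is taken from this card's path, and the example sentence is arbitrary:

```python
from transformers import pipeline

# Query the MLM checkpoint named in this card.
fill_mask = pipeline(
    "fill-mask",
    model="aodiniz/bert_uncased_L-4_H-256_A-4_cord19-200616",
    tokenizer="aodiniz/bert_uncased_L-4_H-256_A-4_cord19-200616",
)

print(fill_mask(f"Coronavirus is transmitted by respiratory {fill_mask.tokenizer.mask_token}."))
```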
4 |
5 | ## Training the model
6 |
7 | ```bash
8 | python run_language_modeling.py \
9 |     --model_type bert \
10 |     --model_name_or_path google/bert_uncased_L-4_H-256_A-4 \
11 |     --do_train \
12 |     --train_data_file {cord19-200616-dataset} \
13 |     --mlm \
14 |     --mlm_probability 0.2 \
15 |     --line_by_line \
16 |     --block_size 256 \
17 |     --per_device_train_batch_size 20 \
18 |     --learning_rate 3e-5 \
19 |     --num_train_epochs 2 \
20 |     --output_dir bert_uncased_L-4_H-256_A-4_cord19-200616
21 | ```
--------------------------------------------------------------------------------
/model_cards/bart-large-cnn/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | ---
5 |
6 |
--------------------------------------------------------------------------------
/model_cards/bart-large-xsum/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | ---
5 |
6 |
--------------------------------------------------------------------------------
/model_cards/bert-base-chinese-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: zh
3 | ---
4 |
--------------------------------------------------------------------------------
/model_cards/bert-base-german-dbmdz-cased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: de
3 | license: mit
4 | ---
5 |
--------------------------------------------------------------------------------
/model_cards/bert-base-german-dbmdz-uncased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: de
3 | license: mit
4 | ---
5 |
--------------------------------------------------------------------------------
/model_cards/bert-large-cased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: apache-2.0
3 | ---
4 |
--------------------------------------------------------------------------------
/model_cards/binwang/xlnet-base-cased/README.md:
--------------------------------------------------------------------------------
1 | This model is a pre-trained **XLNet** with 12 layers.
2 |
3 | It accompanies the paper: SBERT-WK: A Sentence Embedding Method By Dissecting BERT-based Word Models
4 |
5 | Project Page: [SBERT-WK](https://github.com/BinWang28/SBERT-WK-Sentence-Embedding)
6 |
--------------------------------------------------------------------------------
/model_cards/ceostroff/harry-potter-gpt2-fanfiction/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - en
4 | tags:
5 | - harry-potter
6 | license: mit
7 | ---
8 |
9 | # Harry Potter Fanfiction Generator
10 |
11 | This is a pre-trained GPT-2 generative text model that allows you to generate your own Harry Potter fanfiction, trained on the top 100 rated fanfiction stories. We intend for this to be used for individual fun and experimentation and not as a commercial product.
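A minimal generation sketch (not part of the original card), assuming the checkpoint id `ceostroff/harry-potter-gpt2-fanfiction` from this card's path; the prompt is only an example:

```python
from transformers import pipeline

# Generate a short fanfiction snippet with the fine-tuned GPT-2 checkpoint.
generator = pipeline("text-generation", model="ceostroff/harry-potter-gpt2-fanfiction")

print(generator("Harry looked at the Marauder's Map and", max_length=50, num_return_sequences=1))
```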
12 | -------------------------------------------------------------------------------- /model_cards/clue/albert_chinese_small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## albert_chinese_small 6 | 7 | ### Overview 8 | 9 | **Language model:** albert-small 10 | **Model size:** 18.5M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:**Since sentencepiece is not used in `albert_chinese_small` model, you have to call **BertTokenizer** instead of AlbertTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, AlbertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_small") 27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_small") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/albert_chinese_tiny/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## albert_chinese_tiny 6 | 7 | ### Overview 8 | 9 | **Language model:** albert-tiny 10 | **Model size:** 16M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:**Since sentencepiece is not used in `albert_chinese_tiny` model, you have to call **BertTokenizer** instead of AlbertTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, AlbertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_tiny") 27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_tiny") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 
33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/roberta_chinese_base/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## roberta_chinese_base 6 | 7 | ### Overview 8 | 9 | **Language model:** roberta-base 10 | **Model size:** 392M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, BertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_base") 27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_base") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/roberta_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## roberta_chinese_large 6 | 7 | ### Overview 8 | 9 | **Language model:** roberta-large 10 | **Model size:** 1.2G 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, BertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_large") 27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_large") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 
33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/xlnet_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: zh 3 | --- 4 | 5 | ## xlnet_chinese_large 6 | 7 | ### Overview 8 | 9 | **Language model:** xlnet-large 10 | **Model size:** 1.3G 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | ``` 22 | import torch 23 | from transformers import XLNetTokenizer,XLNetModel 24 | tokenizer = XLNetTokenizer.from_pretrained("clue/xlnet_chinese_large") 25 | xlnet = XLNetModel.from_pretrained("clue/xlnet_chinese_large") 26 | ``` 27 | 28 | ### About CLUE benchmark 29 | 30 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 31 | 32 | Github: https://github.com/CLUEbenchmark 33 | Website: https://www.cluebenchmarks.com/ 34 | -------------------------------------------------------------------------------- /model_cards/daigo/bert-base-japanese-sentiment/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - ja 4 | --- 5 | 6 | binary classification 7 | 8 | # Usage 9 | ``` 10 | print(pipeline("sentiment-analysis",model="daigo/bert-base-japanese-sentiment",tokenizer="daigo/bert-base-japanese-sentiment")("私は幸福である。")) 11 | 12 | [{'label': 'ポジティブ', 'score': 0.98430425}] 13 | ``` 14 | -------------------------------------------------------------------------------- /model_cards/dccuchile/bert-base-spanish-wwm-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | --- 4 | -------------------------------------------------------------------------------- /model_cards/dccuchile/bert-base-spanish-wwm-uncased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | --- 4 | -------------------------------------------------------------------------------- /model_cards/deepset/bert-base-german-cased-oldvocab/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | license: mit 4 | thumbnail: https://static.tildacdn.com/tild6438-3730-4164-b266-613634323466/german_bert.png 5 | tags: 6 | - exbert 7 | --- 8 | 9 | 10 | 11 | 12 | 13 | # German BERT with old vocabulary 14 | For details see the related [FARM issue](https://github.com/deepset-ai/FARM/issues/60). 15 | 16 | 17 | ## About us 18 | ![deepset logo](https://raw.githubusercontent.com/deepset-ai/FARM/master/docs/img/deepset_logo.png) 19 | 20 | We bring NLP to the industry via open source! 21 | Our focus: Industry specific language models & large scale QA systems. 
22 | 23 | Some of our work: 24 | - [German BERT (aka "bert-base-german-cased")](https://deepset.ai/german-bert) 25 | - [FARM](https://github.com/deepset-ai/FARM) 26 | - [Haystack](https://github.com/deepset-ai/haystack/) 27 | 28 | Get in touch: 29 | [Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Website](https://deepset.ai) 30 | -------------------------------------------------------------------------------- /model_cards/deepset/sentence_bert/README.md: -------------------------------------------------------------------------------- 1 | This is an upload of the bert-base-nli-stsb-mean-tokens pretrained model from the Sentence Transformers Repo (https://github.com/UKPLab/sentence-transformers) 2 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-cased-distilled-squad-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: "en" 3 | datasets: 4 | - squad 5 | metrics: 6 | - squad 7 | license: apache-2.0 8 | --- 9 | 10 | # DistilBERT base cased distilled SQuAD 11 | 12 | This model is a fine-tune checkpoint of [DistilBERT-base-cased](https://huggingface.co/distilbert-base-cased), fine-tuned using (a second step of) knowledge distillation on SQuAD v1.1. 13 | This model reaches a F1 score of 87.1 on the dev set (for comparison, BERT bert-base-cased version reaches a F1 score of 88.7). 14 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-german-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | license: apache-2.0 4 | --- 5 | ## distilbert-base-german-cased 6 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-uncased-finetuned-sst-2-english-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | datasets: 5 | - sst-2 6 | --- 7 | 8 | # DistilBERT base uncased finetuned SST-2 9 | 10 | This model is a fine-tune checkpoint of [DistilBERT-base-uncased](https://huggingface.co/distilbert-base-uncased), fine-tuned on SST-2. 11 | This model reaches an accuracy of 91.3 on the dev set (for comparison, Bert bert-base-uncased version reaches an accuracy of 92.7). 12 | 13 | # Fine-tuning hyper-parameters 14 | 15 | - learning_rate = 1e-5 16 | - batch_size = 32 17 | - warmup = 600 18 | - max_seq_length = 128 19 | - num_train_epochs = 3.0 20 | -------------------------------------------------------------------------------- /model_cards/distilgpt2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | tags: 4 | - exbert 5 | 6 | license: apache-2.0 7 | datasets: 8 | - openwebtext 9 | --- 10 | 11 | # DistilGPT2 12 | 13 | DistilGPT2 English language model pretrained with the supervision of [GPT2](https://huggingface.co/gpt2) (the smallest version of GPT2) on [OpenWebTextCorpus](https://skylion007.github.io/OpenWebTextCorpus/), a reproduction of OpenAI's WebText dataset. The model has 6 layers, 768 dimension and 12 heads, totalizing 82M parameters (compared to 124M parameters for GPT2). On average, DistilGPT2 is two times faster than GPT2. 
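As a quick illustration (not part of the original card), the model can be used directly with the text-generation pipeline:

```python
from transformers import pipeline

# Generate text with the distilled checkpoint.
generator = pipeline("text-generation", model="distilgpt2")
print(generator("Once upon a time,", max_length=30, num_return_sequences=1))
```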
14 |
15 | On the [WikiText-103](https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/) benchmark, GPT2 reaches a perplexity on the test set of 16.3 compared to 21.1 for DistilGPT2 (after fine-tuning on the train set).
16 |
17 | We encourage you to check out [GPT2](https://huggingface.co/gpt2) to learn more about usage, limitations and potential biases.
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/model_cards/djstrong/bg_cs_pl_ru_cased_L-12_H-768_A-12/README.md:
--------------------------------------------------------------------------------
1 | Slavic BERT from https://github.com/deepmipt/Slavic-BERT-NER http://files.deeppavlov.ai/deeppavlov_data/bg_cs_pl_ru_cased_L-12_H-768_A-12.tar.gz
2 |
--------------------------------------------------------------------------------
/model_cards/facebook/bart-large-cnn/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 |
5 | license: mit
6 | thumbnail: https://huggingface.co/front/thumbnails/facebook.png
7 | ---
8 |
--------------------------------------------------------------------------------
/model_cards/facebook/bart-large/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: mit
3 | thumbnail: https://huggingface.co/front/thumbnails/facebook.png
4 | ---
5 |
6 | The Bart model was proposed by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer on 29 Oct, 2019. According to the abstract,
7 |
8 | Bart uses a standard seq2seq/machine translation architecture with a bidirectional encoder (like BERT) and a left-to-right decoder (like GPT).
9 |
10 | The pretraining task involves randomly shuffling the order of the original sentences and a novel in-filling scheme, where spans of text are replaced with a single mask token.
11 |
12 | BART is particularly effective when fine-tuned for text generation but also works well for comprehension tasks. It matches the performance of RoBERTa with comparable training resources on GLUE and SQuAD, and achieves new state-of-the-art results on a range of abstractive dialogue, question answering, and summarization tasks, with gains of up to 6 ROUGE.
13 |
14 | The authors’ code can be found here:
15 | https://github.com/pytorch/fairseq/tree/master/examples/bart
16 |
--------------------------------------------------------------------------------
/model_cards/facebook/rag-token-nq_new/README.md:
--------------------------------------------------------------------------------
1 | The model can be loaded and used on [this branch](https://github.com/huggingface/transformers/tree/finalize_rag) as follows.
2 | 3 | 4 | # Load model 5 | 6 | ```python 7 | from transformers import RagTokenizer, RagTokenForGeneration, RagRetriever 8 | 9 | # create Retriever augmented model 10 | retriever = RagRetriever.from_pretrained("facebook/rag-token-nq_new", use_dummy_dataset=True) 11 | model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq_new", retriever=retriever) 12 | 13 | tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq_new") 14 | 15 | # create input ids and labels 16 | input_ids = tokenizer("who sings does he love me with reba", return_tensors="pt").input_ids 17 | 18 | # use labels 19 | labels = tokenizer.generator("Linda Davis", return_tensors="pt").input_ids 20 | 21 | 22 | # compute loss 23 | outputs = model(input_ids, labels=labels) 24 | ``` 25 | -------------------------------------------------------------------------------- /model_cards/flexudy/t5-base-multi-sentence-doctor/sent-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/model_cards/flexudy/t5-base-multi-sentence-doctor/sent-banner.png -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-128_A-2/README.md: 
-------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- 
/model_cards/google/bert_uncased_L-8_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/mobilebert-uncased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | thumbnail: https://huggingface.co/front/thumbnails/google.png 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | ## MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices 9 | 10 | MobileBERT is a thin version of BERT_LARGE, while equipped with bottleneck structures and a carefully designed balance 11 | between self-attentions and feed-forward networks. 12 | 13 | This checkpoint is the original MobileBert Optimized Uncased English: 14 | [uncased_L-24_H-128_B-512_A-4_F-4_OPT](https://storage.googleapis.com/cloud-tpu-checkpoints/mobilebert/uncased_L-24_H-128_B-512_A-4_F-4_OPT.tar.gz) 15 | checkpoint. 16 | 17 | ## How to use MobileBERT in `transformers` 18 | 19 | ```python 20 | from transformers import pipeline 21 | 22 | fill_mask = pipeline( 23 | "fill-mask", 24 | model="google/mobilebert-uncased", 25 | tokenizer="google/mobilebert-uncased" 26 | ) 27 | 28 | print( 29 | fill_mask(f"HuggingFace is creating a {fill_mask.tokenizer.mask_token} that the community uses to solve NLP tasks.") 30 | ) 31 | 32 | ``` 33 | -------------------------------------------------------------------------------- /model_cards/gpt2-large-README.md: -------------------------------------------------------------------------------- 1 | Test the full generation capabilities here: https://transformer.huggingface.co/doc/gpt2-large 2 | -------------------------------------------------------------------------------- /model_cards/gpt2-medium-README.md: -------------------------------------------------------------------------------- 1 | Test the full generation capabilities here: https://transformer.huggingface.co/doc/gpt2-large 2 | -------------------------------------------------------------------------------- /model_cards/gpt2-xl-README.md: -------------------------------------------------------------------------------- 1 | Test the whole generation capabilities here: https://transformer.huggingface.co/doc/gpt2-large 2 | -------------------------------------------------------------------------------- /model_cards/healx/gpt-2-pubmed-large/README.md: -------------------------------------------------------------------------------- 1 | GPT-2 (774M model) finetuned on 0.5m PubMed abstracts. Used in the [writemeanabstract.com](writemeanabstract.com) and the following preprint: 2 | 3 | [Papanikolaou, Yannis, and Andrea Pierleoni. "DARE: Data Augmented Relation Extraction with GPT-2." 
arXiv preprint arXiv:2004.13845 (2020).](https://arxiv.org/abs/2004.13845) 4 | -------------------------------------------------------------------------------- /model_cards/healx/gpt-2-pubmed-medium/README.md: -------------------------------------------------------------------------------- 1 | GPT-2 (355M model) finetuned on 0.5m PubMed abstracts. Used in the [writemeanabstract.com](writemeanabstract.com) and the following preprint: 2 | 3 | [Papanikolaou, Yannis, and Andrea Pierleoni. "DARE: Data Augmented Relation Extraction with GPT-2." arXiv preprint arXiv:2004.13845 (2020).](https://arxiv.org/abs/2004.13845) 4 | -------------------------------------------------------------------------------- /model_cards/huawei-noah/DynaBERT_MNLI/README.md: -------------------------------------------------------------------------------- 1 | ## DynaBERT: Dynamic BERT with Adaptive Width and Depth 2 | 3 | * DynaBERT can flexibly adjust the size and latency by selecting adaptive width and depth, and 4 | the subnetworks of it have competitive performances as other similar-sized compressed models. 5 | The training process of DynaBERT includes first training a width-adaptive BERT and then 6 | allowing both adaptive width and depth using knowledge distillation. 7 | 8 | * This code is modified based on the repository developed by Hugging Face: [Transformers v2.1.1](https://github.com/huggingface/transformers/tree/v2.1.1), and is released in [GitHub](https://github.com/huawei-noah/Pretrained-Language-Model/tree/master/DynaBERT). 9 | 10 | ### Reference 11 | Lu Hou, Zhiqi Huang, Lifeng Shang, Xin Jiang, Xiao Chen, Qun Liu. 12 | [DynaBERT: Dynamic BERT with Adaptive Width and Depth](https://arxiv.org/abs/2004.04037). 13 | ``` 14 | @inproceedings{hou2020dynabert, 15 | title = {DynaBERT: Dynamic BERT with Adaptive Width and Depth}, 16 | author = {Lu Hou, Zhiqi Huang, Lifeng Shang, Xin Jiang, Xiao Chen, Qun Liu}, 17 | booktitle = {Advances in Neural Information Processing Systems}, 18 | year = {2020} 19 | } 20 | ``` 21 | -------------------------------------------------------------------------------- /model_cards/huawei-noah/DynaBERT_SST-2/README.md: -------------------------------------------------------------------------------- 1 | ## DynaBERT: Dynamic BERT with Adaptive Width and Depth 2 | 3 | * DynaBERT can flexibly adjust the size and latency by selecting adaptive width and depth, and 4 | the subnetworks of it have competitive performances as other similar-sized compressed models. 5 | The training process of DynaBERT includes first training a width-adaptive BERT and then 6 | allowing both adaptive width and depth using knowledge distillation. 7 | 8 | * This code is modified based on the repository developed by Hugging Face: [Transformers v2.1.1](https://github.com/huggingface/transformers/tree/v2.1.1), and is released in [GitHub](https://github.com/huawei-noah/Pretrained-Language-Model/tree/master/DynaBERT). 9 | 10 | ### Reference 11 | Lu Hou, Zhiqi Huang, Lifeng Shang, Xin Jiang, Xiao Chen, Qun Liu. 12 | [DynaBERT: Dynamic BERT with Adaptive Width and Depth](https://arxiv.org/abs/2004.04037). 
13 | ``` 14 | @inproceedings{hou2020dynabert, 15 | title = {DynaBERT: Dynamic BERT with Adaptive Width and Depth}, 16 | author = {Lu Hou, Zhiqi Huang, Lifeng Shang, Xin Jiang, Xiao Chen, Qun Liu}, 17 | booktitle = {Advances in Neural Information Processing Systems}, 18 | year = {2020} 19 | } 20 | ``` 21 | -------------------------------------------------------------------------------- /model_cards/iarfmoose/roberta-base-bulgarian/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: bg 3 | --- 4 | 5 | # RoBERTa-base-bulgarian 6 | 7 | 8 | The RoBERTa model was originally introduced in [this paper](https://arxiv.org/abs/1907.11692). This is a version of [RoBERTa-base](https://huggingface.co/roberta-base) pretrained on Bulgarian text. 9 | 10 | ## Intended uses 11 | 12 | This model can be used for cloze tasks (masked language modeling) or finetuned on other tasks in Bulgarian. 13 | 14 | ## Limitations and bias 15 | 16 | The training data is unfiltered text from the internet and may contain all sorts of biases. 17 | 18 | ## Training data 19 | 20 | This model was trained on the following data: 21 | - [bg_dedup from OSCAR](https://oscar-corpus.com/) 22 | - [Newscrawl 1 million sentences 2017 from Leipzig Corpora Collection](https://wortschatz.uni-leipzig.de/en/download/bulgarian) 23 | - [Wikipedia 1 million sentences 2016 from Leipzig Corpora Collection](https://wortschatz.uni-leipzig.de/en/download/bulgarian) 24 | 25 | ## Training procedure 26 | 27 | The model was pretrained using a masked language-modeling objective with dynamic masking as described [here](https://huggingface.co/roberta-base#preprocessing) 28 | 29 | It was trained for 200k steps. The batch size was limited to 8 due to GPU memory limitations. 30 | -------------------------------------------------------------------------------- /model_cards/illuin/lepetit/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: fr 3 | thumbnail: https://miro.medium.com/max/700/1*MoPnD6vA9wTHjdLfW7POyw.png 4 | widget: 5 | - text: "Le camembert LePetit c'est le ." 6 | - text: "Salut les ça va ?" 7 | license: gpl-3.0 8 | tags: 9 | - masked-lm 10 | --- 11 | 12 | # LePetit: A pre-training efficient and lightning fast French Language Model 13 | 14 | See [blogpost](https://medium.com/illuin/lepetit-a-pre-training-efficient-and-lightning-fast-french-language-model-96495ad726b3) 15 | 16 | -------------------------------------------------------------------------------- /model_cards/ipuneetrathore/bert-base-cased-finetuned-finBERT/README.md: -------------------------------------------------------------------------------- 1 | ## FinBERT 2 | 3 | Code for importing and using this model is available [here](https://github.com/ipuneetrathore/BERT_models) 4 | -------------------------------------------------------------------------------- /model_cards/jannesg/bertsson/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: sv 3 | --- 4 | 5 | # BERTSSON Models 6 | 7 | The models are trained on: 8 | - Government Text 9 | - Swedish Literature 10 | - Swedish News 11 | 12 | Corpus size: Roughly 6B tokens. 13 | 14 | The following models are currently available: 15 | 16 | - **bertsson** - A BERT base model trained with the same hyperparameters as first published by Google. 17 | 18 | All models are cased and trained with whole word masking. 19 | 20 | Stay tuned for evaluations. 
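A minimal loading sketch (not part of the original card), assuming the checkpoint id `jannesg/bertsson` from this card's path:

```python
from transformers import AutoTokenizer, AutoModel

# Load the Swedish BERT base model and encode a sentence.
tokenizer = AutoTokenizer.from_pretrained("jannesg/bertsson")
model = AutoModel.from_pretrained("jannesg/bertsson")

inputs = tokenizer("Stockholm är Sveriges huvudstad.", return_tensors="pt")
outputs = model(**inputs)
```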
21 | -------------------------------------------------------------------------------- /model_cards/jimregan/BERTreach/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ga 3 | tags: 4 | - irish 5 | --- 6 | 7 | ## BERTreach 8 | 9 | ([beirtreach](https://www.teanglann.ie/en/fgb/beirtreach) means 'oyster bed') 10 | 11 | **Model size:** 84M 12 | 13 | **Training data:** 14 | * [PARSEME 1.2](https://gitlab.com/parseme/parseme_corpus_ga/-/blob/master/README.md) 15 | * Newscrawl 300k portion of the [Leipzig Corpora](https://wortschatz.uni-leipzig.de/en/download/irish) 16 | * Private news corpus crawled with [Corpus Crawler](https://github.com/google/corpuscrawler) 17 | 18 | (2125804 sentences, 47419062 tokens, as reckoned by wc) 19 | 20 | ``` 21 | from transformers import pipeline 22 | fill_mask = pipeline("fill-mask", model="jimregan/BERTreach", tokenizer="jimregan/BERTreach") 23 | ``` 24 | -------------------------------------------------------------------------------- /model_cards/jme-p/shrugging-grace-tweet-classifier/README.md: -------------------------------------------------------------------------------- 1 | # shrugging-grace/tweetclassifier 2 | 3 | ## Model description 4 | This model classifies tweets as either relating to the Covid-19 pandemic or not. 5 | 6 | ## Intended uses & limitations 7 | It is intended to be used on tweets commenting on UK politics, in particular those trending with the #PMQs hashtag, as this refers to weekly Prime Ministers' Questions. 8 | 9 | #### How to use 10 | ``LABEL_0`` means that the tweet relates to Covid-19 11 | 12 | ``LABEL_1`` means that the tweet does not relate to Covid-19 13 | 14 | ## Training data 15 | The model was trained on 1000 tweets (with the "#PMQs'), which were manually labeled by the author. The tweets were collected between May-July 2020. 16 | 17 | ### BibTeX entry and citation info 18 | 19 | This was based on a pretrained version of BERT. 20 | 21 | @article{devlin2018bert, 22 | title={Bert: Pre-training of deep bidirectional transformers for language understanding}, 23 | author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, 24 | journal={arXiv preprint arXiv:1810.04805}, 25 | year={2018} 26 | } 27 | -------------------------------------------------------------------------------- /model_cards/jordimas/julibert/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ca 3 | --- 4 | 5 | ## Introduction 6 | 7 | 8 | Download the model here: 9 | 10 | * Catalan Roberta model: [julibert-2020-11-10.zip](https://www.softcatala.org/pub/softcatala/julibert/julibert-2020-11-10.zip) 11 | 12 | ## What's this? 
13 |
14 | Source code: https://github.com/Softcatala/julibert
15 |
16 | * Corpus: OSCAR Catalan corpus (3.8 GB)
17 | * Model type: RoBERTa
18 | * Vocabulary size: 50265
19 | * Steps: 500000
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/model_cards/julien-c/bert-xsmall-dummy/README.md:
--------------------------------------------------------------------------------
1 | ## How to build a dummy model
2 |
3 |
4 | ```python
5 | from transformers import BertConfig, BertForMaskedLM, BertTokenizer, TFBertForMaskedLM
6 |
7 | SMALL_MODEL_IDENTIFIER = "julien-c/bert-xsmall-dummy"
8 | DIRNAME = "./bert-xsmall-dummy"
9 |
10 | config = BertConfig(10, 20, 1, 1, 40)
11 |
12 | model = BertForMaskedLM(config)
13 | model.save_pretrained(DIRNAME)
14 |
15 | tf_model = TFBertForMaskedLM.from_pretrained(DIRNAME, from_pt=True)
16 | tf_model.save_pretrained(DIRNAME)
17 |
18 | # Slightly different for tokenizer.
19 | # tokenizer = BertTokenizer.from_pretrained(DIRNAME)
20 | # tokenizer.save_pretrained()
21 | ```
22 |
--------------------------------------------------------------------------------
/model_cards/keshan/SinhalaBERTo/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: si
3 | tags:
4 | - SinhalaBERTo
5 | - Sinhala
6 | - roberta
7 | datasets:
8 | - oscar
9 | ---
10 | ### Overview
11 |
12 | This is a slightly smaller model trained on the [OSCAR](https://oscar-corpus.com/) Sinhala dedup dataset. As Sinhala is a low-resource language, only a handful of models have been trained for it. So, this would be a great place to start training for more downstream tasks.
13 |
14 | ## Model Specification
15 |
16 |
17 | The model chosen for training is [RoBERTa](https://arxiv.org/abs/1907.11692) with the following specifications:
18 | 1. vocab_size=52000
19 | 2. max_position_embeddings=514
20 | 3. num_attention_heads=12
21 | 4. num_hidden_layers=6
22 | 5. type_vocab_size=1
23 |
24 | ## How to Use
25 | You can use this model directly with a pipeline for masked language modeling:
26 |
27 | ```py
28 | from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
29 |
30 | model = AutoModelWithLMHead.from_pretrained("keshan/SinhalaBERTo")
31 | tokenizer = AutoTokenizer.from_pretrained("keshan/SinhalaBERTo")
32 |
33 | fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)
34 |
35 | fill_mask(f"මම ගෙදර {tokenizer.mask_token}.")
36 |
37 | ```
38 |
--------------------------------------------------------------------------------
/model_cards/kuppuluri/telugu_bertu/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: te
3 | ---
4 | # telugu_bertu
5 |
6 | ## Model description
7 |
8 | This model is a BERT MLM model trained on Telugu.
9 | 10 | ## Intended uses & limitations 11 | 12 | #### How to use 13 | 14 | ```python 15 | from transformers import AutoModelWithLMHead, AutoTokenizer, pipeline 16 | tokenizer = AutoTokenizer.from_pretrained("kuppuluri/telugu_bertu", 17 | clean_text=False, 18 | handle_chinese_chars=False, 19 | strip_accents=False, 20 | wordpieces_prefix='##') 21 | model = AutoModelWithLMHead.from_pretrained("kuppuluri/telugu_bertu") 22 | fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer) 23 | results = fill_mask("మక్దూంపల్లి పేరుతో చాలా [MASK] ఉన్నాయి.") 24 | ``` 25 | -------------------------------------------------------------------------------- /model_cards/kuppuluri/telugu_bertu_ner/README.md: -------------------------------------------------------------------------------- 1 | # Named Entity Recognition Model for Telugu 2 | 3 | #### How to use 4 | 5 | ```python 6 | from simpletransformers.ner import NERModel 7 | model = NERModel('bert', 8 | 'kuppuluri/telugu_bertu_ner', 9 | labels=[ 10 | 'B-PERSON', 'I-ORG', 'B-ORG', 'I-LOC', 'B-MISC', 11 | 'I-MISC', 'I-PERSON', 'B-LOC', 'O' 12 | ], 13 | use_cuda=False, 14 | args={"use_multiprocessing": False}) 15 | 16 | text = "విరాట్ కోహ్లీ కూడా అదే నిర్లక్ష్యాన్ని ప్రదర్శించి కేవలం ఒక పరుగుకే రనౌటై పెవిలియన్ చేరాడు ." 17 | results = model.predict([text]) 18 | ``` 19 | 20 | ## Training data 21 | 22 | Training data is from https://github.com/anikethjr/NER_Telugu 23 | 24 | ## Eval results 25 | 26 | On the test set my results were 27 | 28 | eval_loss = 0.0004407190410447974 29 | 30 | f1_score = 0.999519076627124 31 | 32 | precision = 0.9994389677005691 33 | 34 | recall = 0.9995991983967936 35 | 36 | -------------------------------------------------------------------------------- /model_cards/kuppuluri/telugu_bertu_pos/README.md: -------------------------------------------------------------------------------- 1 | # Part of Speech tagging Model for Telugu 2 | 3 | #### How to use 4 | 5 | ```python 6 | from simpletransformers.ner import NERModel 7 | model = NERModel('bert', 8 | 'kuppuluri/telugu_bertu_pos', 9 | args={"use_multiprocessing": False}, 10 | labels=[ 11 | 'QC', 'JJ', 'NN', 'QF', 'RDP', 'O', 12 | 'NNO', 'PRP', 'RP', 'VM', 'WQ', 13 | 'PSP', 'UT', 'CC', 'INTF', 'SYMP', 14 | 'NNP', 'INJ', 'SYM', 'CL', 'QO', 15 | 'DEM', 'RB', 'NST', ], 16 | use_cuda=False) 17 | 18 | text = "విరాట్ కోహ్లీ కూడా అదే నిర్లక్ష్యాన్ని ప్రదర్శించి కేవలం ఒక పరుగుకే రనౌటై పెవిలియన్ చేరాడు ." 
19 | results = model.predict([text])
20 | ```
21 |
22 | ## Training data
23 |
24 | Training data is from https://github.com/anikethjr/NER_Telugu
25 |
26 | ## Eval results
27 |
28 | On the test set my results were
29 |
30 | eval_loss = 0.0036797842364565416
31 |
32 | f1_score = 0.9983795127912227
33 |
34 | precision = 0.9984325602401637
35 |
36 | recall = 0.9983264709788816
37 |
--------------------------------------------------------------------------------
/model_cards/kuppuluri/telugu_bertu_tydiqa/README.md:
--------------------------------------------------------------------------------
1 | # Telugu Question-Answering model trained on Tydiqa dataset from Google
2 |
3 | #### How to use
4 |
5 | ```python
6 | from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
7 | model = AutoModelForQuestionAnswering.from_pretrained("kuppuluri/telugu_bertu_tydiqa")
8 | tokenizer = AutoTokenizer.from_pretrained("kuppuluri/telugu_bertu_tydiqa",
9 |                                           clean_text=False,
10 |                                           handle_chinese_chars=False,
11 |                                           strip_accents=False,
12 |                                           wordpieces_prefix='##')
13 | nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
14 | result = nlp({'question': question, 'context': context})  # supply your own `question` and `context` strings
15 | ```
16 |
17 | ## Training data
18 | I used Tydiqa Telugu data from Google https://github.com/google-research-datasets/tydiqa
19 |
--------------------------------------------------------------------------------
/model_cards/lanwuwei/GigaBERT-v3-Arabic-and-English/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - en
4 | - ar
5 | datasets:
6 | - gigaword
7 | - oscar
8 | - wikipedia
9 | ---
10 |
11 | ## GigaBERT-v3
12 | GigaBERT-v3 is a customized bilingual BERT for English and Arabic. It was pre-trained on a large-scale corpus (Gigaword+Oscar+Wikipedia) with ~10B tokens, showing state-of-the-art zero-shot transfer performance from English to Arabic on information extraction (IE) tasks. More details can be found in the following paper:
13 |
14 |     @inproceedings{lan2020gigabert,
15 |       author = {Lan, Wuwei and Chen, Yang and Xu, Wei and Ritter, Alan},
16 |       title = {GigaBERT: Zero-shot Transfer Learning from English to Arabic},
17 |       booktitle = {Proceedings of The 2020 Conference on Empirical Methods on Natural Language Processing (EMNLP)},
18 |       year = {2020}
19 |     }
20 |
21 | ## Usage
22 | ```
23 | from transformers import BertTokenizer, BertForTokenClassification
24 | tokenizer = BertTokenizer.from_pretrained("lanwuwei/GigaBERT-v3-Arabic-and-English", do_lower_case=True)
25 | model = BertForTokenClassification.from_pretrained("lanwuwei/GigaBERT-v3-Arabic-and-English")
26 | ```
27 | More code examples can be found [here](https://github.com/lanwuwei/GigaBERT).
28 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/bert-imdb/README.md:
--------------------------------------------------------------------------------
1 | # BERT-IMDB
2 |
3 | ## What is it?
4 | BERT (`bert-large-cased`) trained for sentiment classification on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews).
5 |
6 | ## Training setting
7 |
8 | The model was trained on 80% of the IMDB dataset for sentiment classification for three epochs with a learning rate of `1e-5` with the `simpletransformers` library. The library uses a learning rate schedule.
9 |
10 | ## Result
11 | The model achieved 90% classification accuracy on the validation set.
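An illustrative inference sketch (not part of the original card), assuming the checkpoint is published as `lvwerra/bert-imdb` with a sequence classification head:

```python
from transformers import pipeline

# Classify the sentiment of a movie review with the fine-tuned checkpoint.
classifier = pipeline("sentiment-analysis", model="lvwerra/bert-imdb", tokenizer="lvwerra/bert-imdb")
print(classifier("This movie was an absolute delight from start to finish."))
```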
12 |
13 | ## Reference
14 | The full experiment is available in the [trl repo](https://lvwerra.github.io/trl/03-bert-imdb-training/).
15 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/gpt2-imdb/README.md:
--------------------------------------------------------------------------------
1 | # GPT2-IMDB
2 |
3 | ## What is it?
4 | A GPT2 (`gpt2`) language model fine-tuned on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews).
5 |
6 | ## Training setting
7 |
8 | The GPT2 language model was fine-tuned for 1 epoch on the IMDB dataset. All reviews were joined into a single text file separated by the EOS token:
9 |
10 | ```
11 | import pandas as pd
12 | df = pd.read_csv("imdb-dataset.csv")
13 | imdb_str = " <|endoftext|> ".join(df['review'].tolist())
14 |
15 | with open('imdb.txt', 'w') as f:
16 |     f.write(imdb_str)
17 | ```
18 |
19 | To train the model, the `run_language_modeling.py` script from the `transformers` library was used:
20 |
21 | ```
22 | python run_language_modeling.py \
23 |     --train_data_file imdb.txt \
24 |     --output_dir gpt2-imdb \
25 |     --model_type gpt2 \
26 |     --model_name_or_path gpt2
27 | ```
28 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/gpt2-medium-taboo/README.md:
--------------------------------------------------------------------------------
1 | # GPT-2 (medium) Taboo
2 |
3 | ## What is it?
4 | A fine-tuned GPT-2 version for Taboo card generation.
5 |
6 | ## Training setting
7 |
8 | The model was trained on ~900 Taboo cards in the following format for 100 epochs:
9 | ```
10 | Describe the word Glitch without using the words Problem, Unexpected, Technology, Minor, Outage.
11 | ```
12 |
13 |
--------------------------------------------------------------------------------
/model_cards/lysandre/arxiv-nlp/README.md:
--------------------------------------------------------------------------------
1 | # ArXiv-NLP GPT-2 checkpoint
2 |
3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` checkpoint fine-tuned on ArXiv papers in the computational linguistics field.
4 |
5 | ## Training data
6 |
7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 80MB of text from the computational linguistics (cs.CL) field.
--------------------------------------------------------------------------------
/model_cards/lysandre/arxiv/README.md:
--------------------------------------------------------------------------------
1 | # ArXiv GPT-2 checkpoint
2 |
3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` checkpoint fine-tuned on ArXiv papers in physics fields.
4 |
5 | ## Training data
6 |
7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 130MB of text, mostly from quantum physics (quant-ph) and other physics sub-fields.
8 |
--------------------------------------------------------------------------------
/model_cards/m3hrdadfi/bert2bert-fa-news-headline/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: fa
3 | license: apache-2.0
4 | tags:
5 | - summarization
6 | ---
7 |
8 | A Bert2Bert model trained on the VoA Persian Corpus (a medium-sized corpus of 7.9 million words, 2003-2008) that generates news headlines. The model achieved a ROUGE-2 score of 25.30.
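A hedged usage sketch (not part of the original card), assuming the checkpoint loads as an `EncoderDecoderModel` under the id `m3hrdadfi/bert2bert-fa-news-headline`; the Persian input string is only a placeholder:

```python
from transformers import AutoTokenizer, EncoderDecoderModel

# Assumed model id, taken from this card's path.
model_id = "m3hrdadfi/bert2bert-fa-news-headline"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = EncoderDecoderModel.from_pretrained(model_id)

# Generate a headline for a (placeholder) Persian news paragraph.
inputs = tokenizer("متن خبر در اینجا قرار می‌گیرد.", return_tensors="pt")
outputs = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask, max_length=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```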
9 | 10 | For more detail, please follow the [News Headline Generation](https://github.com/m3hrdadfi/news-headline-generation) repo. 11 | 12 | 13 | ## Eval results 14 | The following table summarizes the ROUGE scores obtained by the Bert2Bert model. 15 | 16 | | % | Precision | Recall | FMeasure | 17 | |:-------:|:---------:|:------:|:--------:| 18 | | ROUGE-1 | 43.78 | 45.52 | 43.54 | 19 | | ROUGE-2 | 24.50 | 25.30* | 24.24 | 20 | | ROUGE-L | 41.20 | 42.22 | 40.76 | 21 | 22 | 23 | ## Questions? 24 | Post a Github issue on the [News Headline Generation](https://github.com/hooshvare/news-headline-generation/issues) repo. 25 | -------------------------------------------------------------------------------- /model_cards/m3hrdadfi/bert2bert-fa-wiki-summary/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: fa 3 | license: apache-2.0 4 | tags: 5 | - summarization 6 | --- 7 | 8 | A Bert2Bert model on the Wiki Summary dataset to summarize articles. The model achieved an 8.47 ROUGE-2 score. 9 | 10 | For more detail, please follow the [Wiki Summary](https://github.com/m3hrdadfi/wiki-summary) repo. 11 | 12 | 13 | ## Eval results 14 | The following table summarizes the ROUGE scores obtained by the Bert2Bert model. 15 | 16 | | % | Precision | Recall | FMeasure | 17 | |:-------:|:---------:|:------:|:--------:| 18 | | ROUGE-1 | 28.14 | 30.86 | 27.34 | 19 | | ROUGE-2 | 07.12 | 08.47* | 07.10 | 20 | | ROUGE-L | 28.49 | 25.87 | 25.50 | 21 | 22 | 23 | ## Questions? 24 | Post a Github issue on the [Wiki Summary](https://github.com/m3hrdadfi/wiki-summary/issues) repo. 25 | -------------------------------------------------------------------------------- /model_cards/monsoon-nlp/dv-wave/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: dv 3 | --- 4 | 5 | # dv-wave 6 | 7 | This is a second attempt at a Dhivehi language model trained with 8 | Google Research's [ELECTRA](https://github.com/google-research/electra). 9 | 10 | Tokenization and pre-training CoLab: https://colab.research.google.com/drive/1ZJ3tU9MwyWj6UtQ-8G7QJKTn-hG1uQ9v?usp=sharing 11 | 12 | Using SimpleTransformers to classify news https://colab.research.google.com/drive/1KnyQxRNWG_yVwms_x9MUAqFQVeMecTV7?usp=sharing 13 | 14 | V1: similar performance to mBERT on news classification task after finetuning for 3 epochs (52%) 15 | 16 | V2: fixed tokenizers ```do_lower_case=False``` and ```strip_accents=False``` to preserve vowel signs of Dhivehi 17 | dv-wave: 89% to mBERT: 52% 18 | 19 | ## Corpus 20 | 21 | Trained on @Sofwath's 307MB corpus of Dhivehi text: https://github.com/Sofwath/DhivehiDatasets - this repo also contains the news classification task CSV 22 | 23 | [OSCAR](https://oscar-corpus.com/) was considered but has not been added to pretraining; as of 24 | this writing their web crawl has 126MB of Dhivehi text (79MB deduped). 
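A minimal loading sketch (not part of the original card), assuming the checkpoint id `monsoon-nlp/dv-wave` from this card's path:

```python
from transformers import AutoTokenizer, AutoModel

# Load the Dhivehi ELECTRA encoder; add a task head (e.g. for news classification) when fine-tuning.
tokenizer = AutoTokenizer.from_pretrained("monsoon-nlp/dv-wave")
model = AutoModel.from_pretrained("monsoon-nlp/dv-wave")
```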
25 | 26 | ## Vocabulary 27 | 28 | Included as vocab.txt in the upload - vocab_size is 29874 29 | -------------------------------------------------------------------------------- /model_cards/mrm8488/GuaPeTe-2-tiny/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | widget: 4 | - text: "Murcia es la huerta de Europa porque" 5 | --- 6 | 7 | #GuaPeTe-2-tiny: A proof of concept tiny GPT-2 like model trained on Spanish Wikipedia corpus 8 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RoBERTinha/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: gl 3 | widget: 4 | - text: "Galicia é unha autónoma española." 5 | - text: "A lingua oficial de Galicia é o ." 6 | --- 7 | 8 | # RoBERTinha: RoBERTa-like Language model trained on OSCAR Galician corpus 9 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RoBasquERTa/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: eu 3 | widget: 4 | - text: "Euskara da Euskal Herriko ofiziala" 5 | - text: "Gaur egun, Euskadik Espainia osoko ekonomia du" 6 | --- 7 | 8 | # RoBasquERTa: RoBERTa-like Language model trained on OSCAR Basque corpus 9 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RuPERTa-base-finetuned-pawsx-es/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | datasets: 4 | - xtreme 5 | widget: 6 | - text: "En 2009 se mudó a Filadelfia y en la actualidad vive en Nueva York. Se mudó nuevamente a Filadelfia en 2009 y ahora vive en la ciudad de Nueva York." 7 | --- 8 | 9 | # RuPERTa-base fine-tuned on PAWS-X-es for Paraphrase Identification 10 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RuPERTa-base-finetuned-squadv1/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | datasets: 4 | - squad 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/mrm8488/RuPERTa-base-finetuned-squadv2/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | datasets: 4 | - squad_v2 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/mrm8488/bert-base-german-dbmdz-cased-finetuned-pawsx-de/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | datasets: 4 | - xtreme 5 | widget: 6 | - text: "Winarsky ist Mitglied des IEEE, Phi Beta Kappa, des ACM und des Sigma Xi. Winarsky ist Mitglied des ACM, des IEEE, der Phi Beta Kappa und der Sigma Xi." 7 | --- 8 | 9 | # bert-base-german-dbmdz-cased fine-tuned on PAWS-X-de for Paraphrase Identification 10 | -------------------------------------------------------------------------------- /model_cards/mrm8488/camembert-base-finetuned-pawsx-fr/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: fr 3 | datasets: 4 | - xtreme 5 | widget: 6 | - text: "La première série a été mieux reçue par la critique que la seconde. La seconde série a été bien accueillie par la critique, mieux que la première." 
7 | --- 8 | 9 | # Camembert-base fine-tuned on PAWS-X-fr for Paraphrase Identification 10 | -------------------------------------------------------------------------------- /model_cards/mrm8488/electricidad-base-finetuned-pawsx-es/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: es 3 | datasets: 4 | - xtreme 5 | widget: 6 | - text: "El río Tabaci es una vertiente del río Leurda en Rumania. El río Leurda es un afluente del río Tabaci en Rumania." 7 | --- 8 | 9 | # Electricidad-base fine-tuned on PAWS-X-es for Paraphrase Identification 10 | -------------------------------------------------------------------------------- /model_cards/mrm8488/gpt2-finetuned-recipes-cooking/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | thumbnail: 4 | widget: 5 | - text: "HuggingFace Cake:" 6 | --- 7 | -------------------------------------------------------------------------------- /model_cards/mrm8488/gpt2-finetuned-recipes-cooking_v2/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | thumbnail: 4 | widget: 5 | - text: "HuggingFace Cake:" 6 | --- 7 | -------------------------------------------------------------------------------- /model_cards/mys/electra-base-turkish-cased-ner/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: tr 3 | --- 4 | 5 | ## What is this 6 | 7 | A NER model for Turkish with 48 categories trained on the dataset [Shrinked TWNERTC Turkish NER Data](https://www.kaggle.com/behcetsenturk/shrinked-twnertc-turkish-ner-data-by-kuzgunlar) by Behçet Şentürk, which is itself a filtered and cleaned version of the following automatically labeled dataset: 8 | 9 | > Sahin, H. Bahadir; Eren, Mustafa Tolga; Tirkaz, Caglar; Sonmez, Ozan; Yildiz, Eray (2017), “English/Turkish Wikipedia Named-Entity Recognition and Text Categorization Dataset”, Mendeley Data, v1 http://dx.doi.org/10.17632/cdcztymf4k.1 10 | 11 | ## Backbone model 12 | 13 | The backbone model is [electra-base-turkish-cased-discriminator](https://huggingface.co/dbmdz/electra-base-turkish-cased-discriminator), and I finetuned it for token classification. 14 | 15 | I'm continuing to figure out if it is possible to improve accuracy with this dataset, but it is already usable for non-critic applications. You can reach out to me on [Twitter](https://twitter.com/myusufsarigoz) for discussions and issues. 16 | I will also release a notebook to finetune NER models with Shrinked TWNERTC as well as sample inference code to demonstrate what's possible with this model. 
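Until then, here is a minimal sketch of what inference might look like with the `transformers` NER pipeline. It assumes the checkpoint is published on the Hub under the same ID as this card, `mys/electra-base-turkish-cased-ner`, and the example sentence is only an illustration:

```python
# Minimal sketch, not the official sample code: token classification with the pipeline API.
# Assumes the checkpoint is available on the Hub as "mys/electra-base-turkish-cased-ner".
from transformers import pipeline

ner = pipeline(
    "ner",
    model="mys/electra-base-turkish-cased-ner",
    tokenizer="mys/electra-base-turkish-cased-ner",
    grouped_entities=True,  # merge sub-word pieces into whole entity spans
)

print(ner("Mustafa Kemal Atatürk 1881 yılında Selanik'te doğdu."))
```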
17 | -------------------------------------------------------------------------------- /model_cards/ncoop57/bart-base-code-summarizer-java-v0/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | 5 | license: mit 6 | --- 7 | 8 | ## ncoop57/bart-base-code-summarizer-java-v0 9 | -------------------------------------------------------------------------------- /model_cards/nikokons/gpt2-greek/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: el 3 | --- 4 | 5 | ## gpt2-greek 6 | -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-100M-1/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-100M-2/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-100M-3/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-10M-1/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-10M-2/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-10M-3/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-1B-1/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-1B-2/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-base-1B-3/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-med-small-1M-1/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-med-small-1M-2/README.md: -------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/nyu-mll/roberta-med-small-1M-3/README.md: 
-------------------------------------------------------------------------------- 1 | ../roberta_1M_to_1B/README.md -------------------------------------------------------------------------------- /model_cards/patrickvonplaten/bert2bert_cnn_daily_mail/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | datasets: 5 | - cnn_dailymail 6 | tags: 7 | - summarization 8 | --- 9 | 10 | Bert2Bert Summarization with 🤗EncoderDecoder Framework 11 | This model is a warm-started *BERT2BERT* model fine-tuned on the *CNN/Dailymail* summarization dataset. 12 | 13 | The model achieves a **18.22** ROUGE-2 score on *CNN/Dailymail*'s test dataset. 14 | 15 | For more details on how the model was fine-tuned, please refer to 16 | [this](https://colab.research.google.com/drive/1Ekd5pUeCX7VOrMx94_czTkwNtLN32Uyu?usp=sharing) notebook. 17 | -------------------------------------------------------------------------------- /model_cards/patrickvonplaten/roberta_shared_bbc_xsum/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | datasets: 5 | - xsum 6 | tags: 7 | - summarization 8 | --- 9 | 10 | Shared RoBERTa2RoBERTa Summarization with 🤗EncoderDecoder Framework 11 | This model is a warm-started *RoBERTaShared* model fine-tuned on the *BBC XSum* summarization dataset. 12 | 13 | The model achieves a **16.89** ROUGE-2 score on *BBC XSUM*'s test dataset. 14 | 15 | For more details on how the model was fine-tuned, please refer to 16 | [this](https://colab.research.google.com/drive/1Ekd5pUeCX7VOrMx94_czTkwNtLN32Uyu?usp=sharing) notebook. 17 | -------------------------------------------------------------------------------- /model_cards/pedropei/question-intimacy/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - en 4 | inference: false 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/pradhyra/AWSBlogBert/README.md: -------------------------------------------------------------------------------- 1 | This model is pre-trained on blog articles from AWS Blogs. 2 | 3 | ## Pre-training corpora 4 | The input text contains around 3000 blog articles on [AWS Blogs website](https://aws.amazon.com/blogs/) technical subject matter including AWS products, tools and tutorials. 5 | 6 | ## Pre-training details 7 | I picked a Roberta architecture for masked language modeling (6-layer, 768-hidden, 12-heads, 82M parameters) and its corresponding ByteLevelBPE tokenization strategy. I then followed HuggingFace's Transformers [blog post](https://huggingface.co/blog/how-to-train) to train the model. 8 | I chose to follow the following training set-up: 28k training steps with batches of 64 sequences of length 512 with an initial learning rate 5e-5. The model acheived a training loss of 3.6 on the MLM task over 10 epochs. 9 | -------------------------------------------------------------------------------- /model_cards/pranavpsv/gpt2-genre-story-generator/README.md: -------------------------------------------------------------------------------- 1 | 2 | # GPT2 Genre Based Story Generator 3 | 4 | ## Model description 5 | 6 | GPT2 fine-tuned on genre-based story generation. 7 | 8 | ## Intended uses 9 | 10 | Used to generate stories based on user inputted genre and starting prompts. 
11 | 12 | ## How to use 13 | 14 | #### Supported Genres 15 | superhero, action, drama, horror, thriller, sci_fi 16 | #### Input text format 17 | \ \ Some optional text... 18 | 19 | **Example**: \ \ After discovering time travel, 20 | 21 | ```python 22 | # Example of usage 23 | from transformers import pipeline 24 | 25 | story_gen = pipeline("text-generation", "pranavpsv/gpt2-genre-story-generator") 26 | print(story_gen(" Batman")) 27 | 28 | ``` 29 | 30 | ## Training data 31 | 32 | Initialized with pre-trained weights of "gpt2" checkpoint. Fine-tuned the model on stories of various genres. 33 | -------------------------------------------------------------------------------- /model_cards/rdenadai/BR_BERTo/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: pt 3 | tags: 4 | - portuguese 5 | - brazil 6 | - pt_BR 7 | widget: 8 | - text: gostei muito dessa 9 | --- 10 | 11 | # BR_BERTo 12 | 13 | Portuguese (Brazil) model for text inference. 14 | 15 | ## Params 16 | 17 | Trained on a corpus of 6_993_330 sentences. 18 | 19 | - Vocab size: 150_000 20 | - RobertaForMaskedLM size : 512 21 | - Num train epochs: 3 22 | - Time to train: ~10days (on GCP with a Nvidia T4) 23 | 24 | I follow the great tutorial from HuggingFace team: 25 | 26 | [How to train a new language model from scratch using Transformers and Tokenizers](https://huggingface.co/blog/how-to-train) 27 | 28 | More infor here: 29 | 30 | [BR_BERTo](https://github.com/rdenadai/BR-BERTo) 31 | -------------------------------------------------------------------------------- /model_cards/roberta-large-mnli-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: mit 3 | widget: 4 | - text: "I like you. I love you." 5 | --- 6 | 7 | 8 | ## roberta-large-mnli 9 | 10 | Trained by Facebook, [original source](https://github.com/pytorch/fairseq/tree/master/examples/roberta) 11 | 12 | ```bibtex 13 | @article{liu2019roberta, 14 | title = {RoBERTa: A Robustly Optimized BERT Pretraining Approach}, 15 | author = {Yinhan Liu and Myle Ott and Naman Goyal and Jingfei Du and 16 | Mandar Joshi and Danqi Chen and Omer Levy and Mike Lewis and 17 | Luke Zettlemoyer and Veselin Stoyanov}, 18 | journal={arXiv preprint arXiv:1907.11692}, 19 | year = {2019}, 20 | } 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /model_cards/sarnikowski/electra-small-discriminator-da-256-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: da 3 | license: cc-by-4.0 4 | --- 5 | 6 | # Danish ELECTRA small (cased) 7 | 8 | An [ELECTRA](https://arxiv.org/abs/2003.10555) model pretrained on a custom Danish corpus (~17.5gb). 9 | For details regarding data sources and training procedure, along with benchmarks on downstream tasks, go to: https://github.com/sarnikowski/danish_transformers/tree/main/electra 10 | 11 | ## Usage 12 | 13 | ```python 14 | from transformers import AutoTokenizer, AutoModel 15 | 16 | tokenizer = AutoTokenizer.from_pretrained("sarnikowski/electra-small-discriminator-da-256-cased") 17 | model = AutoModel.from_pretrained("sarnikowski/electra-small-discriminator-da-256-cased") 18 | ``` 19 | 20 | ## Questions? 
21 | 22 | If you have any questions feel free to open an issue on the [danish_transformers](https://github.com/sarnikowski/danish_transformers) repository, or send an email to p.sarnikowski@gmail.com 23 | -------------------------------------------------------------------------------- /model_cards/schmidek/electra-small-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | --- 5 | 6 | ## ELECTRA-small-cased 7 | 8 | This is a cased version of `google/electra-small-discriminator`, trained on the 9 | [OpenWebText corpus](https://skylion007.github.io/OpenWebTextCorpus/). 10 | 11 | Uses the same tokenizer and vocab from `bert-base-cased` 12 | -------------------------------------------------------------------------------- /model_cards/severinsimmler/literary-german-bert/kfold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/model_cards/severinsimmler/literary-german-bert/kfold.png -------------------------------------------------------------------------------- /model_cards/severinsimmler/literary-german-bert/prosa-jahre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/model_cards/severinsimmler/literary-german-bert/prosa-jahre.png -------------------------------------------------------------------------------- /model_cards/shrugging-grace/tweetclassifier/README.md: -------------------------------------------------------------------------------- 1 | # shrugging-grace/tweetclassifier 2 | 3 | ## Model description 4 | This model classifies tweets as either relating to the Covid-19 pandemic or not. 5 | 6 | ## Intended uses & limitations 7 | It is intended to be used on tweets commenting on UK politics, in particular those trending with the #PMQs hashtag, as this refers to weekly Prime Ministers' Questions. 8 | 9 | #### How to use 10 | ``LABEL_0`` means that the tweet relates to Covid-19 11 | 12 | ``LABEL_1`` means that the tweet does not relate to Covid-19 13 | 14 | ## Training data 15 | The model was trained on 1000 tweets (with the "#PMQs'), which were manually labeled by the author. The tweets were collected between May-July 2020. 16 | 17 | ### BibTeX entry and citation info 18 | 19 | This was based on a pretrained version of BERT. 20 | 21 | @article{devlin2018bert, 22 | title={Bert: Pre-training of deep bidirectional transformers for language understanding}, 23 | author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, 24 | journal={arXiv preprint arXiv:1810.04805}, 25 | year={2018} 26 | } 27 | -------------------------------------------------------------------------------- /model_cards/spentaur/yelp/README.md: -------------------------------------------------------------------------------- 1 | # DistilBERT Yelp Review Sentiment 2 | This model is used for sentiment analysis on english yelp reviews. 3 | It is a DistilBERT model trained on 1 million reviews from the yelp open dataset. 4 | It is a regression model, with outputs in the range of ~-2 to ~2. With -2 being 1 star and 2 being 5 stars. 5 | It was trained using the [ktrain](https://github.com/amaiya/ktrain) because of it's ease of use. 
6 | 7 | Example use: 8 | 9 | ``` 10 | tokenizer = AutoTokenizer.from_pretrained( 11 | 'distilbert-base-uncased', use_fast=True) 12 | model = TFAutoModelForSequenceClassification.from_pretrained( 13 | "spentaur/yelp") 14 | 15 | review = "This place is great!" 16 | input_ids = tokenizer.encode(review, return_tensors='tf') 17 | pred = model(input_ids)[0][0][0].numpy() 18 | # pred should === 1.9562385 19 | ``` 20 | -------------------------------------------------------------------------------- /model_cards/stas/tiny-wmt19-en-de/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - en 4 | - de 5 | thumbnail: 6 | tags: 7 | - wmt19 8 | - testing 9 | license: apache-2.0 10 | datasets: 11 | - wmt19 12 | metrics: 13 | - bleu 14 | --- 15 | 16 | # Tiny FSMT 17 | 18 | This is a tiny model that is used in the `transformers` test suite. It doesn't do anything useful, other than testing that `FSMT` works. 19 | -------------------------------------------------------------------------------- /model_cards/surajp/albert-base-sanskrit/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: sa 3 | --- 4 | 5 | 6 | # ALBERT-base-Sanskrit 7 | 8 | 9 | Explaination Notebook Colab: [SanskritALBERT.ipynb](https://colab.research.google.com/github/parmarsuraj99/suraj-parmar/blob/master/_notebooks/2020-05-02-SanskritALBERT.ipynb) 10 | 11 | Size of the model is **46MB** 12 | 13 | Example of usage: 14 | 15 | ``` 16 | tokenizer = AutoTokenizer.from_pretrained("surajp/albert-base-sanskrit") 17 | model = AutoModel.from_pretrained("surajp/albert-base-sanskrit") 18 | 19 | enc=tokenizer.encode("ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥") 20 | print(tokenizer.decode(enc)) 21 | 22 | ps = model(torch.tensor(enc).unsqueeze(1)) 23 | print(ps[0].shape) 24 | ``` 25 | ``` 26 | ''' 27 | Output: 28 | -------- 29 | [CLS] ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥[SEP] 30 | torch.Size([28, 1, 768]) 31 | ``` 32 | 33 | 34 | > Created by [Suraj Parmar/@parmarsuraj99](https://twitter.com/parmarsuraj99) 35 | 36 | > Made with in India 37 | -------------------------------------------------------------------------------- /model_cards/uncnlp/lxmert-base-uncased/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Hao Tan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /model_cards/uncnlp/lxmert-base-uncased/lxmert_model-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/model_cards/uncnlp/lxmert-base-uncased/lxmert_model-1.jpg -------------------------------------------------------------------------------- /model_cards/urduhack/roberta-urdu-small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ur 3 | thumbnail: https://raw.githubusercontent.com/urduhack/urduhack/master/docs/_static/urduhack.png 4 | tags: 5 | - roberta-urdu-small 6 | - urdu 7 | - transformers 8 | license: mit 9 | --- 10 | ## roberta-urdu-small 11 | 12 | [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/urduhack/urduhack/blob/master/LICENSE) 13 | ### Overview 14 | **Language model:** roberta-urdu-small 15 | **Model size:** 125M 16 | **Language:** Urdu 17 | **Training data:** News data from urdu news resources in Pakistan 18 | ### About roberta-urdu-small 19 | roberta-urdu-small is a language model for urdu language. 20 | ``` 21 | from transformers import pipeline 22 | fill_mask = pipeline("fill-mask", model="urduhack/roberta-urdu-small", tokenizer="urduhack/roberta-urdu-small") 23 | ``` 24 | ## Training procedure 25 | roberta-urdu-small was trained on urdu news corpus. Training data was normalized using normalization module from 26 | urduhack to eliminate characters from other languages like arabic. 27 | 28 | ### About Urduhack 29 | Urduhack is a Natural Language Processing (NLP) library for urdu language. 30 | Github: https://github.com/urduhack/urduhack 31 | -------------------------------------------------------------------------------- /model_cards/wietsedv/bert-base-dutch-cased/README.md: -------------------------------------------------------------------------------- 1 | # BERTje: A Dutch BERT model 2 | 3 | BERTje is a Dutch pre-trained BERT model developed at the University of Groningen. 4 | 5 | ⚠️ **The new home of this model is the [GroNLP](https://huggingface.co/GroNLP) organization.** 6 | 7 | BERTje now lives at: [`GroNLP/bert-base-dutch-cased`](https://huggingface.co/GroNLP/bert-base-dutch-cased) 8 | 9 | The model weights of the versions at `wietsedv/` and `GroNLP/` are the same, so do not worry if you use(d) `wietsedv/bert-base-dutch-cased`. 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /model_cards/wptoux/albert-chinese-large-qa/README.md: -------------------------------------------------------------------------------- 1 | # albert-chinese-large-qa 2 | Albert large QA model pretrained from baidu webqa and baidu dureader datasets. 3 | 4 | ## Data source 5 | + baidu webqa 1.0 6 | + baidu dureader 7 | 8 | ## Traing Method 9 | We combined the two datasets together and created a new dataset in squad format, including 705139 samples for training and 69638 samples for validation. 10 | We finetune the model based on the albert chinese large model. 
11 | 12 | ## Hyperparams 13 | + learning_rate 1e-5 14 | + max_seq_length 512 15 | + max_query_length 50 16 | + max_answer_length 300 17 | + doc_stride 256 18 | + num_train_epochs 2 19 | + warmup_steps 1000 20 | + per_gpu_train_batch_size 8 21 | + gradient_accumulation_steps 3 22 | + n_gpu 2 (Nvidia Tesla P100) 23 | 24 | ## Usage 25 | ``` 26 | from transformers import AutoModelForQuestionAnswering, BertTokenizer 27 | 28 | model = AutoModelForQuestionAnswering.from_pretrained('wptoux/albert-chinese-large-qa') 29 | tokenizer = BertTokenizer.from_pretrained('wptoux/albert-chinese-large-qa') 30 | ``` 31 | ***Important: use BertTokenizer*** 32 | 33 | ## MoreInfo 34 | Please visit https://github.com/wptoux/albert-chinese-large-webqa for details. 35 | -------------------------------------------------------------------------------- /model_cards/xlm-mlm-en-2048-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: cc-by-nc-4.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/xlm-roberta-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: mit 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/xlm-roberta-large-finetuned-conll03-german-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: de 3 | --- 4 | 5 | ## xlm-roberta-large-finetuned-conll03-german 6 | -------------------------------------------------------------------------------- /model_cards/yjernite/bart_eli5/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: en 3 | license: apache-2.0 4 | datasets: 5 | - eli5 6 | --- 7 | 8 | ## BART ELI5 9 | 10 | Read the article at https://yjernite.github.io/lfqa.html and try the demo at https://huggingface.co/qa/ 11 | -------------------------------------------------------------------------------- /model_cards/yuvraj/summarizer-cnndm/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: "en" 3 | tags: 4 | - summarization 5 | --- 6 | ​ 7 | # Summarization 8 | ​ 9 | ## Model description 10 | ​ 11 | BartForConditionalGeneration model fine tuned for summarization on 10000 samples from the cnn-dailymail dataset 12 | ​ 13 | ## How to use 14 | ​ 15 | PyTorch model available 16 | ​ 17 | ```python 18 | from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline 19 | ​ 20 | tokenizer = AutoTokenizer.from_pretrained("yuvraj/summarizer-cnndm") 21 | AutoModelWithLMHead.from_pretrained("yuvraj/summarizer-cnndm") 22 | ​ 23 | summarizer = pipeline('summarization', model=model, tokenizer=tokenizer) 24 | summarizer("") 25 | ​ 26 | ## Limitations and bias 27 | Trained on a small dataset 28 | -------------------------------------------------------------------------------- /model_cards/yuvraj/xSumm/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: "en" 3 | tags: 4 | - summarization 5 | - extreme summarization 6 | --- 7 | ​ 8 | ## Model description 9 | ​ 10 | BartForConditionalGenerationModel for extreme summarization- creates a one line abstractive summary of a given article 11 | ​ 12 | ## How to use 13 | ​ 14 | PyTorch model available 15 | ​ 16 | 
```python 17 | from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline 18 | ​ 19 | tokenizer = AutoTokenizer.from_pretrained("yuvraj/xSumm") 20 | model = AutoModelWithLMHead.from_pretrained("yuvraj/xSumm") 21 | ​ 22 | xsumm = pipeline('summarization', model=model, tokenizer=tokenizer) 23 | xsumm("") 24 | ​ 25 | ## Limitations and bias 26 | Trained on a small fraction of the xsumm training dataset 27 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py35'] 4 | -------------------------------------------------------------------------------- /scripts/fsmt/tests-to-run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # these scripts need to be run before any changes to FSMT-related code - it should cover all bases 17 | 18 | CUDA_VISIBLE_DEVICES="" RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 19 | RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 20 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = transformers 7 | known_third_party = 8 | absl 9 | conllu 10 | datasets 11 | elasticsearch 12 | fairseq 13 | faiss-cpu 14 | fastprogress 15 | fire 16 | fugashi 17 | git 18 | h5py 19 | matplotlib 20 | nltk 21 | numpy 22 | packaging 23 | pandas 24 | PIL 25 | psutil 26 | pytest 27 | pytorch_lightning 28 | rouge_score 29 | sacrebleu 30 | seqeval 31 | sklearn 32 | streamlit 33 | tensorboardX 34 | tensorflow 35 | tensorflow_datasets 36 | timeout_decorator 37 | torch 38 | torchtext 39 | torchvision 40 | torch_xla 41 | tqdm 42 | 43 | line_length = 119 44 | lines_after_imports = 2 45 | multi_line_output = 3 46 | use_parentheses = True 47 | 48 | [flake8] 49 | ignore = E203, E501, E741, W503, W605 50 | max-line-length = 119 51 | -------------------------------------------------------------------------------- /src/transformers/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/src/transformers/benchmark/__init__.py -------------------------------------------------------------------------------- 
/src/transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseTransformersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /src/transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import GlueDataset, GlueDataTrainingArguments 20 | from .language_modeling import ( 21 | LineByLineTextDataset, 22 | LineByLineWithRefDataset, 23 | LineByLineWithSOPTextDataset, 24 | TextDataset, 25 | TextDatasetForNextSentencePrediction, 26 | ) 27 | from .squad import SquadDataset, SquadDataTrainingArguments 28 | -------------------------------------------------------------------------------- /src/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 20 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 21 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 22 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 23 | -------------------------------------------------------------------------------- /src/transformers/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/src/transformers/models/__init__.py -------------------------------------------------------------------------------- /src/transformers/models/barthez/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_sentencepiece_available, is_tokenizers_available 20 | 21 | 22 | if is_sentencepiece_available(): 23 | from .tokenization_barthez import BarthezTokenizer 24 | 25 | if is_tokenizers_available(): 26 | from .tokenization_barthez_fast import BarthezTokenizerFast 27 | -------------------------------------------------------------------------------- /src/transformers/models/bert_japanese/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer 20 | -------------------------------------------------------------------------------- /src/transformers/models/bertweet/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .tokenization_bertweet import BertweetTokenizer 20 | -------------------------------------------------------------------------------- /src/transformers/models/deberta/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_deberta import DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, DebertaConfig 21 | from .tokenization_deberta import DebertaTokenizer 22 | 23 | 24 | if is_torch_available(): 25 | from .modeling_deberta import ( 26 | DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST, 27 | DebertaForSequenceClassification, 28 | DebertaModel, 29 | DebertaPreTrainedModel, 30 | ) 31 | -------------------------------------------------------------------------------- /src/transformers/models/dialogpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/src/transformers/models/dialogpt/__init__.py -------------------------------------------------------------------------------- /src/transformers/models/encoder_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_encoder_decoder import EncoderDecoderConfig 21 | 22 | 23 | if is_torch_available(): 24 | from .modeling_encoder_decoder import EncoderDecoderModel 25 | -------------------------------------------------------------------------------- /src/transformers/models/fsmt/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_fsmt import FSMT_PRETRAINED_CONFIG_ARCHIVE_MAP, FSMTConfig 21 | from .tokenization_fsmt import FSMTTokenizer 22 | 23 | 24 | if is_torch_available(): 25 | from .modeling_fsmt import FSMTForConditionalGeneration, FSMTModel, PretrainedFSMTModel 26 | -------------------------------------------------------------------------------- /src/transformers/models/herbert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from ...file_utils import is_tokenizers_available 20 | from .tokenization_herbert import HerbertTokenizer 21 | 22 | 23 | if is_tokenizers_available(): 24 | from .tokenization_herbert_fast import HerbertTokenizerFast 25 | -------------------------------------------------------------------------------- /src/transformers/models/marian/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_sentencepiece_available, is_tf_available, is_torch_available 20 | from .configuration_marian import MarianConfig 21 | 22 | 23 | if is_sentencepiece_available(): 24 | from .tokenization_marian import MarianTokenizer 25 | 26 | if is_torch_available(): 27 | from .modeling_marian import MarianMTModel 28 | 29 | if is_tf_available(): 30 | from .modeling_tf_marian import TFMarianMTModel 31 | -------------------------------------------------------------------------------- /src/transformers/models/mmbt/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_mmbt import MMBTConfig 21 | 22 | 23 | if is_torch_available(): 24 | from .modeling_mmbt import MMBTForClassification, MMBTModel, ModalEmbeddings 25 | -------------------------------------------------------------------------------- /src/transformers/models/phobert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .tokenization_phobert import PhobertTokenizer 20 | -------------------------------------------------------------------------------- /src/transformers/models/rag/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_torch_available 20 | from .configuration_rag import RagConfig 21 | from .retrieval_rag import RagRetriever 22 | from .tokenization_rag import RagTokenizer 23 | 24 | 25 | if is_torch_available(): 26 | from .modeling_rag import RagModel, RagSequenceForGeneration, RagTokenForGeneration 27 | -------------------------------------------------------------------------------- /src/transformers/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/src/transformers/utils/__init__.py -------------------------------------------------------------------------------- /src/transformers/utils/dummy_flax_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..file_utils import requires_flax 3 | 4 | 5 | FLAX_MODEL_MAPPING = None 6 | 7 | 8 | class FlaxAutoModel: 9 | def __init__(self, *args, **kwargs): 10 | requires_flax(self) 11 | 12 | @classmethod 13 | def from_pretrained(self, *args, **kwargs): 14 | requires_flax(self) 15 | 16 | 17 | class FlaxBertForMaskedLM: 18 | def __init__(self, *args, **kwargs): 19 | requires_flax(self) 20 | 21 | @classmethod 22 | def from_pretrained(self, *args, **kwargs): 23 | requires_flax(self) 24 | 25 | 26 | class FlaxBertModel: 27 | def __init__(self, *args, **kwargs): 28 | requires_flax(self) 29 | 30 | @classmethod 31 | def from_pretrained(self, *args, **kwargs): 32 | requires_flax(self) 33 | 34 | 35 | class FlaxRobertaModel: 36 | def __init__(self, *args, **kwargs): 37 | requires_flax(self) 38 | 39 | @classmethod 40 | def from_pretrained(self, *args, **kwargs): 41 | requires_flax(self) 42 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "example_name": "text classification", 3 | "directory_name": "{{cookiecutter.example_name|lower|replace(' ', '-')}}", 4 | "example_shortcut": "{{cookiecutter.directory_name}}", 5 | "model_class": "AutoModel", 6 | "authors": "The HuggingFace Team", 7 | "can_train_from_scratch": ["True", "False"] 8 | } -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "{{cookiecutter.modelname}}", 3 | "uppercase_modelname": "{{cookiecutter.uppercase_modelname}}", 4 | "lowercase_modelname": "{{cookiecutter.lowercase_modelname}}", 5 | "camelcase_modelname": "{{cookiecutter.camelcase_modelname}}", 6 | "authors": "{{cookiecutter.authors}}", 7 | "checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}", 8 | "tokenizer_type": "{{cookiecutter.tokenizer_type}}", 9 | "generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}" 10 | } 11 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "BrandNewBERT", 3 | "uppercase_modelname": "BRAND_NEW_BERT", 4 | "lowercase_modelname": "brand_new_bert", 5 | "camelcase_modelname": "BrandNewBert", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": ["Based on BERT", "Standalone"], 9 | "generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"] 10 | } -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "Template", 3 | "uppercase_modelname": "TEMPLATE", 4 | "lowercase_modelname": "template", 5 | "camelcase_modelname": "Template", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow" 10 | } 11 | -------------------------------------------------------------------------------- 
/templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplatePT", 3 | "uppercase_modelname": "TEMPLATE_PT", 4 | "lowercase_modelname": "template_pt", 5 | "camelcase_modelname": "TemplatePt", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "PyTorch" 10 | } 11 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/standalone.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateBI", 3 | "uppercase_modelname": "TEMPLATE_BI", 4 | "lowercase_modelname": "template_bi", 5 | "camelcase_modelname": "TemplateBi", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "bi-brand-new-bert-base-cased", 8 | "tokenizer_type": "Standalone", 9 | "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow" 10 | } 11 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateTF", 3 | "uppercase_modelname": "TEMPLATE_TF", 4 | "lowercase_modelname": "template_tf", 5 | "camelcase_modelname": "TemplateTf", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "TensorFlow" 10 | } 11 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/dummy-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_type": "roberta" 3 | } -------------------------------------------------------------------------------- /tests/fixtures/empty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/fixtures/empty.txt -------------------------------------------------------------------------------- /tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? 
||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /tests/fixtures/spiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/fixtures/spiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_no_bos.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astariul/transformers/5e637e6c690e45d13ebf7296e1ea9dcc188d0f07/tests/fixtures/test_sentencepiece_no_bos.model -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | *.* 2 | cache* 3 | temp* 4 | !*.txt 5 | !*.tsv 6 | !*.json 7 | !.gitignore -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/GermEval/labels.txt: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOCderiv 3 | B-LOCpart 4 | B-ORG 5 | B-ORGderiv 6 | B-ORGpart 7 | B-OTH 8 | B-OTHderiv 9 | B-OTHpart 10 | B-PER 11 | B-PERderiv 12 | B-PERpart 13 | I-LOC 14 | I-LOCderiv 15 | I-LOCpart 16 | I-ORG 17 | I-ORGderiv 18 | I-ORGpart 19 | I-OTH 20 | I-OTHderiv 21 | I-OTHpart 22 | I-PER 23 | I-PERderiv 24 | I-PERpart 25 | O 26 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/dev.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0000 none none A man with a hard hat is dancing. A man wearing a hard hat is dancing. 5.000 3 | 1 main-captions MSRvid 2012test 0002 none none A young child is riding a horse. A child is riding a horse. 4.750 4 | 2 main-captions MSRvid 2012test 0003 none none A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 5.000 5 | 3 main-captions MSRvid 2012test 0007 none none A woman is playing the guitar. A man is playing guitar. 2.400 6 | 4 main-captions MSRvid 2012test 0008 none none A woman is playing the flute. A man is playing a flute. 2.750 7 | 5 main-captions MSRvid 2012test 0010 none none A woman is cutting an onion. A man is cutting onions. 2.615 8 | 6 main-captions MSRvid 2012test 0015 none none A man is erasing a chalk board. The man is erasing the chalk board. 5.000 9 | 7 main-captions MSRvid 2012test 0023 none none A woman is carrying a boy. A woman is carrying her baby. 2.333 10 | 8 main-captions MSRvid 2012test 0027 none none Three men are playing guitars. Three men are on stage playing guitars. 
/tests/fixtures/tests_samples/STS-B/train.tsv:
--------------------------------------------------------------------------------
1 | index genre filename year old_index source1 source2 sentence1 sentence2 score
2 | 0 main-captions MSRvid 2012test 0001 none none A plane is taking off. An air plane is taking off. 5.000
3 | 1 main-captions MSRvid 2012test 0004 none none A man is playing a large flute. A man is playing a flute. 3.800
4 | 2 main-captions MSRvid 2012test 0005 none none A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 3.800
5 | 3 main-captions MSRvid 2012test 0006 none none Three men are playing chess. Two men are playing chess. 2.600
6 | 4 main-captions MSRvid 2012test 0009 none none A man is playing the cello. A man seated is playing the cello. 4.250
7 | 5 main-captions MSRvid 2012test 0011 none none Some men are fighting. Two men are fighting. 4.250
8 | 6 main-captions MSRvid 2012test 0012 none none A man is smoking. A man is skating. 0.500
9 | 7 main-captions MSRvid 2012test 0013 none none The man is playing the piano. The man is playing the guitar. 1.600
10 | 8 main-captions MSRvid 2012test 0014 none none A man is playing on a guitar and singing. A woman is playing an acoustic guitar and singing. 2.200
11 | 
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019-present, the HuggingFace Inc. team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import unittest
17 | from unittest.mock import patch
18 | 
19 | from transformers.testing_utils import CaptureStd
20 | 
21 | 
22 | class CLITest(unittest.TestCase):
23 |     @patch("sys.argv", ["fakeprogrampath", "env"])
24 |     def test_cli_env(self):
25 |         # test transformers-cli env
26 |         import transformers.commands.transformers_cli
27 | 
28 |         with CaptureStd() as cs:
29 |             transformers.commands.transformers_cli.main()
30 |         assert "Python version" in cs.out
31 |         assert "Platform" in cs.out
32 |         assert "Using distributed or parallel set-up in script?" in cs.out
33 | 
--------------------------------------------------------------------------------
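`test_cli.py` patches `sys.argv` and calls the CLI entry point in-process (from a shell this corresponds to running `transformers-cli env`). Reusing only the names that appear in the test itself, the same check could be reproduced outside unittest roughly as follows; this is a sketch for debugging, not a replacement for the test:

```python
# Hypothetical sketch: run the `env` subcommand in-process, as the test above
# does, and capture the environment report it prints.
import sys

import transformers.commands.transformers_cli
from transformers.testing_utils import CaptureStd

sys.argv = ["transformers-cli", "env"]  # same effect as the @patch in the test
with CaptureStd() as cs:
    transformers.commands.transformers_cli.main()

print(cs.out)  # should mention "Python version", "Platform", ...
```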
/tests/test_pipelines_feature_extraction.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from .test_pipelines_common import MonoInputPipelineCommonMixin
18 | 
19 | 
20 | class FeatureExtractionPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
21 |     pipeline_task = "feature-extraction"
22 |     small_models = [
23 |         "sshleifer/tiny-distilbert-base-cased"
24 |     ]  # Default model - Models tested without the @slow decorator
25 |     large_models = [None]  # Models tested with the @slow decorator
26 |     mandatory_keys = {}  # Keys which should be in the output
27 | 
--------------------------------------------------------------------------------
/tests/test_pipelines_sentiment_analysis.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from .test_pipelines_common import MonoInputPipelineCommonMixin
18 | 
19 | 
20 | class SentimentAnalysisPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
21 |     pipeline_task = "sentiment-analysis"
22 |     small_models = [
23 |         "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english"
24 |     ]  # Default model - Models tested without the @slow decorator
25 |     large_models = [None]  # Models tested with the @slow decorator
26 |     mandatory_keys = {"label", "score"}  # Keys which should be in the output
27 | 
--------------------------------------------------------------------------------
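The two pipeline test classes above only declare metadata (task name, tiny default checkpoints, expected output keys); the shared behaviour lives in `MonoInputPipelineCommonMixin`. As a rough illustration of what the sentiment-analysis case implies, a hypothetical direct use of the pipeline with the same tiny checkpoint, checking the `mandatory_keys` declared in the test:

```python
# Hypothetical sketch mirroring SentimentAnalysisPipelineTests: each output
# dict should contain the mandatory "label" and "score" keys.
from transformers import pipeline

nlp = pipeline(
    "sentiment-analysis",
    model="sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
)
outputs = nlp("Who was Jim Henson ?")

for output in outputs:
    assert {"label", "score"} <= output.keys()  # mandatory_keys from the test above
```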
/tests/test_pipelines_text2text_generation.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from .test_pipelines_common import MonoInputPipelineCommonMixin
18 | 
19 | 
20 | class Text2TextGenerationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
21 |     pipeline_task = "text2text-generation"
22 |     small_models = ["patrickvonplaten/t5-tiny-random"]  # Default model - Models tested without the @slow decorator
23 |     large_models = []  # Models tested with the @slow decorator
24 |     invalid_inputs = [4, ""]
25 |     mandatory_keys = ["generated_text"]
26 | 
--------------------------------------------------------------------------------
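For the text2text-generation case, the declared `mandatory_keys` translate into roughly the following expectation (again a sketch only, using the tiny random T5 checkpoint named in the test, so the generated text itself carries no meaning):

```python
# Hypothetical sketch mirroring Text2TextGenerationPipelineTests: each output
# dict must contain the "generated_text" key declared in mandatory_keys.
from transformers import pipeline

text2text = pipeline("text2text-generation", model="patrickvonplaten/t5-tiny-random")
outputs = text2text("Who was Jim Henson ?")

assert "generated_text" in outputs[0]
print(outputs[0]["generated_text"])  # random weights, so the text is gibberish
```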